[NET_SCHED]: kill PSCHED_AUDIT_TDIFF
1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
31 #include <net/ip.h>
32 #include <net/netlink.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38
39 /*      Class-Based Queueing (CBQ) algorithm.
40         =======================================
41
42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43                  Management Models for Packet Networks",
44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49                  Parameters", 1996
50
51                  [4] Sally Floyd and Michael Speer, "Experimental Results
52                  for Class-Based Queueing", 1998, not published.
53
54         -----------------------------------------------------------------------
55
56         Algorithm skeleton was taken from NS simulator cbq.cc.
57         If someone wants to check this code against the LBL version,
58         they should take into account that ONLY the skeleton was borrowed;
59         the implementation is different. In particular:
60
61         --- The WRR algorithm is different. Our version looks more
62         reasonable (I hope) and works when quanta are allowed to be
63         less than the MTU, which is always the case when real-time classes
64         have small rates. Note that the statement of [3] is
65         incomplete: delay may actually be estimated even if the class's
66         per-round allotment is less than the MTU. Namely, if the per-round
67         allotment is W*r_i, and r_1+...+r_k = r < 1
68
69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71         In the worst case we have IntServ estimate with D = W*r+k*MTU
72         and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76         interpret some places that look like wrong translations
77         from NS. Anyone is welcome to find these differences
78         and explain to me why I am wrong 8).
79
80         --- Linux has no EOI event, so we cannot estimate true class
81         idle time. The workaround is to treat the next dequeue event
82         as a sign that the previous packet has finished. This is wrong because of
83         internal device queueing, but on a permanently loaded link it holds.
84         Moreover, combined with the clock integrator, this scheme looks
85         very close to an ideal solution.  */
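/*      A purely illustrative reading of the delay bound above (the numbers
        are assumed, not taken from this file): with MTU = 1500 bytes,
        W = 2000 bytes and k = 2 classes with r_1 = 0.1, r_2 = 0.3 (so
        r = 0.4 and W*r = 800 bytes), class 1 has per-round allotment
        W*r_1 = 200 bytes and

                delay_1 <= (ceil(1500/200)*800 + 800 + 2*1500)/B = 10200/B,

        i.e. roughly 8.2 ms on a 10 Mbit/s link (B ~= 1.25e6 bytes/sec).
        In IntServ terms this is D = W*r + k*MTU = 3800 bytes and
        C = MTU*r = 600 bytes.  */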
86
87 struct cbq_sched_data;
88
89
90 struct cbq_class
91 {
92         struct cbq_class        *next;          /* hash table link */
93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
94
95 /* Parameters */
96         u32                     classid;
97         unsigned char           priority;       /* class priority */
98         unsigned char           priority2;      /* priority to be used after overlimit */
99         unsigned char           ewma_log;       /* time constant for idle time calculation */
100         unsigned char           ovl_strategy;
101 #ifdef CONFIG_NET_CLS_POLICE
102         unsigned char           police;
103 #endif
104
105         u32                     defmap;
106
107         /* Link-sharing scheduler parameters */
108         long                    maxidle;        /* Class parameters: see below. */
109         long                    offtime;
110         long                    minidle;
111         u32                     avpkt;
112         struct qdisc_rate_table *R_tab;
113
114         /* Overlimit strategy parameters */
115         void                    (*overlimit)(struct cbq_class *cl);
116         psched_tdiff_t          penalty;
117
118         /* General scheduler (WRR) parameters */
119         long                    allot;
120         long                    quantum;        /* Allotment per WRR round */
121         long                    weight;         /* Relative allotment: see below */
122
123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
124         struct cbq_class        *split;         /* Ptr to split node */
125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
128                                                    parent otherwise */
129         struct cbq_class        *sibling;       /* Sibling chain */
130         struct cbq_class        *children;      /* Pointer to children chain */
131
132         struct Qdisc            *q;             /* Elementary queueing discipline */
133
134
135 /* Variables */
136         unsigned char           cpriority;      /* Effective priority */
137         unsigned char           delayed;
138         unsigned char           level;          /* level of the class in hierarchy:
139                                                    0 for leaf classes, and maximal
140                                                    level of children + 1 for nodes.
141                                                  */
142
143         psched_time_t           last;           /* Last end of service */
144         psched_time_t           undertime;
145         long                    avgidle;
146         long                    deficit;        /* Saved deficit for WRR */
147         psched_time_t           penalized;
148         struct gnet_stats_basic bstats;
149         struct gnet_stats_queue qstats;
150         struct gnet_stats_rate_est rate_est;
151         spinlock_t              *stats_lock;
152         struct tc_cbq_xstats    xstats;
153
154         struct tcf_proto        *filter_list;
155
156         int                     refcnt;
157         int                     filters;
158
159         struct cbq_class        *defaults[TC_PRIO_MAX+1];
160 };
161
162 struct cbq_sched_data
163 {
164         struct cbq_class        *classes[16];           /* Hash table of all classes */
165         int                     nclasses[TC_CBQ_MAXPRIO+1];
166         unsigned                quanta[TC_CBQ_MAXPRIO+1];
167
168         struct cbq_class        link;
169
170         unsigned                activemask;
171         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
172                                                                    with backlog */
173
174 #ifdef CONFIG_NET_CLS_POLICE
175         struct cbq_class        *rx_class;
176 #endif
177         struct cbq_class        *tx_class;
178         struct cbq_class        *tx_borrowed;
179         int                     tx_len;
180         psched_time_t           now;            /* Cached timestamp */
181         psched_time_t           now_rt;         /* Cached real time */
182         unsigned                pmask;
183
184         struct hrtimer          delay_timer;
185         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
186                                                    started when CBQ has
187                                                    backlog, but cannot
188                                                    transmit just now */
189         psched_tdiff_t          wd_expires;
190         int                     toplevel;
191         u32                     hgenerator;
192 };
193
194
195 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
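/* L2T ("length to time") looks up the time needed to transmit a len-byte
 * packet at the class rate.  The rate table holds one entry per
 * 2^cell_log-byte cell, so len is shifted right by cell_log to pick the
 * slot; e.g. with cell_log = 3 (an assumed value) a 1000-byte packet uses
 * slot 1000 >> 3 = 125.
 */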
196
197
198 static __inline__ unsigned cbq_hash(u32 h)
199 {
200         h ^= h>>8;
201         h ^= h>>4;
202         return h&0xF;
203 }
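/* cbq_hash() folds a classid into one of the 16 buckets of classes[].
 * After the two XOR folds, the low nibble of the result is the XOR of the
 * four nibbles of the classid's minor (low 16-bit) part; for example,
 * classid 0x00010030 lands in bucket 0 ^ 0 ^ 3 ^ 0 = 3.
 */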
204
205 static __inline__ struct cbq_class *
206 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 {
208         struct cbq_class *cl;
209
210         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211                 if (cl->classid == classid)
212                         return cl;
213         return NULL;
214 }
215
216 #ifdef CONFIG_NET_CLS_POLICE
217
218 static struct cbq_class *
219 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 {
221         struct cbq_class *cl, *new;
222
223         for (cl = this->tparent; cl; cl = cl->tparent)
224                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225                         return new;
226
227         return NULL;
228 }
229
230 #endif
231
232 /* Classify packet. The procedure is pretty complicated, but
233    it allows us to combine link sharing and priority scheduling
234    transparently.
235
236    Namely, you can put link-sharing rules (e.g. route based) at the root of the CBQ,
237    so that they resolve to split nodes. Then packets are classified
238    by logical priority, or a more specific classifier may be attached
239    to the split node.
240  */
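/* As a hypothetical illustration of the scheme above (handle, rates and
   defmap value are invented, not taken from this file), a split node with
   priority-based defaults could be set up roughly as:

       tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
       tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10Mbit \
               rate 1Mbit allot 1514 prio 5 avpkt 1000 split 1:0 defmap ff

   Packets that no more specific filter on the split node matches fall back
   to the defaults[] lookup performed by cbq_classify(), indexed by the
   packet's logical (TOS) priority.
 */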
241
242 static struct cbq_class *
243 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 {
245         struct cbq_sched_data *q = qdisc_priv(sch);
246         struct cbq_class *head = &q->link;
247         struct cbq_class **defmap;
248         struct cbq_class *cl = NULL;
249         u32 prio = skb->priority;
250         struct tcf_result res;
251
252         /*
253          *  Step 1. If skb->priority points to one of our classes, use it.
254          */
255         if (TC_H_MAJ(prio^sch->handle) == 0 &&
256             (cl = cbq_class_lookup(q, prio)) != NULL)
257                 return cl;
258
259         *qerr = NET_XMIT_BYPASS;
260         for (;;) {
261                 int result = 0;
262                 defmap = head->defaults;
263
264                 /*
265                  * Step 2+n. Apply classifier.
266                  */
267                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268                         goto fallback;
269
270                 if ((cl = (void*)res.class) == NULL) {
271                         if (TC_H_MAJ(res.classid))
272                                 cl = cbq_class_lookup(q, res.classid);
273                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274                                 cl = defmap[TC_PRIO_BESTEFFORT];
275
276                         if (cl == NULL || cl->level >= head->level)
277                                 goto fallback;
278                 }
279
280 #ifdef CONFIG_NET_CLS_ACT
281                 switch (result) {
282                 case TC_ACT_QUEUED:
283                 case TC_ACT_STOLEN:
284                         *qerr = NET_XMIT_SUCCESS;
285                 case TC_ACT_SHOT:
286                         return NULL;
287                 }
288 #elif defined(CONFIG_NET_CLS_POLICE)
289                 switch (result) {
290                 case TC_POLICE_RECLASSIFY:
291                         return cbq_reclassify(skb, cl);
292                 case TC_POLICE_SHOT:
293                         return NULL;
294                 default:
295                         break;
296                 }
297 #endif
298                 if (cl->level == 0)
299                         return cl;
300
301                 /*
302                  * Step 3+n. If the classifier selected a link-sharing class,
303                  *         apply the agency-specific classifier.
304                  *         Repeat this procedure until we hit a leaf node.
305                  */
306                 head = cl;
307         }
308
309 fallback:
310         cl = head;
311
312         /*
313          * Step 4. No success...
314          */
315         if (TC_H_MAJ(prio) == 0 &&
316             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318                 return head;
319
320         return cl;
321 }
322
323 /*
324    A packet has just been enqueued to an empty class.
325    cbq_activate_class adds the class to the tail of the active class list
326    of its priority band.
327  */
328
329 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 {
331         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332         int prio = cl->cpriority;
333         struct cbq_class *cl_tail;
334
335         cl_tail = q->active[prio];
336         q->active[prio] = cl;
337
338         if (cl_tail != NULL) {
339                 cl->next_alive = cl_tail->next_alive;
340                 cl_tail->next_alive = cl;
341         } else {
342                 cl->next_alive = cl;
343                 q->activemask |= (1<<prio);
344         }
345 }
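/* Note: q->active[prio] points at the TAIL of the circular next_alive
 * list for the band, and tail->next_alive is the head.  Making the newly
 * backlogged class the new tail above is what appends it at the end of
 * the round-robin order.
 */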
346
347 /*
348    Unlink the class from the active chain.
349    Note that the same procedure is also done directly in cbq_dequeue*
350    during the round-robin procedure.
351  */
352
353 static void cbq_deactivate_class(struct cbq_class *this)
354 {
355         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356         int prio = this->cpriority;
357         struct cbq_class *cl;
358         struct cbq_class *cl_prev = q->active[prio];
359
360         do {
361                 cl = cl_prev->next_alive;
362                 if (cl == this) {
363                         cl_prev->next_alive = cl->next_alive;
364                         cl->next_alive = NULL;
365
366                         if (cl == q->active[prio]) {
367                                 q->active[prio] = cl_prev;
368                                 if (cl == q->active[prio]) {
369                                         q->active[prio] = NULL;
370                                         q->activemask &= ~(1<<prio);
371                                         return;
372                                 }
373                         }
374                         return;
375                 }
376         } while ((cl_prev = cl) != q->active[prio]);
377 }
378
379 static void
380 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 {
382         int toplevel = q->toplevel;
383
384         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385                 psched_time_t now;
386                 psched_tdiff_t incr;
387
388                 PSCHED_GET_TIME(now);
389                 incr = PSCHED_TDIFF(now, q->now_rt);
390                 PSCHED_TADD2(q->now, incr, now);
391
392                 do {
393                         if (PSCHED_TLESS(cl->undertime, now)) {
394                                 q->toplevel = cl->level;
395                                 return;
396                         }
397                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398         }
399 }
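/* Note: q->toplevel is the "top-level" link-sharing cutoff;
 * cbq_under_limit() refuses to borrow from ancestors whose level exceeds
 * it.  cbq_mark_toplevel() lowers the cutoff to the level of the first
 * underlimit class on the arriving packet's borrow chain, so that other
 * classes cannot borrow from higher up until the cutoff is regenerated.
 */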
400
401 static int
402 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 {
404         struct cbq_sched_data *q = qdisc_priv(sch);
405         int len = skb->len;
406         int ret;
407         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409 #ifdef CONFIG_NET_CLS_POLICE
410         q->rx_class = cl;
411 #endif
412         if (cl == NULL) {
413                 if (ret == NET_XMIT_BYPASS)
414                         sch->qstats.drops++;
415                 kfree_skb(skb);
416                 return ret;
417         }
418
419 #ifdef CONFIG_NET_CLS_POLICE
420         cl->q->__parent = sch;
421 #endif
422         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423                 sch->q.qlen++;
424                 sch->bstats.packets++;
425                 sch->bstats.bytes+=len;
426                 cbq_mark_toplevel(q, cl);
427                 if (!cl->next_alive)
428                         cbq_activate_class(cl);
429                 return ret;
430         }
431
432         sch->qstats.drops++;
433         cbq_mark_toplevel(q, cl);
434         cl->qstats.drops++;
435         return ret;
436 }
437
438 static int
439 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 {
441         struct cbq_sched_data *q = qdisc_priv(sch);
442         struct cbq_class *cl;
443         int ret;
444
445         if ((cl = q->tx_class) == NULL) {
446                 kfree_skb(skb);
447                 sch->qstats.drops++;
448                 return NET_XMIT_CN;
449         }
450         q->tx_class = NULL;
451
452         cbq_mark_toplevel(q, cl);
453
454 #ifdef CONFIG_NET_CLS_POLICE
455         q->rx_class = cl;
456         cl->q->__parent = sch;
457 #endif
458         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459                 sch->q.qlen++;
460                 sch->qstats.requeues++;
461                 if (!cl->next_alive)
462                         cbq_activate_class(cl);
463                 return 0;
464         }
465         sch->qstats.drops++;
466         cl->qstats.drops++;
467         return ret;
468 }
469
470 /* Overlimit actions */
471
472 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474 static void cbq_ovl_classic(struct cbq_class *cl)
475 {
476         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
478
479         if (!cl->delayed) {
480                 delay += cl->offtime;
481
482                 /*
483                    The class goes to sleep, so it will have no
484                    chance to work off its negative avgidle. Let's forgive it 8)
485
486                    BTW cbq-2.0 has a bug in this
487                    place: apparently they forgot to shift it by cl->ewma_log.
488                  */
489                 if (cl->avgidle < 0)
490                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491                 if (cl->avgidle < cl->minidle)
492                         cl->avgidle = cl->minidle;
493                 if (delay <= 0)
494                         delay = 1;
495                 PSCHED_TADD2(q->now, delay, cl->undertime);
496
497                 cl->xstats.overactions++;
498                 cl->delayed = 1;
499         }
500         if (q->wd_expires == 0 || q->wd_expires > delay)
501                 q->wd_expires = delay;
502
503         /* Dirty work! We must schedule wakeups based on
504            the real available rate rather than the leaf rate,
505            which may be tiny (even zero).
506          */
507         if (q->toplevel == TC_CBQ_MAXLEVEL) {
508                 struct cbq_class *b;
509                 psched_tdiff_t base_delay = q->wd_expires;
510
511                 for (b = cl->borrow; b; b = b->borrow) {
512                         delay = PSCHED_TDIFF(b->undertime, q->now);
513                         if (delay < base_delay) {
514                                 if (delay <= 0)
515                                         delay = 1;
516                                 base_delay = delay;
517                         }
518                 }
519
520                 q->wd_expires = base_delay;
521         }
522 }
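/* A worked example of the avgidle "forgiveness" above (numbers assumed):
 * with avgidle = -640 and ewma_log = 5 the sleep delay is reduced by
 * 640 - (640 >> 5) = 620 psched ticks, i.e. by the part of the negative
 * backlog that the EWMA would otherwise keep charging the class, and the
 * result is clamped to at least 1 tick.
 */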
523
524 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
525    when they go overlimit
526  */
527
528 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 {
530         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531         struct cbq_class *this = cl;
532
533         do {
534                 if (cl->level > q->toplevel) {
535                         cl = NULL;
536                         break;
537                 }
538         } while ((cl = cl->borrow) != NULL);
539
540         if (cl == NULL)
541                 cl = this;
542         cbq_ovl_classic(cl);
543 }
544
545 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
546
547 static void cbq_ovl_delay(struct cbq_class *cl)
548 {
549         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
551
552         if (!cl->delayed) {
553                 psched_time_t sched = q->now;
554                 ktime_t expires;
555
556                 delay += cl->offtime;
557                 if (cl->avgidle < 0)
558                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
559                 if (cl->avgidle < cl->minidle)
560                         cl->avgidle = cl->minidle;
561                 PSCHED_TADD2(q->now, delay, cl->undertime);
562
563                 if (delay > 0) {
564                         sched += delay + cl->penalty;
565                         cl->penalized = sched;
566                         cl->cpriority = TC_CBQ_MAXPRIO;
567                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
568
569                         expires = ktime_set(0, 0);
570                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
571                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
572                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
573                                                   expires)) > 0)
574                                 q->delay_timer.expires = expires;
575                         hrtimer_restart(&q->delay_timer);
576                         cl->delayed = 1;
577                         cl->xstats.overactions++;
578                         return;
579                 }
580                 delay = 1;
581         }
582         if (q->wd_expires == 0 || q->wd_expires > delay)
583                 q->wd_expires = delay;
584 }
585
586 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587
588 static void cbq_ovl_lowprio(struct cbq_class *cl)
589 {
590         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
591
592         cl->penalized = q->now + cl->penalty;
593
594         if (cl->cpriority != cl->priority2) {
595                 cl->cpriority = cl->priority2;
596                 q->pmask |= (1<<cl->cpriority);
597                 cl->xstats.overactions++;
598         }
599         cbq_ovl_classic(cl);
600 }
601
602 /* TC_CBQ_OVL_DROP: penalize class by dropping */
603
604 static void cbq_ovl_drop(struct cbq_class *cl)
605 {
606         if (cl->q->ops->drop)
607                 if (cl->q->ops->drop(cl->q))
608                         cl->qdisc->q.qlen--;
609         cl->xstats.overactions++;
610         cbq_ovl_classic(cl);
611 }
612
613 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
614                                        psched_time_t now)
615 {
616         struct cbq_class *cl;
617         struct cbq_class *cl_prev = q->active[prio];
618         psched_time_t sched = now;
619
620         if (cl_prev == NULL)
621                 return 0;
622
623         do {
624                 cl = cl_prev->next_alive;
625                 if (now - cl->penalized > 0) {
626                         cl_prev->next_alive = cl->next_alive;
627                         cl->next_alive = NULL;
628                         cl->cpriority = cl->priority;
629                         cl->delayed = 0;
630                         cbq_activate_class(cl);
631
632                         if (cl == q->active[prio]) {
633                                 q->active[prio] = cl_prev;
634                                 if (cl == q->active[prio]) {
635                                         q->active[prio] = NULL;
636                                         return 0;
637                                 }
638                         }
639
640                         cl = cl_prev->next_alive;
641                 } else if (sched - cl->penalized > 0)
642                         sched = cl->penalized;
643         } while ((cl_prev = cl) != q->active[prio]);
644
645         return sched - now;
646 }
647
648 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
649 {
650         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
651                                                 delay_timer);
652         struct Qdisc *sch = q->watchdog.qdisc;
653         psched_time_t now;
654         psched_tdiff_t delay = 0;
655         unsigned pmask;
656
657         PSCHED_GET_TIME(now);
658
659         pmask = q->pmask;
660         q->pmask = 0;
661
662         while (pmask) {
663                 int prio = ffz(~pmask);
664                 psched_tdiff_t tmp;
665
666                 pmask &= ~(1<<prio);
667
668                 tmp = cbq_undelay_prio(q, prio, now);
669                 if (tmp > 0) {
670                         q->pmask |= 1<<prio;
671                         if (tmp < delay || delay == 0)
672                                 delay = tmp;
673                 }
674         }
675
676         if (delay) {
677                 ktime_t time;
678
679                 time = ktime_set(0, 0);
680                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
681                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
682         }
683
684         sch->flags &= ~TCQ_F_THROTTLED;
685         netif_schedule(sch->dev);
686         return HRTIMER_NORESTART;
687 }
688
689
690 #ifdef CONFIG_NET_CLS_POLICE
691
692 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
693 {
694         int len = skb->len;
695         struct Qdisc *sch = child->__parent;
696         struct cbq_sched_data *q = qdisc_priv(sch);
697         struct cbq_class *cl = q->rx_class;
698
699         q->rx_class = NULL;
700
701         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
702
703                 cbq_mark_toplevel(q, cl);
704
705                 q->rx_class = cl;
706                 cl->q->__parent = sch;
707
708                 if (cl->q->enqueue(skb, cl->q) == 0) {
709                         sch->q.qlen++;
710                         sch->bstats.packets++;
711                         sch->bstats.bytes+=len;
712                         if (!cl->next_alive)
713                                 cbq_activate_class(cl);
714                         return 0;
715                 }
716                 sch->qstats.drops++;
717                 return 0;
718         }
719
720         sch->qstats.drops++;
721         return -1;
722 }
723 #endif
724
725 /*
726    This is a mission-critical procedure.
727
728    We "regenerate" the toplevel cutoff if the transmitting class
729    has backlog and is not regulated. This is not part of the
730    original CBQ description, but it looks more reasonable.
731    It may be wrong; this question needs further investigation.
732 */
733
734 static __inline__ void
735 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
736                     struct cbq_class *borrowed)
737 {
738         if (cl && q->toplevel >= borrowed->level) {
739                 if (cl->q->q.qlen > 1) {
740                         do {
741                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
742                                         q->toplevel = borrowed->level;
743                                         return;
744                                 }
745                         } while ((borrowed=borrowed->borrow) != NULL);
746                 }
747 #if 0
748         /* This is not necessary now. Uncommenting it
749            would save CPU cycles but decrease fairness.
750          */
751                 q->toplevel = TC_CBQ_MAXLEVEL;
752 #endif
753         }
754 }
755
756 static void
757 cbq_update(struct cbq_sched_data *q)
758 {
759         struct cbq_class *this = q->tx_class;
760         struct cbq_class *cl = this;
761         int len = q->tx_len;
762
763         q->tx_class = NULL;
764
765         for ( ; cl; cl = cl->share) {
766                 long avgidle = cl->avgidle;
767                 long idle;
768
769                 cl->bstats.packets++;
770                 cl->bstats.bytes += len;
771
772                 /*
773                    (now - last) is total time between packet right edges.
774                    (last_pktlen/rate) is "virtual" busy time, so that
775
776                          idle = (now - last) - last_pktlen/rate
777                  */
778
779                 idle = PSCHED_TDIFF(q->now, cl->last);
780                 if ((unsigned long)idle > 128*1024*1024) {
781                         avgidle = cl->maxidle;
782                 } else {
783                         idle -= L2T(cl, len);
784
785                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
786                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
787                    cl->avgidle == true_avgidle/W,
788                    hence:
789                  */
790                         avgidle += idle - (avgidle>>cl->ewma_log);
791                 }
792
793                 if (avgidle <= 0) {
794                         /* Overlimit or at-limit */
795
796                         if (avgidle < cl->minidle)
797                                 avgidle = cl->minidle;
798
799                         cl->avgidle = avgidle;
800
801                         /* Calculate the expected time when this class
802                            will be allowed to send.
803                            That will occur when:
804                            (1-W)*true_avgidle + W*delay = 0, i.e.
805                            idle = (1/W - 1)*(-true_avgidle)
806                            or
807                            idle = (1 - W)*(-cl->avgidle);
808                          */
809                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
810
811                         /*
812                            That is not all.
813                            To maintain the rate allocated to the class,
814                            we add to undertime the virtual clock time
815                            needed to complete the transmitted packet.
816                            (len/phys_bandwidth has already elapsed by
817                            the time cbq_update is called.)
818                          */
819
820                         idle -= L2T(&q->link, len);
821                         idle += L2T(cl, len);
822
823                         PSCHED_TADD2(q->now, idle, cl->undertime);
824                 } else {
825                         /* Underlimit */
826
827                         PSCHED_SET_PASTPERFECT(cl->undertime);
828                         if (avgidle > cl->maxidle)
829                                 cl->avgidle = cl->maxidle;
830                         else
831                                 cl->avgidle = avgidle;
832                 }
833                 cl->last = q->now;
834         }
835
836         cbq_update_toplevel(q, this, q->tx_borrowed);
837 }
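/* A numeric check of the scaled EWMA update in cbq_update() (values
 * assumed): with ewma_log = 5 (W = 1/32), cl->avgidle = 320 (true_avgidle
 * = 10) and a measured idle of 6, avgidle += idle - (avgidle >> ewma_log)
 * yields 320 + 6 - 10 = 316, i.e. true_avgidle = 316/32 = 9.875, which is
 * exactly (1 - W)*10 + W*6.
 */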
838
839 static __inline__ struct cbq_class *
840 cbq_under_limit(struct cbq_class *cl)
841 {
842         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843         struct cbq_class *this_cl = cl;
844
845         if (cl->tparent == NULL)
846                 return cl;
847
848         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
849             !PSCHED_TLESS(q->now, cl->undertime)) {
850                 cl->delayed = 0;
851                 return cl;
852         }
853
854         do {
855                 /* This is a very suspicious place. Currently the overlimit
856                    action is generated for non-bounded classes
857                    only if the link is completely congested.
858                    Though this agrees with the ancestor-only paradigm,
859                    it looks very stupid. In particular,
860                    it means that this chunk of code will either
861                    never be called or result in strong amplification
862                    of burstiness. Dangerous and silly, but
863                    no other solution exists.
864                  */
865                 if ((cl = cl->borrow) == NULL) {
866                         this_cl->qstats.overlimits++;
867                         this_cl->overlimit(this_cl);
868                         return NULL;
869                 }
870                 if (cl->level > q->toplevel)
871                         return NULL;
872         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
873                  PSCHED_TLESS(q->now, cl->undertime));
874
875         cl->delayed = 0;
876         return cl;
877 }
878
879 static __inline__ struct sk_buff *
880 cbq_dequeue_prio(struct Qdisc *sch, int prio)
881 {
882         struct cbq_sched_data *q = qdisc_priv(sch);
883         struct cbq_class *cl_tail, *cl_prev, *cl;
884         struct sk_buff *skb;
885         int deficit;
886
887         cl_tail = cl_prev = q->active[prio];
888         cl = cl_prev->next_alive;
889
890         do {
891                 deficit = 0;
892
893                 /* Start round */
894                 do {
895                         struct cbq_class *borrow = cl;
896
897                         if (cl->q->q.qlen &&
898                             (borrow = cbq_under_limit(cl)) == NULL)
899                                 goto skip_class;
900
901                         if (cl->deficit <= 0) {
902                                 /* Class exhausted its allotment per
903                                    this round. Switch to the next one.
904                                  */
905                                 deficit = 1;
906                                 cl->deficit += cl->quantum;
907                                 goto next_class;
908                         }
909
910                         skb = cl->q->dequeue(cl->q);
911
912                         /* Class did not give us any skb :-(
913                            This can happen even if cl->q->q.qlen != 0,
914                            e.g. if cl->q == "tbf"
915                          */
916                         if (skb == NULL)
917                                 goto skip_class;
918
919                         cl->deficit -= skb->len;
920                         q->tx_class = cl;
921                         q->tx_borrowed = borrow;
922                         if (borrow != cl) {
923 #ifndef CBQ_XSTATS_BORROWS_BYTES
924                                 borrow->xstats.borrows++;
925                                 cl->xstats.borrows++;
926 #else
927                                 borrow->xstats.borrows += skb->len;
928                                 cl->xstats.borrows += skb->len;
929 #endif
930                         }
931                         q->tx_len = skb->len;
932
933                         if (cl->deficit <= 0) {
934                                 q->active[prio] = cl;
935                                 cl = cl->next_alive;
936                                 cl->deficit += cl->quantum;
937                         }
938                         return skb;
939
940 skip_class:
941                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
942                                 /* Class is empty or penalized.
943                                    Unlink it from active chain.
944                                  */
945                                 cl_prev->next_alive = cl->next_alive;
946                                 cl->next_alive = NULL;
947
948                                 /* Did cl_tail point to it? */
949                                 if (cl == cl_tail) {
950                                         /* Repair it! */
951                                         cl_tail = cl_prev;
952
953                                         /* Was it the last class in this band? */
954                                         if (cl == cl_tail) {
955                                                 /* Kill the band! */
956                                                 q->active[prio] = NULL;
957                                                 q->activemask &= ~(1<<prio);
958                                                 if (cl->q->q.qlen)
959                                                         cbq_activate_class(cl);
960                                                 return NULL;
961                                         }
962
963                                         q->active[prio] = cl_tail;
964                                 }
965                                 if (cl->q->q.qlen)
966                                         cbq_activate_class(cl);
967
968                                 cl = cl_prev;
969                         }
970
971 next_class:
972                         cl_prev = cl;
973                         cl = cl->next_alive;
974                 } while (cl_prev != cl_tail);
975         } while (deficit);
976
977         q->active[prio] = cl_prev;
978
979         return NULL;
980 }
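/* Note: the band walk above is a deficit-style weighted round robin.  A
 * class whose deficit is exhausted is skipped for the current round, its
 * deficit is topped up by its quantum, and the "deficit" flag makes the
 * outer loop run another round so it gets its turn; empty or penalized
 * classes are unlinked from the active chain instead.
 */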
981
982 static __inline__ struct sk_buff *
983 cbq_dequeue_1(struct Qdisc *sch)
984 {
985         struct cbq_sched_data *q = qdisc_priv(sch);
986         struct sk_buff *skb;
987         unsigned activemask;
988
989         activemask = q->activemask&0xFF;
990         while (activemask) {
991                 int prio = ffz(~activemask);
992                 activemask &= ~(1<<prio);
993                 skb = cbq_dequeue_prio(sch, prio);
994                 if (skb)
995                         return skb;
996         }
997         return NULL;
998 }
999
1000 static struct sk_buff *
1001 cbq_dequeue(struct Qdisc *sch)
1002 {
1003         struct sk_buff *skb;
1004         struct cbq_sched_data *q = qdisc_priv(sch);
1005         psched_time_t now;
1006         psched_tdiff_t incr;
1007
1008         PSCHED_GET_TIME(now);
1009         incr = PSCHED_TDIFF(now, q->now_rt);
1010
1011         if (q->tx_class) {
1012                 psched_tdiff_t incr2;
1013                 /* Time integrator. We calculate the EOS time
1014                    by adding the expected packet transmission time.
1015                    If real time is greater, we warp the artificial clock,
1016                    so that:
1017
1018                    cbq_time = max(real_time, work);
1019                  */
1020                 incr2 = L2T(&q->link, q->tx_len);
1021                 PSCHED_TADD(q->now, incr2);
1022                 cbq_update(q);
1023                 if ((incr -= incr2) < 0)
1024                         incr = 0;
1025         }
1026         PSCHED_TADD(q->now, incr);
1027         q->now_rt = now;
1028
1029         for (;;) {
1030                 q->wd_expires = 0;
1031
1032                 skb = cbq_dequeue_1(sch);
1033                 if (skb) {
1034                         sch->q.qlen--;
1035                         sch->flags &= ~TCQ_F_THROTTLED;
1036                         return skb;
1037                 }
1038
1039                 /* All the classes are overlimit.
1040
1041                    This is possible if:
1042
1043                    1. The scheduler is empty.
1044                    2. The toplevel cutoff inhibited borrowing.
1045                    3. The root class is overlimit.
1046
1047                    Reset the 2nd and 3rd conditions and retry.
1048
1049                    Note that NS and cbq-2.0 are buggy here: peeking
1050                    an arbitrary class is appropriate for ancestor-only
1051                    sharing, but not for the toplevel algorithm.
1052
1053                    Our version is better but slower, because it requires
1054                    two passes; that is unavoidable with top-level sharing.
1055                 */
1056
1057                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1058                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1059                         break;
1060
1061                 q->toplevel = TC_CBQ_MAXLEVEL;
1062                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1063         }
1064
1065         /* No packets in the scheduler, or nobody wants to give them to us :-(
1066            Sigh... start the watchdog timer in the latter case. */
1067
1068         if (sch->q.qlen) {
1069                 sch->qstats.overlimits++;
1070                 if (q->wd_expires)
1071                         qdisc_watchdog_schedule(&q->watchdog,
1072                                                 now + q->wd_expires);
1073         }
1074         return NULL;
1075 }
1076
1077 /* CBQ class maintenance routines */
1078
1079 static void cbq_adjust_levels(struct cbq_class *this)
1080 {
1081         if (this == NULL)
1082                 return;
1083
1084         do {
1085                 int level = 0;
1086                 struct cbq_class *cl;
1087
1088                 if ((cl = this->children) != NULL) {
1089                         do {
1090                                 if (cl->level > level)
1091                                         level = cl->level;
1092                         } while ((cl = cl->sibling) != this->children);
1093                 }
1094                 this->level = level+1;
1095         } while ((this = this->tparent) != NULL);
1096 }
1097
1098 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1099 {
1100         struct cbq_class *cl;
1101         unsigned h;
1102
1103         if (q->quanta[prio] == 0)
1104                 return;
1105
1106         for (h=0; h<16; h++) {
1107                 for (cl = q->classes[h]; cl; cl = cl->next) {
1108                         /* BUGGGG... Beware! This expression suffers from
1109                            arithmetic overflows!
1110                          */
1111                         if (cl->priority == prio) {
1112                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1113                                         q->quanta[prio];
1114                         }
1115                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1116                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1117                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1118                         }
1119                 }
1120         }
1121 }
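/* The normalization above rescales each class weight so that a class of
 * average weight in the band receives exactly its allot per round.  With
 * assumed values of three classes of weights 1, 2 and 3 (q->quanta[prio]
 * = 6, nclasses = 3) and allot = 1514, the weight-2 class gets
 * 2*1514*3/6 = 1514 bytes per round and the weight-3 class gets 2271.
 */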
1122
1123 static void cbq_sync_defmap(struct cbq_class *cl)
1124 {
1125         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1126         struct cbq_class *split = cl->split;
1127         unsigned h;
1128         int i;
1129
1130         if (split == NULL)
1131                 return;
1132
1133         for (i=0; i<=TC_PRIO_MAX; i++) {
1134                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1135                         split->defaults[i] = NULL;
1136         }
1137
1138         for (i=0; i<=TC_PRIO_MAX; i++) {
1139                 int level = split->level;
1140
1141                 if (split->defaults[i])
1142                         continue;
1143
1144                 for (h=0; h<16; h++) {
1145                         struct cbq_class *c;
1146
1147                         for (c = q->classes[h]; c; c = c->next) {
1148                                 if (c->split == split && c->level < level &&
1149                                     c->defmap&(1<<i)) {
1150                                         split->defaults[i] = c;
1151                                         level = c->level;
1152                                 }
1153                         }
1154                 }
1155         }
1156 }
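/* Note: cbq_sync_defmap() rebuilds split->defaults[].  For each TC_PRIO_*
 * value it first drops this class from slots it no longer claims, then
 * scans every class attached to the same split node and installs the
 * lowest-level one whose defmap bit for that priority is set.
 */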
1157
1158 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1159 {
1160         struct cbq_class *split = NULL;
1161
1162         if (splitid == 0) {
1163                 if ((split = cl->split) == NULL)
1164                         return;
1165                 splitid = split->classid;
1166         }
1167
1168         if (split == NULL || split->classid != splitid) {
1169                 for (split = cl->tparent; split; split = split->tparent)
1170                         if (split->classid == splitid)
1171                                 break;
1172         }
1173
1174         if (split == NULL)
1175                 return;
1176
1177         if (cl->split != split) {
1178                 cl->defmap = 0;
1179                 cbq_sync_defmap(cl);
1180                 cl->split = split;
1181                 cl->defmap = def&mask;
1182         } else
1183                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1184
1185         cbq_sync_defmap(cl);
1186 }
1187
1188 static void cbq_unlink_class(struct cbq_class *this)
1189 {
1190         struct cbq_class *cl, **clp;
1191         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1192
1193         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1194                 if (cl == this) {
1195                         *clp = cl->next;
1196                         cl->next = NULL;
1197                         break;
1198                 }
1199         }
1200
1201         if (this->tparent) {
1202                 clp=&this->sibling;
1203                 cl = *clp;
1204                 do {
1205                         if (cl == this) {
1206                                 *clp = cl->sibling;
1207                                 break;
1208                         }
1209                         clp = &cl->sibling;
1210                 } while ((cl = *clp) != this->sibling);
1211
1212                 if (this->tparent->children == this) {
1213                         this->tparent->children = this->sibling;
1214                         if (this->sibling == this)
1215                                 this->tparent->children = NULL;
1216                 }
1217         } else {
1218                 BUG_TRAP(this->sibling == this);
1219         }
1220 }
1221
1222 static void cbq_link_class(struct cbq_class *this)
1223 {
1224         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1225         unsigned h = cbq_hash(this->classid);
1226         struct cbq_class *parent = this->tparent;
1227
1228         this->sibling = this;
1229         this->next = q->classes[h];
1230         q->classes[h] = this;
1231
1232         if (parent == NULL)
1233                 return;
1234
1235         if (parent->children == NULL) {
1236                 parent->children = this;
1237         } else {
1238                 this->sibling = parent->children->sibling;
1239                 parent->children->sibling = this;
1240         }
1241 }
1242
1243 static unsigned int cbq_drop(struct Qdisc* sch)
1244 {
1245         struct cbq_sched_data *q = qdisc_priv(sch);
1246         struct cbq_class *cl, *cl_head;
1247         int prio;
1248         unsigned int len;
1249
1250         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1251                 if ((cl_head = q->active[prio]) == NULL)
1252                         continue;
1253
1254                 cl = cl_head;
1255                 do {
1256                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1257                                 sch->q.qlen--;
1258                                 if (!cl->q->q.qlen)
1259                                         cbq_deactivate_class(cl);
1260                                 return len;
1261                         }
1262                 } while ((cl = cl->next_alive) != cl_head);
1263         }
1264         return 0;
1265 }
1266
1267 static void
1268 cbq_reset(struct Qdisc* sch)
1269 {
1270         struct cbq_sched_data *q = qdisc_priv(sch);
1271         struct cbq_class *cl;
1272         int prio;
1273         unsigned h;
1274
1275         q->activemask = 0;
1276         q->pmask = 0;
1277         q->tx_class = NULL;
1278         q->tx_borrowed = NULL;
1279         qdisc_watchdog_cancel(&q->watchdog);
1280         hrtimer_cancel(&q->delay_timer);
1281         q->toplevel = TC_CBQ_MAXLEVEL;
1282         PSCHED_GET_TIME(q->now);
1283         q->now_rt = q->now;
1284
1285         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1286                 q->active[prio] = NULL;
1287
1288         for (h = 0; h < 16; h++) {
1289                 for (cl = q->classes[h]; cl; cl = cl->next) {
1290                         qdisc_reset(cl->q);
1291
1292                         cl->next_alive = NULL;
1293                         PSCHED_SET_PASTPERFECT(cl->undertime);
1294                         cl->avgidle = cl->maxidle;
1295                         cl->deficit = cl->quantum;
1296                         cl->cpriority = cl->priority;
1297                 }
1298         }
1299         sch->q.qlen = 0;
1300 }
1301
1302
1303 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1304 {
1305         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1306                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1307                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1308         }
1309         if (lss->change&TCF_CBQ_LSS_EWMA)
1310                 cl->ewma_log = lss->ewma_log;
1311         if (lss->change&TCF_CBQ_LSS_AVPKT)
1312                 cl->avpkt = lss->avpkt;
1313         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1314                 cl->minidle = -(long)lss->minidle;
1315         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1316                 cl->maxidle = lss->maxidle;
1317                 cl->avgidle = lss->maxidle;
1318         }
1319         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1320                 cl->offtime = lss->offtime;
1321         return 0;
1322 }
1323
1324 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1325 {
1326         q->nclasses[cl->priority]--;
1327         q->quanta[cl->priority] -= cl->weight;
1328         cbq_normalize_quanta(q, cl->priority);
1329 }
1330
1331 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1332 {
1333         q->nclasses[cl->priority]++;
1334         q->quanta[cl->priority] += cl->weight;
1335         cbq_normalize_quanta(q, cl->priority);
1336 }
1337
1338 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1339 {
1340         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1341
1342         if (wrr->allot)
1343                 cl->allot = wrr->allot;
1344         if (wrr->weight)
1345                 cl->weight = wrr->weight;
1346         if (wrr->priority) {
1347                 cl->priority = wrr->priority-1;
1348                 cl->cpriority = cl->priority;
1349                 if (cl->priority >= cl->priority2)
1350                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1351         }
1352
1353         cbq_addprio(q, cl);
1354         return 0;
1355 }
1356
1357 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1358 {
1359         switch (ovl->strategy) {
1360         case TC_CBQ_OVL_CLASSIC:
1361                 cl->overlimit = cbq_ovl_classic;
1362                 break;
1363         case TC_CBQ_OVL_DELAY:
1364                 cl->overlimit = cbq_ovl_delay;
1365                 break;
1366         case TC_CBQ_OVL_LOWPRIO:
1367                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1368                     ovl->priority2-1 <= cl->priority)
1369                         return -EINVAL;
1370                 cl->priority2 = ovl->priority2-1;
1371                 cl->overlimit = cbq_ovl_lowprio;
1372                 break;
1373         case TC_CBQ_OVL_DROP:
1374                 cl->overlimit = cbq_ovl_drop;
1375                 break;
1376         case TC_CBQ_OVL_RCLASSIC:
1377                 cl->overlimit = cbq_ovl_rclassic;
1378                 break;
1379         default:
1380                 return -EINVAL;
1381         }
1382         cl->penalty = ovl->penalty;
1383         return 0;
1384 }
1385
1386 #ifdef CONFIG_NET_CLS_POLICE
1387 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1388 {
1389         cl->police = p->police;
1390
1391         if (cl->q->handle) {
1392                 if (p->police == TC_POLICE_RECLASSIFY)
1393                         cl->q->reshape_fail = cbq_reshape_fail;
1394                 else
1395                         cl->q->reshape_fail = NULL;
1396         }
1397         return 0;
1398 }
1399 #endif
1400
1401 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1402 {
1403         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1404         return 0;
1405 }
1406
1407 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1408 {
1409         struct cbq_sched_data *q = qdisc_priv(sch);
1410         struct rtattr *tb[TCA_CBQ_MAX];
1411         struct tc_ratespec *r;
1412
1413         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1414             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1415             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1416                 return -EINVAL;
1417
1418         if (tb[TCA_CBQ_LSSOPT-1] &&
1419             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1420                 return -EINVAL;
1421
1422         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1423
1424         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1425                 return -EINVAL;
1426
1427         q->link.refcnt = 1;
1428         q->link.sibling = &q->link;
1429         q->link.classid = sch->handle;
1430         q->link.qdisc = sch;
1431         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1432                                             sch->handle)))
1433                 q->link.q = &noop_qdisc;
1434
1435         q->link.priority = TC_CBQ_MAXPRIO-1;
1436         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1437         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1438         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1439         q->link.overlimit = cbq_ovl_classic;
1440         q->link.allot = psched_mtu(sch->dev);
1441         q->link.quantum = q->link.allot;
1442         q->link.weight = q->link.R_tab->rate.rate;
1443
1444         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1445         q->link.avpkt = q->link.allot/2;
1446         q->link.minidle = -0x7FFFFFFF;
1447         q->link.stats_lock = &sch->dev->queue_lock;
1448
1449         qdisc_watchdog_init(&q->watchdog, sch);
1450         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1451         q->delay_timer.function = cbq_undelay;
1452         q->toplevel = TC_CBQ_MAXLEVEL;
1453         PSCHED_GET_TIME(q->now);
1454         q->now_rt = q->now;
1455
1456         cbq_link_class(&q->link);
1457
1458         if (tb[TCA_CBQ_LSSOPT-1])
1459                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1460
1461         cbq_addprio(q, &q->link);
1462         return 0;
1463 }
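/* Note: q->link is the implicit root class standing for the physical
 * link.  cbq_init() gives it the device MTU as allot and quantum, the
 * configured rate as weight and a default pfifo child qdisc; classes
 * created later typically end up with this class (or one of its
 * descendants) as their tparent, forming the link-sharing tree.
 */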
1464
1465 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1466 {
1467         unsigned char *b = skb_tail_pointer(skb);
1468
1469         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1470         return skb->len;
1471
1472 rtattr_failure:
1473         nlmsg_trim(skb, b);
1474         return -1;
1475 }
1476
1477 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1478 {
1479         unsigned char *b = skb_tail_pointer(skb);
1480         struct tc_cbq_lssopt opt;
1481
1482         opt.flags = 0;
1483         if (cl->borrow == NULL)
1484                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1485         if (cl->share == NULL)
1486                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1487         opt.ewma_log = cl->ewma_log;
1488         opt.level = cl->level;
1489         opt.avpkt = cl->avpkt;
1490         opt.maxidle = cl->maxidle;
1491         opt.minidle = (u32)(-cl->minidle);
1492         opt.offtime = cl->offtime;
1493         opt.change = ~0;
1494         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1495         return skb->len;
1496
1497 rtattr_failure:
1498         nlmsg_trim(skb, b);
1499         return -1;
1500 }
1501
1502 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1503 {
1504         unsigned char *b = skb_tail_pointer(skb);
1505         struct tc_cbq_wrropt opt;
1506
1507         opt.flags = 0;
1508         opt.allot = cl->allot;
1509         opt.priority = cl->priority+1;
1510         opt.cpriority = cl->cpriority+1;
1511         opt.weight = cl->weight;
1512         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1513         return skb->len;
1514
1515 rtattr_failure:
1516         nlmsg_trim(skb, b);
1517         return -1;
1518 }
1519
1520 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1521 {
1522         unsigned char *b = skb_tail_pointer(skb);
1523         struct tc_cbq_ovl opt;
1524
1525         opt.strategy = cl->ovl_strategy;
1526         opt.priority2 = cl->priority2+1;
1527         opt.pad = 0;
1528         opt.penalty = cl->penalty;
1529         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1530         return skb->len;
1531
1532 rtattr_failure:
1533         nlmsg_trim(skb, b);
1534         return -1;
1535 }
1536
1537 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1538 {
1539         unsigned char *b = skb_tail_pointer(skb);
1540         struct tc_cbq_fopt opt;
1541
1542         if (cl->split || cl->defmap) {
1543                 opt.split = cl->split ? cl->split->classid : 0;
1544                 opt.defmap = cl->defmap;
1545                 opt.defchange = ~0;
1546                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1547         }
1548         return skb->len;
1549
1550 rtattr_failure:
1551         nlmsg_trim(skb, b);
1552         return -1;
1553 }
1554
1555 #ifdef CONFIG_NET_CLS_POLICE
1556 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1557 {
1558         unsigned char *b = skb_tail_pointer(skb);
1559         struct tc_cbq_police opt;
1560
1561         if (cl->police) {
1562                 opt.police = cl->police;
1563                 opt.__res1 = 0;
1564                 opt.__res2 = 0;
1565                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1566         }
1567         return skb->len;
1568
1569 rtattr_failure:
1570         nlmsg_trim(skb, b);
1571         return -1;
1572 }
1573 #endif
1574
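/* Append all per-class attributes in sequence; a failure in any helper
 * fails the whole dump.
 */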
1575 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1576 {
1577         if (cbq_dump_lss(skb, cl) < 0 ||
1578             cbq_dump_rate(skb, cl) < 0 ||
1579             cbq_dump_wrr(skb, cl) < 0 ||
1580             cbq_dump_ovl(skb, cl) < 0 ||
1581 #ifdef CONFIG_NET_CLS_POLICE
1582             cbq_dump_police(skb, cl) < 0 ||
1583 #endif
1584             cbq_dump_fopt(skb, cl) < 0)
1585                 return -1;
1586         return 0;
1587 }
1588
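/* Dump the qdisc-level configuration: the parameters of the root "link"
 * class, nested inside a TCA_OPTIONS attribute whose length is fixed up
 * once all sub-attributes have been written.
 */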
1589 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1590 {
1591         struct cbq_sched_data *q = qdisc_priv(sch);
1592         unsigned char *b = skb_tail_pointer(skb);
1593         struct rtattr *rta;
1594
1595         rta = (struct rtattr*)b;
1596         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1597         if (cbq_dump_attr(skb, &q->link) < 0)
1598                 goto rtattr_failure;
1599         rta->rta_len = skb_tail_pointer(skb) - b;
1600         return skb->len;
1601
1602 rtattr_failure:
1603         nlmsg_trim(skb, b);
1604         return -1;
1605 }
1606
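/* Export the root class's CBQ-specific counters via gnet_stats_copy_app(),
 * refreshing avgidle from the live scheduler state first.
 */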
1607 static int
1608 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1609 {
1610         struct cbq_sched_data *q = qdisc_priv(sch);
1611
1612         q->link.xstats.avgidle = q->link.avgidle;
1613         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1614 }
1615
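/* Fill in the tcmsg header for one class (parent classid or TC_H_ROOT,
 * class handle, leaf qdisc handle) and dump its attributes nested in
 * TCA_OPTIONS.
 */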
1616 static int
1617 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1618                struct sk_buff *skb, struct tcmsg *tcm)
1619 {
1620         struct cbq_class *cl = (struct cbq_class*)arg;
1621         unsigned char *b = skb_tail_pointer(skb);
1622         struct rtattr *rta;
1623
1624         if (cl->tparent)
1625                 tcm->tcm_parent = cl->tparent->classid;
1626         else
1627                 tcm->tcm_parent = TC_H_ROOT;
1628         tcm->tcm_handle = cl->classid;
1629         tcm->tcm_info = cl->q->handle;
1630
1631         rta = (struct rtattr*)b;
1632         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1633         if (cbq_dump_attr(skb, cl) < 0)
1634                 goto rtattr_failure;
1635         rta->rta_len = skb_tail_pointer(skb) - b;
1636         return skb->len;
1637
1638 rtattr_failure:
1639         nlmsg_trim(skb, b);
1640         return -1;
1641 }
1642
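/* Per-class statistics dump: snapshot the current queue length, avgidle
 * and, when an undertime is pending, its offset from q->now, then copy
 * the basic, (optional) rate-estimator, queue and CBQ-specific counters.
 */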
1643 static int
1644 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1645         struct gnet_dump *d)
1646 {
1647         struct cbq_sched_data *q = qdisc_priv(sch);
1648         struct cbq_class *cl = (struct cbq_class*)arg;
1649
1650         cl->qstats.qlen = cl->q->q.qlen;
1651         cl->xstats.avgidle = cl->avgidle;
1652         cl->xstats.undertime = 0;
1653
1654         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1655                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1656
1657         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1658 #ifdef CONFIG_NET_ESTIMATOR
1659             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1660 #endif
1661             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1662                 return -1;
1663
1664         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1665 }
1666
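/* Attach a new leaf qdisc to a class.  A NULL 'new' is replaced by a
 * default pfifo; with policing in TC_POLICE_RECLASSIFY mode the reshape
 * hook is installed on the new qdisc.  The swap is done under the tree
 * lock: the old qdisc's backlog is subtracted from its ancestors and the
 * old qdisc is reset before being handed back through 'old'.
 */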
1667 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1668                      struct Qdisc **old)
1669 {
1670         struct cbq_class *cl = (struct cbq_class*)arg;
1671
1672         if (cl) {
1673                 if (new == NULL) {
1674                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1675                                                      cl->classid)) == NULL)
1676                                 return -ENOBUFS;
1677                 } else {
1678 #ifdef CONFIG_NET_CLS_POLICE
1679                         if (cl->police == TC_POLICE_RECLASSIFY)
1680                                 new->reshape_fail = cbq_reshape_fail;
1681 #endif
1682                 }
1683                 sch_tree_lock(sch);
1684                 *old = xchg(&cl->q, new);
1685                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1686                 qdisc_reset(*old);
1687                 sch_tree_unlock(sch);
1688
1689                 return 0;
1690         }
1691         return -ENOENT;
1692 }
1693
1694 static struct Qdisc *
1695 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1696 {
1697         struct cbq_class *cl = (struct cbq_class*)arg;
1698
1699         return cl ? cl->q : NULL;
1700 }
1701
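/* Called when the backlog of a class's leaf qdisc has shrunk; once the
 * queue is empty the class is removed from the active (WRR) lists.
 */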
1702 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1703 {
1704         struct cbq_class *cl = (struct cbq_class *)arg;
1705
1706         if (cl->q->q.qlen == 0)
1707                 cbq_deactivate_class(cl);
1708 }
1709
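/* Look up a class by classid and take a reference on it; returns 0 when
 * no such class exists.
 */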
1710 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1711 {
1712         struct cbq_sched_data *q = qdisc_priv(sch);
1713         struct cbq_class *cl = cbq_class_lookup(q, classid);
1714
1715         if (cl) {
1716                 cl->refcnt++;
1717                 return (unsigned long)cl;
1718         }
1719         return 0;
1720 }
1721
1722 static void cbq_destroy_filters(struct cbq_class *cl)
1723 {
1724         struct tcf_proto *tp;
1725
1726         while ((tp = cl->filter_list) != NULL) {
1727                 cl->filter_list = tp->next;
1728                 tcf_destroy(tp);
1729         }
1730 }
1731
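/* Release everything a class owns: its filters, leaf qdisc, rate table
 * and rate estimator.  The embedded root ("link") class is not kfree'd.
 */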
1732 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1733 {
1734         struct cbq_sched_data *q = qdisc_priv(sch);
1735
1736         BUG_TRAP(!cl->filters);
1737
1738         cbq_destroy_filters(cl);
1739         qdisc_destroy(cl->q);
1740         qdisc_put_rtab(cl->R_tab);
1741 #ifdef CONFIG_NET_ESTIMATOR
1742         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1743 #endif
1744         if (cl != &q->link)
1745                 kfree(cl);
1746 }
1747
1748 static void
1749 cbq_destroy(struct Qdisc* sch)
1750 {
1751         struct cbq_sched_data *q = qdisc_priv(sch);
1752         struct cbq_class *cl;
1753         unsigned h;
1754
1755 #ifdef CONFIG_NET_CLS_POLICE
1756         q->rx_class = NULL;
1757 #endif
1758         /*
1759          * Filters must be destroyed first because we don't destroy the
1760          * classes from root to leaves, which means that filters can still
1761          * be bound to classes which have been destroyed already. --TGR '04
1762          */
1763         for (h = 0; h < 16; h++)
1764                 for (cl = q->classes[h]; cl; cl = cl->next)
1765                         cbq_destroy_filters(cl);
1766
1767         for (h = 0; h < 16; h++) {
1768                 struct cbq_class *next;
1769
1770                 for (cl = q->classes[h]; cl; cl = next) {
1771                         next = cl->next;
1772                         cbq_destroy_class(sch, cl);
1773                 }
1774         }
1775 }
1776
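/* Drop a class reference.  On the last put the rx_class shortcut (used by
 * the policing code) is cleared under the device queue lock before the
 * class is destroyed.
 */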
1777 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1778 {
1779         struct cbq_class *cl = (struct cbq_class*)arg;
1780
1781         if (--cl->refcnt == 0) {
1782 #ifdef CONFIG_NET_CLS_POLICE
1783                 struct cbq_sched_data *q = qdisc_priv(sch);
1784
1785                 spin_lock_bh(&sch->dev->queue_lock);
1786                 if (q->rx_class == cl)
1787                         q->rx_class = NULL;
1788                 spin_unlock_bh(&sch->dev->queue_lock);
1789 #endif
1790
1791                 cbq_destroy_class(sch, cl);
1792         }
1793 }
1794
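/* Create a new class or change an existing one from a netlink request.
 * When *arg already points at a class, the supplied attributes are
 * validated and applied under the tree lock; for a new class the WRR,
 * RATE and LSS options are mandatory, a classid is chosen (or
 * auto-generated from hgenerator) and unspecified parameters are
 * inherited from the root link class.
 *
 * Illustrative only (the userspace syntax belongs to the tc(8) tool,
 * not to this file): a request such as
 *
 *   tc class add dev eth0 parent 1: classid 1:1 cbq rate 10Mbit \
 *       allot 1514 prio 5 avpkt 1000 bounded
 *
 * would arrive here with TCA_CBQ_RATE/TCA_CBQ_RTAB, TCA_CBQ_WRROPT and
 * TCA_CBQ_LSSOPT attributes filled in.
 */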
1795 static int
1796 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1797                  unsigned long *arg)
1798 {
1799         int err;
1800         struct cbq_sched_data *q = qdisc_priv(sch);
1801         struct cbq_class *cl = (struct cbq_class*)*arg;
1802         struct rtattr *opt = tca[TCA_OPTIONS-1];
1803         struct rtattr *tb[TCA_CBQ_MAX];
1804         struct cbq_class *parent;
1805         struct qdisc_rate_table *rtab = NULL;
1806
1807         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1808                 return -EINVAL;
1809
1810         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1811             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1812                 return -EINVAL;
1813
1814         if (tb[TCA_CBQ_FOPT-1] &&
1815             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1816                 return -EINVAL;
1817
1818         if (tb[TCA_CBQ_RATE-1] &&
1819             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1820                 return -EINVAL;
1821
1822         if (tb[TCA_CBQ_LSSOPT-1] &&
1823             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1824                 return -EINVAL;
1825
1826         if (tb[TCA_CBQ_WRROPT-1] &&
1827             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1828                 return -EINVAL;
1829
1830 #ifdef CONFIG_NET_CLS_POLICE
1831         if (tb[TCA_CBQ_POLICE-1] &&
1832             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1833                 return -EINVAL;
1834 #endif
1835
1836         if (cl) {
1837                 /* Check parent */
1838                 if (parentid) {
1839                         if (cl->tparent && cl->tparent->classid != parentid)
1840                                 return -EINVAL;
1841                         if (!cl->tparent && parentid != TC_H_ROOT)
1842                                 return -EINVAL;
1843                 }
1844
1845                 if (tb[TCA_CBQ_RATE-1]) {
1846                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1847                         if (rtab == NULL)
1848                                 return -EINVAL;
1849                 }
1850
1851                 /* Change class parameters */
1852                 sch_tree_lock(sch);
1853
1854                 if (cl->next_alive != NULL)
1855                         cbq_deactivate_class(cl);
1856
1857                 if (rtab) {
1858                         rtab = xchg(&cl->R_tab, rtab);
1859                         qdisc_put_rtab(rtab);
1860                 }
1861
1862                 if (tb[TCA_CBQ_LSSOPT-1])
1863                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1864
1865                 if (tb[TCA_CBQ_WRROPT-1]) {
1866                         cbq_rmprio(q, cl);
1867                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1868                 }
1869
1870                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1871                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1872
1873 #ifdef CONFIG_NET_CLS_POLICE
1874                 if (tb[TCA_CBQ_POLICE-1])
1875                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1876 #endif
1877
1878                 if (tb[TCA_CBQ_FOPT-1])
1879                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1880
1881                 if (cl->q->q.qlen)
1882                         cbq_activate_class(cl);
1883
1884                 sch_tree_unlock(sch);
1885
1886 #ifdef CONFIG_NET_ESTIMATOR
1887                 if (tca[TCA_RATE-1])
1888                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1889                                 cl->stats_lock, tca[TCA_RATE-1]);
1890 #endif
1891                 return 0;
1892         }
1893
1894         if (parentid == TC_H_ROOT)
1895                 return -EINVAL;
1896
1897         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1898             tb[TCA_CBQ_LSSOPT-1] == NULL)
1899                 return -EINVAL;
1900
1901         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1902         if (rtab == NULL)
1903                 return -EINVAL;
1904
1905         if (classid) {
1906                 err = -EINVAL;
1907                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1908                         goto failure;
1909         } else {
1910                 int i;
1911                 classid = TC_H_MAKE(sch->handle,0x8000);
1912
1913                 for (i=0; i<0x8000; i++) {
1914                         if (++q->hgenerator >= 0x8000)
1915                                 q->hgenerator = 1;
1916                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1917                                 break;
1918                 }
1919                 err = -ENOSR;
1920                 if (i >= 0x8000)
1921                         goto failure;
1922                 classid = classid|q->hgenerator;
1923         }
1924
1925         parent = &q->link;
1926         if (parentid) {
1927                 parent = cbq_class_lookup(q, parentid);
1928                 err = -EINVAL;
1929                 if (parent == NULL)
1930                         goto failure;
1931         }
1932
1933         err = -ENOBUFS;
1934         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1935         if (cl == NULL)
1936                 goto failure;
1937         cl->R_tab = rtab;
1938         rtab = NULL;
1939         cl->refcnt = 1;
1940         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1941                 cl->q = &noop_qdisc;
1942         cl->classid = classid;
1943         cl->tparent = parent;
1944         cl->qdisc = sch;
1945         cl->allot = parent->allot;
1946         cl->quantum = cl->allot;
1947         cl->weight = cl->R_tab->rate.rate;
1948         cl->stats_lock = &sch->dev->queue_lock;
1949
1950         sch_tree_lock(sch);
1951         cbq_link_class(cl);
1952         cl->borrow = cl->tparent;
1953         if (cl->tparent != &q->link)
1954                 cl->share = cl->tparent;
1955         cbq_adjust_levels(parent);
1956         cl->minidle = -0x7FFFFFFF;
1957         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1958         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1959         if (cl->ewma_log==0)
1960                 cl->ewma_log = q->link.ewma_log;
1961         if (cl->maxidle==0)
1962                 cl->maxidle = q->link.maxidle;
1963         if (cl->avpkt==0)
1964                 cl->avpkt = q->link.avpkt;
1965         cl->overlimit = cbq_ovl_classic;
1966         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1967                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1968 #ifdef CONFIG_NET_CLS_POLICE
1969         if (tb[TCA_CBQ_POLICE-1])
1970                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1971 #endif
1972         if (tb[TCA_CBQ_FOPT-1])
1973                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1974         sch_tree_unlock(sch);
1975
1976 #ifdef CONFIG_NET_ESTIMATOR
1977         if (tca[TCA_RATE-1])
1978                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1979                         cl->stats_lock, tca[TCA_RATE-1]);
1980 #endif
1981
1982         *arg = (unsigned long)cl;
1983         return 0;
1984
1985 failure:
1986         qdisc_put_rtab(rtab);
1987         return err;
1988 }
1989
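/* Delete a class.  Refused while filters are still bound to it, while it
 * still has children, or for the root link class.  Otherwise its queue is
 * drained, it is removed from the active list, the tx/rx shortcuts, the
 * class hash and the per-priority lists, its default map is cleared, and
 * the caller's reference is dropped (destroying the class if it was the
 * last one).
 */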
1990 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1991 {
1992         struct cbq_sched_data *q = qdisc_priv(sch);
1993         struct cbq_class *cl = (struct cbq_class*)arg;
1994         unsigned int qlen;
1995
1996         if (cl->filters || cl->children || cl == &q->link)
1997                 return -EBUSY;
1998
1999         sch_tree_lock(sch);
2000
2001         qlen = cl->q->q.qlen;
2002         qdisc_reset(cl->q);
2003         qdisc_tree_decrease_qlen(cl->q, qlen);
2004
2005         if (cl->next_alive)
2006                 cbq_deactivate_class(cl);
2007
2008         if (q->tx_borrowed == cl)
2009                 q->tx_borrowed = q->tx_class;
2010         if (q->tx_class == cl) {
2011                 q->tx_class = NULL;
2012                 q->tx_borrowed = NULL;
2013         }
2014 #ifdef CONFIG_NET_CLS_POLICE
2015         if (q->rx_class == cl)
2016                 q->rx_class = NULL;
2017 #endif
2018
2019         cbq_unlink_class(cl);
2020         cbq_adjust_levels(cl->tparent);
2021         cl->defmap = 0;
2022         cbq_sync_defmap(cl);
2023
2024         cbq_rmprio(q, cl);
2025         sch_tree_unlock(sch);
2026
2027         if (--cl->refcnt == 0)
2028                 cbq_destroy_class(sch, cl);
2029
2030         return 0;
2031 }
2032
2033 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2034 {
2035         struct cbq_sched_data *q = qdisc_priv(sch);
2036         struct cbq_class *cl = (struct cbq_class *)arg;
2037
2038         if (cl == NULL)
2039                 cl = &q->link;
2040
2041         return &cl->filter_list;
2042 }
2043
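/* Bind a classifier to a class.  Binding is refused (0 is returned) when
 * the target class does not sit strictly below the binding parent in the
 * hierarchy; otherwise the filter count is bumped and the class pointer
 * is returned as the binding handle.
 */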
2044 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2045                                      u32 classid)
2046 {
2047         struct cbq_sched_data *q = qdisc_priv(sch);
2048         struct cbq_class *p = (struct cbq_class*)parent;
2049         struct cbq_class *cl = cbq_class_lookup(q, classid);
2050
2051         if (cl) {
2052                 if (p && p->level <= cl->level)
2053                         return 0;
2054                 cl->filters++;
2055                 return (unsigned long)cl;
2056         }
2057         return 0;
2058 }
2059
2060 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2061 {
2062         struct cbq_class *cl = (struct cbq_class*)arg;
2063
2064         cl->filters--;
2065 }
2066
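/* Iterate over all classes in the 16-bucket class hash, honouring the
 * walker's skip/count bookkeeping and stopping early when the callback
 * asks for it.
 */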
2067 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2068 {
2069         struct cbq_sched_data *q = qdisc_priv(sch);
2070         unsigned h;
2071
2072         if (arg->stop)
2073                 return;
2074
2075         for (h = 0; h < 16; h++) {
2076                 struct cbq_class *cl;
2077
2078                 for (cl = q->classes[h]; cl; cl = cl->next) {
2079                         if (arg->count < arg->skip) {
2080                                 arg->count++;
2081                                 continue;
2082                         }
2083                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2084                                 arg->stop = 1;
2085                                 return;
2086                         }
2087                         arg->count++;
2088                 }
2089         }
2090 }
2091
2092 static struct Qdisc_class_ops cbq_class_ops = {
2093         .graft          =       cbq_graft,
2094         .leaf           =       cbq_leaf,
2095         .qlen_notify    =       cbq_qlen_notify,
2096         .get            =       cbq_get,
2097         .put            =       cbq_put,
2098         .change         =       cbq_change_class,
2099         .delete         =       cbq_delete,
2100         .walk           =       cbq_walk,
2101         .tcf_chain      =       cbq_find_tcf,
2102         .bind_tcf       =       cbq_bind_filter,
2103         .unbind_tcf     =       cbq_unbind_filter,
2104         .dump           =       cbq_dump_class,
2105         .dump_stats     =       cbq_dump_class_stats,
2106 };
2107
2108 static struct Qdisc_ops cbq_qdisc_ops = {
2109         .next           =       NULL,
2110         .cl_ops         =       &cbq_class_ops,
2111         .id             =       "cbq",
2112         .priv_size      =       sizeof(struct cbq_sched_data),
2113         .enqueue        =       cbq_enqueue,
2114         .dequeue        =       cbq_dequeue,
2115         .requeue        =       cbq_requeue,
2116         .drop           =       cbq_drop,
2117         .init           =       cbq_init,
2118         .reset          =       cbq_reset,
2119         .destroy        =       cbq_destroy,
2120         .change         =       NULL,
2121         .dump           =       cbq_dump,
2122         .dump_stats     =       cbq_dump_stats,
2123         .owner          =       THIS_MODULE,
2124 };
2125
2126 static int __init cbq_module_init(void)
2127 {
2128         return register_qdisc(&cbq_qdisc_ops);
2129 }
2130 static void __exit cbq_module_exit(void)
2131 {
2132         unregister_qdisc(&cbq_qdisc_ops);
2133 }
2134 module_init(cbq_module_init)
2135 module_exit(cbq_module_exit)
2136 MODULE_LICENSE("GPL");