[NET_SCHED]: Remove unnecessary stats_lock pointers
1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
31 #include <net/ip.h>
32 #include <net/netlink.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38
39 /*      Class-Based Queueing (CBQ) algorithm.
40         =======================================
41
42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43                  Management Models for Packet Networks",
44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49                  Parameters", 1996
50
51                  [4] Sally Floyd and Michael Speer, "Experimental Results
52                  for Class-Based Queueing", 1998, not published.
53
54         -----------------------------------------------------------------------
55
 56         The algorithm skeleton was taken from the NS simulator cbq.cc.
 57         If someone wants to check this code against the LBL version,
 58         he should take into account that ONLY the skeleton was borrowed;
 59         the implementation is different. In particular:
60
61         --- The WRR algorithm is different. Our version looks more
62         reasonable (I hope) and works when quanta are allowed to be
63         less than MTU, which is always the case when real time classes
 64         have small rates. Note that the statement in [3] is
 65         incomplete: delay may actually be estimated even if the class
 66         per-round allotment is less than the MTU. Namely, if the per-round
67         allotment is W*r_i, and r_1+...+r_k = r < 1
68
69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71         In the worst case we have IntServ estimate with D = W*r+k*MTU
72         and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76         interpret some places, which look like wrong translations
77         from NS. Anyone is advised to find these differences
 78         and explain to me why I am wrong 8).
79
80         --- Linux has no EOI event, so that we cannot estimate true class
 81         idle time. The workaround is to consider the next dequeue event
 82         as a sign that the previous packet has finished. This is wrong because of
83         internal device queueing, but on a permanently loaded link it is true.
84         Moreover, combined with clock integrator, this scheme looks
85         very close to an ideal solution.  */
86
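/*      A purely illustrative instance of the bound above (assuming []
        denotes rounding up and B is the link bandwidth in bytes/sec):
        with MTU=1500, k=3 classes of r_i=0.1 each (r=0.3) and W=10000,
        the per-round allotment W*r_i=1000 is below the MTU, and

                delay_i <= ([1500/1000]*3000 + 3000 + 3*1500)/B = 13500/B,

        i.e. about 10.8 ms on a 10 Mbit/s link.  */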
87 struct cbq_sched_data;
88
89
90 struct cbq_class
91 {
92         struct cbq_class        *next;          /* hash table link */
93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
94
95 /* Parameters */
96         u32                     classid;
97         unsigned char           priority;       /* class priority */
98         unsigned char           priority2;      /* priority to be used after overlimit */
99         unsigned char           ewma_log;       /* time constant for idle time calculation */
100         unsigned char           ovl_strategy;
101 #ifdef CONFIG_NET_CLS_POLICE
102         unsigned char           police;
103 #endif
104
105         u32                     defmap;
106
107         /* Link-sharing scheduler parameters */
108         long                    maxidle;        /* Class parameters: see below. */
109         long                    offtime;
110         long                    minidle;
111         u32                     avpkt;
112         struct qdisc_rate_table *R_tab;
113
114         /* Overlimit strategy parameters */
115         void                    (*overlimit)(struct cbq_class *cl);
116         psched_tdiff_t          penalty;
117
118         /* General scheduler (WRR) parameters */
119         long                    allot;
120         long                    quantum;        /* Allotment per WRR round */
121         long                    weight;         /* Relative allotment: see below */
122
123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
124         struct cbq_class        *split;         /* Ptr to split node */
125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
128                                                    parent otherwise */
129         struct cbq_class        *sibling;       /* Sibling chain */
130         struct cbq_class        *children;      /* Pointer to children chain */
131
132         struct Qdisc            *q;             /* Elementary queueing discipline */
133
134
135 /* Variables */
136         unsigned char           cpriority;      /* Effective priority */
137         unsigned char           delayed;
138         unsigned char           level;          /* level of the class in hierarchy:
139                                                    0 for leaf classes, and maximal
140                                                    level of children + 1 for nodes.
141                                                  */
142
143         psched_time_t           last;           /* Last end of service */
144         psched_time_t           undertime;
145         long                    avgidle;
146         long                    deficit;        /* Saved deficit for WRR */
147         psched_time_t           penalized;
148         struct gnet_stats_basic bstats;
149         struct gnet_stats_queue qstats;
150         struct gnet_stats_rate_est rate_est;
151         struct tc_cbq_xstats    xstats;
152
153         struct tcf_proto        *filter_list;
154
155         int                     refcnt;
156         int                     filters;
157
158         struct cbq_class        *defaults[TC_PRIO_MAX+1];
159 };
160
161 struct cbq_sched_data
162 {
163         struct cbq_class        *classes[16];           /* Hash table of all classes */
164         int                     nclasses[TC_CBQ_MAXPRIO+1];
165         unsigned                quanta[TC_CBQ_MAXPRIO+1];
166
167         struct cbq_class        link;
168
169         unsigned                activemask;
170         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
171                                                                    with backlog */
172
173 #ifdef CONFIG_NET_CLS_POLICE
174         struct cbq_class        *rx_class;
175 #endif
176         struct cbq_class        *tx_class;
177         struct cbq_class        *tx_borrowed;
178         int                     tx_len;
179         psched_time_t           now;            /* Cached timestamp */
180         psched_time_t           now_rt;         /* Cached real time */
181         unsigned                pmask;
182
183         struct hrtimer          delay_timer;
184         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
185                                                    started when CBQ has
186                                                    backlog, but cannot
187                                                    transmit just now */
188         psched_tdiff_t          wd_expires;
189         int                     toplevel;
190         u32                     hgenerator;
191 };
192
193
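/* L2T ("length to time"): map a packet length to its transmission time at
   the class rate, using the rate table supplied from user space; the table
   is indexed by len >> cell_log, so all lengths falling into the same cell
   share one precomputed value. */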
194 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
195
196
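/* Fold a classid into one of 16 hash buckets by xoring the higher byte and
   nibble down into the low nibble; e.g. cbq_hash(0x00010001) == 1. */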
197 static __inline__ unsigned cbq_hash(u32 h)
198 {
199         h ^= h>>8;
200         h ^= h>>4;
201         return h&0xF;
202 }
203
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 {
207         struct cbq_class *cl;
208
209         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210                 if (cl->classid == classid)
211                         return cl;
212         return NULL;
213 }
214
215 #ifdef CONFIG_NET_CLS_POLICE
216
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 {
220         struct cbq_class *cl, *new;
221
222         for (cl = this->tparent; cl; cl = cl->tparent)
223                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224                         return new;
225
226         return NULL;
227 }
228
229 #endif
230
231 /* Classify packet. The procedure is pretty complicated, but
232    it allows us to combine link sharing and priority scheduling
233    transparently.
234
 235    Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
 236    so that they resolve to split nodes. Then packets are classified
237    by logical priority, or a more specific classifier may be attached
238    to the split node.
239  */
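/* For example, a route based filter at the CBQ root may return a non-leaf
   (split) class; classification then restarts at that node, either through
   a classifier attached to it or through its defaults[] table indexed by
   the packet's TC_PRIO value, until a leaf class is reached. */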
240
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243 {
244         struct cbq_sched_data *q = qdisc_priv(sch);
245         struct cbq_class *head = &q->link;
246         struct cbq_class **defmap;
247         struct cbq_class *cl = NULL;
248         u32 prio = skb->priority;
249         struct tcf_result res;
250
251         /*
252          *  Step 1. If skb->priority points to one of our classes, use it.
253          */
254         if (TC_H_MAJ(prio^sch->handle) == 0 &&
255             (cl = cbq_class_lookup(q, prio)) != NULL)
256                 return cl;
257
258         *qerr = NET_XMIT_BYPASS;
259         for (;;) {
260                 int result = 0;
261                 defmap = head->defaults;
262
263                 /*
264                  * Step 2+n. Apply classifier.
265                  */
266                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
267                         goto fallback;
268
269                 if ((cl = (void*)res.class) == NULL) {
270                         if (TC_H_MAJ(res.classid))
271                                 cl = cbq_class_lookup(q, res.classid);
272                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273                                 cl = defmap[TC_PRIO_BESTEFFORT];
274
275                         if (cl == NULL || cl->level >= head->level)
276                                 goto fallback;
277                 }
278
279 #ifdef CONFIG_NET_CLS_ACT
280                 switch (result) {
281                 case TC_ACT_QUEUED:
282                 case TC_ACT_STOLEN:
283                         *qerr = NET_XMIT_SUCCESS;
284                 case TC_ACT_SHOT:
285                         return NULL;
286                 }
287 #elif defined(CONFIG_NET_CLS_POLICE)
288                 switch (result) {
289                 case TC_POLICE_RECLASSIFY:
290                         return cbq_reclassify(skb, cl);
291                 case TC_POLICE_SHOT:
292                         return NULL;
293                 default:
294                         break;
295                 }
296 #endif
297                 if (cl->level == 0)
298                         return cl;
299
300                 /*
301                  * Step 3+n. If classifier selected a link sharing class,
 302          *         apply the agency-specific classifier.
 303          *         Repeat this procedure until we hit a leaf node.
304                  */
305                 head = cl;
306         }
307
308 fallback:
309         cl = head;
310
311         /*
312          * Step 4. No success...
313          */
314         if (TC_H_MAJ(prio) == 0 &&
315             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
317                 return head;
318
319         return cl;
320 }
321
322 /*
 323    A packet has just been enqueued into an empty class.
 324    cbq_activate_class adds the class to the tail of the active class list
325    of its priority band.
326  */
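/* The active list of each priority band is a circular singly linked list;
   q->active[prio] points at its tail, so the newly activated class is
   linked in after the old tail and becomes the new tail. */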
327
328 static __inline__ void cbq_activate_class(struct cbq_class *cl)
329 {
330         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331         int prio = cl->cpriority;
332         struct cbq_class *cl_tail;
333
334         cl_tail = q->active[prio];
335         q->active[prio] = cl;
336
337         if (cl_tail != NULL) {
338                 cl->next_alive = cl_tail->next_alive;
339                 cl_tail->next_alive = cl;
340         } else {
341                 cl->next_alive = cl;
342                 q->activemask |= (1<<prio);
343         }
344 }
345
346 /*
347    Unlink class from active chain.
348    Note that this same procedure is done directly in cbq_dequeue*
 349    during the round-robin procedure.
350  */
351
352 static void cbq_deactivate_class(struct cbq_class *this)
353 {
354         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355         int prio = this->cpriority;
356         struct cbq_class *cl;
357         struct cbq_class *cl_prev = q->active[prio];
358
359         do {
360                 cl = cl_prev->next_alive;
361                 if (cl == this) {
362                         cl_prev->next_alive = cl->next_alive;
363                         cl->next_alive = NULL;
364
365                         if (cl == q->active[prio]) {
366                                 q->active[prio] = cl_prev;
367                                 if (cl == q->active[prio]) {
368                                         q->active[prio] = NULL;
369                                         q->activemask &= ~(1<<prio);
370                                         return;
371                                 }
372                         }
373                         return;
374                 }
375         } while ((cl_prev = cl) != q->active[prio]);
376 }
377
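/* Lower the toplevel borrowing cutoff to the level of the first underlimit
   class found on cl's borrow chain, provided cl sits below the current
   cutoff and its queue is not throttled. */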
378 static void
379 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
380 {
381         int toplevel = q->toplevel;
382
383         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
384                 psched_time_t now;
385                 psched_tdiff_t incr;
386
387                 now = psched_get_time();
388                 incr = now - q->now_rt;
389                 now = q->now + incr;
390
391                 do {
392                         if (cl->undertime < now) {
393                                 q->toplevel = cl->level;
394                                 return;
395                         }
396                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
397         }
398 }
399
400 static int
401 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
402 {
403         struct cbq_sched_data *q = qdisc_priv(sch);
404         int len = skb->len;
405         int ret;
406         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
407
408 #ifdef CONFIG_NET_CLS_POLICE
409         q->rx_class = cl;
410 #endif
411         if (cl == NULL) {
412                 if (ret == NET_XMIT_BYPASS)
413                         sch->qstats.drops++;
414                 kfree_skb(skb);
415                 return ret;
416         }
417
418 #ifdef CONFIG_NET_CLS_POLICE
419         cl->q->__parent = sch;
420 #endif
421         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
422                 sch->q.qlen++;
423                 sch->bstats.packets++;
424                 sch->bstats.bytes+=len;
425                 cbq_mark_toplevel(q, cl);
426                 if (!cl->next_alive)
427                         cbq_activate_class(cl);
428                 return ret;
429         }
430
431         sch->qstats.drops++;
432         cbq_mark_toplevel(q, cl);
433         cl->qstats.drops++;
434         return ret;
435 }
436
437 static int
438 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
439 {
440         struct cbq_sched_data *q = qdisc_priv(sch);
441         struct cbq_class *cl;
442         int ret;
443
444         if ((cl = q->tx_class) == NULL) {
445                 kfree_skb(skb);
446                 sch->qstats.drops++;
447                 return NET_XMIT_CN;
448         }
449         q->tx_class = NULL;
450
451         cbq_mark_toplevel(q, cl);
452
453 #ifdef CONFIG_NET_CLS_POLICE
454         q->rx_class = cl;
455         cl->q->__parent = sch;
456 #endif
457         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
458                 sch->q.qlen++;
459                 sch->qstats.requeues++;
460                 if (!cl->next_alive)
461                         cbq_activate_class(cl);
462                 return 0;
463         }
464         sch->qstats.drops++;
465         cl->qstats.drops++;
466         return ret;
467 }
468
469 /* Overlimit actions */
470
471 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
472
473 static void cbq_ovl_classic(struct cbq_class *cl)
474 {
475         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476         psched_tdiff_t delay = cl->undertime - q->now;
477
478         if (!cl->delayed) {
479                 delay += cl->offtime;
480
481                 /*
 482                    The class goes to sleep, so it will have no
 483                    chance to work off its avgidle. Let's forgive it 8)
 484
 485                    BTW cbq-2.0 has a bug in this
 486                    place, apparently they forgot to shift it by cl->ewma_log.
487                  */
488                 if (cl->avgidle < 0)
489                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490                 if (cl->avgidle < cl->minidle)
491                         cl->avgidle = cl->minidle;
492                 if (delay <= 0)
493                         delay = 1;
494                 cl->undertime = q->now + delay;
495
496                 cl->xstats.overactions++;
497                 cl->delayed = 1;
498         }
499         if (q->wd_expires == 0 || q->wd_expires > delay)
500                 q->wd_expires = delay;
501
502         /* Dirty work! We must schedule wakeups based on
503            real available rate, rather than leaf rate,
504            which may be tiny (even zero).
505          */
506         if (q->toplevel == TC_CBQ_MAXLEVEL) {
507                 struct cbq_class *b;
508                 psched_tdiff_t base_delay = q->wd_expires;
509
510                 for (b = cl->borrow; b; b = b->borrow) {
511                         delay = b->undertime - q->now;
512                         if (delay < base_delay) {
513                                 if (delay <= 0)
514                                         delay = 1;
515                                 base_delay = delay;
516                         }
517                 }
518
519                 q->wd_expires = base_delay;
520         }
521 }
522
 523 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
524    they go overlimit
525  */
526
527 static void cbq_ovl_rclassic(struct cbq_class *cl)
528 {
529         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530         struct cbq_class *this = cl;
531
532         do {
533                 if (cl->level > q->toplevel) {
534                         cl = NULL;
535                         break;
536                 }
537         } while ((cl = cl->borrow) != NULL);
538
539         if (cl == NULL)
540                 cl = this;
541         cbq_ovl_classic(cl);
542 }
543
 544 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
545
546 static void cbq_ovl_delay(struct cbq_class *cl)
547 {
548         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549         psched_tdiff_t delay = cl->undertime - q->now;
550
551         if (!cl->delayed) {
552                 psched_time_t sched = q->now;
553                 ktime_t expires;
554
555                 delay += cl->offtime;
556                 if (cl->avgidle < 0)
557                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
558                 if (cl->avgidle < cl->minidle)
559                         cl->avgidle = cl->minidle;
560                 cl->undertime = q->now + delay;
561
562                 if (delay > 0) {
563                         sched += delay + cl->penalty;
564                         cl->penalized = sched;
565                         cl->cpriority = TC_CBQ_MAXPRIO;
566                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
567
568                         expires = ktime_set(0, 0);
569                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
570                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
571                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
572                                                   expires)) > 0)
573                                 q->delay_timer.expires = expires;
574                         hrtimer_restart(&q->delay_timer);
575                         cl->delayed = 1;
576                         cl->xstats.overactions++;
577                         return;
578                 }
579                 delay = 1;
580         }
581         if (q->wd_expires == 0 || q->wd_expires > delay)
582                 q->wd_expires = delay;
583 }
584
585 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
586
587 static void cbq_ovl_lowprio(struct cbq_class *cl)
588 {
589         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
590
591         cl->penalized = q->now + cl->penalty;
592
593         if (cl->cpriority != cl->priority2) {
594                 cl->cpriority = cl->priority2;
595                 q->pmask |= (1<<cl->cpriority);
596                 cl->xstats.overactions++;
597         }
598         cbq_ovl_classic(cl);
599 }
600
601 /* TC_CBQ_OVL_DROP: penalize class by dropping */
602
603 static void cbq_ovl_drop(struct cbq_class *cl)
604 {
605         if (cl->q->ops->drop)
606                 if (cl->q->ops->drop(cl->q))
607                         cl->qdisc->q.qlen--;
608         cl->xstats.overactions++;
609         cbq_ovl_classic(cl);
610 }
611
612 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
613                                        psched_time_t now)
614 {
615         struct cbq_class *cl;
616         struct cbq_class *cl_prev = q->active[prio];
617         psched_time_t sched = now;
618
619         if (cl_prev == NULL)
620                 return 0;
621
622         do {
623                 cl = cl_prev->next_alive;
624                 if (now - cl->penalized > 0) {
625                         cl_prev->next_alive = cl->next_alive;
626                         cl->next_alive = NULL;
627                         cl->cpriority = cl->priority;
628                         cl->delayed = 0;
629                         cbq_activate_class(cl);
630
631                         if (cl == q->active[prio]) {
632                                 q->active[prio] = cl_prev;
633                                 if (cl == q->active[prio]) {
634                                         q->active[prio] = NULL;
635                                         return 0;
636                                 }
637                         }
638
639                         cl = cl_prev->next_alive;
640                 } else if (sched - cl->penalized > 0)
641                         sched = cl->penalized;
642         } while ((cl_prev = cl) != q->active[prio]);
643
644         return sched - now;
645 }
646
647 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
648 {
649         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
650                                                 delay_timer);
651         struct Qdisc *sch = q->watchdog.qdisc;
652         psched_time_t now;
653         psched_tdiff_t delay = 0;
654         unsigned pmask;
655
656         now = psched_get_time();
657
658         pmask = q->pmask;
659         q->pmask = 0;
660
661         while (pmask) {
662                 int prio = ffz(~pmask);
663                 psched_tdiff_t tmp;
664
665                 pmask &= ~(1<<prio);
666
667                 tmp = cbq_undelay_prio(q, prio, now);
668                 if (tmp > 0) {
669                         q->pmask |= 1<<prio;
670                         if (tmp < delay || delay == 0)
671                                 delay = tmp;
672                 }
673         }
674
675         if (delay) {
676                 ktime_t time;
677
678                 time = ktime_set(0, 0);
679                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
680                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
681         }
682
683         sch->flags &= ~TCQ_F_THROTTLED;
684         netif_schedule(sch->dev);
685         return HRTIMER_NORESTART;
686 }
687
688
689 #ifdef CONFIG_NET_CLS_POLICE
690
691 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
692 {
693         int len = skb->len;
694         struct Qdisc *sch = child->__parent;
695         struct cbq_sched_data *q = qdisc_priv(sch);
696         struct cbq_class *cl = q->rx_class;
697
698         q->rx_class = NULL;
699
700         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
701
702                 cbq_mark_toplevel(q, cl);
703
704                 q->rx_class = cl;
705                 cl->q->__parent = sch;
706
707                 if (cl->q->enqueue(skb, cl->q) == 0) {
708                         sch->q.qlen++;
709                         sch->bstats.packets++;
710                         sch->bstats.bytes+=len;
711                         if (!cl->next_alive)
712                                 cbq_activate_class(cl);
713                         return 0;
714                 }
715                 sch->qstats.drops++;
716                 return 0;
717         }
718
719         sch->qstats.drops++;
720         return -1;
721 }
722 #endif
723
724 /*
 725    This is a mission-critical procedure.
 726
 727    We "regenerate" the toplevel cutoff if the transmitting class
 728    has backlog and is not regulated. This is not part of the
 729    original CBQ description, but looks more reasonable.
 730    Probably it is wrong. This question needs further investigation.
731 */
732
733 static __inline__ void
734 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
735                     struct cbq_class *borrowed)
736 {
737         if (cl && q->toplevel >= borrowed->level) {
738                 if (cl->q->q.qlen > 1) {
739                         do {
740                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
741                                         q->toplevel = borrowed->level;
742                                         return;
743                                 }
744                         } while ((borrowed=borrowed->borrow) != NULL);
745                 }
746 #if 0
747         /* It is not necessary now. Uncommenting it
748            will save CPU cycles, but decrease fairness.
749          */
750                 q->toplevel = TC_CBQ_MAXLEVEL;
751 #endif
752         }
753 }
754
755 static void
756 cbq_update(struct cbq_sched_data *q)
757 {
758         struct cbq_class *this = q->tx_class;
759         struct cbq_class *cl = this;
760         int len = q->tx_len;
761
762         q->tx_class = NULL;
763
764         for ( ; cl; cl = cl->share) {
765                 long avgidle = cl->avgidle;
766                 long idle;
767
768                 cl->bstats.packets++;
769                 cl->bstats.bytes += len;
770
771                 /*
772                    (now - last) is total time between packet right edges.
773                    (last_pktlen/rate) is "virtual" busy time, so that
774
775                          idle = (now - last) - last_pktlen/rate
776                  */
777
778                 idle = q->now - cl->last;
779                 if ((unsigned long)idle > 128*1024*1024) {
780                         avgidle = cl->maxidle;
781                 } else {
782                         idle -= L2T(cl, len);
783
784                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
785                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 786                    cl->avgidle == true_avgidle/W. Dividing the update by W:
 787                    avgidle += idle - W*avgidle, and W*avgidle == avgidle>>ewma_log, hence:
788                  */
789                         avgidle += idle - (avgidle>>cl->ewma_log);
790                 }
791
792                 if (avgidle <= 0) {
793                         /* Overlimit or at-limit */
794
795                         if (avgidle < cl->minidle)
796                                 avgidle = cl->minidle;
797
798                         cl->avgidle = avgidle;
799
800                         /* Calculate expected time, when this class
801                            will be allowed to send.
802                            It will occur, when:
 803                            (1-W)*true_avgidle + W*idle = 0, i.e.
804                            idle = (1/W - 1)*(-true_avgidle)
805                            or
806                            idle = (1 - W)*(-cl->avgidle);
807                          */
808                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
809
810                         /*
811                            That is not all.
812                            To maintain the rate allocated to the class,
 813                            we add to undertime the virtual clock time
 814                            necessary to complete the transmitted packet.
 815                            (len/phys_bandwidth has already elapsed by
 816                            the moment cbq_update is called.)
817                          */
818
819                         idle -= L2T(&q->link, len);
820                         idle += L2T(cl, len);
821
822                         cl->undertime = q->now + idle;
823                 } else {
824                         /* Underlimit */
825
826                         cl->undertime = PSCHED_PASTPERFECT;
827                         if (avgidle > cl->maxidle)
828                                 cl->avgidle = cl->maxidle;
829                         else
830                                 cl->avgidle = avgidle;
831                 }
832                 cl->last = q->now;
833         }
834
835         cbq_update_toplevel(q, this, q->tx_borrowed);
836 }
837
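/* Return the class that may actually transmit: the class itself when it is
   the root or underlimit, otherwise the first underlimit ancestor on its
   borrow chain.  Running out of ancestors triggers the class' overlimit
   action; crossing the toplevel cutoff just returns NULL. */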
838 static __inline__ struct cbq_class *
839 cbq_under_limit(struct cbq_class *cl)
840 {
841         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
842         struct cbq_class *this_cl = cl;
843
844         if (cl->tparent == NULL)
845                 return cl;
846
847         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
848                 cl->delayed = 0;
849                 return cl;
850         }
851
852         do {
 853                 /* This is a very suspicious place. Now the overlimit
 854                    action is generated for non-bounded classes
 855                    only if the link is completely congested.
 856                    Though this agrees with the ancestor-only paradigm,
 857                    it looks very stupid. In particular,
 858                    it means that this chunk of code will either
 859                    never be called or result in strong amplification
 860                    of burstiness. Dangerous and silly, but
 861                    no other solution exists.
862                  */
863                 if ((cl = cl->borrow) == NULL) {
864                         this_cl->qstats.overlimits++;
865                         this_cl->overlimit(this_cl);
866                         return NULL;
867                 }
868                 if (cl->level > q->toplevel)
869                         return NULL;
870         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
871
872         cl->delayed = 0;
873         return cl;
874 }
875
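/* Weighted round robin over the circular list of active classes in one
   priority band.  The inner loop walks the list once, skipping overlimit
   and empty classes and replenishing exhausted per-round deficits; the
   outer loop repeats the walk as long as some deficit was replenished. */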
876 static __inline__ struct sk_buff *
877 cbq_dequeue_prio(struct Qdisc *sch, int prio)
878 {
879         struct cbq_sched_data *q = qdisc_priv(sch);
880         struct cbq_class *cl_tail, *cl_prev, *cl;
881         struct sk_buff *skb;
882         int deficit;
883
884         cl_tail = cl_prev = q->active[prio];
885         cl = cl_prev->next_alive;
886
887         do {
888                 deficit = 0;
889
890                 /* Start round */
891                 do {
892                         struct cbq_class *borrow = cl;
893
894                         if (cl->q->q.qlen &&
895                             (borrow = cbq_under_limit(cl)) == NULL)
896                                 goto skip_class;
897
898                         if (cl->deficit <= 0) {
899                                 /* Class exhausted its allotment per
900                                    this round. Switch to the next one.
901                                  */
902                                 deficit = 1;
903                                 cl->deficit += cl->quantum;
904                                 goto next_class;
905                         }
906
907                         skb = cl->q->dequeue(cl->q);
908
909                         /* Class did not give us any skb :-(
910                            It could occur even if cl->q->q.qlen != 0
 911                            e.g. if cl->q == "tbf"
912                          */
913                         if (skb == NULL)
914                                 goto skip_class;
915
916                         cl->deficit -= skb->len;
917                         q->tx_class = cl;
918                         q->tx_borrowed = borrow;
919                         if (borrow != cl) {
920 #ifndef CBQ_XSTATS_BORROWS_BYTES
921                                 borrow->xstats.borrows++;
922                                 cl->xstats.borrows++;
923 #else
924                                 borrow->xstats.borrows += skb->len;
925                                 cl->xstats.borrows += skb->len;
926 #endif
927                         }
928                         q->tx_len = skb->len;
929
930                         if (cl->deficit <= 0) {
931                                 q->active[prio] = cl;
932                                 cl = cl->next_alive;
933                                 cl->deficit += cl->quantum;
934                         }
935                         return skb;
936
937 skip_class:
938                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
939                                 /* Class is empty or penalized.
940                                    Unlink it from active chain.
941                                  */
942                                 cl_prev->next_alive = cl->next_alive;
943                                 cl->next_alive = NULL;
944
945                                 /* Did cl_tail point to it? */
946                                 if (cl == cl_tail) {
947                                         /* Repair it! */
948                                         cl_tail = cl_prev;
949
950                                         /* Was it the last class in this band? */
951                                         if (cl == cl_tail) {
952                                                 /* Kill the band! */
953                                                 q->active[prio] = NULL;
954                                                 q->activemask &= ~(1<<prio);
955                                                 if (cl->q->q.qlen)
956                                                         cbq_activate_class(cl);
957                                                 return NULL;
958                                         }
959
960                                         q->active[prio] = cl_tail;
961                                 }
962                                 if (cl->q->q.qlen)
963                                         cbq_activate_class(cl);
964
965                                 cl = cl_prev;
966                         }
967
968 next_class:
969                         cl_prev = cl;
970                         cl = cl->next_alive;
971                 } while (cl_prev != cl_tail);
972         } while (deficit);
973
974         q->active[prio] = cl_prev;
975
976         return NULL;
977 }
978
979 static __inline__ struct sk_buff *
980 cbq_dequeue_1(struct Qdisc *sch)
981 {
982         struct cbq_sched_data *q = qdisc_priv(sch);
983         struct sk_buff *skb;
984         unsigned activemask;
985
986         activemask = q->activemask&0xFF;
987         while (activemask) {
988                 int prio = ffz(~activemask);
989                 activemask &= ~(1<<prio);
990                 skb = cbq_dequeue_prio(sch, prio);
991                 if (skb)
992                         return skb;
993         }
994         return NULL;
995 }
996
997 static struct sk_buff *
998 cbq_dequeue(struct Qdisc *sch)
999 {
1000         struct sk_buff *skb;
1001         struct cbq_sched_data *q = qdisc_priv(sch);
1002         psched_time_t now;
1003         psched_tdiff_t incr;
1004
1005         now = psched_get_time();
1006         incr = now - q->now_rt;
1007
1008         if (q->tx_class) {
1009                 psched_tdiff_t incr2;
1010                 /* Time integrator. We calculate EOS time
 1011                    by adding the expected packet transmission time.
 1012                    If real time is greater, we warp the artificial clock,
1013                    so that:
1014
1015                    cbq_time = max(real_time, work);
1016                  */
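                /* Net effect: q->now advances by max(incr, incr2), i.e. by
                   the larger of the elapsed real time and the modeled
                   transmission time of the previously sent packet. */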
1017                 incr2 = L2T(&q->link, q->tx_len);
1018                 q->now += incr2;
1019                 cbq_update(q);
1020                 if ((incr -= incr2) < 0)
1021                         incr = 0;
1022         }
1023         q->now += incr;
1024         q->now_rt = now;
1025
1026         for (;;) {
1027                 q->wd_expires = 0;
1028
1029                 skb = cbq_dequeue_1(sch);
1030                 if (skb) {
1031                         sch->q.qlen--;
1032                         sch->flags &= ~TCQ_F_THROTTLED;
1033                         return skb;
1034                 }
1035
1036                 /* All the classes are overlimit.
1037
 1038                    This is possible if:
1039
1040                    1. Scheduler is empty.
1041                    2. Toplevel cutoff inhibited borrowing.
1042                    3. Root class is overlimit.
1043
 1044                    Reset the 2nd and 3rd conditions and retry.
1045
 1046                    Note that NS and cbq-2.0 are buggy: peeking at
 1047                    an arbitrary class is appropriate for ancestor-only
 1048                    sharing, but not for the toplevel algorithm.
1049
 1050                    Our version is better but slower, because it requires
 1051                    two passes; this is unavoidable with top-level sharing.
1052                 */
1053
1054                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1055                     q->link.undertime == PSCHED_PASTPERFECT)
1056                         break;
1057
1058                 q->toplevel = TC_CBQ_MAXLEVEL;
1059                 q->link.undertime = PSCHED_PASTPERFECT;
1060         }
1061
1062         /* No packets in scheduler or nobody wants to give them to us :-(
 1063            Sigh... start the watchdog timer in the latter case. */
1064
1065         if (sch->q.qlen) {
1066                 sch->qstats.overlimits++;
1067                 if (q->wd_expires)
1068                         qdisc_watchdog_schedule(&q->watchdog,
1069                                                 now + q->wd_expires);
1070         }
1071         return NULL;
1072 }
1073
 1074 /* CBQ class maintenance routines */
1075
1076 static void cbq_adjust_levels(struct cbq_class *this)
1077 {
1078         if (this == NULL)
1079                 return;
1080
1081         do {
1082                 int level = 0;
1083                 struct cbq_class *cl;
1084
1085                 if ((cl = this->children) != NULL) {
1086                         do {
1087                                 if (cl->level > level)
1088                                         level = cl->level;
1089                         } while ((cl = cl->sibling) != this->children);
1090                 }
1091                 this->level = level+1;
1092         } while ((this = this->tparent) != NULL);
1093 }
1094
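/* Recompute the per-round quantum of every class at the given priority:
   quantum = weight * allot * nclasses / sum_of_weights, so with equal
   weights each class simply receives its allot.  Results that are
   non-positive or larger than 32*MTU are repaired to MTU/2 + 1. */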
1095 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1096 {
1097         struct cbq_class *cl;
1098         unsigned h;
1099
1100         if (q->quanta[prio] == 0)
1101                 return;
1102
1103         for (h=0; h<16; h++) {
1104                 for (cl = q->classes[h]; cl; cl = cl->next) {
 1105                         /* BUGGGG... Beware! This expression suffers from
 1106                            arithmetic overflow!
1107                          */
1108                         if (cl->priority == prio) {
1109                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1110                                         q->quanta[prio];
1111                         }
1112                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1113                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1114                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1115                         }
1116                 }
1117         }
1118 }
1119
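/* Rebuild the defaults[] table of cl's split node: stale slots still
   pointing at cl are cleared, then each empty TC_PRIO slot is filled with
   the lowest-level class attached to the same split node whose defmap
   selects that slot. */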
1120 static void cbq_sync_defmap(struct cbq_class *cl)
1121 {
1122         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1123         struct cbq_class *split = cl->split;
1124         unsigned h;
1125         int i;
1126
1127         if (split == NULL)
1128                 return;
1129
1130         for (i=0; i<=TC_PRIO_MAX; i++) {
1131                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1132                         split->defaults[i] = NULL;
1133         }
1134
1135         for (i=0; i<=TC_PRIO_MAX; i++) {
1136                 int level = split->level;
1137
1138                 if (split->defaults[i])
1139                         continue;
1140
1141                 for (h=0; h<16; h++) {
1142                         struct cbq_class *c;
1143
1144                         for (c = q->classes[h]; c; c = c->next) {
1145                                 if (c->split == split && c->level < level &&
1146                                     c->defmap&(1<<i)) {
1147                                         split->defaults[i] = c;
1148                                         level = c->level;
1149                                 }
1150                         }
1151                 }
1152         }
1153 }
1154
1155 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1156 {
1157         struct cbq_class *split = NULL;
1158
1159         if (splitid == 0) {
1160                 if ((split = cl->split) == NULL)
1161                         return;
1162                 splitid = split->classid;
1163         }
1164
1165         if (split == NULL || split->classid != splitid) {
1166                 for (split = cl->tparent; split; split = split->tparent)
1167                         if (split->classid == splitid)
1168                                 break;
1169         }
1170
1171         if (split == NULL)
1172                 return;
1173
1174         if (cl->split != split) {
1175                 cl->defmap = 0;
1176                 cbq_sync_defmap(cl);
1177                 cl->split = split;
1178                 cl->defmap = def&mask;
1179         } else
1180                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1181
1182         cbq_sync_defmap(cl);
1183 }
1184
1185 static void cbq_unlink_class(struct cbq_class *this)
1186 {
1187         struct cbq_class *cl, **clp;
1188         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1189
1190         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1191                 if (cl == this) {
1192                         *clp = cl->next;
1193                         cl->next = NULL;
1194                         break;
1195                 }
1196         }
1197
1198         if (this->tparent) {
1199                 clp=&this->sibling;
1200                 cl = *clp;
1201                 do {
1202                         if (cl == this) {
1203                                 *clp = cl->sibling;
1204                                 break;
1205                         }
1206                         clp = &cl->sibling;
1207                 } while ((cl = *clp) != this->sibling);
1208
1209                 if (this->tparent->children == this) {
1210                         this->tparent->children = this->sibling;
1211                         if (this->sibling == this)
1212                                 this->tparent->children = NULL;
1213                 }
1214         } else {
1215                 BUG_TRAP(this->sibling == this);
1216         }
1217 }
1218
1219 static void cbq_link_class(struct cbq_class *this)
1220 {
1221         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1222         unsigned h = cbq_hash(this->classid);
1223         struct cbq_class *parent = this->tparent;
1224
1225         this->sibling = this;
1226         this->next = q->classes[h];
1227         q->classes[h] = this;
1228
1229         if (parent == NULL)
1230                 return;
1231
1232         if (parent->children == NULL) {
1233                 parent->children = this;
1234         } else {
1235                 this->sibling = parent->children->sibling;
1236                 parent->children->sibling = this;
1237         }
1238 }
1239
1240 static unsigned int cbq_drop(struct Qdisc* sch)
1241 {
1242         struct cbq_sched_data *q = qdisc_priv(sch);
1243         struct cbq_class *cl, *cl_head;
1244         int prio;
1245         unsigned int len;
1246
1247         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1248                 if ((cl_head = q->active[prio]) == NULL)
1249                         continue;
1250
1251                 cl = cl_head;
1252                 do {
1253                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1254                                 sch->q.qlen--;
1255                                 if (!cl->q->q.qlen)
1256                                         cbq_deactivate_class(cl);
1257                                 return len;
1258                         }
1259                 } while ((cl = cl->next_alive) != cl_head);
1260         }
1261         return 0;
1262 }
1263
1264 static void
1265 cbq_reset(struct Qdisc* sch)
1266 {
1267         struct cbq_sched_data *q = qdisc_priv(sch);
1268         struct cbq_class *cl;
1269         int prio;
1270         unsigned h;
1271
1272         q->activemask = 0;
1273         q->pmask = 0;
1274         q->tx_class = NULL;
1275         q->tx_borrowed = NULL;
1276         qdisc_watchdog_cancel(&q->watchdog);
1277         hrtimer_cancel(&q->delay_timer);
1278         q->toplevel = TC_CBQ_MAXLEVEL;
1279         q->now = psched_get_time();
1280         q->now_rt = q->now;
1281
1282         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1283                 q->active[prio] = NULL;
1284
1285         for (h = 0; h < 16; h++) {
1286                 for (cl = q->classes[h]; cl; cl = cl->next) {
1287                         qdisc_reset(cl->q);
1288
1289                         cl->next_alive = NULL;
1290                         cl->undertime = PSCHED_PASTPERFECT;
1291                         cl->avgidle = cl->maxidle;
1292                         cl->deficit = cl->quantum;
1293                         cl->cpriority = cl->priority;
1294                 }
1295         }
1296         sch->q.qlen = 0;
1297 }
1298
1299
1300 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1301 {
1302         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1303                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1304                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1305         }
1306         if (lss->change&TCF_CBQ_LSS_EWMA)
1307                 cl->ewma_log = lss->ewma_log;
1308         if (lss->change&TCF_CBQ_LSS_AVPKT)
1309                 cl->avpkt = lss->avpkt;
1310         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1311                 cl->minidle = -(long)lss->minidle;
1312         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1313                 cl->maxidle = lss->maxidle;
1314                 cl->avgidle = lss->maxidle;
1315         }
1316         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1317                 cl->offtime = lss->offtime;
1318         return 0;
1319 }
1320
1321 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1322 {
1323         q->nclasses[cl->priority]--;
1324         q->quanta[cl->priority] -= cl->weight;
1325         cbq_normalize_quanta(q, cl->priority);
1326 }
1327
1328 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1329 {
1330         q->nclasses[cl->priority]++;
1331         q->quanta[cl->priority] += cl->weight;
1332         cbq_normalize_quanta(q, cl->priority);
1333 }
1334
1335 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1336 {
1337         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1338
1339         if (wrr->allot)
1340                 cl->allot = wrr->allot;
1341         if (wrr->weight)
1342                 cl->weight = wrr->weight;
1343         if (wrr->priority) {
1344                 cl->priority = wrr->priority-1;
1345                 cl->cpriority = cl->priority;
1346                 if (cl->priority >= cl->priority2)
1347                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1348         }
1349
1350         cbq_addprio(q, cl);
1351         return 0;
1352 }
1353
1354 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1355 {
1356         switch (ovl->strategy) {
1357         case TC_CBQ_OVL_CLASSIC:
1358                 cl->overlimit = cbq_ovl_classic;
1359                 break;
1360         case TC_CBQ_OVL_DELAY:
1361                 cl->overlimit = cbq_ovl_delay;
1362                 break;
1363         case TC_CBQ_OVL_LOWPRIO:
1364                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1365                     ovl->priority2-1 <= cl->priority)
1366                         return -EINVAL;
1367                 cl->priority2 = ovl->priority2-1;
1368                 cl->overlimit = cbq_ovl_lowprio;
1369                 break;
1370         case TC_CBQ_OVL_DROP:
1371                 cl->overlimit = cbq_ovl_drop;
1372                 break;
1373         case TC_CBQ_OVL_RCLASSIC:
1374                 cl->overlimit = cbq_ovl_rclassic;
1375                 break;
1376         default:
1377                 return -EINVAL;
1378         }
1379         cl->penalty = ovl->penalty;
1380         return 0;
1381 }
1382
1383 #ifdef CONFIG_NET_CLS_POLICE
1384 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1385 {
1386         cl->police = p->police;
1387
1388         if (cl->q->handle) {
1389                 if (p->police == TC_POLICE_RECLASSIFY)
1390                         cl->q->reshape_fail = cbq_reshape_fail;
1391                 else
1392                         cl->q->reshape_fail = NULL;
1393         }
1394         return 0;
1395 }
1396 #endif
1397
1398 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1399 {
1400         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1401         return 0;
1402 }
1403
1404 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1405 {
1406         struct cbq_sched_data *q = qdisc_priv(sch);
1407         struct rtattr *tb[TCA_CBQ_MAX];
1408         struct tc_ratespec *r;
1409
1410         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1411             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1412             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1413                 return -EINVAL;
1414
1415         if (tb[TCA_CBQ_LSSOPT-1] &&
1416             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1417                 return -EINVAL;
1418
1419         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1420
1421         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1422                 return -EINVAL;
1423
1424         q->link.refcnt = 1;
1425         q->link.sibling = &q->link;
1426         q->link.classid = sch->handle;
1427         q->link.qdisc = sch;
1428         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1429                                             sch->handle)))
1430                 q->link.q = &noop_qdisc;
1431
1432         q->link.priority = TC_CBQ_MAXPRIO-1;
1433         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1434         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1435         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1436         q->link.overlimit = cbq_ovl_classic;
1437         q->link.allot = psched_mtu(sch->dev);
1438         q->link.quantum = q->link.allot;
1439         q->link.weight = q->link.R_tab->rate.rate;
1440
1441         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1442         q->link.avpkt = q->link.allot/2;
1443         q->link.minidle = -0x7FFFFFFF;
1444
1445         qdisc_watchdog_init(&q->watchdog, sch);
1446         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1447         q->delay_timer.function = cbq_undelay;
1448         q->toplevel = TC_CBQ_MAXLEVEL;
1449         q->now = psched_get_time();
1450         q->now_rt = q->now;
1451
1452         cbq_link_class(&q->link);
1453
1454         if (tb[TCA_CBQ_LSSOPT-1])
1455                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1456
1457         cbq_addprio(q, &q->link);
1458         return 0;
1459 }
1460
1461 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1462 {
1463         unsigned char *b = skb_tail_pointer(skb);
1464
1465         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1466         return skb->len;
1467
1468 rtattr_failure:
1469         nlmsg_trim(skb, b);
1470         return -1;
1471 }
1472
1473 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1474 {
1475         unsigned char *b = skb_tail_pointer(skb);
1476         struct tc_cbq_lssopt opt;
1477
1478         opt.flags = 0;
1479         if (cl->borrow == NULL)
1480                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1481         if (cl->share == NULL)
1482                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1483         opt.ewma_log = cl->ewma_log;
1484         opt.level = cl->level;
1485         opt.avpkt = cl->avpkt;
1486         opt.maxidle = cl->maxidle;
1487         opt.minidle = (u32)(-cl->minidle);
1488         opt.offtime = cl->offtime;
1489         opt.change = ~0;
1490         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1491         return skb->len;
1492
1493 rtattr_failure:
1494         nlmsg_trim(skb, b);
1495         return -1;
1496 }
1497
1498 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1499 {
1500         unsigned char *b = skb_tail_pointer(skb);
1501         struct tc_cbq_wrropt opt;
1502
1503         opt.flags = 0;
1504         opt.allot = cl->allot;
1505         opt.priority = cl->priority+1;
1506         opt.cpriority = cl->cpriority+1;
1507         opt.weight = cl->weight;
1508         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1509         return skb->len;
1510
1511 rtattr_failure:
1512         nlmsg_trim(skb, b);
1513         return -1;
1514 }
1515
1516 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1517 {
1518         unsigned char *b = skb_tail_pointer(skb);
1519         struct tc_cbq_ovl opt;
1520
1521         opt.strategy = cl->ovl_strategy;
1522         opt.priority2 = cl->priority2+1;
1523         opt.pad = 0;
1524         opt.penalty = cl->penalty;
1525         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1526         return skb->len;
1527
1528 rtattr_failure:
1529         nlmsg_trim(skb, b);
1530         return -1;
1531 }
1532
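     /*
      * TCA_CBQ_FOPT: split node and default-class map, emitted only when
      * the class actually has either of them set.
      */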
1533 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1534 {
1535         unsigned char *b = skb_tail_pointer(skb);
1536         struct tc_cbq_fopt opt;
1537
1538         if (cl->split || cl->defmap) {
1539                 opt.split = cl->split ? cl->split->classid : 0;
1540                 opt.defmap = cl->defmap;
1541                 opt.defchange = ~0;
1542                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1543         }
1544         return skb->len;
1545
1546 rtattr_failure:
1547         nlmsg_trim(skb, b);
1548         return -1;
1549 }
1550
1551 #ifdef CONFIG_NET_CLS_POLICE
1552 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1553 {
1554         unsigned char *b = skb_tail_pointer(skb);
1555         struct tc_cbq_police opt;
1556
1557         if (cl->police) {
1558                 opt.police = cl->police;
1559                 opt.__res1 = 0;
1560                 opt.__res2 = 0;
1561                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1562         }
1563         return skb->len;
1564
1565 rtattr_failure:
1566         nlmsg_trim(skb, b);
1567         return -1;
1568 }
1569 #endif
1570
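     /* Dump the full attribute set of one class; the first failing helper aborts the dump. */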
1571 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1572 {
1573         if (cbq_dump_lss(skb, cl) < 0 ||
1574             cbq_dump_rate(skb, cl) < 0 ||
1575             cbq_dump_wrr(skb, cl) < 0 ||
1576             cbq_dump_ovl(skb, cl) < 0 ||
1577 #ifdef CONFIG_NET_CLS_POLICE
1578             cbq_dump_police(skb, cl) < 0 ||
1579 #endif
1580             cbq_dump_fopt(skb, cl) < 0)
1581                 return -1;
1582         return 0;
1583 }
1584
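     /*
      * Qdisc-level dump: open a TCA_OPTIONS nest, emit the root (link)
      * class attributes and patch up the nest length afterwards.
      */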
1585 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1586 {
1587         struct cbq_sched_data *q = qdisc_priv(sch);
1588         unsigned char *b = skb_tail_pointer(skb);
1589         struct rtattr *rta;
1590
1591         rta = (struct rtattr*)b;
1592         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1593         if (cbq_dump_attr(skb, &q->link) < 0)
1594                 goto rtattr_failure;
1595         rta->rta_len = skb_tail_pointer(skb) - b;
1596         return skb->len;
1597
1598 rtattr_failure:
1599         nlmsg_trim(skb, b);
1600         return -1;
1601 }
1602
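     /* Qdisc-level xstats: refresh avgidle and export the root class's xstats. */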
1603 static int
1604 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1605 {
1606         struct cbq_sched_data *q = qdisc_priv(sch);
1607
1608         q->link.xstats.avgidle = q->link.avgidle;
1609         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1610 }
1611
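     /*
      * Per-class dump: fill in parent, handle and leaf qdisc in the tcmsg,
      * then emit the class attributes inside a TCA_OPTIONS nest.
      */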
1612 static int
1613 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1614                struct sk_buff *skb, struct tcmsg *tcm)
1615 {
1616         struct cbq_class *cl = (struct cbq_class*)arg;
1617         unsigned char *b = skb_tail_pointer(skb);
1618         struct rtattr *rta;
1619
1620         if (cl->tparent)
1621                 tcm->tcm_parent = cl->tparent->classid;
1622         else
1623                 tcm->tcm_parent = TC_H_ROOT;
1624         tcm->tcm_handle = cl->classid;
1625         tcm->tcm_info = cl->q->handle;
1626
1627         rta = (struct rtattr*)b;
1628         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1629         if (cbq_dump_attr(skb, cl) < 0)
1630                 goto rtattr_failure;
1631         rta->rta_len = skb_tail_pointer(skb) - b;
1632         return skb->len;
1633
1634 rtattr_failure:
1635         nlmsg_trim(skb, b);
1636         return -1;
1637 }
1638
1639 static int
1640 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1641         struct gnet_dump *d)
1642 {
1643         struct cbq_sched_data *q = qdisc_priv(sch);
1644         struct cbq_class *cl = (struct cbq_class*)arg;
1645
1646         cl->qstats.qlen = cl->q->q.qlen;
1647         cl->xstats.avgidle = cl->avgidle;
1648         cl->xstats.undertime = 0;
1649
1650         if (cl->undertime != PSCHED_PASTPERFECT)
1651                 cl->xstats.undertime = cl->undertime - q->now;
1652
1653         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1654             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1655             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1656                 return -1;
1657
1658         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1659 }
1660
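     /*
      * Replace the leaf qdisc of a class.  A NULL replacement gets a fresh
      * pfifo; the old qdisc's queue length is subtracted from the tree and
      * the old qdisc reset before it is handed back through *old.
      */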
1661 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1662                      struct Qdisc **old)
1663 {
1664         struct cbq_class *cl = (struct cbq_class*)arg;
1665
1666         if (cl) {
1667                 if (new == NULL) {
1668                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1669                                                      cl->classid)) == NULL)
1670                                 return -ENOBUFS;
1671                 } else {
1672 #ifdef CONFIG_NET_CLS_POLICE
1673                         if (cl->police == TC_POLICE_RECLASSIFY)
1674                                 new->reshape_fail = cbq_reshape_fail;
1675 #endif
1676                 }
1677                 sch_tree_lock(sch);
1678                 *old = xchg(&cl->q, new);
1679                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1680                 qdisc_reset(*old);
1681                 sch_tree_unlock(sch);
1682
1683                 return 0;
1684         }
1685         return -ENOENT;
1686 }
1687
1688 static struct Qdisc *
1689 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1690 {
1691         struct cbq_class *cl = (struct cbq_class*)arg;
1692
1693         return cl ? cl->q : NULL;
1694 }
1695
1696 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1697 {
1698         struct cbq_class *cl = (struct cbq_class *)arg;
1699
1700         if (cl->q->q.qlen == 0)
1701                 cbq_deactivate_class(cl);
1702 }
1703
1704 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1705 {
1706         struct cbq_sched_data *q = qdisc_priv(sch);
1707         struct cbq_class *cl = cbq_class_lookup(q, classid);
1708
1709         if (cl) {
1710                 cl->refcnt++;
1711                 return (unsigned long)cl;
1712         }
1713         return 0;
1714 }
1715
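     /*
      * Release everything a class owns: filter chain, leaf qdisc, rate
      * table and rate estimator.  The root "link" class is embedded in
      * cbq_sched_data, so it is not kfree'd here.
      */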
1716 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1717 {
1718         struct cbq_sched_data *q = qdisc_priv(sch);
1719
1720         BUG_TRAP(!cl->filters);
1721
1722         tcf_destroy_chain(cl->filter_list);
1723         qdisc_destroy(cl->q);
1724         qdisc_put_rtab(cl->R_tab);
1725         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1726         if (cl != &q->link)
1727                 kfree(cl);
1728 }
1729
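     /* Tear down the whole qdisc; see the ordering note below on why filters go first. */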
1730 static void
1731 cbq_destroy(struct Qdisc* sch)
1732 {
1733         struct cbq_sched_data *q = qdisc_priv(sch);
1734         struct cbq_class *cl;
1735         unsigned h;
1736
1737 #ifdef CONFIG_NET_CLS_POLICE
1738         q->rx_class = NULL;
1739 #endif
1740         /*
1741          * Filters must be destroyed first because we don't destroy the
1742          * classes from root to leaves, which means that filters can still
1743          * be bound to classes which have been destroyed already. --TGR '04
1744          */
1745         for (h = 0; h < 16; h++) {
1746                 for (cl = q->classes[h]; cl; cl = cl->next) {
1747                         tcf_destroy_chain(cl->filter_list);
1748                         cl->filter_list = NULL;
1749                 }
1750         }
1751         for (h = 0; h < 16; h++) {
1752                 struct cbq_class *next;
1753
1754                 for (cl = q->classes[h]; cl; cl = next) {
1755                         next = cl->next;
1756                         cbq_destroy_class(sch, cl);
1757                 }
1758         }
1759 }
1760
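     /*
      * Drop a class reference.  On the final put the class may still be
      * cached as rx_class (CONFIG_NET_CLS_POLICE), so that is cleared under
      * the device queue lock before the class is destroyed.
      */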
1761 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1762 {
1763         struct cbq_class *cl = (struct cbq_class*)arg;
1764
1765         if (--cl->refcnt == 0) {
1766 #ifdef CONFIG_NET_CLS_POLICE
1767                 struct cbq_sched_data *q = qdisc_priv(sch);
1768
1769                 spin_lock_bh(&sch->dev->queue_lock);
1770                 if (q->rx_class == cl)
1771                         q->rx_class = NULL;
1772                 spin_unlock_bh(&sch->dev->queue_lock);
1773 #endif
1774
1775                 cbq_destroy_class(sch, cl);
1776         }
1777 }
1778
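     /*
      * Create a new class or change an existing one (*arg != 0).  For a
      * new class the RATE, WRR and LSS options are mandatory, a classid is
      * generated if none was given, and parameters left unset (ewma_log,
      * maxidle, avpkt) default to the root class's values.
      */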
1779 static int
1780 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1781                  unsigned long *arg)
1782 {
1783         int err;
1784         struct cbq_sched_data *q = qdisc_priv(sch);
1785         struct cbq_class *cl = (struct cbq_class*)*arg;
1786         struct rtattr *opt = tca[TCA_OPTIONS-1];
1787         struct rtattr *tb[TCA_CBQ_MAX];
1788         struct cbq_class *parent;
1789         struct qdisc_rate_table *rtab = NULL;
1790
1791         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1792                 return -EINVAL;
1793
1794         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1795             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1796                 return -EINVAL;
1797
1798         if (tb[TCA_CBQ_FOPT-1] &&
1799             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1800                 return -EINVAL;
1801
1802         if (tb[TCA_CBQ_RATE-1] &&
1803             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1804                 return -EINVAL;
1805
1806         if (tb[TCA_CBQ_LSSOPT-1] &&
1807             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1808                 return -EINVAL;
1809
1810         if (tb[TCA_CBQ_WRROPT-1] &&
1811             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1812                 return -EINVAL;
1813
1814 #ifdef CONFIG_NET_CLS_POLICE
1815         if (tb[TCA_CBQ_POLICE-1] &&
1816             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1817                 return -EINVAL;
1818 #endif
1819
1820         if (cl) {
1821                 /* Check parent */
1822                 if (parentid) {
1823                         if (cl->tparent && cl->tparent->classid != parentid)
1824                                 return -EINVAL;
1825                         if (!cl->tparent && parentid != TC_H_ROOT)
1826                                 return -EINVAL;
1827                 }
1828
1829                 if (tb[TCA_CBQ_RATE-1]) {
1830                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1831                         if (rtab == NULL)
1832                                 return -EINVAL;
1833                 }
1834
1835                 /* Change class parameters */
1836                 sch_tree_lock(sch);
1837
1838                 if (cl->next_alive != NULL)
1839                         cbq_deactivate_class(cl);
1840
1841                 if (rtab) {
1842                         rtab = xchg(&cl->R_tab, rtab);
1843                         qdisc_put_rtab(rtab);
1844                 }
1845
1846                 if (tb[TCA_CBQ_LSSOPT-1])
1847                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1848
1849                 if (tb[TCA_CBQ_WRROPT-1]) {
1850                         cbq_rmprio(q, cl);
1851                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1852                 }
1853
1854                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1855                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1856
1857 #ifdef CONFIG_NET_CLS_POLICE
1858                 if (tb[TCA_CBQ_POLICE-1])
1859                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1860 #endif
1861
1862                 if (tb[TCA_CBQ_FOPT-1])
1863                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1864
1865                 if (cl->q->q.qlen)
1866                         cbq_activate_class(cl);
1867
1868                 sch_tree_unlock(sch);
1869
1870                 if (tca[TCA_RATE-1])
1871                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1872                                               &sch->dev->queue_lock,
1873                                               tca[TCA_RATE-1]);
1874                 return 0;
1875         }
1876
1877         if (parentid == TC_H_ROOT)
1878                 return -EINVAL;
1879
1880         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1881             tb[TCA_CBQ_LSSOPT-1] == NULL)
1882                 return -EINVAL;
1883
1884         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1885         if (rtab == NULL)
1886                 return -EINVAL;
1887
1888         if (classid) {
1889                 err = -EINVAL;
1890                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1891                         goto failure;
1892         } else {
1893                 int i;
1894                 classid = TC_H_MAKE(sch->handle,0x8000);
1895
1896                 for (i=0; i<0x8000; i++) {
1897                         if (++q->hgenerator >= 0x8000)
1898                                 q->hgenerator = 1;
1899                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1900                                 break;
1901                 }
1902                 err = -ENOSR;
1903                 if (i >= 0x8000)
1904                         goto failure;
1905                 classid = classid|q->hgenerator;
1906         }
1907
1908         parent = &q->link;
1909         if (parentid) {
1910                 parent = cbq_class_lookup(q, parentid);
1911                 err = -EINVAL;
1912                 if (parent == NULL)
1913                         goto failure;
1914         }
1915
1916         err = -ENOBUFS;
1917         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1918         if (cl == NULL)
1919                 goto failure;
1920         cl->R_tab = rtab;
1921         rtab = NULL;
1922         cl->refcnt = 1;
1923         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1924                 cl->q = &noop_qdisc;
1925         cl->classid = classid;
1926         cl->tparent = parent;
1927         cl->qdisc = sch;
1928         cl->allot = parent->allot;
1929         cl->quantum = cl->allot;
1930         cl->weight = cl->R_tab->rate.rate;
1931
1932         sch_tree_lock(sch);
1933         cbq_link_class(cl);
1934         cl->borrow = cl->tparent;
1935         if (cl->tparent != &q->link)
1936                 cl->share = cl->tparent;
1937         cbq_adjust_levels(parent);
1938         cl->minidle = -0x7FFFFFFF;
1939         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1940         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1941         if (cl->ewma_log==0)
1942                 cl->ewma_log = q->link.ewma_log;
1943         if (cl->maxidle==0)
1944                 cl->maxidle = q->link.maxidle;
1945         if (cl->avpkt==0)
1946                 cl->avpkt = q->link.avpkt;
1947         cl->overlimit = cbq_ovl_classic;
1948         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1949                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1950 #ifdef CONFIG_NET_CLS_POLICE
1951         if (tb[TCA_CBQ_POLICE-1])
1952                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1953 #endif
1954         if (tb[TCA_CBQ_FOPT-1])
1955                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1956         sch_tree_unlock(sch);
1957
1958         if (tca[TCA_RATE-1])
1959                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1960                                   &sch->dev->queue_lock, tca[TCA_RATE-1]);
1961
1962         *arg = (unsigned long)cl;
1963         return 0;
1964
1965 failure:
1966         qdisc_put_rtab(rtab);
1967         return err;
1968 }
1969
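     /*
      * Delete a class: refuse while filters or child classes still refer
      * to it (or if it is the root class), purge its queue, unlink it from
      * the hierarchy and drop the caller's reference.
      */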
1970 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1971 {
1972         struct cbq_sched_data *q = qdisc_priv(sch);
1973         struct cbq_class *cl = (struct cbq_class*)arg;
1974         unsigned int qlen;
1975
1976         if (cl->filters || cl->children || cl == &q->link)
1977                 return -EBUSY;
1978
1979         sch_tree_lock(sch);
1980
1981         qlen = cl->q->q.qlen;
1982         qdisc_reset(cl->q);
1983         qdisc_tree_decrease_qlen(cl->q, qlen);
1984
1985         if (cl->next_alive)
1986                 cbq_deactivate_class(cl);
1987
1988         if (q->tx_borrowed == cl)
1989                 q->tx_borrowed = q->tx_class;
1990         if (q->tx_class == cl) {
1991                 q->tx_class = NULL;
1992                 q->tx_borrowed = NULL;
1993         }
1994 #ifdef CONFIG_NET_CLS_POLICE
1995         if (q->rx_class == cl)
1996                 q->rx_class = NULL;
1997 #endif
1998
1999         cbq_unlink_class(cl);
2000         cbq_adjust_levels(cl->tparent);
2001         cl->defmap = 0;
2002         cbq_sync_defmap(cl);
2003
2004         cbq_rmprio(q, cl);
2005         sch_tree_unlock(sch);
2006
2007         if (--cl->refcnt == 0)
2008                 cbq_destroy_class(sch, cl);
2009
2010         return 0;
2011 }
2012
2013 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2014 {
2015         struct cbq_sched_data *q = qdisc_priv(sch);
2016         struct cbq_class *cl = (struct cbq_class *)arg;
2017
2018         if (cl == NULL)
2019                 cl = &q->link;
2020
2021         return &cl->filter_list;
2022 }
2023
2024 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2025                                      u32 classid)
2026 {
2027         struct cbq_sched_data *q = qdisc_priv(sch);
2028         struct cbq_class *p = (struct cbq_class*)parent;
2029         struct cbq_class *cl = cbq_class_lookup(q, classid);
2030
2031         if (cl) {
2032                 if (p && p->level <= cl->level)
2033                         return 0;
2034                 cl->filters++;
2035                 return (unsigned long)cl;
2036         }
2037         return 0;
2038 }
2039
2040 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2041 {
2042         struct cbq_class *cl = (struct cbq_class*)arg;
2043
2044         cl->filters--;
2045 }
2046
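     /* Visit every class in the 16-bucket hash, honouring the walker's skip/count cursor. */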
2047 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2048 {
2049         struct cbq_sched_data *q = qdisc_priv(sch);
2050         unsigned h;
2051
2052         if (arg->stop)
2053                 return;
2054
2055         for (h = 0; h < 16; h++) {
2056                 struct cbq_class *cl;
2057
2058                 for (cl = q->classes[h]; cl; cl = cl->next) {
2059                         if (arg->count < arg->skip) {
2060                                 arg->count++;
2061                                 continue;
2062                         }
2063                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2064                                 arg->stop = 1;
2065                                 return;
2066                         }
2067                         arg->count++;
2068                 }
2069         }
2070 }
2071
2072 static struct Qdisc_class_ops cbq_class_ops = {
2073         .graft          =       cbq_graft,
2074         .leaf           =       cbq_leaf,
2075         .qlen_notify    =       cbq_qlen_notify,
2076         .get            =       cbq_get,
2077         .put            =       cbq_put,
2078         .change         =       cbq_change_class,
2079         .delete         =       cbq_delete,
2080         .walk           =       cbq_walk,
2081         .tcf_chain      =       cbq_find_tcf,
2082         .bind_tcf       =       cbq_bind_filter,
2083         .unbind_tcf     =       cbq_unbind_filter,
2084         .dump           =       cbq_dump_class,
2085         .dump_stats     =       cbq_dump_class_stats,
2086 };
2087
2088 static struct Qdisc_ops cbq_qdisc_ops = {
2089         .next           =       NULL,
2090         .cl_ops         =       &cbq_class_ops,
2091         .id             =       "cbq",
2092         .priv_size      =       sizeof(struct cbq_sched_data),
2093         .enqueue        =       cbq_enqueue,
2094         .dequeue        =       cbq_dequeue,
2095         .requeue        =       cbq_requeue,
2096         .drop           =       cbq_drop,
2097         .init           =       cbq_init,
2098         .reset          =       cbq_reset,
2099         .destroy        =       cbq_destroy,
2100         .change         =       NULL,
2101         .dump           =       cbq_dump,
2102         .dump_stats     =       cbq_dump_stats,
2103         .owner          =       THIS_MODULE,
2104 };
2105
2106 static int __init cbq_module_init(void)
2107 {
2108         return register_qdisc(&cbq_qdisc_ops);
2109 }
2110 static void __exit cbq_module_exit(void)
2111 {
2112         unregister_qdisc(&cbq_qdisc_ops);
2113 }
2114 module_init(cbq_module_init)
2115 module_exit(cbq_module_exit)
2116 MODULE_LICENSE("GPL");