2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
48 have small rates. Note, that the statement of [3] is
49 incomplete, delay may actually be estimated even if class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me, why I am wrong 8).
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
/* NOTE(review): this listing is line-numbered and has dropped lines
 * (e.g. the opening "struct cbq_class {" and several field groups are
 * not visible).  Verify any structural edit against the complete file.
 */
71 struct cbq_sched_data;
/* Per-class scheduler state.  Classes form a tree (tparent/children/
 * sibling) and, per priority band, a circular "alive" ring (next_alive).
 */
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
/* Selected TC_CBQ_OVL_* strategy; drives the ->overlimit callback. */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
/* Rate table used by the L2T() length-to-time conversion. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
/* Nonzero while the class sleeps under an overlimit delay action. */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
/* Time when the class becomes underlimit again (PSCHED_PASTPERFECT
 * when it already is).
 */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
/* Absolute time until which an overlimit penalty applies. */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
/* Default class per logical priority, used by split-node classification. */
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
/* Qdisc-wide CBQ state. */
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
/* Sum of class weights per priority band; used to normalize quanta. */
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
/* The built-in root class representing the link itself. */
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
/* Timer servicing delayed (penalized) classes. */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
/* Convert a packet length to transmission time via the class rate table. */
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
210 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
/* NOTE(review): several lines of this function (loop head, switch body,
 * error labels) are missing from this listing; do not edit without the
 * complete file.
 */
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
/* Start resolving defaults from the split node currently in effect. */
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
/* No class object returned: map the classid (or logical priority) to
 * a class via lookup or the split node's default table.
 */
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If classifier selected a link sharing class,
271 * apply agency specific classifier.
272 * Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
292 A packet has just been enqueued on the empty class.
293 cbq_activate_class adds it to the tail of active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink class from active chain.
317 Note that this same procedure is done directly in cbq_dequeue*
318 during round-robin procedure.
/* NOTE(review): the do{}-loop head and the "cl == this" test are missing
 * from this listing; the visible tail condition on the last line implies
 * the function scans the circular active ring for @this.
 */
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
/* Unlink the found class from the ring. */
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
/* Ring contained only this class: band goes idle. */
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
/* NOTE(review): enqueue path; several lines (classify-failure handling,
 * drop accounting, return statements) are missing from this listing.
 */
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
373 int uninitialized_var(ret);
374 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
376 #ifdef CONFIG_NET_CLS_ACT
380 if (ret & __NET_XMIT_BYPASS)
386 #ifdef CONFIG_NET_CLS_ACT
387 cl->q->__parent = sch;
/* Hand the packet to the class's inner qdisc. */
389 ret = qdisc_enqueue(skb, cl->q);
390 if (ret == NET_XMIT_SUCCESS) {
392 sch->bstats.packets++;
393 sch->bstats.bytes += qdisc_pkt_len(skb);
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
400 if (net_xmit_drop_count(ret)) {
402 cbq_mark_toplevel(q, cl);
408 /* Overlimit actions */
410 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
/* NOTE(review): the delayed-flag handling around the avgidle adjustment
 * is missing from this listing.
 */
412 static void cbq_ovl_classic(struct cbq_class *cl)
414 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
415 psched_tdiff_t delay = cl->undertime - q->now;
418 delay += cl->offtime;
421 Class goes to sleep, so that it will have no
422 chance to work avgidle. Let's forgive it 8)
424 BTW cbq-2.0 has a crap in this
425 place, apparently they forgot to shift it by cl->ewma_log.
428 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
429 if (cl->avgidle < cl->minidle)
430 cl->avgidle = cl->minidle;
433 cl->undertime = q->now + delay;
435 cl->xstats.overactions++;
/* Keep the soonest wakeup time for the watchdog. */
438 if (q->wd_expires == 0 || q->wd_expires > delay)
439 q->wd_expires = delay;
441 /* Dirty work! We must schedule wakeups based on
442 real available rate, rather than leaf rate,
443 which may be tiny (even zero).
445 if (q->toplevel == TC_CBQ_MAXLEVEL) {
447 psched_tdiff_t base_delay = q->wd_expires;
449 for (b = cl->borrow; b; b = b->borrow) {
450 delay = b->undertime - q->now;
451 if (delay < base_delay) {
458 q->wd_expires = base_delay;
462 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
466 static void cbq_ovl_rclassic(struct cbq_class *cl)
468 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
469 struct cbq_class *this = cl;
472 if (cl->level > q->toplevel) {
476 } while ((cl = cl->borrow) != NULL);
483 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
/* NOTE(review): lines are missing in this listing (the delayed-flag
 * branch, expires declaration, and the hrtimer comparison operand);
 * consult the complete file before changing the timer arithmetic.
 */
485 static void cbq_ovl_delay(struct cbq_class *cl)
487 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
488 psched_tdiff_t delay = cl->undertime - q->now;
/* Do not arm timers while the root qdisc is being deactivated. */
490 if (test_bit(__QDISC_STATE_DEACTIVATED,
491 &qdisc_root_sleeping(cl->qdisc)->state))
495 psched_time_t sched = q->now;
498 delay += cl->offtime;
500 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
501 if (cl->avgidle < cl->minidle)
502 cl->avgidle = cl->minidle;
503 cl->undertime = q->now + delay;
/* Park the class in the special penalty band until `sched`. */
506 sched += delay + cl->penalty;
507 cl->penalized = sched;
508 cl->cpriority = TC_CBQ_MAXPRIO;
509 q->pmask |= (1<<TC_CBQ_MAXPRIO);
511 expires = ktime_set(0, 0);
512 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
/* Re-arm the delay timer only if it would fire later than needed. */
513 if (hrtimer_try_to_cancel(&q->delay_timer) &&
514 ktime_to_ns(ktime_sub(
515 hrtimer_get_expires(&q->delay_timer),
517 hrtimer_set_expires(&q->delay_timer, expires);
518 hrtimer_restart(&q->delay_timer);
520 cl->xstats.overactions++;
525 if (q->wd_expires == 0 || q->wd_expires > delay)
526 q->wd_expires = delay;
529 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
531 static void cbq_ovl_lowprio(struct cbq_class *cl)
533 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
535 cl->penalized = q->now + cl->penalty;
537 if (cl->cpriority != cl->priority2) {
538 cl->cpriority = cl->priority2;
539 q->pmask |= (1<<cl->cpriority);
540 cl->xstats.overactions++;
545 /* TC_CBQ_OVL_DROP: penalize class by dropping */
547 static void cbq_ovl_drop(struct cbq_class *cl)
549 if (cl->q->ops->drop)
550 if (cl->q->ops->drop(cl->q))
552 cl->xstats.overactions++;
/* Scan one penalty band and re-activate classes whose penalty expired.
 * NOTE(review): loop heads, early returns and several declarations are
 * missing from this listing.
 */
556 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
559 struct cbq_class *cl;
560 struct cbq_class *cl_prev = q->active[prio];
561 psched_time_t sched = now;
567 cl = cl_prev->next_alive;
/* Penalty expired: unlink from the penalty ring, restore the class's
 * own priority and re-activate it in its normal band.
 */
568 if (now - cl->penalized > 0) {
569 cl_prev->next_alive = cl->next_alive;
570 cl->next_alive = NULL;
571 cl->cpriority = cl->priority;
573 cbq_activate_class(cl);
575 if (cl == q->active[prio]) {
576 q->active[prio] = cl_prev;
577 if (cl == q->active[prio]) {
578 q->active[prio] = NULL;
583 cl = cl_prev->next_alive;
/* Track the earliest still-pending penalty expiry. */
584 } else if (sched - cl->penalized > 0)
585 sched = cl->penalized;
586 } while ((cl_prev = cl) != q->active[prio]);
/* Timer callback: sweep all penalty bands; re-arm itself if penalties
 * remain, otherwise unthrottle and reschedule the qdisc.
 * NOTE(review): the pmask loop body and delay bookkeeping are partly
 * missing from this listing.
 */
591 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
593 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
595 struct Qdisc *sch = q->watchdog.qdisc;
597 psched_tdiff_t delay = 0;
600 now = psched_get_time();
606 int prio = ffz(~pmask);
611 tmp = cbq_undelay_prio(q, prio, now);
614 if (tmp < delay || delay == 0)
622 time = ktime_set(0, 0);
623 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
624 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
627 sch->flags &= ~TCQ_F_THROTTLED;
628 __netif_schedule(qdisc_root(sch));
629 return HRTIMER_NORESTART;
632 #ifdef CONFIG_NET_CLS_ACT
/* reshape_fail hook: a policed packet bounced back from a child qdisc
 * is reclassified and re-enqueued into the fallback class.
 * NOTE(review): the q.qlen increment and failure tail are missing here.
 */
633 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
635 struct Qdisc *sch = child->__parent;
636 struct cbq_sched_data *q = qdisc_priv(sch);
637 struct cbq_class *cl = q->rx_class;
641 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
644 cbq_mark_toplevel(q, cl);
647 cl->q->__parent = sch;
649 ret = qdisc_enqueue(skb, cl->q);
650 if (ret == NET_XMIT_SUCCESS) {
652 sch->bstats.packets++;
653 sch->bstats.bytes += qdisc_pkt_len(skb);
655 cbq_activate_class(cl);
658 if (net_xmit_drop_count(ret))
669 It is mission critical procedure.
671 We "regenerate" toplevel cutoff, if transmitting class
672 has backlog and it is not regulated. It is not part of
673 original CBQ description, but looks more reasonable.
674 Probably, it is wrong. This question needs further investigation.
/* Raise/lower the toplevel cutoff after a transmission when the sender
 * still has backlog.  NOTE(review): loop braces and the return path are
 * missing from this listing.
 */
677 static __inline__ void
678 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
679 struct cbq_class *borrowed)
681 if (cl && q->toplevel >= borrowed->level) {
682 if (cl->q->q.qlen > 1) {
684 if (borrowed->undertime == PSCHED_PASTPERFECT) {
685 q->toplevel = borrowed->level;
688 } while ((borrowed=borrowed->borrow) != NULL);
691 /* It is not necessary now. Uncommenting it
692 will save CPU cycles, but decrease fairness.
694 q->toplevel = TC_CBQ_MAXLEVEL;
/* Core avgidle/undertime accounting after each transmitted packet,
 * walking the transmitting class's share chain.  NOTE(review): many
 * lines (len/now setup, over/underlimit branch heads) are missing from
 * this listing; do not restructure without the full file.
 */
700 cbq_update(struct cbq_sched_data *q)
702 struct cbq_class *this = q->tx_class;
703 struct cbq_class *cl = this;
708 for ( ; cl; cl = cl->share) {
709 long avgidle = cl->avgidle;
712 cl->bstats.packets++;
713 cl->bstats.bytes += len;
716 (now - last) is total time between packet right edges.
717 (last_pktlen/rate) is "virtual" busy time, so that
719 idle = (now - last) - last_pktlen/rate
722 idle = q->now - cl->last;
/* Huge gap means stale state: restart from maxidle. */
723 if ((unsigned long)idle > 128*1024*1024) {
724 avgidle = cl->maxidle;
726 idle -= L2T(cl, len);
728 /* true_avgidle := (1-W)*true_avgidle + W*idle,
729 where W=2^{-ewma_log}. But cl->avgidle is scaled:
730 cl->avgidle == true_avgidle/W,
733 avgidle += idle - (avgidle>>cl->ewma_log);
737 /* Overlimit or at-limit */
739 if (avgidle < cl->minidle)
740 avgidle = cl->minidle;
742 cl->avgidle = avgidle;
744 /* Calculate expected time, when this class
745 will be allowed to send.
747 (1-W)*true_avgidle + W*delay = 0, i.e.
748 idle = (1/W - 1)*(-true_avgidle)
750 idle = (1 - W)*(-cl->avgidle);
752 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
756 To maintain the rate allocated to the class,
757 we add to undertime virtual clock,
758 necessary to complete transmitted packet.
759 (len/phys_bandwidth has been already passed
760 to the moment of cbq_update)
763 idle -= L2T(&q->link, len);
764 idle += L2T(cl, len);
766 cl->undertime = q->now + idle;
/* Underlimit branch: class may send immediately. */
770 cl->undertime = PSCHED_PASTPERFECT;
771 if (avgidle > cl->maxidle)
772 cl->avgidle = cl->maxidle;
774 cl->avgidle = avgidle;
779 cbq_update_toplevel(q, this, q->tx_borrowed);
/* Find the nearest underlimit ancestor (borrow chain) of @cl, invoking
 * the overlimit action when the chain is exhausted.  NOTE(review): the
 * do-loop head and return statements are missing from this listing.
 */
782 static __inline__ struct cbq_class *
783 cbq_under_limit(struct cbq_class *cl)
785 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
786 struct cbq_class *this_cl = cl;
788 if (cl->tparent == NULL)
791 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
797 /* It is very suspicious place. Now overlimit
798 action is generated for not bounded classes
799 only if link is completely congested.
800 Though it is in agree with ancestor-only paradigm,
801 it looks very stupid. Particularly,
802 it means that this chunk of code will either
803 never be called or result in strong amplification
804 of burstiness. Dangerous, silly, and, however,
805 no another solution exists.
807 if ((cl = cl->borrow) == NULL) {
808 this_cl->qstats.overlimits++;
809 this_cl->overlimit(this_cl);
812 if (cl->level > q->toplevel)
814 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
/* Weighted round-robin dequeue within one priority band.
 * NOTE(review): this function is heavily truncated in this listing
 * (loop heads, goto labels, several branches are absent).
 */
820 static __inline__ struct sk_buff *
821 cbq_dequeue_prio(struct Qdisc *sch, int prio)
823 struct cbq_sched_data *q = qdisc_priv(sch);
824 struct cbq_class *cl_tail, *cl_prev, *cl;
828 cl_tail = cl_prev = q->active[prio];
829 cl = cl_prev->next_alive;
836 struct cbq_class *borrow = cl;
839 (borrow = cbq_under_limit(cl)) == NULL)
842 if (cl->deficit <= 0) {
843 /* Class exhausted its allotment per
844 this round. Switch to the next one.
847 cl->deficit += cl->quantum;
851 skb = cl->q->dequeue(cl->q);
853 /* Class did not give us any skb :-(
854 It could occur even if cl->q->q.qlen != 0
855 f.e. if cl->q == "tbf"
860 cl->deficit -= qdisc_pkt_len(skb);
862 q->tx_borrowed = borrow;
864 #ifndef CBQ_XSTATS_BORROWS_BYTES
865 borrow->xstats.borrows++;
866 cl->xstats.borrows++;
868 borrow->xstats.borrows += qdisc_pkt_len(skb);
869 cl->xstats.borrows += qdisc_pkt_len(skb);
872 q->tx_len = qdisc_pkt_len(skb);
874 if (cl->deficit <= 0) {
875 q->active[prio] = cl;
877 cl->deficit += cl->quantum;
882 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
883 /* Class is empty or penalized.
884 Unlink it from active chain.
886 cl_prev->next_alive = cl->next_alive;
887 cl->next_alive = NULL;
889 /* Did cl_tail point to it? */
894 /* Was it the last class in this band? */
897 q->active[prio] = NULL;
898 q->activemask &= ~(1<<prio);
900 cbq_activate_class(cl);
904 q->active[prio] = cl_tail;
907 cbq_activate_class(cl);
915 } while (cl_prev != cl_tail);
918 q->active[prio] = cl_prev;
923 static __inline__ struct sk_buff *
924 cbq_dequeue_1(struct Qdisc *sch)
926 struct cbq_sched_data *q = qdisc_priv(sch);
930 activemask = q->activemask&0xFF;
932 int prio = ffz(~activemask);
933 activemask &= ~(1<<prio);
934 skb = cbq_dequeue_prio(sch, prio);
/* Top-level dequeue: advance the integrated clock, run cbq_update()
 * for the previous transmission, then pull from the band scheduler.
 * NOTE(review): declarations, the tx_class handling and the retry loop
 * head are missing from this listing.
 */
941 static struct sk_buff *
942 cbq_dequeue(struct Qdisc *sch)
945 struct cbq_sched_data *q = qdisc_priv(sch);
949 now = psched_get_time();
950 incr = now - q->now_rt;
953 psched_tdiff_t incr2;
954 /* Time integrator. We calculate EOS time
955 by adding expected packet transmission time.
956 If real time is greater, we warp artificial clock,
959 cbq_time = max(real_time, work);
961 incr2 = L2T(&q->link, q->tx_len);
964 if ((incr -= incr2) < 0)
973 skb = cbq_dequeue_1(sch);
976 sch->flags &= ~TCQ_F_THROTTLED;
980 /* All the classes are overlimit.
984 1. Scheduler is empty.
985 2. Toplevel cutoff inhibited borrowing.
986 3. Root class is overlimit.
988 Reset 2d and 3d conditions and retry.
990 Note, that NS and cbq-2.0 are buggy, peeking
991 an arbitrary class is appropriate for ancestor-only
992 sharing, but not for toplevel algorithm.
994 Our version is better, but slower, because it requires
995 two passes, but it is unavoidable with top-level sharing.
998 if (q->toplevel == TC_CBQ_MAXLEVEL &&
999 q->link.undertime == PSCHED_PASTPERFECT)
1002 q->toplevel = TC_CBQ_MAXLEVEL;
1003 q->link.undertime = PSCHED_PASTPERFECT;
1006 /* No packets in scheduler or nobody wants to give them to us :-(
1007 Sigh... start watchdog timer in the last case. */
1010 sch->qstats.overlimits++;
1012 qdisc_watchdog_schedule(&q->watchdog,
1013 now + q->wd_expires);
1018 /* CBQ class maintanance routines */
1020 static void cbq_adjust_levels(struct cbq_class *this)
1027 struct cbq_class *cl;
1029 if ((cl = this->children) != NULL) {
1031 if (cl->level > level)
1033 } while ((cl = cl->sibling) != this->children);
1035 this->level = level+1;
1036 } while ((this = this->tparent) != NULL);
1039 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1041 struct cbq_class *cl;
1042 struct hlist_node *n;
1045 if (q->quanta[prio] == 0)
1048 for (h = 0; h < q->clhash.hashsize; h++) {
1049 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1050 /* BUGGGG... Beware! This expression suffer of
1051 arithmetic overflows!
1053 if (cl->priority == prio) {
1054 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1057 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1058 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1059 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
/* Rebuild the split node's defaults[] table from all classes referring
 * to it.  NOTE(review): the NULL-split early return, mask bookkeeping
 * and inner-loop braces are missing from this listing.
 */
1065 static void cbq_sync_defmap(struct cbq_class *cl)
1067 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1068 struct cbq_class *split = cl->split;
/* Drop stale default entries that still point at @cl. */
1075 for (i=0; i<=TC_PRIO_MAX; i++) {
1076 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1077 split->defaults[i] = NULL;
1080 for (i=0; i<=TC_PRIO_MAX; i++) {
1081 int level = split->level;
1083 if (split->defaults[i])
/* Pick the lowest-level class mapping this priority to the split. */
1086 for (h = 0; h < q->clhash.hashsize; h++) {
1087 struct hlist_node *n;
1088 struct cbq_class *c;
1090 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1092 if (c->split == split && c->level < level &&
1094 split->defaults[i] = c;
/* Re-point @cl at a (possibly different) split node and update its
 * default map bits.  NOTE(review): early returns and the split-switch
 * branch bodies are partly missing from this listing.
 */
1102 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1104 struct cbq_class *split = NULL;
1107 if ((split = cl->split) == NULL)
1109 splitid = split->common.classid;
/* Resolve the split node by walking up the tree when it changed. */
1112 if (split == NULL || split->common.classid != splitid) {
1113 for (split = cl->tparent; split; split = split->tparent)
1114 if (split->common.classid == splitid)
1121 if (cl->split != split) {
1123 cbq_sync_defmap(cl);
1125 cl->defmap = def&mask;
1127 cl->defmap = (cl->defmap&~mask)|(def&mask);
1129 cbq_sync_defmap(cl);
/* Remove @this from the hash and from its parent's sibling ring.
 * NOTE(review): the ring-walk loop head is missing from this listing.
 */
1132 static void cbq_unlink_class(struct cbq_class *this)
1134 struct cbq_class *cl, **clp;
1135 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1137 qdisc_class_hash_remove(&q->clhash, &this->common);
1139 if (this->tparent) {
1148 } while ((cl = *clp) != this->sibling);
1150 if (this->tparent->children == this) {
1151 this->tparent->children = this->sibling;
1152 if (this->sibling == this)
1153 this->tparent->children = NULL;
/* Root classes must be self-linked at this point. */
1156 WARN_ON(this->sibling != this);
1160 static void cbq_link_class(struct cbq_class *this)
1162 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1163 struct cbq_class *parent = this->tparent;
1165 this->sibling = this;
1166 qdisc_class_hash_insert(&q->clhash, &this->common);
1171 if (parent->children == NULL) {
1172 parent->children = this;
1174 this->sibling = parent->children->sibling;
1175 parent->children->sibling = this;
/* Drop one packet from the lowest-priority band that can give one up,
 * returning the dropped length.  NOTE(review): the inner loop head and
 * the final return are missing from this listing.
 */
1179 static unsigned int cbq_drop(struct Qdisc* sch)
1181 struct cbq_sched_data *q = qdisc_priv(sch);
1182 struct cbq_class *cl, *cl_head;
1186 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1187 if ((cl_head = q->active[prio]) == NULL)
1192 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
/* Class emptied by the drop: remove it from the active ring. */
1195 cbq_deactivate_class(cl);
1198 } while ((cl = cl->next_alive) != cl_head);
/* Reset all scheduler state: timers, bands, and per-class WRR/idle
 * accounting.  NOTE(review): several reset assignments are missing
 * from this listing.
 */
1204 cbq_reset(struct Qdisc* sch)
1206 struct cbq_sched_data *q = qdisc_priv(sch);
1207 struct cbq_class *cl;
1208 struct hlist_node *n;
1215 q->tx_borrowed = NULL;
1216 qdisc_watchdog_cancel(&q->watchdog);
1217 hrtimer_cancel(&q->delay_timer);
1218 q->toplevel = TC_CBQ_MAXLEVEL;
1219 q->now = psched_get_time();
1222 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1223 q->active[prio] = NULL;
1225 for (h = 0; h < q->clhash.hashsize; h++) {
1226 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1229 cl->next_alive = NULL;
1230 cl->undertime = PSCHED_PASTPERFECT;
1231 cl->avgidle = cl->maxidle;
1232 cl->deficit = cl->quantum;
1233 cl->cpriority = cl->priority;
1240 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1242 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1243 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1244 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1246 if (lss->change&TCF_CBQ_LSS_EWMA)
1247 cl->ewma_log = lss->ewma_log;
1248 if (lss->change&TCF_CBQ_LSS_AVPKT)
1249 cl->avpkt = lss->avpkt;
1250 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1251 cl->minidle = -(long)lss->minidle;
1252 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1253 cl->maxidle = lss->maxidle;
1254 cl->avgidle = lss->maxidle;
1256 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1257 cl->offtime = lss->offtime;
1261 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1263 q->nclasses[cl->priority]--;
1264 q->quanta[cl->priority] -= cl->weight;
1265 cbq_normalize_quanta(q, cl->priority);
1268 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1270 q->nclasses[cl->priority]++;
1271 q->quanta[cl->priority] += cl->weight;
1272 cbq_normalize_quanta(q, cl->priority);
1275 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1277 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1280 cl->allot = wrr->allot;
1282 cl->weight = wrr->weight;
1283 if (wrr->priority) {
1284 cl->priority = wrr->priority-1;
1285 cl->cpriority = cl->priority;
1286 if (cl->priority >= cl->priority2)
1287 cl->priority2 = TC_CBQ_MAXPRIO-1;
1294 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1296 switch (ovl->strategy) {
1297 case TC_CBQ_OVL_CLASSIC:
1298 cl->overlimit = cbq_ovl_classic;
1300 case TC_CBQ_OVL_DELAY:
1301 cl->overlimit = cbq_ovl_delay;
1303 case TC_CBQ_OVL_LOWPRIO:
1304 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1305 ovl->priority2-1 <= cl->priority)
1307 cl->priority2 = ovl->priority2-1;
1308 cl->overlimit = cbq_ovl_lowprio;
1310 case TC_CBQ_OVL_DROP:
1311 cl->overlimit = cbq_ovl_drop;
1313 case TC_CBQ_OVL_RCLASSIC:
1314 cl->overlimit = cbq_ovl_rclassic;
1319 cl->penalty = ovl->penalty;
1323 #ifdef CONFIG_NET_CLS_ACT
1324 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1326 cl->police = p->police;
1328 if (cl->q->handle) {
1329 if (p->police == TC_POLICE_RECLASSIFY)
1330 cl->q->reshape_fail = cbq_reshape_fail;
1332 cl->q->reshape_fail = NULL;
1338 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1340 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
/* Netlink attribute validation policy for CBQ options. */
1344 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1345 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1346 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1347 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1348 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1349 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1350 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1351 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
/* Qdisc init: parse netlink options, set up the root "link" class and
 * timers.  NOTE(review): error-unwinding lines, some defaults (e.g.
 * maxidle/offtime) and the final return are missing from this listing.
 */
1354 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1356 struct cbq_sched_data *q = qdisc_priv(sch);
1357 struct nlattr *tb[TCA_CBQ_MAX + 1];
1358 struct tc_ratespec *r;
1361 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
/* Rate and rate table are mandatory for the root class. */
1365 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1368 r = nla_data(tb[TCA_CBQ_RATE]);
1370 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1373 err = qdisc_class_hash_init(&q->clhash);
1378 q->link.sibling = &q->link;
1379 q->link.common.classid = sch->handle;
1380 q->link.qdisc = sch;
/* Default leaf for the root: a pfifo sized below; noop on failure. */
1381 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1384 q->link.q = &noop_qdisc;
1386 q->link.priority = TC_CBQ_MAXPRIO-1;
1387 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1388 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1389 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1390 q->link.overlimit = cbq_ovl_classic;
1391 q->link.allot = psched_mtu(qdisc_dev(sch));
1392 q->link.quantum = q->link.allot;
1393 q->link.weight = q->link.R_tab->rate.rate;
1395 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1396 q->link.avpkt = q->link.allot/2;
1397 q->link.minidle = -0x7FFFFFFF;
1399 qdisc_watchdog_init(&q->watchdog, sch);
1400 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1401 q->delay_timer.function = cbq_undelay;
1402 q->toplevel = TC_CBQ_MAXLEVEL;
1403 q->now = psched_get_time();
1406 cbq_link_class(&q->link);
1408 if (tb[TCA_CBQ_LSSOPT])
1409 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1411 cbq_addprio(q, &q->link);
/* Error path: release the rate table acquired above. */
1415 qdisc_put_rtab(q->link.R_tab);
1419 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1421 unsigned char *b = skb_tail_pointer(skb);
1423 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1431 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1433 unsigned char *b = skb_tail_pointer(skb);
1434 struct tc_cbq_lssopt opt;
1437 if (cl->borrow == NULL)
1438 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1439 if (cl->share == NULL)
1440 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1441 opt.ewma_log = cl->ewma_log;
1442 opt.level = cl->level;
1443 opt.avpkt = cl->avpkt;
1444 opt.maxidle = cl->maxidle;
1445 opt.minidle = (u32)(-cl->minidle);
1446 opt.offtime = cl->offtime;
1448 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1456 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1458 unsigned char *b = skb_tail_pointer(skb);
1459 struct tc_cbq_wrropt opt;
1462 opt.allot = cl->allot;
1463 opt.priority = cl->priority+1;
1464 opt.cpriority = cl->cpriority+1;
1465 opt.weight = cl->weight;
1466 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1474 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1476 unsigned char *b = skb_tail_pointer(skb);
1477 struct tc_cbq_ovl opt;
1479 opt.strategy = cl->ovl_strategy;
1480 opt.priority2 = cl->priority2+1;
1482 opt.penalty = cl->penalty;
1483 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1491 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1493 unsigned char *b = skb_tail_pointer(skb);
1494 struct tc_cbq_fopt opt;
1496 if (cl->split || cl->defmap) {
1497 opt.split = cl->split ? cl->split->common.classid : 0;
1498 opt.defmap = cl->defmap;
1500 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1509 #ifdef CONFIG_NET_CLS_ACT
1510 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1512 unsigned char *b = skb_tail_pointer(skb);
1513 struct tc_cbq_police opt;
1516 opt.police = cl->police;
1519 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
/* Dump every per-class attribute group; any sub-dump failure aborts.
 * Returns 0 on success, -1 on failure.
 */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_ACT
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
/*
 * cbq_dump - Qdisc_ops .dump: dump the root (link) class's attributes
 * inside a TCA_OPTIONS nest.  On any failure the nest is cancelled so
 * no partial attribute set escapes to userspace.
 */
1543 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1545 struct cbq_sched_data *q = qdisc_priv(sch);
1546 struct nlattr *nest;
1548 nest = nla_nest_start(skb, TCA_OPTIONS);
/* NULL check of 'nest' is in a line elided from this listing */
1550 goto nla_put_failure;
1551 if (cbq_dump_attr(skb, &q->link) < 0)
1552 goto nla_put_failure;
1553 nla_nest_end(skb, nest);
1557 nla_nest_cancel(skb, nest);
/*
 * cbq_dump_stats - Qdisc_ops .dump_stats: export the root class's
 * extended stats (snapshotting the live avgidle first) via the
 * generic app-stats copy helper.
 */
1562 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1564 struct cbq_sched_data *q = qdisc_priv(sch);
1566 q->link.xstats.avgidle = q->link.avgidle;
1567 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
/*
 * cbq_dump_class - Qdisc_class_ops .dump: fill the tcmsg header
 * (parent defaults to TC_H_ROOT when the class has no tparent; the
 * branch condition is in a line elided from this listing) and dump the
 * class's attributes inside a TCA_OPTIONS nest, cancelling the nest on
 * failure.
 */
1571 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1572 struct sk_buff *skb, struct tcmsg *tcm)
1574 struct cbq_class *cl = (struct cbq_class*)arg;
1575 struct nlattr *nest;
1578 tcm->tcm_parent = cl->tparent->common.classid;
1580 tcm->tcm_parent = TC_H_ROOT;
1581 tcm->tcm_handle = cl->common.classid;
1582 tcm->tcm_info = cl->q->handle;
1584 nest = nla_nest_start(skb, TCA_OPTIONS);
1586 goto nla_put_failure;
1587 if (cbq_dump_attr(skb, cl) < 0)
1588 goto nla_put_failure;
1589 nla_nest_end(skb, nest);
1593 nla_nest_cancel(skb, nest);
/*
 * cbq_dump_class_stats - Qdisc_class_ops .dump_stats: refresh the
 * class's queue length and idle-time snapshots, then copy the basic,
 * rate-estimator, queue and app-specific stats out.  undertime is
 * reported relative to q->now, and only when the class is actually
 * throttled (undertime != PSCHED_PASTPERFECT).
 */
1598 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1599 struct gnet_dump *d)
1601 struct cbq_sched_data *q = qdisc_priv(sch);
1602 struct cbq_class *cl = (struct cbq_class*)arg;
1604 cl->qstats.qlen = cl->q->q.qlen;
1605 cl->xstats.avgidle = cl->avgidle;
1606 cl->xstats.undertime = 0;
1608 if (cl->undertime != PSCHED_PASTPERFECT)
1609 cl->xstats.undertime = cl->undertime - q->now;
1611 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1612 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1613 gnet_stats_copy_queue(d, &cl->qstats) < 0)
/* error return on copy failure is in a line elided from this listing */
1616 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
/*
 * cbq_graft - Qdisc_class_ops .graft: replace the class's child qdisc
 * with 'new' (creating a default child when 'new' is NULL -- the NULL
 * check is in a line elided from this listing), returning the old one
 * through *old.  The swap is done under the qdisc tree lock via xchg,
 * and the old child's queued packets are accounted away with
 * qdisc_tree_decrease_qlen.  With NET_CLS_ACT, a reclassifying policer
 * installs the reshape_fail hook on the new child.
 */
1619 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1622 struct cbq_class *cl = (struct cbq_class*)arg;
1626 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1628 cl->common.classid);
1632 #ifdef CONFIG_NET_CLS_ACT
1633 if (cl->police == TC_POLICE_RECLASSIFY)
1634 new->reshape_fail = cbq_reshape_fail;
1638 *old = xchg(&cl->q, new);
1639 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1641 sch_tree_unlock(sch);
/*
 * cbq_leaf - Qdisc_class_ops .leaf: return the class's child qdisc,
 * or NULL when the class handle did not resolve.
 */
1648 static struct Qdisc *
1649 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1653 return cl ? cl->q : NULL;
/*
 * cbq_qlen_notify - Qdisc_class_ops .qlen_notify: called when the
 * child qdisc's queue length drops; deactivate the class from the
 * active WRR lists once its queue is empty.
 */
1656 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1658 struct cbq_class *cl = (struct cbq_class *)arg;
1660 if (cl->q->q.qlen == 0)
1661 cbq_deactivate_class(cl);
/*
 * cbq_get - Qdisc_class_ops .get: look up a class by classid and return
 * it as an opaque handle (0 when not found).  The refcount increment on
 * success is presumably in the lines elided from this listing -- the
 * matching release is cbq_put() below; TODO confirm against full source.
 */
1664 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1666 struct cbq_sched_data *q = qdisc_priv(sch);
1667 struct cbq_class *cl = cbq_class_lookup(q, classid);
1671 return (unsigned long)cl;
/*
 * cbq_destroy_class - free all resources owned by a class: its filter
 * chain, child qdisc, rate table and rate estimator.  WARN_ON guards
 * against destroying a class that still has filters referencing it.
 * The final kfree of 'cl' (for non-root classes) is in lines elided
 * from this listing.
 */
1676 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1678 struct cbq_sched_data *q = qdisc_priv(sch);
1680 WARN_ON(cl->filters);
1682 tcf_destroy_chain(&cl->filter_list);
1683 qdisc_destroy(cl->q);
1684 qdisc_put_rtab(cl->R_tab);
1685 gen_kill_estimator(&cl->bstats, &cl->rate_est);
/*
 * cbq_destroy - Qdisc_ops .destroy: tear down the whole class tree.
 * Two passes over the class hash: first destroy every filter chain
 * (because destruction is not root-to-leaf, filters could otherwise
 * still point at already-freed classes -- see the original TGR '04
 * note), then destroy the classes themselves with the _safe iterator
 * since entries are unlinked as we go.  Finally free the hash table.
 */
1691 cbq_destroy(struct Qdisc* sch)
1693 struct cbq_sched_data *q = qdisc_priv(sch);
1694 struct hlist_node *n, *next;
1695 struct cbq_class *cl;
1698 #ifdef CONFIG_NET_CLS_ACT
/* rx_class reset under NET_CLS_ACT is in a line elided from this listing */
1702 * Filters must be destroyed first because we don't destroy the
1703 * classes from root to leafs which means that filters can still
1704 * be bound to classes which have been destroyed already. --TGR '04
1706 for (h = 0; h < q->clhash.hashsize; h++) {
1707 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1708 tcf_destroy_chain(&cl->filter_list);
1710 for (h = 0; h < q->clhash.hashsize; h++) {
1711 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1713 cbq_destroy_class(sch, cl);
1715 qdisc_class_hash_destroy(&q->clhash);
/*
 * cbq_put - Qdisc_class_ops .put: drop a reference taken by cbq_get();
 * on the last reference, clear q->rx_class under the root lock (so the
 * enqueue path cannot keep using the dying class with NET_CLS_ACT) and
 * destroy the class.
 */
1718 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1720 struct cbq_class *cl = (struct cbq_class*)arg;
1722 if (--cl->refcnt == 0) {
1723 #ifdef CONFIG_NET_CLS_ACT
1724 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1725 struct cbq_sched_data *q = qdisc_priv(sch);
1727 spin_lock_bh(root_lock);
1728 if (q->rx_class == cl)
/* q->rx_class = NULL assignment is in a line elided from this listing */
1730 spin_unlock_bh(root_lock);
1733 cbq_destroy_class(sch, cl);
/*
 * cbq_change_class - Qdisc_class_ops .change: modify an existing class
 * (cl != NULL) or create a new one.  Parses the nested TCA_CBQ_*
 * attributes, validates parentage, and applies LSS/WRR/overlimit/
 * police/filter options.  All tree mutations happen under the qdisc
 * tree lock; rate estimators are (re)attached afterwards under the
 * root sleeping lock.  Many validation branches, lock calls and error
 * returns fall in lines elided from this listing.
 */
1738 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1742 struct cbq_sched_data *q = qdisc_priv(sch);
1743 struct cbq_class *cl = (struct cbq_class*)*arg;
1744 struct nlattr *opt = tca[TCA_OPTIONS];
1745 struct nlattr *tb[TCA_CBQ_MAX + 1];
1746 struct cbq_class *parent;
1747 struct qdisc_rate_table *rtab = NULL;
1752 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
/* --- change path: cl already exists; parent must not change --- */
1760 cl->tparent->common.classid != parentid)
1762 if (!cl->tparent && parentid != TC_H_ROOT)
1766 if (tb[TCA_CBQ_RATE]) {
1767 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1772 /* Change class parameters */
1775 if (cl->next_alive != NULL)
1776 cbq_deactivate_class(cl);
/* swap in the new rate table and release the old one */
1779 rtab = xchg(&cl->R_tab, rtab);
1780 qdisc_put_rtab(rtab);
1783 if (tb[TCA_CBQ_LSSOPT])
1784 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1786 if (tb[TCA_CBQ_WRROPT]) {
1788 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1791 if (tb[TCA_CBQ_OVL_STRATEGY])
1792 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1794 #ifdef CONFIG_NET_CLS_ACT
1795 if (tb[TCA_CBQ_POLICE])
1796 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1799 if (tb[TCA_CBQ_FOPT])
1800 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1803 cbq_activate_class(cl);
1805 sch_tree_unlock(sch);
1808 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1809 qdisc_root_sleeping_lock(sch),
/* --- create path: a brand-new class --- */
1814 if (parentid == TC_H_ROOT)
/* RATE, WRR and LSS options are mandatory when creating */
1817 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1818 tb[TCA_CBQ_LSSOPT] == NULL)
1821 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
/* explicit classid must live under this qdisc's major and be unused */
1827 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
/* otherwise auto-generate a minor in the 0x8000..0xFFFF range */
1831 classid = TC_H_MAKE(sch->handle,0x8000);
1833 for (i=0; i<0x8000; i++) {
1834 if (++q->hgenerator >= 0x8000)
1836 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1842 classid = classid|q->hgenerator;
1847 parent = cbq_class_lookup(q, parentid);
1854 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
/* fall back to noop_qdisc if the default pfifo child can't be made */
1860 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1861 &pfifo_qdisc_ops, classid)))
1862 cl->q = &noop_qdisc;
1863 cl->common.classid = classid;
1864 cl->tparent = parent;
/* inherit initial allot/quantum/weight from parent and own rate */
1866 cl->allot = parent->allot;
1867 cl->quantum = cl->allot;
1868 cl->weight = cl->R_tab->rate.rate;
1872 cl->borrow = cl->tparent;
1873 if (cl->tparent != &q->link)
1874 cl->share = cl->tparent;
1875 cbq_adjust_levels(parent);
1876 cl->minidle = -0x7FFFFFFF;
1877 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1878 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
/* unset EWMA/idle parameters default to the root link's values */
1879 if (cl->ewma_log==0)
1880 cl->ewma_log = q->link.ewma_log;
1882 cl->maxidle = q->link.maxidle;
1884 cl->avpkt = q->link.avpkt;
1885 cl->overlimit = cbq_ovl_classic;
1886 if (tb[TCA_CBQ_OVL_STRATEGY])
1887 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1888 #ifdef CONFIG_NET_CLS_ACT
1889 if (tb[TCA_CBQ_POLICE])
1890 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1892 if (tb[TCA_CBQ_FOPT])
1893 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1894 sch_tree_unlock(sch);
1896 qdisc_class_hash_grow(sch, &q->clhash);
1899 gen_new_estimator(&cl->bstats, &cl->rate_est,
1900 qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
1902 *arg = (unsigned long)cl;
/* failure path: release the rate table taken above */
1906 qdisc_put_rtab(rtab);
/*
 * cbq_delete - Qdisc_class_ops .delete: remove a class from the tree.
 * Refuses to delete the root link, or any class that still has filters
 * or children.  Under the tree lock it drains accounting for queued
 * packets, deactivates the class, clears any scheduler state pointing
 * at it (tx_class / tx_borrowed / rx_class), unlinks it and fixes up
 * levels and defmaps.  The class itself is only destroyed when the
 * last reference is dropped.
 */
1910 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1912 struct cbq_sched_data *q = qdisc_priv(sch);
1913 struct cbq_class *cl = (struct cbq_class*)arg;
1916 if (cl->filters || cl->children || cl == &q->link)
1921 qlen = cl->q->q.qlen;
1923 qdisc_tree_decrease_qlen(cl->q, qlen);
1926 cbq_deactivate_class(cl);
1928 if (q->tx_borrowed == cl)
1929 q->tx_borrowed = q->tx_class;
1930 if (q->tx_class == cl) {
/* q->tx_class reset is in a line elided from this listing */
1932 q->tx_borrowed = NULL;
1934 #ifdef CONFIG_NET_CLS_ACT
1935 if (q->rx_class == cl)
1939 cbq_unlink_class(cl);
1940 cbq_adjust_levels(cl->tparent);
1942 cbq_sync_defmap(cl);
1945 sch_tree_unlock(sch);
1947 if (--cl->refcnt == 0)
1948 cbq_destroy_class(sch, cl);
/*
 * cbq_find_tcf - Qdisc_class_ops .tcf_chain: return the filter list
 * anchor for a class (the root link's when no class is given -- that
 * fallback is in a line elided from this listing).
 */
1953 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1955 struct cbq_sched_data *q = qdisc_priv(sch);
1956 struct cbq_class *cl = (struct cbq_class *)arg;
1961 return &cl->filter_list;
/*
 * cbq_bind_filter - Qdisc_class_ops .bind_tcf: bind a filter result to
 * a class.  A filter attached at 'parent' may only direct traffic to a
 * class strictly below it in the hierarchy (p->level <= cl->level is
 * rejected).  The cl->filters refcount increment is in a line elided
 * from this listing.
 */
1964 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1967 struct cbq_sched_data *q = qdisc_priv(sch);
1968 struct cbq_class *p = (struct cbq_class*)parent;
1969 struct cbq_class *cl = cbq_class_lookup(q, classid);
1972 if (p && p->level <= cl->level)
1975 return (unsigned long)cl;
/*
 * cbq_unbind_filter - Qdisc_class_ops .unbind_tcf: release the binding
 * taken by cbq_bind_filter (the cl->filters decrement is in a line
 * elided from this listing).
 */
1980 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1982 struct cbq_class *cl = (struct cbq_class*)arg;
/*
 * cbq_walk - Qdisc_class_ops .walk: iterate all classes in the hash,
 * honouring the walker's skip/count window and stopping early when the
 * callback returns < 0 (arg->stop is set in lines elided from this
 * listing).
 */
1987 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1989 struct cbq_sched_data *q = qdisc_priv(sch);
1990 struct cbq_class *cl;
1991 struct hlist_node *n;
1997 for (h = 0; h < q->clhash.hashsize; h++) {
1998 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1999 if (arg->count < arg->skip) {
2003 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
/*
 * Class-level operations table for CBQ.  Entries not visible here
 * (.graft, .leaf, .get, .put, .walk) fall in lines elided from this
 * listing.
 */
2012 static const struct Qdisc_class_ops cbq_class_ops = {
2015 .qlen_notify = cbq_qlen_notify,
2018 .change = cbq_change_class,
2019 .delete = cbq_delete,
2021 .tcf_chain = cbq_find_tcf,
2022 .bind_tcf = cbq_bind_filter,
2023 .unbind_tcf = cbq_unbind_filter,
2024 .dump = cbq_dump_class,
2025 .dump_stats = cbq_dump_class_stats,
/*
 * Qdisc operations table registering CBQ with the packet scheduler
 * core.  Entries not visible here (.id, .drop, .init, .reset, .dump)
 * fall in lines elided from this listing.
 */
2028 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2030 .cl_ops = &cbq_class_ops,
2032 .priv_size = sizeof(struct cbq_sched_data),
2033 .enqueue = cbq_enqueue,
2034 .dequeue = cbq_dequeue,
2035 .peek = qdisc_peek_dequeued,
2039 .destroy = cbq_destroy,
2042 .dump_stats = cbq_dump_stats,
2043 .owner = THIS_MODULE,
/* Module entry point: register the CBQ qdisc with the scheduler core. */
2046 static int __init cbq_module_init(void)
2048 return register_qdisc(&cbq_qdisc_ops);
/* Module exit point: unregister the CBQ qdisc. */
2050 static void __exit cbq_module_exit(void)
2052 unregister_qdisc(&cbq_qdisc_ops);
/* Standard kernel module hookup and GPL license declaration. */
2054 module_init(cbq_module_init)
2055 module_exit(cbq_module_exit)
2056 MODULE_LICENSE("GPL");