2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 they should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement in [3] is
49 incomplete: the delay can actually be estimated even if the class
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1, then
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we get the IntServ estimate with D = W*r + k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial. (A worked numeric example of this bound follows this comment block.)
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate the true class
65 idle time. The workaround is to consider the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
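/* Editorial example (not part of the original file), making the delay
   bound in the first remark above concrete.  Reading [x] as rounding up
   and taking illustrative numbers MTU = 1500 B, W = 1000 B, k = 4
   classes, r_1 = 0.1 (so the per-round allotment W*r_1 = 100 B is below
   the MTU), r = 0.5 and B = 125000 B/s (1 Mbit/s):

       delay_1 <= ([1500/100]*500 + 500 + 4*1500)/125000
                = (7500 + 500 + 6000)/125000 = 0.112 s
 */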
71 struct cbq_sched_data;
76 struct cbq_class *next; /* hash table link */
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
81 unsigned char priority; /* class priority */
82 unsigned char priority2; /* priority to be used after overlimit */
83 unsigned char ewma_log; /* time constant for idle time calculation */
84 unsigned char ovl_strategy;
85 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
91 /* Link-sharing scheduler parameters */
92 long maxidle; /* Class parameters: see below. */
96 struct qdisc_rate_table *R_tab;
98 /* Overlimit strategy parameters */
99 void (*overlimit)(struct cbq_class *cl);
100 psched_tdiff_t penalty;
102 /* General scheduler (WRR) parameters */
104 long quantum; /* Allotment per WRR round */
105 long weight; /* Relative allotment: see below */
107 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
108 struct cbq_class *split; /* Ptr to split node */
109 struct cbq_class *share; /* Ptr to LS parent in the class tree */
110 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
111 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
113 struct cbq_class *sibling; /* Sibling chain */
114 struct cbq_class *children; /* Pointer to children chain */
116 struct Qdisc *q; /* Elementary queueing discipline */
120 unsigned char cpriority; /* Effective priority */
121 unsigned char delayed;
122 unsigned char level; /* level of the class in hierarchy:
123 0 for leaf classes, and maximal
124 level of children + 1 for nodes.
127 psched_time_t last; /* Last end of service */
128 psched_time_t undertime;
130 long deficit; /* Saved deficit for WRR */
131 psched_time_t penalized;
132 struct gnet_stats_basic bstats;
133 struct gnet_stats_queue qstats;
134 struct gnet_stats_rate_est rate_est;
135 struct tc_cbq_xstats xstats;
137 struct tcf_proto *filter_list;
142 struct cbq_class *defaults[TC_PRIO_MAX+1];
145 struct cbq_sched_data
147 struct cbq_class *classes[16]; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1];
151 struct cbq_class link;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
157 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
158 struct cbq_class *rx_class;
160 struct cbq_class *tx_class;
161 struct cbq_class *tx_borrowed;
163 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */
167 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer,
172 psched_tdiff_t wd_expires;
178 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
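/* Editorial note (not part of the original file): L2T ("length to
 * time") converts a packet length into transmission time by indexing a
 * rate table supplied from user space: each slot of R_tab->data[]
 * holds the precomputed time needed to send a packet whose length
 * falls into that cell at the class rate, so the lookup is a
 * constant-time approximation of len/rate.  A minimal sketch of the
 * same idea, with illustrative names only:
 */
static __inline__ u32 l2t_sketch(const u32 *tbl, unsigned int cell_log,
				 unsigned int len)
{
	/* map the length to its table cell and look the xmit time up */
	return tbl[len >> cell_log];
}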
181 static __inline__ unsigned cbq_hash(u32 h)
188 static __inline__ struct cbq_class *
189 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
191 struct cbq_class *cl;
193 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
194 if (cl->classid == classid)
199 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
201 static struct cbq_class *
202 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
204 struct cbq_class *cl, *new;
206 for (cl = this->tparent; cl; cl = cl->tparent)
207 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
215 /* Classify packet. The procedure is pretty complicated, but
216 it allows us to combine link sharing and priority scheduling
219 Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
220 so that they resolve to split nodes. Then packets are classified
221 by logical priority, or a more specific classifier may be attached
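/* Editorial sketch (not part of the original file): the default-map
 * fallback used by the classification steps below, reduced to a single
 * helper.  The 'defaults' argument plays the role of
 * cbq_class::defaults; the name is illustrative.
 */
static __inline__ struct cbq_class *
cbq_defmap_pick_sketch(struct cbq_class **defaults, u32 prio)
{
	struct cbq_class *cl = defaults[prio & TC_PRIO_MAX];

	return cl ? cl : defaults[TC_PRIO_BESTEFFORT];
}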
225 static struct cbq_class *
226 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
228 struct cbq_sched_data *q = qdisc_priv(sch);
229 struct cbq_class *head = &q->link;
230 struct cbq_class **defmap;
231 struct cbq_class *cl = NULL;
232 u32 prio = skb->priority;
233 struct tcf_result res;
236 * Step 1. If skb->priority points to one of our classes, use it.
238 if (TC_H_MAJ(prio^sch->handle) == 0 &&
239 (cl = cbq_class_lookup(q, prio)) != NULL)
242 *qerr = NET_XMIT_BYPASS;
245 defmap = head->defaults;
248 * Step 2+n. Apply classifier.
250 if (!head->filter_list ||
251 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
254 if ((cl = (void*)res.class) == NULL) {
255 if (TC_H_MAJ(res.classid))
256 cl = cbq_class_lookup(q, res.classid);
257 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
258 cl = defmap[TC_PRIO_BESTEFFORT];
260 if (cl == NULL || cl->level >= head->level)
264 #ifdef CONFIG_NET_CLS_ACT
268 *qerr = NET_XMIT_SUCCESS;
271 case TC_ACT_RECLASSIFY:
272 return cbq_reclassify(skb, cl);
274 #elif defined(CONFIG_NET_CLS_POLICE)
276 case TC_POLICE_RECLASSIFY:
277 return cbq_reclassify(skb, cl);
288 * Step 3+n. If the classifier selected a link-sharing class,
289 * apply the agency-specific classifier.
290 * Repeat this procedure until we hit a leaf node.
299 * Step 4. No success...
301 if (TC_H_MAJ(prio) == 0 &&
302 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
303 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
310 A packet has just been enqueued on an empty class.
311 cbq_activate_class adds it to the tail of the active class list
312 of its priority band.
315 static __inline__ void cbq_activate_class(struct cbq_class *cl)
317 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
318 int prio = cl->cpriority;
319 struct cbq_class *cl_tail;
321 cl_tail = q->active[prio];
322 q->active[prio] = cl;
324 if (cl_tail != NULL) {
325 cl->next_alive = cl_tail->next_alive;
326 cl_tail->next_alive = cl;
329 q->activemask |= (1<<prio);
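/* Editorial note (not part of the original file): q->active[prio]
 * points at the TAIL of a circular, singly linked list threaded
 * through ->next_alive, so tail->next_alive is the head.  A minimal
 * traversal sketch under that assumption (illustrative name):
 */
static __inline__ void
cbq_for_each_alive_sketch(struct cbq_class *tail,
			  void (*fn)(struct cbq_class *))
{
	struct cbq_class *cl = tail;

	if (cl == NULL)
		return;
	do {
		cl = cl->next_alive;	/* first step reaches the head */
		fn(cl);
	} while (cl != tail);
}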
334 Unlink the class from the active chain.
335 Note that the same unlinking is also done directly in cbq_dequeue*
336 during the round-robin procedure.
339 static void cbq_deactivate_class(struct cbq_class *this)
341 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
342 int prio = this->cpriority;
343 struct cbq_class *cl;
344 struct cbq_class *cl_prev = q->active[prio];
347 cl = cl_prev->next_alive;
349 cl_prev->next_alive = cl->next_alive;
350 cl->next_alive = NULL;
352 if (cl == q->active[prio]) {
353 q->active[prio] = cl_prev;
354 if (cl == q->active[prio]) {
355 q->active[prio] = NULL;
356 q->activemask &= ~(1<<prio);
362 } while ((cl_prev = cl) != q->active[prio]);
366 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
368 int toplevel = q->toplevel;
370 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
374 now = psched_get_time();
375 incr = now - q->now_rt;
379 if (cl->undertime < now) {
380 q->toplevel = cl->level;
383 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
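/* Editorial sketch (not part of the original file): what the loop in
 * cbq_mark_toplevel() computes -- the top-level cutoff is lowered to
 * the level of the first underlimit class found on the borrow chain of
 * the transmitting class, which bounds how far classes may borrow.
 * Illustrative name, same fields as above:
 */
static __inline__ int
cbq_pick_toplevel_sketch(struct cbq_class *cl, psched_time_t now,
			 int toplevel)
{
	do {
		if (cl->undertime < now)
			return cl->level;	/* underlimit ancestor found */
	} while ((cl = cl->borrow) != NULL && toplevel > cl->level);

	return toplevel;			/* keep the old cutoff */
}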
388 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
390 struct cbq_sched_data *q = qdisc_priv(sch);
393 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
395 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
399 if (ret == NET_XMIT_BYPASS)
405 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
406 cl->q->__parent = sch;
408 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
410 sch->bstats.packets++;
411 sch->bstats.bytes+=len;
412 cbq_mark_toplevel(q, cl);
414 cbq_activate_class(cl);
419 cbq_mark_toplevel(q, cl);
425 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
427 struct cbq_sched_data *q = qdisc_priv(sch);
428 struct cbq_class *cl;
431 if ((cl = q->tx_class) == NULL) {
438 cbq_mark_toplevel(q, cl);
440 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
442 cl->q->__parent = sch;
444 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
446 sch->qstats.requeues++;
448 cbq_activate_class(cl);
456 /* Overlimit actions */
458 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
460 static void cbq_ovl_classic(struct cbq_class *cl)
462 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
463 psched_tdiff_t delay = cl->undertime - q->now;
466 delay += cl->offtime;
469 The class goes to sleep, so that it will have no
470 chance to work off its avgidle. Let's forgive it 8)
472 BTW cbq-2.0 has a bug in this
473 place: apparently they forgot to shift it by cl->ewma_log.
476 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
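	/* Editorial note (not part of the original file): the quantity
	   subtracted above, (-avgidle) - ((-avgidle) >> ewma_log), equals
	   (1-W)*(-cl->avgidle) = (1/W - 1)*(-true_avgidle): exactly the
	   sleep time after which true_avgidle would return to zero (see
	   the derivation in cbq_update()).  Since the class goes to sleep
	   anyway, that time is forgiven here, as the comment above says. */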
477 if (cl->avgidle < cl->minidle)
478 cl->avgidle = cl->minidle;
481 cl->undertime = q->now + delay;
483 cl->xstats.overactions++;
486 if (q->wd_expires == 0 || q->wd_expires > delay)
487 q->wd_expires = delay;
489 /* Dirty work! We must schedule wakeups based on the
490 real available rate, rather than the leaf rate,
491 which may be tiny (even zero).
493 if (q->toplevel == TC_CBQ_MAXLEVEL) {
495 psched_tdiff_t base_delay = q->wd_expires;
497 for (b = cl->borrow; b; b = b->borrow) {
498 delay = b->undertime - q->now;
499 if (delay < base_delay) {
506 q->wd_expires = base_delay;
510 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
514 static void cbq_ovl_rclassic(struct cbq_class *cl)
516 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
517 struct cbq_class *this = cl;
520 if (cl->level > q->toplevel) {
524 } while ((cl = cl->borrow) != NULL);
531 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
533 static void cbq_ovl_delay(struct cbq_class *cl)
535 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
536 psched_tdiff_t delay = cl->undertime - q->now;
539 psched_time_t sched = q->now;
542 delay += cl->offtime;
544 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
545 if (cl->avgidle < cl->minidle)
546 cl->avgidle = cl->minidle;
547 cl->undertime = q->now + delay;
550 sched += delay + cl->penalty;
551 cl->penalized = sched;
552 cl->cpriority = TC_CBQ_MAXPRIO;
553 q->pmask |= (1<<TC_CBQ_MAXPRIO);
555 expires = ktime_set(0, 0);
556 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
557 if (hrtimer_try_to_cancel(&q->delay_timer) &&
558 ktime_to_ns(ktime_sub(q->delay_timer.expires,
560 q->delay_timer.expires = expires;
561 hrtimer_restart(&q->delay_timer);
563 cl->xstats.overactions++;
568 if (q->wd_expires == 0 || q->wd_expires > delay)
569 q->wd_expires = delay;
572 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
574 static void cbq_ovl_lowprio(struct cbq_class *cl)
576 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
578 cl->penalized = q->now + cl->penalty;
580 if (cl->cpriority != cl->priority2) {
581 cl->cpriority = cl->priority2;
582 q->pmask |= (1<<cl->cpriority);
583 cl->xstats.overactions++;
588 /* TC_CBQ_OVL_DROP: penalize class by dropping */
590 static void cbq_ovl_drop(struct cbq_class *cl)
592 if (cl->q->ops->drop)
593 if (cl->q->ops->drop(cl->q))
595 cl->xstats.overactions++;
599 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
602 struct cbq_class *cl;
603 struct cbq_class *cl_prev = q->active[prio];
604 psched_time_t sched = now;
610 cl = cl_prev->next_alive;
611 if (now - cl->penalized > 0) {
612 cl_prev->next_alive = cl->next_alive;
613 cl->next_alive = NULL;
614 cl->cpriority = cl->priority;
616 cbq_activate_class(cl);
618 if (cl == q->active[prio]) {
619 q->active[prio] = cl_prev;
620 if (cl == q->active[prio]) {
621 q->active[prio] = NULL;
626 cl = cl_prev->next_alive;
627 } else if (sched - cl->penalized > 0)
628 sched = cl->penalized;
629 } while ((cl_prev = cl) != q->active[prio]);
634 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
636 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
638 struct Qdisc *sch = q->watchdog.qdisc;
640 psched_tdiff_t delay = 0;
643 now = psched_get_time();
649 int prio = ffz(~pmask);
654 tmp = cbq_undelay_prio(q, prio, now);
657 if (tmp < delay || delay == 0)
665 time = ktime_set(0, 0);
666 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
667 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
670 sch->flags &= ~TCQ_F_THROTTLED;
671 netif_schedule(sch->dev);
672 return HRTIMER_NORESTART;
676 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
678 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
681 struct Qdisc *sch = child->__parent;
682 struct cbq_sched_data *q = qdisc_priv(sch);
683 struct cbq_class *cl = q->rx_class;
687 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
689 cbq_mark_toplevel(q, cl);
692 cl->q->__parent = sch;
694 if (cl->q->enqueue(skb, cl->q) == 0) {
696 sch->bstats.packets++;
697 sch->bstats.bytes+=len;
699 cbq_activate_class(cl);
712 It is a mission-critical procedure.
714 We "regenerate" the toplevel cutoff if the transmitting class
715 has backlog and is not regulated. This is not part of the
716 original CBQ description, but looks more reasonable.
717 Probably it is wrong. This question needs further investigation.
720 static __inline__ void
721 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
722 struct cbq_class *borrowed)
724 if (cl && q->toplevel >= borrowed->level) {
725 if (cl->q->q.qlen > 1) {
727 if (borrowed->undertime == PSCHED_PASTPERFECT) {
728 q->toplevel = borrowed->level;
731 } while ((borrowed=borrowed->borrow) != NULL);
734 /* It is not necessary now. Uncommenting it
735 will save CPU cycles, but decrease fairness.
737 q->toplevel = TC_CBQ_MAXLEVEL;
743 cbq_update(struct cbq_sched_data *q)
745 struct cbq_class *this = q->tx_class;
746 struct cbq_class *cl = this;
751 for ( ; cl; cl = cl->share) {
752 long avgidle = cl->avgidle;
755 cl->bstats.packets++;
756 cl->bstats.bytes += len;
759 (now - last) is the total time between packet right edges.
760 (last_pktlen/rate) is the "virtual" busy time, so that
762 idle = (now - last) - last_pktlen/rate
765 idle = q->now - cl->last;
766 if ((unsigned long)idle > 128*1024*1024) {
767 avgidle = cl->maxidle;
769 idle -= L2T(cl, len);
771 /* true_avgidle := (1-W)*true_avgidle + W*idle,
772 where W=2^{-ewma_log}. But cl->avgidle is scaled:
773 cl->avgidle == true_avgidle/W,
776 avgidle += idle - (avgidle>>cl->ewma_log);
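		/* Editorial note (not part of the original file): with
		   avgidle == true_avgidle/W the update above follows from
		   the EWMA by pure algebra:
		     avgidle' = ((1-W)*W*avgidle + W*idle)/W
		              = avgidle + idle - W*avgidle,
		   and W*avgidle is exactly avgidle >> ewma_log. */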
780 /* Overlimit or at-limit */
782 if (avgidle < cl->minidle)
783 avgidle = cl->minidle;
785 cl->avgidle = avgidle;
787 /* Calculate the expected time when this class
788 will be allowed to send.
790 (1-W)*true_avgidle + W*delay = 0, i.e.
791 idle = (1/W - 1)*(-true_avgidle)
793 idle = (1 - W)*(-cl->avgidle);
795 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
799 To maintain the rate allocated to the class,
800 we add to undertime the virtual clock time
801 necessary to complete the transmitted packet.
802 (len/phys_bandwidth has already elapsed by
803 the moment cbq_update is called.)
806 idle -= L2T(&q->link, len);
807 idle += L2T(cl, len);
809 cl->undertime = q->now + idle;
813 cl->undertime = PSCHED_PASTPERFECT;
814 if (avgidle > cl->maxidle)
815 cl->avgidle = cl->maxidle;
817 cl->avgidle = avgidle;
822 cbq_update_toplevel(q, this, q->tx_borrowed);
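/* Editorial sketch (not part of the original file): the heart of the
 * avgidle bookkeeping in cbq_update() above, with the clamping and
 * undertime handling left out.  Arguments mirror the quantities used
 * in the loop; the name is illustrative.
 */
static __inline__ long cbq_avgidle_step_sketch(long avgidle, long elapsed,
					       long xmit_time, int ewma_log)
{
	long idle = elapsed - xmit_time;	/* (now - last) - len/rate */

	return avgidle + idle - (avgidle >> ewma_log);
}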
825 static __inline__ struct cbq_class *
826 cbq_under_limit(struct cbq_class *cl)
828 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
829 struct cbq_class *this_cl = cl;
831 if (cl->tparent == NULL)
834 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
840 /* This is a very suspicious place. Currently the overlimit
841 action is generated for non-bounded classes
842 only if the link is completely congested.
843 Though this agrees with the ancestor-only paradigm,
844 it looks very stupid. In particular,
845 it means that this chunk of code will either
846 never be called or will strongly amplify
847 burstiness. Dangerous, silly, and yet
848 no better solution exists.
850 if ((cl = cl->borrow) == NULL) {
851 this_cl->qstats.overlimits++;
852 this_cl->overlimit(this_cl);
855 if (cl->level > q->toplevel)
857 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
863 static __inline__ struct sk_buff *
864 cbq_dequeue_prio(struct Qdisc *sch, int prio)
866 struct cbq_sched_data *q = qdisc_priv(sch);
867 struct cbq_class *cl_tail, *cl_prev, *cl;
871 cl_tail = cl_prev = q->active[prio];
872 cl = cl_prev->next_alive;
879 struct cbq_class *borrow = cl;
882 (borrow = cbq_under_limit(cl)) == NULL)
885 if (cl->deficit <= 0) {
886 /* Class exhausted its allotment per
887 this round. Switch to the next one.
890 cl->deficit += cl->quantum;
894 skb = cl->q->dequeue(cl->q);
896 /* Class did not give us any skb :-(
897 It can occur even if cl->q->q.qlen != 0,
898 e.g. if cl->q == "tbf"
903 cl->deficit -= skb->len;
905 q->tx_borrowed = borrow;
907 #ifndef CBQ_XSTATS_BORROWS_BYTES
908 borrow->xstats.borrows++;
909 cl->xstats.borrows++;
911 borrow->xstats.borrows += skb->len;
912 cl->xstats.borrows += skb->len;
915 q->tx_len = skb->len;
917 if (cl->deficit <= 0) {
918 q->active[prio] = cl;
920 cl->deficit += cl->quantum;
925 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
926 /* Class is empty or penalized.
927 Unlink it from active chain.
929 cl_prev->next_alive = cl->next_alive;
930 cl->next_alive = NULL;
932 /* Did cl_tail point to it? */
937 /* Was it the last class in this band? */
940 q->active[prio] = NULL;
941 q->activemask &= ~(1<<prio);
943 cbq_activate_class(cl);
947 q->active[prio] = cl_tail;
950 cbq_activate_class(cl);
958 } while (cl_prev != cl_tail);
961 q->active[prio] = cl_prev;
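/* Editorial sketch (not part of the original file): the allotment
 * handling in cbq_dequeue_prio() above is a deficit-round-robin
 * variant -- a class may send while its deficit is positive, and the
 * deficit is recharged by 'quantum' once it runs out.  Reduced to its
 * core (illustrative name):
 */
static __inline__ int cbq_wrr_may_send_sketch(long *deficit, long quantum,
					      unsigned int pktlen)
{
	if (*deficit <= 0) {
		*deficit += quantum;	/* allotment for the next round */
		return 0;		/* move on to the next class */
	}
	*deficit -= pktlen;		/* charge the transmitted packet */
	return 1;
}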
966 static __inline__ struct sk_buff *
967 cbq_dequeue_1(struct Qdisc *sch)
969 struct cbq_sched_data *q = qdisc_priv(sch);
973 activemask = q->activemask&0xFF;
975 int prio = ffz(~activemask);
976 activemask &= ~(1<<prio);
977 skb = cbq_dequeue_prio(sch, prio);
984 static struct sk_buff *
985 cbq_dequeue(struct Qdisc *sch)
988 struct cbq_sched_data *q = qdisc_priv(sch);
992 now = psched_get_time();
993 incr = now - q->now_rt;
996 psched_tdiff_t incr2;
997 /* Time integrator. We calculate the EOS (end-of-service) time
998 by adding the expected packet transmission time.
999 If real time is greater, we warp the artificial clock,
1002 cbq_time = max(real_time, work);
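		   Editorial note (not part of the original file): with
		   incr = now - q->now_rt (elapsed real time) and
		   incr2 = L2T(&q->link, q->tx_len) (expected transmission
		   time), the statements around this comment advance the
		   integrated clock q->now by max(incr, incr2), i.e. the
		   max() written above.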
1004 incr2 = L2T(&q->link, q->tx_len);
1007 if ((incr -= incr2) < 0)
1016 skb = cbq_dequeue_1(sch);
1019 sch->flags &= ~TCQ_F_THROTTLED;
1023 /* All the classes are overlimit.
1027 1. Scheduler is empty.
1028 2. Toplevel cutoff inhibited borrowing.
1029 3. Root class is overlimit.
1031 Reset the 2nd and 3rd conditions and retry.
1033 Note that NS and cbq-2.0 are buggy: picking
1034 an arbitrary class is appropriate for ancestor-only
1035 sharing, but not for the toplevel algorithm.
1037 Our version is better but slower, because it requires
1038 two passes; this is unavoidable with top-level sharing.
1041 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1042 q->link.undertime == PSCHED_PASTPERFECT)
1045 q->toplevel = TC_CBQ_MAXLEVEL;
1046 q->link.undertime = PSCHED_PASTPERFECT;
1049 /* No packets in scheduler or nobody wants to give them to us :-(
1050 Sigh... start watchdog timer in the last case. */
1053 sch->qstats.overlimits++;
1055 qdisc_watchdog_schedule(&q->watchdog,
1056 now + q->wd_expires);
1061 /* CBQ class maintenance routines */
1063 static void cbq_adjust_levels(struct cbq_class *this)
1070 struct cbq_class *cl;
1072 if ((cl = this->children) != NULL) {
1074 if (cl->level > level)
1076 } while ((cl = cl->sibling) != this->children);
1078 this->level = level+1;
1079 } while ((this = this->tparent) != NULL);
1082 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1084 struct cbq_class *cl;
1087 if (q->quanta[prio] == 0)
1090 for (h=0; h<16; h++) {
1091 for (cl = q->classes[h]; cl; cl = cl->next) {
1092 /* BUGGGG... Beware! This expression suffers from
1093 arithmetic overflow!
1095 if (cl->priority == prio) {
1096 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1099 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1100 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1101 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
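/* Editorial sketch (not part of the original file): the overflow that
 * the BUGGGG comment above warns about comes from the 32-bit product
 * weight*allot*nclasses.  One way to avoid it is to widen the
 * intermediate value to 64 bits; names mirror the fields used above.
 */
static __inline__ long cbq_safe_quantum_sketch(long weight, long allot,
					       int nclasses, unsigned quanta)
{
	u64 tmp = (u64)weight * allot * nclasses;

	if (quanta == 0)
		return 0;
	/* on 32-bit kernels this 64-bit division would normally go
	   through do_div(); plain division keeps the sketch short */
	return (long)(tmp / quanta);
}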
1107 static void cbq_sync_defmap(struct cbq_class *cl)
1109 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1110 struct cbq_class *split = cl->split;
1117 for (i=0; i<=TC_PRIO_MAX; i++) {
1118 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1119 split->defaults[i] = NULL;
1122 for (i=0; i<=TC_PRIO_MAX; i++) {
1123 int level = split->level;
1125 if (split->defaults[i])
1128 for (h=0; h<16; h++) {
1129 struct cbq_class *c;
1131 for (c = q->classes[h]; c; c = c->next) {
1132 if (c->split == split && c->level < level &&
1134 split->defaults[i] = c;
1142 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1144 struct cbq_class *split = NULL;
1147 if ((split = cl->split) == NULL)
1149 splitid = split->classid;
1152 if (split == NULL || split->classid != splitid) {
1153 for (split = cl->tparent; split; split = split->tparent)
1154 if (split->classid == splitid)
1161 if (cl->split != split) {
1163 cbq_sync_defmap(cl);
1165 cl->defmap = def&mask;
1167 cl->defmap = (cl->defmap&~mask)|(def&mask);
1169 cbq_sync_defmap(cl);
1172 static void cbq_unlink_class(struct cbq_class *this)
1174 struct cbq_class *cl, **clp;
1175 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1177 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1185 if (this->tparent) {
1194 } while ((cl = *clp) != this->sibling);
1196 if (this->tparent->children == this) {
1197 this->tparent->children = this->sibling;
1198 if (this->sibling == this)
1199 this->tparent->children = NULL;
1202 BUG_TRAP(this->sibling == this);
1206 static void cbq_link_class(struct cbq_class *this)
1208 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1209 unsigned h = cbq_hash(this->classid);
1210 struct cbq_class *parent = this->tparent;
1212 this->sibling = this;
1213 this->next = q->classes[h];
1214 q->classes[h] = this;
1219 if (parent->children == NULL) {
1220 parent->children = this;
1222 this->sibling = parent->children->sibling;
1223 parent->children->sibling = this;
1227 static unsigned int cbq_drop(struct Qdisc* sch)
1229 struct cbq_sched_data *q = qdisc_priv(sch);
1230 struct cbq_class *cl, *cl_head;
1234 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1235 if ((cl_head = q->active[prio]) == NULL)
1240 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1243 cbq_deactivate_class(cl);
1246 } while ((cl = cl->next_alive) != cl_head);
1252 cbq_reset(struct Qdisc* sch)
1254 struct cbq_sched_data *q = qdisc_priv(sch);
1255 struct cbq_class *cl;
1262 q->tx_borrowed = NULL;
1263 qdisc_watchdog_cancel(&q->watchdog);
1264 hrtimer_cancel(&q->delay_timer);
1265 q->toplevel = TC_CBQ_MAXLEVEL;
1266 q->now = psched_get_time();
1269 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1270 q->active[prio] = NULL;
1272 for (h = 0; h < 16; h++) {
1273 for (cl = q->classes[h]; cl; cl = cl->next) {
1276 cl->next_alive = NULL;
1277 cl->undertime = PSCHED_PASTPERFECT;
1278 cl->avgidle = cl->maxidle;
1279 cl->deficit = cl->quantum;
1280 cl->cpriority = cl->priority;
1287 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1289 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1290 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1291 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1293 if (lss->change&TCF_CBQ_LSS_EWMA)
1294 cl->ewma_log = lss->ewma_log;
1295 if (lss->change&TCF_CBQ_LSS_AVPKT)
1296 cl->avpkt = lss->avpkt;
1297 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1298 cl->minidle = -(long)lss->minidle;
1299 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1300 cl->maxidle = lss->maxidle;
1301 cl->avgidle = lss->maxidle;
1303 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1304 cl->offtime = lss->offtime;
1308 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1310 q->nclasses[cl->priority]--;
1311 q->quanta[cl->priority] -= cl->weight;
1312 cbq_normalize_quanta(q, cl->priority);
1315 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1317 q->nclasses[cl->priority]++;
1318 q->quanta[cl->priority] += cl->weight;
1319 cbq_normalize_quanta(q, cl->priority);
1322 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1324 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1327 cl->allot = wrr->allot;
1329 cl->weight = wrr->weight;
1330 if (wrr->priority) {
1331 cl->priority = wrr->priority-1;
1332 cl->cpriority = cl->priority;
1333 if (cl->priority >= cl->priority2)
1334 cl->priority2 = TC_CBQ_MAXPRIO-1;
1341 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1343 switch (ovl->strategy) {
1344 case TC_CBQ_OVL_CLASSIC:
1345 cl->overlimit = cbq_ovl_classic;
1347 case TC_CBQ_OVL_DELAY:
1348 cl->overlimit = cbq_ovl_delay;
1350 case TC_CBQ_OVL_LOWPRIO:
1351 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1352 ovl->priority2-1 <= cl->priority)
1354 cl->priority2 = ovl->priority2-1;
1355 cl->overlimit = cbq_ovl_lowprio;
1357 case TC_CBQ_OVL_DROP:
1358 cl->overlimit = cbq_ovl_drop;
1360 case TC_CBQ_OVL_RCLASSIC:
1361 cl->overlimit = cbq_ovl_rclassic;
1366 cl->penalty = ovl->penalty;
1370 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1371 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1373 cl->police = p->police;
1375 if (cl->q->handle) {
1376 if (p->police == TC_POLICE_RECLASSIFY)
1377 cl->q->reshape_fail = cbq_reshape_fail;
1379 cl->q->reshape_fail = NULL;
1385 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1387 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1391 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1393 struct cbq_sched_data *q = qdisc_priv(sch);
1394 struct rtattr *tb[TCA_CBQ_MAX];
1395 struct tc_ratespec *r;
1397 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1398 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1399 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1402 if (tb[TCA_CBQ_LSSOPT-1] &&
1403 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1406 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1408 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1412 q->link.sibling = &q->link;
1413 q->link.classid = sch->handle;
1414 q->link.qdisc = sch;
1415 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1417 q->link.q = &noop_qdisc;
1419 q->link.priority = TC_CBQ_MAXPRIO-1;
1420 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1421 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1422 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1423 q->link.overlimit = cbq_ovl_classic;
1424 q->link.allot = psched_mtu(sch->dev);
1425 q->link.quantum = q->link.allot;
1426 q->link.weight = q->link.R_tab->rate.rate;
1428 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1429 q->link.avpkt = q->link.allot/2;
1430 q->link.minidle = -0x7FFFFFFF;
1432 qdisc_watchdog_init(&q->watchdog, sch);
1433 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1434 q->delay_timer.function = cbq_undelay;
1435 q->toplevel = TC_CBQ_MAXLEVEL;
1436 q->now = psched_get_time();
1439 cbq_link_class(&q->link);
1441 if (tb[TCA_CBQ_LSSOPT-1])
1442 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1444 cbq_addprio(q, &q->link);
1448 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1450 unsigned char *b = skb_tail_pointer(skb);
1452 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1460 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1462 unsigned char *b = skb_tail_pointer(skb);
1463 struct tc_cbq_lssopt opt;
1466 if (cl->borrow == NULL)
1467 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1468 if (cl->share == NULL)
1469 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1470 opt.ewma_log = cl->ewma_log;
1471 opt.level = cl->level;
1472 opt.avpkt = cl->avpkt;
1473 opt.maxidle = cl->maxidle;
1474 opt.minidle = (u32)(-cl->minidle);
1475 opt.offtime = cl->offtime;
1477 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1485 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1487 unsigned char *b = skb_tail_pointer(skb);
1488 struct tc_cbq_wrropt opt;
1491 opt.allot = cl->allot;
1492 opt.priority = cl->priority+1;
1493 opt.cpriority = cl->cpriority+1;
1494 opt.weight = cl->weight;
1495 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1503 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1505 unsigned char *b = skb_tail_pointer(skb);
1506 struct tc_cbq_ovl opt;
1508 opt.strategy = cl->ovl_strategy;
1509 opt.priority2 = cl->priority2+1;
1511 opt.penalty = cl->penalty;
1512 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1520 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1522 unsigned char *b = skb_tail_pointer(skb);
1523 struct tc_cbq_fopt opt;
1525 if (cl->split || cl->defmap) {
1526 opt.split = cl->split ? cl->split->classid : 0;
1527 opt.defmap = cl->defmap;
1529 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1538 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1539 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1541 unsigned char *b = skb_tail_pointer(skb);
1542 struct tc_cbq_police opt;
1545 opt.police = cl->police;
1548 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1558 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1560 if (cbq_dump_lss(skb, cl) < 0 ||
1561 cbq_dump_rate(skb, cl) < 0 ||
1562 cbq_dump_wrr(skb, cl) < 0 ||
1563 cbq_dump_ovl(skb, cl) < 0 ||
1564 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1565 cbq_dump_police(skb, cl) < 0 ||
1567 cbq_dump_fopt(skb, cl) < 0)
1572 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1574 struct cbq_sched_data *q = qdisc_priv(sch);
1575 unsigned char *b = skb_tail_pointer(skb);
1578 rta = (struct rtattr*)b;
1579 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1580 if (cbq_dump_attr(skb, &q->link) < 0)
1581 goto rtattr_failure;
1582 rta->rta_len = skb_tail_pointer(skb) - b;
1591 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1593 struct cbq_sched_data *q = qdisc_priv(sch);
1595 q->link.xstats.avgidle = q->link.avgidle;
1596 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1600 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1601 struct sk_buff *skb, struct tcmsg *tcm)
1603 struct cbq_class *cl = (struct cbq_class*)arg;
1604 unsigned char *b = skb_tail_pointer(skb);
1608 tcm->tcm_parent = cl->tparent->classid;
1610 tcm->tcm_parent = TC_H_ROOT;
1611 tcm->tcm_handle = cl->classid;
1612 tcm->tcm_info = cl->q->handle;
1614 rta = (struct rtattr*)b;
1615 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1616 if (cbq_dump_attr(skb, cl) < 0)
1617 goto rtattr_failure;
1618 rta->rta_len = skb_tail_pointer(skb) - b;
1627 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1628 struct gnet_dump *d)
1630 struct cbq_sched_data *q = qdisc_priv(sch);
1631 struct cbq_class *cl = (struct cbq_class*)arg;
1633 cl->qstats.qlen = cl->q->q.qlen;
1634 cl->xstats.avgidle = cl->avgidle;
1635 cl->xstats.undertime = 0;
1637 if (cl->undertime != PSCHED_PASTPERFECT)
1638 cl->xstats.undertime = cl->undertime - q->now;
1640 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1641 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1642 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1645 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1648 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1655 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1656 cl->classid)) == NULL)
1659 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1660 if (cl->police == TC_POLICE_RECLASSIFY)
1661 new->reshape_fail = cbq_reshape_fail;
1665 *old = xchg(&cl->q, new);
1666 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1668 sch_tree_unlock(sch);
1675 static struct Qdisc *
1676 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1678 struct cbq_class *cl = (struct cbq_class*)arg;
1680 return cl ? cl->q : NULL;
1683 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1685 struct cbq_class *cl = (struct cbq_class *)arg;
1687 if (cl->q->q.qlen == 0)
1688 cbq_deactivate_class(cl);
1691 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1693 struct cbq_sched_data *q = qdisc_priv(sch);
1694 struct cbq_class *cl = cbq_class_lookup(q, classid);
1698 return (unsigned long)cl;
1703 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1705 struct cbq_sched_data *q = qdisc_priv(sch);
1707 BUG_TRAP(!cl->filters);
1709 tcf_destroy_chain(cl->filter_list);
1710 qdisc_destroy(cl->q);
1711 qdisc_put_rtab(cl->R_tab);
1712 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1718 cbq_destroy(struct Qdisc* sch)
1720 struct cbq_sched_data *q = qdisc_priv(sch);
1721 struct cbq_class *cl;
1724 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1728 * Filters must be destroyed first because we don't destroy the
1729 * classes from root to leaves, which means that filters can still
1730 * be bound to classes which have been destroyed already. --TGR '04
1732 for (h = 0; h < 16; h++) {
1733 for (cl = q->classes[h]; cl; cl = cl->next) {
1734 tcf_destroy_chain(cl->filter_list);
1735 cl->filter_list = NULL;
1738 for (h = 0; h < 16; h++) {
1739 struct cbq_class *next;
1741 for (cl = q->classes[h]; cl; cl = next) {
1743 cbq_destroy_class(sch, cl);
1748 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1750 struct cbq_class *cl = (struct cbq_class*)arg;
1752 if (--cl->refcnt == 0) {
1753 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1756 spin_lock_bh(&sch->dev->queue_lock);
1757 if (q->rx_class == cl)
1759 spin_unlock_bh(&sch->dev->queue_lock);
1762 cbq_destroy_class(sch, cl);
1767 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1771 struct cbq_sched_data *q = qdisc_priv(sch);
1772 struct cbq_class *cl = (struct cbq_class*)*arg;
1773 struct rtattr *opt = tca[TCA_OPTIONS-1];
1774 struct rtattr *tb[TCA_CBQ_MAX];
1775 struct cbq_class *parent;
1776 struct qdisc_rate_table *rtab = NULL;
1778 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1781 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1782 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1785 if (tb[TCA_CBQ_FOPT-1] &&
1786 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1789 if (tb[TCA_CBQ_RATE-1] &&
1790 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1793 if (tb[TCA_CBQ_LSSOPT-1] &&
1794 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1797 if (tb[TCA_CBQ_WRROPT-1] &&
1798 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1801 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1802 if (tb[TCA_CBQ_POLICE-1] &&
1803 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1810 if (cl->tparent && cl->tparent->classid != parentid)
1812 if (!cl->tparent && parentid != TC_H_ROOT)
1816 if (tb[TCA_CBQ_RATE-1]) {
1817 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1822 /* Change class parameters */
1825 if (cl->next_alive != NULL)
1826 cbq_deactivate_class(cl);
1829 rtab = xchg(&cl->R_tab, rtab);
1830 qdisc_put_rtab(rtab);
1833 if (tb[TCA_CBQ_LSSOPT-1])
1834 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1836 if (tb[TCA_CBQ_WRROPT-1]) {
1838 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1841 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1842 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1844 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1845 if (tb[TCA_CBQ_POLICE-1])
1846 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1849 if (tb[TCA_CBQ_FOPT-1])
1850 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1853 cbq_activate_class(cl);
1855 sch_tree_unlock(sch);
1857 if (tca[TCA_RATE-1])
1858 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1859 &sch->dev->queue_lock,
1864 if (parentid == TC_H_ROOT)
1867 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1868 tb[TCA_CBQ_LSSOPT-1] == NULL)
1871 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1877 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1881 classid = TC_H_MAKE(sch->handle,0x8000);
1883 for (i=0; i<0x8000; i++) {
1884 if (++q->hgenerator >= 0x8000)
1886 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1892 classid = classid|q->hgenerator;
1897 parent = cbq_class_lookup(q, parentid);
1904 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1910 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1911 cl->q = &noop_qdisc;
1912 cl->classid = classid;
1913 cl->tparent = parent;
1915 cl->allot = parent->allot;
1916 cl->quantum = cl->allot;
1917 cl->weight = cl->R_tab->rate.rate;
1921 cl->borrow = cl->tparent;
1922 if (cl->tparent != &q->link)
1923 cl->share = cl->tparent;
1924 cbq_adjust_levels(parent);
1925 cl->minidle = -0x7FFFFFFF;
1926 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1927 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1928 if (cl->ewma_log==0)
1929 cl->ewma_log = q->link.ewma_log;
1931 cl->maxidle = q->link.maxidle;
1933 cl->avpkt = q->link.avpkt;
1934 cl->overlimit = cbq_ovl_classic;
1935 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1936 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1937 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1938 if (tb[TCA_CBQ_POLICE-1])
1939 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1941 if (tb[TCA_CBQ_FOPT-1])
1942 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1943 sch_tree_unlock(sch);
1945 if (tca[TCA_RATE-1])
1946 gen_new_estimator(&cl->bstats, &cl->rate_est,
1947 &sch->dev->queue_lock, tca[TCA_RATE-1]);
1949 *arg = (unsigned long)cl;
1953 qdisc_put_rtab(rtab);
1957 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1959 struct cbq_sched_data *q = qdisc_priv(sch);
1960 struct cbq_class *cl = (struct cbq_class*)arg;
1963 if (cl->filters || cl->children || cl == &q->link)
1968 qlen = cl->q->q.qlen;
1970 qdisc_tree_decrease_qlen(cl->q, qlen);
1973 cbq_deactivate_class(cl);
1975 if (q->tx_borrowed == cl)
1976 q->tx_borrowed = q->tx_class;
1977 if (q->tx_class == cl) {
1979 q->tx_borrowed = NULL;
1981 #if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1982 if (q->rx_class == cl)
1986 cbq_unlink_class(cl);
1987 cbq_adjust_levels(cl->tparent);
1989 cbq_sync_defmap(cl);
1992 sch_tree_unlock(sch);
1994 if (--cl->refcnt == 0)
1995 cbq_destroy_class(sch, cl);
2000 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2002 struct cbq_sched_data *q = qdisc_priv(sch);
2003 struct cbq_class *cl = (struct cbq_class *)arg;
2008 return &cl->filter_list;
2011 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2014 struct cbq_sched_data *q = qdisc_priv(sch);
2015 struct cbq_class *p = (struct cbq_class*)parent;
2016 struct cbq_class *cl = cbq_class_lookup(q, classid);
2019 if (p && p->level <= cl->level)
2022 return (unsigned long)cl;
2027 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2029 struct cbq_class *cl = (struct cbq_class*)arg;
2034 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2036 struct cbq_sched_data *q = qdisc_priv(sch);
2042 for (h = 0; h < 16; h++) {
2043 struct cbq_class *cl;
2045 for (cl = q->classes[h]; cl; cl = cl->next) {
2046 if (arg->count < arg->skip) {
2050 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2059 static struct Qdisc_class_ops cbq_class_ops = {
2062 .qlen_notify = cbq_qlen_notify,
2065 .change = cbq_change_class,
2066 .delete = cbq_delete,
2068 .tcf_chain = cbq_find_tcf,
2069 .bind_tcf = cbq_bind_filter,
2070 .unbind_tcf = cbq_unbind_filter,
2071 .dump = cbq_dump_class,
2072 .dump_stats = cbq_dump_class_stats,
2075 static struct Qdisc_ops cbq_qdisc_ops = {
2077 .cl_ops = &cbq_class_ops,
2079 .priv_size = sizeof(struct cbq_sched_data),
2080 .enqueue = cbq_enqueue,
2081 .dequeue = cbq_dequeue,
2082 .requeue = cbq_requeue,
2086 .destroy = cbq_destroy,
2089 .dump_stats = cbq_dump_stats,
2090 .owner = THIS_MODULE,
2093 static int __init cbq_module_init(void)
2095 return register_qdisc(&cbq_qdisc_ops);
2097 static void __exit cbq_module_exit(void)
2099 unregister_qdisc(&cbq_qdisc_ops);
2101 module_init(cbq_module_init)
2102 module_exit(cbq_module_exit)
2103 MODULE_LICENSE("GPL");