#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
+#include <linux/workqueue.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
struct htb_class {
struct Qdisc_class_common common;
/* general class parameters */
- struct gnet_stats_basic bstats;
+ struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est rate_est;
struct tc_htb_xstats xstats; /* our special stats */
unsigned int children;
struct htb_class *parent; /* parent class */
+ int prio; /* these two are used only by leaves... */
+ int quantum; /* but stored for parent-to-leaf return */
+
union {
struct htb_class_leaf {
struct Qdisc *q;
- int prio;
- int aprio;
- int quantum;
int deficit[TC_HTB_MAXDEPTH];
struct list_head drop_list;
} leaf;
struct tcf_proto *filter_list;
int filter_cnt;
- int warned; /* only one warning about non work conserving .. */
-
/* token bucket parameters */
struct qdisc_rate_table *rate; /* rate table of the class itself */
struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
psched_tdiff_t mbuffer; /* max wait time */
long tokens, ctokens; /* current number of tokens */
psched_time_t t_c; /* checkpoint time */
-
- int prio; /* For parent to leaf return possible here */
- int quantum; /* we do backup. Finally full replacement */
- /* of un.leaf originals should be done. */
};
-static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
- int size)
-{
- long result = qdisc_l2t(rate, size);
- return result;
-}
-
struct htb_sched {
struct Qdisc_class_hash clhash;
struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
/* time of nearest event per level (row) */
psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
- /* whether we hit non-work conserving class during this dequeue; we use */
- int nwc_hit; /* this to disable mindelay complaint in dequeue */
-
int defcls; /* class where unclassified flows go to */
/* filters for qdisc itself */
int direct_qlen; /* max qlen of above */
long direct_pkts;
+
+#define HTB_WARN_TOOMANYEVENTS 0x1
+ unsigned int warned; /* only one warning */
+ struct work_struct work;
};
/* find class in global hash table using given handle */
if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
return cl;
- *qerr = NET_XMIT_BYPASS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
tcf = q->filter_list;
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- *qerr = NET_XMIT_SUCCESS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
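+ /* fall through: both cases return NULL below */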
case TC_ACT_SHOT:
return NULL;
}
*/
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
- BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
+ WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
if (!cl->prio_activity) {
- cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
+ cl->prio_activity = 1 << cl->prio;
htb_activate_prios(q, cl);
list_add_tail(&cl->un.leaf.drop_list,
- q->drops + cl->un.leaf.aprio);
+ q->drops + cl->prio);
}
}
*/
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
- BUG_TRAP(cl->prio_activity);
+ WARN_ON(!cl->prio_activity);
htb_deactivate_prios(q, cl);
cl->prio_activity = 0;
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
- int ret;
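+ /* ret is only read on the !cl error path, where htb_classify()
+ has set it; uninitialized_var() silences a false gcc warning */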
+ int uninitialized_var(ret);
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = htb_classify(skb, sch, &ret);
}
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
#endif
- } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
- NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- return NET_XMIT_DROP;
+ } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ }
+ return ret;
} else {
cl->bstats.packets +=
skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- cl->bstats.bytes += skb->len;
+ cl->bstats.bytes += qdisc_pkt_len(skb);
htb_activate(q, cl);
}
sch->q.qlen++;
sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
return NET_XMIT_SUCCESS;
}
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
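+/* Open-coded replacements for the old HTB_ACCNT() macro: grow the bucket
+ * by the elapsed time @diff, cap it at the configured burst, charge the
+ * packet's transmission time at the class rate, and bound the debt so a
+ * class can never be stalled for longer than its mbuffer. */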
+static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
{
- int ret;
- struct htb_sched *q = qdisc_priv(sch);
- struct htb_class *cl = htb_classify(skb, sch, &ret);
- struct sk_buff *tskb;
+ long toks = diff + cl->tokens;
- if (cl == HTB_DIRECT) {
- /* enqueue to helper queue */
- if (q->direct_queue.qlen < q->direct_qlen) {
- __skb_queue_head(&q->direct_queue, skb);
- } else {
- __skb_queue_head(&q->direct_queue, skb);
- tskb = __skb_dequeue_tail(&q->direct_queue);
- kfree_skb(tskb);
- sch->qstats.drops++;
- return NET_XMIT_CN;
- }
-#ifdef CONFIG_NET_CLS_ACT
- } else if (!cl) {
- if (ret == NET_XMIT_BYPASS)
- sch->qstats.drops++;
- kfree_skb(skb);
- return ret;
-#endif
- } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
- NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- return NET_XMIT_DROP;
- } else
- htb_activate(q, cl);
+ if (toks > cl->buffer)
+ toks = cl->buffer;
+ toks -= (long) qdisc_l2t(cl->rate, bytes);
+ if (toks <= -cl->mbuffer)
+ toks = 1 - cl->mbuffer;
- sch->q.qlen++;
- sch->qstats.requeues++;
- return NET_XMIT_SUCCESS;
+ cl->tokens = toks;
+}
+
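+/* Same accounting as above, against the ceiling rate and cbuffer. */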
+static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
+{
+ long toks = diff + cl->ctokens;
+
+ if (toks > cl->cbuffer)
+ toks = cl->cbuffer;
+ toks -= (long) qdisc_l2t(cl->ceil, bytes);
+ if (toks <= -cl->mbuffer)
+ toks = 1 - cl->mbuffer;
+
+ cl->ctokens = toks;
}
/**
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
int level, struct sk_buff *skb)
{
- int bytes = skb->len;
- long toks, diff;
+ int bytes = qdisc_pkt_len(skb);
enum htb_cmode old_mode;
-
-#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
- if (toks > cl->B) toks = cl->B; \
- toks -= L2T(cl, cl->R, bytes); \
- if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
- cl->T = toks
+ long diff;
while (cl) {
diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
if (cl->level >= level) {
if (cl->level == level)
cl->xstats.lends++;
- HTB_ACCNT(tokens, buffer, rate);
+ htb_accnt_tokens(cl, bytes, diff);
} else {
cl->xstats.borrows++;
cl->tokens += diff; /* we moved t_c; update tokens */
}
- HTB_ACCNT(ctokens, cbuffer, ceil);
+ htb_accnt_ctokens(cl, bytes, diff);
cl->t_c = q->now;
old_mode = cl->cmode;
* htb_do_events - make mode changes to classes at the level
*
* Scans event queue for pending events and applies them. Returns time of
- * next pending event (0 for no event in pq).
+ * next pending event (0 for no event in pq, q->now for too many events).
* Note: only events with cl->pq_key <= q->now are applied.
*/
-static psched_time_t htb_do_events(struct htb_sched *q, int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level,
+ unsigned long start)
{
/* don't run for longer than 2 jiffies; 2 is used instead of
   1 to simplify things when the jiffies counter is about to
   be incremented too soon */
- unsigned long stop_at = jiffies + 2;
+ unsigned long stop_at = start + 2;
while (time_before(jiffies, stop_at)) {
struct htb_class *cl;
long diff;
if (cl->cmode != HTB_CAN_SEND)
htb_add_to_wait_tree(q, cl, diff);
}
- /* too much load - let's continue on next jiffie */
- return q->now + PSCHED_TICKS_PER_SEC / HZ;
+
+ /* too much load - let's continue after a break for scheduling */
+ if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
+ printk(KERN_WARNING "htb: too many events!\n");
+ q->warned |= HTB_WARN_TOOMANYEVENTS;
+ }
+
+ return q->now;
}
/* Returns class->node+prio from id-tree where the class's id is >= id. NULL
while (n) {
struct htb_class *cl =
rb_entry(n, struct htb_class, node[prio]);
- if (id == cl->common.classid)
- return n;
if (id > cl->common.classid) {
n = n->rb_right;
- } else {
+ } else if (id < cl->common.classid) {
r = n;
n = n->rb_left;
+ } else {
+ return n;
}
}
return r;
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
- BUG_TRAP(tree->rb_node);
+ BUG_ON(!tree->rb_node);
sp->root = tree->rb_node;
sp->pptr = pptr;
sp->pid = pid;
*sp->pptr = (*sp->pptr)->rb_left;
if (sp > stk) {
sp--;
- BUG_TRAP(*sp->pptr);
- if (!*sp->pptr)
+ if (!*sp->pptr) {
+ WARN_ON(1);
return NULL;
+ }
htb_next_rb_node(sp->pptr);
}
} else {
sp->pid = cl->un.inner.last_ptr_id + prio;
}
}
- BUG_TRAP(0);
+ WARN_ON(1);
return NULL;
}
do {
next:
- BUG_TRAP(cl);
- if (!cl)
+ if (unlikely(!cl))
return NULL;
/* class can be empty - it is unlikely but can be true if leaf
skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
if (likely(skb != NULL))
break;
- if (!cl->warned) {
- printk(KERN_WARNING
- "htb: class %X isn't work conserving ?!\n",
- cl->common.classid);
- cl->warned = 1;
- }
- q->nwc_hit++;
+
+ qdisc_warn_nonwc("htb", cl->un.leaf.q);
htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
ptr[0]) + prio);
cl = htb_lookup_leaf(q->row[level] + prio, prio,
} while (cl != start);
if (likely(skb != NULL)) {
- if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
- cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
+ cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
+ if (cl->un.leaf.deficit[level] < 0) {
+ cl->un.leaf.deficit[level] += cl->quantum;
htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
ptr[0]) + prio);
}
struct htb_sched *q = qdisc_priv(sch);
int level;
psched_time_t next_event;
+ unsigned long start_at;
/* try to dequeue direct packets as high prio (!) to minimize cpu work */
skb = __skb_dequeue(&q->direct_queue);
if (!sch->q.qlen)
goto fin;
q->now = psched_get_time();
+ start_at = jiffies;
next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
- q->nwc_hit = 0;
+
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
psched_time_t event;
if (q->now >= q->near_ev_cache[level]) {
- event = htb_do_events(q, level);
+ event = htb_do_events(q, level, start_at);
if (!event)
event = q->now + PSCHED_TICKS_PER_SEC;
q->near_ev_cache[level] = event;
} else
event = q->near_ev_cache[level];
- if (event && next_event > event)
+ if (next_event > event)
next_event = event;
m = ~q->row_mask[level];
}
}
sch->qstats.overlimits++;
- qdisc_watchdog_schedule(&q->watchdog, next_event);
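+ /* htb_do_events() returns q->now when it runs out of its jiffies
+ budget; punt to the workqueue then, instead of arming the
+ watchdog for a time that has already passed */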
+ if (likely(next_event > q->now))
+ qdisc_watchdog_schedule(&q->watchdog, next_event);
+ else
+ schedule_work(&q->work);
fin:
return skb;
}
[TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};
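+
+/* Process-context continuation of an overloaded dequeue: after giving the
+ * scheduler a chance to run, kick the root qdisc so dequeueing resumes. */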
+static void htb_work_func(struct work_struct *work)
+{
+ struct htb_sched *q = container_of(work, struct htb_sched, work);
+ struct Qdisc *sch = q->watchdog.qdisc;
+
+ __netif_schedule(qdisc_root(sch));
+}
+
static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
struct htb_sched *q = qdisc_priv(sch);
INIT_LIST_HEAD(q->drops + i);
qdisc_watchdog_init(&q->watchdog, sch);
+ INIT_WORK(&q->work, htb_work_func);
skb_queue_head_init(&q->direct_queue);
q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_htb_glob gopt;
- spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_lock_bh(root_lock);
gopt.direct_pkts = q->direct_pkts;
gopt.version = HTB_VER;
NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
nla_nest_end(skb, nest);
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(root_lock);
return skb->len;
nla_put_failure:
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct nlattr *nest;
struct tc_htb_opt opt;
- spin_lock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_lock_bh(root_lock);
tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
if (!cl->level && cl->un.leaf.q)
opt.buffer = cl->buffer;
opt.ceil = cl->ceil->rate;
opt.cbuffer = cl->cbuffer;
- opt.quantum = cl->un.leaf.quantum;
- opt.prio = cl->un.leaf.prio;
+ opt.quantum = cl->quantum;
+ opt.prio = cl->prio;
opt.level = cl->level;
NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
nla_nest_end(skb, nest);
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(root_lock);
return skb->len;
nla_put_failure:
- spin_unlock_bh(&qdisc_dev(sch)->queue_lock);
+ spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
cl->xstats.ctokens = cl->ctokens;
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, &cl->qstats) < 0)
return -1;
{
struct htb_class *cl = (struct htb_class *)arg;
- if (cl && !cl->level) {
- if (new == NULL &&
- (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
- &pfifo_qdisc_ops,
- cl->common.classid))
- == NULL)
- return -ENOBUFS;
- sch_tree_lock(sch);
- if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
- return 0;
+ if (cl->level)
+ return -EINVAL;
+ if (new == NULL &&
+ (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->common.classid)) == NULL)
+ return -ENOBUFS;
+
+ sch_tree_lock(sch);
+ *old = cl->un.leaf.q;
+ cl->un.leaf.q = new;
+ if (*old != NULL) {
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
}
- return -ENOENT;
+ sch_tree_unlock(sch);
+ return 0;
}
static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
- return (cl && !cl->level) ? cl->un.leaf.q : NULL;
+ return !cl->level ? cl->un.leaf.q : NULL;
}
static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *parent = cl->parent;
- BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
+ WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
if (parent->cmode != HTB_CAN_SEND)
htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
memset(&parent->un.inner, 0, sizeof(parent->un.inner));
INIT_LIST_HEAD(&parent->un.leaf.drop_list);
parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
- parent->un.leaf.quantum = parent->quantum;
- parent->un.leaf.prio = parent->prio;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
parent->t_c = psched_get_time();
static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
if (!cl->level) {
- BUG_TRAP(cl->un.leaf.q);
+ WARN_ON(!cl->un.leaf.q);
qdisc_destroy(cl->un.leaf.q);
}
gen_kill_estimator(&cl->bstats, &cl->rate_est);
kfree(cl);
}
-/* always caled under BH & queue lock */
static void htb_destroy(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl;
unsigned int i;
+ cancel_work_sync(&q->work);
qdisc_watchdog_cancel(&q->watchdog);
/* This line used to be after the htb_destroy_class() call below,
   and surprisingly it worked in 2.4. But it must precede it
/* delete from hash and active; remainder in destroy_class */
qdisc_class_hash_remove(&q->clhash, &cl->common);
- cl->parent->children--;
+ if (cl->parent)
+ cl->parent->children--;
if (cl->prio_activity)
htb_deactivate(q, cl);
if (last_child)
htb_parent_to_leaf(q, cl, new_q);
- if (--cl->refcnt == 0)
- htb_destroy_class(sch, cl);
+ BUG_ON(--cl->refcnt == 0);
+ /*
+ * This shouldn't happen: we "hold" one cops->get() when called
+ * from tc_ctl_tclass; the destroy method is done from cops->put().
+ */
sch_tree_unlock(sch);
return 0;
};
/* check for valid classid */
- if (!classid || TC_H_MAJ(classid ^ sch->handle)
- || htb_find(classid, sch))
+ if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
+ htb_find(classid, sch))
goto failure;
/* check maximal depth */
if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
goto failure;
- gen_new_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock,
- tca[TCA_RATE] ? : &est.nla);
+ err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE] ? : &est.nla);
+ if (err) {
+ kfree(cl);
+ goto failure;
+ }
+
cl->refcnt = 1;
cl->children = 0;
INIT_LIST_HEAD(&cl->un.leaf.drop_list);
if (parent)
parent->children++;
} else {
- if (tca[TCA_RATE])
- gen_replace_estimator(&cl->bstats, &cl->rate_est,
- &qdisc_dev(sch)->queue_lock,
- tca[TCA_RATE]);
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+ }
sch_tree_lock(sch);
}
/* there used to be a nasty bug here: we have to check that the
   node is really a leaf before changing cl->un.leaf! */
if (!cl->level) {
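+ /* the default quantum is rate / r2q, clamped to [1000, 200000]
+ bytes; an explicit quantum from the user bypasses the clamp */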
- cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
- if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+ cl->quantum = rtab->rate.rate / q->rate2quantum;
+ if (!hopt->quantum && cl->quantum < 1000) {
printk(KERN_WARNING
"HTB: quantum of class %X is small. Consider r2q change.\n",
cl->common.classid);
- cl->un.leaf.quantum = 1000;
+ cl->quantum = 1000;
}
- if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
+ if (!hopt->quantum && cl->quantum > 200000) {
printk(KERN_WARNING
"HTB: quantum of class %X is big. Consider r2q change.\n",
cl->common.classid);
- cl->un.leaf.quantum = 200000;
+ cl->quantum = 200000;
}
if (hopt->quantum)
- cl->un.leaf.quantum = hopt->quantum;
- if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
- cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
-
- /* backup for htb_parent_to_leaf */
- cl->quantum = cl->un.leaf.quantum;
- cl->prio = cl->un.leaf.prio;
+ cl->quantum = hopt->quantum;
+ if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
+ cl->prio = TC_HTB_NUMPRIO - 1;
}
cl->buffer = hopt->buffer;
.priv_size = sizeof(struct htb_sched),
.enqueue = htb_enqueue,
.dequeue = htb_dequeue,
- .requeue = htb_requeue,
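+ /* ->requeue() is gone: qdisc_peek_dequeued() dequeues once and
+ caches the skb, so a peeked packet is never charged twice */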
+ .peek = qdisc_peek_dequeued,
.drop = htb_drop,
.init = htb_init,
.reset = htb_reset,