string: factorize skip_spaces and export it to be generally available
[safe/jmp/linux-2.6] / net / sched / sch_htb.c
index 3fb58f4..508cf5f 100644 (file)
@@ -24,8 +24,6 @@
  *             Jiri Fojtasek
  *                     fixed requeue routine
  *             and many others. thanks.
- *
- * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
  */
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -37,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/workqueue.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
@@ -53,7 +52,6 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16           /* classid hash size */
 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
 #define HTB_VER 0x30011                /* major must be matched with number suplied by TC as version */
 
@@ -74,9 +72,9 @@ enum htb_cmode {
 
 /* interior & leaf nodes; props specific to leaves are marked L: */
 struct htb_class {
+       struct Qdisc_class_common common;
        /* general class parameters */
-       u32 classid;
-       struct gnet_stats_basic bstats;
+       struct gnet_stats_basic_packed bstats;
        struct gnet_stats_queue qstats;
        struct gnet_stats_rate_est rate_est;
        struct tc_htb_xstats xstats;    /* our special stats */
@@ -84,17 +82,15 @@ struct htb_class {
 
        /* topology */
        int level;              /* our level (see above) */
+       unsigned int children;
        struct htb_class *parent;       /* parent class */
-       struct hlist_node hlist;        /* classid hash list item */
-       struct list_head sibling;       /* sibling list item */
-       struct list_head children;      /* children list */
+
+       int prio;               /* these two are used only by leaves... */
+       int quantum;            /* but stored for parent-to-leaf return */
 
        union {
                struct htb_class_leaf {
                        struct Qdisc *q;
-                       int prio;
-                       int aprio;
-                       int quantum;
                        int deficit[TC_HTB_MAXDEPTH];
                        struct list_head drop_list;
                } leaf;
@@ -119,8 +115,6 @@ struct htb_class {
        struct tcf_proto *filter_list;
        int filter_cnt;
 
-       int warned;             /* only one warning about non work conserving .. */
-
        /* token bucket parameters */
        struct qdisc_rate_table *rate;  /* rate table of the class itself */
        struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
@@ -128,22 +122,10 @@ struct htb_class {
        psched_tdiff_t mbuffer; /* max wait time */
        long tokens, ctokens;   /* current number of tokens */
        psched_time_t t_c;      /* checkpoint time */
-
-       int prio;               /* For parent to leaf return possible here */
-       int quantum;            /* we do backup. Finally full replacement  */
-                               /* of un.leaf originals should be done. */
 };
 
-static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
-                          int size)
-{
-       long result = qdisc_l2t(rate, size);
-       return result;
-}
-
 struct htb_sched {
-       struct list_head root;  /* root classes list */
-       struct hlist_head hash[HTB_HSIZE];      /* hashed by classid */
+       struct Qdisc_class_hash clhash;
        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
        /* self list - roots of self generating tree */
@@ -158,14 +140,10 @@ struct htb_sched {
        /* time of nearest event per level (row) */
        psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 
-       /* whether we hit non-work conserving class during this dequeue; we use */
-       int nwc_hit;            /* this to disable mindelay complaint in dequeue */
-
        int defcls;             /* class where unclassified flows go to */
 
        /* filters for qdisc itself */
        struct tcf_proto *filter_list;
-       int filter_cnt;
 
        int rate2quantum;       /* quant = rate / rate2quantum */
        psched_time_t now;      /* cached dequeue time */
@@ -176,34 +154,22 @@ struct htb_sched {
        int direct_qlen;        /* max qlen of above */
 
        long direct_pkts;
-};
 
-/* compute hash of size HTB_HSIZE for given handle */
-static inline int htb_hash(u32 h)
-{
-#if HTB_HSIZE != 16
-#error "Declare new hash for your HTB_HSIZE"
-#endif
-       h ^= h >> 8;            /* stolen from cbq_hash */
-       h ^= h >> 4;
-       return h & 0xf;
-}
+#define HTB_WARN_TOOMANYEVENTS 0x1
+       unsigned int warned;    /* only one warning */
+       struct work_struct work;
+};
 
 /* find class in global hash table using given handle */
 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       struct hlist_node *p;
-       struct htb_class *cl;
+       struct Qdisc_class_common *clc; /* embedded in htb_class as .common */
 
-       if (TC_H_MAJ(handle) != sch->handle)
+       clc = qdisc_class_find(&q->clhash, handle);
+       if (clc == NULL)        /* lookup returned no entry for handle */
                return NULL;
-
-       hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
-               if (cl->classid == handle)
-                       return cl;
-       }
-       return NULL;
+       return container_of(clc, struct htb_class, common);
 }
 
 /**
@@ -237,14 +203,14 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
        if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
                return cl;
 
-       *qerr = NET_XMIT_BYPASS;
+       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        tcf = q->filter_list;
        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
                case TC_ACT_STOLEN:
-                       *qerr = NET_XMIT_SUCCESS;
+                       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                case TC_ACT_SHOT:
                        return NULL;
                }
@@ -284,7 +250,7 @@ static void htb_add_to_id_tree(struct rb_root *root,
                parent = *p;
                c = rb_entry(parent, struct htb_class, node[prio]);
 
-               if (cl->classid > c->classid)
+               if (cl->common.classid > c->common.classid)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
@@ -448,7 +414,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
                                /* we are removing child which is pointed to from
                                   parent feed - forget the pointer but remember
                                   classid */
-                               p->un.inner.last_ptr_id[prio] = cl->classid;
+                               p->un.inner.last_ptr_id[prio] = cl->common.classid;
                                p->un.inner.ptr[prio] = NULL;
                        }
 
@@ -547,13 +513,13 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
  */
 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 {
-       BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
+       WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 
        if (!cl->prio_activity) {
-               cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
+               cl->prio_activity = 1 << cl->prio;      /* single bit at our prio */
                htb_activate_prios(q, cl);
                list_add_tail(&cl->un.leaf.drop_list,
-                             q->drops + cl->un.leaf.aprio);
+                             q->drops + cl->prio);     /* drops[] is per prio */
        }
 }
 
@@ -565,7 +531,7 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
  */
 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 {
-       BUG_TRAP(cl->prio_activity);
+       WARN_ON(!cl->prio_activity);
 
        htb_deactivate_prios(q, cl);
        cl->prio_activity = 0;
@@ -574,7 +540,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-       int ret;
+       int uninitialized_var(ret);
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = htb_classify(skb, sch, &ret);
 
@@ -590,66 +556,54 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
                }
 #ifdef CONFIG_NET_CLS_ACT
        } else if (!cl) {
-               if (ret == NET_XMIT_BYPASS)
+               if (ret & __NET_XMIT_BYPASS)
                        sch->qstats.drops++;
                kfree_skb(skb);
                return ret;
 #endif
-       } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
-                  NET_XMIT_SUCCESS) {
-               sch->qstats.drops++;
-               cl->qstats.drops++;
-               return NET_XMIT_DROP;
+       } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+               if (net_xmit_drop_count(ret)) {
+                       sch->qstats.drops++;
+                       cl->qstats.drops++;
+               }
+               return ret;
        } else {
                cl->bstats.packets +=
                        skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-               cl->bstats.bytes += skb->len;
+               cl->bstats.bytes += qdisc_pkt_len(skb);
                htb_activate(q, cl);
        }
 
        sch->q.qlen++;
        sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-       sch->bstats.bytes += skb->len;
+       sch->bstats.bytes += qdisc_pkt_len(skb);
        return NET_XMIT_SUCCESS;
 }
 
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
 {
-       int ret;
-       struct htb_sched *q = qdisc_priv(sch);
-       struct htb_class *cl = htb_classify(skb, sch, &ret);
-       struct sk_buff *tskb;
+       long toks = diff + cl->tokens;  /* diff: time since t_c (see caller) */
 
-       if (cl == HTB_DIRECT) {
-               /* enqueue to helper queue */
-               if (q->direct_queue.qlen < q->direct_qlen) {
-                       __skb_queue_head(&q->direct_queue, skb);
-               } else {
-                       __skb_queue_head(&q->direct_queue, skb);
-                       tskb = __skb_dequeue_tail(&q->direct_queue);
-                       kfree_skb(tskb);
-                       sch->qstats.drops++;
-                       return NET_XMIT_CN;
-               }
-#ifdef CONFIG_NET_CLS_ACT
-       } else if (!cl) {
-               if (ret == NET_XMIT_BYPASS)
-                       sch->qstats.drops++;
-               kfree_skb(skb);
-               return ret;
-#endif
-       } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
-                  NET_XMIT_SUCCESS) {
-               sch->qstats.drops++;
-               cl->qstats.drops++;
-               return NET_XMIT_DROP;
-       } else
-               htb_activate(q, cl);
+       if (toks > cl->buffer)
+               toks = cl->buffer;      /* never exceed cl->buffer */
+       toks -= (long) qdisc_l2t(cl->rate, bytes);      /* charge via rate table */
+       if (toks <= -cl->mbuffer)
+               toks = 1 - cl->mbuffer; /* keep result above -mbuffer */
 
-       sch->q.qlen++;
-       sch->qstats.requeues++;
-       return NET_XMIT_SUCCESS;
+       cl->tokens = toks;
+}
+
+static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
+{
+       long toks = diff + cl->ctokens; /* diff: time since t_c (see caller) */
+
+       if (toks > cl->cbuffer)
+               toks = cl->cbuffer;     /* never exceed cl->cbuffer */
+       toks -= (long) qdisc_l2t(cl->ceil, bytes);      /* charge via ceil table */
+       if (toks <= -cl->mbuffer)
+               toks = 1 - cl->mbuffer; /* keep result above -mbuffer */
+
+       cl->ctokens = toks;
 }
 
 /**
@@ -666,27 +620,21 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
                             int level, struct sk_buff *skb)
 {
-       int bytes = skb->len;
-       long toks, diff;
+       int bytes = qdisc_pkt_len(skb);
        enum htb_cmode old_mode;
-
-#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
-       if (toks > cl->B) toks = cl->B; \
-       toks -= L2T(cl, cl->R, bytes); \
-       if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
-       cl->T = toks
+       long diff;
 
        while (cl) {
                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
                if (cl->level >= level) {
                        if (cl->level == level)
                                cl->xstats.lends++;
-                       HTB_ACCNT(tokens, buffer, rate);
+                       htb_accnt_tokens(cl, bytes, diff);
                } else {
                        cl->xstats.borrows++;
                        cl->tokens += diff;     /* we moved t_c; update tokens */
                }
-               HTB_ACCNT(ctokens, cbuffer, ceil);
+               htb_accnt_ctokens(cl, bytes, diff);
                cl->t_c = q->now;
 
                old_mode = cl->cmode;
@@ -713,15 +661,16 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
  * htb_do_events - make mode changes to classes at the level
  *
  * Scans event queue for pending events and applies them. Returns time of
- * next pending event (0 for no event in pq).
+ * next pending event (0 for no event in pq, q->now for too many events).
  * Note: Applied are events whose have cl->pq_key <= q->now.
  */
-static psched_time_t htb_do_events(struct htb_sched *q, int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level,
+                                  unsigned long start)
 {
        /* don't run for longer than 2 jiffies; 2 is used instead of
           1 to simplify things when jiffy is going to be incremented
           too soon */
-       unsigned long stop_at = jiffies + 2;
+       unsigned long stop_at = start + 2;
        while (time_before(jiffies, stop_at)) {
                struct htb_class *cl;
                long diff;
@@ -740,8 +689,14 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level)
                if (cl->cmode != HTB_CAN_SEND)
                        htb_add_to_wait_tree(q, cl, diff);
        }
-       /* too much load - let's continue on next jiffie */
-       return q->now + PSCHED_TICKS_PER_SEC / HZ;
+
+       /* too much load - let's continue after a break for scheduling */
+       if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
+               printk(KERN_WARNING "htb: too many events!\n");
+               q->warned |= HTB_WARN_TOOMANYEVENTS;
+       }
+
+       return q->now;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
@@ -753,14 +708,14 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
        while (n) {
                struct htb_class *cl =
                    rb_entry(n, struct htb_class, node[prio]);
-               if (id == cl->classid)
-                       return n;
 
-               if (id > cl->classid) {
+               if (id > cl->common.classid) {
                        n = n->rb_right;
-               } else {
+               } else if (id < cl->common.classid) {
                        r = n;
                        n = n->rb_left;
+               } else {
+                       return n;
                }
        }
        return r;
@@ -781,7 +736,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
                u32 *pid;
        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 
-       BUG_TRAP(tree->rb_node);
+       BUG_ON(!tree->rb_node);
        sp->root = tree->rb_node;
        sp->pptr = pptr;
        sp->pid = pid;
@@ -801,9 +756,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
                                *sp->pptr = (*sp->pptr)->rb_left;
                        if (sp > stk) {
                                sp--;
-                               BUG_TRAP(*sp->pptr);
-                               if (!*sp->pptr)
+                               if (!*sp->pptr) {
+                                       WARN_ON(1);
                                        return NULL;
+                               }
                                htb_next_rb_node(sp->pptr);
                        }
                } else {
@@ -816,7 +772,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
                        sp->pid = cl->un.inner.last_ptr_id + prio;
                }
        }
-       BUG_TRAP(0);
+       WARN_ON(1);
        return NULL;
 }
 
@@ -834,8 +790,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 
        do {
 next:
-               BUG_TRAP(cl);
-               if (!cl)
+               if (unlikely(!cl))
                        return NULL;
 
                /* class can be empty - it is unlikely but can be true if leaf
@@ -863,13 +818,8 @@ next:
                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
                if (likely(skb != NULL))
                        break;
-               if (!cl->warned) {
-                       printk(KERN_WARNING
-                              "htb: class %X isn't work conserving ?!\n",
-                              cl->classid);
-                       cl->warned = 1;
-               }
-               q->nwc_hit++;
+
+               qdisc_warn_nonwc("htb", cl->un.leaf.q);
                htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
                                  ptr[0]) + prio);
                cl = htb_lookup_leaf(q->row[level] + prio, prio,
@@ -879,8 +829,9 @@ next:
        } while (cl != start);
 
        if (likely(skb != NULL)) {
-               if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
-                       cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
+               cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
+               if (cl->un.leaf.deficit[level] < 0) {
+                       cl->un.leaf.deficit[level] += cl->quantum;
                        htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
                                          ptr[0]) + prio);
                }
@@ -899,6 +850,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
        struct htb_sched *q = qdisc_priv(sch);
        int level;
        psched_time_t next_event;
+       unsigned long start_at;
 
        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
        skb = __skb_dequeue(&q->direct_queue);
@@ -911,23 +863,24 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
        if (!sch->q.qlen)
                goto fin;
        q->now = psched_get_time();
+       start_at = jiffies;
 
        next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
-       q->nwc_hit = 0;
+
        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
                /* common case optimization - skip event handler quickly */
                int m;
                psched_time_t event;
 
                if (q->now >= q->near_ev_cache[level]) {
-                       event = htb_do_events(q, level);
+                       event = htb_do_events(q, level, start_at);
                        if (!event)
                                event = q->now + PSCHED_TICKS_PER_SEC;
                        q->near_ev_cache[level] = event;
                } else
                        event = q->near_ev_cache[level];
 
-               if (event && next_event > event)
+               if (next_event > event)
                        next_event = event;
 
                m = ~q->row_mask[level];
@@ -943,7 +896,10 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
                }
        }
        sch->qstats.overlimits++;
-       qdisc_watchdog_schedule(&q->watchdog, next_event);
+       if (likely(next_event > q->now))
+               qdisc_watchdog_schedule(&q->watchdog, next_event);
+       else
+               schedule_work(&q->work);
 fin:
        return skb;
 }
@@ -977,13 +933,12 @@ static unsigned int htb_drop(struct Qdisc *sch)
 static void htb_reset(struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       int i;
-
-       for (i = 0; i < HTB_HSIZE; i++) {
-               struct hlist_node *p;
-               struct htb_class *cl;
+       struct htb_class *cl;
+       struct hlist_node *n;
+       unsigned int i;
 
-               hlist_for_each_entry(cl, p, q->hash + i, hlist) {
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
                        if (cl->level)
                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
                        else {
@@ -1014,6 +969,14 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 };
 
+static void htb_work_func(struct work_struct *work)
+{
+       struct htb_sched *q = container_of(work, struct htb_sched, work);
+       struct Qdisc *sch = q->watchdog.qdisc;  /* presumably set by htb_init() */
+
+       __netif_schedule(qdisc_root(sch));      /* work queued by htb_dequeue() */
+}
+
 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct htb_sched *q = qdisc_priv(sch);
@@ -1041,16 +1004,17 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
                return -EINVAL;
        }
 
-       INIT_LIST_HEAD(&q->root);
-       for (i = 0; i < HTB_HSIZE; i++)
-               INIT_HLIST_HEAD(q->hash + i);
+       err = qdisc_class_hash_init(&q->clhash);
+       if (err < 0)
+               return err;
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
                INIT_LIST_HEAD(q->drops + i);
 
        qdisc_watchdog_init(&q->watchdog, sch);
+       INIT_WORK(&q->work, htb_work_func);
        skb_queue_head_init(&q->direct_queue);
 
-       q->direct_qlen = sch->dev->tx_queue_len;
+       q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
        if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
                q->direct_qlen = 2;
 
@@ -1063,11 +1027,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
+       spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
        struct htb_sched *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_htb_glob gopt;
 
-       spin_lock_bh(&sch->dev->queue_lock);
+       spin_lock_bh(root_lock);
 
        gopt.direct_pkts = q->direct_pkts;
        gopt.version = HTB_VER;
@@ -1081,11 +1046,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
        NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
        nla_nest_end(skb, nest);
 
-       spin_unlock_bh(&sch->dev->queue_lock);
+       spin_unlock_bh(root_lock);
        return skb->len;
 
 nla_put_failure:
-       spin_unlock_bh(&sch->dev->queue_lock);
+       spin_unlock_bh(root_lock);
        nla_nest_cancel(skb, nest);
        return -1;
 }
@@ -1094,12 +1059,13 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
                          struct sk_buff *skb, struct tcmsg *tcm)
 {
        struct htb_class *cl = (struct htb_class *)arg;
+       spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
        struct nlattr *nest;
        struct tc_htb_opt opt;
 
-       spin_lock_bh(&sch->dev->queue_lock);
-       tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
-       tcm->tcm_handle = cl->classid;
+       spin_lock_bh(root_lock);
+       tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
+       tcm->tcm_handle = cl->common.classid;
        if (!cl->level && cl->un.leaf.q)
                tcm->tcm_info = cl->un.leaf.q->handle;
 
@@ -1113,17 +1079,17 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
        opt.buffer = cl->buffer;
        opt.ceil = cl->ceil->rate;
        opt.cbuffer = cl->cbuffer;
-       opt.quantum = cl->un.leaf.quantum;
-       opt.prio = cl->un.leaf.prio;
+       opt.quantum = cl->quantum;
+       opt.prio = cl->prio;
        opt.level = cl->level;
        NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 
        nla_nest_end(skb, nest);
-       spin_unlock_bh(&sch->dev->queue_lock);
+       spin_unlock_bh(root_lock);
        return skb->len;
 
 nla_put_failure:
-       spin_unlock_bh(&sch->dev->queue_lock);
+       spin_unlock_bh(root_lock);
        nla_nest_cancel(skb, nest);
        return -1;
 }
@@ -1139,7 +1105,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
        cl->xstats.ctokens = cl->ctokens;
 
        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-           gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+           gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
            gnet_stats_copy_queue(d, &cl->qstats) < 0)
                return -1;
 
@@ -1151,27 +1117,29 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 {
        struct htb_class *cl = (struct htb_class *)arg;
 
-       if (cl && !cl->level) {
-               if (new == NULL &&
-                   (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-                                            cl->classid))
-                   == NULL)
-                       return -ENOBUFS;
-               sch_tree_lock(sch);
-               if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
-                       qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-                       qdisc_reset(*old);
-               }
-               sch_tree_unlock(sch);
-               return 0;
+       if (cl->level)
+               return -EINVAL;
+       if (new == NULL &&
+           (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+                                    &pfifo_qdisc_ops,
+                                    cl->common.classid)) == NULL)
+               return -ENOBUFS;
+
+       sch_tree_lock(sch);
+       *old = cl->un.leaf.q;
+       cl->un.leaf.q = new;
+       if (*old != NULL) {
+               qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+               qdisc_reset(*old);
        }
-       return -ENOENT;
+       sch_tree_unlock(sch);
+       return 0;
 }
 
 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
 {
        struct htb_class *cl = (struct htb_class *)arg;
-       return (cl && !cl->level) ? cl->un.leaf.q : NULL;
+       return !cl->level ? cl->un.leaf.q : NULL; /* cl must be non-NULL */
 }
 
 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
@@ -1195,12 +1163,9 @@ static inline int htb_parent_last_child(struct htb_class *cl)
        if (!cl->parent)
                /* the root class */
                return 0;
-
-       if (!(cl->parent->children.next == &cl->sibling &&
-               cl->parent->children.prev == &cl->sibling))
+       if (cl->parent->children > 1)
                /* not the last child */
                return 0;
-
        return 1;
 }
 
@@ -1209,7 +1174,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 {
        struct htb_class *parent = cl->parent;
 
-       BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
+       WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
 
        if (parent->cmode != HTB_CAN_SEND)
                htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
@@ -1218,8 +1183,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
-       parent->un.leaf.quantum = parent->quantum;
-       parent->un.leaf.prio = parent->prio;
        parent->tokens = parent->buffer;
        parent->ctokens = parent->cbuffer;
        parent->t_c = psched_get_time();
@@ -1228,10 +1191,8 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
-       struct htb_sched *q = qdisc_priv(sch);
-
        if (!cl->level) {
-               BUG_TRAP(cl->un.leaf.q);
+               WARN_ON(!cl->un.leaf.q);
                qdisc_destroy(cl->un.leaf.q);
        }
        gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1239,29 +1200,17 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
        qdisc_put_rtab(cl->ceil);
 
        tcf_destroy_chain(&cl->filter_list);
-
-       while (!list_empty(&cl->children))
-               htb_destroy_class(sch, list_entry(cl->children.next,
-                                                 struct htb_class, sibling));
-
-       /* note: this delete may happen twice (see htb_delete) */
-       hlist_del_init(&cl->hlist);
-       list_del(&cl->sibling);
-
-       if (cl->prio_activity)
-               htb_deactivate(q, cl);
-
-       if (cl->cmode != HTB_CAN_SEND)
-               htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
-
        kfree(cl);
 }
 
-/* always caled under BH & queue lock */
 static void htb_destroy(struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
+       struct hlist_node *n, *next;
+       struct htb_class *cl;
+       unsigned int i;
 
+       cancel_work_sync(&q->work);
        qdisc_watchdog_cancel(&q->watchdog);
        /* This line used to be after htb_destroy_class call below
           and surprisingly it worked in 2.4. But it must precede it
@@ -1269,10 +1218,16 @@ static void htb_destroy(struct Qdisc *sch)
           unbind_filter on it (without Oops). */
        tcf_destroy_chain(&q->filter_list);
 
-       while (!list_empty(&q->root))
-               htb_destroy_class(sch, list_entry(q->root.next,
-                                                 struct htb_class, sibling));
-
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
+                       tcf_destroy_chain(&cl->filter_list);
+       }
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+                                         common.hnode)
+                       htb_destroy_class(sch, cl);
+       }
+       qdisc_class_hash_destroy(&q->clhash);
        __skb_queue_purge(&q->direct_queue);
 }
 
@@ -1287,12 +1242,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
        // TODO: why don't allow to delete subtree ? references ? does
        // tc subsys quarantee us that in htb_destroy it holds no class
        // refs so that we can remove children safely there ?
-       if (!list_empty(&cl->children) || cl->filter_cnt)
+       if (cl->children || cl->filter_cnt)
                return -EBUSY;
 
        if (!cl->level && htb_parent_last_child(cl)) {
-               new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
-                                               cl->parent->classid);
+               new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+                                         &pfifo_qdisc_ops,
+                                         cl->parent->common.classid);
                last_child = 1;
        }
 
@@ -1305,16 +1261,24 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
        }
 
        /* delete from hash and active; remainder in destroy_class */
-       hlist_del_init(&cl->hlist);
+       qdisc_class_hash_remove(&q->clhash, &cl->common);
+       if (cl->parent)
+               cl->parent->children--;
 
        if (cl->prio_activity)
                htb_deactivate(q, cl);
 
+       if (cl->cmode != HTB_CAN_SEND)
+               htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
        if (last_child)
                htb_parent_to_leaf(q, cl, new_q);
 
-       if (--cl->refcnt == 0)
-               htb_destroy_class(sch, cl);
+       BUG_ON(--cl->refcnt == 0);
+       /*
+        * This shouldn't happen: we "hold" one cops->get() when called
+        * from tc_ctl_tclass; the destroy method is done from cops->put().
+        */
 
        sch_tree_unlock(sch);
        return 0;
@@ -1380,8 +1344,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                };
 
                /* check for valid classid */
-               if (!classid || TC_H_MAJ(classid ^ sch->handle)
-                   || htb_find(classid, sch))
+               if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
+                   htb_find(classid, sch))
                        goto failure;
 
                /* check maximal depth */
@@ -1393,13 +1357,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
                        goto failure;
 
-               gen_new_estimator(&cl->bstats, &cl->rate_est,
-                                 &sch->dev->queue_lock,
-                                 tca[TCA_RATE] ? : &est.nla);
+               err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+                                       qdisc_root_sleeping_lock(sch),
+                                       tca[TCA_RATE] ? : &est.nla);
+               if (err) {
+                       kfree(cl);
+                       goto failure;
+               }
+
                cl->refcnt = 1;
-               INIT_LIST_HEAD(&cl->sibling);
-               INIT_HLIST_NODE(&cl->hlist);
-               INIT_LIST_HEAD(&cl->children);
+               cl->children = 0;
                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                RB_CLEAR_NODE(&cl->pq_node);
 
@@ -1409,7 +1376,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
                   so that can't be used inside of sch_tree_lock
                   -- thanks to Karlis Peisenieks */
-               new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+               new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+                                         &pfifo_qdisc_ops, classid);
                sch_tree_lock(sch);
                if (parent && !parent->level) {
                        unsigned int qlen = parent->un.leaf.q->q.qlen;
@@ -1433,7 +1401,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                /* leaf (we) needs elementary qdisc */
                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
 
-               cl->classid = classid;
+               cl->common.classid = classid;
                cl->parent = parent;
 
                /* set class to be in HTB_CAN_SEND state */
@@ -1444,41 +1412,40 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                cl->cmode = HTB_CAN_SEND;
 
                /* attach to the hash list and parent's family */
-               hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
-               list_add_tail(&cl->sibling,
-                             parent ? &parent->children : &q->root);
+               qdisc_class_hash_insert(&q->clhash, &cl->common);
+               if (parent)
+                       parent->children++;
        } else {
-               if (tca[TCA_RATE])
-                       gen_replace_estimator(&cl->bstats, &cl->rate_est,
-                                             &sch->dev->queue_lock,
-                                             tca[TCA_RATE]);
+               if (tca[TCA_RATE]) {
+                       err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+                                                   qdisc_root_sleeping_lock(sch),
+                                                   tca[TCA_RATE]);
+                       if (err)
+                               return err;
+               }
                sch_tree_lock(sch);
        }
 
        /* it used to be a nasty bug here, we have to check that node
           is really leaf before changing cl->un.leaf ! */
        if (!cl->level) {
-               cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
-               if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+               cl->quantum = rtab->rate.rate / q->rate2quantum;
+               if (!hopt->quantum && cl->quantum < 1000) {
                        printk(KERN_WARNING
                               "HTB: quantum of class %X is small. Consider r2q change.\n",
-                              cl->classid);
-                       cl->un.leaf.quantum = 1000;
+                              cl->common.classid);
+                       cl->quantum = 1000;
                }
-               if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
+               if (!hopt->quantum && cl->quantum > 200000) {
                        printk(KERN_WARNING
                               "HTB: quantum of class %X is big. Consider r2q change.\n",
-                              cl->classid);
-                       cl->un.leaf.quantum = 200000;
+                              cl->common.classid);
+                       cl->quantum = 200000;
                }
                if (hopt->quantum)
-                       cl->un.leaf.quantum = hopt->quantum;
-               if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
-                       cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
-
-               /* backup for htb_parent_to_leaf */
-               cl->quantum = cl->un.leaf.quantum;
-               cl->prio = cl->un.leaf.prio;
+                       cl->quantum = hopt->quantum;
+               if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
+                       cl->prio = TC_HTB_NUMPRIO - 1;
        }
 
        cl->buffer = hopt->buffer;
@@ -1491,6 +1458,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
        cl->ceil = ctab;
        sch_tree_unlock(sch);
 
+       qdisc_class_hash_grow(sch, &q->clhash);
+
        *arg = (unsigned long)cl;
        return 0;
 
@@ -1514,7 +1483,6 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
                                     u32 classid)
 {
-       struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = htb_find(classid, sch);
 
        /*if (cl && !cl->level) return 0;
@@ -1528,35 +1496,29 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
         */
        if (cl)
                cl->filter_cnt++;
-       else
-               q->filter_cnt++;
        return (unsigned long)cl;
 }
 
 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
-       struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)arg;
 
        if (cl)
                cl->filter_cnt--;
-       else
-               q->filter_cnt--;
 }
 
 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       int i;
+       struct htb_class *cl;
+       struct hlist_node *n;
+       unsigned int i;
 
        if (arg->stop)
                return;
 
-       for (i = 0; i < HTB_HSIZE; i++) {
-               struct hlist_node *p;
-               struct htb_class *cl;
-
-               hlist_for_each_entry(cl, p, q->hash + i, hlist) {
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
                        if (arg->count < arg->skip) {
                                arg->count++;
                                continue;
@@ -1593,7 +1555,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
        .priv_size      =       sizeof(struct htb_sched),
        .enqueue        =       htb_enqueue,
        .dequeue        =       htb_dequeue,
-       .requeue        =       htb_requeue,
+       .peek           =       qdisc_peek_dequeued,
        .drop           =       htb_drop,
        .init           =       htb_init,
        .reset          =       htb_reset,