string: factorize skip_spaces and export it to be generally available
[safe/jmp/linux-2.6] / net / sched / sch_htb.c
index 880a339..508cf5f 100644 (file)
@@ -1,4 +1,4 @@
-/* vim: ts=8 sw=8
+/*
  * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
  *
  *             This program is free software; you can redistribute it and/or
@@ -11,7 +11,7 @@
  * Credits (in time order) for older HTB versions:
  *              Stef Coene <stef.coene@docum.org>
  *                     HTB support at LARTC mailing list
- *             Ondrej Kraus, <krauso@barr.cz> 
+ *             Ondrej Kraus, <krauso@barr.cz>
  *                     found missing INIT_QDISC(htb)
  *             Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  *                     helped a lot to locate nasty class stall bug
  *             Jiri Fojtasek
  *                     fixed requeue routine
  *             and many others. thanks.
- *
- * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
  */
 #include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
+#include <linux/moduleparam.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
 #include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
 #include <linux/skbuff.h>
 #include <linux/list.h>
 #include <linux/compiler.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
 #include <linux/rbtree.h>
+#include <linux/workqueue.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
 
 /* HTB algorithm.
     Author: devik@cdi.cz
     ========================================================================
     HTB is like TBF with multiple classes. It is also similar to CBQ because
-    it allows to assign priority to each class in hierarchy. 
+    it allows to assign priority to each class in hierarchy.
     In fact it is another implementation of Floyd's formal sharing.
 
     Levels:
-    Each class is assigned level. Leaf has ALWAYS level 0 and root 
+    Each class is assigned level. Leaf has ALWAYS level 0 and root
     classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
     one less than their parent.
 */
 
-#define HTB_HSIZE 16   /* classid hash size */
-#define HTB_EWMAC 2    /* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#undef HTB_DEBUG       /* compile debugging support (activated by tc tool) */
-#define HTB_RATECM 1    /* whether to use rate computer */
-#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
-#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
-#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
-#define HTB_VER 0x30011        /* major must be matched with number suplied by TC as version */
+static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
+#define HTB_VER 0x30011                /* major must be matched with number supplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
 #endif
 
-/* debugging support; S is subsystem, these are defined:
-  0 - netlink messages
-  1 - enqueue
-  2 - drop & requeue
-  3 - dequeue main
-  4 - dequeue one prio DRR part
-  5 - dequeue class accounting
-  6 - class overlimit status computation
-  7 - hint tree
-  8 - event queue
- 10 - rate estimator
- 11 - classifier 
- 12 - fast dequeue cache
-
- L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
- q->debug uint32 contains 16 2-bit fields one for subsystem starting
- from LSB
- */
-#ifdef HTB_DEBUG
-#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
-#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
-       printk(KERN_DEBUG FMT,##ARG)
-#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
-#define HTB_PASSQ q,
-#define HTB_ARGQ struct htb_sched *q,
-#define static
-#undef __inline__
-#define __inline__
-#undef inline
-#define inline
-#define HTB_CMAGIC 0xFEFAFEF1
-#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
-               if ((N)->rb_color == -1) break; \
-               rb_erase(N,R); \
-               (N)->rb_color = -1; } while (0)
-#else
-#define HTB_DBG_COND(S,L) (0)
-#define HTB_DBG(S,L,FMT,ARG...)
-#define HTB_PASSQ
-#define HTB_ARGQ
-#define HTB_CHCL(cl)
-#define htb_safe_rb_erase(N,R) rb_erase(N,R)
-#endif
-
+/* Module parameter and sysfs export */
+module_param    (htb_hysteresis, int, 0640);
+MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
 
 /* used internally to keep status of single class */
 enum htb_cmode {
-    HTB_CANT_SEND,             /* class can't send and can't borrow */
-    HTB_MAY_BORROW,            /* class can't send but may borrow */
-    HTB_CAN_SEND               /* class can send */
+       HTB_CANT_SEND,          /* class can't send and can't borrow */
+       HTB_MAY_BORROW,         /* class can't send but may borrow */
+       HTB_CAN_SEND            /* class can send */
 };
 
 /* interior & leaf nodes; props specific to leaves are marked L: */
-struct htb_class
-{
-#ifdef HTB_DEBUG
-       unsigned magic;
-#endif
-    /* general class parameters */
-    u32 classid;
-    struct gnet_stats_basic bstats;
-    struct gnet_stats_queue qstats;
-    struct gnet_stats_rate_est rate_est;
-    struct tc_htb_xstats xstats;/* our special stats */
-    int refcnt;                        /* usage count of this class */
-
-#ifdef HTB_RATECM
-    /* rate measurement counters */
-    unsigned long rate_bytes,sum_bytes;
-    unsigned long rate_packets,sum_packets;
-#endif
-
-    /* topology */
-    int level;                 /* our level (see above) */
-    struct htb_class *parent;  /* parent class */
-    struct list_head hlist;    /* classid hash list item */
-    struct list_head sibling;  /* sibling list item */
-    struct list_head children; /* children list */
-
-    union {
-           struct htb_class_leaf {
-                   struct Qdisc *q;
-                   int prio;
-                   int aprio;  
-                   int quantum;
-                   int deficit[TC_HTB_MAXDEPTH];
-                   struct list_head drop_list;
-           } leaf;
-           struct htb_class_inner {
-                   struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
-                   struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
-            /* When class changes from state 1->2 and disconnects from 
-               parent's feed then we lost ptr value and start from the
-              first child again. Here we store classid of the
-              last valid ptr (used when ptr is NULL). */
-              u32 last_ptr_id[TC_HTB_NUMPRIO];
-           } inner;
-    } un;
-    struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
-    struct rb_node pq_node;             /* node for event queue */
-    unsigned long pq_key;      /* the same type as jiffies global */
-    
-    int prio_activity;         /* for which prios are we active */
-    enum htb_cmode cmode;      /* current mode of the class */
-
-    /* class attached filters */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
-
-    int warned;                /* only one warning about non work conserving .. */
-
-    /* token bucket parameters */
-    struct qdisc_rate_table *rate;     /* rate table of the class itself */
-    struct qdisc_rate_table *ceil;     /* ceiling rate (limits borrows too) */
-    long buffer,cbuffer;               /* token bucket depth/rate */
-    psched_tdiff_t mbuffer;            /* max wait time */
-    long tokens,ctokens;               /* current number of tokens */
-    psched_time_t t_c;                 /* checkpoint time */
+struct htb_class {
+       struct Qdisc_class_common common;       /* carries classid; htb_find() maps it back via container_of */
+       /* general class parameters */
+       struct gnet_stats_basic_packed bstats;
+       struct gnet_stats_queue qstats;
+       struct gnet_stats_rate_est rate_est;
+       struct tc_htb_xstats xstats;    /* our special stats */
+       int refcnt;             /* usage count of this class */
+
+       /* topology */
+       int level;              /* our level (see above) */
+       unsigned int children;  /* presumably the number of child classes - TODO confirm */
+       struct htb_class *parent;       /* parent class */
+
+       int prio;               /* these two are used only by leaves... */
+       int quantum;            /* but stored for parent-to-leaf return */
+
+       union {
+               struct htb_class_leaf {
+                       struct Qdisc *q;
+                       int deficit[TC_HTB_MAXDEPTH];
+                       struct list_head drop_list;
+               } leaf;
+               struct htb_class_inner {
+                       struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
+                       struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
+                       /* When a class changes from state 1->2 and disconnects from
+                          its parent's feed, we lose the ptr value and would start
+                          from the first child again. Here we store the classid of
+                          the last valid ptr (used when ptr is NULL). */
+                       u32 last_ptr_id[TC_HTB_NUMPRIO];
+               } inner;
+       } un;
+       struct rb_node node[TC_HTB_NUMPRIO];    /* node for self or feed tree */
+       struct rb_node pq_node; /* node for event queue */
+       psched_time_t pq_key;   /* event-queue sort key; set to q->now + delay in htb_add_to_wait_tree */
+
+       int prio_activity;      /* for which prios are we active */
+       enum htb_cmode cmode;   /* current mode of the class */
+
+       /* class attached filters */
+       struct tcf_proto *filter_list;
+       int filter_cnt;
+
+       /* token bucket parameters */
+       struct qdisc_rate_table *rate;  /* rate table of the class itself */
+       struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
+       long buffer, cbuffer;   /* token bucket depth/rate */
+       psched_tdiff_t mbuffer; /* max wait time */
+       long tokens, ctokens;   /* current number of tokens */
+       psched_time_t t_c;      /* checkpoint time */
 };
 
-/* TODO: maybe compute rate when size is too large .. or drop ? */
-static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
-       int size)
-{ 
-    int slot = size >> rate->rate.cell_log;
-    if (slot > 255) {
-       cl->xstats.giants++;
-       slot = 255;
-    }
-    return rate->data[slot];
-}
+struct htb_sched {
+       struct Qdisc_class_hash clhash;
+       struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
-struct htb_sched
-{
-    struct list_head root;                     /* root classes list */
-    struct list_head hash[HTB_HSIZE];          /* hashed by classid */
-    struct list_head drops[TC_HTB_NUMPRIO];    /* active leaves (for drops) */
-    
-    /* self list - roots of self generating tree */
-    struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    int row_mask[TC_HTB_MAXDEPTH];
-    struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-
-    /* self wait list - roots of wait PQs per row */
-    struct rb_root wait_pq[TC_HTB_MAXDEPTH];
-
-    /* time of nearest event per level (row) */
-    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
-
-    /* cached value of jiffies in dequeue */
-    unsigned long jiffies;
-
-    /* whether we hit non-work conserving class during this dequeue; we use */
-    int nwc_hit;       /* this to disable mindelay complaint in dequeue */
-
-    int defcls;                /* class where unclassified flows go to */
-    u32 debug;         /* subsystem debug levels */
-
-    /* filters for qdisc itself */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
-
-    int rate2quantum;          /* quant = rate / rate2quantum */
-    psched_time_t now;         /* cached dequeue time */
-    struct timer_list timer;   /* send delay timer */
-#ifdef HTB_RATECM
-    struct timer_list rttim;   /* rate computer timer */
-    int recmp_bucket;          /* which hash bucket to recompute next */
-#endif
-    
-    /* non shaped skbs; let them go directly thru */
-    struct sk_buff_head direct_queue;
-    int direct_qlen;  /* max qlen of above */
+       /* self list - roots of self generating tree */
+       struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+       int row_mask[TC_HTB_MAXDEPTH];
+       struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+       u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 
-    long direct_pkts;
-};
+       /* self wait list - roots of wait PQs per row */
+       struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 
-/* compute hash of size HTB_HSIZE for given handle */
-static __inline__ int htb_hash(u32 h) 
-{
-#if HTB_HSIZE != 16
- #error "Declare new hash for your HTB_HSIZE"
-#endif
-    h ^= h>>8; /* stolen from cbq_hash */
-    h ^= h>>4;
-    return h & 0xf;
-}
+       /* time of nearest event per level (row) */
+       psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
+
+       int defcls;             /* class where unclassified flows go to */
+
+       /* filters for qdisc itself */
+       struct tcf_proto *filter_list;
+
+       int rate2quantum;       /* quant = rate / rate2quantum */
+       psched_time_t now;      /* cached dequeue time */
+       struct qdisc_watchdog watchdog;
+
+       /* non shaped skbs; let them go directly thru */
+       struct sk_buff_head direct_queue;
+       int direct_qlen;        /* max qlen of above */
+
+       long direct_pkts;
+
+#define HTB_WARN_TOOMANYEVENTS 0x1
+       unsigned int warned;    /* only one warning */
+       struct work_struct work;
+};
 
 /* find class in global hash table using given handle */
-static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
+static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       struct list_head *p;
-       if (TC_H_MAJ(handle) != sch->handle) 
+       struct Qdisc_class_common *clc;
+
+       clc = qdisc_class_find(&q->clhash, handle);
+       if (clc == NULL)
                return NULL;
-       
-       list_for_each (p,q->hash+htb_hash(handle)) {
-               struct htb_class *cl = list_entry(p,struct htb_class,hlist);
-               if (cl->classid == handle)
-                       return cl;
-       }
-       return NULL;
+       return container_of(clc, struct htb_class, common);
 }
 
 /**
@@ -294,17 +180,14 @@ static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
  * We allow direct class selection by classid in priority. Then we examine
  * filters in qdisc and in inner nodes (if higher filter points to the inner
  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
- * internal fifo (direct). These packets then go directly thru. If we still 
+ * internal fifo (direct). These packets then go directly thru. If we still
  * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
  * then finish and return direct queue.
  */
 #define HTB_DIRECT (struct htb_class*)-1
-static inline u32 htb_classid(struct htb_class *cl)
-{
-       return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
-}
 
-static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
+                                     int *qerr)
 {
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl;
@@ -316,116 +199,60 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
           note that nfmark can be used too by attaching filter fw with no
           rules in it */
        if (skb->priority == sch->handle)
-               return HTB_DIRECT;  /* X:0 (direct flow) selected */
-       if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 
+               return HTB_DIRECT;      /* X:0 (direct flow) selected */
+       if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
                return cl;
 
-       *qerr = NET_XMIT_BYPASS;
+       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        tcf = q->filter_list;
        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
-               case TC_ACT_STOLEN: 
-                       *qerr = NET_XMIT_SUCCESS;
+               case TC_ACT_STOLEN:
+                       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                case TC_ACT_SHOT:
                        return NULL;
                }
-#elif defined(CONFIG_NET_CLS_POLICE)
-               if (result == TC_POLICE_SHOT)
-                       return HTB_DIRECT;
 #endif
-               if ((cl = (void*)res.class) == NULL) {
+               if ((cl = (void *)res.class) == NULL) {
                        if (res.classid == sch->handle)
-                               return HTB_DIRECT;  /* X:0 (direct flow) */
-                       if ((cl = htb_find(res.classid,sch)) == NULL)
-                               break; /* filter selected invalid classid */
+                               return HTB_DIRECT;      /* X:0 (direct flow) */
+                       if ((cl = htb_find(res.classid, sch)) == NULL)
+                               break;  /* filter selected invalid classid */
                }
                if (!cl->level)
-                       return cl; /* we hit leaf; return it */
+                       return cl;      /* we hit leaf; return it */
 
                /* we have got inner class; apply inner filter chain */
                tcf = cl->filter_list;
        }
        /* classification failed; try to use default class */
-       cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
+       cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
        if (!cl || cl->level)
-               return HTB_DIRECT; /* bad default .. this is safe bet */
+               return HTB_DIRECT;      /* bad default .. this is safe bet */
        return cl;
 }
 
-#ifdef HTB_DEBUG
-static void htb_next_rb_node(struct rb_node **n);
-#define HTB_DUMTREE(root,memb) if(root) { \
-       struct rb_node *n = (root)->rb_node; \
-       while (n->rb_left) n = n->rb_left; \
-       while (n) { \
-               struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
-               printk(" %x",cl->classid); htb_next_rb_node (&n); \
-       } }
-
-static void htb_debug_dump (struct htb_sched *q)
-{
-       int i,p;
-       printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
-       /* rows */
-       for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
-               printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
-               for (p=0;p<TC_HTB_NUMPRIO;p++) {
-                       if (!q->row[i][p].rb_node) continue;
-                       printk(" p%d:",p);
-                       HTB_DUMTREE(q->row[i]+p,node[p]);
-               }
-               printk("\n");
-       }
-       /* classes */
-       for (i = 0; i < HTB_HSIZE; i++) {
-               struct list_head *l;
-               list_for_each (l,q->hash+i) {
-                       struct htb_class *cl = list_entry(l,struct htb_class,hlist);
-                       long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-                       printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
-                                       "pa=%x f:",
-                               cl->classid,cl->cmode,cl->tokens,cl->ctokens,
-                               cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
-                               cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
-                       if (cl->level)
-                       for (p=0;p<TC_HTB_NUMPRIO;p++) {
-                               if (!cl->un.inner.feed[p].rb_node) continue;
-                               printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
-                               HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
-                       }
-                       printk("\n");
-               }
-       }
-}
-#endif
 /**
  * htb_add_to_id_tree - adds class to the round robin list
  *
  * Routine adds class to the list (actually tree) sorted by classid.
  * Make sure that class is not already on such list for given prio.
  */
-static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
-               struct htb_class *cl,int prio)
+static void htb_add_to_id_tree(struct rb_root *root,
+                              struct htb_class *cl, int prio)
 {
        struct rb_node **p = &root->rb_node, *parent = NULL;
-       HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
-#ifdef HTB_DEBUG
-       if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
-       HTB_CHCL(cl);
-       if (*p) {
-               struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
-               HTB_CHCL(x);
-       }
-#endif
+
        while (*p) {
-               struct htb_class *c; parent = *p;
+               struct htb_class *c;
+               parent = *p;
                c = rb_entry(parent, struct htb_class, node[prio]);
-               HTB_CHCL(c);
-               if (cl->classid > c->classid)
+
+               if (cl->common.classid > c->common.classid)
                        p = &parent->rb_right;
-               else 
+               else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->node[prio], parent, p);
@@ -439,31 +266,26 @@ static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
  * change its mode in cl->pq_key microseconds. Make sure that class is not
  * already in the queue.
  */
-static void htb_add_to_wait_tree (struct htb_sched *q,
-               struct htb_class *cl,long delay,int debug_hint)
+static void htb_add_to_wait_tree(struct htb_sched *q,
+                                struct htb_class *cl, long delay)
 {
        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
-       HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
-#ifdef HTB_DEBUG
-       if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
-       HTB_CHCL(cl);
-       if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
-               printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
-#endif
-       cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
-       if (cl->pq_key == q->jiffies)
+
+       cl->pq_key = q->now + delay;
+       if (cl->pq_key == q->now)
                cl->pq_key++;
 
        /* update the nearest event cache */
-       if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
+       if (q->near_ev_cache[cl->level] > cl->pq_key)
                q->near_ev_cache[cl->level] = cl->pq_key;
-       
+
        while (*p) {
-               struct htb_class *c; parent = *p;
+               struct htb_class *c;
+               parent = *p;
                c = rb_entry(parent, struct htb_class, pq_node);
-               if (time_after_eq(cl->pq_key, c->pq_key))
+               if (cl->pq_key >= c->pq_key)
                        p = &parent->rb_right;
-               else 
+               else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->pq_node, parent, p);
@@ -476,7 +298,7 @@ static void htb_add_to_wait_tree (struct htb_sched *q,
  * When we are past last key we return NULL.
  * Average complexity is 2 steps per call.
  */
-static void htb_next_rb_node(struct rb_node **n)
+static inline void htb_next_rb_node(struct rb_node **n)
 {
        *n = rb_next(*n);
 }
@@ -487,42 +309,51 @@ static void htb_next_rb_node(struct rb_node **n)
  * The class is added to row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static inline void htb_add_class_to_row(struct htb_sched *q, 
-               struct htb_class *cl,int mask)
+static inline void htb_add_class_to_row(struct htb_sched *q,
+                                       struct htb_class *cl, int mask)
 {
-       HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
-                       cl->classid,mask,q->row_mask[cl->level]);
-       HTB_CHCL(cl);
        q->row_mask[cl->level] |= mask;
        while (mask) {
                int prio = ffz(~mask);
                mask &= ~(1 << prio);
-               htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
+               htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
+       }
+}
+
+/* Erase rb from root and then mark the node as cleared (RB_CLEAR_NODE).
+ * A node that is already marked empty is not erased; it only raises
+ * WARN_ON. If this triggers, it is a bug in this code, but it need not
+ * be fatal. */
+static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
+{
+       if (RB_EMPTY_NODE(rb)) {
+               WARN_ON(1);
+       } else {
+               rb_erase(rb, root);
+               RB_CLEAR_NODE(rb);
        }
 }
 
+
 /**
  * htb_remove_class_from_row - removes class from its row
  *
  * The class is removed from row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
-               struct htb_class *cl,int mask)
+static inline void htb_remove_class_from_row(struct htb_sched *q,
+                                                struct htb_class *cl, int mask)
 {
        int m = 0;
-       HTB_CHCL(cl);
+
        while (mask) {
                int prio = ffz(~mask);
+
                mask &= ~(1 << prio);
-               if (q->ptr[cl->level][prio] == cl->node+prio)
-                       htb_next_rb_node(q->ptr[cl->level]+prio);
-               htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
-               if (!q->row[cl->level][prio].rb_node) 
+               if (q->ptr[cl->level][prio] == cl->node + prio)
+                       htb_next_rb_node(q->ptr[cl->level] + prio);
+
+               htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
+               if (!q->row[cl->level][prio].rb_node)
                        m |= 1 << prio;
        }
-       HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
-                       cl->classid,mask,q->row_mask[cl->level],m);
        q->row_mask[cl->level] &= ~m;
 }
 
@@ -530,115 +361,120 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
  * htb_activate_prios - creates active class's feed chain
  *
  * The class is connected to ancestors and/or appropriate rows
- * for priorities it is participating on. cl->cmode must be new 
+ * for priorities it is participating on. cl->cmode must be new
  * (activated) mode. It does nothing if cl->prio_activity == 0.
  */
-static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
+static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 {
        struct htb_class *p = cl->parent;
-       long m,mask = cl->prio_activity;
-       HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-       HTB_CHCL(cl);
+       long m, mask = cl->prio_activity;
 
        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-               HTB_CHCL(p);
-               m = mask; while (m) {
+               m = mask;
+               while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);
-                       
+
                        if (p->un.inner.feed[prio].rb_node)
                                /* parent already has its feed in use so that
                                   reset bit in mask as parent is already ok */
                                mask &= ~(1 << prio);
-                       
-                       htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
+
+                       htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
                }
-               HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-                               p->classid,p->prio_activity,mask,p->cmode);
                p->prio_activity |= mask;
-               cl = p; p = cl->parent;
-               HTB_CHCL(cl);
+               cl = p;
+               p = cl->parent;
+
        }
        if (cl->cmode == HTB_CAN_SEND && mask)
-               htb_add_class_to_row(q,cl,mask);
+               htb_add_class_to_row(q, cl, mask);
 }
 
 /**
  * htb_deactivate_prios - remove class from feed chain
  *
- * cl->cmode must represent old mode (before deactivation). It does 
+ * cl->cmode must represent old mode (before deactivation). It does
  * nothing if cl->prio_activity == 0. Class is removed from all feed
  * chains and rows.
  */
 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 {
        struct htb_class *p = cl->parent;
-       long m,mask = cl->prio_activity;
-       HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-       HTB_CHCL(cl);
+       long m, mask = cl->prio_activity;
 
        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-               m = mask; mask = 0; 
+               m = mask;
+               mask = 0;
                while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);
-                       
-                       if (p->un.inner.ptr[prio] == cl->node+prio) {
+
+                       if (p->un.inner.ptr[prio] == cl->node + prio) {
                                /* we are removing child which is pointed to from
                                   parent feed - forget the pointer but remember
                                   classid */
-                               p->un.inner.last_ptr_id[prio] = cl->classid;
+                               p->un.inner.last_ptr_id[prio] = cl->common.classid;
                                p->un.inner.ptr[prio] = NULL;
                        }
-                       
-                       htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
-                       
-                       if (!p->un.inner.feed[prio].rb_node) 
+
+                       htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
+
+                       if (!p->un.inner.feed[prio].rb_node)
                                mask |= 1 << prio;
                }
-               HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-                               p->classid,p->prio_activity,mask,p->cmode);
+
                p->prio_activity &= ~mask;
-               cl = p; p = cl->parent;
-               HTB_CHCL(cl);
+               cl = p;
+               p = cl->parent;
+
        }
-       if (cl->cmode == HTB_CAN_SEND && mask) 
-               htb_remove_class_from_row(q,cl,mask);
+       if (cl->cmode == HTB_CAN_SEND && mask)
+               htb_remove_class_from_row(q, cl, mask);
 }
 
+static inline long htb_lowater(const struct htb_class *cl)     /* ctokens threshold below which htb_class_mode() returns HTB_CANT_SEND */
+{
+       if (htb_hysteresis)
+               return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;   /* hysteresis: allow down to -cbuffer unless already CANT_SEND */
+       else
+               return 0;
+}
+static inline long htb_hiwater(const struct htb_class *cl)     /* tokens threshold at or above which htb_class_mode() returns HTB_CAN_SEND */
+{
+       if (htb_hysteresis)
+               return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;     /* hysteresis: a CAN_SEND class stays so down to -buffer */
+       else
+               return 0;
+}
+
+
 /**
  * htb_class_mode - computes and returns current class mode
  *
  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
- * from now to time when cl will change its state. 
+ * from now to time when cl will change its state.
  * Also it is worth to note that class mode doesn't change simply
- * at cl->{c,}tokens == 0 but there can rather be hysteresis of 
+ * at cl->{c,}tokens == 0 but there can rather be hysteresis of
  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
  * mode transitions per time unit. The speed gain is about 1/6.
  */
-static __inline__ enum htb_cmode 
-htb_class_mode(struct htb_class *cl,long *diff)
+static inline enum htb_cmode
+htb_class_mode(struct htb_class *cl, long *diff)
 {
-    long toks;
+       long toks;
 
-    if ((toks = (cl->ctokens + *diff)) < (
-#if HTB_HYSTERESIS
-           cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
-#endif
-                   0)) {
-           *diff = -toks;
-           return HTB_CANT_SEND;
-    }
-    if ((toks = (cl->tokens + *diff)) >= (
-#if HTB_HYSTERESIS
-           cl->cmode == HTB_CAN_SEND ? -cl->buffer :
-#endif
-           0))
-           return HTB_CAN_SEND;
+       if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
+               *diff = -toks;
+               return HTB_CANT_SEND;
+       }
+
+       if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
+               return HTB_CAN_SEND;
 
-    *diff = -toks;
-    return HTB_MAY_BORROW;
+       *diff = -toks;
+       return HTB_MAY_BORROW;
 }
 
 /**
@@ -650,167 +486,125 @@ htb_class_mode(struct htb_class *cl,long *diff)
  * be different from old one and cl->pq_key has to be valid if changing
  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
  */
-static void 
+static void
 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
-{ 
-       enum htb_cmode new_mode = htb_class_mode(cl,diff);
-       
-       HTB_CHCL(cl);
-       HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
+{
+       enum htb_cmode new_mode = htb_class_mode(cl, diff);
 
        if (new_mode == cl->cmode)
-               return; 
-       
-       if (cl->prio_activity) { /* not necessary: speed optimization */
-               if (cl->cmode != HTB_CANT_SEND) 
-                       htb_deactivate_prios(q,cl);
+               return;
+
+       if (cl->prio_activity) {        /* not necessary: speed optimization */
+               if (cl->cmode != HTB_CANT_SEND)
+                       htb_deactivate_prios(q, cl);
                cl->cmode = new_mode;
-               if (new_mode != HTB_CANT_SEND) 
-                       htb_activate_prios(q,cl);
-       } else 
+               if (new_mode != HTB_CANT_SEND)
+                       htb_activate_prios(q, cl);
+       } else
                cl->cmode = new_mode;
 }
 
 /**
- * htb_activate - inserts leaf cl into appropriate active feeds 
+ * htb_activate - inserts leaf cl into appropriate active feeds
  *
  * Routine learns (new) priority of leaf and activates feed chain
  * for the prio. It can be called on already active leaf safely.
  * It also adds leaf into droplist.
  */
-static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 {
-       BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
-       HTB_CHCL(cl);
+       WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
+
        if (!cl->prio_activity) {
-               cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
-               htb_activate_prios(q,cl);
-               list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
+               cl->prio_activity = 1 << cl->prio;
+               htb_activate_prios(q, cl);
+               list_add_tail(&cl->un.leaf.drop_list,
+                             q->drops + cl->prio);
        }
 }
 
 /**
- * htb_deactivate - remove leaf cl from active feeds 
+ * htb_deactivate - remove leaf cl from active feeds
  *
  * Make sure that leaf is active. In the other words it can't be called
  * with non-active leaf. It also removes class from the drop list.
  */
-static __inline__ void 
-htb_deactivate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 {
-       BUG_TRAP(cl->prio_activity);
-       HTB_CHCL(cl);
-       htb_deactivate_prios(q,cl);
+       WARN_ON(!cl->prio_activity);
+
+       htb_deactivate_prios(q, cl);
        cl->prio_activity = 0;
        list_del_init(&cl->un.leaf.drop_list);
 }
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-    int ret;
-    struct htb_sched *q = qdisc_priv(sch);
-    struct htb_class *cl = htb_classify(skb,sch,&ret);
-
-    if (cl == HTB_DIRECT) {
-       /* enqueue to helper queue */
-       if (q->direct_queue.qlen < q->direct_qlen) {
-           __skb_queue_tail(&q->direct_queue, skb);
-           q->direct_pkts++;
-       } else {
-           kfree_skb(skb);
-           sch->qstats.drops++;
-           return NET_XMIT_DROP;
-       }
+       int uninitialized_var(ret);
+       struct htb_sched *q = qdisc_priv(sch);
+       struct htb_class *cl = htb_classify(skb, sch, &ret);
+
+       if (cl == HTB_DIRECT) {
+               /* enqueue to helper queue */
+               if (q->direct_queue.qlen < q->direct_qlen) {
+                       __skb_queue_tail(&q->direct_queue, skb);
+                       q->direct_pkts++;
+               } else {
+                       kfree_skb(skb);
+                       sch->qstats.drops++;
+                       return NET_XMIT_DROP;
+               }
 #ifdef CONFIG_NET_CLS_ACT
-    } else if (!cl) {
-       if (ret == NET_XMIT_BYPASS)
-               sch->qstats.drops++;
-       kfree_skb (skb);
-       return ret;
+       } else if (!cl) {
+               if (ret & __NET_XMIT_BYPASS)
+                       sch->qstats.drops++;
+               kfree_skb(skb);
+               return ret;
 #endif
-    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-       sch->qstats.drops++;
-       cl->qstats.drops++;
-       return NET_XMIT_DROP;
-    } else {
-       cl->bstats.packets++; cl->bstats.bytes += skb->len;
-       htb_activate (q,cl);
-    }
-
-    sch->q.qlen++;
-    sch->bstats.packets++; sch->bstats.bytes += skb->len;
-    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
-    return NET_XMIT_SUCCESS;
-}
-
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-    struct htb_sched *q = qdisc_priv(sch);
-    int ret =  NET_XMIT_SUCCESS;
-    struct htb_class *cl = htb_classify(skb,sch, &ret);
-    struct sk_buff *tskb;
-
-    if (cl == HTB_DIRECT || !cl) {
-       /* enqueue to helper queue */
-       if (q->direct_queue.qlen < q->direct_qlen && cl) {
-           __skb_queue_head(&q->direct_queue, skb);
+       } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+               if (net_xmit_drop_count(ret)) {
+                       sch->qstats.drops++;
+                       cl->qstats.drops++;
+               }
+               return ret;
        } else {
-            __skb_queue_head(&q->direct_queue, skb);
-            tskb = __skb_dequeue_tail(&q->direct_queue);
-            kfree_skb (tskb);
-            sch->qstats.drops++;
-            return NET_XMIT_CN;        
+               cl->bstats.packets +=
+                       skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
+               cl->bstats.bytes += qdisc_pkt_len(skb);
+               htb_activate(q, cl);
        }
-    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-       sch->qstats.drops++;
-       cl->qstats.drops++;
-       return NET_XMIT_DROP;
-    } else 
-           htb_activate (q,cl);
-
-    sch->q.qlen++;
-    sch->qstats.requeues++;
-    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
-    return NET_XMIT_SUCCESS;
+
+       sch->q.qlen++;
+       sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
+       sch->bstats.bytes += qdisc_pkt_len(skb);
+       return NET_XMIT_SUCCESS;
 }
 
-static void htb_timer(unsigned long arg)
+static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
 {
-    struct Qdisc *sch = (struct Qdisc*)arg;
-    sch->flags &= ~TCQ_F_THROTTLED;
-    wmb();
-    netif_schedule(sch->dev);
+       long toks = diff + cl->tokens;
+
+       if (toks > cl->buffer)
+               toks = cl->buffer;
+       toks -= (long) qdisc_l2t(cl->rate, bytes);
+       if (toks <= -cl->mbuffer)
+               toks = 1 - cl->mbuffer;
+
+       cl->tokens = toks;
 }
 
-#ifdef HTB_RATECM
-#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
-static void htb_rate_timer(unsigned long arg)
+static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
 {
-       struct Qdisc *sch = (struct Qdisc*)arg;
-       struct htb_sched *q = qdisc_priv(sch);
-       struct list_head *p;
-
-       /* lock queue so that we can muck with it */
-       HTB_QLOCK(sch);
-       HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
-
-       q->rttim.expires = jiffies + HZ;
-       add_timer(&q->rttim);
-
-       /* scan and recompute one bucket at time */
-       if (++q->recmp_bucket >= HTB_HSIZE) 
-               q->recmp_bucket = 0;
-       list_for_each (p,q->hash+q->recmp_bucket) {
-               struct htb_class *cl = list_entry(p,struct htb_class,hlist);
-               HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
-                               cl->classid,cl->sum_bytes,cl->sum_packets);
-               RT_GEN (cl->sum_bytes,cl->rate_bytes);
-               RT_GEN (cl->sum_packets,cl->rate_packets);
-       }
-       HTB_QUNLOCK(sch);
+       long toks = diff + cl->ctokens;
+
+       if (toks > cl->cbuffer)
+               toks = cl->cbuffer;
+       toks -= (long) qdisc_l2t(cl->ceil, bytes);
+       if (toks <= -cl->mbuffer)
+               toks = 1 - cl->mbuffer;
+
+       cl->ctokens = toks;
 }
-#endif
 
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
@@ -823,67 +617,41 @@ static void htb_rate_timer(unsigned long arg)
  * CAN_SEND) because we can use more precise clock that event queue here.
  * In such case we remove class from event queue first.
  */
-static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
-               int level,int bytes)
-{      
-       long toks,diff;
+static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
+                            int level, struct sk_buff *skb)
+{
+       int bytes = qdisc_pkt_len(skb);
        enum htb_cmode old_mode;
-       HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
-
-#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
-       if (toks > cl->B) toks = cl->B; \
-       toks -= L2T(cl, cl->R, bytes); \
-       if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
-       cl->T = toks
+       long diff;
 
        while (cl) {
-               HTB_CHCL(cl);
-               diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-               if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-                       if (net_ratelimit())
-                               printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-                                      cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-                                      q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-                                      cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-                                      (unsigned long long) q->now,
-                                      (unsigned long long) cl->t_c,
-#endif
-                                      q->jiffies);
-                       diff = 1000;
-               }
-#endif
+               diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
                if (cl->level >= level) {
-                       if (cl->level == level) cl->xstats.lends++;
-                       HTB_ACCNT (tokens,buffer,rate);
+                       if (cl->level == level)
+                               cl->xstats.lends++;
+                       htb_accnt_tokens(cl, bytes, diff);
                } else {
                        cl->xstats.borrows++;
-                       cl->tokens += diff; /* we moved t_c; update tokens */
+                       cl->tokens += diff;     /* we moved t_c; update tokens */
                }
-               HTB_ACCNT (ctokens,cbuffer,ceil);
+               htb_accnt_ctokens(cl, bytes, diff);
                cl->t_c = q->now;
-               HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
 
-               old_mode = cl->cmode; diff = 0;
-               htb_change_class_mode(q,cl,&diff);
+               old_mode = cl->cmode;
+               diff = 0;
+               htb_change_class_mode(q, cl, &diff);
                if (old_mode != cl->cmode) {
                        if (old_mode != HTB_CAN_SEND)
-                               htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+                               htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
                        if (cl->cmode != HTB_CAN_SEND)
-                               htb_add_to_wait_tree (q,cl,diff,1);
+                               htb_add_to_wait_tree(q, cl, diff);
                }
-               
-#ifdef HTB_RATECM
-               /* update rate counters */
-               cl->sum_bytes += bytes; cl->sum_packets++;
-#endif
 
                /* update byte stats except for leaves which are already updated */
                if (cl->level) {
                        cl->bstats.bytes += bytes;
-                       cl->bstats.packets++;
+                       cl->bstats.packets += skb_is_gso(skb)?
+                                       skb_shinfo(skb)->gso_segs:1;
                }
                cl = cl->parent;
        }
@@ -892,69 +660,62 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
 /**
  * htb_do_events - make mode changes to classes at the level
  *
- * Scans event queue for pending events and applies them. Returns jiffies to
- * next pending event (0 for no event in pq).
- * Note: Aplied are events whose have cl->pq_key <= jiffies.
+ * Scans event queue for pending events and applies them. Returns time of
+ * next pending event (0 for no event in pq, q->now for too many events).
+ * Note: Applied are events whose have cl->pq_key <= q->now.
  */
-static long htb_do_events(struct htb_sched *q,int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level,
+                                  unsigned long start)
 {
-       int i;
-       HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
-                       level,q->wait_pq[level].rb_node,q->row_mask[level]);
-       for (i = 0; i < 500; i++) {
+       /* don't run for longer than 2 jiffies; 2 is used instead of
+          1 to simplify things when jiffy is going to be incremented
+          too soon */
+       unsigned long stop_at = start + 2;
+       while (time_before(jiffies, stop_at)) {
                struct htb_class *cl;
                long diff;
-               struct rb_node *p = q->wait_pq[level].rb_node;
-               if (!p) return 0;
-               while (p->rb_left) p = p->rb_left;
+               struct rb_node *p = rb_first(&q->wait_pq[level]);
+
+               if (!p)
+                       return 0;
 
                cl = rb_entry(p, struct htb_class, pq_node);
-               if (time_after(cl->pq_key, q->jiffies)) {
-                       HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
-                       return cl->pq_key - q->jiffies;
-               }
-               htb_safe_rb_erase(p,q->wait_pq+level);
-               diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-               if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-                       if (net_ratelimit())
-                               printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-                                      cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-                                      q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-                                      cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-                                      (unsigned long long) q->now,
-                                      (unsigned long long) cl->t_c,
-#endif
-                                      q->jiffies);
-                       diff = 1000;
-               }
-#endif
-               htb_change_class_mode(q,cl,&diff);
+               if (cl->pq_key > q->now)
+                       return cl->pq_key;
+
+               htb_safe_rb_erase(p, q->wait_pq + level);
+               diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
+               htb_change_class_mode(q, cl, &diff);
                if (cl->cmode != HTB_CAN_SEND)
-                       htb_add_to_wait_tree (q,cl,diff,2);
+                       htb_add_to_wait_tree(q, cl, diff);
        }
-       if (net_ratelimit())
-               printk(KERN_WARNING "htb: too many events !\n");
-       return HZ/10;
+
+       /* too much load - let's continue after a break for scheduling */
+       if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
+               printk(KERN_WARNING "htb: too many events!\n");
+               q->warned |= HTB_WARN_TOOMANYEVENTS;
+       }
+
+       return q->now;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
    is no such one exists. */
-static struct rb_node *
-htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
+static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
+                                             u32 id)
 {
        struct rb_node *r = NULL;
        while (n) {
-               struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
-               if (id == cl->classid) return n;
-               
-               if (id > cl->classid) {
+               struct htb_class *cl =
+                   rb_entry(n, struct htb_class, node[prio]);
+
+               if (id > cl->common.classid) {
                        n = n->rb_right;
-               } else {
+               } else if (id < cl->common.classid) {
                        r = n;
                        n = n->rb_left;
+               } else {
+                       return n;
                }
        }
        return r;
@@ -965,185 +726,168 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
  *
  * Find leaf where current feed pointers points to.
  */
-static struct htb_class *
-htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
+static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
+                                        struct rb_node **pptr, u32 * pid)
 {
        int i;
        struct {
                struct rb_node *root;
                struct rb_node **pptr;
                u32 *pid;
-       } stk[TC_HTB_MAXDEPTH],*sp = stk;
-       
-       BUG_TRAP(tree->rb_node);
+       } stk[TC_HTB_MAXDEPTH], *sp = stk;
+
+       BUG_ON(!tree->rb_node);
        sp->root = tree->rb_node;
        sp->pptr = pptr;
        sp->pid = pid;
 
        for (i = 0; i < 65535; i++) {
-               HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);
-               
-               if (!*sp->pptr && *sp->pid) { 
-                       /* ptr was invalidated but id is valid - try to recover 
+               if (!*sp->pptr && *sp->pid) {
+                       /* ptr was invalidated but id is valid - try to recover
                           the original or next ptr */
-                       *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
+                       *sp->pptr =
+                           htb_id_find_next_upper(prio, sp->root, *sp->pid);
                }
-               *sp->pid = 0; /* ptr is valid now so that remove this hint as it
-                                can become out of date quickly */
-               if (!*sp->pptr) { /* we are at right end; rewind & go up */
+               *sp->pid = 0;   /* ptr is valid now so that remove this hint as it
+                                  can become out of date quickly */
+               if (!*sp->pptr) {       /* we are at right end; rewind & go up */
                        *sp->pptr = sp->root;
-                       while ((*sp->pptr)->rb_left) 
+                       while ((*sp->pptr)->rb_left)
                                *sp->pptr = (*sp->pptr)->rb_left;
                        if (sp > stk) {
                                sp--;
-                               BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
-                               htb_next_rb_node (sp->pptr);
+                               if (!*sp->pptr) {
+                                       WARN_ON(1);
+                                       return NULL;
+                               }
+                               htb_next_rb_node(sp->pptr);
                        }
                } else {
                        struct htb_class *cl;
-                       cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
-                       HTB_CHCL(cl);
-                       if (!cl->level) 
+                       cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
+                       if (!cl->level)
                                return cl;
                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
-                       sp->pptr = cl->un.inner.ptr+prio;
-                       sp->pid = cl->un.inner.last_ptr_id+prio;
+                       sp->pptr = cl->un.inner.ptr + prio;
+                       sp->pid = cl->un.inner.last_ptr_id + prio;
                }
        }
-       BUG_TRAP(0);
+       WARN_ON(1);
        return NULL;
 }
 
 /* dequeues packet at given priority and level; call only if
    you are sure that there is active class at prio/level */
-static struct sk_buff *
-htb_dequeue_tree(struct htb_sched *q,int prio,int level)
+static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
+                                       int level)
 {
        struct sk_buff *skb = NULL;
-       struct htb_class *cl,*start;
+       struct htb_class *cl, *start;
        /* look initial class up in the row */
-       start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
-                       q->ptr[level]+prio,q->last_ptr_id[level]+prio);
-       
+       start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
+                                    q->ptr[level] + prio,
+                                    q->last_ptr_id[level] + prio);
+
        do {
 next:
-               BUG_TRAP(cl); 
-               if (!cl) return NULL;
-               HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
-                               prio,level,cl->classid,cl->un.leaf.deficit[level]);
+               if (unlikely(!cl))
+                       return NULL;
 
                /* class can be empty - it is unlikely but can be true if leaf
                   qdisc drops packets in enqueue routine or if someone used
-                  graft operation on the leaf since last dequeue; 
+                  graft operation on the leaf since last dequeue;
                   simply deactivate and skip such class */
                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
                        struct htb_class *next;
-                       htb_deactivate(q,cl);
+                       htb_deactivate(q, cl);
 
                        /* row/level might become empty */
                        if ((q->row_mask[level] & (1 << prio)) == 0)
-                               return NULL; 
-                       
-                       next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
-                                       prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+                               return NULL;
+
+                       next = htb_lookup_leaf(q->row[level] + prio,
+                                              prio, q->ptr[level] + prio,
+                                              q->last_ptr_id[level] + prio);
 
-                       if (cl == start) /* fix start if we just deleted it */
+                       if (cl == start)        /* fix start if we just deleted it */
                                start = next;
                        cl = next;
                        goto next;
                }
-       
-               if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) 
+
+               skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+               if (likely(skb != NULL))
                        break;
-               if (!cl->warned) {
-                       printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
-                       cl->warned = 1;
-               }
-               q->nwc_hit++;
-               htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
-               cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
-                               q->last_ptr_id[level]+prio);
+
+               qdisc_warn_nonwc("htb", cl->un.leaf.q);
+               htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+                                 ptr[0]) + prio);
+               cl = htb_lookup_leaf(q->row[level] + prio, prio,
+                                    q->ptr[level] + prio,
+                                    q->last_ptr_id[level] + prio);
 
        } while (cl != start);
 
        if (likely(skb != NULL)) {
-               if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
-                       HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
-                               level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
-                       cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
-                       htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
+               cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
+               if (cl->un.leaf.deficit[level] < 0) {
+                       cl->un.leaf.deficit[level] += cl->quantum;
+                       htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+                                         ptr[0]) + prio);
                }
                /* this used to be after charge_class but this constelation
                   gives us slightly better performance */
                if (!cl->un.leaf.q->q.qlen)
-                       htb_deactivate (q,cl);
-               htb_charge_class (q,cl,level,skb->len);
+                       htb_deactivate(q, cl);
+               htb_charge_class(q, cl, level, skb);
        }
        return skb;
 }
 
-static void htb_delay_by(struct Qdisc *sch,long delay)
-{
-       struct htb_sched *q = qdisc_priv(sch);
-       if (delay <= 0) delay = 1;
-       if (unlikely(delay > 5*HZ)) {
-               if (net_ratelimit())
-                       printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
-               delay = 5*HZ;
-       }
-       /* why don't use jiffies here ? because expires can be in past */
-       mod_timer(&q->timer, q->jiffies + delay);
-       sch->flags |= TCQ_F_THROTTLED;
-       sch->qstats.overlimits++;
-       HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
-}
-
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 {
        struct sk_buff *skb = NULL;
        struct htb_sched *q = qdisc_priv(sch);
        int level;
-       long min_delay;
-#ifdef HTB_DEBUG
-       int evs_used = 0;
-#endif
-
-       q->jiffies = jiffies;
-       HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
-                       sch->q.qlen);
+       psched_time_t next_event;
+       unsigned long start_at;
 
        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
-       if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
+       skb = __skb_dequeue(&q->direct_queue);
+       if (skb != NULL) {
                sch->flags &= ~TCQ_F_THROTTLED;
                sch->q.qlen--;
                return skb;
        }
 
-       if (!sch->q.qlen) goto fin;
-       PSCHED_GET_TIME(q->now);
+       if (!sch->q.qlen)
+               goto fin;
+       q->now = psched_get_time();
+       start_at = jiffies;
+
+       next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 
-       min_delay = LONG_MAX;
-       q->nwc_hit = 0;
        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
                /* common case optimization - skip event handler quickly */
                int m;
-               long delay;
-               if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
-                       delay = htb_do_events(q,level);
-                       q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
-#ifdef HTB_DEBUG
-                       evs_used++;
-#endif
+               psched_time_t event;
+
+               if (q->now >= q->near_ev_cache[level]) {
+                       event = htb_do_events(q, level, start_at);
+                       if (!event)
+                               event = q->now + PSCHED_TICKS_PER_SEC;
+                       q->near_ev_cache[level] = event;
                } else
-                       delay = q->near_ev_cache[level] - q->jiffies;   
-               
-               if (delay && min_delay > delay) 
-                       min_delay = delay;
+                       event = q->near_ev_cache[level];
+
+               if (next_event > event)
+                       next_event = event;
+
                m = ~q->row_mask[level];
                while (m != (int)(-1)) {
-                       int prio = ffz (m);
+                       int prio = ffz(m);
                        m |= 1 << prio;
-                       skb = htb_dequeue_tree(q,prio,level);
+                       skb = htb_dequeue_tree(q, prio, level);
                        if (likely(skb != NULL)) {
                                sch->q.qlen--;
                                sch->flags &= ~TCQ_F_THROTTLED;
@@ -1151,40 +895,32 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
                        }
                }
        }
-#ifdef HTB_DEBUG
-       if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
-               if (min_delay == LONG_MAX) {
-                       printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
-                                       evs_used,q->jiffies,jiffies);
-                       htb_debug_dump(q);
-               } else 
-                       printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
-                                       "too small rate\n",min_delay);
-       }
-#endif
-       htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
+       sch->qstats.overlimits++;
+       if (likely(next_event > q->now))
+               qdisc_watchdog_schedule(&q->watchdog, next_event);
+       else
+               schedule_work(&q->work);
 fin:
-       HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
        return skb;
 }
 
 /* try to drop from each class (by prio) until one succeed */
-static unsigned int htb_drop(struct Qdisc* sch)
+static unsigned int htb_drop(struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
        int prio;
 
        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
                struct list_head *p;
-               list_for_each (p,q->drops+prio) {
+               list_for_each(p, q->drops + prio) {
                        struct htb_class *cl = list_entry(p, struct htb_class,
                                                          un.leaf.drop_list);
                        unsigned int len;
-                       if (cl->un.leaf.q->ops->drop && 
-                               (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+                       if (cl->un.leaf.q->ops->drop &&
+                           (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
                                sch->q.qlen--;
                                if (!cl->un.leaf.q->q.qlen)
-                                       htb_deactivate (q,cl);
+                                       htb_deactivate(q, cl);
                                return len;
                        }
                }
@@ -1194,91 +930,94 @@ static unsigned int htb_drop(struct Qdisc* sch)
 
 /* reset all classes */
 /* always caled under BH & queue lock */
-static void htb_reset(struct Qdiscsch)
+static void htb_reset(struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       int i;
-       HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
+       struct htb_class *cl;
+       struct hlist_node *n;
+       unsigned int i;
 
-       for (i = 0; i < HTB_HSIZE; i++) {
-               struct list_head *p;
-               list_for_each (p,q->hash+i) {
-                       struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
                        if (cl->level)
-                               memset(&cl->un.inner,0,sizeof(cl->un.inner));
+                               memset(&cl->un.inner, 0, sizeof(cl->un.inner));
                        else {
-                               if (cl->un.leaf.q) 
+                               if (cl->un.leaf.q)
                                        qdisc_reset(cl->un.leaf.q);
                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                        }
                        cl->prio_activity = 0;
                        cl->cmode = HTB_CAN_SEND;
-#ifdef HTB_DEBUG
-                       cl->pq_node.rb_color = -1;
-                       memset(cl->node,255,sizeof(cl->node));
-#endif
 
                }
        }
-       sch->flags &= ~TCQ_F_THROTTLED;
-       del_timer(&q->timer);
+       qdisc_watchdog_cancel(&q->watchdog);
        __skb_queue_purge(&q->direct_queue);
        sch->q.qlen = 0;
-       memset(q->row,0,sizeof(q->row));
-       memset(q->row_mask,0,sizeof(q->row_mask));
-       memset(q->wait_pq,0,sizeof(q->wait_pq));
-       memset(q->ptr,0,sizeof(q->ptr));
+       memset(q->row, 0, sizeof(q->row));
+       memset(q->row_mask, 0, sizeof(q->row_mask));
+       memset(q->wait_pq, 0, sizeof(q->wait_pq));
+       memset(q->ptr, 0, sizeof(q->ptr));
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
-               INIT_LIST_HEAD(q->drops+i);
+               INIT_LIST_HEAD(q->drops + i);
+}
+
+static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
+       [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
+       [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
+       [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+       [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+};
+
+static void htb_work_func(struct work_struct *work)
+{
+       struct htb_sched *q = container_of(work, struct htb_sched, work);
+       struct Qdisc *sch = q->watchdog.qdisc;
+
+       __netif_schedule(qdisc_root(sch));
 }
 
-static int htb_init(struct Qdisc *sch, struct rtattr *opt)
+static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       struct rtattr *tb[TCA_HTB_INIT];
+       struct nlattr *tb[TCA_HTB_INIT + 1];
        struct tc_htb_glob *gopt;
+       int err;
        int i;
-#ifdef HTB_DEBUG
-       printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
-                         HTB_VER >> 16,HTB_VER & 0xffff);
-#endif
-       if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
-                       tb[TCA_HTB_INIT-1] == NULL ||
-                       RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
+
+       if (!opt)
+               return -EINVAL;
+
+       err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
+       if (err < 0)
+               return err;
+
+       if (tb[TCA_HTB_INIT] == NULL) {
                printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
                return -EINVAL;
        }
-       gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
+       gopt = nla_data(tb[TCA_HTB_INIT]);
        if (gopt->version != HTB_VER >> 16) {
-               printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
-                               HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
+               printk(KERN_ERR
+                      "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+                      HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
                return -EINVAL;
        }
-       q->debug = gopt->debug;
-       HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
 
-       INIT_LIST_HEAD(&q->root);
-       for (i = 0; i < HTB_HSIZE; i++)
-               INIT_LIST_HEAD(q->hash+i);
+       err = qdisc_class_hash_init(&q->clhash);
+       if (err < 0)
+               return err;
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
-               INIT_LIST_HEAD(q->drops+i);
+               INIT_LIST_HEAD(q->drops + i);
 
-       init_timer(&q->timer);
+       qdisc_watchdog_init(&q->watchdog, sch);
+       INIT_WORK(&q->work, htb_work_func);
        skb_queue_head_init(&q->direct_queue);
 
-       q->direct_qlen = sch->dev->tx_queue_len;
-       if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+       q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
+       if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
                q->direct_qlen = 2;
-       q->timer.function = htb_timer;
-       q->timer.data = (unsigned long)sch;
-
-#ifdef HTB_RATECM
-       init_timer(&q->rttim);
-       q->rttim.function = htb_rate_timer;
-       q->rttim.data = (unsigned long)sch;
-       q->rttim.expires = jiffies + HZ;
-       add_timer(&q->rttim);
-#endif
+
        if ((q->rate2quantum = gopt->rate2quantum) < 1)
                q->rate2quantum = 1;
        q->defcls = gopt->defcls;
@@ -1288,82 +1027,77 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
+       spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
        struct htb_sched *q = qdisc_priv(sch);
-       unsigned char    *b = skb->tail;
-       struct rtattr *rta;
+       struct nlattr *nest;
        struct tc_htb_glob gopt;
-       HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
-       HTB_QLOCK(sch);
-       gopt.direct_pkts = q->direct_pkts;
 
-#ifdef HTB_DEBUG
-       if (HTB_DBG_COND(0,2))
-               htb_debug_dump(q);
-#endif
+       spin_lock_bh(root_lock);
+
+       gopt.direct_pkts = q->direct_pkts;
        gopt.version = HTB_VER;
        gopt.rate2quantum = q->rate2quantum;
        gopt.defcls = q->defcls;
-       gopt.debug = q->debug;
-       rta = (struct rtattr*)b;
-       RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-       RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
-       rta->rta_len = skb->tail - b;
-       HTB_QUNLOCK(sch);
+       gopt.debug = 0;
+
+       nest = nla_nest_start(skb, TCA_OPTIONS);
+       if (nest == NULL)
+               goto nla_put_failure;
+       NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
+       nla_nest_end(skb, nest);
+
+       spin_unlock_bh(root_lock);
        return skb->len;
-rtattr_failure:
-       HTB_QUNLOCK(sch);
-       skb_trim(skb, skb->tail - skb->data);
+
+nla_put_failure:
+       spin_unlock_bh(root_lock);
+       nla_nest_cancel(skb, nest);
        return -1;
 }
 
 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
-       struct sk_buff *skb, struct tcmsg *tcm)
+                         struct sk_buff *skb, struct tcmsg *tcm)
 {
-#ifdef HTB_DEBUG
-       struct htb_sched *q = qdisc_priv(sch);
-#endif
-       struct htb_class *cl = (struct htb_class*)arg;
-       unsigned char    *b = skb->tail;
-       struct rtattr *rta;
+       struct htb_class *cl = (struct htb_class *)arg;
+       spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
+       struct nlattr *nest;
        struct tc_htb_opt opt;
 
-       HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);
-
-       HTB_QLOCK(sch);
-       tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
-       tcm->tcm_handle = cl->classid;
+       spin_lock_bh(root_lock);
+       tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
+       tcm->tcm_handle = cl->common.classid;
        if (!cl->level && cl->un.leaf.q)
                tcm->tcm_info = cl->un.leaf.q->handle;
 
-       rta = (struct rtattr*)b;
-       RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+       nest = nla_nest_start(skb, TCA_OPTIONS);
+       if (nest == NULL)
+               goto nla_put_failure;
 
-       memset (&opt,0,sizeof(opt));
+       memset(&opt, 0, sizeof(opt));
 
-       opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
-       opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
-       opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
-       opt.level = cl->level; 
-       RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
-       rta->rta_len = skb->tail - b;
-       HTB_QUNLOCK(sch);
+       opt.rate = cl->rate->rate;
+       opt.buffer = cl->buffer;
+       opt.ceil = cl->ceil->rate;
+       opt.cbuffer = cl->cbuffer;
+       opt.quantum = cl->quantum;
+       opt.prio = cl->prio;
+       opt.level = cl->level;
+       NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
+
+       nla_nest_end(skb, nest);
+       spin_unlock_bh(root_lock);
        return skb->len;
-rtattr_failure:
-       HTB_QUNLOCK(sch);
-       skb_trim(skb, b - skb->data);
+
+nla_put_failure:
+       spin_unlock_bh(root_lock);
+       nla_nest_cancel(skb, nest);
        return -1;
 }
 
 static int
-htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
-       struct gnet_dump *d)
+htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 {
-       struct htb_class *cl = (struct htb_class*)arg;
-
-#ifdef HTB_RATECM
-       cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
-       cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
-#endif
+       struct htb_class *cl = (struct htb_class *)arg;
 
        if (!cl->level && cl->un.leaf.q)
                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
@@ -1371,7 +1105,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
        cl->xstats.ctokens = cl->ctokens;
 
        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-           gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+           gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
            gnet_stats_copy_queue(d, &cl->qstats) < 0)
                return -1;
 
@@ -1379,132 +1113,172 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 }
 
 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
-       struct Qdisc **old)
+                    struct Qdisc **old)
 {
-       struct htb_class *cl = (struct htb_class*)arg;
+       struct htb_class *cl = (struct htb_class *)arg;
 
-       if (cl && !cl->level) {
-               if (new == NULL && (new = qdisc_create_dflt(sch->dev, 
-                                       &pfifo_qdisc_ops)) == NULL)
-                                       return -ENOBUFS;
-               sch_tree_lock(sch);
-               if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
-                       if (cl->prio_activity)
-                               htb_deactivate (qdisc_priv(sch),cl);
+       if (cl->level)
+               return -EINVAL;
+       if (new == NULL &&
+           (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+                                    &pfifo_qdisc_ops,
+                                    cl->common.classid)) == NULL)
+               return -ENOBUFS;
 
-                       /* TODO: is it correct ? Why CBQ doesn't do it ? */
-                       sch->q.qlen -= (*old)->q.qlen;  
-                       qdisc_reset(*old);
-               }
-               sch_tree_unlock(sch);
-               return 0;
+       sch_tree_lock(sch);
+       *old = cl->un.leaf.q;
+       cl->un.leaf.q = new;
+       if (*old != NULL) {
+               qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+               qdisc_reset(*old);
        }
-       return -ENOENT;
+       sch_tree_unlock(sch);
+       return 0;
 }
 
-static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
 {
-       struct htb_class *cl = (struct htb_class*)arg;
-       return (cl && !cl->level) ? cl->un.leaf.q : NULL;
+       struct htb_class *cl = (struct htb_class *)arg;
+       return !cl->level ? cl->un.leaf.q : NULL;
+}
+
+static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+       struct htb_class *cl = (struct htb_class *)arg;
+
+       if (cl->un.leaf.q->q.qlen == 0)
+               htb_deactivate(qdisc_priv(sch), cl);
 }
 
 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 {
-#ifdef HTB_DEBUG
-       struct htb_sched *q = qdisc_priv(sch);
-#endif
-       struct htb_class *cl = htb_find(classid,sch);
-       HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
-       if (cl) 
+       struct htb_class *cl = htb_find(classid, sch);
+       if (cl)
                cl->refcnt++;
        return (unsigned long)cl;
 }
 
-static void htb_destroy_filters(struct tcf_proto **fl)
+static inline int htb_parent_last_child(struct htb_class *cl)
 {
-       struct tcf_proto *tp;
+       if (!cl->parent)
+               /* the root class */
+               return 0;
+       if (cl->parent->children > 1)
+               /* not the last child */
+               return 0;
+       return 1;
+}
 
-       while ((tp = *fl) != NULL) {
-               *fl = tp->next;
-               tcf_destroy(tp);
-       }
+static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
+                              struct Qdisc *new_q)
+{
+       struct htb_class *parent = cl->parent;
+
+       WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
+
+       if (parent->cmode != HTB_CAN_SEND)
+               htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
+
+       parent->level = 0;
+       memset(&parent->un.inner, 0, sizeof(parent->un.inner));
+       INIT_LIST_HEAD(&parent->un.leaf.drop_list);
+       parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
+       parent->tokens = parent->buffer;
+       parent->ctokens = parent->cbuffer;
+       parent->t_c = psched_get_time();
+       parent->cmode = HTB_CAN_SEND;
 }
 
-static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
+static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
-       struct htb_sched *q = qdisc_priv(sch);
-       HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
        if (!cl->level) {
-               BUG_TRAP(cl->un.leaf.q);
-               sch->q.qlen -= cl->un.leaf.q->q.qlen;
+               WARN_ON(!cl->un.leaf.q);
                qdisc_destroy(cl->un.leaf.q);
        }
+       gen_kill_estimator(&cl->bstats, &cl->rate_est);
        qdisc_put_rtab(cl->rate);
        qdisc_put_rtab(cl->ceil);
-       
-       htb_destroy_filters (&cl->filter_list);
-       
-       while (!list_empty(&cl->children)) 
-               htb_destroy_class (sch,list_entry(cl->children.next,
-                                       struct htb_class,sibling));
-
-       /* note: this delete may happen twice (see htb_delete) */
-       list_del(&cl->hlist);
-       list_del(&cl->sibling);
-       
-       if (cl->prio_activity)
-               htb_deactivate (q,cl);
-       
-       if (cl->cmode != HTB_CAN_SEND)
-               htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
-       
+
+       tcf_destroy_chain(&cl->filter_list);
        kfree(cl);
 }
 
-/* always caled under BH & queue lock */
-static void htb_destroy(struct Qdisc* sch)
+static void htb_destroy(struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       HTB_DBG(0,1,"htb_destroy q=%p\n",q);
+       struct hlist_node *n, *next;
+       struct htb_class *cl;
+       unsigned int i;
 
-       del_timer_sync (&q->timer);
-#ifdef HTB_RATECM
-       del_timer_sync (&q->rttim);
-#endif
+       cancel_work_sync(&q->work);
+       qdisc_watchdog_cancel(&q->watchdog);
        /* This line used to be after htb_destroy_class call below
-          and surprisingly it worked in 2.4. But it must precede it 
+          and surprisingly it worked in 2.4. But it must precede it
           because filter need its target class alive to be able to call
           unbind_filter on it (without Oops). */
-       htb_destroy_filters(&q->filter_list);
-       
-       while (!list_empty(&q->root)) 
-               htb_destroy_class (sch,list_entry(q->root.next,
-                                       struct htb_class,sibling));
+       tcf_destroy_chain(&q->filter_list);
 
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
+                       tcf_destroy_chain(&cl->filter_list);
+       }
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+                                         common.hnode)
+                       htb_destroy_class(sch, cl);
+       }
+       qdisc_class_hash_destroy(&q->clhash);
        __skb_queue_purge(&q->direct_queue);
 }
 
 static int htb_delete(struct Qdisc *sch, unsigned long arg)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       struct htb_class *cl = (struct htb_class*)arg;
-       HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+       struct htb_class *cl = (struct htb_class *)arg;
+       unsigned int qlen;
+       struct Qdisc *new_q = NULL;
+       int last_child = 0;
 
        // TODO: why don't allow to delete subtree ? references ? does
        // tc subsys quarantee us that in htb_destroy it holds no class
        // refs so that we can remove children safely there ?
-       if (!list_empty(&cl->children) || cl->filter_cnt)
+       if (cl->children || cl->filter_cnt)
                return -EBUSY;
-       
+
+       if (!cl->level && htb_parent_last_child(cl)) {
+               new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+                                         &pfifo_qdisc_ops,
+                                         cl->parent->common.classid);
+               last_child = 1;
+       }
+
        sch_tree_lock(sch);
-       
+
+       if (!cl->level) {
+               qlen = cl->un.leaf.q->q.qlen;
+               qdisc_reset(cl->un.leaf.q);
+               qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+       }
+
        /* delete from hash and active; remainder in destroy_class */
-       list_del_init(&cl->hlist);
+       qdisc_class_hash_remove(&q->clhash, &cl->common);
+       if (cl->parent)
+               cl->parent->children--;
+
        if (cl->prio_activity)
-               htb_deactivate (q,cl);
+               htb_deactivate(q, cl);
 
-       if (--cl->refcnt == 0)
-               htb_destroy_class(sch,cl);
+       if (cl->cmode != HTB_CAN_SEND)
+               htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
+       if (last_child)
+               htb_parent_to_leaf(q, cl, new_q);
+
+       BUG_ON(--cl->refcnt == 0);
+       /*
+        * This shouldn't happen: we "hold" one cops->get() when called
+        * from tc_ctl_tclass; the destroy method is done from cops->put().
+        */
 
        sch_tree_unlock(sch);
        return 0;
@@ -1512,45 +1286,66 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 static void htb_put(struct Qdisc *sch, unsigned long arg)
 {
-#ifdef HTB_DEBUG
-       struct htb_sched *q = qdisc_priv(sch);
-#endif
-       struct htb_class *cl = (struct htb_class*)arg;
-       HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+       struct htb_class *cl = (struct htb_class *)arg;
 
        if (--cl->refcnt == 0)
-               htb_destroy_class(sch,cl);
+               htb_destroy_class(sch, cl);
 }
 
-static int htb_change_class(struct Qdisc *sch, u32 classid, 
-               u32 parentid, struct rtattr **tca, unsigned long *arg)
+static int htb_change_class(struct Qdisc *sch, u32 classid,
+                           u32 parentid, struct nlattr **tca,
+                           unsigned long *arg)
 {
        int err = -EINVAL;
        struct htb_sched *q = qdisc_priv(sch);
-       struct htb_class *cl = (struct htb_class*)*arg,*parent;
-       struct rtattr *opt = tca[TCA_OPTIONS-1];
+       struct htb_class *cl = (struct htb_class *)*arg, *parent;
+       struct nlattr *opt = tca[TCA_OPTIONS];
        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
-       struct rtattr *tb[TCA_HTB_RTAB];
+       struct nlattr *tb[TCA_HTB_RTAB + 1];
        struct tc_htb_opt *hopt;
 
        /* extract all subattrs from opt attr */
-       if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
-                       tb[TCA_HTB_PARMS-1] == NULL ||
-                       RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
+       if (!opt)
+               goto failure;
+
+       err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
+       if (err < 0)
                goto failure;
-       
-       parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
 
-       hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
-       HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
-       rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
-       ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
-       if (!rtab || !ctab) goto failure;
+       err = -EINVAL;
+       if (tb[TCA_HTB_PARMS] == NULL)
+               goto failure;
+
+       parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
+
+       hopt = nla_data(tb[TCA_HTB_PARMS]);
+
+       rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
+       ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
+       if (!rtab || !ctab)
+               goto failure;
 
-       if (!cl) { /* new class */
+       if (!cl) {              /* new class */
                struct Qdisc *new_q;
+               int prio;
+               struct {
+                       struct nlattr           nla;
+                       struct gnet_estimator   opt;
+               } est = {
+                       .nla = {
+                               .nla_len        = nla_attr_size(sizeof(est.opt)),
+                               .nla_type       = TCA_RATE,
+                       },
+                       .opt = {
+                               /* 4s interval, 16s averaging constant */
+                               .interval       = 2,
+                               .ewma_log       = 2,
+                       },
+               };
+
                /* check for valid classid */
-               if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
+               if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
+                   htb_find(classid, sch))
                        goto failure;
 
                /* check maximal depth */
@@ -1561,91 +1356,118 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                err = -ENOBUFS;
                if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
                        goto failure;
-               
+
+               err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+                                       qdisc_root_sleeping_lock(sch),
+                                       tca[TCA_RATE] ? : &est.nla);
+               if (err) {
+                       kfree(cl);
+                       goto failure;
+               }
+
                cl->refcnt = 1;
-               INIT_LIST_HEAD(&cl->sibling);
-               INIT_LIST_HEAD(&cl->hlist);
-               INIT_LIST_HEAD(&cl->children);
+               cl->children = 0;
                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
-#ifdef HTB_DEBUG
-               cl->magic = HTB_CMAGIC;
-#endif
+               RB_CLEAR_NODE(&cl->pq_node);
+
+               for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
+                       RB_CLEAR_NODE(&cl->node[prio]);
 
                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
                   so that can't be used inside of sch_tree_lock
                   -- thanks to Karlis Peisenieks */
-               new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+               new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+                                         &pfifo_qdisc_ops, classid);
                sch_tree_lock(sch);
                if (parent && !parent->level) {
+                       unsigned int qlen = parent->un.leaf.q->q.qlen;
+
                        /* turn parent into inner node */
-                       sch->q.qlen -= parent->un.leaf.q->q.qlen;
-                       qdisc_destroy (parent->un.leaf.q);
-                       if (parent->prio_activity) 
-                               htb_deactivate (q,parent);
+                       qdisc_reset(parent->un.leaf.q);
+                       qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
+                       qdisc_destroy(parent->un.leaf.q);
+                       if (parent->prio_activity)
+                               htb_deactivate(q, parent);
 
                        /* remove from evt list because of level change */
                        if (parent->cmode != HTB_CAN_SEND) {
-                               htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
+                               htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
                                parent->cmode = HTB_CAN_SEND;
                        }
                        parent->level = (parent->parent ? parent->parent->level
-                                       : TC_HTB_MAXDEPTH) - 1;
-                       memset (&parent->un.inner,0,sizeof(parent->un.inner));
+                                        : TC_HTB_MAXDEPTH) - 1;
+                       memset(&parent->un.inner, 0, sizeof(parent->un.inner));
                }
                /* leaf (we) needs elementary qdisc */
                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
 
-               cl->classid = classid; cl->parent = parent;
+               cl->common.classid = classid;
+               cl->parent = parent;
 
                /* set class to be in HTB_CAN_SEND state */
                cl->tokens = hopt->buffer;
                cl->ctokens = hopt->cbuffer;
-               cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */
-               PSCHED_GET_TIME(cl->t_c);
+               cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;        /* 1min */
+               cl->t_c = psched_get_time();
                cl->cmode = HTB_CAN_SEND;
 
                /* attach to the hash list and parent's family */
-               list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
-               list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
-#ifdef HTB_DEBUG
-               { 
-                       int i;
-                       for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
-                       cl->pq_node.rb_color = -1;
+               qdisc_class_hash_insert(&q->clhash, &cl->common);
+               if (parent)
+                       parent->children++;
+       } else {
+               if (tca[TCA_RATE]) {
+                       err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+                                                   qdisc_root_sleeping_lock(sch),
+                                                   tca[TCA_RATE]);
+                       if (err)
+                               return err;
                }
-#endif
-       } else sch_tree_lock(sch);
+               sch_tree_lock(sch);
+       }
 
        /* it used to be a nasty bug here, we have to check that node
-           is really leaf before changing cl->un.leaf ! */
+          is really leaf before changing cl->un.leaf ! */
        if (!cl->level) {
-               cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
-               if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
-                       printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
-                       cl->un.leaf.quantum = 1000;
+               cl->quantum = rtab->rate.rate / q->rate2quantum;
+               if (!hopt->quantum && cl->quantum < 1000) {
+                       printk(KERN_WARNING
+                              "HTB: quantum of class %X is small. Consider r2q change.\n",
+                              cl->common.classid);
+                       cl->quantum = 1000;
                }
-               if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
-                       printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
-                       cl->un.leaf.quantum = 200000;
+               if (!hopt->quantum && cl->quantum > 200000) {
+                       printk(KERN_WARNING
+                              "HTB: quantum of class %X is big. Consider r2q change.\n",
+                              cl->common.classid);
+                       cl->quantum = 200000;
                }
                if (hopt->quantum)
-                       cl->un.leaf.quantum = hopt->quantum;
-               if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
-                       cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
+                       cl->quantum = hopt->quantum;
+               if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
+                       cl->prio = TC_HTB_NUMPRIO - 1;
        }
 
        cl->buffer = hopt->buffer;
        cl->cbuffer = hopt->cbuffer;
-       if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
-       if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
+       if (cl->rate)
+               qdisc_put_rtab(cl->rate);
+       cl->rate = rtab;
+       if (cl->ceil)
+               qdisc_put_rtab(cl->ceil);
+       cl->ceil = ctab;
        sch_tree_unlock(sch);
 
+       qdisc_class_hash_grow(sch, &q->clhash);
+
        *arg = (unsigned long)cl;
        return 0;
 
 failure:
-       if (rtab) qdisc_put_rtab(rtab);
-       if (ctab) qdisc_put_rtab(ctab);
+       if (rtab)
+               qdisc_put_rtab(rtab);
+       if (ctab)
+               qdisc_put_rtab(ctab);
        return err;
 }
 
@@ -1654,55 +1476,49 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)arg;
        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
-       HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
+
        return fl;
 }
 
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
-       u32 classid)
+                                    u32 classid)
 {
-       struct htb_sched *q = qdisc_priv(sch);
-       struct htb_class *cl = htb_find (classid,sch);
-       HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
+       struct htb_class *cl = htb_find(classid, sch);
+
        /*if (cl && !cl->level) return 0;
-         The line above used to be there to prevent attaching filters to 
-         leaves. But at least tc_index filter uses this just to get class 
-         for other reasons so that we have to allow for it.
-         ----
-         19.6.2002 As Werner explained it is ok - bind filter is just
-         another way to "lock" the class - unlike "get" this lock can
-         be broken by class during destroy IIUC.
+          The line above used to be there to prevent attaching filters to
+          leaves. But at least tc_index filter uses this just to get class
+          for other reasons so that we have to allow for it.
+          ----
+          19.6.2002 As Werner explained it is ok - bind filter is just
+          another way to "lock" the class - unlike "get" this lock can
+          be broken by class during destroy IIUC.
         */
-       if (cl) 
-               cl->filter_cnt++; 
-       else 
-               q->filter_cnt++;
+       if (cl)
+               cl->filter_cnt++;
        return (unsigned long)cl;
 }
 
 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
-       struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)arg;
-       HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
-       if (cl) 
-               cl->filter_cnt--; 
-       else 
-               q->filter_cnt--;
+
+       if (cl)
+               cl->filter_cnt--;
 }
 
 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
        struct htb_sched *q = qdisc_priv(sch);
-       int i;
+       struct htb_class *cl;
+       struct hlist_node *n;
+       unsigned int i;
 
        if (arg->stop)
                return;
 
-       for (i = 0; i < HTB_HSIZE; i++) {
-               struct list_head *p;
-               list_for_each (p,q->hash+i) {
-                       struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
                        if (arg->count < arg->skip) {
                                arg->count++;
                                continue;
@@ -1716,9 +1532,10 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
        }
 }
 
-static struct Qdisc_class_ops htb_class_ops = {
+static const struct Qdisc_class_ops htb_class_ops = {
        .graft          =       htb_graft,
        .leaf           =       htb_leaf,
+       .qlen_notify    =       htb_qlen_notify,
        .get            =       htb_get,
        .put            =       htb_put,
        .change         =       htb_change_class,
@@ -1731,14 +1548,14 @@ static struct Qdisc_class_ops htb_class_ops = {
        .dump_stats     =       htb_dump_class_stats,
 };
 
-static struct Qdisc_ops htb_qdisc_ops = {
+static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
        .next           =       NULL,
        .cl_ops         =       &htb_class_ops,
        .id             =       "htb",
        .priv_size      =       sizeof(struct htb_sched),
        .enqueue        =       htb_enqueue,
        .dequeue        =       htb_dequeue,
-       .requeue        =       htb_requeue,
+       .peek           =       qdisc_peek_dequeued,
        .drop           =       htb_drop,
        .init           =       htb_init,
        .reset          =       htb_reset,
@@ -1750,12 +1567,13 @@ static struct Qdisc_ops htb_qdisc_ops = {
 
 static int __init htb_module_init(void)
 {
-    return register_qdisc(&htb_qdisc_ops);
+       return register_qdisc(&htb_qdisc_ops);
 }
-static void __exit htb_module_exit(void) 
+static void __exit htb_module_exit(void)
 {
-    unregister_qdisc(&htb_qdisc_ops);
+       unregister_qdisc(&htb_qdisc_ops);
 }
+
 module_init(htb_module_init)
 module_exit(htb_module_exit)
 MODULE_LICENSE("GPL");