sh: convert /proc/cpu/aligmnent, /proc/cpu/kernel_alignment to seq_file
[safe/jmp/linux-2.6] / net / sched / sch_htb.c
index 3fda819..85acab9 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/workqueue.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
@@ -73,7 +74,7 @@ enum htb_cmode {
 struct htb_class {
        struct Qdisc_class_common common;
        /* general class parameters */
-       struct gnet_stats_basic bstats;
+       struct gnet_stats_basic_packed bstats;
        struct gnet_stats_queue qstats;
        struct gnet_stats_rate_est rate_est;
        struct tc_htb_xstats xstats;    /* our special stats */
@@ -84,12 +85,12 @@ struct htb_class {
        unsigned int children;
        struct htb_class *parent;       /* parent class */
 
+       int prio;               /* these two are used only by leaves... */
+       int quantum;            /* but stored for parent-to-leaf return */
+
        union {
                struct htb_class_leaf {
                        struct Qdisc *q;
-                       int prio;
-                       int aprio;
-                       int quantum;
                        int deficit[TC_HTB_MAXDEPTH];
                        struct list_head drop_list;
                } leaf;
@@ -114,8 +115,6 @@ struct htb_class {
        struct tcf_proto *filter_list;
        int filter_cnt;
 
-       int warned;             /* only one warning about non work conserving .. */
-
        /* token bucket parameters */
        struct qdisc_rate_table *rate;  /* rate table of the class itself */
        struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
@@ -123,19 +122,8 @@ struct htb_class {
        psched_tdiff_t mbuffer; /* max wait time */
        long tokens, ctokens;   /* current number of tokens */
        psched_time_t t_c;      /* checkpoint time */
-
-       int prio;               /* For parent to leaf return possible here */
-       int quantum;            /* we do backup. Finally full replacement  */
-                               /* of un.leaf originals should be done. */
 };
 
-static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
-                          int size)
-{
-       long result = qdisc_l2t(rate, size);
-       return result;
-}
-
 struct htb_sched {
        struct Qdisc_class_hash clhash;
        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
@@ -152,9 +140,6 @@ struct htb_sched {
        /* time of nearest event per level (row) */
        psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 
-       /* whether we hit non-work conserving class during this dequeue; we use */
-       int nwc_hit;            /* this to disable mindelay complaint in dequeue */
-
        int defcls;             /* class where unclassified flows go to */
 
        /* filters for qdisc itself */
@@ -169,6 +154,10 @@ struct htb_sched {
        int direct_qlen;        /* max qlen of above */
 
        long direct_pkts;
+
+#define HTB_WARN_TOOMANYEVENTS 0x1
+       unsigned int warned;    /* only one warning */
+       struct work_struct work;
 };
 
 /* find class in global hash table using given handle */
@@ -527,10 +516,10 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
        WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 
        if (!cl->prio_activity) {
-               cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
+               cl->prio_activity = 1 << cl->prio;
                htb_activate_prios(q, cl);
                list_add_tail(&cl->un.leaf.drop_list,
-                             q->drops + cl->un.leaf.aprio);
+                             q->drops + cl->prio);
        }
 }
 
@@ -551,7 +540,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-       int ret;
+       int uninitialized_var(ret);
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = htb_classify(skb, sch, &ret);
 
@@ -591,45 +580,30 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
        return NET_XMIT_SUCCESS;
 }
 
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
 {
-       int ret;
-       struct htb_sched *q = qdisc_priv(sch);
-       struct htb_class *cl = htb_classify(skb, sch, &ret);
-       struct sk_buff *tskb;
+       long toks = diff + cl->tokens;
 
-       if (cl == HTB_DIRECT) {
-               /* enqueue to helper queue */
-               if (q->direct_queue.qlen < q->direct_qlen) {
-                       __skb_queue_head(&q->direct_queue, skb);
-               } else {
-                       __skb_queue_head(&q->direct_queue, skb);
-                       tskb = __skb_dequeue_tail(&q->direct_queue);
-                       kfree_skb(tskb);
-                       sch->qstats.drops++;
-                       return NET_XMIT_CN;
-               }
-#ifdef CONFIG_NET_CLS_ACT
-       } else if (!cl) {
-               if (ret & __NET_XMIT_BYPASS)
-                       sch->qstats.drops++;
-               kfree_skb(skb);
-               return ret;
-#endif
-       } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
-                  NET_XMIT_SUCCESS) {
-               if (net_xmit_drop_count(ret)) {
-                       sch->qstats.drops++;
-                       cl->qstats.drops++;
-               }
-               return ret;
-       } else
-               htb_activate(q, cl);
+       if (toks > cl->buffer)
+               toks = cl->buffer;
+       toks -= (long) qdisc_l2t(cl->rate, bytes);
+       if (toks <= -cl->mbuffer)
+               toks = 1 - cl->mbuffer;
 
-       sch->q.qlen++;
-       sch->qstats.requeues++;
-       return NET_XMIT_SUCCESS;
+       cl->tokens = toks;
+}
+
+static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
+{
+       long toks = diff + cl->ctokens;
+
+       if (toks > cl->cbuffer)
+               toks = cl->cbuffer;
+       toks -= (long) qdisc_l2t(cl->ceil, bytes);
+       if (toks <= -cl->mbuffer)
+               toks = 1 - cl->mbuffer;
+
+       cl->ctokens = toks;
 }
 
 /**
@@ -647,26 +621,20 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
                             int level, struct sk_buff *skb)
 {
        int bytes = qdisc_pkt_len(skb);
-       long toks, diff;
        enum htb_cmode old_mode;
-
-#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
-       if (toks > cl->B) toks = cl->B; \
-       toks -= L2T(cl, cl->R, bytes); \
-       if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
-       cl->T = toks
+       long diff;
 
        while (cl) {
                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
                if (cl->level >= level) {
                        if (cl->level == level)
                                cl->xstats.lends++;
-                       HTB_ACCNT(tokens, buffer, rate);
+                       htb_accnt_tokens(cl, bytes, diff);
                } else {
                        cl->xstats.borrows++;
                        cl->tokens += diff;     /* we moved t_c; update tokens */
                }
-               HTB_ACCNT(ctokens, cbuffer, ceil);
+               htb_accnt_ctokens(cl, bytes, diff);
                cl->t_c = q->now;
 
                old_mode = cl->cmode;
@@ -693,15 +661,16 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
  * htb_do_events - make mode changes to classes at the level
  *
  * Scans event queue for pending events and applies them. Returns time of
- * next pending event (0 for no event in pq).
+ * next pending event (0 for no event in pq, q->now for too many events).
  * Note: Applied are events whose have cl->pq_key <= q->now.
  */
-static psched_time_t htb_do_events(struct htb_sched *q, int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level,
+                                  unsigned long start)
 {
        /* don't run for longer than 2 jiffies; 2 is used instead of
           1 to simplify things when jiffy is going to be incremented
           too soon */
-       unsigned long stop_at = jiffies + 2;
+       unsigned long stop_at = start + 2;
        while (time_before(jiffies, stop_at)) {
                struct htb_class *cl;
                long diff;
@@ -720,8 +689,14 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level)
                if (cl->cmode != HTB_CAN_SEND)
                        htb_add_to_wait_tree(q, cl, diff);
        }
-       /* too much load - let's continue on next jiffie */
-       return q->now + PSCHED_TICKS_PER_SEC / HZ;
+
+       /* too much load - let's continue after a break for scheduling */
+       if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
+               printk(KERN_WARNING "htb: too many events!\n");
+               q->warned |= HTB_WARN_TOOMANYEVENTS;
+       }
+
+       return q->now;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
@@ -733,14 +708,14 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
        while (n) {
                struct htb_class *cl =
                    rb_entry(n, struct htb_class, node[prio]);
-               if (id == cl->common.classid)
-                       return n;
 
                if (id > cl->common.classid) {
                        n = n->rb_right;
-               } else {
+               } else if (id < cl->common.classid) {
                        r = n;
                        n = n->rb_left;
+               } else {
+                       return n;
                }
        }
        return r;
@@ -761,7 +736,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
                u32 *pid;
        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 
-       WARN_ON(!tree->rb_node);
+       BUG_ON(!tree->rb_node);
        sp->root = tree->rb_node;
        sp->pptr = pptr;
        sp->pid = pid;
@@ -781,9 +756,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
                                *sp->pptr = (*sp->pptr)->rb_left;
                        if (sp > stk) {
                                sp--;
-                               WARN_ON(!*sp->pptr);
-                               if (!*sp->pptr)
+                               if (!*sp->pptr) {
+                                       WARN_ON(1);
                                        return NULL;
+                               }
                                htb_next_rb_node(sp->pptr);
                        }
                } else {
@@ -814,8 +790,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 
        do {
 next:
-               WARN_ON(!cl);
-               if (!cl)
+               if (unlikely(!cl))
                        return NULL;
 
                /* class can be empty - it is unlikely but can be true if leaf
@@ -843,13 +818,8 @@ next:
                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
                if (likely(skb != NULL))
                        break;
-               if (!cl->warned) {
-                       printk(KERN_WARNING
-                              "htb: class %X isn't work conserving ?!\n",
-                              cl->common.classid);
-                       cl->warned = 1;
-               }
-               q->nwc_hit++;
+
+               qdisc_warn_nonwc("htb", cl->un.leaf.q);
                htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
                                  ptr[0]) + prio);
                cl = htb_lookup_leaf(q->row[level] + prio, prio,
@@ -861,7 +831,7 @@ next:
        if (likely(skb != NULL)) {
                cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
                if (cl->un.leaf.deficit[level] < 0) {
-                       cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
+                       cl->un.leaf.deficit[level] += cl->quantum;
                        htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
                                          ptr[0]) + prio);
                }
@@ -880,6 +850,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
        struct htb_sched *q = qdisc_priv(sch);
        int level;
        psched_time_t next_event;
+       unsigned long start_at;
 
        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
        skb = __skb_dequeue(&q->direct_queue);
@@ -892,23 +863,24 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
        if (!sch->q.qlen)
                goto fin;
        q->now = psched_get_time();
+       start_at = jiffies;
 
        next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
-       q->nwc_hit = 0;
+
        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
                /* common case optimization - skip event handler quickly */
                int m;
                psched_time_t event;
 
                if (q->now >= q->near_ev_cache[level]) {
-                       event = htb_do_events(q, level);
+                       event = htb_do_events(q, level, start_at);
                        if (!event)
                                event = q->now + PSCHED_TICKS_PER_SEC;
                        q->near_ev_cache[level] = event;
                } else
                        event = q->near_ev_cache[level];
 
-               if (event && next_event > event)
+               if (next_event > event)
                        next_event = event;
 
                m = ~q->row_mask[level];
@@ -924,7 +896,10 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
                }
        }
        sch->qstats.overlimits++;
-       qdisc_watchdog_schedule(&q->watchdog, next_event);
+       if (likely(next_event > q->now))
+               qdisc_watchdog_schedule(&q->watchdog, next_event);
+       else
+               schedule_work(&q->work);
 fin:
        return skb;
 }
@@ -994,6 +969,14 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 };
 
+static void htb_work_func(struct work_struct *work)
+{
+       struct htb_sched *q = container_of(work, struct htb_sched, work);
+       struct Qdisc *sch = q->watchdog.qdisc;
+
+       __netif_schedule(qdisc_root(sch));
+}
+
 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct htb_sched *q = qdisc_priv(sch);
@@ -1028,6 +1011,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
                INIT_LIST_HEAD(q->drops + i);
 
        qdisc_watchdog_init(&q->watchdog, sch);
+       INIT_WORK(&q->work, htb_work_func);
        skb_queue_head_init(&q->direct_queue);
 
        q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
@@ -1095,8 +1079,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
        opt.buffer = cl->buffer;
        opt.ceil = cl->ceil->rate;
        opt.cbuffer = cl->cbuffer;
-       opt.quantum = cl->un.leaf.quantum;
-       opt.prio = cl->un.leaf.prio;
+       opt.quantum = cl->quantum;
+       opt.prio = cl->prio;
        opt.level = cl->level;
        NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 
@@ -1133,28 +1117,29 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 {
        struct htb_class *cl = (struct htb_class *)arg;
 
-       if (cl && !cl->level) {
-               if (new == NULL &&
-                   (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
-                                            &pfifo_qdisc_ops,
-                                            cl->common.classid))
-                   == NULL)
-                       return -ENOBUFS;
-               sch_tree_lock(sch);
-               if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
-                       qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-                       qdisc_reset(*old);
-               }
-               sch_tree_unlock(sch);
-               return 0;
+       if (cl->level)
+               return -EINVAL;
+       if (new == NULL &&
+           (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+                                    &pfifo_qdisc_ops,
+                                    cl->common.classid)) == NULL)
+               return -ENOBUFS;
+
+       sch_tree_lock(sch);
+       *old = cl->un.leaf.q;
+       cl->un.leaf.q = new;
+       if (*old != NULL) {
+               qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+               qdisc_reset(*old);
        }
-       return -ENOENT;
+       sch_tree_unlock(sch);
+       return 0;
 }
 
 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
 {
        struct htb_class *cl = (struct htb_class *)arg;
-       return (cl && !cl->level) ? cl->un.leaf.q : NULL;
+       return !cl->level ? cl->un.leaf.q : NULL;
 }
 
 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
@@ -1198,8 +1183,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
-       parent->un.leaf.quantum = parent->quantum;
-       parent->un.leaf.prio = parent->prio;
        parent->tokens = parent->buffer;
        parent->ctokens = parent->cbuffer;
        parent->t_c = psched_get_time();
@@ -1220,7 +1203,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
        kfree(cl);
 }
 
-/* always caled under BH & queue lock */
 static void htb_destroy(struct Qdisc *sch)
 {
        struct htb_sched *q = qdisc_priv(sch);
@@ -1228,6 +1210,7 @@ static void htb_destroy(struct Qdisc *sch)
        struct htb_class *cl;
        unsigned int i;
 
+       cancel_work_sync(&q->work);
        qdisc_watchdog_cancel(&q->watchdog);
        /* This line used to be after htb_destroy_class call below
           and surprisingly it worked in 2.4. But it must precede it
@@ -1291,8 +1274,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
        if (last_child)
                htb_parent_to_leaf(q, cl, new_q);
 
-       if (--cl->refcnt == 0)
-               htb_destroy_class(sch, cl);
+       BUG_ON(--cl->refcnt == 0);
+       /*
+        * This shouldn't happen: we "hold" one cops->get() when called
+        * from tc_ctl_tclass; the destroy method is done from cops->put().
+        */
 
        sch_tree_unlock(sch);
        return 0;
@@ -1371,9 +1357,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
                        goto failure;
 
-               gen_new_estimator(&cl->bstats, &cl->rate_est,
-                                 qdisc_root_sleeping_lock(sch),
-                                 tca[TCA_RATE] ? : &est.nla);
+               err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+                                       qdisc_root_sleeping_lock(sch),
+                                       tca[TCA_RATE] ? : &est.nla);
+               if (err) {
+                       kfree(cl);
+                       goto failure;
+               }
+
                cl->refcnt = 1;
                cl->children = 0;
                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
@@ -1425,37 +1416,36 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                if (parent)
                        parent->children++;
        } else {
-               if (tca[TCA_RATE])
-                       gen_replace_estimator(&cl->bstats, &cl->rate_est,
-                                             qdisc_root_sleeping_lock(sch),
-                                             tca[TCA_RATE]);
+               if (tca[TCA_RATE]) {
+                       err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+                                                   qdisc_root_sleeping_lock(sch),
+                                                   tca[TCA_RATE]);
+                       if (err)
+                               return err;
+               }
                sch_tree_lock(sch);
        }
 
        /* it used to be a nasty bug here, we have to check that node
           is really leaf before changing cl->un.leaf ! */
        if (!cl->level) {
-               cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
-               if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+               cl->quantum = rtab->rate.rate / q->rate2quantum;
+               if (!hopt->quantum && cl->quantum < 1000) {
                        printk(KERN_WARNING
                               "HTB: quantum of class %X is small. Consider r2q change.\n",
                               cl->common.classid);
-                       cl->un.leaf.quantum = 1000;
+                       cl->quantum = 1000;
                }
-               if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
+               if (!hopt->quantum && cl->quantum > 200000) {
                        printk(KERN_WARNING
                               "HTB: quantum of class %X is big. Consider r2q change.\n",
                               cl->common.classid);
-                       cl->un.leaf.quantum = 200000;
+                       cl->quantum = 200000;
                }
                if (hopt->quantum)
-                       cl->un.leaf.quantum = hopt->quantum;
-               if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
-                       cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
-
-               /* backup for htb_parent_to_leaf */
-               cl->quantum = cl->un.leaf.quantum;
-               cl->prio = cl->un.leaf.prio;
+                       cl->quantum = hopt->quantum;
+               if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
+                       cl->prio = TC_HTB_NUMPRIO - 1;
        }
 
        cl->buffer = hopt->buffer;
@@ -1566,7 +1556,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
        .enqueue        =       htb_enqueue,
        .dequeue        =       htb_dequeue,
        .peek           =       qdisc_peek_dequeued,
-       .requeue        =       htb_requeue,
        .drop           =       htb_drop,
        .init           =       htb_init,
        .reset          =       htb_reset,