X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fsched%2Fsch_api.c;h=fe35c1f338c2b25d7c2a4e4ae19123bf4023f6b5;hb=e0c70b80786296d4f3c35ebe0d52591cebf8f916;hp=4fd0beca9450c892192a20d58dc49c0b122b260f;hpb=0621ed2e4edbe2f6f83dafbf85eecefae7aaf2e8;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4fd0bec..fe35c1f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,14 +27,20 @@ #include #include #include +#include +#include +#include +#include #include #include -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event); +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event); /* @@ -94,10 +100,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, Auxiliary routines: - ---requeue + ---peek - requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if dev->tbusy=0. + like dequeue but without removing a packet from the queue ---reset @@ -144,8 +149,14 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; - if (qops->requeue == NULL) - qops->requeue = noop_qdisc_ops.requeue; + if (qops->peek == NULL) { + if (qops->dequeue == NULL) { + qops->peek = noop_qdisc_ops.peek; + } else { + rc = -EINVAL; + goto out; + } + } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; @@ -156,6 +167,7 @@ out: write_unlock(&qdisc_mod_lock); return rc; } +EXPORT_SYMBOL(register_qdisc); int unregister_qdisc(struct Qdisc_ops *qops) { @@ -174,27 +186,58 @@ int unregister_qdisc(struct Qdisc_ops *qops) write_unlock(&qdisc_mod_lock); return err; } +EXPORT_SYMBOL(unregister_qdisc); /* We know handle. Find qdisc among all qdisc's attached to device (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; - list_for_each_entry(q, &dev->qdisc_list, list) { + if (!(root->flags & TCQ_F_BUILTIN) && + root->handle == handle) + return root; + + list_for_each_entry(q, &root->list, list) { if (q->handle == handle) return q; } return NULL; } +static void qdisc_list_add(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) + list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list); +} + +void qdisc_list_del(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) + list_del(&q->list); +} +EXPORT_SYMBOL(qdisc_list_del); + +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +{ + struct Qdisc *q; + + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; + + q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); +out: + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; struct Qdisc *leaf; - struct Qdisc_class_ops *cops = p->ops->cl_ops; + const struct Qdisc_class_ops *cops = p->ops->cl_ops; if (cops == NULL) return NULL; @@ -209,14 +252,14 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) /* Find queueing discipline by name */ -static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) +static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) { struct Qdisc_ops *q = NULL; if (kind) { read_lock(&qdisc_mod_lock); for (q = qdisc_base; q; q = q->next) { - if (rtattr_strcmp(kind, q->id) == 0) { + if (nla_strcmp(kind, q->id) == 0) { if (!try_module_get(q->owner)) q = NULL; break; @@ -229,7 +272,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) static struct qdisc_rate_table *qdisc_rtab_list; -struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab) +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) { struct qdisc_rate_table *rtab; @@ -240,19 +283,21 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *ta } } - if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024) + if (tab == NULL || r->rate == 0 || r->cell_log == 0 || + nla_len(tab) != TC_RTAB_SIZE) return NULL; rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); if (rtab) { rtab->rate = *r; rtab->refcnt = 1; - memcpy(rtab->data, RTA_DATA(tab), 1024); + memcpy(rtab->data, nla_data(tab), 1024); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } return rtab; } +EXPORT_SYMBOL(qdisc_get_rtab); void qdisc_put_rtab(struct qdisc_rate_table *tab) { @@ -269,16 +314,151 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) } } } +EXPORT_SYMBOL(qdisc_put_rtab); + +static LIST_HEAD(qdisc_stab_list); +static DEFINE_SPINLOCK(qdisc_stab_lock); + +static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { + [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, + [TCA_STAB_DATA] = { .type = NLA_BINARY }, +}; + +static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) +{ + struct nlattr *tb[TCA_STAB_MAX + 1]; + struct qdisc_size_table *stab; + struct tc_sizespec *s; + unsigned int tsize = 0; + u16 *tab = NULL; + int err; + + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + if (err < 0) + return ERR_PTR(err); + if (!tb[TCA_STAB_BASE]) + return ERR_PTR(-EINVAL); + + s = nla_data(tb[TCA_STAB_BASE]); + + if (s->tsize > 0) { + if (!tb[TCA_STAB_DATA]) + return ERR_PTR(-EINVAL); + tab = nla_data(tb[TCA_STAB_DATA]); + tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); + } + + if (!s || tsize != s->tsize || (!tab && tsize > 0)) + return ERR_PTR(-EINVAL); + + spin_lock(&qdisc_stab_lock); + + list_for_each_entry(stab, &qdisc_stab_list, list) { + if (memcmp(&stab->szopts, s, sizeof(*s))) + continue; + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) + continue; + stab->refcnt++; + spin_unlock(&qdisc_stab_lock); + return stab; + } + + spin_unlock(&qdisc_stab_lock); + + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); + if (!stab) + return ERR_PTR(-ENOMEM); + + stab->refcnt = 1; + stab->szopts = *s; + if (tsize > 0) + memcpy(stab->data, tab, tsize * sizeof(u16)); + + spin_lock(&qdisc_stab_lock); + list_add_tail(&stab->list, &qdisc_stab_list); + spin_unlock(&qdisc_stab_lock); + + return stab; +} + +void qdisc_put_stab(struct qdisc_size_table *tab) +{ + if (!tab) + return; + + spin_lock(&qdisc_stab_lock); + + if (--tab->refcnt == 0) { + list_del(&tab->list); + kfree(tab); + } + + spin_unlock(&qdisc_stab_lock); +} +EXPORT_SYMBOL(qdisc_put_stab); + +static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_STAB); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); + nla_nest_end(skb, nest); + + return skb->len; + +nla_put_failure: + return -1; +} + +void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + int pkt_len, slot; + + pkt_len = skb->len + stab->szopts.overhead; + if (unlikely(!stab->szopts.tsize)) + goto out; + + slot = pkt_len + stab->szopts.cell_align; + if (unlikely(slot < 0)) + slot = 0; + + slot >>= stab->szopts.cell_log; + if (likely(slot < stab->szopts.tsize)) + pkt_len = stab->data[slot]; + else + pkt_len = stab->data[stab->szopts.tsize - 1] * + (slot / stab->szopts.tsize) + + stab->data[slot % stab->szopts.tsize]; + + pkt_len <<= stab->szopts.size_log; +out: + if (unlikely(pkt_len < 1)) + pkt_len = 1; + qdisc_skb_cb(skb)->pkt_len = pkt_len; +} +EXPORT_SYMBOL(qdisc_calculate_pkt_len); + +void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + printk(KERN_WARNING + "%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} +EXPORT_SYMBOL(qdisc_warn_nonwc); static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct net_device *dev = wd->qdisc->dev; wd->qdisc->flags &= ~TCQ_F_THROTTLED; - smp_wmb(); - netif_schedule(dev); + __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; } @@ -295,9 +475,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { ktime_t time; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state)) + return; + wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); - time = ktime_add_ns(time, PSCHED_US2NS(expires)); + time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); @@ -309,77 +493,143 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); -/* Allocate an unique handle from space managed by kernel */ - -static u32 qdisc_alloc_handle(struct net_device *dev) +static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { - int i = 0x10000; - static u32 autohandle = TC_H_MAKE(0x80000000U, 0); + unsigned int size = n * sizeof(struct hlist_head), i; + struct hlist_head *h; - do { - autohandle += TC_H_MAKE(0x10000U, 0); - if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) - autohandle = TC_H_MAKE(0x80000000U, 0); - } while (qdisc_lookup(dev, autohandle) && --i > 0); + if (size <= PAGE_SIZE) + h = kmalloc(size, GFP_KERNEL); + else + h = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(size)); - return i>0 ? autohandle : 0; + if (h != NULL) { + for (i = 0; i < n; i++) + INIT_HLIST_HEAD(&h[i]); + } + return h; } -/* Attach toplevel qdisc to device dev */ +static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head); + + if (size <= PAGE_SIZE) + kfree(h); + else + free_pages((unsigned long)h, get_order(size)); +} -static struct Qdisc * -dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) +void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { - struct Qdisc *oqdisc; - - if (dev->flags & IFF_UP) - dev_deactivate(dev); - - qdisc_lock_tree(dev); - if (qdisc && qdisc->flags&TCQ_F_INGRESS) { - oqdisc = dev->qdisc_ingress; - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { - /* delete */ - qdisc_reset(oqdisc); - dev->qdisc_ingress = NULL; - } else { /* new */ - dev->qdisc_ingress = qdisc; + struct Qdisc_class_common *cl; + struct hlist_node *n, *next; + struct hlist_head *nhash, *ohash; + unsigned int nsize, nmask, osize; + unsigned int i, h; + + /* Rehash when load factor exceeds 0.75 */ + if (clhash->hashelems * 4 <= clhash->hashsize * 3) + return; + nsize = clhash->hashsize * 2; + nmask = nsize - 1; + nhash = qdisc_class_hash_alloc(nsize); + if (nhash == NULL) + return; + + ohash = clhash->hash; + osize = clhash->hashsize; + + sch_tree_lock(sch); + for (i = 0; i < osize; i++) { + hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + h = qdisc_class_hash(cl->classid, nmask); + hlist_add_head(&cl->hnode, &nhash[h]); } + } + clhash->hash = nhash; + clhash->hashsize = nsize; + clhash->hashmask = nmask; + sch_tree_unlock(sch); - } else { + qdisc_class_hash_free(ohash, osize); +} +EXPORT_SYMBOL(qdisc_class_hash_grow); - oqdisc = dev->qdisc_sleeping; +int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) +{ + unsigned int size = 4; + + clhash->hash = qdisc_class_hash_alloc(size); + if (clhash->hash == NULL) + return -ENOMEM; + clhash->hashsize = size; + clhash->hashmask = size - 1; + clhash->hashelems = 0; + return 0; +} +EXPORT_SYMBOL(qdisc_class_hash_init); - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); +void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) +{ + qdisc_class_hash_free(clhash->hash, clhash->hashsize); +} +EXPORT_SYMBOL(qdisc_class_hash_destroy); - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev->qdisc_sleeping = qdisc; - dev->qdisc = &noop_qdisc; - } +void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + unsigned int h; - qdisc_unlock_tree(dev); + INIT_HLIST_NODE(&cl->hnode); + h = qdisc_class_hash(cl->classid, clhash->hashmask); + hlist_add_head(&cl->hnode, &clhash->hash[h]); + clhash->hashelems++; +} +EXPORT_SYMBOL(qdisc_class_hash_insert); - if (dev->flags & IFF_UP) - dev_activate(dev); +void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + hlist_del(&cl->hnode); + clhash->hashelems--; +} +EXPORT_SYMBOL(qdisc_class_hash_remove); + +/* Allocate an unique handle from space managed by kernel */ - return oqdisc; +static u32 qdisc_alloc_handle(struct net_device *dev) +{ + int i = 0x10000; + static u32 autohandle = TC_H_MAKE(0x80000000U, 0); + + do { + autohandle += TC_H_MAKE(0x10000U, 0); + if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) + autohandle = TC_H_MAKE(0x80000000U, 0); + } while (qdisc_lookup(dev, autohandle) && --i > 0); + + return i>0 ? autohandle : 0; } void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) { - struct Qdisc_class_ops *cops; + const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; if (n == 0) return; while ((parentid = sch->parent)) { - sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) + return; + + sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); + if (sch == NULL) { + WARN_ON(parentid != TC_H_ROOT); + return; + } cops = sch->ops->cl_ops; if (cops->qlen_notify) { cl = cops->get(sch, parentid); @@ -391,44 +641,101 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -/* Graft qdisc "new" to class "classid" of qdisc "parent" or - to device "dev". +static void notify_and_destroy(struct net *net, struct sk_buff *skb, + struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + if (new || old) + qdisc_notify(net, skb, n, clid, old, new); + + if (old) + qdisc_destroy(old); +} - Old qdisc is not destroyed but returned in *old. +/* Graft qdisc "new" to class "classid" of qdisc "parent" or + * to device "dev". + * + * When appropriate send a netlink notification using 'skb' + * and "n". + * + * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, - u32 classid, - struct Qdisc *new, struct Qdisc **old) + struct sk_buff *skb, struct nlmsghdr *n, u32 classid, + struct Qdisc *new, struct Qdisc *old) { + struct Qdisc *q = old; + struct net *net = dev_net(dev); int err = 0; - struct Qdisc *q = *old; - if (parent == NULL) { - if (q && q->flags&TCQ_F_INGRESS) { - *old = dev_graft_qdisc(dev, q); + unsigned int i, num_q, ingress; + + ingress = 0; + num_q = dev->num_tx_queues; + if ((q && q->flags & TCQ_F_INGRESS) || + (new && new->flags & TCQ_F_INGRESS)) { + num_q = 1; + ingress = 1; + } + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + if (new && new->ops->attach) { + new->ops->attach(new); + num_q = 0; + } + + for (i = 0; i < num_q; i++) { + struct netdev_queue *dev_queue = &dev->rx_queue; + + if (!ingress) + dev_queue = netdev_get_tx_queue(dev, i); + + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + + if (!ingress) + qdisc_destroy(old); + } + + if (!ingress) { + notify_and_destroy(net, skb, n, classid, + dev->qdisc, new); + if (new && !new->ops->attach) + atomic_inc(&new->refcnt); + dev->qdisc = new ? : &noop_qdisc; } else { - *old = dev_graft_qdisc(dev, new); + notify_and_destroy(net, skb, n, classid, old, new); } - } else { - struct Qdisc_class_ops *cops = parent->ops->cl_ops; - err = -EINVAL; + if (dev->flags & IFF_UP) + dev_activate(dev); + } else { + const struct Qdisc_class_ops *cops = parent->ops->cl_ops; - if (cops) { + err = -EOPNOTSUPP; + if (cops && cops->graft) { unsigned long cl = cops->get(parent, classid); if (cl) { - err = cops->graft(parent, cl, new, old); - if (new) - new->parent = classid; + err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); - } + } else + err = -ENOENT; } + if (!err) + notify_and_destroy(net, skb, n, classid, old, new); } return err; } +/* lockdep annotation is needed for ingress; egress gets it only for name */ +static struct lock_class_key qdisc_tx_lock; +static struct lock_class_key qdisc_rx_lock; + /* Allocate and initialize new qdisc. @@ -436,18 +743,21 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, */ static struct Qdisc * -qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) +qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; - struct rtattr *kind = tca[TCA_KIND-1]; + struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; + struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (ops == NULL && kind != NULL) { char name[IFNAMSIZ]; - if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { + if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to @@ -476,47 +786,60 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) if (ops == NULL) goto err_out; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; } + sch->parent = parent; + if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; - sch->stats_lock = &dev->ingress_lock; handle = TC_H_MAKE(TC_H_INGRESS, 0); + lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { - sch->stats_lock = &dev->queue_lock; if (handle == 0) { handle = qdisc_alloc_handle(dev); err = -ENOMEM; if (handle == 0) goto err_out3; } + lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); } sch->handle = handle; - if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { - if (tca[TCA_RATE-1]) { - err = gen_new_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, - tca[TCA_RATE-1]); - if (err) { - /* - * Any broken qdiscs that would require - * a ops->reset() here? The qdisc was never - * in action so it shouldn't be necessary. - */ - if (ops->destroy) - ops->destroy(sch); - goto err_out3; + if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) { + err = PTR_ERR(stab); + goto err_out4; } + sch->stab = stab; + } + if (tca[TCA_RATE]) { + spinlock_t *root_lock; + + err = -EOPNOTSUPP; + if (sch->flags & TCQ_F_MQROOT) + goto err_out4; + + if ((sch->parent != TC_H_ROOT) && + !(sch->flags & TCQ_F_INGRESS) && + (!p || !(p->flags & TCQ_F_MQROOT))) + root_lock = qdisc_root_sleeping_lock(sch); + else + root_lock = qdisc_lock(sch); + + err = gen_new_estimator(&sch->bstats, &sch->rate_est, + root_lock, tca[TCA_RATE]); + if (err) + goto err_out4; } - qdisc_lock_tree(dev); - list_add_tail(&sch->list, &dev->qdisc_list); - qdisc_unlock_tree(dev); + + qdisc_list_add(sch); return sch; } @@ -528,22 +851,50 @@ err_out2: err_out: *errp = err; return NULL; + +err_out4: + /* + * Any broken qdiscs that would require a ops->reset() here? + * The qdisc was never in action so it shouldn't be necessary. + */ + qdisc_put_stab(sch->stab); + if (ops->destroy) + ops->destroy(sch); + goto err_out3; } -static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) +static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - if (tca[TCA_OPTIONS-1]) { - int err; + struct qdisc_size_table *stab = NULL; + int err = 0; + if (tca[TCA_OPTIONS]) { if (sch->ops->change == NULL) return -EINVAL; - err = sch->ops->change(sch, tca[TCA_OPTIONS-1]); + err = sch->ops->change(sch, tca[TCA_OPTIONS]); if (err) return err; } - if (tca[TCA_RATE-1]) + + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) + return PTR_ERR(stab); + } + + qdisc_put_stab(sch->stab); + sch->stab = stab; + + if (tca[TCA_RATE]) { + /* NB: ignores errors from replace_estimator + because change can't be undone. */ + if (sch->flags & TCQ_F_MQROOT) + goto out; gen_replace_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, tca[TCA_RATE-1]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + } +out: return 0; } @@ -575,7 +926,7 @@ static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) { struct Qdisc *leaf; - struct Qdisc_class_ops *cops = q->ops->cl_ops; + const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct check_loop_arg *arg = (struct check_loop_arg *)w; leaf = cops->leaf(q, cl); @@ -593,17 +944,22 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = sock_net(skb->sk); struct tcmsg *tcm = NLMSG_DATA(n); - struct rtattr **tca = arg; + struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; u32 clid = tcm->tcm_parent; struct Qdisc *q = NULL; struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + if (err < 0) + return err; + if (clid) { if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { @@ -611,10 +967,10 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc_sleeping; } } else { - q = dev->qdisc_sleeping; + q = dev->qdisc; } if (!q) return -ENOENT; @@ -626,7 +982,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; } - if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) return -EINVAL; if (n->nlmsg_type == RTM_DELQDISC) { @@ -634,16 +990,10 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; if (q->handle == 0) return -ENOENT; - if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) + if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; - if (q) { - qdisc_notify(skb, n, clid, q, NULL); - qdisc_lock_tree(dev); - qdisc_destroy(q); - qdisc_unlock_tree(dev); - } } else { - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); } return 0; } @@ -654,8 +1004,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = sock_net(skb->sk); struct tcmsg *tcm; - struct rtattr **tca; + struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; u32 clid; struct Qdisc *q, *p; @@ -664,13 +1015,16 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) replay: /* Reinit, just in case something touches this. */ tcm = NLMSG_DATA(n); - tca = arg; clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + if (err < 0) + return err; + if (clid) { if (clid != TC_H_ROOT) { if (clid != TC_H_INGRESS) { @@ -678,10 +1032,10 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc_sleeping; } } else { - q = dev->qdisc_sleeping; + q = dev->qdisc; } /* It may be default qdisc, ignore it */ @@ -698,7 +1052,7 @@ replay: goto create_n_graft; if (n->nlmsg_flags&NLM_F_EXCL) return -EEXIST; - if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) return -EINVAL; if (q == p || (p && check_loop(q, p, 0))) @@ -731,8 +1085,8 @@ replay: if ((n->nlmsg_flags&NLM_F_CREATE) && (n->nlmsg_flags&NLM_F_REPLACE) && ((n->nlmsg_flags&NLM_F_EXCL) || - (tca[TCA_KIND-1] && - rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)))) + (tca[TCA_KIND] && + nla_strcmp(tca[TCA_KIND], q->ops->id)))) goto create_n_graft; } } @@ -747,20 +1101,34 @@ replay: return -ENOENT; if (n->nlmsg_flags&NLM_F_EXCL) return -EEXIST; - if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) return -EINVAL; err = qdisc_change(q, tca); if (err == 0) - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); return err; create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, tcm->tcm_parent, tca, &err); - else - q = qdisc_create(dev, tcm->tcm_handle, tca, &err); + q = qdisc_create(dev, &dev->rx_queue, p, + tcm->tcm_parent, tcm->tcm_parent, + tca, &err); + else { + struct netdev_queue *dev_queue; + + if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) + dev_queue = p->ops->cl_ops->select_queue(p, tcm); + else if (p) + dev_queue = p->dev_queue; + else + dev_queue = netdev_get_tx_queue(dev, 0); + + q = qdisc_create(dev, dev_queue, p, + tcm->tcm_parent, tcm->tcm_handle, + tca, &err); + } if (q == NULL) { if (err == -EAGAIN) goto replay; @@ -768,24 +1136,13 @@ create_n_graft: } graft: - if (1) { - struct Qdisc *old_q = NULL; - err = qdisc_graft(dev, p, clid, q, &old_q); - if (err) { - if (q) { - qdisc_lock_tree(dev); - qdisc_destroy(q); - qdisc_unlock_tree(dev); - } - return err; - } - qdisc_notify(skb, n, clid, old_q, q); - if (old_q) { - qdisc_lock_tree(dev); - qdisc_destroy(old_q); - qdisc_unlock_tree(dev); - } + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); + if (err) { + if (q) + qdisc_destroy(q); + return err; } + return 0; } @@ -802,41 +1159,45 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; tcm->tcm_info = atomic_read(&q->refcnt); - RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); + NLA_PUT_STRING(skb, TCA_KIND, q->ops->id); if (q->ops->dump && q->ops->dump(q, skb) < 0) - goto rtattr_failure; + goto nla_put_failure; q->qstats.qlen = q->q.qlen; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) - goto rtattr_failure; + if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + goto nla_put_failure; + + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) + goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) - goto rtattr_failure; + goto nla_put_failure; if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || - gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, &q->qstats) < 0) - goto rtattr_failure; + goto nla_put_failure; if (gnet_stats_finish_copy(&d) < 0) - goto rtattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: -rtattr_failure: +nla_put_failure: nlmsg_trim(skb, b); return -1; } -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, - u32 clid, struct Qdisc *old, struct Qdisc *new) +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -855,46 +1216,92 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, } if (skb->len) - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); return -EINVAL; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, + struct netlink_callback *cb, + int *q_idx_p, int s_q_idx) +{ + int ret = 0, q_idx = *q_idx_p; + struct Qdisc *q; + + if (!root) + return 0; + + q = root; + if (q_idx < s_q_idx) { + q_idx++; + } else { + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + list_for_each_entry(q, &root->list, list) { + if (q_idx < s_q_idx) { + q_idx++; + continue; + } + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + +out: + *q_idx_p = q_idx; + return ret; +done: + ret = -1; + goto out; +} + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; - struct Qdisc *q; s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; - read_lock(&dev_base_lock); + + rcu_read_lock(); idx = 0; - for_each_netdev(dev) { + for_each_netdev_rcu(net, dev) { + struct netdev_queue *dev_queue; + if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q_idx < s_q_idx) { - q_idx++; - continue; - } - if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) - goto done; - q_idx++; - } + + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + cont: idx++; } done: - read_unlock(&dev_base_lock); + rcu_read_unlock(); cb->args[0] = idx; cb->args[1] = q_idx; @@ -912,11 +1319,12 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = sock_net(skb->sk); struct tcmsg *tcm = NLMSG_DATA(n); - struct rtattr **tca = arg; + struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct Qdisc *q = NULL; - struct Qdisc_class_ops *cops; + const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; u32 pid = tcm->tcm_parent; @@ -924,9 +1332,13 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + if (err < 0) + return err; + /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. @@ -952,7 +1364,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev->qdisc->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. @@ -963,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev->qdisc->handle; } /* OK. Locate qdisc */ @@ -997,12 +1409,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) goto out; break; case RTM_DELTCLASS: - err = cops->delete(q, cl); + err = -EOPNOTSUPP; + if (cops->delete) + err = cops->delete(q, cl); if (err == 0) - tclass_notify(skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); goto out; case RTM_GETTCLASS: - err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS); + err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); goto out; default: err = -EINVAL; @@ -1011,9 +1425,11 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } new_cl = cl; - err = cops->change(q, clid, pid, tca, &new_cl); + err = -EOPNOTSUPP; + if (cops->change) + err = cops->change(q, clid, pid, tca, &new_cl); if (err == 0) - tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); + tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); out: if (cl) @@ -1031,40 +1447,43 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; - struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; + const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; tcm->tcm_info = 0; - RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); + NLA_PUT_STRING(skb, TCA_KIND, q->ops->id); if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) - goto rtattr_failure; + goto nla_put_failure; - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) - goto rtattr_failure; + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + qdisc_root_sleeping_lock(q), &d) < 0) + goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) - goto rtattr_failure; + goto nla_put_failure; if (gnet_stats_finish_copy(&d) < 0) - goto rtattr_failure; + goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: -rtattr_failure: +nla_put_failure: nlmsg_trim(skb, b); return -1; } -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event) +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -1078,7 +1497,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args @@ -1096,45 +1515,79 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } -static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct qdisc_dump_args arg; + + if (tc_qdisc_dump_ignore(q) || + *t_p < s_t || !q->ops->cl_ops || + (tcm->tcm_parent && + TC_H_MAJ(tcm->tcm_parent) != q->handle)) { + (*t_p)++; + return 0; + } + if (*t_p > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + arg.w.fn = qdisc_class_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + return -1; + (*t_p)++; + return 0; +} + +static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) { - int t; - int s_t; - struct net_device *dev; struct Qdisc *q; + + if (!root) + return 0; + + if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) + return -1; + + list_for_each_entry(q, &root->list, list) { + if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + } + + return 0; +} + +static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +{ struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); - struct qdisc_dump_args arg; + struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; + struct net_device *dev; + int t, s_t; if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; t = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (t < s_t || !q->ops->cl_ops || - (tcm->tcm_parent && - TC_H_MAJ(tcm->tcm_parent) != q->handle)) { - t++; - continue; - } - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - arg.w.fn = qdisc_class_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1]; - arg.w.count = 0; - q->ops->cl_ops->walk(q, &arg.w); - cb->args[1] = arg.w.count; - if (arg.w.stop) - break; - t++; - } + if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) + goto done; +done: cb->args[0] = t; dev_put(dev); @@ -1145,47 +1598,60 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) to this qdisc, (optionally) tests for protocol and asks specific classifiers. */ +int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + __be16 protocol = skb->protocol; + int err = 0; + + for (; tp; tp = tp->next) { + if ((tp->protocol == protocol || + tp->protocol == htons(ETH_P_ALL)) && + (err = tp->classify(skb, tp, res)) >= 0) { +#ifdef CONFIG_NET_CLS_ACT + if (err != TC_ACT_RECLASSIFY && skb->tc_verd) + skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); +#endif + return err; + } + } + return -1; +} +EXPORT_SYMBOL(tc_classify_compat); + int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { int err = 0; - __be16 protocol = skb->protocol; + __be16 protocol; #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *otp = tp; reclassify: #endif protocol = skb->protocol; - for ( ; tp; tp = tp->next) { - if ((tp->protocol == protocol || - tp->protocol == htons(ETH_P_ALL)) && - (err = tp->classify(skb, tp, res)) >= 0) { + err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT - if ( TC_ACT_RECLASSIFY == err) { - __u32 verd = (__u32) G_TC_VERD(skb->tc_verd); - tp = otp; - - if (MAX_REC_LOOP < verd++) { - printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n", - tp->prio&0xffff, ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd); - goto reclassify; - } else { - if (skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd,0); - return err; - } -#else - - return err; -#endif + if (err == TC_ACT_RECLASSIFY) { + u32 verd = G_TC_VERD(skb->tc_verd); + tp = otp; + + if (verd++ >= MAX_REC_LOOP) { + if (net_ratelimit()) + printk(KERN_NOTICE + "%s: packet reclassify loop" + " rule prio %u protocol %02x\n", + tp->q->ops->id, + tp->prio & 0xffff, ntohs(tp->protocol)); + return TC_ACT_SHOT; } - + skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); + goto reclassify; } - return -1; +#endif + return err; } +EXPORT_SYMBOL(tc_classify); void tcf_destroy(struct tcf_proto *tp) { @@ -1194,12 +1660,12 @@ void tcf_destroy(struct tcf_proto *tp) kfree(tp); } -void tcf_destroy_chain(struct tcf_proto *fl) +void tcf_destroy_chain(struct tcf_proto **fl) { struct tcf_proto *tp; - while ((tp = fl) != NULL) { - fl = tp->next; + while ((tp = *fl) != NULL) { + *fl = tp->next; tcf_destroy(tp); } } @@ -1208,17 +1674,20 @@ EXPORT_SYMBOL(tcf_destroy_chain); #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { + struct timespec ts; + + hrtimer_get_res(CLOCK_MONOTONIC, &ts); seq_printf(seq, "%08x %08x %08x %08x\n", - (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1), + (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 1000000, - (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES)); + (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts))); return 0; } static int psched_open(struct inode *inode, struct file *file) { - return single_open(file, psched_show, PDE(inode)->data); + return single_open(file, psched_show, NULL); } static const struct file_operations psched_fops = { @@ -1228,13 +1697,53 @@ static const struct file_operations psched_fops = { .llseek = seq_lseek, .release = single_release, }; + +static int __net_init psched_net_init(struct net *net) +{ + struct proc_dir_entry *e; + + e = proc_net_fops_create(net, "psched", 0, &psched_fops); + if (e == NULL) + return -ENOMEM; + + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ + proc_net_remove(net, "psched"); +} +#else +static int __net_init psched_net_init(struct net *net) +{ + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ +} #endif +static struct pernet_operations psched_net_ops = { + .init = psched_net_init, + .exit = psched_net_exit, +}; + static int __init pktsched_init(void) { + int err; + + err = register_pernet_subsys(&psched_net_ops); + if (err) { + printk(KERN_ERR "pktsched_init: " + "cannot initialize per netns operations\n"); + return err; + } + register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); - proc_net_fops_create("psched", 0, &psched_fops); + register_qdisc(&pfifo_head_drop_qdisc_ops); + register_qdisc(&mq_qdisc_ops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); @@ -1247,9 +1756,3 @@ static int __init pktsched_init(void) } subsys_initcall(pktsched_init); - -EXPORT_SYMBOL(qdisc_get_rtab); -EXPORT_SYMBOL(qdisc_put_rtab); -EXPORT_SYMBOL(register_qdisc); -EXPORT_SYMBOL(unregister_qdisc); -EXPORT_SYMBOL(tc_classify);