#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
-#include <linux/bitops.h>
#include <linux/hrtimer.h>
+#include <net/net_namespace.h>
#include <net/sock.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
(root qdisc, all its children, children of children etc.)
*/
-static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
return NULL;
}
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
- struct Qdisc *q;
-
- read_lock(&qdisc_tree_lock);
- q = __qdisc_lookup(dev, handle);
- read_unlock(&qdisc_tree_lock);
- return q;
-}
-
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
struct Qdisc *leaf;
- struct Qdisc_class_ops *cops = p->ops->cl_ops;
+ const struct Qdisc_class_ops *cops = p->ops->cl_ops;
if (cops == NULL)
return NULL;
{
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
timer);
+ struct net_device *dev = wd->qdisc->dev;
wd->qdisc->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(wd->qdisc->dev);
+ smp_wmb();
+ netif_schedule(dev);
+
return HRTIMER_NORESTART;
}
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
- struct Qdisc_class_ops *cops;
+ const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
if (n == 0)
return;
while ((parentid = sch->parent)) {
- sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ if (sch == NULL) {
+ WARN_ON(parentid != TC_H_ROOT);
+ return;
+ }
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
cl = cops->get(sch, parentid);
*old = dev_graft_qdisc(dev, new);
}
} else {
- struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+ const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
err = -EINVAL;
unsigned long cl = cops->get(parent, classid);
if (cl) {
err = cops->graft(parent, cl, new, old);
- if (new)
- new->parent = classid;
cops->put(parent, cl);
}
}
*/
static struct Qdisc *
-qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
+qdisc_create(struct net_device *dev, u32 parent, u32 handle,
+ struct rtattr **tca, int *errp)
{
int err;
struct rtattr *kind = tca[TCA_KIND-1];
goto err_out2;
}
+ sch->parent = parent;
+
if (handle == TC_H_INGRESS) {
sch->flags |= TCQ_F_INGRESS;
+ sch->stats_lock = &dev->ingress_lock;
handle = TC_H_MAKE(TC_H_INGRESS, 0);
- } else if (handle == 0) {
- handle = qdisc_alloc_handle(dev);
- err = -ENOMEM;
- if (handle == 0)
- goto err_out3;
+ } else {
+ sch->stats_lock = &dev->queue_lock;
+ if (handle == 0) {
+ handle = qdisc_alloc_handle(dev);
+ err = -ENOMEM;
+ if (handle == 0)
+ goto err_out3;
+ }
}
sch->handle = handle;
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
-#ifdef CONFIG_NET_ESTIMATOR
if (tca[TCA_RATE-1]) {
err = gen_new_estimator(&sch->bstats, &sch->rate_est,
sch->stats_lock,
goto err_out3;
}
}
-#endif
qdisc_lock_tree(dev);
list_add_tail(&sch->list, &dev->qdisc_list);
qdisc_unlock_tree(dev);
if (err)
return err;
}
-#ifdef CONFIG_NET_ESTIMATOR
if (tca[TCA_RATE-1])
gen_replace_estimator(&sch->bstats, &sch->rate_est,
sch->stats_lock, tca[TCA_RATE-1]);
-#endif
return 0;
}
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
struct Qdisc *leaf;
- struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
struct check_loop_arg *arg = (struct check_loop_arg *)w;
leaf = cops->leaf(q, cl);
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct tcmsg *tcm = NLMSG_DATA(n);
struct rtattr **tca = arg;
struct net_device *dev;
struct Qdisc *p = NULL;
int err;
- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if (net != &init_net)
+ return -EINVAL;
+
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return -ENODEV;
if (clid) {
return err;
if (q) {
qdisc_notify(skb, n, clid, q, NULL);
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
} else {
qdisc_notify(skb, n, clid, NULL, q);
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct tcmsg *tcm;
struct rtattr **tca;
struct net_device *dev;
struct Qdisc *q, *p;
int err;
+ if (net != &init_net)
+ return -EINVAL;
+
replay:
/* Reinit, just in case something touches this. */
tcm = NLMSG_DATA(n);
clid = tcm->tcm_parent;
q = p = NULL;
- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return -ENODEV;
if (clid) {
if (!(n->nlmsg_flags&NLM_F_CREATE))
return -ENOENT;
if (clid == TC_H_INGRESS)
- q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
+ q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
+ tca, &err);
else
- q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
+ q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
+ tca, &err);
if (q == NULL) {
if (err == -EAGAIN)
goto replay;
err = qdisc_graft(dev, p, clid, q, &old_q);
if (err) {
if (q) {
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
return err;
}
qdisc_notify(skb, n, clid, old_q, q);
if (old_q) {
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(old_q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
}
return 0;
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
goto rtattr_failure;
if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
-#ifdef CONFIG_NET_ESTIMATOR
gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
-#endif
gnet_stats_copy_queue(&d, &q->qstats) < 0)
goto rtattr_failure;
if (gnet_stats_finish_copy(&d) < 0)
goto rtattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
}
if (skb->len)
- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
err_out:
kfree_skb(skb);
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = skb->sk->sk_net;
int idx, q_idx;
int s_idx, s_q_idx;
struct net_device *dev;
struct Qdisc *q;
+ if (net != &init_net)
+ return 0;
+
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
read_lock(&dev_base_lock);
- for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(&init_net, dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (idx > s_idx)
s_q_idx = 0;
- read_lock(&qdisc_tree_lock);
q_idx = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q_idx < s_q_idx) {
continue;
}
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
- read_unlock(&qdisc_tree_lock);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
- }
q_idx++;
}
- read_unlock(&qdisc_tree_lock);
+cont:
+ idx++;
}
done:
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct tcmsg *tcm = NLMSG_DATA(n);
struct rtattr **tca = arg;
struct net_device *dev;
struct Qdisc *q = NULL;
- struct Qdisc_class_ops *cops;
+ const struct Qdisc_class_ops *cops;
unsigned long cl = 0;
unsigned long new_cl;
u32 pid = tcm->tcm_parent;
u32 qid = TC_H_MAJ(clid);
int err;
- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if (net != &init_net)
+ return -EINVAL;
+
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return -ENODEV;
/*
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
- struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+ const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
if (gnet_stats_finish_copy(&d) < 0)
goto rtattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
return -EINVAL;
}
- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
struct qdisc_dump_args
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = skb->sk->sk_net;
int t;
int s_t;
struct net_device *dev;
struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
struct qdisc_dump_args arg;
+ if (net != &init_net)
+ return 0;
+
if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
return 0;
- if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return 0;
s_t = cb->args[0];
t = 0;
- read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
if (t < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
break;
t++;
}
- read_unlock(&qdisc_tree_lock);
cb->args[0] = t;
to this qdisc, (optionally) tests for protocol and asks
specific classifiers.
*/
+int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ __be16 protocol = skb->protocol;
+ int err = 0;
+
+ for (; tp; tp = tp->next) {
+ if ((tp->protocol == protocol ||
+ tp->protocol == htons(ETH_P_ALL)) &&
+ (err = tp->classify(skb, tp, res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
+ skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
+#endif
+ return err;
+ }
+ }
+ return -1;
+}
+EXPORT_SYMBOL(tc_classify_compat);
+
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
- struct tcf_result *res)
+ struct tcf_result *res)
{
int err = 0;
- __be16 protocol = skb->protocol;
+ __be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
struct tcf_proto *otp = tp;
reclassify:
#endif
protocol = skb->protocol;
- for ( ; tp; tp = tp->next) {
- if ((tp->protocol == protocol ||
- tp->protocol == htons(ETH_P_ALL)) &&
- (err = tp->classify(skb, tp, res)) >= 0) {
+ err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
- if ( TC_ACT_RECLASSIFY == err) {
- __u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
- tp = otp;
-
- if (MAX_REC_LOOP < verd++) {
- printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
- tp->prio&0xffff, ntohs(tp->protocol));
- return TC_ACT_SHOT;
- }
- skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd);
- goto reclassify;
- } else {
- if (skb->tc_verd)
- skb->tc_verd = SET_TC_VERD(skb->tc_verd,0);
- return err;
- }
-#else
-
- return err;
-#endif
+ if (err == TC_ACT_RECLASSIFY) {
+ u32 verd = G_TC_VERD(skb->tc_verd);
+ tp = otp;
+
+ if (verd++ >= MAX_REC_LOOP) {
+ printk("rule prio %u protocol %02x reclassify loop, "
+ "packet dropped\n",
+ tp->prio&0xffff, ntohs(tp->protocol));
+ return TC_ACT_SHOT;
}
+ skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
+ goto reclassify;
+ }
+#endif
+ return err;
+}
+EXPORT_SYMBOL(tc_classify);
+
+void tcf_destroy(struct tcf_proto *tp)
+{
+ tp->ops->destroy(tp);
+ module_put(tp->ops->owner);
+ kfree(tp);
+}
+void tcf_destroy_chain(struct tcf_proto *fl)
+{
+ struct tcf_proto *tp;
+
+ while ((tp = fl) != NULL) {
+ fl = tp->next;
+ tcf_destroy(tp);
}
- return -1;
}
+EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
+ struct timespec ts;
+
+ hrtimer_get_res(CLOCK_MONOTONIC, &ts);
seq_printf(seq, "%08x %08x %08x %08x\n",
(u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
- 1000000, (u32)NSEC_PER_SEC/ktime_to_ns(KTIME_MONOTONIC_RES));
+ 1000000,
+ (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
return 0;
}
static int __init pktsched_init(void)
{
- struct rtnetlink_link *link_p;
-
- link_p = rtnetlink_links[PF_UNSPEC];
-
- /* Setup rtnetlink links. It is made here to avoid
- exporting large number of public symbols.
- */
-
- if (link_p) {
- link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
- link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
- link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
- link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
- link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
- }
-
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
- proc_net_fops_create("psched", 0, &psched_fops);
+ proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
+
+ rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
+ rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
return 0;
}
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);
-EXPORT_SYMBOL(tc_classify);