net sched: cleanup and rate limit warning
[safe/jmp/linux-2.6] / net / sched / sch_api.c
index 2a78d54..fe35c1f 100644 (file)
 #include <linux/list.h>
 #include <linux/hrtimer.h>
 #include <linux/lockdep.h>
+#include <linux/slab.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
-static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
+static int qdisc_notify(struct net *net, struct sk_buff *oskb,
+                       struct nlmsghdr *n, u32 clid,
                        struct Qdisc *old, struct Qdisc *new);
-static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-                        struct Qdisc *q, unsigned long cl, int event);
+static int tclass_notify(struct net *net, struct sk_buff *oskb,
+                        struct nlmsghdr *n, struct Qdisc *q,
+                        unsigned long cl, int event);
 
 /*
 
@@ -638,11 +641,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
 
-static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
+static void notify_and_destroy(struct net *net, struct sk_buff *skb,
+                              struct nlmsghdr *n, u32 clid,
                               struct Qdisc *old, struct Qdisc *new)
 {
        if (new || old)
-               qdisc_notify(skb, n, clid, old, new);
+               qdisc_notify(net, skb, n, clid, old, new);
 
        if (old)
                qdisc_destroy(old);
@@ -662,6 +666,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                       struct Qdisc *new, struct Qdisc *old)
 {
        struct Qdisc *q = old;
+       struct net *net = dev_net(dev);
        int err = 0;
 
        if (parent == NULL) {
@@ -693,13 +698,19 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                        if (new && i > 0)
                                atomic_inc(&new->refcnt);
 
-                       qdisc_destroy(old);
+                       if (!ingress)
+                               qdisc_destroy(old);
                }
 
-               notify_and_destroy(skb, n, classid, dev->qdisc, new);
-               if (new && !new->ops->attach)
-                       atomic_inc(&new->refcnt);
-               dev->qdisc = new ? : &noop_qdisc;
+               if (!ingress) {
+                       notify_and_destroy(net, skb, n, classid,
+                                          dev->qdisc, new);
+                       if (new && !new->ops->attach)
+                               atomic_inc(&new->refcnt);
+                       dev->qdisc = new ? : &noop_qdisc;
+               } else {
+                       notify_and_destroy(net, skb, n, classid, old, new);
+               }
 
                if (dev->flags & IFF_UP)
                        dev_activate(dev);
@@ -716,7 +727,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                                err = -ENOENT;
                }
                if (!err)
-                       notify_and_destroy(skb, n, classid, old, new);
+                       notify_and_destroy(net, skb, n, classid, old, new);
        }
        return err;
 }
@@ -733,7 +744,8 @@ static struct lock_class_key qdisc_rx_lock;
 
 static struct Qdisc *
 qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
-            u32 parent, u32 handle, struct nlattr **tca, int *errp)
+            struct Qdisc *p, u32 parent, u32 handle,
+            struct nlattr **tca, int *errp)
 {
        int err;
        struct nlattr *kind = tca[TCA_KIND];
@@ -803,31 +815,28 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
                        stab = qdisc_get_stab(tca[TCA_STAB]);
                        if (IS_ERR(stab)) {
                                err = PTR_ERR(stab);
-                               goto err_out3;
+                               goto err_out4;
                        }
                        sch->stab = stab;
                }
                if (tca[TCA_RATE]) {
                        spinlock_t *root_lock;
 
+                       err = -EOPNOTSUPP;
+                       if (sch->flags & TCQ_F_MQROOT)
+                               goto err_out4;
+
                        if ((sch->parent != TC_H_ROOT) &&
-                           !(sch->flags & TCQ_F_INGRESS))
+                           !(sch->flags & TCQ_F_INGRESS) &&
+                           (!p || !(p->flags & TCQ_F_MQROOT)))
                                root_lock = qdisc_root_sleeping_lock(sch);
                        else
                                root_lock = qdisc_lock(sch);
 
                        err = gen_new_estimator(&sch->bstats, &sch->rate_est,
                                                root_lock, tca[TCA_RATE]);
-                       if (err) {
-                               /*
-                                * Any broken qdiscs that would require
-                                * a ops->reset() here? The qdisc was never
-                                * in action so it shouldn't be necessary.
-                                */
-                               if (ops->destroy)
-                                       ops->destroy(sch);
-                               goto err_out3;
-                       }
+                       if (err)
+                               goto err_out4;
                }
 
                qdisc_list_add(sch);
@@ -835,7 +844,6 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
                return sch;
        }
 err_out3:
-       qdisc_put_stab(sch->stab);
        dev_put(dev);
        kfree((char *) sch - sch->padded);
 err_out2:
@@ -843,6 +851,16 @@ err_out2:
 err_out:
        *errp = err;
        return NULL;
+
+err_out4:
+       /*
+        * Any broken qdiscs that would require an ops->reset() here?
+        * The qdisc was never in action so it shouldn't be necessary.
+        */
+       qdisc_put_stab(sch->stab);
+       if (ops->destroy)
+               ops->destroy(sch);
+       goto err_out3;
 }
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
@@ -867,13 +885,16 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
        qdisc_put_stab(sch->stab);
        sch->stab = stab;
 
-       if (tca[TCA_RATE])
+       if (tca[TCA_RATE]) {
                /* NB: ignores errors from replace_estimator
                   because change can't be undone. */
+               if (sch->flags & TCQ_F_MQROOT)
+                       goto out;
                gen_replace_estimator(&sch->bstats, &sch->rate_est,
                                            qdisc_root_sleeping_lock(sch),
                                            tca[TCA_RATE]);
-
+       }
+out:
        return 0;
 }
 
@@ -932,10 +953,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        struct Qdisc *p = NULL;
        int err;
 
-       if (net != &init_net)
-               return -EINVAL;
-
-       if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
                return -ENODEV;
 
        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -975,7 +993,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
                        return err;
        } else {
-               qdisc_notify(skb, n, clid, NULL, q);
+               qdisc_notify(net, skb, n, clid, NULL, q);
        }
        return 0;
 }
@@ -994,16 +1012,13 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        struct Qdisc *q, *p;
        int err;
 
-       if (net != &init_net)
-               return -EINVAL;
-
 replay:
        /* Reinit, just in case something touches this. */
        tcm = NLMSG_DATA(n);
        clid = tcm->tcm_parent;
        q = p = NULL;
 
-       if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
                return -ENODEV;
 
        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1090,23 +1105,27 @@ replay:
                return -EINVAL;
        err = qdisc_change(q, tca);
        if (err == 0)
-               qdisc_notify(skb, n, clid, NULL, q);
+               qdisc_notify(net, skb, n, clid, NULL, q);
        return err;
 
 create_n_graft:
        if (!(n->nlmsg_flags&NLM_F_CREATE))
                return -ENOENT;
        if (clid == TC_H_INGRESS)
-               q = qdisc_create(dev, &dev->rx_queue,
+               q = qdisc_create(dev, &dev->rx_queue, p,
                                 tcm->tcm_parent, tcm->tcm_parent,
                                 tca, &err);
        else {
-               unsigned int ntx = 0;
+               struct netdev_queue *dev_queue;
 
                if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
-                       ntx = p->ops->cl_ops->select_queue(p, tcm);
+                       dev_queue = p->ops->cl_ops->select_queue(p, tcm);
+               else if (p)
+                       dev_queue = p->dev_queue;
+               else
+                       dev_queue = netdev_get_tx_queue(dev, 0);
 
-               q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx),
+               q = qdisc_create(dev, dev_queue, p,
                                 tcm->tcm_parent, tcm->tcm_handle,
                                 tca, &err);
        }
@@ -1160,7 +1179,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                goto nla_put_failure;
 
        if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
-           gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+           gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
            gnet_stats_copy_queue(&d, &q->qstats) < 0)
                goto nla_put_failure;
 
@@ -1176,8 +1195,9 @@ nla_put_failure:
        return -1;
 }
 
-static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-                       u32 clid, struct Qdisc *old, struct Qdisc *new)
+static int qdisc_notify(struct net *net, struct sk_buff *oskb,
+                       struct nlmsghdr *n, u32 clid,
+                       struct Qdisc *old, struct Qdisc *new)
 {
        struct sk_buff *skb;
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1196,7 +1216,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
        }
 
        if (skb->len)
-               return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+               return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 
 err_out:
        kfree_skb(skb);
@@ -1255,14 +1275,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
        int s_idx, s_q_idx;
        struct net_device *dev;
 
-       if (net != &init_net)
-               return 0;
-
        s_idx = cb->args[0];
        s_q_idx = q_idx = cb->args[1];
-       read_lock(&dev_base_lock);
+
+       rcu_read_lock();
        idx = 0;
-       for_each_netdev(&init_net, dev) {
+       for_each_netdev_rcu(net, dev) {
                struct netdev_queue *dev_queue;
 
                if (idx < s_idx)
@@ -1283,7 +1301,7 @@ cont:
        }
 
 done:
-       read_unlock(&dev_base_lock);
+       rcu_read_unlock();
 
        cb->args[0] = idx;
        cb->args[1] = q_idx;
@@ -1314,10 +1332,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        u32 qid = TC_H_MAJ(clid);
        int err;
 
-       if (net != &init_net)
-               return -EINVAL;
-
-       if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
                return -ENODEV;
 
        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1398,10 +1413,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                        if (cops->delete)
                                err = cops->delete(q, cl);
                        if (err == 0)
-                               tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
+                               tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
                        goto out;
                case RTM_GETTCLASS:
-                       err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
+                       err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
                        goto out;
                default:
                        err = -EINVAL;
@@ -1414,7 +1429,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        if (cops->change)
                err = cops->change(q, clid, pid, tca, &new_cl);
        if (err == 0)
-               tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
+               tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
 
 out:
        if (cl)
@@ -1437,6 +1452,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
+       tcm->tcm__pad1 = 0;
+       tcm->tcm__pad2 = 0;
        tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
        tcm->tcm_parent = q->handle;
        tcm->tcm_handle = q->handle;
@@ -1464,8 +1481,9 @@ nla_put_failure:
        return -1;
 }
 
-static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-                         struct Qdisc *q, unsigned long cl, int event)
+static int tclass_notify(struct net *net, struct sk_buff *oskb,
+                        struct nlmsghdr *n, struct Qdisc *q,
+                        unsigned long cl, int event)
 {
        struct sk_buff *skb;
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1479,7 +1497,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                return -EINVAL;
        }
 
-       return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 }
 
 struct qdisc_dump_args
@@ -1554,12 +1572,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
        struct net_device *dev;
        int t, s_t;
 
-       if (net != &init_net)
-               return 0;
-
        if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
                return 0;
-       if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+       if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
                return 0;
 
        s_t = cb->args[0];
@@ -1622,9 +1637,12 @@ reclassify:
                tp = otp;
 
                if (verd++ >= MAX_REC_LOOP) {
-                       printk("rule prio %u protocol %02x reclassify loop, "
-                              "packet dropped\n",
-                              tp->prio&0xffff, ntohs(tp->protocol));
+                       if (net_ratelimit())
+                               printk(KERN_NOTICE
+                                      "%s: packet reclassify loop"
+                                         " rule prio %u protocol %02x\n",
+                                      tp->q->ops->id,
+                                      tp->prio & 0xffff, ntohs(tp->protocol));
                        return TC_ACT_SHOT;
                }
                skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1669,7 +1687,7 @@ static int psched_show(struct seq_file *seq, void *v)
 
 static int psched_open(struct inode *inode, struct file *file)
 {
-       return single_open(file, psched_show, PDE(inode)->data);
+       return single_open(file, psched_show, NULL);
 }
 
 static const struct file_operations psched_fops = {
@@ -1679,14 +1697,53 @@ static const struct file_operations psched_fops = {
        .llseek = seq_lseek,
        .release = single_release,
 };
+
+static int __net_init psched_net_init(struct net *net)
+{
+       struct proc_dir_entry *e;
+
+       e = proc_net_fops_create(net, "psched", 0, &psched_fops);
+       if (e == NULL)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void __net_exit psched_net_exit(struct net *net)
+{
+       proc_net_remove(net, "psched");
+}
+#else
+static int __net_init psched_net_init(struct net *net)
+{
+       return 0;
+}
+
+static void __net_exit psched_net_exit(struct net *net)
+{
+}
 #endif
 
+static struct pernet_operations psched_net_ops = {
+       .init = psched_net_init,
+       .exit = psched_net_exit,
+};
+
 static int __init pktsched_init(void)
 {
+       int err;
+
+       err = register_pernet_subsys(&psched_net_ops);
+       if (err) {
+               printk(KERN_ERR "pktsched_init: "
+                      "cannot initialize per netns operations\n");
+               return err;
+       }
+
        register_qdisc(&pfifo_qdisc_ops);
        register_qdisc(&bfifo_qdisc_ops);
+       register_qdisc(&pfifo_head_drop_qdisc_ops);
        register_qdisc(&mq_qdisc_ops);
-       proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
 
        rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);