[NET_SCHED]: Convert packet schedulers from rtnetlink to new netlink API
[safe/jmp/linux-2.6] / net / sched / sch_red.c
1 /*
2  * net/sched/sch_red.c  Random Early Detection queue.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914: computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/inet_ecn.h>
23 #include <net/red.h>
24
25
26 /*      Parameters, settable by user:
27         -----------------------------
28
29         limit           - bytes (must be > qth_max + burst)
30
31         Hard limit on queue length, should be chosen >qth_max
32         to allow packet bursts. This parameter does not
33         affect the algorithms behaviour and can be chosen
34         arbitrarily high (well, less than ram size)
35         Really, this limit will never be reached
36         if RED works correctly.
37  */
38
39 struct red_sched_data
40 {
41         u32                     limit;          /* HARD maximal queue length */
42         unsigned char           flags;
43         struct red_parms        parms;
44         struct red_stats        stats;
45         struct Qdisc            *qdisc;
46 };
47
48 static inline int red_use_ecn(struct red_sched_data *q)
49 {
50         return q->flags & TC_RED_ECN;
51 }
52
53 static inline int red_use_harddrop(struct red_sched_data *q)
54 {
55         return q->flags & TC_RED_HARDDROP;
56 }
57
58 static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
59 {
60         struct red_sched_data *q = qdisc_priv(sch);
61         struct Qdisc *child = q->qdisc;
62         int ret;
63
64         q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
65
66         if (red_is_idling(&q->parms))
67                 red_end_of_idle_period(&q->parms);
68
69         switch (red_action(&q->parms, q->parms.qavg)) {
70                 case RED_DONT_MARK:
71                         break;
72
73                 case RED_PROB_MARK:
74                         sch->qstats.overlimits++;
75                         if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76                                 q->stats.prob_drop++;
77                                 goto congestion_drop;
78                         }
79
80                         q->stats.prob_mark++;
81                         break;
82
83                 case RED_HARD_MARK:
84                         sch->qstats.overlimits++;
85                         if (red_use_harddrop(q) || !red_use_ecn(q) ||
86                             !INET_ECN_set_ce(skb)) {
87                                 q->stats.forced_drop++;
88                                 goto congestion_drop;
89                         }
90
91                         q->stats.forced_mark++;
92                         break;
93         }
94
95         ret = child->enqueue(skb, child);
96         if (likely(ret == NET_XMIT_SUCCESS)) {
97                 sch->bstats.bytes += skb->len;
98                 sch->bstats.packets++;
99                 sch->q.qlen++;
100         } else {
101                 q->stats.pdrop++;
102                 sch->qstats.drops++;
103         }
104         return ret;
105
106 congestion_drop:
107         qdisc_drop(skb, sch);
108         return NET_XMIT_CN;
109 }
110
111 static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
112 {
113         struct red_sched_data *q = qdisc_priv(sch);
114         struct Qdisc *child = q->qdisc;
115         int ret;
116
117         if (red_is_idling(&q->parms))
118                 red_end_of_idle_period(&q->parms);
119
120         ret = child->ops->requeue(skb, child);
121         if (likely(ret == NET_XMIT_SUCCESS)) {
122                 sch->qstats.requeues++;
123                 sch->q.qlen++;
124         }
125         return ret;
126 }
127
128 static struct sk_buff * red_dequeue(struct Qdisc* sch)
129 {
130         struct sk_buff *skb;
131         struct red_sched_data *q = qdisc_priv(sch);
132         struct Qdisc *child = q->qdisc;
133
134         skb = child->dequeue(child);
135         if (skb)
136                 sch->q.qlen--;
137         else if (!red_is_idling(&q->parms))
138                 red_start_of_idle_period(&q->parms);
139
140         return skb;
141 }
142
143 static unsigned int red_drop(struct Qdisc* sch)
144 {
145         struct red_sched_data *q = qdisc_priv(sch);
146         struct Qdisc *child = q->qdisc;
147         unsigned int len;
148
149         if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
150                 q->stats.other++;
151                 sch->qstats.drops++;
152                 sch->q.qlen--;
153                 return len;
154         }
155
156         if (!red_is_idling(&q->parms))
157                 red_start_of_idle_period(&q->parms);
158
159         return 0;
160 }
161
162 static void red_reset(struct Qdisc* sch)
163 {
164         struct red_sched_data *q = qdisc_priv(sch);
165
166         qdisc_reset(q->qdisc);
167         sch->q.qlen = 0;
168         red_restart(&q->parms);
169 }
170
171 static void red_destroy(struct Qdisc *sch)
172 {
173         struct red_sched_data *q = qdisc_priv(sch);
174         qdisc_destroy(q->qdisc);
175 }
176
177 static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
178 {
179         struct Qdisc *q;
180         struct nlattr *nla;
181         int ret;
182
183         q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
184                               TC_H_MAKE(sch->handle, 1));
185         if (q) {
186                 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
187                               GFP_KERNEL);
188                 if (nla) {
189                         nla->nla_type = RTM_NEWQDISC;
190                         nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
191                         ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
192
193                         ret = q->ops->change(q, nla);
194                         kfree(nla);
195
196                         if (ret == 0)
197                                 return q;
198                 }
199                 qdisc_destroy(q);
200         }
201         return NULL;
202 }
203
204 static int red_change(struct Qdisc *sch, struct nlattr *opt)
205 {
206         struct red_sched_data *q = qdisc_priv(sch);
207         struct nlattr *tb[TCA_RED_MAX + 1];
208         struct tc_red_qopt *ctl;
209         struct Qdisc *child = NULL;
210
211         if (opt == NULL || nla_parse_nested(tb, TCA_RED_MAX, opt, NULL))
212                 return -EINVAL;
213
214         if (tb[TCA_RED_PARMS] == NULL ||
215             nla_len(tb[TCA_RED_PARMS]) < sizeof(*ctl) ||
216             tb[TCA_RED_STAB] == NULL ||
217             nla_len(tb[TCA_RED_STAB]) < RED_STAB_SIZE)
218                 return -EINVAL;
219
220         ctl = nla_data(tb[TCA_RED_PARMS]);
221
222         if (ctl->limit > 0) {
223                 child = red_create_dflt(sch, ctl->limit);
224                 if (child == NULL)
225                         return -ENOMEM;
226         }
227
228         sch_tree_lock(sch);
229         q->flags = ctl->flags;
230         q->limit = ctl->limit;
231         if (child) {
232                 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
233                 qdisc_destroy(xchg(&q->qdisc, child));
234         }
235
236         red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
237                                  ctl->Plog, ctl->Scell_log,
238                                  nla_data(tb[TCA_RED_STAB]));
239
240         if (skb_queue_empty(&sch->q))
241                 red_end_of_idle_period(&q->parms);
242
243         sch_tree_unlock(sch);
244         return 0;
245 }
246
247 static int red_init(struct Qdisc* sch, struct nlattr *opt)
248 {
249         struct red_sched_data *q = qdisc_priv(sch);
250
251         q->qdisc = &noop_qdisc;
252         return red_change(sch, opt);
253 }
254
255 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
256 {
257         struct red_sched_data *q = qdisc_priv(sch);
258         struct nlattr *opts = NULL;
259         struct tc_red_qopt opt = {
260                 .limit          = q->limit,
261                 .flags          = q->flags,
262                 .qth_min        = q->parms.qth_min >> q->parms.Wlog,
263                 .qth_max        = q->parms.qth_max >> q->parms.Wlog,
264                 .Wlog           = q->parms.Wlog,
265                 .Plog           = q->parms.Plog,
266                 .Scell_log      = q->parms.Scell_log,
267         };
268
269         opts = nla_nest_start(skb, TCA_OPTIONS);
270         if (opts == NULL)
271                 goto nla_put_failure;
272         NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
273         return nla_nest_end(skb, opts);
274
275 nla_put_failure:
276         return nla_nest_cancel(skb, opts);
277 }
278
279 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
280 {
281         struct red_sched_data *q = qdisc_priv(sch);
282         struct tc_red_xstats st = {
283                 .early  = q->stats.prob_drop + q->stats.forced_drop,
284                 .pdrop  = q->stats.pdrop,
285                 .other  = q->stats.other,
286                 .marked = q->stats.prob_mark + q->stats.forced_mark,
287         };
288
289         return gnet_stats_copy_app(d, &st, sizeof(st));
290 }
291
292 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
293                           struct sk_buff *skb, struct tcmsg *tcm)
294 {
295         struct red_sched_data *q = qdisc_priv(sch);
296
297         if (cl != 1)
298                 return -ENOENT;
299         tcm->tcm_handle |= TC_H_MIN(1);
300         tcm->tcm_info = q->qdisc->handle;
301         return 0;
302 }
303
304 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
305                      struct Qdisc **old)
306 {
307         struct red_sched_data *q = qdisc_priv(sch);
308
309         if (new == NULL)
310                 new = &noop_qdisc;
311
312         sch_tree_lock(sch);
313         *old = xchg(&q->qdisc, new);
314         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
315         qdisc_reset(*old);
316         sch_tree_unlock(sch);
317         return 0;
318 }
319
320 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
321 {
322         struct red_sched_data *q = qdisc_priv(sch);
323         return q->qdisc;
324 }
325
326 static unsigned long red_get(struct Qdisc *sch, u32 classid)
327 {
328         return 1;
329 }
330
331 static void red_put(struct Qdisc *sch, unsigned long arg)
332 {
333         return;
334 }
335
336 static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
337                             struct nlattr **tca, unsigned long *arg)
338 {
339         return -ENOSYS;
340 }
341
342 static int red_delete(struct Qdisc *sch, unsigned long cl)
343 {
344         return -ENOSYS;
345 }
346
347 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
348 {
349         if (!walker->stop) {
350                 if (walker->count >= walker->skip)
351                         if (walker->fn(sch, 1, walker) < 0) {
352                                 walker->stop = 1;
353                                 return;
354                         }
355                 walker->count++;
356         }
357 }
358
359 static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
360 {
361         return NULL;
362 }
363
364 static const struct Qdisc_class_ops red_class_ops = {
365         .graft          =       red_graft,
366         .leaf           =       red_leaf,
367         .get            =       red_get,
368         .put            =       red_put,
369         .change         =       red_change_class,
370         .delete         =       red_delete,
371         .walk           =       red_walk,
372         .tcf_chain      =       red_find_tcf,
373         .dump           =       red_dump_class,
374 };
375
376 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
377         .id             =       "red",
378         .priv_size      =       sizeof(struct red_sched_data),
379         .cl_ops         =       &red_class_ops,
380         .enqueue        =       red_enqueue,
381         .dequeue        =       red_dequeue,
382         .requeue        =       red_requeue,
383         .drop           =       red_drop,
384         .init           =       red_init,
385         .reset          =       red_reset,
386         .destroy        =       red_destroy,
387         .change         =       red_change,
388         .dump           =       red_dump,
389         .dump_stats     =       red_dump_stats,
390         .owner          =       THIS_MODULE,
391 };
392
393 static int __init red_module_init(void)
394 {
395         return register_qdisc(&red_qdisc_ops);
396 }
397
398 static void __exit red_module_exit(void)
399 {
400         unregister_qdisc(&red_qdisc_ops);
401 }
402
403 module_init(red_module_init)
404 module_exit(red_module_exit)
405
406 MODULE_LICENSE("GPL");