X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fsched%2Fsch_sfq.c;h=8706920a6d45b7c7b7cc67b866f94880a187f6f7;hb=31e6d363abcd0d05766c82f1a9c905a4c974a199;hp=e057768f68b4806a1155e367fbf77ba5e00d2fc4;hpb=f5539eb8caa52a9198079df767cc1bb5494e69e3;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index e057768..8706920 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -9,32 +9,19 @@ * Authors: Alexey Kuznetsov, */ -#include #include -#include -#include -#include #include #include #include #include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include #include -#include #include -#include #include -#include +#include +#include +#include #include @@ -54,7 +41,7 @@ Queuing using Deficit Round Robin", Proc. SIGCOMM 95. - This is not the thing that is usually called (W)FQ nowadays. + This is not the thing that is usually called (W)FQ nowadays. It does not use any timestamp mechanism, but instead processes queues in round-robin order. @@ -64,7 +51,7 @@ DRAWBACKS: - - "Stochastic" -> It is not 100% fair. + - "Stochastic" -> It is not 100% fair. When hash collisions occur, several flows are considered as one. - "Round-robin" -> It introduces larger delays than virtual clock @@ -108,8 +95,9 @@ struct sfq_sched_data int limit; /* Variables */ + struct tcf_proto *filter_list; struct timer_list perturb_timer; - int perturbation; + u32 perturbation; sfq_index tail; /* Index of current slot in round */ sfq_index max_depth; /* Maximal depth */ @@ -123,12 +111,7 @@ struct sfq_sched_data static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { - int pert = q->perturbation; - - /* Have we any rotation primitives? If not, WHY? */ - h ^= (h1<>(0x1F - pert)); - h ^= h>>10; - return h & 0x3FF; + return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); } static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) @@ -136,27 +119,29 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) u32 h, h2; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): { - struct iphdr *iph = skb->nh.iph; + const struct iphdr *iph = ip_hdr(skb); h = iph->daddr; - h2 = iph->saddr^iph->protocol; + h2 = iph->saddr ^ iph->protocol; if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_UDPLITE || iph->protocol == IPPROTO_SCTP || iph->protocol == IPPROTO_DCCP || iph->protocol == IPPROTO_ESP)) h2 ^= *(((u32*)iph) + iph->ihl); break; } - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): { - struct ipv6hdr *iph = skb->nh.ipv6h; + struct ipv6hdr *iph = ipv6_hdr(skb); h = iph->daddr.s6_addr32[3]; - h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; + h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || iph->nexthdr == IPPROTO_UDP || + iph->nexthdr == IPPROTO_UDPLITE || iph->nexthdr == IPPROTO_SCTP || iph->nexthdr == IPPROTO_DCCP || iph->nexthdr == IPPROTO_ESP) @@ -164,12 +149,46 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) break; } default: - h = (u32)(unsigned long)skb->dst^skb->protocol; - h2 = (u32)(unsigned long)skb->sk; + h = (unsigned long)skb_dst(skb) ^ skb->protocol; + h2 = (unsigned long)skb->sk; } + return sfq_fold_hash(q, h, h2); } +static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority) == sch->handle && + TC_H_MIN(skb->priority) > 0 && + TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) + return TC_H_MIN(skb->priority); + + if (!q->filter_list) + return sfq_hash(q, skb) + 1; + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return 0; + } +#endif + if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) + return TC_H_MIN(res.classid); + } + return 0; +} + static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; @@ -224,9 +243,9 @@ static unsigned int sfq_drop(struct Qdisc *sch) drop a packet from it */ if (d > 1) { - sfq_index x = q->dep[d+SFQ_DEPTH].next; + sfq_index x = q->dep[d + SFQ_DEPTH].next; skb = q->qs[x].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[x]); kfree_skb(skb); sfq_dec(q, x); @@ -242,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) q->next[q->tail] = q->next[d]; q->allot[q->next[d]] += q->quantum; skb = q->qs[d].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[d]); kfree_skb(skb); sfq_dec(q, d); @@ -257,18 +276,36 @@ static unsigned int sfq_drop(struct Qdisc *sch) } static int -sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) +sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned hash = sfq_hash(q, skb); + unsigned int hash; sfq_index x; + int uninitialized_var(ret); + + hash = sfq_classify(skb, sch, &ret); + if (hash == 0) { + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } + hash--; x = q->ht[hash]; if (x == SFQ_DEPTH) { q->ht[hash] = x = q->dep[SFQ_DEPTH].next; q->hash[x] = hash; } - sch->qstats.backlog += skb->len; + + /* If selected queue has length q->limit, this means that + * all another queues are empty and that we do simple tail drop, + * i.e. drop _this_ packet. + */ + if (q->qs[x].qlen >= q->limit) + return qdisc_drop(skb, sch); + + sch->qstats.backlog += qdisc_pkt_len(skb); __skb_queue_tail(&q->qs[x], skb); sfq_inc(q, x); if (q->qs[x].qlen == 1) { /* The flow is new */ @@ -282,8 +319,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit-1) { - sch->bstats.bytes += skb->len; + if (++sch->q.qlen <= q->limit) { + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -292,47 +329,22 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_CN; } -static int -sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) +static struct sk_buff * +sfq_peek(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned hash = sfq_hash(q, skb); - sfq_index x; + sfq_index a; - x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; - } - sch->qstats.backlog += skb->len; - __skb_queue_head(&q->qs[x], skb); - sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; - q->next[x] = x; - q->allot[x] = q->quantum; - } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; - q->tail = x; - } - } - if (++sch->q.qlen < q->limit - 1) { - sch->qstats.requeues++; - return 0; - } + /* No active slots */ + if (q->tail == SFQ_DEPTH) + return NULL; - sch->qstats.drops++; - sfq_drop(sch); - return NET_XMIT_CN; + a = q->next[q->tail]; + return skb_peek(&q->qs[a]); } - - - static struct sk_buff * -sfq_dequeue(struct Qdisc* sch) +sfq_dequeue(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; @@ -348,7 +360,7 @@ sfq_dequeue(struct Qdisc* sch) skb = __skb_dequeue(&q->qs[a]); sfq_dec(q, a); sch->q.qlen--; - sch->qstats.backlog -= skb->len; + sch->qstats.backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (q->qs[a].qlen == 0) { @@ -360,7 +372,7 @@ sfq_dequeue(struct Qdisc* sch) } q->next[q->tail] = a; q->allot[a] += q->quantum; - } else if ((q->allot[a] -= skb->len) <= 0) { + } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { q->tail = a; a = q->next[a]; q->allot[a] += q->quantum; @@ -369,7 +381,7 @@ sfq_dequeue(struct Qdisc* sch) } static void -sfq_reset(struct Qdisc* sch) +sfq_reset(struct Qdisc *sch) { struct sk_buff *skb; @@ -379,71 +391,76 @@ sfq_reset(struct Qdisc* sch) static void sfq_perturbation(unsigned long arg) { - struct Qdisc *sch = (struct Qdisc*)arg; + struct Qdisc *sch = (struct Qdisc *)arg; struct sfq_sched_data *q = qdisc_priv(sch); - q->perturbation = net_random()&0x1F; + q->perturbation = net_random(); - if (q->perturb_period) { - q->perturb_timer.expires = jiffies + q->perturb_period; - add_timer(&q->perturb_timer); - } + if (q->perturb_period) + mod_timer(&q->perturb_timer, jiffies + q->perturb_period); } -static int sfq_change(struct Qdisc *sch, struct rtattr *opt) +static int sfq_change(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); - struct tc_sfq_qopt *ctl = RTA_DATA(opt); + struct tc_sfq_qopt *ctl = nla_data(opt); + unsigned int qlen; - if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; sch_tree_lock(sch); - q->quantum = ctl->quantum ? : psched_mtu(sch->dev); - q->perturb_period = ctl->perturb_period*HZ; + q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); + q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); - while (sch->q.qlen >= q->limit-1) + qlen = sch->q.qlen; + while (sch->q.qlen > q->limit) sfq_drop(sch); + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); del_timer(&q->perturb_timer); if (q->perturb_period) { - q->perturb_timer.expires = jiffies + q->perturb_period; - add_timer(&q->perturb_timer); + mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + q->perturbation = net_random(); } sch_tree_unlock(sch); return 0; } -static int sfq_init(struct Qdisc *sch, struct rtattr *opt) +static int sfq_init(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); int i; - init_timer(&q->perturb_timer); - q->perturb_timer.data = (unsigned long)sch; q->perturb_timer.function = sfq_perturbation; + q->perturb_timer.data = (unsigned long)sch; + init_timer_deferrable(&q->perturb_timer); - for (i=0; iht[i] = SFQ_DEPTH; - for (i=0; iqs[i]); - q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; - q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; + q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; + q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH; } - q->limit = SFQ_DEPTH; + + q->limit = SFQ_DEPTH - 1; q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { - q->quantum = psched_mtu(sch->dev); + q->quantum = psched_mtu(qdisc_dev(sch)); q->perturb_period = 0; + q->perturbation = net_random(); } else { int err = sfq_change(sch, opt); if (err) return err; } - for (i=0; iperturb_timer); + + tcf_destroy_chain(&q->filter_list); + q->perturb_period = 0; + del_timer_sync(&q->perturb_timer); } static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_sfq_qopt opt; opt.quantum = q->quantum; - opt.perturb_period = q->perturb_period/HZ; + opt.perturb_period = q->perturb_period / HZ; opt.limit = q->limit; opt.divisor = SFQ_HASH_DIVISOR; opt.flows = q->limit; - RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; -rtattr_failure: - skb_trim(skb, b - skb->data); +nla_put_failure: + nlmsg_trim(skb, b); return -1; } -static struct Qdisc_ops sfq_qdisc_ops = { - .next = NULL, - .cl_ops = NULL, +static int sfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + return -EOPNOTSUPP; +} + +static unsigned long sfq_get(struct Qdisc *sch, u32 classid) +{ + return 0; +} + +static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static int sfq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + sfq_index idx = q->ht[cl-1]; + struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen }; + struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; + + if (gnet_stats_copy_queue(d, &qs) < 0) + return -1; + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < SFQ_HASH_DIVISOR; i++) { + if (q->ht[i] == SFQ_DEPTH || + arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, i + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops sfq_class_ops = { + .get = sfq_get, + .change = sfq_change_class, + .tcf_chain = sfq_find_tcf, + .dump = sfq_dump_class, + .dump_stats = sfq_dump_class_stats, + .walk = sfq_walk, +}; + +static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { + .cl_ops = &sfq_class_ops, .id = "sfq", .priv_size = sizeof(struct sfq_sched_data), .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, - .requeue = sfq_requeue, + .peek = sfq_peek, .drop = sfq_drop, .init = sfq_init, .reset = sfq_reset, @@ -497,7 +587,7 @@ static int __init sfq_module_init(void) { return register_qdisc(&sfq_qdisc_ops); } -static void __exit sfq_module_exit(void) +static void __exit sfq_module_exit(void) { unregister_qdisc(&sfq_qdisc_ops); }