[Codel] [PATCH v1] sfq: add a Controlled Delay option
Eric Dumazet
eric.dumazet at gmail.com
Mon May 7 12:07:25 EDT 2012
On Mon, 2012-05-07 at 15:57 +0200, Eric Dumazet wrote:
> I plan to add codel to SFQ in the very near future (so that you can
> optionally select RED or Codel for SFQ flows)
Quick and dirty patch, to check whether it's sane or not.

(Dirty because you don't need a new tc binary: this just enables codel by
default, with ECN; cf. the //FIXME comments.)

I am pleased it actually works, with no extra memory needed.

Some small changes are needed in codel, so I'll send a V11 to clean
things up.
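One subtle point, visible in the sfq_skb_cb hunk below: codel timestamps
each packet at enqueue by casting the qdisc cb area, so the timestamp must
sit at offset 0 of skb->cb. A minimal sketch of the accessors this relies
on (assuming the helpers from the codel patch; exact names may differ
there):

	/* codel keeps its per-packet timestamp at the very start of skb->cb */
	struct codel_skb_cb {
		codel_time_t enqueue_time;
	};

	static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb)
	{
		return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data;
	}

	static void codel_set_enqueue_time(struct sk_buff *skb)
	{
		get_codel_cb(skb)->enqueue_time = codel_get_time();
	}

Because sfq_skb_cb puts enqueue_time first, the same cb bytes can be read
through either struct.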
include/net/codel.h | 9 +--
net/sched/sch_codel.c | 7 --
net/sched/sch_sfq.c | 117 +++++++++++++++++++++++++++++++++-------
3 files changed, 104 insertions(+), 29 deletions(-)
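Also note the unit conversion in the sfq_change() hunk: codel keeps time in
nanoseconds shifted right by CODEL_SHIFT (10, i.e. units of 1024 ns), while
the netlink values arrive in usec. Assuming those constants, a worked
example for a 5 ms target:

	/* target = 5000 us:
	 *   5000 * NSEC_PER_USEC = 5,000,000 ns
	 *   5,000,000 >> 10      = 4882 codel time units (~5 ms)
	 */
	q->cparams.target = ((u64)ctl_v2->target * NSEC_PER_USEC) >> CODEL_SHIFT;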
diff --git a/include/net/codel.h b/include/net/codel.h
index aed7ee9..57aceb8 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -175,7 +175,8 @@ static bool codel_should_drop(struct sk_buff *skb,
return drop;
}
-typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars);
+typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars,
+ struct Qdisc *sch);
static struct sk_buff *codel_dequeue(const struct codel_params *params,
struct codel_vars *vars,
@@ -183,7 +184,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params,
codel_skb_dequeue_t dequeue_func,
u32 *backlog)
{
- struct sk_buff *skb = dequeue_func(vars);
+ struct sk_buff *skb = dequeue_func(vars, stats->sch);
codel_time_t now;
bool drop;
@@ -222,7 +223,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params,
}
qdisc_drop(skb, stats->sch);
stats->drop_count++;
- skb = dequeue_func(vars);
+ skb = dequeue_func(vars, stats->sch);
if (!codel_should_drop(skb, backlog,
vars, params, stats, now)) {
/* leave dropping state */
@@ -247,7 +248,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params,
qdisc_drop(skb, stats->sch);
stats->drop_count++;
- skb = dequeue_func(vars);
+ skb = dequeue_func(vars, stats->sch);
drop = codel_should_drop(skb, backlog, vars, params, stats, now);
}
vars->dropping = true;
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index fa36dd2..c7d7fdc 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -35,13 +35,8 @@ struct codel_sched_data {
/* This is the specific function called from codel_dequeue()
* to dequeue a packet from queue.
*/
-static struct sk_buff *dequeue(struct codel_vars *vars)
+static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
{
- struct codel_sched_data *q;
- struct Qdisc *sch;
-
- q = container_of(vars, struct codel_sched_data, vars);
- sch = (struct Qdisc *)((void *)q - QDISC_ALIGN(sizeof(struct Qdisc)));
return __skb_dequeue(&sch->q);
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8a99179..d48722c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -25,6 +25,7 @@
#include <net/pkt_sched.h>
#include <net/flow_keys.h>
#include <net/red.h>
+#include <net/codel.h>
/* Stochastic Fairness Queuing algorithm.
@@ -111,7 +112,10 @@ struct sfq_slot {
short allot; /* credit for this slot */
unsigned int backlog;
- struct red_vars vars;
+ union {
+ struct red_vars rvars;
+ struct codel_vars cvars;
+ };
};
struct sfq_sched_data {
@@ -124,6 +128,7 @@ struct sfq_sched_data {
u32 perturbation;
u8 cur_depth; /* depth of longest slot */
u8 flags;
+ bool codel;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto *filter_list;
sfq_index *ht; /* Hash table ('divisor' slots) */
@@ -132,7 +137,8 @@ struct sfq_sched_data {
struct red_parms *red_parms;
struct tc_sfqred_stats stats;
struct sfq_slot *tail; /* current slot in round */
-
+ struct codel_params cparams;
+ struct codel_stats cstats;
struct sfq_head dep[SFQ_MAX_DEPTH + 1];
/* Linked lists of slots, indexed by depth
* dep[0] : list of unused flows
@@ -161,7 +167,8 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
* q->perturbation, we store flow_keys in skb->cb[]
*/
struct sfq_skb_cb {
- struct flow_keys keys;
+ codel_time_t enqueue_time; /* MUST be first field */
+ struct flow_keys keys;
};
static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
@@ -350,7 +357,7 @@ drop:
}
/* Is ECN parameter configured */
-static int sfq_prob_mark(const struct sfq_sched_data *q)
+static bool sfq_prob_mark(const struct sfq_sched_data *q)
{
return q->flags & TC_RED_ECN;
}
@@ -396,16 +403,19 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
slot = &q->slots[x];
slot->hash = hash;
slot->backlog = 0; /* should already be 0 anyway... */
- red_set_vars(&slot->vars);
+ if (q->codel)
+ codel_vars_init(&slot->cvars);
+ else
+ red_set_vars(&slot->rvars);
goto enqueue;
}
if (q->red_parms) {
- slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
- &slot->vars,
+ slot->rvars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
+ &slot->rvars,
slot->backlog);
switch (red_action(q->red_parms,
- &slot->vars,
- slot->vars.qavg)) {
+ &slot->rvars,
+ slot->rvars.qavg)) {
case RED_DONT_MARK:
break;
@@ -462,6 +472,8 @@ congestion_drop:
}
enqueue:
+ if (q->codel)
+ codel_set_enqueue_time(skb);
sch->qstats.backlog += qdisc_pkt_len(skb);
slot->backlog += qdisc_pkt_len(skb);
slot_queue_add(slot, skb);
@@ -497,6 +509,27 @@ enqueue:
return NET_XMIT_SUCCESS;
}
+/* This is the specific function called from codel_dequeue()
+ * to dequeue a packet from queue.
+ * codel already handles slot->backlog changes
+ */
+static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+ struct sfq_slot *slot;
+
+ slot = container_of(vars, struct sfq_slot, cvars);
+
+ skb = slot_dequeue_head(slot);
+ sfq_dec(q, slot - q->slots);
+// slot->backlog -= qdisc_pkt_len(skb);
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ sch->q.qlen--;
+ return skb;
+}
+
+
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
@@ -517,12 +550,28 @@ next_slot:
slot->allot += q->scaled_quantum;
goto next_slot;
}
- skb = slot_dequeue_head(slot);
- sfq_dec(q, a);
+ if (q->codel) {
+ skb = codel_dequeue(&q->cparams, &slot->cvars, &q->cstats,
+ dequeue, &slot->backlog);
+ /* We can't call qdisc_tree_decrease_qlen() if our qlen is 0,
+ * or HTB crashes. Defer it for next round.
+ */
+ if (q->cstats.drop_count && sch->q.qlen) {
+ qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ q->cstats.drop_count = 0;
+ }
+ if (!skb) {
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+ } else {
+ skb = slot_dequeue_head(slot);
+ sfq_dec(q, a);
+ slot->backlog -= qdisc_pkt_len(skb);
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ }
qdisc_bstats_update(sch, skb);
- sch->q.qlen--;
- sch->qstats.backlog -= qdisc_pkt_len(skb);
- slot->backlog -= qdisc_pkt_len(skb);
/* Is the slot empty? */
if (slot->qlen == 0) {
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
@@ -574,7 +623,10 @@ static void sfq_rehash(struct Qdisc *sch)
__skb_queue_tail(&list, skb);
}
slot->backlog = 0;
- red_set_vars(&slot->vars);
+ if (q->codel)
+ codel_vars_init(&slot->cvars);
+ else
+ red_set_vars(&slot->rvars);
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
}
q->tail = NULL;
@@ -600,8 +652,8 @@ drop: sch->qstats.backlog -= qdisc_pkt_len(skb);
goto drop;
slot_queue_add(slot, skb);
if (q->red_parms)
- slot->vars.qavg = red_calc_qavg(q->red_parms,
- &slot->vars,
+ slot->rvars.qavg = red_calc_qavg(q->red_parms,
+ &slot->rvars,
slot->backlog);
slot->backlog += qdisc_pkt_len(skb);
sfq_inc(q, x);
@@ -636,17 +688,27 @@ static void sfq_perturbation(unsigned long arg)
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}
+struct tc_sfq_qopt_v2 {
+ struct tc_sfq_qopt_v1 v1;
+ __u32 target;
+ __u32 interval;
+ __u32 minbytes;
+};
+
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
+ struct tc_sfq_qopt_v2 *ctl_v2 = NULL;
unsigned int qlen;
struct red_parms *p = NULL;
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
return -EINVAL;
- if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
+ if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v2)))
+ ctl_v2 = nla_data(opt);
+ else if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
ctl_v1 = nla_data(opt);
if (ctl->divisor &&
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
@@ -668,7 +730,21 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
q->divisor = ctl->divisor;
q->maxflows = min_t(u32, q->maxflows, q->divisor);
}
- if (ctl_v1) {
+ q->codel = true; // FIXME
+ q->cparams.ecn = true; // FIXME
+ if (ctl_v2) {
+ q->codel = true;
+ if (ctl_v2->target)
+ q->cparams.target = ((u64)ctl_v2->target * NSEC_PER_USEC) >> CODEL_SHIFT;
+ if (ctl_v2->interval)
+ q->cparams.interval = ((u64)ctl_v2->interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+ if (ctl_v2->minbytes)
+ q->cparams.minbytes = ctl_v2->minbytes;
+ q->flags = ctl_v2->v1.flags;
+ q->cparams.ecn = sfq_prob_mark(q);
+ q->headdrop = ctl_v2->v1.headdrop;
+ }
+ if (ctl_v1 && !q->codel) {
if (ctl_v1->depth)
q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
if (p) {
@@ -758,6 +834,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
q->perturbation = net_random();
+ codel_params_init(&q->cparams, sch);
+ codel_stats_init(&q->cstats, sch);
if (opt) {
int err = sfq_change(sch, opt);
@@ -810,6 +888,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.max_P = p->max_P;
}
memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
+ opt.stats.prob_mark += q->cstats.ecn_mark;
opt.flags = q->flags;
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
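Note for testing: with the FIXME defaults above (codel forced on, ECN on),
an unmodified tc binary is enough to exercise the new path; e.g. something
like

	tc qdisc replace dev eth0 root sfq

should push every SFQ flow through codel_dequeue().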