[Codel] [PATCH v1 ] sfq: add a Controlled Delay option

Eric Dumazet eric.dumazet at gmail.com
Mon May 7 12:07:25 EDT 2012


On Mon, 2012-05-07 at 15:57 +0200, Eric Dumazet wrote:


>    I plan to add codel to SFQ in a very near future (so that you can
> optionally select RED or Codel for SFQ flows)

Quick and dirty patch, to check if it's sane or not.

(dirty because you don't need a new tc binary — this just enables codel by
default, with ECN; cf. the //FIXME comments)

I am pleased it actually works, with no extra memory need.

Some small changes are needed in codel, so I'll send a V11 to clean the
thing up.

 include/net/codel.h   |    9 +--
 net/sched/sch_codel.c |    7 --
 net/sched/sch_sfq.c   |  117 +++++++++++++++++++++++++++++++++-------
 3 files changed, 104 insertions(+), 29 deletions(-)


diff --git a/include/net/codel.h b/include/net/codel.h
index aed7ee9..57aceb8 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -175,7 +175,8 @@ static bool codel_should_drop(struct sk_buff *skb,
 	return drop;
 }
 
-typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars);
+typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars,
+						struct Qdisc *sch);
 
 static struct sk_buff *codel_dequeue(const struct codel_params *params,
 				     struct codel_vars *vars,
@@ -183,7 +184,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params,
 				     codel_skb_dequeue_t dequeue_func,
 				     u32 *backlog)
 {
-	struct sk_buff *skb = dequeue_func(vars);
+	struct sk_buff *skb = dequeue_func(vars, stats->sch);
 	codel_time_t now;
 	bool drop;
 
@@ -222,7 +223,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params,
 				}
 				qdisc_drop(skb, stats->sch);
 				stats->drop_count++;
-				skb = dequeue_func(vars);
+				skb = dequeue_func(vars, stats->sch);
 				if (!codel_should_drop(skb, backlog,
 						       vars, params, stats, now)) {
 					/* leave dropping state */
@@ -247,7 +248,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params,
 			qdisc_drop(skb, stats->sch);
 			stats->drop_count++;
 
-			skb = dequeue_func(vars);
+			skb = dequeue_func(vars, stats->sch);
 			drop = codel_should_drop(skb, backlog, vars, params, stats, now);
 		}
 		vars->dropping = true;
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index fa36dd2..c7d7fdc 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -35,13 +35,8 @@ struct codel_sched_data {
 /* This is the specific function called from codel_dequeue()
  * to dequeue a packet from queue.
  */
-static struct sk_buff *dequeue(struct codel_vars *vars)
+static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
 {
-	struct codel_sched_data *q;
-	struct Qdisc *sch;
-
-	q = container_of(vars, struct codel_sched_data, vars);
-	sch = (struct Qdisc *)((void *)q - QDISC_ALIGN(sizeof(struct Qdisc)));
 	return __skb_dequeue(&sch->q);
 }
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8a99179..d48722c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -25,6 +25,7 @@
 #include <net/pkt_sched.h>
 #include <net/flow_keys.h>
 #include <net/red.h>
+#include <net/codel.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -111,7 +112,10 @@ struct sfq_slot {
 	short		allot; /* credit for this slot */
 
 	unsigned int    backlog;
-	struct red_vars vars;
+	union {
+		struct red_vars rvars;
+		struct codel_vars cvars;
+	};
 };
 
 struct sfq_sched_data {
@@ -124,6 +128,7 @@ struct sfq_sched_data {
 	u32		perturbation;
 	u8		cur_depth;	/* depth of longest slot */
 	u8		flags;
+	bool		codel;
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct tcf_proto *filter_list;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
@@ -132,7 +137,8 @@ struct sfq_sched_data {
 	struct red_parms *red_parms;
 	struct tc_sfqred_stats stats;
 	struct sfq_slot *tail;		/* current slot in round */
-
+	struct codel_params cparams;
+	struct codel_stats  cstats;
 	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
 					/* Linked lists of slots, indexed by depth
 					 * dep[0] : list of unused flows
@@ -161,7 +167,8 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
  * q->perturbation, we store flow_keys in skb->cb[]
  */
 struct sfq_skb_cb {
-       struct flow_keys        keys;
+	codel_time_t		enqueue_time; /* MUST be first field */
+	struct flow_keys        keys;
 };
 
 static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
@@ -350,7 +357,7 @@ drop:
 }
 
 /* Is ECN parameter configured */
-static int sfq_prob_mark(const struct sfq_sched_data *q)
+static bool sfq_prob_mark(const struct sfq_sched_data *q)
 {
 	return q->flags & TC_RED_ECN;
 }
@@ -396,16 +403,19 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		slot = &q->slots[x];
 		slot->hash = hash;
 		slot->backlog = 0; /* should already be 0 anyway... */
-		red_set_vars(&slot->vars);
+		if (q->codel)
+			codel_vars_init(&slot->cvars);
+		else
+			red_set_vars(&slot->rvars);
 		goto enqueue;
 	}
 	if (q->red_parms) {
-		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
-							&slot->vars,
+		slot->rvars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
+							&slot->rvars,
 							slot->backlog);
 		switch (red_action(q->red_parms,
-				   &slot->vars,
-				   slot->vars.qavg)) {
+				   &slot->rvars,
+				   slot->rvars.qavg)) {
 		case RED_DONT_MARK:
 			break;
 
@@ -462,6 +472,8 @@ congestion_drop:
 	}
 
 enqueue:
+	if (q->codel)
+		codel_set_enqueue_time(skb);
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	slot->backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
@@ -497,6 +509,27 @@ enqueue:
 	return NET_XMIT_SUCCESS;
 }
 
+/* This is the specific function called from codel_dequeue()
+ * to dequeue a packet from queue.
+ * codel already handles slot->backlog changes
+ */
+static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	struct sfq_slot *slot;
+
+	slot = container_of(vars, struct sfq_slot, cvars);
+
+	skb = slot_dequeue_head(slot);
+	sfq_dec(q, slot - q->slots);
+//	slot->backlog -= qdisc_pkt_len(skb);
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	sch->q.qlen--;
+	return skb;
+}
+
+
 static struct sk_buff *
 sfq_dequeue(struct Qdisc *sch)
 {
@@ -517,12 +550,28 @@ next_slot:
 		slot->allot += q->scaled_quantum;
 		goto next_slot;
 	}
-	skb = slot_dequeue_head(slot);
-	sfq_dec(q, a);
+	if (q->codel) {
+		skb = codel_dequeue(&q->cparams, &slot->cvars, &q->cstats,
+				    dequeue, &slot->backlog);
+		/* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+		 * or HTB crashes. Defer it for next round.
+		 */
+		if (q->cstats.drop_count && sch->q.qlen) {
+			qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+			q->cstats.drop_count = 0;
+		}
+		if (!skb) {
+			WARN_ON_ONCE(1);
+			return NULL;
+		}
+	} else {
+		skb = slot_dequeue_head(slot);
+		sfq_dec(q, a);
+		slot->backlog -= qdisc_pkt_len(skb);
+		sch->q.qlen--;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
+	}
 	qdisc_bstats_update(sch, skb);
-	sch->q.qlen--;
-	sch->qstats.backlog -= qdisc_pkt_len(skb);
-	slot->backlog -= qdisc_pkt_len(skb);
 	/* Is the slot empty? */
 	if (slot->qlen == 0) {
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
@@ -574,7 +623,10 @@ static void sfq_rehash(struct Qdisc *sch)
 			__skb_queue_tail(&list, skb);
 		}
 		slot->backlog = 0;
-		red_set_vars(&slot->vars);
+		if (q->codel)
+			codel_vars_init(&slot->cvars);
+		else
+			red_set_vars(&slot->rvars);
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 	}
 	q->tail = NULL;
@@ -600,8 +652,8 @@ drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
 			goto drop;
 		slot_queue_add(slot, skb);
 		if (q->red_parms)
-			slot->vars.qavg = red_calc_qavg(q->red_parms,
-							&slot->vars,
+			slot->rvars.qavg = red_calc_qavg(q->red_parms,
+							&slot->rvars,
 							slot->backlog);
 		slot->backlog += qdisc_pkt_len(skb);
 		sfq_inc(q, x);
@@ -636,17 +688,27 @@ static void sfq_perturbation(unsigned long arg)
 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
 }
 
+struct tc_sfq_qopt_v2 {
+	struct tc_sfq_qopt_v1 v1;
+	__u32 target;
+	__u32 interval;
+	__u32 minbytes;
+};
+
 static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
+	struct tc_sfq_qopt_v2 *ctl_v2 = NULL;
 	unsigned int qlen;
 	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
-	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
+	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v2)))
+		ctl_v2 = nla_data(opt);
+	else if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
 		ctl_v1 = nla_data(opt);
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
@@ -668,7 +730,21 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->divisor = ctl->divisor;
 		q->maxflows = min_t(u32, q->maxflows, q->divisor);
 	}
-	if (ctl_v1) {
+	q->codel = true; // FIXME
+	q->cparams.ecn = true; // FIXME
+	if (ctl_v2) {
+		q->codel = true;
+		if (ctl_v2->target)
+			q->cparams.target = ((u64)ctl_v2->target * NSEC_PER_USEC) >> CODEL_SHIFT;
+		if (ctl_v2->interval)
+			q->cparams.interval = ((u64)ctl_v2->interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+		if (ctl_v2->minbytes)
+			q->cparams.minbytes = ctl_v2->minbytes;
+		q->flags = ctl_v2->v1.flags;
+		q->cparams.ecn = sfq_prob_mark(q);
+		q->headdrop = ctl_v2->v1.headdrop;
+	}
+	if (ctl_v1 && !q->codel) {
 		if (ctl_v1->depth)
 			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
 		if (p) {
@@ -758,6 +834,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = 0;
 	q->perturbation = net_random();
+	codel_params_init(&q->cparams, sch);
+	codel_stats_init(&q->cstats, sch);
 
 	if (opt) {
 		int err = sfq_change(sch, opt);
@@ -810,6 +888,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 		opt.max_P	= p->max_P;
 	}
 	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
+	opt.stats.prob_mark += q->cstats.ecn_mark;
 	opt.flags	= q->flags;
 
 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))





More information about the Codel mailing list