From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ee0-f43.google.com (mail-ee0-f43.google.com [74.125.83.43]) (using TLSv1 with cipher RC4-SHA (128/128 bits)) (Client CN "smtp.gmail.com", Issuer "Google Internet Authority" (verified OK)) by huchra.bufferbloat.net (Postfix) with ESMTPS id 52373201A91 for ; Mon, 7 May 2012 09:07:36 -0700 (PDT) Received: by eekc13 with SMTP id c13so2066564eek.16 for ; Mon, 07 May 2012 09:07:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=subject:from:to:cc:in-reply-to:references:content-type:date :message-id:mime-version:x-mailer:content-transfer-encoding; bh=2rZ0JqGf1XCbYOi6mdp0YVjmKe366VaqbAx1+2NlhcM=; b=vmbZXkksJq3/+iRLgCAHA7N/IN022WDZRN8RXju7PZU4iPwBKa9ssYl+llhy88nwpV vYXCxJbtIKlQKasOsb46p/D/dfLauwp73RWjHFfJ5aPcXIHg+2qfwzX/fw+tbZWEAj3H hRffxeH3tKfDJ9aKfHy+DjtypYwmz44qsIMfz13qqHrMHaoO4PsPaJQho2FF09s1uReA ZWkAUJN0h0LsYKt23MR6+sqg8QaYYNMP7dq/kseAWWrlKm/cGzNtPWaa8at4xCsWmZ/E qFLGfa9IhO86x18X6DaH9q+3Gg/bkVg4k0rPdQs49Hl/tmog5SK8jbSUBWxVoc6PUXb0 Jvsg== Received: by 10.14.101.134 with SMTP id b6mr2824824eeg.5.1336406853888; Mon, 07 May 2012 09:07:33 -0700 (PDT) Received: from [172.30.42.18] (122.237.66.86.rev.sfr.net. 
[86.66.237.122]) by mx.google.com with ESMTPS id y53sm88281032eea.3.2012.05.07.09.07.28 (version=SSLv3 cipher=OTHER); Mon, 07 May 2012 09:07:32 -0700 (PDT) From: Eric Dumazet To: Dave =?ISO-8859-1?Q?T=E4ht?= In-Reply-To: <1336399043.3752.2318.camel@edumazet-glaptop> References: <1336368957-17586-1-git-send-email-dave.taht@bufferbloat.net> <1336399043.3752.2318.camel@edumazet-glaptop> Content-Type: text/plain; charset="UTF-8" Date: Mon, 07 May 2012 18:07:25 +0200 Message-ID: <1336406845.3752.2324.camel@edumazet-glaptop> Mime-Version: 1.0 X-Mailer: Evolution 2.28.3 Content-Transfer-Encoding: 7bit Cc: codel@lists.bufferbloat.net Subject: [Codel] [PATCH v1 ] sfq: add a Controlled Delay option X-BeenThere: codel@lists.bufferbloat.net X-Mailman-Version: 2.1.13 Precedence: list List-Id: CoDel AQM discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 07 May 2012 16:07:37 -0000 On Mon, 2012-05-07 at 15:57 +0200, Eric Dumazet wrote: > I plan to add codel to SFQ in a very near future (so that you can > optionally select RED or Codel for SFQ flows) Quick and dirty patch, to check if it's sane or not. (dirty because you don't need a new tc binary, this just enables codel by default, with ECN (cf //FIXME comments)) I am pleased it actually works, with no extra memory need. Some small changes are needed on codel, so I'll send a V11 to clean the thing. 
include/net/codel.h | 9 +-- net/sched/sch_codel.c | 7 -- net/sched/sch_sfq.c | 117 +++++++++++++++++++++++++++++++++------- 3 files changed, 104 insertions(+), 29 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index aed7ee9..57aceb8 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -175,7 +175,8 @@ static bool codel_should_drop(struct sk_buff *skb, return drop; } -typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars); +typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars, + struct Qdisc *sch); static struct sk_buff *codel_dequeue(const struct codel_params *params, struct codel_vars *vars, @@ -183,7 +184,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params, codel_skb_dequeue_t dequeue_func, u32 *backlog) { - struct sk_buff *skb = dequeue_func(vars); + struct sk_buff *skb = dequeue_func(vars, stats->sch); codel_time_t now; bool drop; @@ -222,7 +223,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params, } qdisc_drop(skb, stats->sch); stats->drop_count++; - skb = dequeue_func(vars); + skb = dequeue_func(vars, stats->sch); if (!codel_should_drop(skb, backlog, vars, params, stats, now)) { /* leave dropping state */ @@ -247,7 +248,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params, qdisc_drop(skb, stats->sch); stats->drop_count++; - skb = dequeue_func(vars); + skb = dequeue_func(vars, stats->sch); drop = codel_should_drop(skb, backlog, vars, params, stats, now); } vars->dropping = true; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index fa36dd2..c7d7fdc 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -35,13 +35,8 @@ struct codel_sched_data { /* This is the specific function called from codel_dequeue() * to dequeue a packet from queue. 
*/ -static struct sk_buff *dequeue(struct codel_vars *vars) +static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) { - struct codel_sched_data *q; - struct Qdisc *sch; - - q = container_of(vars, struct codel_sched_data, vars); - sch = (struct Qdisc *)((void *)q - QDISC_ALIGN(sizeof(struct Qdisc))); return __skb_dequeue(&sch->q); } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8a99179..d48722c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -25,6 +25,7 @@ #include #include #include +#include /* Stochastic Fairness Queuing algorithm. @@ -111,7 +112,10 @@ struct sfq_slot { short allot; /* credit for this slot */ unsigned int backlog; - struct red_vars vars; + union { + struct red_vars rvars; + struct codel_vars cvars; + }; }; struct sfq_sched_data { @@ -124,6 +128,7 @@ struct sfq_sched_data { u32 perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; + bool codel; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct tcf_proto *filter_list; sfq_index *ht; /* Hash table ('divisor' slots) */ @@ -132,7 +137,8 @@ struct sfq_sched_data { struct red_parms *red_parms; struct tc_sfqred_stats stats; struct sfq_slot *tail; /* current slot in round */ - + struct codel_params cparams; + struct codel_stats cstats; struct sfq_head dep[SFQ_MAX_DEPTH + 1]; /* Linked lists of slots, indexed by depth * dep[0] : list of unused flows @@ -161,7 +167,8 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index * q->perturbation, we store flow_keys in skb->cb[] */ struct sfq_skb_cb { - struct flow_keys keys; + codel_time_t enqueue_time; /* MUST be first field */ + struct flow_keys keys; }; static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) @@ -350,7 +357,7 @@ drop: } /* Is ECN parameter configured */ -static int sfq_prob_mark(const struct sfq_sched_data *q) +static bool sfq_prob_mark(const struct sfq_sched_data *q) { return q->flags & TC_RED_ECN; } @@ -396,16 +403,19 @@ 
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot = &q->slots[x]; slot->hash = hash; slot->backlog = 0; /* should already be 0 anyway... */ - red_set_vars(&slot->vars); + if (q->codel) + codel_vars_init(&slot->cvars); + else + red_set_vars(&slot->rvars); goto enqueue; } if (q->red_parms) { - slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms, - &slot->vars, + slot->rvars.qavg = red_calc_qavg_no_idle_time(q->red_parms, + &slot->rvars, slot->backlog); switch (red_action(q->red_parms, - &slot->vars, - slot->vars.qavg)) { + &slot->rvars, + slot->rvars.qavg)) { case RED_DONT_MARK: break; @@ -462,6 +472,8 @@ congestion_drop: } enqueue: + if (q->codel) + codel_set_enqueue_time(skb); sch->qstats.backlog += qdisc_pkt_len(skb); slot->backlog += qdisc_pkt_len(skb); slot_queue_add(slot, skb); @@ -497,6 +509,27 @@ enqueue: return NET_XMIT_SUCCESS; } +/* This is the specific function called from codel_dequeue() + * to dequeue a packet from queue. + * codel already handles slot->backlog changes + */ +static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + struct sfq_slot *slot; + + slot = container_of(vars, struct sfq_slot, cvars); + + skb = slot_dequeue_head(slot); + sfq_dec(q, slot - q->slots); +// slot->backlog -= qdisc_pkt_len(skb); + sch->qstats.backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; + return skb; +} + + static struct sk_buff * sfq_dequeue(struct Qdisc *sch) { @@ -517,12 +550,28 @@ next_slot: slot->allot += q->scaled_quantum; goto next_slot; } - skb = slot_dequeue_head(slot); - sfq_dec(q, a); + if (q->codel) { + skb = codel_dequeue(&q->cparams, &slot->cvars, &q->cstats, + dequeue, &slot->backlog); + /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, + * or HTB crashes. Defer it for next round. 
+ */ + if (q->cstats.drop_count && sch->q.qlen) { + qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + q->cstats.drop_count = 0; + } + if (!skb) { + WARN_ON_ONCE(1); + return NULL; + } + } else { + skb = slot_dequeue_head(slot); + sfq_dec(q, a); + slot->backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + } qdisc_bstats_update(sch, skb); - sch->q.qlen--; - sch->qstats.backlog -= qdisc_pkt_len(skb); - slot->backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (slot->qlen == 0) { q->ht[slot->hash] = SFQ_EMPTY_SLOT; @@ -574,7 +623,10 @@ static void sfq_rehash(struct Qdisc *sch) __skb_queue_tail(&list, skb); } slot->backlog = 0; - red_set_vars(&slot->vars); + if (q->codel) + codel_vars_init(&slot->cvars); + else + red_set_vars(&slot->rvars); q->ht[slot->hash] = SFQ_EMPTY_SLOT; } q->tail = NULL; @@ -600,8 +652,8 @@ drop: sch->qstats.backlog -= qdisc_pkt_len(skb); goto drop; slot_queue_add(slot, skb); if (q->red_parms) - slot->vars.qavg = red_calc_qavg(q->red_parms, - &slot->vars, + slot->rvars.qavg = red_calc_qavg(q->red_parms, + &slot->rvars, slot->backlog); slot->backlog += qdisc_pkt_len(skb); sfq_inc(q, x); @@ -636,17 +688,27 @@ static void sfq_perturbation(unsigned long arg) mod_timer(&q->perturb_timer, jiffies + q->perturb_period); } +struct tc_sfq_qopt_v2 { + struct tc_sfq_qopt_v1 v1; + __u32 target; + __u32 interval; + __u32 minbytes; +}; + static int sfq_change(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = nla_data(opt); struct tc_sfq_qopt_v1 *ctl_v1 = NULL; + struct tc_sfq_qopt_v2 *ctl_v2 = NULL; unsigned int qlen; struct red_parms *p = NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; - if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) + if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v2))) + ctl_v2 = nla_data(opt); + else if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) ctl_v1 = nla_data(opt); if (ctl->divisor && 
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) @@ -668,7 +730,21 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) q->divisor = ctl->divisor; q->maxflows = min_t(u32, q->maxflows, q->divisor); } - if (ctl_v1) { + q->codel = true; // FIXME + q->cparams.ecn = true; // FIXME + if (ctl_v2) { + q->codel = true; + if (ctl_v2->target) + q->cparams.target = ((u64)ctl_v2->target * NSEC_PER_USEC) >> CODEL_SHIFT; + if (ctl_v2->interval) + q->cparams.interval = ((u64)ctl_v2->interval * NSEC_PER_USEC) >> CODEL_SHIFT; + if (ctl_v2->minbytes) + q->cparams.minbytes = ctl_v2->minbytes; + q->flags = ctl_v2->v1.flags; + q->cparams.ecn = sfq_prob_mark(q); + q->headdrop = ctl_v2->v1.headdrop; + } + if (ctl_v1 && !q->codel) { if (ctl_v1->depth) q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); if (p) { @@ -758,6 +834,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; q->perturbation = net_random(); + codel_params_init(&q->cparams, sch); + codel_stats_init(&q->cstats, sch); if (opt) { int err = sfq_change(sch, opt); @@ -810,6 +888,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.max_P = p->max_P; } memcpy(&opt.stats, &q->stats, sizeof(opt.stats)); + opt.stats.prob_mark += q->cstats.ecn_mark; opt.flags = q->flags; if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))