From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ee0-f43.google.com (mail-ee0-f43.google.com [74.125.83.43]) (using TLSv1 with cipher RC4-SHA (128/128 bits)) (Client CN "smtp.gmail.com", Issuer "Google Internet Authority" (verified OK)) by huchra.bufferbloat.net (Postfix) with ESMTPS id 52373201A91 for ; Mon, 7 May 2012 09:07:36 -0700 (PDT) Received: by eekc13 with SMTP id c13so2066564eek.16 for ; Mon, 07 May 2012 09:07:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=subject:from:to:cc:in-reply-to:references:content-type:date :message-id:mime-version:x-mailer:content-transfer-encoding; bh=2rZ0JqGf1XCbYOi6mdp0YVjmKe366VaqbAx1+2NlhcM=; b=vmbZXkksJq3/+iRLgCAHA7N/IN022WDZRN8RXju7PZU4iPwBKa9ssYl+llhy88nwpV vYXCxJbtIKlQKasOsb46p/D/dfLauwp73RWjHFfJ5aPcXIHg+2qfwzX/fw+tbZWEAj3H hRffxeH3tKfDJ9aKfHy+DjtypYwmz44qsIMfz13qqHrMHaoO4PsPaJQho2FF09s1uReA ZWkAUJN0h0LsYKt23MR6+sqg8QaYYNMP7dq/kseAWWrlKm/cGzNtPWaa8at4xCsWmZ/E qFLGfa9IhO86x18X6DaH9q+3Gg/bkVg4k0rPdQs49Hl/tmog5SK8jbSUBWxVoc6PUXb0 Jvsg== Received: by 10.14.101.134 with SMTP id b6mr2824824eeg.5.1336406853888; Mon, 07 May 2012 09:07:33 -0700 (PDT) Received: from [172.30.42.18] (122.237.66.86.rev.sfr.net. 
[86.66.237.122]) by mx.google.com with ESMTPS id y53sm88281032eea.3.2012.05.07.09.07.28 (version=SSLv3 cipher=OTHER); Mon, 07 May 2012 09:07:32 -0700 (PDT) From: Eric Dumazet To: Dave =?ISO-8859-1?Q?T=E4ht?= In-Reply-To: <1336399043.3752.2318.camel@edumazet-glaptop> References: <1336368957-17586-1-git-send-email-dave.taht@bufferbloat.net> <1336399043.3752.2318.camel@edumazet-glaptop> Content-Type: text/plain; charset="UTF-8" Date: Mon, 07 May 2012 18:07:25 +0200 Message-ID: <1336406845.3752.2324.camel@edumazet-glaptop> Mime-Version: 1.0 X-Mailer: Evolution 2.28.3 Content-Transfer-Encoding: 7bit Cc: codel@lists.bufferbloat.net Subject: [Codel] [PATCH v1 ] sfq: add a Controlled Delay option X-BeenThere: codel@lists.bufferbloat.net X-Mailman-Version: 2.1.13 Precedence: list List-Id: CoDel AQM discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 07 May 2012 16:07:37 -0000 On Mon, 2012-05-07 at 15:57 +0200, Eric Dumazet wrote: > I plan to add codel to SFQ in a very near future (so that you can > optionally select RED or Codel for SFQ flows) Quick and dirty patch, to check if it's sane or not. (dirty because you don't need a new tc binary, this just enables codel by default, with ECN (cf //FIXME comments)) I am pleased it actually works, with no extra memory need. Some small changes are needed on codel, so I'll send a V11 to clean the thing. 
include/net/codel.h | 9 +-- net/sched/sch_codel.c | 7 -- net/sched/sch_sfq.c | 117 +++++++++++++++++++++++++++++++++------- 3 files changed, 104 insertions(+), 29 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index aed7ee9..57aceb8 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -175,7 +175,8 @@ static bool codel_should_drop(struct sk_buff *skb, return drop; } -typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars); +typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars, + struct Qdisc *sch); static struct sk_buff *codel_dequeue(const struct codel_params *params, struct codel_vars *vars, @@ -183,7 +184,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params, codel_skb_dequeue_t dequeue_func, u32 *backlog) { - struct sk_buff *skb = dequeue_func(vars); + struct sk_buff *skb = dequeue_func(vars, stats->sch); codel_time_t now; bool drop; @@ -222,7 +223,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params, } qdisc_drop(skb, stats->sch); stats->drop_count++; - skb = dequeue_func(vars); + skb = dequeue_func(vars, stats->sch); if (!codel_should_drop(skb, backlog, vars, params, stats, now)) { /* leave dropping state */ @@ -247,7 +248,7 @@ static struct sk_buff *codel_dequeue(const struct codel_params *params, qdisc_drop(skb, stats->sch); stats->drop_count++; - skb = dequeue_func(vars); + skb = dequeue_func(vars, stats->sch); drop = codel_should_drop(skb, backlog, vars, params, stats, now); } vars->dropping = true; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index fa36dd2..c7d7fdc 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -35,13 +35,8 @@ struct codel_sched_data { /* This is the specific function called from codel_dequeue() * to dequeue a packet from queue. 
*/ -static struct sk_buff *dequeue(struct codel_vars *vars) +static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) { - struct codel_sched_data *q; - struct Qdisc *sch; - - q = container_of(vars, struct codel_sched_data, vars); - sch = (struct Qdisc *)((void *)q - QDISC_ALIGN(sizeof(struct Qdisc))); return __skb_dequeue(&sch->q); } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8a99179..d48722c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -25,6 +25,7 @@ #include #include #include +#include /* Stochastic Fairness Queuing algorithm. @@ -111,7 +112,10 @@ struct sfq_slot { short allot; /* credit for this slot */ unsigned int backlog; - struct red_vars vars; + union { + struct red_vars rvars; + struct codel_vars cvars; + }; }; struct sfq_sched_data { @@ -124,6 +128,7 @@ struct sfq_sched_data { u32 perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; + bool codel; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct tcf_proto *filter_list; sfq_index *ht; /* Hash table ('divisor' slots) */ @@ -132,7 +137,8 @@ struct sfq_sched_data { struct red_parms *red_parms; struct tc_sfqred_stats stats; struct sfq_slot *tail; /* current slot in round */ - + struct codel_params cparams; + struct codel_stats cstats; struct sfq_head dep[SFQ_MAX_DEPTH + 1]; /* Linked lists of slots, indexed by depth * dep[0] : list of unused flows @@ -161,7 +167,8 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index * q->perturbation, we store flow_keys in skb->cb[] */ struct sfq_skb_cb { - struct flow_keys keys; + codel_time_t enqueue_time; /* MUST be first field */ + struct flow_keys keys; }; static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) @@ -350,7 +357,7 @@ drop: } /* Is ECN parameter configured */ -static int sfq_prob_mark(const struct sfq_sched_data *q) +static bool sfq_prob_mark(const struct sfq_sched_data *q) { return q->flags & TC_RED_ECN; } @@ -396,16 +403,19 @@ 
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot = &q->slots[x]; slot->hash = hash; slot->backlog = 0; /* should already be 0 anyway... */ - red_set_vars(&slot->vars); + if (q->codel) + codel_vars_init(&slot->cvars); + else + red_set_vars(&slot->rvars); goto enqueue; } if (q->red_parms) { - slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms, - &slot->vars, + slot->rvars.qavg = red_calc_qavg_no_idle_time(q->red_parms, + &slot->rvars, slot->backlog); switch (red_action(q->red_parms, - &slot->vars, - slot->vars.qavg)) { + &slot->rvars, + slot->rvars.qavg)) { case RED_DONT_MARK: break; @@ -462,6 +472,8 @@ congestion_drop: } enqueue: + if (q->codel) + codel_set_enqueue_time(skb); sch->qstats.backlog += qdisc_pkt_len(skb); slot->backlog += qdisc_pkt_len(skb); slot_queue_add(slot, skb); @@ -497,6 +509,27 @@ enqueue: return NET_XMIT_SUCCESS; } +/* This is the specific function called from codel_dequeue() + * to dequeue a packet from queue. + * codel already handles slot->backlog changes + */ +static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + struct sfq_slot *slot; + + slot = container_of(vars, struct sfq_slot, cvars); + + skb = slot_dequeue_head(slot); + sfq_dec(q, slot - q->slots); +// slot->backlog -= qdisc_pkt_len(skb); + sch->qstats.backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; + return skb; +} + + static struct sk_buff * sfq_dequeue(struct Qdisc *sch) { @@ -517,12 +550,28 @@ next_slot: slot->allot += q->scaled_quantum; goto next_slot; } - skb = slot_dequeue_head(slot); - sfq_dec(q, a); + if (q->codel) { + skb = codel_dequeue(&q->cparams, &slot->cvars, &q->cstats, + dequeue, &slot->backlog); + /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, + * or HTB crashes. Defer it for next round. 
+ */ + if (q->cstats.drop_count && sch->q.qlen) { + qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + q->cstats.drop_count = 0; + } + if (!skb) { + WARN_ON_ONCE(1); + return NULL; + } + } else { + skb = slot_dequeue_head(slot); + sfq_dec(q, a); + slot->backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + } qdisc_bstats_update(sch, skb); - sch->q.qlen--; - sch->qstats.backlog -= qdisc_pkt_len(skb); - slot->backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (slot->qlen == 0) { q->ht[slot->hash] = SFQ_EMPTY_SLOT; @@ -574,7 +623,10 @@ static void sfq_rehash(struct Qdisc *sch) __skb_queue_tail(&list, skb); } slot->backlog = 0; - red_set_vars(&slot->vars); + if (q->codel) + codel_vars_init(&slot->cvars); + else + red_set_vars(&slot->rvars); q->ht[slot->hash] = SFQ_EMPTY_SLOT; } q->tail = NULL; @@ -600,8 +652,8 @@ drop: sch->qstats.backlog -= qdisc_pkt_len(skb); goto drop; slot_queue_add(slot, skb); if (q->red_parms) - slot->vars.qavg = red_calc_qavg(q->red_parms, - &slot->vars, + slot->rvars.qavg = red_calc_qavg(q->red_parms, + &slot->rvars, slot->backlog); slot->backlog += qdisc_pkt_len(skb); sfq_inc(q, x); @@ -636,17 +688,27 @@ static void sfq_perturbation(unsigned long arg) mod_timer(&q->perturb_timer, jiffies + q->perturb_period); } +struct tc_sfq_qopt_v2 { + struct tc_sfq_qopt_v1 v1; + __u32 target; + __u32 interval; + __u32 minbytes; +}; + static int sfq_change(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = nla_data(opt); struct tc_sfq_qopt_v1 *ctl_v1 = NULL; + struct tc_sfq_qopt_v2 *ctl_v2 = NULL; unsigned int qlen; struct red_parms *p = NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; - if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) + if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v2))) + ctl_v2 = nla_data(opt); + else if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) ctl_v1 = nla_data(opt); if (ctl->divisor && 
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) @@ -668,7 +730,21 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) q->divisor = ctl->divisor; q->maxflows = min_t(u32, q->maxflows, q->divisor); } - if (ctl_v1) { + q->codel = true; // FIXME + q->cparams.ecn = true; // FIXME + if (ctl_v2) { + q->codel = true; + if (ctl_v2->target) + q->cparams.target = ((u64)ctl_v2->target * NSEC_PER_USEC) >> CODEL_SHIFT; + if (ctl_v2->interval) + q->cparams.interval = ((u64)ctl_v2->interval * NSEC_PER_USEC) >> CODEL_SHIFT; + if (ctl_v2->minbytes) + q->cparams.minbytes = ctl_v2->minbytes; + q->flags = ctl_v2->v1.flags; + q->cparams.ecn = sfq_prob_mark(q); + q->headdrop = ctl_v2->v1.headdrop; + } + if (ctl_v1 && !q->codel) { if (ctl_v1->depth) q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); if (p) { @@ -758,6 +834,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; q->perturbation = net_random(); + codel_params_init(&q->cparams, sch); + codel_stats_init(&q->cstats, sch); if (opt) { int err = sfq_change(sch, opt); @@ -810,6 +888,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.max_P = p->max_P; } memcpy(&opt.stats, &q->stats, sizeof(opt.stats)); + opt.stats.prob_mark += q->cstats.ecn_mark; opt.flags = q->flags; if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))