From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.taht.net (mail.taht.net [IPv6:2a01:7e00::f03c:91ff:feae:7028]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.bufferbloat.net (Postfix) with ESMTPS id 366EC3BA8E for ; Mon, 19 Nov 2018 13:40:04 -0500 (EST) Received: from dancer.taht.net (unknown [IPv6:2603:3024:1536:86f0:eea8:6bff:fefe:9a2]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.taht.net (Postfix) with ESMTPSA id 21809228A5 for ; Mon, 19 Nov 2018 18:40:02 +0000 (UTC) From: Dave Taht To: cake@lists.bufferbloat.net References: <20181111171131.24115-1-edumazet@google.com> Date: Mon, 19 Nov 2018 10:39:51 -0800 In-Reply-To: <20181111171131.24115-1-edumazet@google.com> (Eric Dumazet's message of "Sun, 11 Nov 2018 09:11:31 -0800") Message-ID: <87in0sewso.fsf@taht.net> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain Subject: Re: [Cake] [PATCH net-next] net_sched: sch_fq: add dctcp-like marking X-BeenThere: cake@lists.bufferbloat.net X-Mailman-Version: 2.1.20 Precedence: list List-Id: Cake - FQ_codel the next generation List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 19 Nov 2018 18:40:04 -0000 Never did find a use for CE_threshold... now... Eric Dumazet writes: > Similar to 80ba92fa1a92 ("codel: add ce_threshold attribute") > > After EDT adoption, it became easier to implement DCTCP-like CE marking. > > In many cases, queues are not building in the network fabric but on > the hosts themselves. > > If packets leaving fq missed their Earliest Departure Time by XXX usec, > we mark them with ECN CE. This gives feedback (after one RTT) to > the sender to slow down and find a better operating mode. 
> > Example : > > tc qd replace dev eth0 root fq ce_threshold 2.5ms > > Signed-off-by: Eric Dumazet > --- > include/uapi/linux/pkt_sched.h | 3 +++ > net/sched/sch_fq.c | 21 +++++++++++++++++++++ > 2 files changed, 24 insertions(+) > > diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h > index 89ee47c2f17d86fba9a37733b5593680ceefcf00..ee017bc057a3cb390f995329ec8ab5432a844557 100644 > --- a/include/uapi/linux/pkt_sched.h > +++ b/include/uapi/linux/pkt_sched.h > @@ -864,6 +864,8 @@ enum { > > TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ > > + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ > + > __TCA_FQ_MAX > }; > > @@ -882,6 +884,7 @@ struct tc_fq_qd_stats { > __u32 inactive_flows; > __u32 throttled_flows; > __u32 unthrottle_latency_ns; > + __u64 ce_mark; /* packets above ce_threshold */ > }; > > /* Heavy-Hitter Filter */ > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c > index 4b1af706896c07e5a0fe6d542dfcd530acdcf8f5..3671eab91107d168062ab73ebb0640d44f94fc95 100644 > --- a/net/sched/sch_fq.c > +++ b/net/sched/sch_fq.c > @@ -94,6 +94,7 @@ struct fq_sched_data { > u32 flow_refill_delay; > u32 flow_plimit; /* max packets per flow */ > unsigned long flow_max_rate; /* optional max rate per flow */ > + u64 ce_threshold; > u32 orphan_mask; /* mask for orphaned skb */ > u32 low_rate_threshold; > struct rb_root *fq_root; > @@ -107,6 +108,7 @@ struct fq_sched_data { > u64 stat_gc_flows; > u64 stat_internal_packets; > u64 stat_throttled; > + u64 stat_ce_mark; > u64 stat_flows_plimit; > u64 stat_pkts_too_long; > u64 stat_allocation_errors; > @@ -454,6 +456,11 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) > fq_flow_set_throttled(q, f); > goto begin; > } > + if (time_next_packet && > + (s64)(now - time_next_packet - q->ce_threshold) > 0) { > + INET_ECN_set_ce(skb); > + q->stat_ce_mark++; > + } > } > > skb = fq_dequeue_head(sch, f); > @@ -650,6 +657,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 
1] = { > [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, > [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, > [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, > + [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 }, > }; > > static int fq_change(struct Qdisc *sch, struct nlattr *opt, > @@ -729,6 +737,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, > if (tb[TCA_FQ_ORPHAN_MASK]) > q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); > > + if (tb[TCA_FQ_CE_THRESHOLD]) > + q->ce_threshold = (u64)NSEC_PER_USEC * > + nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]); > + > if (!err) { > sch_tree_unlock(sch); > err = fq_resize(sch, fq_log); > @@ -779,6 +791,10 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, > q->fq_trees_log = ilog2(1024); > q->orphan_mask = 1024 - 1; > q->low_rate_threshold = 550000 / 8; > + > + /* Default ce_threshold of 4294 seconds */ > + q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; > + > qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); > > if (opt) > @@ -792,6 +808,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, > static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) > { > struct fq_sched_data *q = qdisc_priv(sch); > + u64 ce_threshold = q->ce_threshold; > struct nlattr *opts; > > opts = nla_nest_start(skb, TCA_OPTIONS); > @@ -800,6 +817,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) > > /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */ > > + do_div(ce_threshold, NSEC_PER_USEC); > + > if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || > nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || > nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || > @@ -812,6 +831,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) > nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || > nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, > q->low_rate_threshold) || > + nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || > nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) > goto 
nla_put_failure; > > @@ -841,6 +861,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) > st.throttled_flows = q->throttled_flows; > st.unthrottle_latency_ns = min_t(unsigned long, > q->unthrottle_latency_ns, ~0U); > + st.ce_mark = q->stat_ce_mark; > sch_tree_unlock(sch); > > return gnet_stats_copy_app(d, &st, sizeof(st));