[Cake] [PATCH net-next] net_sched: sch_fq: add dctcp-like marking

Dave Taht dave at taht.net
Mon Nov 19 13:39:51 EST 2018


Never did find a use for CE_threshold... now...

Eric Dumazet <edumazet at google.com> writes:

> Similar to 80ba92fa1a92 ("codel: add ce_threshold attribute")
>
> After EDT adoption, it became easier to implement DCTCP-like CE marking.
>
> In many cases, queues are not building in the network fabric but on
> the hosts themselves.
>
> If packets leaving fq have missed their Earliest Departure Time by
> XXX usec, we mark them with ECN CE. This gives feedback (after one
> RTT) to the sender so it can slow down and find a better operating
> mode.
>
> Example:
>
> tc qd replace dev eth0 root fq ce_threshold 2.5ms
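
(My reading of the example: 2.5ms on the tc command line is 2500 usec,
which the qdisc stores internally as 2,500,000 ns; any packet that
leaves fq more than that far past its earliest departure time gets CE
marked on the way out.)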
>
> Signed-off-by: Eric Dumazet <edumazet at google.com>
> ---
>  include/uapi/linux/pkt_sched.h |  3 +++
>  net/sched/sch_fq.c             | 21 +++++++++++++++++++++
>  2 files changed, 24 insertions(+)
>
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index 89ee47c2f17d86fba9a37733b5593680ceefcf00..ee017bc057a3cb390f995329ec8ab5432a844557 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -864,6 +864,8 @@ enum {
>  
>  	TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
>  
> +	TCA_FQ_CE_THRESHOLD,	/* DCTCP-like CE-marking threshold */
> +
>  	__TCA_FQ_MAX
>  };
>  
> @@ -882,6 +884,7 @@ struct tc_fq_qd_stats {
>  	__u32	inactive_flows;
>  	__u32	throttled_flows;
>  	__u32	unthrottle_latency_ns;
> +	__u64	ce_mark;		/* packets above ce_threshold */
>  };
>  
>  /* Heavy-Hitter Filter */
> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> index 4b1af706896c07e5a0fe6d542dfcd530acdcf8f5..3671eab91107d168062ab73ebb0640d44f94fc95 100644
> --- a/net/sched/sch_fq.c
> +++ b/net/sched/sch_fq.c
> @@ -94,6 +94,7 @@ struct fq_sched_data {
>  	u32		flow_refill_delay;
>  	u32		flow_plimit;	/* max packets per flow */
>  	unsigned long	flow_max_rate;	/* optional max rate per flow */
> +	u64		ce_threshold;
>  	u32		orphan_mask;	/* mask for orphaned skb */
>  	u32		low_rate_threshold;
>  	struct rb_root	*fq_root;
> @@ -107,6 +108,7 @@ struct fq_sched_data {
>  	u64		stat_gc_flows;
>  	u64		stat_internal_packets;
>  	u64		stat_throttled;
> +	u64		stat_ce_mark;
>  	u64		stat_flows_plimit;
>  	u64		stat_pkts_too_long;
>  	u64		stat_allocation_errors;
> @@ -454,6 +456,11 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
>  			fq_flow_set_throttled(q, f);
>  			goto begin;
>  		}
> +		if (time_next_packet &&
> +		    (s64)(now - time_next_packet - q->ce_threshold) > 0) {
> +			INET_ECN_set_ce(skb);
> +			q->stat_ce_mark++;
> +		}
>  	}
>  
>  	skb = fq_dequeue_head(sch, f);
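
For anyone following along, here is a rough, untested userspace sketch
of that EDT-miss test. The helper name, the values and the demo are
mine, not part of the patch; the interesting bit is the cast to a
signed difference, which makes the "not yet that late" case come out
negative instead of wrapping to a huge unsigned value:

#include <stdint.h>
#include <stdio.h>

/* Mark CE when the dequeue time is more than thresh_ns past the
 * packet's earliest departure time (EDT).  Mirrors the in-kernel
 * test (s64)(now - time_next_packet - q->ce_threshold) > 0 above.
 */
static int edt_missed(uint64_t now_ns, uint64_t edt_ns, uint64_t thresh_ns)
{
	return edt_ns && (int64_t)(now_ns - edt_ns - thresh_ns) > 0;
}

int main(void)
{
	uint64_t edt = 1000000000ULL;	/* EDT at t = 1 s */
	uint64_t thresh = 2500000ULL;	/* 2.5 ms, in ns */

	printf("%d\n", edt_missed(edt + 2000000, edt, thresh)); /* 0: only 2 ms late */
	printf("%d\n", edt_missed(edt + 3000000, edt, thresh)); /* 1: 3 ms late -> mark */
	return 0;
}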
> @@ -650,6 +657,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
>  	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
>  	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
>  	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
> +	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
>  };
>  
>  static int fq_change(struct Qdisc *sch, struct nlattr *opt,
> @@ -729,6 +737,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
>  	if (tb[TCA_FQ_ORPHAN_MASK])
>  		q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
>  
> +	if (tb[TCA_FQ_CE_THRESHOLD])
> +		q->ce_threshold = (u64)NSEC_PER_USEC *
> +				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
> +
>  	if (!err) {
>  		sch_tree_unlock(sch);
>  		err = fq_resize(sch, fq_log);
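
Worth noting if you are driving this from userspace: the netlink
attribute is a u32 in microseconds, multiplied up to nanoseconds here
(e.g. 2500 usec * 1000 = 2,500,000 ns), and fq_dump() below divides by
NSEC_PER_USEC before reporting it, so tc reads back the same usec value
it set.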
> @@ -779,6 +791,10 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
>  	q->fq_trees_log		= ilog2(1024);
>  	q->orphan_mask		= 1024 - 1;
>  	q->low_rate_threshold	= 550000 / 8;
> +
> +	/* Default ce_threshold of 4294 seconds */
> +	q->ce_threshold		= (u64)NSEC_PER_USEC * ~0U;
> +
>  	qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
>  
>  	if (opt)
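
Side note on that default: ~0U microseconds is 4,294,967,295 usec,
i.e. about 4294.97 seconds, so until a ce_threshold is actually
configured the marking test in fq_dequeue() effectively never fires.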
> @@ -792,6 +808,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
>  static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
>  {
>  	struct fq_sched_data *q = qdisc_priv(sch);
> +	u64 ce_threshold = q->ce_threshold;
>  	struct nlattr *opts;
>  
>  	opts = nla_nest_start(skb, TCA_OPTIONS);
> @@ -800,6 +817,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
>  
>  	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
>  
> +	do_div(ce_threshold, NSEC_PER_USEC);
> +
>  	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
>  	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
>  	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
> @@ -812,6 +831,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
>  	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
>  	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
>  			q->low_rate_threshold) ||
> +	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
>  	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
>  		goto nla_put_failure;
>  
> @@ -841,6 +861,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
>  	st.throttled_flows	  = q->throttled_flows;
>  	st.unthrottle_latency_ns  = min_t(unsigned long,
>  					  q->unthrottle_latency_ns, ~0U);
> +	st.ce_mark		  = q->stat_ce_mark;
>  	sch_tree_unlock(sch);
>  
>  	return gnet_stats_copy_app(d, &st, sizeof(st));
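
Once iproute2 grows matching support for the new attribute, something
like

tc -s qdisc show dev eth0

should (I assume) report the new ce_mark counter alongside the existing
throttled/flow stats, which looks like the quickest way to see whether
the threshold is actually biting.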

