[Cake] [PATCH net-next] net_sched: sch_fq: add dctcp-like marking
Dave Taht
dave at taht.net
Mon Nov 19 13:39:51 EST 2018
Never did find a use for CE_threshold... now...
Eric Dumazet <edumazet at google.com> writes:
> Similar to 80ba92fa1a92 ("codel: add ce_threshold attribute")
>
> After EDT adoption, it became easier to implement DCTCP-like CE marking.
>
> In many cases, queues are not building in the network fabric but on
> the hosts themselves.
>
> If packets leaving fq miss their Earliest Departure Time by XXX usec,
> we mark them with ECN CE. This gives feedback (after one RTT) to
> the sender so it can slow down and find a better operating mode.
>
> Example:
>
> tc qd replace dev eth0 root fq ce_threshold 2.5ms
>
> Signed-off-by: Eric Dumazet <edumazet at google.com>
> ---
> include/uapi/linux/pkt_sched.h |  3 +++
> net/sched/sch_fq.c             | 21 +++++++++++++++++++++
> 2 files changed, 24 insertions(+)
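
To see the marking rule in isolation, here is a minimal standalone sketch,
not the kernel code itself; edt_ns and ce_threshold_ns are illustrative
names only, and the real check sits in fq_dequeue() further down the patch:

    /* Mark CE when a packet leaves later than its Earliest Departure
     * Time plus the configured threshold. The signed cast mirrors the
     * (s64) cast in the patch, so an early departure (now < edt) can
     * never trigger a mark.
     */
    #include <stdbool.h>
    #include <stdint.h>

    static bool should_mark_ce(uint64_t now_ns, uint64_t edt_ns,
                               uint64_t ce_threshold_ns)
    {
            return edt_ns &&
                   (int64_t)(now_ns - edt_ns - ce_threshold_ns) > 0;
    }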
>
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index 89ee47c2f17d86fba9a37733b5593680ceefcf00..ee017bc057a3cb390f995329ec8ab5432a844557 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -864,6 +864,8 @@ enum {
>
> TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
>
> + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */
> +
> __TCA_FQ_MAX
> };
>
> @@ -882,6 +884,7 @@ struct tc_fq_qd_stats {
> __u32 inactive_flows;
> __u32 throttled_flows;
> __u32 unthrottle_latency_ns;
> + __u64 ce_mark; /* packets above ce_threshold */
> };
>
> /* Heavy-Hitter Filter */
> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> index 4b1af706896c07e5a0fe6d542dfcd530acdcf8f5..3671eab91107d168062ab73ebb0640d44f94fc95 100644
> --- a/net/sched/sch_fq.c
> +++ b/net/sched/sch_fq.c
> @@ -94,6 +94,7 @@ struct fq_sched_data {
> u32 flow_refill_delay;
> u32 flow_plimit; /* max packets per flow */
> unsigned long flow_max_rate; /* optional max rate per flow */
> + u64 ce_threshold;
> u32 orphan_mask; /* mask for orphaned skb */
> u32 low_rate_threshold;
> struct rb_root *fq_root;
> @@ -107,6 +108,7 @@ struct fq_sched_data {
> u64 stat_gc_flows;
> u64 stat_internal_packets;
> u64 stat_throttled;
> + u64 stat_ce_mark;
> u64 stat_flows_plimit;
> u64 stat_pkts_too_long;
> u64 stat_allocation_errors;
> @@ -454,6 +456,11 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
> fq_flow_set_throttled(q, f);
> goto begin;
> }
> + if (time_next_packet &&
> + (s64)(now - time_next_packet - q->ce_threshold) > 0) {
> + INET_ECN_set_ce(skb);
> + q->stat_ce_mark++;
> + }
> }
>
> skb = fq_dequeue_head(sch, f);
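
Quick sanity check on the new branch, using the 2.5 ms ce_threshold from the
example above: if a packet's time_next_packet was T and it only leaves at
now = T + 4 ms, then (s64)(now - time_next_packet - q->ce_threshold) works out
to 4 ms - 2.5 ms = +1.5 ms > 0, so the skb gets CE and stat_ce_mark is bumped;
a packet only 1 ms late gives -1.5 ms and passes through unmarked.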
> @@ -650,6 +657,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
> [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
> [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
> [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
> + [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
> };
>
> static int fq_change(struct Qdisc *sch, struct nlattr *opt,
> @@ -729,6 +737,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
> if (tb[TCA_FQ_ORPHAN_MASK])
> q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
>
> + if (tb[TCA_FQ_CE_THRESHOLD])
> + q->ce_threshold = (u64)NSEC_PER_USEC *
> + nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
> +
> if (!err) {
> sch_tree_unlock(sch);
> err = fq_resize(sch, fq_log);
> @@ -779,6 +791,10 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
> q->fq_trees_log = ilog2(1024);
> q->orphan_mask = 1024 - 1;
> q->low_rate_threshold = 550000 / 8;
> +
> + /* Default ce_threshold of 4294 seconds */
> + q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
> +
> qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
>
> if (opt)
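
Worth spelling out where "4294 seconds" comes from: ~0U is 4294967295,
interpreted as microseconds, so (u64)NSEC_PER_USEC * ~0U is about 4.295e12 ns,
roughly 4294.97 s, which makes the feature effectively "never mark" until an
admin sets ce_threshold explicitly.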
> @@ -792,6 +808,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
> static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
> {
> struct fq_sched_data *q = qdisc_priv(sch);
> + u64 ce_threshold = q->ce_threshold;
> struct nlattr *opts;
>
> opts = nla_nest_start(skb, TCA_OPTIONS);
> @@ -800,6 +817,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
>
> /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
>
> + do_div(ce_threshold, NSEC_PER_USEC);
> +
> if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
> nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
> nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
> @@ -812,6 +831,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
> nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
> nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
> q->low_rate_threshold) ||
> + nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
> nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
> goto nla_put_failure;
>
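
Note the unit round-trip: fq_change() multiplies the u32 microsecond attribute
by NSEC_PER_USEC for internal storage, and fq_dump() divides it back before
reporting it in microseconds again. A minimal illustration of the in-place
division helper (not new code in the patch):

    u64 ce_threshold = 2500000;              /* 2.5 ms, stored in ns */
    do_div(ce_threshold, NSEC_PER_USEC);     /* divides in place: now 2500 us */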
> @@ -841,6 +861,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
> st.throttled_flows = q->throttled_flows;
> st.unthrottle_latency_ns = min_t(unsigned long,
> q->unthrottle_latency_ns, ~0U);
> + st.ce_mark = q->stat_ce_mark;
> sch_tree_unlock(sch);
>
> return gnet_stats_copy_app(d, &st, sizeof(st));
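
With a matching iproute2, the new knob can presumably be exercised and
observed along these lines (exact tc output formatting depends on the
iproute2 version; the ce_mark name follows the new tc_fq_qd_stats field):

    tc qdisc replace dev eth0 root fq ce_threshold 2.5ms
    tc -s qdisc show dev eth0

The ce_mark counter should climb once packets start leaving fq more than
2.5 ms past their Earliest Departure Time.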