[Ecn-sane] Fwd: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
Rodney W. Grimes
4bone at gndrsh.dnsmgr.net
Sat Sep 25 14:17:15 EDT 2021
> ---------- Forwarded message ---------
> From: Luke Hsiao <luke.w.hsiao at gmail.com>
> Date: Thu, Sep 23, 2021 at 2:20 PM
> Subject: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
> To: David Miller <davem at davemloft.net>
> Cc: <netdev at vger.kernel.org>, Yuchung Cheng <ycheng at google.com>,
> Lawrence Brakmo <brakmo at fb.com>, Neal Cardwell <ncardwell at google.com>,
> Eric Dumazet <edumazet at google.com>, Luke Hsiao <lukehsiao at google.com>
>
>
> From: Yuchung Cheng <ycheng at google.com>
>
> In order to track CE marks per rate sample (one round trip), TCP needs a
> per-skb header field to record the tp->delivered_ce count when the skb
> was sent. To make space, we replace the "last_in_flight" field which is
> used exclusively for NV congestion control. The stat needed by NV can be
> alternatively approximated by existing stats tcp_sock delivered and
> mss_cache.
>
> This patch counts the number of packets delivered which have CE marks in
> the rate sample, using similar approach of delivery accounting.
>
> Cc: Lawrence Brakmo <brakmo at fb.com>
> Signed-off-by: Yuchung Cheng <ycheng at google.com>
> Acked-by: Neal Cardwell <ncardwell at google.com>
> Signed-off-by: Eric Dumazet <edumazet at google.com>
> Signed-off-by: Luke Hsiao <lukehsiao at google.com>
> ---
> include/net/tcp.h | 9 ++++++---
> net/ipv4/tcp_input.c | 11 +++++------
> net/ipv4/tcp_output.c | 2 --
> net/ipv4/tcp_rate.c | 6 ++++++
> 4 files changed, 17 insertions(+), 11 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 673c3b01e287..32cf6c01f403 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -874,10 +874,11 @@ struct tcp_skb_cb {
> __u32 ack_seq; /* Sequence number ACK'd */
> union {
> struct {
> +#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
> /* There is space for up to 24 bytes */
> - __u32 in_flight:30,/* Bytes in flight at transmit */
> - is_app_limited:1, /* cwnd not fully used? */
> - unused:1;
> + __u32 is_app_limited:1, /* cwnd not fully used? */
> + delivered_ce:20,
> + unused:11;
> /* pkts S/ACKed so far upon tx of skb, incl retrans: */
> __u32 delivered;
> /* start of send pipeline phase */
> @@ -1029,7 +1030,9 @@ struct ack_sample {
> struct rate_sample {
> u64 prior_mstamp; /* starting timestamp for interval */
> u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
> + u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
> s32 delivered; /* number of packets delivered over interval */
> + s32 delivered_ce; /* number of packets delivered w/ CE marks*/
> long interval_us; /* time for tp->delivered to incr "delivered" */
> u32 snd_interval_us; /* snd interval for delivered packets */
> u32 rcv_interval_us; /* rcv interval for delivered packets */
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 141e85e6422b..53675e284841 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3221,7 +3221,6 @@ static int tcp_clean_rtx_queue(struct sock *sk,
> const struct sk_buff *ack_skb,
> long seq_rtt_us = -1L;
> long ca_rtt_us = -1L;
> u32 pkts_acked = 0;
> - u32 last_in_flight = 0;
> bool rtt_update;
> int flag = 0;
>
> @@ -3257,7 +3256,6 @@ static int tcp_clean_rtx_queue(struct sock *sk,
> const struct sk_buff *ack_skb,
> if (!first_ackt)
> first_ackt = last_ackt;
>
> - last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
> if (before(start_seq, reord))
> reord = start_seq;
> if (!after(scb->end_seq, tp->high_seq))
> @@ -3323,8 +3321,8 @@ static int tcp_clean_rtx_queue(struct sock *sk,
> const struct sk_buff *ack_skb,
> seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
> ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
>
> - if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
> - last_in_flight && !prior_sacked && fully_acked &&
> + if (pkts_acked == 1 && fully_acked && !prior_sacked &&
> + (tp->snd_una - prior_snd_una) < tp->mss_cache &&
> sack->rate->prior_delivered + 1 == tp->delivered &&
> !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
> /* Conservatively mark a delayed ACK. It's typically
> @@ -3381,9 +3379,10 @@ static int tcp_clean_rtx_queue(struct sock *sk,
> const struct sk_buff *ack_skb,
>
> if (icsk->icsk_ca_ops->pkts_acked) {
> struct ack_sample sample = { .pkts_acked = pkts_acked,
> - .rtt_us = sack->rate->rtt_us,
> - .in_flight = last_in_flight };
> + .rtt_us = sack->rate->rtt_us };
>
> + sample.in_flight = tp->mss_cache *
> + (tp->delivered - sack->rate->prior_delivered);
> icsk->icsk_ca_ops->pkts_acked(sk, &sample);
> }
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 6d72f3ea48c4..fdc39b4fbbfa 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk,
> struct sk_buff *skb,
> tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
> skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
> if (clone_it) {
> - TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
> - - tp->snd_una;
> oskb = skb;
>
> tcp_skb_tsorted_save(oskb) {
> diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
> index 0de693565963..fbab921670cc 100644
> --- a/net/ipv4/tcp_rate.c
> +++ b/net/ipv4/tcp_rate.c
> @@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
> TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
> TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
> TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
> + TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
> TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
> }
>
> @@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct
> sk_buff *skb,
>
> if (!rs->prior_delivered ||
> after(scb->tx.delivered, rs->prior_delivered)) {
> + rs->prior_delivered_ce = scb->tx.delivered_ce;
> rs->prior_delivered = scb->tx.delivered;
> rs->prior_mstamp = scb->tx.delivered_mstamp;
> rs->is_app_limited = scb->tx.is_app_limited;
> @@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
> }
> rs->delivered = tp->delivered - rs->prior_delivered;
>
> + rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
> + /* delivered_ce occupies less than 32 bits in the skb control block */
> + rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
Doesn't the compiler just get this right and truncate the storage
to the 20-bit field to 20 bits? I.e., this is an unneeded operation.
> +
> /* Model sending data and receiving ACKs as separate pipeline phases
> * for a window. Usually the ACK phase is longer, but with ACK
> * compression the send phase can be longer. To be safe we use the
> --
> 2.33.0.685.g46640cef36-goog
>
>
>
> --
> Fixing Starlink's Latencies: https://www.youtube.com/watch?v=c9gLo6Xrwgw
>
> Dave Täht CEO, TekLibre, LLC
> _______________________________________________
> Ecn-sane mailing list
> Ecn-sane at lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/ecn-sane
>
>
--
Rod Grimes rgrimes at freebsd.org
More information about the Ecn-sane
mailing list