[Ecn-sane] Fwd: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample

Dave Taht dave.taht at gmail.com
Fri Sep 24 21:10:24 EDT 2021


---------- Forwarded message ---------
From: Luke Hsiao <luke.w.hsiao at gmail.com>
Date: Thu, Sep 23, 2021 at 2:20 PM
Subject: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
To: David Miller <davem at davemloft.net>
Cc: <netdev at vger.kernel.org>, Yuchung Cheng <ycheng at google.com>,
Lawrence Brakmo <brakmo at fb.com>, Neal Cardwell <ncardwell at google.com>,
Eric Dumazet <edumazet at google.com>, Luke Hsiao <lukehsiao at google.com>


From: Yuchung Cheng <ycheng at google.com>

In order to track CE marks per rate sample (one round trip), TCP needs a
per-skb header field to record the tp->delivered_ce count when the skb
was sent. To make space, we replace the per-skb "in_flight" field, which
is used exclusively by the NV congestion control. The stat NV needs can
instead be approximated from the existing tcp_sock stats delivered and
mss_cache.
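
In code terms (this merely restates the tcp_output.c and tcp_input.c
hunks below, it is not a separate change): the exact per-skb byte count
that used to be stamped at transmit time,

	TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una;

goes away, and the value handed to the pkts_acked() hook is instead
approximated at ACK time from the packets delivered since the sampled
skb was sent:

	sample.in_flight = tp->mss_cache *
		(tp->delivered - sack->rate->prior_delivered);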

This patch counts, per rate sample, the number of delivered packets that
carried CE marks, using the same approach as the existing delivery
accounting.
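
For illustration only (not part of this patch; the function name and the
Q10 scaling are invented for the example), an ECN-aware congestion
control could turn the new fields into a per-round CE fraction roughly
like so:

	#include <linux/math64.h>	/* div_u64() */
	#include <net/tcp.h>		/* struct rate_sample */

	/* Hypothetical sketch: fraction of packets delivered with CE
	 * marks over this rate sample, scaled to [0, 1024] (Q10).
	 */
	static u32 example_ce_fraction(const struct rate_sample *rs)
	{
		if (rs->delivered <= 0 || rs->delivered_ce <= 0)
			return 0;
		return (u32)div_u64((u64)rs->delivered_ce << 10,
				    rs->delivered);
	}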

Cc: Lawrence Brakmo <brakmo at fb.com>
Signed-off-by: Yuchung Cheng <ycheng at google.com>
Acked-by: Neal Cardwell <ncardwell at google.com>
Signed-off-by: Eric Dumazet <edumazet at google.com>
Signed-off-by: Luke Hsiao <lukehsiao at google.com>
---
 include/net/tcp.h     |  9 ++++++---
 net/ipv4/tcp_input.c  | 11 +++++------
 net/ipv4/tcp_output.c |  2 --
 net/ipv4/tcp_rate.c   |  6 ++++++
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 673c3b01e287..32cf6c01f403 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -874,10 +874,11 @@ struct tcp_skb_cb {
        __u32           ack_seq;        /* Sequence number ACK'd        */
        union {
                struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
                        /* There is space for up to 24 bytes */
-                       __u32 in_flight:30,/* Bytes in flight at transmit */
-                             is_app_limited:1, /* cwnd not fully used? */
-                             unused:1;
+                       __u32 is_app_limited:1, /* cwnd not fully used? */
+                             delivered_ce:20,
+                             unused:11;
                        /* pkts S/ACKed so far upon tx of skb, incl retrans: */
                        __u32 delivered;
                        /* start of send pipeline phase */
@@ -1029,7 +1030,9 @@ struct ack_sample {
 struct rate_sample {
        u64  prior_mstamp; /* starting timestamp for interval */
        u32  prior_delivered;   /* tp->delivered at "prior_mstamp" */
+       u32  prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
        s32  delivered;         /* number of packets delivered over interval */
+       s32  delivered_ce;      /* number of packets delivered w/ CE marks*/
        long interval_us;       /* time for tp->delivered to incr "delivered" */
        u32 snd_interval_us;    /* snd interval for delivered packets */
        u32 rcv_interval_us;    /* rcv interval for delivered packets */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 141e85e6422b..53675e284841 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3221,7 +3221,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
        long seq_rtt_us = -1L;
        long ca_rtt_us = -1L;
        u32 pkts_acked = 0;
-       u32 last_in_flight = 0;
        bool rtt_update;
        int flag = 0;

@@ -3257,7 +3256,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
                        if (!first_ackt)
                                first_ackt = last_ackt;

-                       last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
                        if (before(start_seq, reord))
                                reord = start_seq;
                        if (!after(scb->end_seq, tp->high_seq))
@@ -3323,8 +3321,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
                seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
                ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);

-               if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
-                   last_in_flight && !prior_sacked && fully_acked &&
+               if (pkts_acked == 1 && fully_acked && !prior_sacked &&
+                   (tp->snd_una - prior_snd_una) < tp->mss_cache &&
                    sack->rate->prior_delivered + 1 == tp->delivered &&
                    !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
                        /* Conservatively mark a delayed ACK. It's typically
@@ -3381,9 +3379,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,

        if (icsk->icsk_ca_ops->pkts_acked) {
                struct ack_sample sample = { .pkts_acked = pkts_acked,
-                                            .rtt_us = sack->rate->rtt_us,
-                                            .in_flight = last_in_flight };
+                                            .rtt_us = sack->rate->rtt_us };

+               sample.in_flight = tp->mss_cache *
+                       (tp->delivered - sack->rate->prior_delivered);
                icsk->icsk_ca_ops->pkts_acked(sk, &sample);
        }

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6d72f3ea48c4..fdc39b4fbbfa 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
        tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
        skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
        if (clone_it) {
-               TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
-                       - tp->snd_una;
                oskb = skb;

                tcp_skb_tsorted_save(oskb) {
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 0de693565963..fbab921670cc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
        TCP_SKB_CB(skb)->tx.first_tx_mstamp     = tp->first_tx_mstamp;
        TCP_SKB_CB(skb)->tx.delivered_mstamp    = tp->delivered_mstamp;
        TCP_SKB_CB(skb)->tx.delivered           = tp->delivered;
+       TCP_SKB_CB(skb)->tx.delivered_ce        = tp->delivered_ce;
        TCP_SKB_CB(skb)->tx.is_app_limited      = tp->app_limited ? 1 : 0;
 }

@@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,

        if (!rs->prior_delivered ||
            after(scb->tx.delivered, rs->prior_delivered)) {
+               rs->prior_delivered_ce  = scb->tx.delivered_ce;
                rs->prior_delivered  = scb->tx.delivered;
                rs->prior_mstamp     = scb->tx.delivered_mstamp;
                rs->is_app_limited   = scb->tx.is_app_limited;
@@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
        }
        rs->delivered   = tp->delivered - rs->prior_delivered;

+       rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
+       /* delivered_ce occupies less than 32 bits in the skb control block */
+       rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
+
        /* Model sending data and receiving ACKs as separate pipeline phases
         * for a window. Usually the ACK phase is longer, but with ACK
         * compression the send phase can be longer. To be safe we use the
--
2.33.0.685.g46640cef36-goog
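
A note on the 20-bit field (inferred from the masking above, not
spelled out in the patch): scb->tx.delivered_ce stores only the low 20
bits of tp->delivered_ce, so tcp_rate_gen() effectively computes the
delta modulo 2^20. Masking the subtraction with TCPCB_DELIVERED_CE_MASK
still yields the right count whenever fewer than 2^20 (~1M) CE-marked
packets are delivered within a single rate sample:

	u32 prior = 0xFFFFF;	/* low 20 bits stamped at transmit time */
	u32 now   = 0x100005;	/* full tp->delivered_ce at ACK time */
	u32 delta = (now - prior) & TCPCB_DELIVERED_CE_MASK;	/* = 6 */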



-- 
Fixing Starlink's Latencies: https://www.youtube.com/watch?v=c9gLo6Xrwgw

Dave Täht CEO, TekLibre, LLC

