From: Dave Taht <dave.taht@gmail.com>
To: ECN-Sane <ecn-sane@lists.bufferbloat.net>
Subject: [Ecn-sane] Fwd: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
Date: Fri, 24 Sep 2021 18:10:24 -0700 [thread overview]
Message-ID: <CAA93jw70szehpR3Z_nC97=HSz+-sqWVPNqLsp46xSsTKb9uaWA@mail.gmail.com> (raw)
In-Reply-To: <20210923211706.2553282-1-luke.w.hsiao@gmail.com>
---------- Forwarded message ---------
From: Luke Hsiao <luke.w.hsiao@gmail.com>
Date: Thu, Sep 23, 2021 at 2:20 PM
Subject: [PATCH net-next] tcp: tracking packets with CE marks in BW rate sample
To: David Miller <davem@davemloft.net>
Cc: <netdev@vger.kernel.org>, Yuchung Cheng <ycheng@google.com>,
Lawrence Brakmo <brakmo@fb.com>, Neal Cardwell <ncardwell@google.com>,
Eric Dumazet <edumazet@google.com>, Luke Hsiao <lukehsiao@google.com>
From: Yuchung Cheng <ycheng@google.com>
In order to track CE marks per rate sample (one round trip), TCP needs a
per-skb header field to record the tp->delivered_ce count when the skb
was sent. To make space, we replace the "last_in_flight" field which is
used exclusively for NV congestion control. The stat needed by NV can be
alternatively approximated by existing stats tcp_sock delivered and
mss_cache.
This patch counts the number of packets delivered which have CE marks in
the rate sample, using similar approach of delivery accounting.
Cc: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Luke Hsiao <lukehsiao@google.com>
---
include/net/tcp.h | 9 ++++++---
net/ipv4/tcp_input.c | 11 +++++------
net/ipv4/tcp_output.c | 2 --
net/ipv4/tcp_rate.c | 6 ++++++
4 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 673c3b01e287..32cf6c01f403 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -874,10 +874,11 @@ struct tcp_skb_cb {
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
/* There is space for up to 24 bytes */
- __u32 in_flight:30,/* Bytes in flight at transmit */
- is_app_limited:1, /* cwnd not fully used? */
- unused:1;
+ __u32 is_app_limited:1, /* cwnd not fully used? */
+ delivered_ce:20,
+ unused:11;
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
@@ -1029,7 +1030,9 @@ struct ack_sample {
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
+ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
+ s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 141e85e6422b..53675e284841 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3221,7 +3221,6 @@ static int tcp_clean_rtx_queue(struct sock *sk,
const struct sk_buff *ack_skb,
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
u32 pkts_acked = 0;
- u32 last_in_flight = 0;
bool rtt_update;
int flag = 0;
@@ -3257,7 +3256,6 @@ static int tcp_clean_rtx_queue(struct sock *sk,
const struct sk_buff *ack_skb,
if (!first_ackt)
first_ackt = last_ackt;
- last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
if (before(start_seq, reord))
reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
@@ -3323,8 +3321,8 @@ static int tcp_clean_rtx_queue(struct sock *sk,
const struct sk_buff *ack_skb,
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
- if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
- last_in_flight && !prior_sacked && fully_acked &&
+ if (pkts_acked == 1 && fully_acked && !prior_sacked &&
+ (tp->snd_una - prior_snd_una) < tp->mss_cache &&
sack->rate->prior_delivered + 1 == tp->delivered &&
!(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
/* Conservatively mark a delayed ACK. It's typically
@@ -3381,9 +3379,10 @@ static int tcp_clean_rtx_queue(struct sock *sk,
const struct sk_buff *ack_skb,
if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
- .rtt_us = sack->rate->rtt_us,
- .in_flight = last_in_flight };
+ .rtt_us = sack->rate->rtt_us };
+ sample.in_flight = tp->mss_cache *
+ (tp->delivered - sack->rate->prior_delivered);
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6d72f3ea48c4..fdc39b4fbbfa 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk,
struct sk_buff *skb,
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (clone_it) {
- TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- - tp->snd_una;
oskb = skb;
tcp_skb_tsorted_save(oskb) {
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 0de693565963..fbab921670cc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
+ TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
}
@@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct
sk_buff *skb,
if (!rs->prior_delivered ||
after(scb->tx.delivered, rs->prior_delivered)) {
+ rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
@@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
}
rs->delivered = tp->delivered - rs->prior_delivered;
+ rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
+ /* delivered_ce occupies less than 32 bits in the skb control block */
+ rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
+
/* Model sending data and receiving ACKs as separate pipeline phases
* for a window. Usually the ACK phase is longer, but with ACK
* compression the send phase can be longer. To be safe we use the
--
2.33.0.685.g46640cef36-goog
--
Fixing Starlink's Latencies: https://www.youtube.com/watch?v=c9gLo6Xrwgw
Dave Täht CEO, TekLibre, LLC
next parent reply other threads:[~2021-09-25 1:10 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20210923211706.2553282-1-luke.w.hsiao@gmail.com>
2021-09-25 1:10 ` Dave Taht [this message]
2021-09-25 18:17 ` Rodney W. Grimes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://lists.bufferbloat.net/postorius/lists/ecn-sane.lists.bufferbloat.net/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAA93jw70szehpR3Z_nC97=HSz+-sqWVPNqLsp46xSsTKb9uaWA@mail.gmail.com' \
--to=dave.taht@gmail.com \
--cc=ecn-sane@lists.bufferbloat.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox