From: "Toke Høiland-Jørgensen" <toke@redhat.com>
To: "Toke Høiland-Jørgensen" <toke@toke.dk>,
"Jamal Hadi Salim" <jhs@mojatatu.com>,
"Cong Wang" <xiyou.wangcong@gmail.com>,
"Jiri Pirko" <jiri@resnulli.us>,
"David S. Miller" <davem@davemloft.net>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Simon Horman" <horms@kernel.org>
Cc: "Jonas Köppeler" <j.koeppeler@tu-berlin.de>,
cake@lists.bufferbloat.net, netdev@vger.kernel.org,
"Toke Høiland-Jørgensen" <toke@redhat.com>
Subject: [Cake] [PATCH net-next 4/4] net/sched: sch_cake: share shaper state across sub-instances of cake_mq
Date: Mon, 24 Nov 2025 15:59:35 +0100 [thread overview]
Message-ID: <20251124-mq-cake-sub-qdisc-v1-4-a2ff1dab488f@redhat.com> (raw)
In-Reply-To: <20251124-mq-cake-sub-qdisc-v1-0-a2ff1dab488f@redhat.com>
From: Jonas Köppeler <j.koeppeler@tu-berlin.de>
This commit adds shared shaper state across the cake instances beneath a
cake_mq qdisc. It works by periodically tracking the number of active
instances, and scaling the configured rate by the number of active
queues.
The scan is lockless and simply reads the qlen and the last_active state
variable of each of the instances configured beneath the parent cake_mq
instance. Locking is not required since the values are only updated by
the owning instance, and eventual consistency is sufficient for the
purpose of estimating the number of active queues.
The interval for scanning the number of active queues is set to 200 us.
We found this to be a good tradeoff between overhead and response time.
For a detailed analysis of this aspect see the Netdevconf talk:
https://netdevconf.info/0x19/docs/netdev-0x19-paper16-talk-paper.pdf
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
Documentation/netlink/specs/tc.yaml | 3 +++
include/uapi/linux/pkt_sched.h | 1 +
net/sched/sch_cake.c | 51 +++++++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+)
diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index b398f7a46dae..2e663333a279 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -2207,6 +2207,9 @@ attribute-sets:
-
name: blue-timer-us
type: s32
+ -
+ name: active-queues
+ type: u32
-
name: cake-tin-stats-attrs
name-prefix: tca-cake-tin-stats-
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index c2da76e78bad..66e8072f44df 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1036,6 +1036,7 @@ enum {
TCA_CAKE_STATS_DROP_NEXT_US,
TCA_CAKE_STATS_P_DROP,
TCA_CAKE_STATS_BLUE_TIMER_US,
+ TCA_CAKE_STATS_ACTIVE_QUEUES,
__TCA_CAKE_STATS_MAX
};
#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 7ceccbfaa9b6..a04aafb129c4 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -201,6 +201,7 @@ struct cake_sched_config {
u64 rate_bps;
u64 interval;
u64 target;
+ u64 sync_time;
u32 buffer_config_limit;
u32 fwmark_mask;
u16 fwmark_shft;
@@ -257,6 +258,11 @@ struct cake_sched_data {
u16 max_adjlen;
u16 min_netlen;
u16 min_adjlen;
+
+ /* mq sync state */
+ u64 last_checked_active;
+ u64 last_active;
+ u32 active_queues;
};
enum {
@@ -383,6 +389,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
1239850263, 1191209601, 1147878294, 1108955788
};
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+ u64 target_ns, u64 rtt_est_ns);
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
*
@@ -2002,6 +2010,40 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
u64 delay;
u32 len;
+ if (q->config->is_shared &&
+ now - q->last_checked_active >= q->config->sync_time) { //check every 1ms is the default
+ struct net_device *dev = qdisc_dev(sch);
+ struct cake_sched_data *other_priv;
+ u64 new_rate = q->config->rate_bps;
+ u64 other_qlen, other_last_active;
+ struct Qdisc *other_sch;
+ u32 num_active_qs = 1;
+ unsigned int ntx;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ other_sch = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
+ other_priv = qdisc_priv(other_sch);
+
+ if (other_priv == q)
+ continue;
+
+ other_qlen = READ_ONCE(other_sch->q.qlen);
+ other_last_active = READ_ONCE(other_priv->last_active);
+
+ if (other_qlen || other_last_active > q->last_checked_active)
+ num_active_qs++;
+ }
+
+ if (num_active_qs > 1)
+ new_rate = div64_u64(q->config->rate_bps, num_active_qs);
+
+ /* mtu = 0 is used to only update the rate and not mess with cobalt params */
+ cake_set_rate(b, new_rate, 0, 0, 0);
+ q->last_checked_active = now;
+ q->rate_ns = b->tin_rate_ns;
+ q->rate_shft = b->tin_rate_shft;
+ }
+
begin:
if (!sch->q.qlen)
return NULL;
@@ -2201,6 +2243,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
b->tin_ecn_mark += !!flow->cvars.ecn_marked;
qdisc_bstats_update(sch, skb);
+ q->last_active = now;
/* collect delay stats */
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
@@ -2301,6 +2344,9 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
b->tin_rate_ns = rate_ns;
b->tin_rate_shft = rate_shft;
+ if (mtu == 0)
+ return;
+
byte_target_ns = (byte_target * rate_ns) >> rate_shft;
b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
@@ -2763,6 +2809,7 @@ static void cake_config_init(struct cake_sched_config *q, bool is_shared)
*/
q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
q->is_shared = is_shared;
+ q->sync_time = 200 * NSEC_PER_USEC;
}
static int cake_init(struct Qdisc *sch, struct nlattr *opt,
@@ -2834,6 +2881,9 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
qd->avg_peak_bandwidth = q->rate_bps;
qd->min_netlen = ~0;
qd->min_adjlen = ~0;
+ qd->active_queues = 0;
+ qd->last_checked_active = 0;
+
return 0;
err:
kvfree(qd->config);
@@ -2967,6 +3017,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+ PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues);
#undef PUT_STAT_U32
#undef PUT_STAT_U64
--
2.51.2
next prev parent reply other threads:[~2025-11-24 15:00 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-24 14:59 [Cake] [PATCH net-next 0/4] Multi-queue aware sch_cake Toke Høiland-Jørgensen
2025-11-24 14:59 ` [Cake] [PATCH net-next 1/4] net/sched: sch_cake: Factor out config variables into separate struct Toke Høiland-Jørgensen
2025-11-24 14:59 ` [Cake] [PATCH net-next 2/4] net/sched: sch_cake: Add cake_mq qdisc for using cake on mq devices Toke Høiland-Jørgensen
2025-11-24 14:59 ` [Cake] [PATCH net-next 3/4] net/sched: sch_cake: Share config across cake_mq sub-qdiscs Toke Høiland-Jørgensen
2025-11-24 14:59 ` Toke Høiland-Jørgensen [this message]
2025-11-24 15:03 ` [Cake] [PATCH iproute2-next] tc: cake: add cake_mq support Toke Høiland-Jørgensen
2025-11-25 0:00 ` [Cake] " David Ahern
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://lists.bufferbloat.net/postorius/lists/cake.lists.bufferbloat.net/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251124-mq-cake-sub-qdisc-v1-4-a2ff1dab488f@redhat.com \
--to=toke@redhat.com \
--cc=cake@lists.bufferbloat.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=j.koeppeler@tu-berlin.de \
--cc=jhs@mojatatu.com \
--cc=jiri@resnulli.us \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=toke@toke.dk \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox