* [Cake] [PATCH net-next v3 1/5] net/sched: Export mq functions for reuse
2025-11-30 20:37 [Cake] [PATCH net-next v3 0/5] Multi-queue aware sch_cake Toke Høiland-Jørgensen
@ 2025-11-30 20:37 ` Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 2/5] net/sched: sch_cake: Factor out config variables into separate struct Toke Høiland-Jørgensen
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Toke Høiland-Jørgensen @ 2025-11-30 20:37 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Jonas Köppeler, cake, netdev, Toke Høiland-Jørgensen
To enable the cake_mq qdisc to reuse code from the mq qdisc, export a
bunch of functions from sch_mq. Split common functionality out from some
functions so it can be composed with other code, and export other
functions wholesale.
No functional change intended.
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
include/net/sch_generic.h | 19 +++++++++++++
net/sched/sch_mq.c | 69 ++++++++++++++++++++++++++++++++---------------
2 files changed, 67 insertions(+), 21 deletions(-)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c3a7268b567e..f2281914d962 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1419,7 +1419,26 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
struct tcf_block *block);
+struct mq_sched {
+ struct Qdisc **qdiscs;
+};
+
+int mq_init_common(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack,
+ const struct Qdisc_ops *qdisc_ops);
+void mq_destroy_common(struct Qdisc *sch);
+void mq_attach(struct Qdisc *sch);
void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);
+void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb);
+struct netdev_queue *mq_select_queue(struct Qdisc *sch,
+ struct tcmsg *tcm);
+struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl);
+unsigned long mq_find(struct Qdisc *sch, u32 classid);
+int mq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm);
+int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d);
+void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg);
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index c860119a8f09..0bcabdcd1f44 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -17,10 +17,6 @@
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
-struct mq_sched {
- struct Qdisc **qdiscs;
-};
-
static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
{
struct net_device *dev = qdisc_dev(sch);
@@ -49,23 +45,29 @@ static int mq_offload_stats(struct Qdisc *sch)
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt);
}
-static void mq_destroy(struct Qdisc *sch)
+void mq_destroy_common(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
unsigned int ntx;
- mq_offload(sch, TC_MQ_DESTROY);
-
if (!priv->qdiscs)
return;
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
qdisc_put(priv->qdiscs[ntx]);
kfree(priv->qdiscs);
}
+EXPORT_SYMBOL(mq_destroy_common);
-static int mq_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
+static void mq_destroy(struct Qdisc *sch)
+{
+ mq_offload(sch, TC_MQ_DESTROY);
+ mq_destroy_common(sch);
+}
+
+int mq_init_common(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack,
+ const struct Qdisc_ops *qdisc_ops)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
@@ -87,7 +89,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt,
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
dev_queue = netdev_get_tx_queue(dev, ntx);
- qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
+ qdisc = qdisc_create_dflt(dev_queue,
+ qdisc_ops ?: get_default_qdisc_ops(dev, ntx),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)),
extack);
@@ -98,12 +101,24 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt,
}
sch->flags |= TCQ_F_MQROOT;
+ return 0;
+}
+EXPORT_SYMBOL(mq_init_common);
+
+static int mq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ int ret;
+
+ ret = mq_init_common(sch, opt, extack, NULL);
+ if (ret)
+ return ret;
mq_offload(sch, TC_MQ_CREATE);
return 0;
}
-static void mq_attach(struct Qdisc *sch)
+void mq_attach(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
@@ -124,8 +139,9 @@ static void mq_attach(struct Qdisc *sch)
kfree(priv->qdiscs);
priv->qdiscs = NULL;
}
+EXPORT_SYMBOL(mq_attach);
-static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
struct Qdisc *qdisc;
@@ -152,7 +168,12 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
spin_unlock_bh(qdisc_lock(qdisc));
}
+}
+EXPORT_SYMBOL(mq_dump_common);
+static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ mq_dump_common(sch, skb);
return mq_offload_stats(sch);
}
@@ -166,11 +187,12 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
return netdev_get_tx_queue(dev, ntx);
}
-static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
- struct tcmsg *tcm)
+struct netdev_queue *mq_select_queue(struct Qdisc *sch,
+ struct tcmsg *tcm)
{
return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}
+EXPORT_SYMBOL(mq_select_queue);
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
@@ -198,14 +220,15 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
return 0;
}
-static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
+struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
return rtnl_dereference(dev_queue->qdisc_sleeping);
}
+EXPORT_SYMBOL(mq_leaf);
-static unsigned long mq_find(struct Qdisc *sch, u32 classid)
+unsigned long mq_find(struct Qdisc *sch, u32 classid)
{
unsigned int ntx = TC_H_MIN(classid);
@@ -213,9 +236,10 @@ static unsigned long mq_find(struct Qdisc *sch, u32 classid)
return 0;
return ntx;
}
+EXPORT_SYMBOL(mq_find);
-static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
+int mq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
@@ -224,9 +248,10 @@ static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
return 0;
}
+EXPORT_SYMBOL(mq_dump_class);
-static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- struct gnet_dump *d)
+int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
@@ -236,8 +261,9 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
return -1;
return 0;
}
+EXPORT_SYMBOL(mq_dump_class_stats);
-static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
struct net_device *dev = qdisc_dev(sch);
unsigned int ntx;
@@ -251,6 +277,7 @@ static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
break;
}
}
+EXPORT_SYMBOL(mq_walk);
static const struct Qdisc_class_ops mq_class_ops = {
.select_queue = mq_select_queue,
--
2.52.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Cake] [PATCH net-next v3 2/5] net/sched: sch_cake: Factor out config variables into separate struct
2025-11-30 20:37 [Cake] [PATCH net-next v3 0/5] Multi-queue aware sch_cake Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 1/5] net/sched: Export mq functions for reuse Toke Høiland-Jørgensen
@ 2025-11-30 20:37 ` Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 3/5] net/sched: sch_cake: Add cake_mq qdisc for using cake on mq devices Toke Høiland-Jørgensen
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Toke Høiland-Jørgensen @ 2025-11-30 20:37 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Jonas Köppeler, cake, netdev, Toke Høiland-Jørgensen
Factor out all the user-configurable variables into a separate struct
and embed it into struct cake_sched_data. This is done in preparation
for sharing the configuration across multiple instances of cake in an mq
setup.
No functional change is intended with this patch.
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
net/sched/sch_cake.c | 245 ++++++++++++++++++++++++++++-----------------------
1 file changed, 133 insertions(+), 112 deletions(-)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 0ea9440f68c6..545b9b830cce 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -197,40 +197,42 @@ struct cake_tin_data {
u32 way_collisions;
}; /* number of tins is small, so size of this struct doesn't matter much */
+struct cake_sched_config {
+ u64 rate_bps;
+ u64 interval;
+ u64 target;
+ u32 buffer_config_limit;
+ u32 fwmark_mask;
+ u16 fwmark_shft;
+ s16 rate_overhead;
+ u16 rate_mpu;
+ u16 rate_flags;
+ u8 tin_mode;
+ u8 flow_mode;
+ u8 atm_mode;
+ u8 ack_filter;
+};
+
struct cake_sched_data {
struct tcf_proto __rcu *filter_list; /* optional external classifier */
struct tcf_block *block;
struct cake_tin_data *tins;
+ struct cake_sched_config *config;
struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
- u16 overflow_timeout;
-
- u16 tin_cnt;
- u8 tin_mode;
- u8 flow_mode;
- u8 ack_filter;
- u8 atm_mode;
-
- u32 fwmark_mask;
- u16 fwmark_shft;
/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
- u16 rate_shft;
ktime_t time_next_packet;
ktime_t failsafe_next_packet;
u64 rate_ns;
- u64 rate_bps;
- u16 rate_flags;
- s16 rate_overhead;
- u16 rate_mpu;
- u64 interval;
- u64 target;
+ u16 rate_shft;
+ u16 overflow_timeout;
+ u16 tin_cnt;
/* resource tracking */
u32 buffer_used;
u32 buffer_max_used;
u32 buffer_limit;
- u32 buffer_config_limit;
/* indices for dequeue */
u16 cur_tin;
@@ -1198,7 +1200,7 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
struct cake_flow *flow)
{
- bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
+ bool aggressive = q->config->ack_filter == CAKE_ACK_AGGRESSIVE;
struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
struct sk_buff *skb_check, *skb_prev = NULL;
const struct ipv6hdr *ipv6h, *ipv6h_check;
@@ -1358,15 +1360,17 @@ static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
return avg;
}
-static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
{
+ struct cake_sched_config *q = qd->config;
+
if (q->rate_flags & CAKE_FLAG_OVERHEAD)
len -= off;
- if (q->max_netlen < len)
- q->max_netlen = len;
- if (q->min_netlen > len)
- q->min_netlen = len;
+ if (qd->max_netlen < len)
+ qd->max_netlen = len;
+ if (qd->min_netlen > len)
+ qd->min_netlen = len;
len += q->rate_overhead;
@@ -1385,10 +1389,10 @@ static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
len += (len + 63) / 64;
}
- if (q->max_adjlen < len)
- q->max_adjlen = len;
- if (q->min_adjlen > len)
- q->min_adjlen = len;
+ if (qd->max_adjlen < len)
+ qd->max_adjlen = len;
+ if (qd->min_adjlen > len)
+ qd->min_adjlen = len;
return len;
}
@@ -1586,7 +1590,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
flow->dropped++;
b->tin_dropped++;
- if (q->rate_flags & CAKE_FLAG_INGRESS)
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS)
cake_advance_shaper(q, b, skb, now, true);
qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT);
@@ -1657,7 +1661,8 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
struct sk_buff *skb)
{
- struct cake_sched_data *q = qdisc_priv(sch);
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
u32 tin, mark;
bool wash;
u8 dscp;
@@ -1674,24 +1679,24 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
- else if (mark && mark <= q->tin_cnt)
- tin = q->tin_order[mark - 1];
+ else if (mark && mark <= qd->tin_cnt)
+ tin = qd->tin_order[mark - 1];
else if (TC_H_MAJ(skb->priority) == sch->handle &&
TC_H_MIN(skb->priority) > 0 &&
- TC_H_MIN(skb->priority) <= q->tin_cnt)
- tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+ TC_H_MIN(skb->priority) <= qd->tin_cnt)
+ tin = qd->tin_order[TC_H_MIN(skb->priority) - 1];
else {
if (!wash)
dscp = cake_handle_diffserv(skb, wash);
- tin = q->tin_index[dscp];
+ tin = qd->tin_index[dscp];
- if (unlikely(tin >= q->tin_cnt))
+ if (unlikely(tin >= qd->tin_cnt))
tin = 0;
}
- return &q->tins[tin];
+ return &qd->tins[tin];
}
static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
@@ -1747,7 +1752,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
u32 idx, tin;
/* choose flow to insert into */
- idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
+ idx = cake_classify(sch, &b, skb, q->config->flow_mode, &ret);
if (idx == 0) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
@@ -1782,7 +1787,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(len > b->max_skblen))
b->max_skblen = len;
- if (qdisc_pkt_segs(skb) > 1 && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
+ if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) {
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
unsigned int slen = 0, numsegs = 0;
@@ -1822,7 +1827,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
flow_queue_add(flow, skb);
- if (q->ack_filter)
+ if (q->config->ack_filter)
ack = cake_ack_filter(q, flow);
if (ack) {
@@ -1831,7 +1836,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
b->bytes += qdisc_pkt_len(ack);
len -= qdisc_pkt_len(ack);
q->buffer_used += skb->truesize - ack->truesize;
- if (q->rate_flags & CAKE_FLAG_INGRESS)
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS)
cake_advance_shaper(q, b, ack, now, true);
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack));
@@ -1854,7 +1859,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cake_heapify_up(q, b->overflow_idx[idx]);
/* incoming bandwidth capacity estimate */
- if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
+ if (q->config->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
u64 packet_interval = \
ktime_to_ns(ktime_sub(now, q->last_packet_time));
@@ -1886,7 +1891,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (ktime_after(now,
ktime_add_ms(q->last_reconfig_time,
250))) {
- q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
+ q->config->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
cake_reconfigure(sch);
}
}
@@ -1906,7 +1911,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
flow->set = CAKE_SET_SPARSE;
b->sparse_flow_count++;
- flow->deficit = cake_get_flow_quantum(b, flow, q->flow_mode);
+ flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode);
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
/* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation.
@@ -1915,8 +1920,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
b->sparse_flow_count--;
b->bulk_flow_count++;
- cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
}
if (q->buffer_used > q->buffer_max_used)
@@ -2098,8 +2103,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
b->sparse_flow_count--;
b->bulk_flow_count++;
- cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
flow->set = CAKE_SET_BULK;
} else {
@@ -2111,7 +2116,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
}
}
- flow->deficit += cake_get_flow_quantum(b, flow, q->flow_mode);
+ flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode);
list_move_tail(&flow->flowchain, &b->old_flows);
goto retry;
@@ -2135,8 +2140,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
b->decaying_flow_count++;
} else if (flow->set == CAKE_SET_SPARSE ||
@@ -2154,8 +2159,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
else if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode);
- cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
+ cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
} else
b->decaying_flow_count--;
@@ -2166,14 +2171,14 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
reason = cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
(b->bulk_flow_count *
- !!(q->rate_flags &
+ !!(q->config->rate_flags &
CAKE_FLAG_INGRESS)));
/* Last packet in queue may be marked, shouldn't be dropped */
if (reason == SKB_NOT_DROPPED_YET || !flow->head)
break;
/* drop this packet, get another one */
- if (q->rate_flags & CAKE_FLAG_INGRESS) {
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS) {
len = cake_advance_shaper(q, b, skb,
now, true);
flow->deficit -= len;
@@ -2184,7 +2189,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_qstats_drop(sch);
qdisc_dequeue_drop(sch, skb, reason);
- if (q->rate_flags & CAKE_FLAG_INGRESS)
+ if (q->config->rate_flags & CAKE_FLAG_INGRESS)
goto retry;
}
@@ -2306,7 +2311,7 @@ static int cake_config_besteffort(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
struct cake_tin_data *b = &q->tins[0];
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
q->tin_cnt = 1;
@@ -2314,7 +2319,7 @@ static int cake_config_besteffort(struct Qdisc *sch)
q->tin_order = normal_order;
cake_set_rate(b, rate, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
b->tin_quantum = 65535;
return 0;
@@ -2325,7 +2330,7 @@ static int cake_config_precedence(struct Qdisc *sch)
/* convert high-level (user visible) parameters into internal format */
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2336,8 +2341,8 @@ static int cake_config_precedence(struct Qdisc *sch)
for (i = 0; i < q->tin_cnt; i++) {
struct cake_tin_data *b = &q->tins[i];
- cake_set_rate(b, rate, mtu, us_to_ns(q->target),
- us_to_ns(q->interval));
+ cake_set_rate(b, rate, mtu, us_to_ns(q->config->target),
+ us_to_ns(q->config->interval));
b->tin_quantum = max_t(u16, 1U, quantum);
@@ -2414,7 +2419,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 256;
u32 i;
@@ -2428,8 +2433,8 @@ static int cake_config_diffserv8(struct Qdisc *sch)
for (i = 0; i < q->tin_cnt; i++) {
struct cake_tin_data *b = &q->tins[i];
- cake_set_rate(b, rate, mtu, us_to_ns(q->target),
- us_to_ns(q->interval));
+ cake_set_rate(b, rate, mtu, us_to_ns(q->config->target),
+ us_to_ns(q->config->interval));
b->tin_quantum = max_t(u16, 1U, quantum);
@@ -2458,7 +2463,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 4;
@@ -2469,13 +2474,13 @@ static int cake_config_diffserv4(struct Qdisc *sch)
/* class characteristics */
cake_set_rate(&q->tins[0], rate, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[1], rate >> 4, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[2], rate >> 1, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[3], rate >> 2, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
/* bandwidth-sharing weights */
q->tins[0].tin_quantum = quantum;
@@ -2495,7 +2500,7 @@ static int cake_config_diffserv3(struct Qdisc *sch)
*/
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
- u64 rate = q->rate_bps;
+ u64 rate = q->config->rate_bps;
u32 quantum = 1024;
q->tin_cnt = 3;
@@ -2506,11 +2511,11 @@ static int cake_config_diffserv3(struct Qdisc *sch)
/* class characteristics */
cake_set_rate(&q->tins[0], rate, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[1], rate >> 4, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
cake_set_rate(&q->tins[2], rate >> 2, mtu,
- us_to_ns(q->target), us_to_ns(q->interval));
+ us_to_ns(q->config->target), us_to_ns(q->config->interval));
/* bandwidth-sharing weights */
q->tins[0].tin_quantum = quantum;
@@ -2522,7 +2527,8 @@ static int cake_config_diffserv3(struct Qdisc *sch)
static void cake_reconfigure(struct Qdisc *sch)
{
- struct cake_sched_data *q = qdisc_priv(sch);
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
int c, ft;
switch (q->tin_mode) {
@@ -2548,36 +2554,37 @@ static void cake_reconfigure(struct Qdisc *sch)
break;
}
- for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
+ for (c = qd->tin_cnt; c < CAKE_MAX_TINS; c++) {
cake_clear_tin(sch, c);
- q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
+ qd->tins[c].cparams.mtu_time = qd->tins[ft].cparams.mtu_time;
}
- q->rate_ns = q->tins[ft].tin_rate_ns;
- q->rate_shft = q->tins[ft].tin_rate_shft;
+ qd->rate_ns = qd->tins[ft].tin_rate_ns;
+ qd->rate_shft = qd->tins[ft].tin_rate_shft;
if (q->buffer_config_limit) {
- q->buffer_limit = q->buffer_config_limit;
+ qd->buffer_limit = q->buffer_config_limit;
} else if (q->rate_bps) {
u64 t = q->rate_bps * q->interval;
do_div(t, USEC_PER_SEC / 4);
- q->buffer_limit = max_t(u32, t, 4U << 20);
+ qd->buffer_limit = max_t(u32, t, 4U << 20);
} else {
- q->buffer_limit = ~0;
+ qd->buffer_limit = ~0;
}
sch->flags &= ~TCQ_F_CAN_BYPASS;
- q->buffer_limit = min(q->buffer_limit,
- max(sch->limit * psched_mtu(qdisc_dev(sch)),
- q->buffer_config_limit));
+ qd->buffer_limit = min(qd->buffer_limit,
+ max(sch->limit * psched_mtu(qdisc_dev(sch)),
+ q->buffer_config_limit));
}
static int cake_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
- struct cake_sched_data *q = qdisc_priv(sch);
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
struct nlattr *tb[TCA_CAKE_MAX + 1];
u16 rate_flags;
u8 flow_mode;
@@ -2631,19 +2638,19 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
nla_get_s32(tb[TCA_CAKE_OVERHEAD]));
rate_flags |= CAKE_FLAG_OVERHEAD;
- q->max_netlen = 0;
- q->max_adjlen = 0;
- q->min_netlen = ~0;
- q->min_adjlen = ~0;
+ qd->max_netlen = 0;
+ qd->max_adjlen = 0;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
}
if (tb[TCA_CAKE_RAW]) {
rate_flags &= ~CAKE_FLAG_OVERHEAD;
- q->max_netlen = 0;
- q->max_adjlen = 0;
- q->min_netlen = ~0;
- q->min_adjlen = ~0;
+ qd->max_netlen = 0;
+ qd->max_adjlen = 0;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
}
if (tb[TCA_CAKE_MPU])
@@ -2699,7 +2706,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->rate_flags, rate_flags);
WRITE_ONCE(q->flow_mode, flow_mode);
- if (q->tins) {
+ if (qd->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
sch_tree_unlock(sch);
@@ -2715,14 +2722,20 @@ static void cake_destroy(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog);
tcf_block_put(q->block);
kvfree(q->tins);
+ kvfree(q->config);
}
static int cake_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
- struct cake_sched_data *q = qdisc_priv(sch);
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q;
int i, j, err;
+ q = kvcalloc(1, sizeof(struct cake_sched_config), GFP_KERNEL);
+ if (!q)
+ return -ENOMEM;
+
sch->limit = 10240;
sch->flags |= TCQ_F_DEQUEUE_DROPS;
@@ -2736,33 +2749,36 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
* for 5 to 10% of interval
*/
q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
- q->cur_tin = 0;
- q->cur_flow = 0;
+ qd->cur_tin = 0;
+ qd->cur_flow = 0;
+ qd->config = q;
- qdisc_watchdog_init(&q->watchdog, sch);
+ qdisc_watchdog_init(&qd->watchdog, sch);
if (opt) {
err = cake_change(sch, opt, extack);
if (err)
- return err;
+ goto err;
}
- err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+ err = tcf_block_get(&qd->block, &qd->filter_list, sch, extack);
if (err)
- return err;
+ goto err;
quantum_div[0] = ~0;
for (i = 1; i <= CAKE_QUEUES; i++)
quantum_div[i] = 65535 / i;
- q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
- GFP_KERNEL);
- if (!q->tins)
- return -ENOMEM;
+ qd->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
+ GFP_KERNEL);
+ if (!qd->tins) {
+ err = -ENOMEM;
+ goto err;
+ }
for (i = 0; i < CAKE_MAX_TINS; i++) {
- struct cake_tin_data *b = q->tins + i;
+ struct cake_tin_data *b = qd->tins + i;
INIT_LIST_HEAD(&b->new_flows);
INIT_LIST_HEAD(&b->old_flows);
@@ -2778,22 +2794,27 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
INIT_LIST_HEAD(&flow->flowchain);
cobalt_vars_init(&flow->cvars);
- q->overflow_heap[k].t = i;
- q->overflow_heap[k].b = j;
+ qd->overflow_heap[k].t = i;
+ qd->overflow_heap[k].b = j;
b->overflow_idx[j] = k;
}
}
cake_reconfigure(sch);
- q->avg_peak_bandwidth = q->rate_bps;
- q->min_netlen = ~0;
- q->min_adjlen = ~0;
+ qd->avg_peak_bandwidth = q->rate_bps;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
return 0;
+err:
+ kvfree(qd->config);
+ qd->config = NULL;
+ return err;
}
static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- struct cake_sched_data *q = qdisc_priv(sch);
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
struct nlattr *opts;
u16 rate_flags;
u8 flow_mode;
--
2.52.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Cake] [PATCH net-next v3 3/5] net/sched: sch_cake: Add cake_mq qdisc for using cake on mq devices
2025-11-30 20:37 [Cake] [PATCH net-next v3 0/5] Multi-queue aware sch_cake Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 1/5] net/sched: Export mq functions for reuse Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 2/5] net/sched: sch_cake: Factor out config variables into separate struct Toke Høiland-Jørgensen
@ 2025-11-30 20:37 ` Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 4/5] net/sched: sch_cake: Share config across cake_mq sub-qdiscs Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 5/5] net/sched: sch_cake: share shaper state across sub-instances of cake_mq Toke Høiland-Jørgensen
4 siblings, 0 replies; 6+ messages in thread
From: Toke Høiland-Jørgensen @ 2025-11-30 20:37 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Jonas Köppeler, cake, netdev, Toke Høiland-Jørgensen
Add a cake_mq qdisc which installs cake instances on each hardware
queue on a multi-queue device.
This is just a copy of sch_mq that installs cake instead of the default
qdisc on each queue. Subsequent commits will add sharing of the config
between cake instances, as well as a multi-queue aware shaper algorithm.
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
net/sched/sch_cake.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 67 insertions(+), 1 deletion(-)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 545b9b830cce..d360ade6ca26 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -3151,14 +3151,80 @@ static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
};
MODULE_ALIAS_NET_SCH("cake");
+struct cake_mq_sched {
+ struct mq_sched mq_priv; /* must be first */
+};
+
+static void cake_mq_destroy(struct Qdisc *sch)
+{
+ mq_destroy_common(sch);
+}
+
+static int cake_mq_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ int ret;
+
+ ret = mq_init_common(sch, opt, extack, &cake_qdisc_ops);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int cake_mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ mq_dump_common(sch, skb);
+ return 0;
+}
+
+static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static int cake_mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+ struct Qdisc **old, struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "can't replace cake_mq sub-qdiscs");
+ return -EOPNOTSUPP;
+}
+
+static const struct Qdisc_class_ops cake_mq_class_ops = {
+ .select_queue = mq_select_queue,
+ .graft = cake_mq_graft,
+ .leaf = mq_leaf,
+ .find = mq_find,
+ .walk = mq_walk,
+ .dump = mq_dump_class,
+ .dump_stats = mq_dump_class_stats,
+};
+
+static struct Qdisc_ops cake_mq_qdisc_ops __read_mostly = {
+ .cl_ops = &cake_mq_class_ops,
+ .id = "cake_mq",
+ .priv_size = sizeof(struct cake_mq_sched),
+ .init = cake_mq_init,
+ .destroy = cake_mq_destroy,
+ .attach = mq_attach,
+ .change = cake_mq_change,
+ .change_real_num_tx = mq_change_real_num_tx,
+ .dump = cake_mq_dump,
+ .owner = THIS_MODULE,
+};
+MODULE_ALIAS_NET_SCH("cake_mq");
+
static int __init cake_module_init(void)
{
- return register_qdisc(&cake_qdisc_ops);
+ return register_qdisc(&cake_qdisc_ops) ?:
+ register_qdisc(&cake_mq_qdisc_ops);
}
static void __exit cake_module_exit(void)
{
unregister_qdisc(&cake_qdisc_ops);
+ unregister_qdisc(&cake_mq_qdisc_ops);
}
module_init(cake_module_init)
--
2.52.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Cake] [PATCH net-next v3 4/5] net/sched: sch_cake: Share config across cake_mq sub-qdiscs
2025-11-30 20:37 [Cake] [PATCH net-next v3 0/5] Multi-queue aware sch_cake Toke Høiland-Jørgensen
` (2 preceding siblings ...)
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 3/5] net/sched: sch_cake: Add cake_mq qdisc for using cake on mq devices Toke Høiland-Jørgensen
@ 2025-11-30 20:37 ` Toke Høiland-Jørgensen
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 5/5] net/sched: sch_cake: share shaper state across sub-instances of cake_mq Toke Høiland-Jørgensen
4 siblings, 0 replies; 6+ messages in thread
From: Toke Høiland-Jørgensen @ 2025-11-30 20:37 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Jonas Köppeler, cake, netdev, Toke Høiland-Jørgensen
This adds support for configuring the cake_mq instance directly, sharing
the config across the cake sub-qdiscs.
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
net/sched/sch_cake.c | 146 +++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 117 insertions(+), 29 deletions(-)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index d360ade6ca26..51184f308387 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -211,6 +211,7 @@ struct cake_sched_config {
u8 flow_mode;
u8 atm_mode;
u8 ack_filter;
+ u8 is_shared;
};
struct cake_sched_data {
@@ -2580,11 +2581,9 @@ static void cake_reconfigure(struct Qdisc *sch)
q->buffer_config_limit));
}
-static int cake_change(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
+static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt,
+ struct netlink_ext_ack *extack, bool *overhead_changed)
{
- struct cake_sched_data *qd = qdisc_priv(sch);
- struct cake_sched_config *q = qd->config;
struct nlattr *tb[TCA_CAKE_MAX + 1];
u16 rate_flags;
u8 flow_mode;
@@ -2637,20 +2636,12 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->rate_overhead,
nla_get_s32(tb[TCA_CAKE_OVERHEAD]));
rate_flags |= CAKE_FLAG_OVERHEAD;
-
- qd->max_netlen = 0;
- qd->max_adjlen = 0;
- qd->min_netlen = ~0;
- qd->min_adjlen = ~0;
+ *overhead_changed = true;
}
if (tb[TCA_CAKE_RAW]) {
rate_flags &= ~CAKE_FLAG_OVERHEAD;
-
- qd->max_netlen = 0;
- qd->max_adjlen = 0;
- qd->min_netlen = ~0;
- qd->min_adjlen = ~0;
+ *overhead_changed = true;
}
if (tb[TCA_CAKE_MPU])
@@ -2706,6 +2697,34 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->rate_flags, rate_flags);
WRITE_ONCE(q->flow_mode, flow_mode);
+
+ return 0;
+}
+
+static int cake_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+ struct cake_sched_config *q = qd->config;
+ bool overhead_changed = false;
+ int ret;
+
+ if (q->is_shared) {
+ NL_SET_ERR_MSG(extack, "can't reconfigure cake_mq sub-qdiscs");
+ return -EOPNOTSUPP;
+ }
+
+ ret = cake_config_change(q, opt, extack, &overhead_changed);
+ if (ret)
+ return ret;
+
+ if (overhead_changed) {
+ qd->max_netlen = 0;
+ qd->max_adjlen = 0;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
+ }
+
if (qd->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
@@ -2722,7 +2741,23 @@ static void cake_destroy(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog);
tcf_block_put(q->block);
kvfree(q->tins);
- kvfree(q->config);
+ if (!q->config->is_shared)
+ kvfree(q->config);
+}
+
+static void cake_config_init(struct cake_sched_config *q, bool is_shared)
+{
+ q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
+ q->flow_mode = CAKE_FLOW_TRIPLE;
+
+ q->rate_bps = 0; /* unlimited by default */
+
+ q->interval = 100000; /* 100ms default */
+ q->target = 5000; /* 5ms: codel RFC argues
+ * for 5 to 10% of interval
+ */
+ q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+ q->is_shared = is_shared;
}
static int cake_init(struct Qdisc *sch, struct nlattr *opt,
@@ -2736,19 +2771,11 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
if (!q)
return -ENOMEM;
+ cake_config_init(q, false);
+
sch->limit = 10240;
sch->flags |= TCQ_F_DEQUEUE_DROPS;
- q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
- q->flow_mode = CAKE_FLOW_TRIPLE;
-
- q->rate_bps = 0; /* unlimited by default */
-
- q->interval = 100000; /* 100ms default */
- q->target = 5000; /* 5ms: codel RFC argues
- * for 5 to 10% of interval
- */
- q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
qd->cur_tin = 0;
qd->cur_flow = 0;
qd->config = q;
@@ -2811,10 +2838,21 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
return err;
}
-static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+static void cake_config_replace(struct Qdisc *sch, struct cake_sched_config *cfg)
{
struct cake_sched_data *qd = qdisc_priv(sch);
struct cake_sched_config *q = qd->config;
+
+ qd->config = cfg;
+
+ if (!q->is_shared)
+ kvfree(q);
+
+ cake_reconfigure(sch);
+}
+
+static int cake_config_dump(struct cake_sched_config *q, struct sk_buff *skb)
+{
struct nlattr *opts;
u16 rate_flags;
u8 flow_mode;
@@ -2890,6 +2928,13 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
return -1;
}
+static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct cake_sched_data *qd = qdisc_priv(sch);
+
+ return cake_config_dump(qd->config, skb);
+}
+
static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
@@ -3153,6 +3198,7 @@ MODULE_ALIAS_NET_SCH("cake");
struct cake_mq_sched {
struct mq_sched mq_priv; /* must be first */
+ struct cake_sched_config cake_config;
};
static void cake_mq_destroy(struct Qdisc *sch)
@@ -3163,25 +3209,67 @@ static void cake_mq_destroy(struct Qdisc *sch)
static int cake_mq_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
- int ret;
+ struct cake_mq_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int ret, ntx;
+ bool _unused;
+
+ cake_config_init(&priv->cake_config, true);
+ if (opt) {
+ ret = cake_config_change(&priv->cake_config, opt, extack, &_unused);
+ if (ret)
+ return ret;
+ }
ret = mq_init_common(sch, opt, extack, &cake_qdisc_ops);
if (ret)
return ret;
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++)
+ cake_config_replace(priv->mq_priv.qdiscs[ntx], &priv->cake_config);
+
return 0;
}
static int cake_mq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
+ struct cake_mq_sched *priv = qdisc_priv(sch);
+
mq_dump_common(sch, skb);
- return 0;
+ return cake_config_dump(&priv->cake_config, skb);
}
static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
- return -EOPNOTSUPP;
+ struct cake_mq_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ bool overhead_changed = false;
+ unsigned int ntx;
+ int ret;
+
+ ret = cake_config_change(&priv->cake_config, opt, extack, &overhead_changed);
+ if (ret)
+ return ret;
+
+ sch_tree_lock(sch);
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ struct Qdisc *chld = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
+ struct cake_sched_data *qd = qdisc_priv(chld);
+
+ if (overhead_changed) {
+ qd->max_netlen = 0;
+ qd->max_adjlen = 0;
+ qd->min_netlen = ~0;
+ qd->min_adjlen = ~0;
+ }
+
+ if (qd->tins)
+ cake_reconfigure(chld);
+ }
+ sch_tree_unlock(sch);
+
+ return 0;
}
static int cake_mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
--
2.52.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Cake] [PATCH net-next v3 5/5] net/sched: sch_cake: share shaper state across sub-instances of cake_mq
2025-11-30 20:37 [Cake] [PATCH net-next v3 0/5] Multi-queue aware sch_cake Toke Høiland-Jørgensen
` (3 preceding siblings ...)
2025-11-30 20:37 ` [Cake] [PATCH net-next v3 4/5] net/sched: sch_cake: Share config across cake_mq sub-qdiscs Toke Høiland-Jørgensen
@ 2025-11-30 20:37 ` Toke Høiland-Jørgensen
4 siblings, 0 replies; 6+ messages in thread
From: Toke Høiland-Jørgensen @ 2025-11-30 20:37 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, Jamal Hadi Salim, Cong Wang,
Jiri Pirko, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Jonas Köppeler, cake, netdev, Toke Høiland-Jørgensen
From: Jonas Köppeler <j.koeppeler@tu-berlin.de>
This commit adds shared shaper state across the cake instances beneath a
cake_mq qdisc. It works by periodically counting the number of active
instances and dividing the configured rate by that count, so that each
active instance shapes to an equal share of the configured rate.
The scan is lockless and simply reads the qlen and the last_active state
variable of each of the instances configured beneath the parent cake_mq
instance. Locking is not required since the values are only updated by
the owning instance, and eventual consistency is sufficient for the
purpose of estimating the number of active queues.
The interval for scanning the number of active queues is set to 200 us.
We found this to be a good tradeoff between overhead and response time.
For a detailed analysis of this aspect see the Netdevconf talk:
https://netdevconf.info/0x19/docs/netdev-0x19-paper16-talk-paper.pdf
Signed-off-by: Jonas Köppeler <j.koeppeler@tu-berlin.de>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
Documentation/netlink/specs/tc.yaml | 3 +++
include/uapi/linux/pkt_sched.h | 1 +
net/sched/sch_cake.c | 51 +++++++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+)
diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index b398f7a46dae..2e663333a279 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -2207,6 +2207,9 @@ attribute-sets:
-
name: blue-timer-us
type: s32
+ -
+ name: active-queues
+ type: u32
-
name: cake-tin-stats-attrs
name-prefix: tca-cake-tin-stats-
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index c2da76e78bad..66e8072f44df 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1036,6 +1036,7 @@ enum {
TCA_CAKE_STATS_DROP_NEXT_US,
TCA_CAKE_STATS_P_DROP,
TCA_CAKE_STATS_BLUE_TIMER_US,
+ TCA_CAKE_STATS_ACTIVE_QUEUES,
__TCA_CAKE_STATS_MAX
};
#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 51184f308387..5392e8fbe34b 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -201,6 +201,7 @@ struct cake_sched_config {
u64 rate_bps;
u64 interval;
u64 target;
+ u64 sync_time;
u32 buffer_config_limit;
u32 fwmark_mask;
u16 fwmark_shft;
@@ -257,6 +258,11 @@ struct cake_sched_data {
u16 max_adjlen;
u16 min_netlen;
u16 min_adjlen;
+
+ /* mq sync state */
+ u64 last_checked_active;
+ u64 last_active;
+ u32 active_queues;
};
enum {
@@ -383,6 +389,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
1239850263, 1191209601, 1147878294, 1108955788
};
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+ u64 target_ns, u64 rtt_est_ns);
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
*
@@ -1997,6 +2005,40 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
u64 delay;
u32 len;
+ if (q->config->is_shared &&
+ now - q->last_checked_active >= q->config->sync_time) { /* sync_time defaults to 200 us */
+ struct net_device *dev = qdisc_dev(sch);
+ struct cake_sched_data *other_priv;
+ u64 new_rate = q->config->rate_bps;
+ u64 other_qlen, other_last_active;
+ struct Qdisc *other_sch;
+ u32 num_active_qs = 1;
+ unsigned int ntx;
+
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ other_sch = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
+ other_priv = qdisc_priv(other_sch);
+
+ if (other_priv == q)
+ continue;
+
+ other_qlen = READ_ONCE(other_sch->q.qlen);
+ other_last_active = READ_ONCE(other_priv->last_active);
+
+ if (other_qlen || other_last_active > q->last_checked_active)
+ num_active_qs++;
+ }
+
+ if (num_active_qs > 1)
+ new_rate = div64_u64(q->config->rate_bps, num_active_qs);
+
+ /* mtu = 0 is used to only update the rate and not mess with cobalt params */
+ cake_set_rate(b, new_rate, 0, 0, 0);
+ q->last_checked_active = now;
+ q->rate_ns = b->tin_rate_ns;
+ q->rate_shft = b->tin_rate_shft;
+ }
+
begin:
if (!sch->q.qlen)
return NULL;
@@ -2196,6 +2238,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
b->tin_ecn_mark += !!flow->cvars.ecn_marked;
qdisc_bstats_update(sch, skb);
+ q->last_active = now;
/* collect delay stats */
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
@@ -2296,6 +2339,9 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
b->tin_rate_ns = rate_ns;
b->tin_rate_shft = rate_shft;
+ if (mtu == 0)
+ return;
+
byte_target_ns = (byte_target * rate_ns) >> rate_shft;
b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
@@ -2758,6 +2804,7 @@ static void cake_config_init(struct cake_sched_config *q, bool is_shared)
*/
q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
q->is_shared = is_shared;
+ q->sync_time = 200 * NSEC_PER_USEC;
}
static int cake_init(struct Qdisc *sch, struct nlattr *opt,
@@ -2831,6 +2878,9 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
qd->avg_peak_bandwidth = q->rate_bps;
qd->min_netlen = ~0;
qd->min_adjlen = ~0;
+ qd->active_queues = 0;
+ qd->last_checked_active = 0;
+
return 0;
err:
kvfree(qd->config);
@@ -2963,6 +3013,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+ PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues);
#undef PUT_STAT_U32
#undef PUT_STAT_U64
--
2.52.0
^ permalink raw reply [flat|nested] 6+ messages in thread