Cake - FQ_codel the next generation
 help / color / mirror / Atom feed
* [Cake] [PATCH net v5] net/sched: sch_cake: Fix incorrect qlen reduction in cake_drop
@ 2025-11-21 22:27 Xiang Mei
  2025-11-24 10:48 ` [Cake] " Toke Høiland-Jørgensen
  0 siblings, 1 reply; 2+ messages in thread
From: Xiang Mei @ 2025-11-21 22:27 UTC (permalink / raw)
  To: security; +Cc: netdev, toke, xiyou.wangcong, cake, bestswngs, Xiang Mei

In cake_drop(), qdisc_tree_reduce_backlog() is used to update the qlen
and backlog of the qdisc hierarchy. Its caller, cake_enqueue(), assumes
that the parent qdisc will enqueue the current packet. However, this
assumption breaks when cake_enqueue() returns NET_XMIT_CN: the parent
qdisc stops enqueuing the current packet, leaving the tree qlen/backlog
accounting inconsistent. This mismatch can lead to a NULL dereference
(e.g., when the parent Qdisc is qfq_qdisc).

This patch computes the qlen/backlog delta in a more robust way by
observing the difference before and after the series of cake_drop()
calls, and then compensates the qdisc tree accounting if cake_enqueue()
returns NET_XMIT_CN.

To ensure correct compensation when ACK thinning is enabled, a new
variable (ack_pkt_len) is introduced so that len is left unchanged.

Fixes: 15de71d06a40 ("net/sched: Make cake_enqueue return NET_XMIT_CN when past buffer_limit")
Signed-off-by: Xiang Mei <xmei5@asu.edu>
---
v2: add missing cc
v3: move qdisc_tree_reduce_backlog out of cake_drop
v4: remove redundant variable and handle ack branch correctly
v5: add the PoC as a test case
---
 net/sched/sch_cake.c                          | 52 +++++++++++--------
 .../tc-testing/tc-tests/qdiscs/cake.json      | 28 ++++++++++
 2 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 32bacfc314c2..cf4d6454ca9c 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1597,7 +1597,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
 
 	qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT);
 	sch->q.qlen--;
-	qdisc_tree_reduce_backlog(sch, 1, len);
 
 	cake_heapify(q, 0);
 
@@ -1750,7 +1749,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	ktime_t now = ktime_get();
 	struct cake_tin_data *b;
 	struct cake_flow *flow;
-	u32 idx, tin;
+	u32 idx, tin, prev_qlen, prev_backlog, drop_id;
+	bool same_flow = false;
 
 	/* choose flow to insert into */
 	idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
@@ -1823,6 +1823,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		consume_skb(skb);
 	} else {
 		/* not splitting */
+		int ack_pkt_len = 0;
+
 		cobalt_set_enqueue_time(skb, now);
 		get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
 		flow_queue_add(flow, skb);
@@ -1834,7 +1836,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			b->ack_drops++;
 			sch->qstats.drops++;
 			b->bytes += qdisc_pkt_len(ack);
-			len -= qdisc_pkt_len(ack);
+			ack_pkt_len = qdisc_pkt_len(ack);
 			q->buffer_used += skb->truesize - ack->truesize;
 			if (q->rate_flags & CAKE_FLAG_INGRESS)
 				cake_advance_shaper(q, b, ack, now, true);
@@ -1848,11 +1850,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 		/* stats */
 		b->packets++;
-		b->bytes	    += len;
-		b->backlogs[idx]    += len;
-		b->tin_backlog      += len;
-		sch->qstats.backlog += len;
-		q->avg_window_bytes += len;
+		b->bytes	    += len - ack_pkt_len;
+		b->backlogs[idx]    += len - ack_pkt_len;
+		b->tin_backlog      += len - ack_pkt_len;
+		sch->qstats.backlog += len - ack_pkt_len;
+		q->avg_window_bytes += len - ack_pkt_len;
 	}
 
 	if (q->overflow_timeout)
@@ -1927,24 +1929,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (q->buffer_used > q->buffer_max_used)
 		q->buffer_max_used = q->buffer_used;
 
-	if (q->buffer_used > q->buffer_limit) {
-		bool same_flow = false;
-		u32 dropped = 0;
-		u32 drop_id;
+	if (q->buffer_used <= q->buffer_limit)
+		return NET_XMIT_SUCCESS;
 
-		while (q->buffer_used > q->buffer_limit) {
-			dropped++;
-			drop_id = cake_drop(sch, to_free);
+	prev_qlen = sch->q.qlen;
+	prev_backlog = sch->qstats.backlog;
 
-			if ((drop_id >> 16) == tin &&
-			    (drop_id & 0xFFFF) == idx)
-				same_flow = true;
-		}
-		b->drop_overlimit += dropped;
+	while (q->buffer_used > q->buffer_limit) {
+		drop_id = cake_drop(sch, to_free);
+		if ((drop_id >> 16) == tin &&
+		    (drop_id & 0xFFFF) == idx)
+			same_flow = true;
+	}
+
+	/* Compute the dropped qlen and pkt length */
+	prev_qlen -= sch->q.qlen;
+	prev_backlog -= sch->qstats.backlog;
+	b->drop_overlimit += prev_backlog;
 
-		if (same_flow)
-			return NET_XMIT_CN;
+	if (same_flow) {
+		qdisc_tree_reduce_backlog(sch, prev_qlen - 1,
+					  prev_backlog - len);
+		return NET_XMIT_CN;
 	}
+	qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog);
 	return NET_XMIT_SUCCESS;
 }
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json
index c4c5f7ba0e0f..47ecd3fb1ea4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json
@@ -441,5 +441,33 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY handle 1: root"
         ]
+    },
+    {
+	"id": "4366",
+	"name": "Enqueue CAKE with packets dropping",
+	"category": [
+	    "qdisc",
+	    "cake",
+	    "netem"
+	],
+	"plugins": {
+	    "requires": "nsPlugin"
+	},
+	"setup":[
+	    "$TC qdisc add dev $DUMMY handle 1: root qfq",
+	    "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 1024",
+	    "$TC qdisc add dev $DUMMY parent 1:1 handle 2: cake memlimit 9",
+	    "$TC filter add dev $DUMMY protocol ip parent 1: prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+	    "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+	    "$TC qdisc replace dev $DUMMY parent 1:1 handle 3: netem delay 0ms"
+	],
+	"cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+	"expExitCode": "0",
+	"verifyCmd": "$TC -s qdisc show dev $DUMMY",
+	"matchPattern": "qdisc qfq 1:",
+	"matchCount": "1",
+	"teardown": [
+	    "$TC qdisc del dev $DUMMY handle 1: root"
+	]
     }
 ]
-- 
2.43.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [Cake] Re: [PATCH net v5] net/sched: sch_cake: Fix incorrect qlen reduction in cake_drop
  2025-11-21 22:27 [Cake] [PATCH net v5] net/sched: sch_cake: Fix incorrect qlen reduction in cake_drop Xiang Mei
@ 2025-11-24 10:48 ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 2+ messages in thread
From: Toke Høiland-Jørgensen @ 2025-11-24 10:48 UTC (permalink / raw)
  To: Xiang Mei, security; +Cc: netdev, xiyou.wangcong, cake, bestswngs, Xiang Mei

Xiang Mei <xmei5@asu.edu> writes:

> In cake_drop(), qdisc_tree_reduce_backlog() is used to update the qlen
> and backlog of the qdisc hierarchy. Its caller, cake_enqueue(), assumes
> that the parent qdisc will enqueue the current packet. However, this
> assumption breaks when cake_enqueue() returns NET_XMIT_CN: the parent
> qdisc stops enqueuing the current packet, leaving the tree qlen/backlog
> accounting inconsistent. This mismatch can lead to a NULL dereference
> (e.g., when the parent Qdisc is qfq_qdisc).
>
> This patch computes the qlen/backlog delta in a more robust way by
> observing the difference before and after the series of cake_drop()
> calls, and then compensates the qdisc tree accounting if cake_enqueue()
> returns NET_XMIT_CN.
>
> To ensure correct compensation when ACK thinning is enabled, a new
> variable (ack_pkt_len) is introduced so that len is left unchanged.
>
> Fixes: 15de71d06a40 ("net/sched: Make cake_enqueue return NET_XMIT_CN when past buffer_limit")
> Signed-off-by: Xiang Mei <xmei5@asu.edu>
> ---
> v2: add missing cc
> v3: move qdisc_tree_reduce_backlog out of cake_drop
> v4: remove redundant variable and handle ack branch correctly
> v5: add the PoC as a test case

Please split the test case into its own patch and send both as a series.

Otherwise, the changes LGTM apart from the few nits below:

> ---
>  net/sched/sch_cake.c                          | 52 +++++++++++--------
>  .../tc-testing/tc-tests/qdiscs/cake.json      | 28 ++++++++++
>  2 files changed, 58 insertions(+), 22 deletions(-)
>
> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
> index 32bacfc314c2..cf4d6454ca9c 100644
> --- a/net/sched/sch_cake.c
> +++ b/net/sched/sch_cake.c
> @@ -1597,7 +1597,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
>  
>  	qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT);
>  	sch->q.qlen--;
> -	qdisc_tree_reduce_backlog(sch, 1, len);
>  
>  	cake_heapify(q, 0);
>  
> @@ -1750,7 +1749,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  	ktime_t now = ktime_get();
>  	struct cake_tin_data *b;
>  	struct cake_flow *flow;
> -	u32 idx, tin;
> +	u32 idx, tin, prev_qlen, prev_backlog, drop_id;
> +	bool same_flow = false;

Please make sure to maintain the reverse x-mas tree ordering of the
variable declarations.

>  
>  	/* choose flow to insert into */
>  	idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
> @@ -1823,6 +1823,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  		consume_skb(skb);
>  	} else {
>  		/* not splitting */
> +		int ack_pkt_len = 0;
> +
>  		cobalt_set_enqueue_time(skb, now);
>  		get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
>  		flow_queue_add(flow, skb);
> @@ -1834,7 +1836,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  			b->ack_drops++;
>  			sch->qstats.drops++;
>  			b->bytes += qdisc_pkt_len(ack);
> -			len -= qdisc_pkt_len(ack);
> +			ack_pkt_len = qdisc_pkt_len(ack);

There's a qdisc_tree_reduce_backlog() that uses qdisc_pkt_len(ack) just
below this; let's also change that to use ack_pkt_len while we're at it.

>  			q->buffer_used += skb->truesize - ack->truesize;
>  			if (q->rate_flags & CAKE_FLAG_INGRESS)
>  				cake_advance_shaper(q, b, ack, now, true);
> @@ -1848,11 +1850,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  
>  		/* stats */
>  		b->packets++;
> -		b->bytes	    += len;
> -		b->backlogs[idx]    += len;
> -		b->tin_backlog      += len;
> -		sch->qstats.backlog += len;
> -		q->avg_window_bytes += len;
> +		b->bytes	    += len - ack_pkt_len;
> +		b->backlogs[idx]    += len - ack_pkt_len;
> +		b->tin_backlog      += len - ack_pkt_len;
> +		sch->qstats.backlog += len - ack_pkt_len;
> +		q->avg_window_bytes += len - ack_pkt_len;
>  	}
>  
>  	if (q->overflow_timeout)
> @@ -1927,24 +1929,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  	if (q->buffer_used > q->buffer_max_used)
>  		q->buffer_max_used = q->buffer_used;
>  
> -	if (q->buffer_used > q->buffer_limit) {
> -		bool same_flow = false;
> -		u32 dropped = 0;
> -		u32 drop_id;
> +	if (q->buffer_used <= q->buffer_limit)
> +		return NET_XMIT_SUCCESS;
>  
> -		while (q->buffer_used > q->buffer_limit) {
> -			dropped++;
> -			drop_id = cake_drop(sch, to_free);
> +	prev_qlen = sch->q.qlen;
> +	prev_backlog = sch->qstats.backlog;
>  
> -			if ((drop_id >> 16) == tin &&
> -			    (drop_id & 0xFFFF) == idx)
> -				same_flow = true;
> -		}
> -		b->drop_overlimit += dropped;
> +	while (q->buffer_used > q->buffer_limit) {
> +		drop_id = cake_drop(sch, to_free);
> +		if ((drop_id >> 16) == tin &&
> +		    (drop_id & 0xFFFF) == idx)
> +			same_flow = true;
> +	}
> +
> +	/* Compute the dropped qlen and pkt length */
> +	prev_qlen -= sch->q.qlen;
> +	prev_backlog -= sch->qstats.backlog;
> +	b->drop_overlimit += prev_backlog;

drop_overlimit was accounted in packets before, so this should be += prev_qlen.

-Toke

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-11-24 10:48 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-21 22:27 [Cake] [PATCH net v5] net/sched: sch_cake: Fix incorrect qlen reduction in cake_drop Xiang Mei
2025-11-24 10:48 ` [Cake] " Toke Høiland-Jørgensen

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.