[Cake] progress? dual-src/dsthost unfairness

George Amanakis gamanakis at gmail.com
Wed Feb 13 15:55:59 EST 2019


Here is an updated version of the patch incorporating Jonathan's suggestion; per-host fairness is preserved in the tests below.

            enp4s0	    enp1s0
Client A/B <------> router <------> server

tc qdisc add dev enp1s0 root cake bandwidth 100mbit dual-srchost
besteffort

tc qdisc add dev enp4s0 root cake bandwidth 100mbit dual-dsthost
besteffort

-----------8<-----------
Client A:
Data file written to ./tcp_8down-2019-02-13T150846.110370.flent.gz.
Summary of tcp_8down test run at 2019-02-13 20:08:46.110370:

                             avg       median          # data pts
 Ping (ms) ICMP   :         0.82         0.66 ms              340
 TCP download avg :         5.97         5.80 Mbits/s         301
 TCP download sum :        47.73        46.42 Mbits/s         301
 TCP download::1  :         5.96         5.82 Mbits/s         297
 TCP download::2  :         5.96         5.82 Mbits/s         298
 TCP download::3  :         5.97         5.82 Mbits/s         297
 TCP download::4  :         5.96         5.82 Mbits/s         297
 TCP download::5  :         5.97         5.82 Mbits/s         297
 TCP download::6  :         5.97         5.82 Mbits/s         297
 TCP download::7  :         5.97         5.82 Mbits/s         297
 TCP download::8  :         5.97         5.82 Mbits/s         297
Data file written to ./tcp_1up-2019-02-13T150847.148563.flent.gz.
Summary of tcp_1up test run at 2019-02-13 20:08:47.148563:

                           avg       median          # data pts
 Ping (ms) ICMP :         0.87         0.68 ms              340
 TCP upload     :        47.46        46.39 Mbits/s         265



Client B:
Data file written to ./tcp_1down-2019-02-13T150848.112225.flent.gz.
Summary of tcp_1down test run at 2019-02-13 20:08:48.112225:

                           avg       median          # data pts
 Ping (ms) ICMP :         0.75         0.65 ms              340
 TCP download   :        47.50        46.57 Mbits/s         300
Data file written to ./tcp_8up-2019-02-13T150849.120750.flent.gz.
Summary of tcp_8up test run at 2019-02-13 20:08:49.120750:

                           avg       median          # data pts
 Ping (ms) ICMP :         0.79         0.68 ms              340
 TCP upload avg :         5.95         5.78 Mbits/s         301
 TCP upload sum :        47.61        46.23 Mbits/s         301
 TCP upload::1  :         5.97         5.82 Mbits/s         171
 TCP upload::2  :         5.95         5.83 Mbits/s         224
 TCP upload::3  :         5.95         5.81 Mbits/s         223
 TCP upload::4  :         5.95         5.82 Mbits/s         224
 TCP upload::5  :         5.94         5.82 Mbits/s         223
 TCP upload::6  :         5.95         5.76 Mbits/s         275
 TCP upload::7  :         5.95         5.77 Mbits/s         222
 TCP upload::8  :         5.95         5.77 Mbits/s         221
-----------8<-----------

---
 sch_cake.c | 125 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 93 insertions(+), 32 deletions(-)

diff --git a/sch_cake.c b/sch_cake.c
index d434ae0..7567378 100644
--- a/sch_cake.c
+++ b/sch_cake.c
@@ -146,8 +146,10 @@ struct cake_flow {
 struct cake_host {
 	u32 srchost_tag;
 	u32 dsthost_tag;
-	u16 srchost_refcnt;
-	u16 dsthost_refcnt;
+	u16 srchost_bulk_flow_count;
+	u16 dsthost_bulk_flow_count;
+	u16 srchost_sparse_flow_count;
+	u16 dsthost_sparse_flow_count;
 };
 
 struct cake_heap_entry {
@@ -844,8 +846,6 @@ skip_hash:
 		 * queue, accept the collision, update the host tags.
 		 */
 		q->way_collisions++;
-		q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
-		q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
 		allocate_src = cake_dsrc(flow_mode);
 		allocate_dst = cake_ddst(flow_mode);
 found:
@@ -865,13 +865,13 @@ found:
 			}
 			for (i = 0; i < CAKE_SET_WAYS;
 				i++, k = (k + 1) % CAKE_SET_WAYS) {
-				if (!q->hosts[outer_hash + k].srchost_refcnt)
+				if (!q->hosts[outer_hash + k].srchost_bulk_flow_count &&
+						!q->hosts[outer_hash + k].srchost_sparse_flow_count)
 					break;
 			}
 			q->hosts[outer_hash + k].srchost_tag = srchost_hash;
 found_src:
 			srchost_idx = outer_hash + k;
-			q->hosts[srchost_idx].srchost_refcnt++;
 			q->flows[reduced_hash].srchost = srchost_idx;
 		}
 
@@ -887,13 +887,13 @@ found_src:
 			}
 			for (i = 0; i < CAKE_SET_WAYS;
 			     i++, k = (k + 1) % CAKE_SET_WAYS) {
-				if (!q->hosts[outer_hash + k].dsthost_refcnt)
+				if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count &&
+						!q->hosts[outer_hash + k].dsthost_sparse_flow_count)
 					break;
 			}
 			q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
 found_dst:
 			dsthost_idx = outer_hash + k;
-			q->hosts[dsthost_idx].dsthost_refcnt++;
 			q->flows[reduced_hash].dsthost = dsthost_idx;
 		}
 	}
@@ -1901,6 +1901,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (!flow->set || flow->set == CAKE_SET_DECAYING) {
 		struct cake_host *srchost = &b->hosts[flow->srchost];
 		struct cake_host *dsthost = &b->hosts[flow->dsthost];
+		u16 sum;
 		u16 host_load = 1;
 
 		if (!flow->set) {
@@ -1912,21 +1913,43 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		flow->set = CAKE_SET_SPARSE;
 		b->sparse_flow_count++;
 
-		if (cake_dsrc(q->flow_mode))
-			host_load = max(host_load, srchost->srchost_refcnt);
+		if (cake_dsrc(q->flow_mode)) {
+			srchost->srchost_sparse_flow_count++;
+			sum = srchost->srchost_sparse_flow_count +
+				srchost->srchost_bulk_flow_count;
+			host_load = max(host_load, sum);
+		}
 
-		if (cake_ddst(q->flow_mode))
-			host_load = max(host_load, dsthost->dsthost_refcnt);
+		if (cake_ddst(q->flow_mode)) {
+			dsthost->dsthost_sparse_flow_count++;
+			sum = dsthost->dsthost_sparse_flow_count +
+				dsthost->dsthost_bulk_flow_count;
+			host_load = max(host_load, sum);
+		}
 
 		flow->deficit = (b->flow_quantum *
 				 quantum_div[host_load]) >> 16;
 	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+		struct cake_host *srchost = &b->hosts[flow->srchost];
+		struct cake_host *dsthost = &b->hosts[flow->dsthost];
+
 		/* this flow was empty, accounted as a sparse flow, but actually
 		 * in the bulk rotation.
 		 */
 		flow->set = CAKE_SET_BULK;
 		b->sparse_flow_count--;
 		b->bulk_flow_count++;
+
+		if (cake_dsrc(q->flow_mode)) {
+			srchost->srchost_sparse_flow_count--;
+			srchost->srchost_bulk_flow_count++;
+		}
+
+		if (cake_ddst(q->flow_mode)) {
+			dsthost->dsthost_sparse_flow_count--;
+			dsthost->dsthost_bulk_flow_count++;
+		}
+
 	}
 
 	if (q->buffer_used > q->buffer_max_used)
@@ -2097,23 +2120,8 @@ retry:
 	dsthost = &b->hosts[flow->dsthost];
 	host_load = 1;
 
-	if (cake_dsrc(q->flow_mode))
-		host_load = max(host_load, srchost->srchost_refcnt);
-
-	if (cake_ddst(q->flow_mode))
-		host_load = max(host_load, dsthost->dsthost_refcnt);
-
-	WARN_ON(host_load > CAKE_QUEUES);
-
 	/* flow isolation (DRR++) */
 	if (flow->deficit <= 0) {
-		/* The shifted prandom_u32() is a way to apply dithering to
-		 * avoid accumulating roundoff errors
-		 */
-		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
-				  (prandom_u32() >> 16)) >> 16;
-		list_move_tail(&flow->flowchain, &b->old_flows);
-
 		/* Keep all flows with deficits out of the sparse and decaying
 		 * rotations.  No non-empty flow can go into the decaying
 		 * rotation, so they can't get deficits
@@ -2122,6 +2130,17 @@ retry:
 			if (flow->head) {
 				b->sparse_flow_count--;
 				b->bulk_flow_count++;
+
+				if (cake_dsrc(q->flow_mode)) {
+					srchost->srchost_sparse_flow_count--;
+					srchost->srchost_bulk_flow_count++;
+				}
+
+				if (cake_ddst(q->flow_mode)) {
+					dsthost->dsthost_sparse_flow_count--;
+					dsthost->dsthost_bulk_flow_count++;
+				}
+
 				flow->set = CAKE_SET_BULK;
 			} else {
 				/* we've moved it to the bulk rotation for
@@ -2131,6 +2150,22 @@ retry:
 				flow->set = CAKE_SET_SPARSE_WAIT;
 			}
 		}
+
+		if (cake_dsrc(q->flow_mode))
+			host_load = max(host_load, srchost->srchost_bulk_flow_count);
+
+		if (cake_ddst(q->flow_mode))
+			host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+
+		WARN_ON(host_load > CAKE_QUEUES);
+
+		/* The shifted prandom_u32() is a way to apply dithering to
+		 * avoid accumulating roundoff errors
+		 */
+		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+				  (prandom_u32() >> 16)) >> 16;
+		list_move_tail(&flow->flowchain, &b->old_flows);
+
 		goto retry;
 	}
 
@@ -2151,10 +2186,24 @@ retry:
 					       &b->decaying_flows);
 				if (flow->set == CAKE_SET_BULK) {
 					b->bulk_flow_count--;
+
+					if (cake_dsrc(q->flow_mode))
+						srchost->srchost_bulk_flow_count--;
+
+					if (cake_ddst(q->flow_mode))
+						dsthost->dsthost_bulk_flow_count--;
+
 					b->decaying_flow_count++;
 				} else if (flow->set == CAKE_SET_SPARSE ||
 					   flow->set == CAKE_SET_SPARSE_WAIT) {
 					b->sparse_flow_count--;
+
+					if (cake_dsrc(q->flow_mode))
+						srchost->srchost_sparse_flow_count--;
+
+					if (cake_ddst(q->flow_mode))
+						dsthost->dsthost_sparse_flow_count--;
+
 					b->decaying_flow_count++;
 				}
 				flow->set = CAKE_SET_DECAYING;
@@ -2162,16 +2211,28 @@ retry:
 				/* remove empty queue from the flowchain */
 				list_del_init(&flow->flowchain);
 				if (flow->set == CAKE_SET_SPARSE ||
-				    flow->set == CAKE_SET_SPARSE_WAIT)
+				    flow->set == CAKE_SET_SPARSE_WAIT) {
 					b->sparse_flow_count--;
-				else if (flow->set == CAKE_SET_BULK)
+
+					if (cake_dsrc(q->flow_mode))
+						srchost->srchost_sparse_flow_count--;
+
+					if (cake_ddst(q->flow_mode))
+						dsthost->dsthost_sparse_flow_count--;
+
+				} else if (flow->set == CAKE_SET_BULK) {
 					b->bulk_flow_count--;
-				else
+
+					if (cake_dsrc(q->flow_mode))
+						srchost->srchost_bulk_flow_count--;
+
+					if (cake_ddst(q->flow_mode))
+						dsthost->dsthost_bulk_flow_count--;
+
+				} else
 					b->decaying_flow_count--;
 
 				flow->set = CAKE_SET_NONE;
-				srchost->srchost_refcnt--;
-				dsthost->dsthost_refcnt--;
 			}
 			goto begin;
 		}
-- 
2.20.1



More information about the Cake mailing list