[Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides

CoDel AQM discussions
 help / color / mirror / Atom feed

* [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
@ 2012-05-12 13:32 Eric Dumazet
  2012-05-12 19:52 ` David Miller
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-05-12 13:32 UTC (permalink / raw)
  To: David Miller
  Cc: Dave Taht, Nandita Dukkipati, netdev, codel, Yuchung Cheng,
	Stephen Hemminger, Matt Mathis

From: Eric Dumazet <edumazet@google.com>

As Van pointed out, interval/sqrt(count) can be implemented using
multiplies only.

http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots

This patch implements the Newton method and reciprocal divide.

Total cost is 15 cycles instead of 120 on my Corei5 machine (64bit
kernel).

There is a small 'error' for count values < 5, but we don't really care.

I reuse a hole in struct codel_vars :
 - pack the dropping boolean into one bit
 - use 31bit to store the reciprocal value of sqrt(count).

Suggested-by: Van Jacobson <van@pollere.net>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Dave Taht <dave.taht@bufferbloat.net>
Cc: Kathleen Nichols <nichols@pollere.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
---
 include/net/codel.h |   68 ++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/include/net/codel.h b/include/net/codel.h
index bce2cef..bd8747c 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -46,6 +46,7 @@
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
+#include <linux/reciprocal_div.h>
 
 /* Controlling Queue Delay (CoDel) algorithm
  * =========================================
@@ -123,6 +124,7 @@ struct codel_params {
  *			entered dropping state
  * @lastcount:		count at entry to dropping state
  * @dropping:		set to true if in dropping state
+ * @rec_inv_sqrt:	reciprocal value of sqrt(count) >> 1
  * @first_above_time:	when we went (or will go) continuously above target
  *			for interval
  * @drop_next:		time to drop next packet, or when we dropped last
@@ -131,7 +133,8 @@ struct codel_params {
 struct codel_vars {
 	u32		count;
 	u32		lastcount;
-	bool		dropping;
+	bool		dropping:1;
+	u32		rec_inv_sqrt:31;
 	codel_time_t	first_above_time;
 	codel_time_t	drop_next;
 	codel_time_t	ldelay;
@@ -158,11 +161,7 @@ static void codel_params_init(struct codel_params *params)
 
 static void codel_vars_init(struct codel_vars *vars)
 {
-	vars->drop_next = 0;
-	vars->first_above_time = 0;
-	vars->dropping = false; /* exit dropping state */
-	vars->count = 0;
-	vars->lastcount = 0;
+	memset(vars, 0, sizeof(*vars));
 }
 
 static void codel_stats_init(struct codel_stats *stats)
@@ -170,38 +169,37 @@ static void codel_stats_init(struct codel_stats *stats)
 	stats->maxpacket = 256;
 }
 
-/* return interval/sqrt(x) with good precision
- * relies on int_sqrt(unsigned long x) kernel implementation
+/*
+ * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 31bit mantissa)
  */
-static u32 codel_inv_sqrt(u32 _interval, u32 _x)
+static void codel_Newton_step(struct codel_vars *vars)
 {
-	u64 interval = _interval;
-	unsigned long x = _x;
+	u32 invsqrt = vars->rec_inv_sqrt;
+	u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 31;
+	u64 val = (3LL << 31) - ((u64)vars->count * invsqrt2);
 
-	/* Scale operands for max precision */
-
-#if BITS_PER_LONG == 64
-	x <<= 32; /* On 64bit arches, we can prescale x by 32bits */
-	interval <<= 16;
-#endif
+	val = (val * invsqrt) >> 32;
 
-	while (x < (1UL << (BITS_PER_LONG - 2))) {
-		x <<= 2;
-		interval <<= 1;
-	}
-	do_div(interval, int_sqrt(x));
-	return (u32)interval;
+	vars->rec_inv_sqrt = val;
 }
 
+/*
+ * CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
 static codel_time_t codel_control_law(codel_time_t t,
 				      codel_time_t interval,
-				      u32 count)
+				      u32 rec_inv_sqrt)
 {
-	return t + codel_inv_sqrt(interval, count);
+	return t + reciprocal_divide(interval, rec_inv_sqrt << 1);
 }
 
 
-static bool codel_should_drop(struct sk_buff *skb,
+static bool codel_should_drop(const struct sk_buff *skb,
 			      unsigned int *backlog,
 			      struct codel_vars *vars,
 			      struct codel_params *params,
@@ -274,14 +272,16 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
 			 */
 			while (vars->dropping &&
 			       codel_time_after_eq(now, vars->drop_next)) {
-				if (++vars->count == 0) /* avoid zero divides */
-					vars->count = ~0U;
+				vars->count++; /* dont care of possible wrap
+						* since there is no more divide
+						*/
+				codel_Newton_step(vars);
 				if (params->ecn && INET_ECN_set_ce(skb)) {
 					stats->ecn_mark++;
 					vars->drop_next =
 						codel_control_law(vars->drop_next,
 								  params->interval,
-								  vars->count);
+								  vars->rec_inv_sqrt);
 					goto end;
 				}
 				qdisc_drop(skb, sch);
@@ -296,7 +296,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
 					vars->drop_next =
 						codel_control_law(vars->drop_next,
 								  params->interval,
-								  vars->count);
+								  vars->rec_inv_sqrt);
 				}
 			}
 		}
@@ -319,12 +319,18 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
 		if (codel_time_before(now - vars->drop_next,
 				      16 * params->interval)) {
 			vars->count = (vars->count - vars->lastcount) | 1;
+			/* we dont care if rec_inv_sqrt approximation
+			 * is not very precise :
+			 * Next Newton steps will correct it quadratically.
+			 */
+			codel_Newton_step(vars);
 		} else {
 			vars->count = 1;
+			vars->rec_inv_sqrt = 0x7fffffff;
 		}
 		vars->lastcount = vars->count;
 		vars->drop_next = codel_control_law(now, params->interval,
-						    vars->count);
+						    vars->rec_inv_sqrt);
 	}
 end:
 	return skb;



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-12 13:32 [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides Eric Dumazet
@ 2012-05-12 19:52 ` David Miller
  2012-05-12 20:40   ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: David Miller @ 2012-05-12 19:52 UTC (permalink / raw)
  To: eric.dumazet
  Cc: dave.taht, nanditad, netdev, codel, ycheng, shemminger, mattmathis

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 12 May 2012 15:32:13 +0200

> From: Eric Dumazet <edumazet@google.com>
> 
> As Van pointed out, interval/sqrt(count) can be implemented using
> multiplies only.
> 
> http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
> 
> This patch implements the Newton method and reciprocal divide.
> 
> Total cost is 15 cycles instead of 120 on my Corei5 machine (64bit
> kernel).
> 
> There is a small 'error' for count values < 5, but we don't really care.
> 
> I reuse a hole in struct codel_vars :
>  - pack the dropping boolean into one bit
>  - use 31bit to store the reciprocal value of sqrt(count).
> 
> Suggested-by: Van Jacobson <van@pollere.net>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied but I never like that bitfield sharing for real integers.

GCC makes a complete mess of it as it extracts and inserts the
integer value into that bit field.  You are guarenteed to get
better code if you do this by hand in a full u32.

Either that or just bite the bullet and use a completely seperate
field, maybe we'll need more boolean states later.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-12 19:52 ` David Miller
@ 2012-05-12 20:40   ` Eric Dumazet
  2012-05-12 20:45     ` David Miller
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-05-12 20:40 UTC (permalink / raw)
  To: David Miller
  Cc: dave.taht, nanditad, netdev, codel, ycheng, shemminger, mattmathis

On Sat, 2012-05-12 at 15:52 -0400, David Miller wrote:

> Applied but I never like that bitfield sharing for real integers.
> 
> GCC makes a complete mess of it as it extracts and inserts the
> integer value into that bit field.  You are guarenteed to get
> better code if you do this by hand in a full u32.
> 
> Either that or just bite the bullet and use a completely seperate
> field, maybe we'll need more boolean states later.

I couldnt use a full u32 or else fq_codel cell was > 64 bytes (or I
would have to remove the 'dropped' field)

24 bit of precision for the reciprocal value is more than enough (Van
suggested 16 bits in fact), so we have actually room for 7 bits if
needed.

By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction
for (vars->rec_inv_sqrt << 1).

Thanks



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-12 20:40   ` Eric Dumazet
@ 2012-05-12 20:45     ` David Miller
  2012-05-12 21:48       ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: David Miller @ 2012-05-12 20:45 UTC (permalink / raw)
  To: eric.dumazet
  Cc: dave.taht, nanditad, netdev, codel, ycheng, shemminger, mattmathis

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 12 May 2012 22:40:56 +0200

> 24 bit of precision for the reciprocal value is more than enough (Van
> suggested 16 bits in fact), so we have actually room for 7 bits if
> needed.

Using a u16 would also work for me.

> By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction
> for (vars->rec_inv_sqrt << 1).

Yeah but what do stores of ->rec_inv_sqrt look like?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-12 20:45     ` David Miller
@ 2012-05-12 21:48       ` Eric Dumazet
  2012-05-12 21:52         ` David Miller
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-05-12 21:48 UTC (permalink / raw)
  To: David Miller
  Cc: dave.taht, nanditad, netdev, codel, ycheng, shemminger, mattmathis

On Sat, 2012-05-12 at 16:45 -0400, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Sat, 12 May 2012 22:40:56 +0200
> 
> > 24 bit of precision for the reciprocal value is more than enough (Van
> > suggested 16 bits in fact), so we have actually room for 7 bits if
> > needed.
> 
> Using a u16 would also work for me.

I tried it but it gives noticeable errors for count > 16000, and no
speed gain.


count=16525 scale=0 sqrt(scaled_count)=2056 reciprocal(sq)=1fe0200
Newton=235
  interval/sqrt(16525) = 
	777909 (float compute)
	778210 (integer approx)
	777926 (int_sqrt_div)
	862121 (Newton approx)

And if a flow is really agressive, count can grow above 10^6

> 
> > By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction
> > for (vars->rec_inv_sqrt << 1).
> 
> Yeah but what do stores of ->rec_inv_sqrt look like?

The load is "shr %edi" as in :
and the store an "or %ecx,%esi"

 5f2:	8b 72 08             	mov    0x8(%rdx),%esi
 5f5:	44 8b 02             	mov    (%rdx),%r8d
 5f8:	89 f7                	mov    %esi,%edi
 5fa:	41 83 c0 01          	add    $0x1,%r8d    vars->count + 1
 5fe:	83 e6 01             	and    $0x1,%esi    vars->dropping in esi

 601:	d1 ef                	shr    %edi
 603:	44 89 02             	mov    %r8d,(%rdx)  vars->count++;
 606:	45 89 c0             	mov    %r8d,%r8d
 609:	89 ff                	mov    %edi,%edi
 60b:	48 89 f9             	mov    %rdi,%rcx
 60e:	48 0f af cf          	imul   %rdi,%rcx

 612:	48 c1 e9 1f          	shr    $0x1f,%rcx
 616:	49 0f af c8          	imul   %r8,%rcx
 61a:	49 b8 00 00 00 80 01 	mov    $0x180000000,%r8
 621:	00 00 00 
 624:	49 29 c8             	sub    %rcx,%r8
 627:	4c 89 c1             	mov    %r8,%rcx
 62a:	48 0f af cf          	imul   %rdi,%rcx
 62e:	48 c1 e9 20          	shr    $0x20,%rcx
 632:	01 c9                	add    %ecx,%ecx
 634:	09 ce                	or     %ecx,%esi        combine the two fields
 636:	89 72 08             	mov    %esi,0x8(%rdx)   final store

 
Using 24bits generates roughly same code. (constants are different)




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-12 21:48       ` Eric Dumazet
@ 2012-05-12 21:52         ` David Miller
  2012-05-13  7:23           ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: David Miller @ 2012-05-12 21:52 UTC (permalink / raw)
  To: eric.dumazet
  Cc: dave.taht, nanditad, netdev, codel, ycheng, shemminger, mattmathis

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 12 May 2012 23:48:44 +0200

> On Sat, 2012-05-12 at 16:45 -0400, David Miller wrote:
>> Using a u16 would also work for me.
> 
> I tried it but it gives noticeable errors for count > 16000, and no
> speed gain.
 ...
> And if a flow is really agressive, count can grow above 10^6
> 
>> > By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction
>> > for (vars->rec_inv_sqrt << 1).
>> 
>> Yeah but what do stores of ->rec_inv_sqrt look like?
> 
> The load is "shr %edi" as in :
> and the store an "or %ecx,%esi"

Ok, fair enough.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-12 21:52         ` David Miller
@ 2012-05-13  7:23           ` Eric Dumazet
  2012-05-14  5:46             ` Andrew McGregor
  2012-05-14 22:33             ` David Miller
  0 siblings, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-05-13  7:23 UTC (permalink / raw)
  To: David Miller
  Cc: dave.taht, nanditad, netdev, codel, ycheng, shemminger, mattmathis

From: Eric Dumazet <edumazet@google.com>

On Sat, 2012-05-12 at 17:52 -0400, David Miller wrote:

> Ok, fair enough.

Oh well, I sent my mail too late. The error made no sense after a good
night. Also, when Van says something, you can be fairly sure its right,
and if it's not, then you didn't understand what Van said ;)

16bit precision is OK, once the maths are correctly done in the userland
program I wrote yesterday...

count=16525, precision=16 bits, sqrt(scaled_count)=4113, reciprocal(sq)=1fde240, Newton=1fd0000
  interval/sqrt(16525) = 
	777909 (float compute)  // (u32)(interval/sqrt(count))
	778020 (integer approx) // reciprocal_divide(interval, rec)
	777926 (int_sqrt_div)   // int_sqrt_div(interval, count)
	776672 (Newton approx)  // reciprocal_divide(interval, previnv << shift)

count=9889134, precision=16 bits, sqrt(scaled_count)=50315,
reciprocal(sq)=14d720, Newton=140000
  interval/sqrt(9889134) = 
	31799 (float compute)
	31799 (integer approx)
	31799 (int_sqrt_div)
	30517 (Newton approx)


And kernel code using u16 :

 6a1:	0f b7 72 0a          	movzwl 0xa(%rdx),%esi
 6a5:	8b 3a                	mov    (%rdx),%edi
 6a7:	83 c7 01             	add    $0x1,%edi
 6aa:	c1 e6 10             	shl    $0x10,%esi
 6ad:	89 3a                	mov    %edi,(%rdx)  vars->count++
 6af:	89 ff                	mov    %edi,%edi
 6b1:	89 f6                	mov    %esi,%esi
 6b3:	48 89 f1             	mov    %rsi,%rcx
 6b6:	48 0f af ce          	imul   %rsi,%rcx
 6ba:	48 c1 e9 20          	shr    $0x20,%rcx
 6be:	48 0f af cf          	imul   %rdi,%rcx
 6c2:	48 bf 00 00 00 00 03 	mov    $0x300000000,%rdi
 6c9:	00 00 00 
 6cc:	48 29 cf             	sub    %rcx,%rdi
 6cf:	48 89 f9             	mov    %rdi,%rcx
 6d2:	48 c1 e9 02          	shr    $0x2,%rcx
 6d6:	48 0f af ce          	imul   %rsi,%rcx
 6da:	48 c1 e9 2f          	shr    $0x2f,%rcx
 6de:	66 89 4a 0a          	mov    %cx,0xa(%rdx)


Fell free to add following cleanup patch, if you like it ;)

Thanks

[PATCH net-next] codel: use u16 field instead of 31bits for rec_inv_sqrt

David pointed out gcc might generate poor code with 31bit fields.

Using u16 is more than enough and permits a better code output.

Also make the code intent more readable using constants, fixed point arithmetic
not being trivial for everybody.

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/codel.h |   25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/include/net/codel.h b/include/net/codel.h
index bd8747c..7546517 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -133,13 +133,17 @@ struct codel_params {
 struct codel_vars {
 	u32		count;
 	u32		lastcount;
-	bool		dropping:1;
-	u32		rec_inv_sqrt:31;
+	bool		dropping;
+	u16		rec_inv_sqrt;
 	codel_time_t	first_above_time;
 	codel_time_t	drop_next;
 	codel_time_t	ldelay;
 };
 
+#define REC_INV_SQRT_BITS (8 * sizeof(u16)) /* or sizeof_in_bits(rec_inv_sqrt) */
+/* needed shift to get a Q0.32 number from rec_inv_sqrt */
+#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
+
 /**
  * struct codel_stats - contains codel shared variables and stats
  * @maxpacket:	largest packet we've seen so far
@@ -173,17 +177,18 @@ static void codel_stats_init(struct codel_stats *stats)
  * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
  * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
  *
- * Here, invsqrt is a fixed point number (< 1.0), 31bit mantissa)
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
  */
 static void codel_Newton_step(struct codel_vars *vars)
 {
-	u32 invsqrt = vars->rec_inv_sqrt;
-	u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 31;
-	u64 val = (3LL << 31) - ((u64)vars->count * invsqrt2);
+	u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
+	u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
+	u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2);
 
-	val = (val * invsqrt) >> 32;
+	val >>= 2; /* avoid overflow in following multiply */
+	val = (val * invsqrt) >> (32 - 2 + 1);
 
-	vars->rec_inv_sqrt = val;
+	vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
 }
 
 /*
@@ -195,7 +200,7 @@ static codel_time_t codel_control_law(codel_time_t t,
 				      codel_time_t interval,
 				      u32 rec_inv_sqrt)
 {
-	return t + reciprocal_divide(interval, rec_inv_sqrt << 1);
+	return t + reciprocal_divide(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT);
 }
 
 
@@ -326,7 +331,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
 			codel_Newton_step(vars);
 		} else {
 			vars->count = 1;
-			vars->rec_inv_sqrt = 0x7fffffff;
+			vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
 		}
 		vars->lastcount = vars->count;
 		vars->drop_next = codel_control_law(now, params->interval,



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-13  7:23           ` Eric Dumazet
@ 2012-05-14  5:46             ` Andrew McGregor
  2012-05-14  6:00               ` dave taht
  2012-05-14 22:33             ` David Miller
  1 sibling, 1 reply; 21+ messages in thread
From: Andrew McGregor @ 2012-05-14  5:46 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Nandita Dukkipati, Matt Mathis, codel, ycheng, Dave Taht


[-- Attachment #1.1: Type: text/plain, Size: 869 bytes --]

(netdev and linux developers removed from the cc: list)

Here's what I believe to be a matching patch for ns-3.

Unlike the first version, this should compile even if ns-3 has not been built first.

It includes a bunch of trace sources, allowing graphing of sojourn times and queue occupancy, for example.

Attach a CoDelQueue to a point-to-point interface (or anything that supports the SetQueue API) like this (C++ version, Python should be obvious):

      bottleneckchannel.SetQueue ("ns3::CoDelQueue",
                                  "Interval", StringValue(CoDelInterval),
                                  "Target", StringValue(CoDelTarget)
                                  );

As an example, a trace path for bytesInQueue will look something like "/NodeList/*/DeviceList/*/$ns3::PointToPointNetDevice/TxQueue/$ns3::CoDelQueue/bytesInQueue"


[-- Attachment #1.2: ns-3-codel.patch --]
[-- Type: application/octet-stream, Size: 18580 bytes --]

diff -r 2a669a0c452e -r a00e9e71bb24 src/network/utils/codel-queue.cc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/network/utils/codel-queue.cc	Mon May 14 17:35:24 2012 +1200
@@ -0,0 +1,476 @@
+/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
+/*
+ * Copyright (c) 2012 Andrew McGregor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * 
+ * Codel, the COntrolled DELay Queueing discipline
+ * Based on ns2 simulation code presented by Kathie Nichols
+ *
+ * This port based on linux kernel code by
+ * Authors:	Dave Täht <d@taht.net>
+ *		Eric Dumazet <edumazet@google.com>
+ *
+ * Ported to ns-3 by: Andrew McGregor <andrewmcgr@gmail.com>
+*/
+
+#include "ns3/log.h"
+#include "ns3/enum.h"
+#include "ns3/uinteger.h"
+#include "ns3/abort.h"
+#include "codel-queue.h"
+
+NS_LOG_COMPONENT_DEFINE ("CoDelQueue");
+
+namespace ns3 {
+
+#define BITS_PER_LONG (8 * sizeof (unsigned long))
+
+/* borrowed from the linux kernel */
+#define do_div(n,base)						\
+({								\
+	int __res;						\
+	__res = ((unsigned long)n) % (unsigned int)base;	\
+	n = ((unsigned long)n) / (unsigned int)base;		\
+	__res;							\
+})
+
+static inline uint32_t reciprocal_divide(uint32_t A, uint32_t R)
+{
+	return (uint32_t)(((uint64_t)A * R) >> 32);
+}
+
+/* end kernel borrowings */
+
+static codel_time_t codel_get_time(void)
+{
+  Time time = Simulator::Now ();
+  uint64_t ns = time.GetNanoSeconds ();
+
+  return ns >> CODEL_SHIFT;
+}
+
+#define codel_time_after(a, b)	 ((int)(a) - (int)(b) > 0)
+#define codel_time_after_eq(a, b) ((int)(a) - (int)(b) >= 0)
+#define codel_time_before(a, b)	 ((int)(a) - (int)(b) < 0)
+#define codel_time_before_eq(a, b) ((int)(a) - (int)(b) <= 0)
+
+#define NSEC_PER_MSEC 1000000
+#define NSEC_PER_USEC 1000
+#define MS2TIME(a) ((a * NSEC_PER_MSEC) >> CODEL_SHIFT)
+#define US2TIME(a) ((a * NSEC_PER_USEC) >> CODEL_SHIFT)
+#define NS2TIME(a) ((a) >> CODEL_SHIFT)
+#define TIME2CODEL(a) NS2TIME(a.GetNanoSeconds())
+
+#define DEFAULT_CODEL_LIMIT 1000
+
+
+class CoDelTimestampTag : public Tag
+{
+public:
+  CoDelTimestampTag ();
+  static TypeId GetTypeId (void);
+  virtual TypeId GetInstanceTypeId (void) const;
+
+  virtual uint32_t GetSerializedSize (void) const;
+  virtual void Serialize (TagBuffer i) const;
+  virtual void Deserialize (TagBuffer i);
+  virtual void Print (std::ostream &os) const;
+
+  Time GetTxTime (void) const;
+private:
+  uint64_t m_creationTime;
+};
+
+CoDelTimestampTag::CoDelTimestampTag ()
+  : m_creationTime (Simulator::Now ().GetTimeStep ())
+{
+}
+
+TypeId
+CoDelTimestampTag::GetTypeId (void)
+{
+  static TypeId tid = TypeId ("anon::CoDelTimestampTag")
+    .SetParent<Tag> ()
+    .AddConstructor<CoDelTimestampTag> ()
+    .AddAttribute ("CreationTime",
+                   "The time at which the timestamp was created",
+                   StringValue ("0.0s"),
+                   MakeTimeAccessor (&CoDelTimestampTag::GetTxTime),
+                   MakeTimeChecker ())
+  ;
+  return tid;
+}
+TypeId
+CoDelTimestampTag::GetInstanceTypeId (void) const
+{
+  return GetTypeId ();
+}
+
+uint32_t
+CoDelTimestampTag::GetSerializedSize (void) const
+{
+  return 8;
+}
+void
+CoDelTimestampTag::Serialize (TagBuffer i) const
+{
+  i.WriteU64 (m_creationTime);
+}
+void
+CoDelTimestampTag::Deserialize (TagBuffer i)
+{
+  m_creationTime = i.ReadU64 ();
+}
+void
+CoDelTimestampTag::Print (std::ostream &os) const
+{
+  os << "CreationTime=" << m_creationTime;
+}
+Time
+CoDelTimestampTag::GetTxTime (void) const
+{
+  return TimeStep (m_creationTime);
+}
+
+NS_OBJECT_ENSURE_REGISTERED (CoDelQueue);
+
+TypeId CoDelQueue::GetTypeId (void) 
+{
+  static TypeId tid = TypeId ("ns3::CoDelQueue")
+    .SetParent<Queue> ()
+    .AddConstructor<CoDelQueue> ()
+    .AddAttribute ("Mode", 
+                   "Whether to use Bytes (see MaxBytes) or Packets (see MaxPackets) as the maximum queue size metric.",
+                   EnumValue (PACKETS),
+                   MakeEnumAccessor (&CoDelQueue::SetMode),
+                   MakeEnumChecker (BYTES, "Bytes",
+                                    PACKETS, "Packets"))
+    .AddAttribute ("MaxPackets", 
+                   "The maximum number of packets accepted by this CoDelQueue.",
+                   UintegerValue (DEFAULT_CODEL_LIMIT),
+                   MakeUintegerAccessor (&CoDelQueue::m_maxPackets),
+                   MakeUintegerChecker<uint32_t> ())
+    .AddAttribute ("MaxBytes", 
+                   "The maximum number of bytes accepted by this CoDelQueue.",
+                   UintegerValue (1500*DEFAULT_CODEL_LIMIT),
+                   MakeUintegerAccessor (&CoDelQueue::m_maxBytes),
+                   MakeUintegerChecker<uint32_t> ())
+    .AddAttribute ("MinBytes", 
+                   "The CoDel algorithm minbytes parameter.",
+                   UintegerValue (1500),
+                   MakeUintegerAccessor (&CoDelQueue::m_minbytes),
+                   MakeUintegerChecker<uint32_t> ())
+    .AddAttribute ("Interval",
+                   "The CoDel algorithm interval",
+                   StringValue ("100ms"),
+                   MakeTimeAccessor (&CoDelQueue::m_Interval),
+                   MakeTimeChecker ())
+    .AddAttribute ("Target",
+                   "The CoDel algorithm target queue delay",
+                   StringValue ("5ms"),
+                   MakeTimeAccessor (&CoDelQueue::m_Target),
+                   MakeTimeChecker ())
+    .AddTraceSource("count",
+                    "CoDel count",
+                    MakeTraceSourceAccessor(&CoDelQueue::m_count))
+    .AddTraceSource("drop_count",
+                    "CoDel drop count",
+                    MakeTraceSourceAccessor(&CoDelQueue::m_drop_count))
+    .AddTraceSource("bytesInQueue",
+                    "Number of bytes in the queue",
+                    MakeTraceSourceAccessor(&CoDelQueue::m_bytesInQueue))
+  ;
+
+  return tid;
+}
+
+CoDelQueue::CoDelQueue () :
+  Queue (),
+  m_packets (),
+  m_maxBytes(),
+  m_bytesInQueue(0),
+  m_count(0),
+  m_drop_count(0),
+  m_dropping(false),
+  m_rec_inv_sqrt(~0U >> REC_INV_SQRT_SHIFT),
+  m_first_above_time(0),
+  m_drop_next(0),
+  m_state1(0),
+  m_state2(0),
+  m_state3(0),
+  m_states(0),
+  m_drop_overlimit(0)  
+{
+  NS_LOG_FUNCTION_NOARGS ();
+}
+
+CoDelQueue::~CoDelQueue ()
+{
+  NS_LOG_FUNCTION_NOARGS ();
+}
+
+void
+CoDelQueue::NewtonStep(void)
+{
+  uint32_t invsqrt = ((uint32_t) m_rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
+  uint32_t invsqrt2 = ((uint64_t) invsqrt*invsqrt) >> 32;
+  uint64_t val = (3ll<<32) - ((uint64_t) m_count * invsqrt2);
+
+  val >>= 2; /* avoid overflow */
+  val = (val * invsqrt) >> (32-2+1);
+  m_rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
+}
+
+codel_time_t 
+CoDelQueue::ControlLaw(codel_time_t t)
+{
+  return t + reciprocal_divide(TIME2CODEL(m_Interval), m_rec_inv_sqrt << REC_INV_SQRT_SHIFT);
+}
+
+void
+CoDelQueue::SetMode (enum Mode mode)
+{
+  NS_LOG_FUNCTION (mode);
+  m_mode = mode;
+}
+
+CoDelQueue::Mode
+CoDelQueue::GetMode (void)
+{
+  NS_LOG_FUNCTION_NOARGS ();
+  return m_mode;
+}
+
+bool 
+CoDelQueue::DoEnqueue (Ptr<Packet> p)
+{
+  NS_LOG_FUNCTION (this << p);
+
+  if (m_mode == PACKETS && (m_packets.size () >= m_maxPackets))
+    {
+      NS_LOG_LOGIC ("Queue full (at max packets) -- droppping pkt");
+      Drop (p);
+      ++m_drop_overlimit;
+      return false;
+    }
+
+  if (m_mode == BYTES && (m_bytesInQueue + p->GetSize () >= m_maxBytes))
+    {
+      NS_LOG_LOGIC ("Queue full (packet would exceed max bytes) -- droppping pkt");
+      Drop (p);
+      ++m_drop_overlimit;
+      return false;
+    }
+
+  CoDelTimestampTag tag;
+  p->AddByteTag (tag);
+  m_bytesInQueue += p->GetSize ();
+  m_packets.push (p);
+
+  NS_LOG_LOGIC ("Number packets " << m_packets.size ());
+  NS_LOG_LOGIC ("Number bytes " << m_bytesInQueue);
+
+  return true;
+}
+
+bool
+CoDelQueue::ShouldDrop(Ptr<Packet> p, codel_time_t now)
+{
+  CoDelTimestampTag tag;
+  bool found;
+  bool drop;
+  found = p->FindFirstMatchingByteTag (tag);
+  Time delta = Simulator::Now () - tag.GetTxTime ();
+  NS_LOG_INFO ("Sojourn time "<<delta.GetSeconds ());
+  codel_time_t sojourn_time = TIME2CODEL(delta);
+  
+  if (codel_time_before(sojourn_time, TIME2CODEL(m_Target)) || 
+      m_bytesInQueue < m_minbytes)
+    {
+      /* went below so we'll stay below for at least q->interval */
+      m_first_above_time = 0;
+      return false;
+    }
+  drop = false;
+  if (m_first_above_time == 0) 
+    {
+      /* just went above from below. If we stay above
+       * for at least q->interval we'll say it's ok to drop
+       */
+      m_first_above_time = now + TIME2CODEL(m_Interval);
+    } 
+  else 
+    if (codel_time_after(now, m_first_above_time)) 
+      {
+        drop = true;
+        ++m_state1;
+      }
+  return drop;
+}
+
+Ptr<Packet>
+CoDelQueue::DoDequeue (void)
+{
+  NS_LOG_FUNCTION (this);
+
+  if (m_packets.empty ())
+    {
+      m_dropping = false;
+      m_first_above_time = 0;
+      NS_LOG_LOGIC ("Queue empty");
+      return 0;
+    }
+  codel_time_t now = codel_get_time();
+  Ptr<Packet> p = m_packets.front ();
+  m_packets.pop ();
+  m_bytesInQueue -= p->GetSize ();
+
+  NS_LOG_LOGIC ("Popped " << p);
+  NS_LOG_LOGIC ("Number packets " << m_packets.size ());
+  NS_LOG_LOGIC ("Number bytes " << m_bytesInQueue);
+
+  bool drop = ShouldDrop(p, now);
+  if (m_dropping)
+    {
+      if (!drop)
+        {
+          /* sojourn time below target - leave dropping state */    
+          m_dropping = false;
+        }
+      else
+        if (codel_time_after_eq(now, m_drop_next))
+          {
+            m_state2++;
+            /* It's time for the next drop. Drop the current
+             * packet and dequeue the next. The dequeue might 
+             * take us out of dropping state. 
+             * If not, schedule the next drop.
+             * A large backlog might result in drop rates so high
+             * that the next drop should happen now, 
+             * hence the while loop.
+             */  
+            while (m_dropping && 
+                   codel_time_after_eq(now, m_drop_next)) {
+              Drop(p);
+              ++m_drop_count;
+              ++m_count;
+              NewtonStep();
+              if (m_packets.empty ())
+                {
+                  m_dropping = false;
+                  NS_LOG_LOGIC ("Queue empty");
+                  ++m_states;
+                  return 0;
+                }
+              p = m_packets.front ();
+              m_packets.pop ();
+              m_bytesInQueue -= p->GetSize ();
+
+              NS_LOG_LOGIC ("Popped " << p);
+              NS_LOG_LOGIC ("Number packets " << m_packets.size ());
+              NS_LOG_LOGIC ("Number bytes " << m_bytesInQueue);
+
+              if (!ShouldDrop(p, now)) 
+                {
+                  /* leave dropping state */
+                  m_dropping = false;
+                }
+              else 
+                {
+                  /* and schedule the next drop */
+                  m_drop_next = ControlLaw(m_drop_next);
+                }
+            }
+          }
+    } 
+  else 
+    if (drop &&
+        (codel_time_before(now - m_drop_next,
+                           TIME2CODEL(m_Interval)) ||
+         codel_time_after_eq(now - m_first_above_time,
+                             TIME2CODEL(m_Interval)))) 
+      {
+        Drop(p);
+        ++m_drop_count;
+
+        NS_LOG_LOGIC ("Popped " << p);
+        NS_LOG_LOGIC ("Number packets " << m_packets.size ());
+        NS_LOG_LOGIC ("Number bytes " << m_bytesInQueue);
+
+        drop = ShouldDrop(p, now);
+        m_dropping = true;
+        ++m_state3;
+        /* 
+         * if min went above target close to when we last went below it
+         * assume that the drop rate that controlled the queue on the
+         * last cycle is a good starting point to control it now.
+         */
+        if (codel_time_after(now - m_drop_next, TIME2CODEL(m_Interval))) 
+          {
+            //uint32_t c = m_count - 2;
+            //m_count = std::max(1U, c);
+            m_count = m_count>2U? (uint32_t)m_count-2U:1U;
+            NewtonStep();
+          } 
+        else
+          {
+            m_count = 1;
+            m_rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
+          }
+        m_drop_next = ControlLaw(now);
+      }
+  ++m_states;
+  return p;
+}
+
+uint32_t
+CoDelQueue::GetQueueSize (void)
+{
+  NS_LOG_FUNCTION_NOARGS ();
+  if (GetMode () == BYTES)
+    {
+      return m_bytesInQueue;
+    }
+  else if (GetMode () == PACKETS)
+    {
+      return m_packets.size ();
+    }
+  else
+    {
+      NS_ABORT_MSG ("Unknown mode.");
+    }
+}
+
+Ptr<const Packet>
+CoDelQueue::DoPeek (void) const
+{
+  NS_LOG_FUNCTION (this);
+
+  if (m_packets.empty ())
+    {
+      NS_LOG_LOGIC ("Queue empty");
+      return 0;
+    }
+
+  Ptr<Packet> p = m_packets.front ();
+
+  NS_LOG_LOGIC ("Number packets " << m_packets.size ());
+  NS_LOG_LOGIC ("Number bytes " << m_bytesInQueue);
+
+  return p;
+}
+
+} // namespace ns3
+
diff -r 2a669a0c452e -r a00e9e71bb24 src/network/utils/codel-queue.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/network/utils/codel-queue.h	Mon May 14 17:35:24 2012 +1200
@@ -0,0 +1,126 @@
+/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
+/*
+ * Copyright (c) 2012 Andrew McGregor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * 
+ * Codel, the COntrolled DELay Queueing discipline
+ * Based on ns2 simulation code presented by Kathie Nichols
+ *
+ * This port based on linux kernel code by
+ * Authors:	Dave Täht <d@taht.net>
+ *		Eric Dumazet <edumazet@google.com>
+ *
+ * Ported to ns-3 by: Andrew McGregor <andrewmcgr@gmail.com>
+ */
+
+#ifndef CODEL_H
+#define CODEL_H
+
+#include <queue>
+#include "ns3/packet.h"
+#include "ns3/queue.h"
+#include "ns3/nstime.h"
+#include "ns3/simulator.h"
+#include "ns3/string.h"
+#include "ns3/traced-value.h"
+#include "ns3/trace-source-accessor.h"
+
+namespace ns3 {
+
+typedef uint32_t codel_time_t;
+typedef uint16_t rec_inv_sqrt_t;
+
+#define CODEL_SHIFT 10
+#define REC_INV_SQRT_BITS (8*sizeof(rec_inv_sqrt_t))
+#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
+
+class TraceContainer;
+
+/**
+ * \ingroup queue
+ *
+ * \brief A FIFO packet queue that drops tail-end packets on overflow
+ */
+class CoDelQueue : public Queue {
+public:
+  static TypeId GetTypeId (void);
+  /**
+   * \brief CoDelQueue Constructor
+   *
+   * Creates a codel queue with a maximum size of 100 packets by default
+   */
+  CoDelQueue ();
+
+  virtual ~CoDelQueue();
+
+  /**
+   * Enumeration of the modes supported in the class.
+   *
+   */
+  enum Mode {
+    ILLEGAL,     /**< Mode not set */
+    PACKETS,     /**< Use number of packets for maximum queue size */
+    BYTES,       /**< Use number of bytes for maximum queue size */
+  };
+
+  /**
+   * Set the operating mode of this device.
+   *
+   * \param mode The operating mode of this device.
+   *
+   */
+  void SetMode (CoDelQueue::Mode mode);
+
+  /**
+   * Get the encapsulation mode of this device.
+   *
+   * \returns The encapsulation mode of this device.
+   */
+  CoDelQueue::Mode  GetMode (void);
+
+  uint32_t GetQueueSize (void);
+
+private:
+  virtual bool DoEnqueue (Ptr<Packet> p);
+  virtual Ptr<Packet> DoDequeue (void);
+  virtual Ptr<const Packet> DoPeek (void) const;
+  void NewtonStep(void);
+  codel_time_t ControlLaw(codel_time_t t);
+  bool ShouldDrop(Ptr<Packet> p, codel_time_t now);
+
+  std::queue<Ptr<Packet> > m_packets;
+  uint32_t m_maxPackets;
+  uint32_t m_maxBytes;
+  TracedValue<uint32_t> m_bytesInQueue;
+  uint32_t m_minbytes;
+  Time m_Interval;
+  Time m_Target;
+  TracedValue<uint32_t> m_count;
+  TracedValue<uint32_t> m_drop_count;
+  bool m_dropping;
+  uint16_t m_rec_inv_sqrt;
+  codel_time_t m_first_above_time;
+  codel_time_t m_drop_next;
+  uint32_t m_state1;
+  uint32_t m_state2;
+  uint32_t m_state3;
+  uint32_t m_states;
+  uint32_t m_drop_overlimit;
+  Mode     m_mode;
+};
+
+} // namespace ns3
+
+#endif /* CODEL_H */
diff -r 2a669a0c452e -r a00e9e71bb24 src/network/wscript
--- a/src/network/wscript	Sun Apr 08 23:03:34 2012 -0700
+++ b/src/network/wscript	Mon May 14 17:35:24 2012 +1200
@@ -24,6 +24,7 @@
         'model/tag-buffer.cc',
         'model/trailer.cc',
 	'utils/address-utils.cc',
+        'utils/codel-queue.cc',
         'utils/data-rate.cc',
         'utils/drop-tail-queue.cc',
         'utils/error-model.cc',
@@ -93,6 +94,7 @@
         'model/tag-buffer.h',
         'model/trailer.h',
       	'utils/address-utils.h',
+        'utils/codel-queue.h',
         'utils/data-rate.h',
         'utils/drop-tail-queue.h',
         'utils/error-model.h',

[-- Attachment #1.3: Type: text/plain, Size: 5414 bytes --]



On 13/05/2012, at 7:23 PM, Eric Dumazet wrote:

> From: Eric Dumazet <edumazet@google.com>
> 
> On Sat, 2012-05-12 at 17:52 -0400, David Miller wrote:
> 
>> Ok, fair enough.
> 
> Oh well, I sent my mail too late. The error made no sense after a good
> night. Also, when Van says something, you can be fairly sure its right,
> and if it's not, then you didn't understand what Van said ;)
> 
> 16bit precision is OK, once the maths are correctly done in the userland
> program I wrote yesterday...
> 
> count=16525, precision=16 bits, sqrt(scaled_count)=4113, reciprocal(sq)=1fde240, Newton=1fd0000
>  interval/sqrt(16525) = 
> 	777909 (float compute)  // (u32)(interval/sqrt(count))
> 	778020 (integer approx) // reciprocal_divide(interval, rec)
> 	777926 (int_sqrt_div)   // int_sqrt_div(interval, count)
> 	776672 (Newton approx)  // reciprocal_divide(interval, previnv << shift)
> 
> count=9889134, precision=16 bits, sqrt(scaled_count)=50315,
> reciprocal(sq)=14d720, Newton=140000
>  interval/sqrt(9889134) = 
> 	31799 (float compute)
> 	31799 (integer approx)
> 	31799 (int_sqrt_div)
> 	30517 (Newton approx)
> 
> 
> And kernel code using u16 :
> 
> 6a1:	0f b7 72 0a          	movzwl 0xa(%rdx),%esi
> 6a5:	8b 3a                	mov    (%rdx),%edi
> 6a7:	83 c7 01             	add    $0x1,%edi
> 6aa:	c1 e6 10             	shl    $0x10,%esi
> 6ad:	89 3a                	mov    %edi,(%rdx)  vars->count++
> 6af:	89 ff                	mov    %edi,%edi
> 6b1:	89 f6                	mov    %esi,%esi
> 6b3:	48 89 f1             	mov    %rsi,%rcx
> 6b6:	48 0f af ce          	imul   %rsi,%rcx
> 6ba:	48 c1 e9 20          	shr    $0x20,%rcx
> 6be:	48 0f af cf          	imul   %rdi,%rcx
> 6c2:	48 bf 00 00 00 00 03 	mov    $0x300000000,%rdi
> 6c9:	00 00 00 
> 6cc:	48 29 cf             	sub    %rcx,%rdi
> 6cf:	48 89 f9             	mov    %rdi,%rcx
> 6d2:	48 c1 e9 02          	shr    $0x2,%rcx
> 6d6:	48 0f af ce          	imul   %rsi,%rcx
> 6da:	48 c1 e9 2f          	shr    $0x2f,%rcx
> 6de:	66 89 4a 0a          	mov    %cx,0xa(%rdx)
> 
> 
> Fell free to add following cleanup patch, if you like it ;)
> 
> Thanks
> 
> [PATCH net-next] codel: use u16 field instead of 31bits for rec_inv_sqrt
> 
> David pointed out gcc might generate poor code with 31bit fields.
> 
> Using u16 is more than enough and permits a better code output.
> 
> Also make the code intent more readable using constants, fixed point arithmetic
> not being trivial for everybody.
> 
> Suggested-by: David Miller <davem@davemloft.net>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
> include/net/codel.h |   25 +++++++++++++++----------
> 1 file changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/include/net/codel.h b/include/net/codel.h
> index bd8747c..7546517 100644
> --- a/include/net/codel.h
> +++ b/include/net/codel.h
> @@ -133,13 +133,17 @@ struct codel_params {
> struct codel_vars {
> 	u32		count;
> 	u32		lastcount;
> -	bool		dropping:1;
> -	u32		rec_inv_sqrt:31;
> +	bool		dropping;
> +	u16		rec_inv_sqrt;
> 	codel_time_t	first_above_time;
> 	codel_time_t	drop_next;
> 	codel_time_t	ldelay;
> };
> 
> +#define REC_INV_SQRT_BITS (8 * sizeof(u16)) /* or sizeof_in_bits(rec_inv_sqrt) */
> +/* needed shift to get a Q0.32 number from rec_inv_sqrt */
> +#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
> +
> /**
>  * struct codel_stats - contains codel shared variables and stats
>  * @maxpacket:	largest packet we've seen so far
> @@ -173,17 +177,18 @@ static void codel_stats_init(struct codel_stats *stats)
>  * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
>  * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
>  *
> - * Here, invsqrt is a fixed point number (< 1.0), 31bit mantissa)
> + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
>  */
> static void codel_Newton_step(struct codel_vars *vars)
> {
> -	u32 invsqrt = vars->rec_inv_sqrt;
> -	u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 31;
> -	u64 val = (3LL << 31) - ((u64)vars->count * invsqrt2);
> +	u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
> +	u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
> +	u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2);
> 
> -	val = (val * invsqrt) >> 32;
> +	val >>= 2; /* avoid overflow in following multiply */
> +	val = (val * invsqrt) >> (32 - 2 + 1);
> 
> -	vars->rec_inv_sqrt = val;
> +	vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
> }
> 
> /*
> @@ -195,7 +200,7 @@ static codel_time_t codel_control_law(codel_time_t t,
> 				      codel_time_t interval,
> 				      u32 rec_inv_sqrt)
> {
> -	return t + reciprocal_divide(interval, rec_inv_sqrt << 1);
> +	return t + reciprocal_divide(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT);
> }
> 
> 
> @@ -326,7 +331,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
> 			codel_Newton_step(vars);
> 		} else {
> 			vars->count = 1;
> -			vars->rec_inv_sqrt = 0x7fffffff;
> +			vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
> 		}
> 		vars->lastcount = vars->count;
> 		vars->drop_next = codel_control_law(now, params->interval,
> 
> 
> _______________________________________________
> Codel mailing list
> Codel@lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/codel


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 2330 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  5:46             ` Andrew McGregor
@ 2012-05-14  6:00               ` dave taht
  2012-05-14  6:17                 ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: dave taht @ 2012-05-14  6:00 UTC (permalink / raw)
  To: codel

I am sitting here with the latest fq_codel implementation on cerowrt, and
this new 16 bit change.

With either interface, running netserver on the router, I get 
~260Mbit/sec out of it,
with fq_codel on. It's quite marvelous, as ping times stay in the 2-3ms 
range...

Running traffic through the router with two fq_codels is another story.

I get only 91.6Mbit out of it (and the cpu on the router is loafing). I 
was rather puzzled as
this is sooclose to 100Mbit as for me to assume that I had goofed and 
was running
at line rate, but no, I'm at gigE on both sides.

A) Scheduler granularity? HZ_256 and NO_HZ here. Not sure of the clock res
B) The new error in the new sqrt routine actually significant enough to 
show up

I will try a 20ms target and revert to the previous fuller precision 
version. I wish I had data on the previous version but was battling an 
entirely different bug, then.

Has anyone hooked up a chain of codel ns3 sims to each other?

Or a few gigE boxes through each other?

> _______________________________________________
> Codel mailing list
> Codel@lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/codel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  6:00               ` dave taht
@ 2012-05-14  6:17                 ` Eric Dumazet
  2012-05-14  6:33                   ` dave taht
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2012-05-14  6:17 UTC (permalink / raw)
  To: dave taht; +Cc: codel

On Sun, 2012-05-13 at 23:00 -0700, dave taht wrote:
> I am sitting here with the latest fq_codel implementation on cerowrt, and
> this new 16 bit change.
> 
> With either interface, running netserver on the router, I get 
> ~260Mbit/sec out of it,
> with fq_codel on. It's quite marvelous, as ping times stay in the 2-3ms 
> range...
> 

If you disable TSO and GSO, do you still get good numbers ?

> Running traffic through the router with two fq_codels is another story.
> 
> I get only 91.6Mbit out of it (and the cpu on the router is loafing). I 
> was rather puzzled as
> this is sooclose to 100Mbit as for me to assume that I had goofed and 
> was running
> at line rate, but no, I'm at gigE on both sides.
> 
> A) Scheduler granularity? HZ_256 and NO_HZ here. Not sure of the clock res

fq_codel has no HZ dependencies. (it uses ktime_get())

But are you using fq_codel alone (as the root qdisc), or as leaves in a
more complex tree ?

> B) The new error in the new sqrt routine actually significant enough to 
> show up
> 
> I will try a 20ms target and revert to the previous fuller precision 
> version. I wish I had data on the previous version but was battling an 
> entirely different bug, then.
> 
> Has anyone hooked up a chain of codel ns3 sims to each other?
> 
> Or a few gigE boxes through each other?



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  6:17                 ` Eric Dumazet
@ 2012-05-14  6:33                   ` dave taht
  2012-05-14  6:47                     ` dave taht
  0 siblings, 1 reply; 21+ messages in thread
From: dave taht @ 2012-05-14  6:33 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: codel

On 05/13/2012 11:17 PM, Eric Dumazet wrote:
> On Sun, 2012-05-13 at 23:00 -0700, dave taht wrote:
>> I am sitting here with the latest fq_codel implementation on cerowrt, and
>> this new 16 bit change.
>>
>> With either interface, running netserver on the router, I get
>> ~260Mbit/sec out of it,
>> with fq_codel on. It's quite marvelous, as ping times stay in the 2-3ms
>> range...
>>
> If you disable TSO and GSO, do you still get good numbers ?
>
>> Running traffic through the router with two fq_codels is another story.
>>
>> I get only 91.6Mbit out of it (and the cpu on the router is loafing). I
>> was rather puzzled as
>> this is sooclose to 100Mbit as for me to assume that I had goofed and
>> was running
>> at line rate, but no, I'm at gigE on both sides.
>>
>> A) Scheduler granularity? HZ_256 and NO_HZ here. Not sure of the clock res

I rebooted the whole lab...

(it's very satisfying to do that, watching all the lights blinking)

Even more satisifying was getting back to where I had ~240Mbit through 
fq_codel
on both sides. (and it didn't crash due to the other bug I'd been battling)

I'm thinking I'd ended up with a ifb lying around, or something getting 
routed through wireless...

> fq_codel has no HZ dependencies. (it uses ktime_get())
>

> But are you using fq_codel alone (as the root qdisc), or as leaves in a
> more complex tree ?

I HAD been using htb and ifb in front of codel. I thought I'd deleted it 
entirely... I had rebooted the router, but something else in the chain 
was funky... (installed net-next on the test servers to
make sure, too)

Anyway:

root@codel:~# QMODEL=fq_codel QDEBUG=1 IFACE=ge00 /usr/sbin/debloat
(this script disables tso/gso/etc)

qdisc del dev ge00 root
qdisc add dev ge00 handle a root fq_codel

root@codel:~# tc -s qdisc show dev se00
qdisc fq_codel a: root refcnt 2 limit 10240p flows 1024 quantum 1514 
target 5.0ms interval 100.0ms ecn
  Sent 3639976192 bytes 2436533 pkt (dropped 0, overlimits 0 requeues 
77594)
  backlog 240534b 161p requeues 77594
   maxpacket 1494 drop_overlimit 0 new_flow_count 373 ecn_mark 367
   new_flows_len 0 old_flows_len 4


>
>> B) The new error in the new sqrt routine actually significant enough to
>> show up
So never mind.......
>> I will try a 20ms target and revert to the previous fuller precision
>> version. I wish I had data on the previous version but was battling an
>> entirely different bug, then.

never mind. next time before emailing I'll hit the big red lab button.

>> Has anyone hooked up a chain of codel ns3 sims to each other?
>>
>> Or a few gigE boxes through each other?

This is still an interesting question, though, as ecn is not well 
understood...

>


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  6:33                   ` dave taht
@ 2012-05-14  6:47                     ` dave taht
  2012-05-14  6:51                       ` Roger Jørgensen
  0 siblings, 1 reply; 21+ messages in thread
From: dave taht @ 2012-05-14  6:47 UTC (permalink / raw)
  To: Eric Dumazet, codel

On 05/13/2012 11:33 PM, dave taht wrote:
> On 05/13/2012 11:17 PM, Eric Dumazet wrote:
>> On Sun, 2012-05-13 at 23:00 -0700, dave taht wrote:
>>> I am sitting here with the latest fq_codel implementation on 
>>> cerowrt, and
>>> this new 16 bit change.
>>>
>>> With either interface, running netserver on the router, I get
>>> ~260Mbit/sec out of it,
>>> with fq_codel on. It's quite marvelous, as ping times stay in the 2-3ms
>>> range...
>>>
>> If you disable TSO and GSO, do you still get good numbers ?

This is the first time I've had these puppies not crash in a month under 
workloads like this.

*Everything* I have has TSO and GSO off by default. That stuff has cause 
me no end of grief.

But for you...

I just fired up a long duration ipv6 test with TSO/GSO on the driving 
host, at 1Gbit, with fq_codel on all sides... with  10 TCP_MAERTS, 10 
TCP_STREAMS, over pure IPv6

and am running test for 6000 seconds.  and I'm going to dinner. :)

At the moment I see ping times 1.553 avg, 5.999 max, I can do a CDF plot 
of the next run.

Actually I think ipv4 fragmentation is still busted in cero, but I'm 
getting ther....

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  6:47                     ` dave taht
@ 2012-05-14  6:51                       ` Roger Jørgensen
  2012-05-14  8:23                         ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Roger Jørgensen @ 2012-05-14  6:51 UTC (permalink / raw)
  To: dave taht; +Cc: codel

On Mon, May 14, 2012 at 8:47 AM, dave taht <dave.taht@gmail.com> wrote:
> On 05/13/2012 11:33 PM, dave taht wrote:
>> On 05/13/2012 11:17 PM, Eric Dumazet wrote:
>>> On Sun, 2012-05-13 at 23:00 -0700, dave taht wrote:
>>>>
>>>> I am sitting here with the latest fq_codel implementation on cerowrt,
>>>> and
>>>> this new 16 bit change.
>>>>
>>>> With either interface, running netserver on the router, I get
>>>> ~260Mbit/sec out of it,
>>>> with fq_codel on. It's quite marvelous, as ping times stay in the 2-3ms
>>>> range...
>>>>
>>> If you disable TSO and GSO, do you still get good numbers ?
>
>
> This is the first time I've had these puppies not crash in a month under
> workloads like this.
>
> *Everything* I have has TSO and GSO off by default. That stuff has cause me
> no end of grief.
>
> But for you...
>
> I just fired up a long duration ipv6 test with TSO/GSO on the driving host,
> at 1Gbit, with fq_codel on all sides... with  10 TCP_MAERTS, 10 TCP_STREAMS,
> over pure IPv6
>
> and am running test for 6000 seconds.  and I'm going to dinner. :)
>
> At the moment I see ping times 1.553 avg, 5.999 max, I can do a CDF plot of
> the next run.
>
> Actually I think ipv4 fragmentation is still busted in cero, but I'm getting
> ther....

could you pass me the script for setting that up, got nowhere with any
of the previous... will try the same on some 10GigE servers over v6...



-- 

Roger Jorgensen           | ROJO9-RIPE
rogerj@gmail.com          | - IPv6 is The Key!
http://www.jorgensen.no   | roger@jorgensen.no

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  6:51                       ` Roger Jørgensen
@ 2012-05-14  8:23                         ` Eric Dumazet
  2012-05-14  8:50                           ` dave taht
  2012-05-14 11:34                           ` Roger Jørgensen
  0 siblings, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-05-14  8:23 UTC (permalink / raw)
  To: Roger Jørgensen; +Cc: codel

On Mon, 2012-05-14 at 08:51 +0200, Roger Jørgensen wrote:

> could you pass me the script for setting that up, got nowhere with any
> of the previous... will try the same on some 10GigE servers over v6...

If multi-queue, you probably could use :

EST="est 1sec 4sec"
TXQUEUES=24 

DEV=eth7
# ethtool -K $DEV tso off gso off
tc qdisc del dev $DEV root 2>/dev/null
tc qdisc add dev $DEV root handle 1: mq

for i in `seq 1 $TXQUEUES`
do
 hexa=$(printf %02x $i)
 tc qdisc add dev $DEV parent 1:$hexa fq_codel
done





^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  8:23                         ` Eric Dumazet
@ 2012-05-14  8:50                           ` dave taht
  2012-05-14  9:03                             ` Eric Dumazet
  2012-05-14 11:34                           ` Roger Jørgensen
  1 sibling, 1 reply; 21+ messages in thread
From: dave taht @ 2012-05-14  8:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: codel

On 05/14/2012 01:23 AM, Eric Dumazet wrote:
> On Mon, 2012-05-14 at 08:51 +0200, Roger Jørgensen wrote:
>
>> could you pass me the script for setting that up, got nowhere with any
>> of the previous... will try the same on some 10GigE servers over v6...
> If multi-queue, you probably could use :
>
> EST="est 1sec 4sec"
> TXQUEUES=24
>
> DEV=eth7
> # ethtool -K $DEV tso off gso off
> tc qdisc del dev $DEV root 2>/dev/null
> tc qdisc add dev $DEV root handle 1: mq
>
> for i in `seq 1 $TXQUEUES`
> do
>   hexa=$(printf %02x $i)
>   tc qdisc add dev $DEV parent 1:$hexa fq_codel
> done
>
>
>
>
Eric:

I have been seeing you use target 500us on your 10GigE testing, I think.

That seems like a good idea...

Secondly don't you need some sort of filter to direct stuff to each of 
those mq queues?


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  8:50                           ` dave taht
@ 2012-05-14  9:03                             ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-05-14  9:03 UTC (permalink / raw)
  To: dave taht; +Cc: codel

On Mon, 2012-05-14 at 01:50 -0700, dave taht wrote:

> Eric:
> 
> I have been seeing you use target 500us on your 10GigE testing, I think.
> 


yes, I use " target 500us interval 10ms"

> That seems like a good idea...
> 
> Secondly don't you need some sort of filter to direct stuff to each of 
> those mq queues?
> 

No, its done automatically by the stack, to spread load coming from
various sources to different queues.

Of course, all packets of a given tcp flow should land on a particular
txqueue (or it could trigger OOO packets)





^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14  8:23                         ` Eric Dumazet
  2012-05-14  8:50                           ` dave taht
@ 2012-05-14 11:34                           ` Roger Jørgensen
  2012-05-14 11:56                             ` Eric Dumazet
  2012-05-14 15:05                             ` Dave Taht
  1 sibling, 2 replies; 21+ messages in thread
From: Roger Jørgensen @ 2012-05-14 11:34 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: codel

On Mon, May 14, 2012 at 10:23 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Mon, 2012-05-14 at 08:51 +0200, Roger Jørgensen wrote:
>
>> could you pass me the script for setting that up, got nowhere with any
>> of the previous... will try the same on some 10GigE servers over v6...
>
> If multi-queue, you probably could use :
>
> EST="est 1sec 4sec"
> TXQUEUES=24
>
> DEV=eth7
> # ethtool -K $DEV tso off gso off
> tc qdisc del dev $DEV root 2>/dev/null
> tc qdisc add dev $DEV root handle 1: mq
>
> for i in `seq 1 $TXQUEUES`
> do
>  hexa=$(printf %02x $i)
>  tc qdisc add dev $DEV parent 1:$hexa fq_codel
> done

I did it much simpler, with nothing changed I get  2.42Gbits/sec
through my range of VMs.


                         rtt min    avg     max     mdev    bw test showing
plain                  1,037      2,327  5,123   0,558    2,4Gbps
TBF                   1,529      2,87   8,275   0,547    1,5Gbps
fq_codel +ecn     0,907     1,82    5,214   0,547    1,4Gbps
fq_codel noec     0,905     1,913   4,95    0,586   1,4Gbps



regular ping is like this then

--- 2a00:d740:110:8000::a ping statistics ---
500 packets transmitted, 500 received, 0% packet loss, time 499742ms
rtt min/avg/max/mdev = 1.037/2.327/5.123/0.558 ms



when I add fq_codel to the mix:
root@codel-core:~# tc  qdisc show
...
qdisc fq_codel 804b: dev eth1 root refcnt 2 limit 5p flows 5 quantum
1514 target 499us interval 5.0ms ecn
qdisc pfifo_fast 0: dev eth2 root refcnt 2 bands 3 priomap  1 2 2 2 1
2 0 0 1 1 1 1 1 1 1 1


--- 2a00:d740:110:8000::a ping statistics ---
500 packets transmitted, 500 received, 0% packet loss, time 499770ms
rtt min/avg/max/mdev = 0.907/1.820/5.214/0.547 ms


fq_codel without ecn
root@codel-core:~# tc  qdisc show
...
qdisc fq_codel 804c: dev eth1 root refcnt 2 limit 5p flows 5 quantum
1514 target 499us interval 5.0ms
qdisc pfifo_fast 0: dev eth2 root refcnt 2 bands 3 priomap  1 2 2 2 1
2 0 0 1 1 1 1 1 1 1 1

--- 2a00:d740:110:8000::a ping statistics ---
500 packets transmitted, 500 received, 0% packet loss, time 499763ms
rtt min/avg/max/mdev = 0.905/1.913/4.950/0.586 ms





using tbf,
root@codel-core:~# tc qdisc show
...
qdisc tbf 804a: dev eth1 root refcnt 2 rate 1560Mbit burst 20280b lat 5.0ms
qdisc pfifo_fast 0: dev eth2 root refcnt 2 bands 3 priomap  1 2 2 2 1 2 0

--- 2a00:d740:110:8000::a ping statistics ---
500 packets transmitted, 500 received, 0% packet loss, time 499746ms
rtt min/avg/max/mdev = 1.529/2.879/7.275/0.547 ms



-- 

Roger Jorgensen           | ROJO9-RIPE
rogerj@gmail.com          | - IPv6 is The Key!
http://www.jorgensen.no   | roger@jorgensen.no

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14 11:34                           ` Roger Jørgensen
@ 2012-05-14 11:56                             ` Eric Dumazet
  2012-05-14 15:05                             ` Dave Taht
  1 sibling, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2012-05-14 11:56 UTC (permalink / raw)
  To: Roger Jørgensen; +Cc: codel

On Mon, 2012-05-14 at 13:34 +0200, Roger Jørgensen wrote:
> On Mon, May 14, 2012 at 10:23 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Mon, 2012-05-14 at 08:51 +0200, Roger Jørgensen wrote:
> >
> >> could you pass me the script for setting that up, got nowhere with any
> >> of the previous... will try the same on some 10GigE servers over v6...
> >
> > If multi-queue, you probably could use :
> >
> > EST="est 1sec 4sec"
> > TXQUEUES=24
> >
> > DEV=eth7
> > # ethtool -K $DEV tso off gso off
> > tc qdisc del dev $DEV root 2>/dev/null
> > tc qdisc add dev $DEV root handle 1: mq
> >
> > for i in `seq 1 $TXQUEUES`
> > do
> >  hexa=$(printf %02x $i)
> >  tc qdisc add dev $DEV parent 1:$hexa fq_codel
> > done
> 
> I did it much simpler, with nothing changed I get  2.42Gbits/sec
> through my range of VMs.



Not sure it means anything with VM, since the 'queue' is probably empty
on the guest (packet is sent to hypervisor, and hypervisor send it to
real hardware later)

You can check stats on qdisc/class to see if _some_ packets stayed in
queue a bit...

tc -s qdisc show dev eth1

tc -s class show dev eth1




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14 11:34                           ` Roger Jørgensen
  2012-05-14 11:56                             ` Eric Dumazet
@ 2012-05-14 15:05                             ` Dave Taht
  2012-05-14 18:31                               ` Roger Jørgensen
  1 sibling, 1 reply; 21+ messages in thread
From: Dave Taht @ 2012-05-14 15:05 UTC (permalink / raw)
  To: Roger Jørgensen; +Cc: codel

On Mon, May 14, 2012 at 4:34 AM, Roger Jørgensen <rogerj@gmail.com> wrote:
> On Mon, May 14, 2012 at 10:23 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> On Mon, 2012-05-14 at 08:51 +0200, Roger Jørgensen wrote:
>>
>>> could you pass me the script for setting that up, got nowhere with any
>>> of the previous... will try the same on some 10GigE servers over v6...
>>
>> If multi-queue, you probably could use :
>>
>> EST="est 1sec 4sec"
>> TXQUEUES=24
>>
>> DEV=eth7
>> # ethtool -K $DEV tso off gso off
>> tc qdisc del dev $DEV root 2>/dev/null
>> tc qdisc add dev $DEV root handle 1: mq
>>
>> for i in `seq 1 $TXQUEUES`
>> do
>>  hexa=$(printf %02x $i)
>>  tc qdisc add dev $DEV parent 1:$hexa fq_codel
>> done
>
> I did it much simpler, with nothing changed I get  2.42Gbits/sec
> through my range of VMs.
>
>
>                         rtt min    avg     max     mdev    bw test showing
> plain                  1,037      2,327  5,123   0,558    2,4Gbps
> TBF                   1,529      2,87   8,275   0,547    1,5Gbps
> fq_codel +ecn     0,907     1,82    5,214   0,547    1,4Gbps
> fq_codel noec     0,905     1,913   4,95    0,586   1,4Gbps


I find your results mildly puzzling. certainly your max rtt in the no
ecn case is pleasing,
but your max throughput in all three AQM cases seems to be rather
severely capped.

Secondly,
I note that there was an infinite cwr bug related to ecn recently
fixed in the kernel.



>
>
> regular ping is like this then
>
> --- 2a00:d740:110:8000::a ping statistics ---
> 500 packets transmitted, 500 received, 0% packet loss, time 499742ms
> rtt min/avg/max/mdev = 1.037/2.327/5.123/0.558 ms
>
>
>
> when I add fq_codel to the mix:
> root@codel-core:~# tc  qdisc show
> ...
> qdisc fq_codel 804b: dev eth1 root refcnt 2 limit 5p flows 5 quantum
> 1514 target 499us interval 5.0ms ecn
> qdisc pfifo_fast 0: dev eth2 root refcnt 2 bands 3 priomap  1 2 2 2 1
> 2 0 0 1 1 1 1 1 1 1 1
>
>
> --- 2a00:d740:110:8000::a ping statistics ---
> 500 packets transmitted, 500 received, 0% packet loss, time 499770ms
> rtt min/avg/max/mdev = 0.907/1.820/5.214/0.547 ms
>
>
> fq_codel without ecn
> root@codel-core:~# tc  qdisc show
> ...
> qdisc fq_codel 804c: dev eth1 root refcnt 2 limit 5p flows 5 quantum
> 1514 target 499us interval 5.0ms
> qdisc pfifo_fast 0: dev eth2 root refcnt 2 bands 3 priomap  1 2 2 2 1
> 2 0 0 1 1 1 1 1 1 1 1
>
> --- 2a00:d740:110:8000::a ping statistics ---
> 500 packets transmitted, 500 received, 0% packet loss, time 499763ms
> rtt min/avg/max/mdev = 0.905/1.913/4.950/0.586 ms
>
>
>
>
>
> using tbf,
> root@codel-core:~# tc qdisc show
> ...
> qdisc tbf 804a: dev eth1 root refcnt 2 rate 1560Mbit burst 20280b lat 5.0ms
> qdisc pfifo_fast 0: dev eth2 root refcnt 2 bands 3 priomap  1 2 2 2 1 2 0
>
> --- 2a00:d740:110:8000::a ping statistics ---
> 500 packets transmitted, 500 received, 0% packet loss, time 499746ms
> rtt min/avg/max/mdev = 1.529/2.879/7.275/0.547 ms
>
>
>
> --
>
> Roger Jorgensen           | ROJO9-RIPE
> rogerj@gmail.com          | - IPv6 is The Key!
> http://www.jorgensen.no   | roger@jorgensen.no



-- 
Dave Täht
SKYPE: davetaht
US Tel: 1-239-829-5608
http://www.bufferbloat.net

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-14 15:05                             ` Dave Taht
@ 2012-05-14 18:31                               ` Roger Jørgensen
  0 siblings, 0 replies; 21+ messages in thread
From: Roger Jørgensen @ 2012-05-14 18:31 UTC (permalink / raw)
  To: Dave Taht; +Cc: codel

On Mon, May 14, 2012 at 5:05 PM, Dave Taht <dave.taht@gmail.com> wrote:
> On Mon, May 14, 2012 at 4:34 AM, Roger Jørgensen <rogerj@gmail.com> wrote:
<snip>

>>                         rtt min    avg     max     mdev    bw test showing
>> plain                  1,037      2,327  5,123   0,558    2,4Gbps
>> TBF                   1,529      2,87   8,275   0,547    1,5Gbps
>> fq_codel +ecn     0,907     1,82    5,214   0,547    1,4Gbps
>> fq_codel noec     0,905     1,913   4,95    0,586   1,4Gbps
>
>
> I find your results mildly puzzling. certainly your max rtt in the no
> ecn case is pleasing,
> but your max throughput in all three AQM cases seems to be rather
> severely capped.

I did try to capp the bandwidth,  (that's why the values for fq_codel
is as they are), the idea was to limit the bw and see how the pingtime
vary with that. Turns out that bw capping give better pingtime than no
capping :-)

and since I can see how it work I'll try to run it on a linux box
acting as gateway for a wireless guestnetwork to see it under real
load :-)



<snip>

-- 

Roger Jorgensen           | ROJO9-RIPE
rogerj@gmail.com          | - IPv6 is The Key!
http://www.jorgensen.no   | roger@jorgensen.no

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides
  2012-05-13  7:23           ` Eric Dumazet
  2012-05-14  5:46             ` Andrew McGregor
@ 2012-05-14 22:33             ` David Miller
  1 sibling, 0 replies; 21+ messages in thread
From: David Miller @ 2012-05-14 22:33 UTC (permalink / raw)
  To: eric.dumazet
  Cc: dave.taht, nanditad, netdev, codel, ycheng, shemminger, mattmathis

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 13 May 2012 09:23:23 +0200

> Fell free to add following cleanup patch, if you like it ;)
 ...
> [PATCH net-next] codel: use u16 field instead of 31bits for rec_inv_sqrt

I do, applied :-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2012-05-14 22:35 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-12 13:32 [Codel] [PATCH net-next] codel: use Newton method instead of sqrt() and divides Eric Dumazet
2012-05-12 19:52 ` David Miller
2012-05-12 20:40   ` Eric Dumazet
2012-05-12 20:45     ` David Miller
2012-05-12 21:48       ` Eric Dumazet
2012-05-12 21:52         ` David Miller
2012-05-13  7:23           ` Eric Dumazet
2012-05-14  5:46             ` Andrew McGregor
2012-05-14  6:00               ` dave taht
2012-05-14  6:17                 ` Eric Dumazet
2012-05-14  6:33                   ` dave taht
2012-05-14  6:47                     ` dave taht
2012-05-14  6:51                       ` Roger Jørgensen
2012-05-14  8:23                         ` Eric Dumazet
2012-05-14  8:50                           ` dave taht
2012-05-14  9:03                             ` Eric Dumazet
2012-05-14 11:34                           ` Roger Jørgensen
2012-05-14 11:56                             ` Eric Dumazet
2012-05-14 15:05                             ` Dave Taht
2012-05-14 18:31                               ` Roger Jørgensen
2012-05-14 22:33             ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox