Subject: Re: [Cake] total download rate with many flows
From: George Amanakis
To: Jonathan Morton, cake@lists.bufferbloat.net
Date: Mon, 13 Nov 2017 20:51:15 -0500
Message-ID: <80d2c766-bde4-d46e-7b7a-0a24c8ef78c2@yahoo.com>
In-Reply-To: <3cfb7852-ac1f-9274-a10e-2d8c6981922f@yahoo.com>

I am exploring this idea further. If q->time_next_packet is incremented
for dropped packets in proportion to (1 - 1/x), where x is the count of
all flows in the tin currently being served, ingress mode behaves much
more smoothly: latency stays below 50 ms and throughput comes very close
to the configured limit.
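As a toy illustration of that scaling (plain userspace C, entirely my own
sketch -- the actual patch below uses COBALT's fixed-point inverse square
root via cobalt_invsqrt()/reciprocal_scale() rather than a literal 1/x):

/*
 * Toy model: when a packet of `len` bytes is dropped in ingress mode,
 * charge the shaper only a fraction (1 - 1/x) of it, where x is the
 * number of active flows in the tin.  With a single flow the drop is
 * not charged at all; with many flows nearly the full length is.
 */
#include <stdio.h>

static unsigned int drop_charge(unsigned int len, unsigned int flows)
{
	if (flows <= 1)
		return 0;                /* single flow: don't charge the drop */
	return len - len / flows;        /* len * (1 - 1/x), integer arithmetic */
}

int main(void)
{
	unsigned int x;

	/* 1514-byte packet, varying flow count */
	for (x = 1; x <= 32; x *= 2)
		printf("flows=%2u  charge=%u bytes\n", x, drop_charge(1514, x));
	return 0;
}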
I *tried* to make a patch from latest cobalt.

=============8<=============
diff --git a/sch_cake.c b/sch_cake.c
index 82f264f..752783a 100644
--- a/sch_cake.c
+++ b/sch_cake.c
@@ -145,6 +145,7 @@ struct cake_flow {
        struct list_head  flowchain;
        s32               deficit;
        struct cobalt_vars cvars;
+       struct cobalt_vars cvars2;
        u16               srchost; /* index into cake_host table */
        u16               dsthost;
        u8                set;
@@ -254,6 +255,7 @@ struct cake_sched_data {
        u32             avg_window_bytes;
        u32             avg_peak_bandwidth;
        u64             last_reconfig_time;
+       u32             drop_len;
 };

 enum {
@@ -820,7 +822,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
        sch->qstats.drops++;

        if(q->rate_flags & CAKE_FLAG_INGRESS)
-               cake_advance_shaper(q, b, cake_overhead(q, len), now);
+               q->drop_len += len;

 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
        kfree_skb(skb);
@@ -1274,7 +1276,9 @@ retry:
                /* drop this packet, get another one */
                if(q->rate_flags & CAKE_FLAG_INGRESS) {
                        len = cake_overhead(q, qdisc_pkt_len(skb));
-                       cake_advance_shaper(q, b, len, now);
+                       flow->cvars2.count = b->bulk_flow_count+b->sparse_flow_count+b->decaying_flow_count+b->unresponsive_flow_count;
+                       cobalt_invsqrt(&(flow->cvars2));
+                       q->drop_len += (len - reciprocal_scale(len, flow->cvars2.rec_inv_sqrt));
                        flow->deficit -= len;
                        b->tin_deficit -= len;
                }
@@ -1286,8 +1290,6 @@ retry:
                qdisc_qstats_drop(sch);
                kfree_skb(skb);
 #endif
-               if(q->rate_flags & CAKE_FLAG_INGRESS)
-                       goto retry;
        }

        b->tin_ecn_mark += !!flow->cvars.ecn_marked;
@@ -1340,7 +1342,7 @@ static void cake_advance_shaper(struct cake_sched_data *q, struct cake_tin_data
        if(q->rate_ns) {
                s64 tdiff1 = b->tin_time_next_packet - now;
                s64 tdiff2 = (len * (u64)b->tin_rate_ns) >> b->tin_rate_shft;
-               s64 tdiff3 = (len * (u64)q->rate_ns) >> q->rate_shft;
+               s64 tdiff3 = ((q->drop_len + len) * (u64)q->rate_ns) >> q->rate_shft;

                if(tdiff1 < 0)
                        b->tin_time_next_packet += tdiff2;
@@ -1348,6 +1350,7 @@ static void cake_advance_shaper(struct cake_sched_data *q, struct cake_tin_data
                        b->tin_time_next_packet = now + tdiff2;

                q->time_next_packet += tdiff3;
+               q->drop_len = 0;
        }
 }
@@ -1711,6 +1714,7 @@ static void cake_reconfigure(struct Qdisc *sch)
 {
        struct cake_sched_data *q = qdisc_priv(sch);
        int c, ft;
+       q->drop_len=0;

        switch (q->tin_mode) {
        case CAKE_MODE_BESTEFFORT:
@@ -1941,6 +1945,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt)

                        INIT_LIST_HEAD(&flow->flowchain);
                        cobalt_vars_init(&flow->cvars);
+                       cobalt_vars_init(&flow->cvars2);

                        q->overflow_heap[k].t = i;
                        q->overflow_heap[k].b = j;
=============8<=============

On 11/11/2017 10:48 PM, George Amanakis wrote:
> I totally understand what you are saying.
> However, I believe cake's egress and ingress modes currently behave as
> two extremes. One could argue that neither of them is the golden mean.
> With a patch in ingress mode (see below) and a single host using 32
> flows to download I managed to increase throughput from ~7Mbps to
> ~10Mbps (configured limit 12200kbps) while latency increased from
> ~10ms to ~50ms, which would still be acceptable. As a comparison,
> egress mode in the same setup gives me throughput of ~11.5Mbps and
> latency of ~500ms.
>
> I would like to hear your thoughts about this idea: the patch is
> incrementing q->time_next_packet for dropped packets differently than
> for passed-through ones. Please focus on the idea, not the actual
> implementation :) (also pasted in https://pastebin.com/SZ14WiYw)
>
> =============8<=============
>
> diff --git a/sch_cake.c b/sch_cake.c
> index 82f264f..a3a4a88 100644
> --- a/sch_cake.c
> +++ b/sch_cake.c
> @@ -769,6 +769,7 @@ static void cake_heapify_up(struct cake_sched_data *q, u16 i)
>  }
>
>  static void cake_advance_shaper(struct cake_sched_data *q, struct cake_tin_data *b, u32 len, u64 now);
> +static void cake_advance_shaper2(struct cake_sched_data *q, struct cake_tin_data *b, u32 len, u64 now);
>
>  #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
>  static unsigned int cake_drop(struct Qdisc *sch)
> @@ -1274,7 +1275,7 @@ retry:
>                 /* drop this packet, get another one */
>                 if(q->rate_flags & CAKE_FLAG_INGRESS) {
>                         len = cake_overhead(q, qdisc_pkt_len(skb));
> -                       cake_advance_shaper(q, b, len, now);
> +                       cake_advance_shaper2(q, b, len, now);
>                         flow->deficit -= len;
>                         b->tin_deficit -= len;
>                 }
> @@ -1286,8 +1287,6 @@ retry:
>                 qdisc_qstats_drop(sch);
>                 kfree_skb(skb);
>  #endif
> -               if(q->rate_flags & CAKE_FLAG_INGRESS)
> -                       goto retry;
>         }
>
>         b->tin_ecn_mark += !!flow->cvars.ecn_marked;
> @@ -1351,6 +1350,24 @@ static void cake_advance_shaper(struct cake_sched_data *q, struct cake_tin_data
>         }
>  }
>
> +static void cake_advance_shaper2(struct cake_sched_data *q, struct cake_tin_data *b, u32 len, u64 now)
> +{
> +       /* charge packet bandwidth to this tin, lower tins,
> +        * and to the global shaper.
> +        */
> +       if(q->rate_ns) {
> +               s64 tdiff1 = b->tin_time_next_packet - now;
> +               s64 tdiff2 = (len * (u64)b->tin_rate_ns) >> b->tin_rate_shft;
> +               s64 tdiff3 = (len * (u64)q->rate_ns) >> q->rate_shft;
> +
> +               if(tdiff1 < 0)
> +                       b->tin_time_next_packet += tdiff2;
> +               else if(tdiff1 < tdiff2)
> +                       b->tin_time_next_packet = now + tdiff2;
> +
> +               q->time_next_packet += (tdiff3*27)>>5;
> +       }
> +}
>  static void cake_reset(struct Qdisc *sch)
>  {
>         u32 c;
>
> =============8<=============
>
> On 11/10/2017 4:50 PM, Jonathan Morton wrote:
>>
>> In fact, that's why I put a failsafe into ingress mode, so that it
>> would never stall completely.  It can happen, however, that
>> throughput is significantly reduced when the drop rate is high.
>>
>> If throughput is more important to you than induced latency, switch
>> to egress mode.
>>
>> Unfortunately it's not possible to guarantee both low latency and
>> high throughput when operating downstream of the bottleneck link.
>> ECN gives you better results, though.
>>
>> - Jonathan Morton
>>
>
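A quick back-of-the-envelope comparison of the two patches, using my own
arithmetic rather than anything measured: the quoted cake_advance_shaper2()
charges the global shaper (tdiff3*27)>>5, a fixed 27/32 (about 84%) of a
dropped packet's transmit time, while the patch at the top of this mail
charges len*(1 - 1/sqrt(n)) through cobalt_invsqrt(), which comes to roughly
82% with n = 32 flows in the tin and shrinks as the flow count falls.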