Cake - FQ_codel the next generation
 help / color / mirror / Atom feed
From: Georgios Amanakis <gamanakis@gmail.com>
To: Cake List <cake@lists.bufferbloat.net>
Subject: Re: [Cake] profiling using perf
Date: Fri, 8 Mar 2019 16:01:20 -0500	[thread overview]
Message-ID: <CACvFP_hF5BMTrVFNxLZqeVmyLUHbFcDvMxbxmKfO=_T6zqF+GQ@mail.gmail.com> (raw)
In-Reply-To: <CACvFP_jwSorufZDagNiuMdmG+ZRcQi7fPUDNrzQEFCauz0cPEw@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 423 bytes --]

Dear List,

I made an effort to profile the performance of cake with perf on
OpenWrt. perf was run on a WRT1900ACS router while a LAN client was
downloading archlinux.iso via BitTorrent. You can find the annotated
sch_cake.c in the attachment, as well as a performance histogram of
sch_cake (percentages are relative to sch_cake). Hopefully people can
take a look at it and see whether there are any performance concerns.

Best,
George

[-- Attachment #2: cake_dequeue.annotation --]
[-- Type: application/octet-stream, Size: 85930 bytes --]

cake_dequeue() /lib/modules/4.14.104/sch_cake.ko
Event: cycles:ppp

Percent        
               
               
              Disassembly of section .text:
               
              0000230c <cake_dequeue>:
              cake_dequeue():
                              while (!!(skb = cake_dequeue_one(sch)))
                                      kfree_skb(skb);
              }           
               
              static struct sk_buff *cake_dequeue(struct Qdisc *sch)
              {           
  0.83          push   {r4, r5, r6, r7, r8, r9, sl, fp, lr}
                      struct cake_sched_data *q = qdisc_priv(sch);
                      struct cake_tin_data *b = &q->tins[q->cur_tin];
  0.09          add    r1, r0, #16640  ; 0x4100
              {           
  0.01          sub    sp, sp, #100    ; 0x64
                      struct cake_tin_data *b = &q->tins[q->cur_tin];
  0.13          ldr    r2, [r0, #264]  ; 0x108
                movw   r3, #22720      ; 0x58c0
  0.49          movt   r3, #1   
  0.02          str    r1, [sp, #56]   ; 0x38
  0.10          ldrh   r1, [r1, #96]   ; 0x60
              {           
                str    r0, [sp, #52]   ; 0x34
              qdisc_priv():
              #define QDISC_ALIGNTO           64
              #define QDISC_ALIGN(len)        (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
               
              static inline void *qdisc_priv(struct Qdisc *q)
              {           
                      return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc));
  0.15          add    r0, r0, #256    ; 0x100
  0.07          str    r0, [sp, #72]   ; 0x48
              cobalt_invsqrt():
                              vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
  0.10          movw   r0, #0   
              cake_dequeue():
                      struct cake_tin_data *b = &q->tins[q->cur_tin];
  0.01          mla    r3, r3, r1, r2
              cobalt_invsqrt():
                              vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
  0.34          movt   r0, #0   
  0.16          str    r0, [sp, #44]   ; 0x2c
              cake_dequeue():
                      struct cake_tin_data *b = &q->tins[q->cur_tin];
  0.07          str    r3, [sp, #60]   ; 0x3c
                      struct cake_host *srchost, *dsthost;
                      ktime_t now = ktime_get();
  0.06        → bl     qdisc_peek_dequeued
                strd   r0, [sp, #24]
                      struct cake_flow *flow;
                      struct list_head *head;
                      bool first_flow = true;
  0.35          mov    r1, #1   
                      u16 host_load;
                      u64 delay;
                      u32 len;
               
              begin:      
                      if (!sch->q.qlen)
  0.03    4c:   ldr    r3, [sp, #52]   ; 0x34
  0.30          ldr    r3, [r3, #76]   ; 0x4c
  0.11          cmp    r3, #0   
  0.22        ↓ bne    68       
                              return NULL;
  0.01    5c:   mov    r3, #0   
                str    r3, [sp, #48]   ; 0x30
              ↓ b      114c     
               
                      /* global hard shaper */
                      if (ktime_after(q->time_next_packet, now) &&
  0.21    68:   ldr    r3, [sp, #72]   ; 0x48
  0.09          movw   r0, #16416      ; 0x4020
              ktime_compare():
               */         
              static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
              {           
                      if (cmp1 < cmp2)
                              return -1;
                      if (cmp1 > cmp2)
  0.06          ldrd   r4, [sp, #24]
              cake_dequeue():
  0.24          add    r3, r3, r0
  0.01          ldrd   r2, [r3, #-8]
              ktime_compare():
  0.24          cmp    r4, r2   
  0.02          sbcs   ip, r5, r3
  0.29        ↓ bge    dc       
              cake_dequeue():
                          ktime_after(q->failsafe_next_packet, now)) {
  0.08          ldr    ip, [sp, #72]   ; 0x48
              ktime_compare():
  0.04          ldrd   r6, [sp, #24]
              cake_dequeue():
                ldrd   r4, [ip, r0]
              ktime_compare():
  0.19          cmp    r6, r4   
                sbcs   r0, r7, r5
  0.09        ↓ bge    dc       
              cake_dequeue():
                              u64 next = min(ktime_to_ns(q->time_next_packet),
                                             ktime_to_ns(q->failsafe_next_packet));
               
                              sch->qstats.overlimits++;
                ldr    r0, [sp, #52]   ; 0x34
                              u64 next = min(ktime_to_ns(q->time_next_packet),
  0.03          cmp    r4, r2   
                sbcs   ip, r5, r3
                              sch->qstats.overlimits++;
  0.08          ldr    r1, [r0, #116]  ; 0x74
                              u64 next = min(ktime_to_ns(q->time_next_packet),
                movlt  r2, r4   
  0.08          movlt  r3, r5   
                              sch->qstats.overlimits++;
                add    r1, r1, #1
  0.08          str    r1, [r0, #116]  ; 0x74
                              qdisc_watchdog_schedule_ns(&q->watchdog, next);
                ldr    r1, [sp, #72]   ; 0x48
  0.07          add    r0, r1, #16384  ; 0x4000
                              return NULL;
                mov    r1, #0   
                              qdisc_watchdog_schedule_ns(&q->watchdog, next);
  0.05          add    r0, r0, #104    ; 0x68
                              return NULL;
                str    r1, [sp, #48]   ; 0x30
                              qdisc_watchdog_schedule_ns(&q->watchdog, next);
  0.10        → bl     qdisc_peek_dequeued
                              return NULL;
  0.03        ↓ b      114c     
                      }   
               
                      /* Choose a class to work on. */
                      if (!q->rate_ns) {
  0.14    dc:   ldr    r3, [sp, #72]   ; 0x48
  0.07          add    r3, r3, #16384  ; 0x4000
  0.05          add    r3, r3, #48     ; 0x30
  0.03          ldrd   r2, [r3, #-8]
  0.03          orrs   r3, r2, r3
  0.35        ↓ beq    18c      
  0.09          ldr    r3, [sp, #52]   ; 0x34
                              /* In shaped mode, choose:
                               * - Highest-priority tin with queue and meeting schedule, or
                               * - The earliest-scheduled tin with queue.
                               */
                              ktime_t best_time = ns_to_ktime(KTIME_MAX);
                              int tin, best_tin = 0;
  0.05          mov    r6, #0   
               
                              for (tin = 0; tin < q->tin_cnt; tin++) {
  0.05          mov    ip, r6   
                              ktime_t best_time = ns_to_ktime(KTIME_MAX);
  0.05          mvn    r4, #0   
  0.05          mvn    r5, #-2147483648        ; 0x80000000
  0.23          ldrd   r8, [sp, #24]
  0.04          ldr    r7, [r3, #264]  ; 0x108
                              for (tin = 0; tin < q->tin_cnt; tin++) {
  0.26          ldr    r3, [sp, #56]   ; 0x38
  0.11          add    r0, r7, #88064  ; 0x15800
  0.05          add    r0, r0, #44     ; 0x2c
  0.08          ldrh   lr, [r3, #14]
  0.04        ↓ b      240      
                                      if (b->tin_deficit <= 0)
         124:   cmp    r8, #0   
              ↓ bne    144      
              ↓ b      10dc     
                                              empty = false;
         130:   cmp    r3, #0   
                                              b->tin_deficit += b->tin_quantum_band;
                ldrh   r3, [r9, r7]
                                              empty = false;
                movne  r5, #0   
                                              b->tin_deficit += b->tin_quantum_band;
                add    r8, r3, r8
                str    r8, [r9, #2160] ; 0x870
                                      q->cur_tin++;
         144:   ldrh   r3, [r0, #96]   ; 0x60
                                      if (q->cur_tin >= q->tin_cnt) {
                ldrh   r8, [r0, #14]
                                      q->cur_tin++;
                add    r3, r3, #1
                uxth   r3, r3   
                strh   r3, [r0, #96]   ; 0x60
                                      if (q->cur_tin >= q->tin_cnt) {
                cmp    r8, r3   
                                      b++;
                addhi  r2, r2, #88064  ; 0x15800
                addhi  r2, r2, #192    ; 0xc0
                                      if (q->cur_tin >= q->tin_cnt) {
              ↓ bhi    1ac      
                                              b = q->tins;
                ldr    r3, [sp, #52]   ; 0x34
                                              if (wrapped) {
                cmp    r4, #0   
                                              q->cur_tin = 0;
                strh   r6, [r0, #96]   ; 0x60
                                              b = q->tins;
                ldr    r2, [r3, #264]  ; 0x108
                                              if (wrapped) {
              ↓ beq    184      
                                                      if (empty)
                cmp    r5, #0   
              ↑ bne    5c       
                                                      wrapped = true;
         184:   mov    r4, #1   
              ↓ b      1ac      
                                              b->tin_deficit += b->tin_quantum_band;
         18c:   ldr    r2, [sp, #60]   ; 0x3c
                              bool wrapped = false, empty = true;
                mov    r4, #0   
                                              b->tin_deficit += b->tin_quantum_band;
                ldr    r0, [sp, #56]   ; 0x38
                              bool wrapped = false, empty = true;
                mov    r5, #1   
                                              q->cur_tin = 0;
                mov    r6, r4   
                movw   lr, #2094       ; 0x82e
                movw   ip, #2092       ; 0x82c
                                              b->tin_deficit += b->tin_quantum_band;
                movw   r7, #2156       ; 0x86c
                              while (b->tin_deficit < 0 ||
         1ac:   add    r9, r2, #86016  ; 0x15000
                ldr    r8, [r9, #2160] ; 0x870
                ldrh   r3, [r9, lr]
                ldrh   sl, [r9, ip]
                cmp    r8, #0   
                add    r3, r3, sl
              ↑ blt    130      
                cmp    r3, #0   
              ↑ beq    124      
                str    r2, [sp, #60]   ; 0x3c
              ↓ b      27c      
                                      b = q->tins + tin;
                                      if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
  0.07   1d8:   ldrh   r3, [r0, #2]
  0.20          ldrh   r2, [r0] 
  0.09          cmn    r3, r2   
  0.18        ↓ beq    234      
                                              ktime_t time_to_pkt = \
  0.15          ldrd   sl, [r0, #36]   ; 0x24
  0.07          subs   sl, sl, r8
  0.01          sbc    fp, fp, r9
              ktime_compare():
                      if (cmp1 < cmp2)
  0.19          cmp    sl, r4   
  0.05          sbcs   r3, fp, r5
  0.12          movlt  r3, #1   
  0.04          movge  r3, #0   
              cake_dequeue():
                                                      ktime_sub(b->time_next_packet, now);
               
                                              if (ktime_to_ns(time_to_pkt) <= 0 ||
  0.14          cmp    sl, #1   
  0.01          sbcs   r2, fp, #0
              ktime_compare():
  0.28          orrlt  r3, r3, #1
                      if (cmp1 > cmp2)
                cmp    r4, sl   
  0.36          sbcs   r2, r5, fp
                      if (cmp1 < cmp2)
  0.01          eor    r3, r3, #1
                      if (cmp1 > cmp2)
  0.11          andlt  r3, r3, #1
  0.02          movge  r3, #0   
  0.05          cmp    r3, #0   
              cake_dequeue():
                                              ktime_t time_to_pkt = \
  0.01          moveq  r4, sl   
  0.06          moveq  r5, fp   
              ktime_compare():
                moveq  r6, ip   
  0.23   234:   add    r0, r0, #88064  ; 0x15800
              cake_dequeue():
                              for (tin = 0; tin < q->tin_cnt; tin++) {
  0.09          add    ip, ip, #1
  0.08          add    r0, r0, #192    ; 0xc0
  0.19   240:   cmp    lr, ip   
  0.29        ↑ bne    1d8      
                                              }
                                      }
                              }
               
                              q->cur_tin = best_tin;
                              b = q->tins + best_tin;
  0.34          movw   r3, #22720      ; 0x58c0
  0.09          movt   r3, #1   
                              q->cur_tin = best_tin;
                ldr    r2, [sp, #56]   ; 0x38
               
                              /* No point in going further if no packets to deliver. */
                              if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
  0.30          movw   r0, #2094       ; 0x82e
                              b = q->tins + best_tin;
  0.01          mla    r3, r3, r6, r7
                              q->cur_tin = best_tin;
  0.07          strh   r6, [r2, #96]   ; 0x60
                              if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
  0.26          movw   r2, #2092       ; 0x82c
  0.01          add    r9, r3, #86016  ; 0x15000
                              b = q->tins + best_tin;
  0.22          str    r3, [sp, #60]   ; 0x3c
                              if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
                ldrh   r2, [r9, r2]
  0.10          ldrh   r3, [r9, r0]
  0.08          cmn    r3, r2   
  0.06        ↑ beq    5c       
                                      return NULL;
                      }   
               
              retry:      
                      /* service this class */
                      head = &b->decaying_flows;
  0.06   27c:   ldr    r3, [sp, #60]   ; 0x3c
                      if (!first_flow || list_empty(head)) {
  0.04          cmp    r1, #0   
                      head = &b->decaying_flows;
  0.01          add    r3, r3, #88064  ; 0x15800
  0.11          str    r3, [sp, #76]   ; 0x4c
  0.02          add    r3, r3, #72     ; 0x48
  0.10          str    r3, [sp, #80]   ; 0x50
                      if (!first_flow || list_empty(head)) {
  0.13        ↓ beq    2b4      
              __read_once_size():
              })          
               
              static __always_inline
              void __read_once_size(const volatile void *p, void *res, int size)
              {           
                      __READ_ONCE_SIZE;
  0.14          ldr    r2, [r9, #2120] ; 0x848
              cake_dequeue():
  0.03          cmp    r3, r2   
              __read_once_size():
  0.00          str    r2, [sp, #84]   ; 0x54
              cake_dequeue():
                moveq  r1, #1   
  0.02        ↓ beq    2bc      
  0.01        ↓ b      2fc      
  0.01   2b0:   mov    r9, fp   
                      head = &b->decaying_flows;
  0.29   2b4:   ldr    r3, [sp, #80]   ; 0x50
                str    r3, [sp, #84]   ; 0x54
                              head = &b->new_flows;
  0.11   2bc:   ldr    r3, [sp, #76]   ; 0x4c
              __read_once_size():
  0.80          ldr    r2, [r9, #2104] ; 0x838
              cake_dequeue():
  0.09          add    r3, r3, #56     ; 0x38
                              if (list_empty(head)) {
  1.12          cmp    r3, r2   
  0.20        ↓ bne    304      
                                      head = &b->old_flows;
  2.02          ldr    r3, [sp, #76]   ; 0x4c
              __read_once_size():
  0.18          ldr    r2, [r9, #2112] ; 0x840
              cake_dequeue():
  1.14          add    r3, r3, #64     ; 0x40
                                      if (unlikely(list_empty(head))) {
  0.05          cmp    r3, r2   
  1.00        ↓ bne    304      
              __read_once_size():
                ldr    r3, [r9, #2120] ; 0x848
              cake_dequeue():
                                              head = &b->decaying_flows;
                                              if (unlikely(list_empty(head)))
                ldr    r2, [sp, #84]   ; 0x54
                cmp    r2, r3   
              ↑ beq    4c       
                mov    r3, r2   
              ↓ b      304      
  0.12   2fc:   ldr    r3, [sp, #80]   ; 0x50
                str    r3, [sp, #84]   ; 0x54
                                                      goto begin;
                                      }
                              }
                      }   
                      flow = list_first_entry(head, struct cake_flow, flowchain);
  0.26   304:   ldr    r8, [r3] 
                      q->cur_flow = flow - b->flows;
  0.89          ldr    r2, [sp, #60]   ; 0x3c
                      flow = list_first_entry(head, struct cake_flow, flowchain);
  0.09          sub    r3, r8, #8
                      q->cur_flow = flow - b->flows;
  0.84          sub    r3, r3, r2
  0.24          ldr    r2, [sp, #56]   ; 0x38
  1.82          asr    r3, r3, #6
  0.10          strh   r3, [r2, #98]   ; 0x62
                      srchost = &b->hosts[flow->srchost];
                      dsthost = &b->hosts[flow->dsthost];
                      host_load = 1;
               
                      /* flow isolation (DRR++) */
                      if (flow->deficit <= 0) {
  1.01          ldr    r3, [r8, #8]
                      srchost = &b->hosts[flow->srchost];
  0.13          ldrh   r2, [r8, #48]   ; 0x30
                      if (flow->deficit <= 0) {
  2.08          cmp    r3, #0   
                      dsthost = &b->hosts[flow->dsthost];
  0.14          ldrh   r3, [r8, #50]   ; 0x32
                      srchost = &b->hosts[flow->srchost];
  1.63          str    r2, [sp, #88]   ; 0x58
  0.27          movgt  sl, r8   
                      dsthost = &b->hosts[flow->dsthost];
  0.98          str    r3, [sp, #92]   ; 0x5c
  0.17          movgt  fp, r9   
                      if (flow->deficit <= 0) {
  3.57        ↓ bgt    4ec      
                              /* Keep all flows with deficits out of the sparse and decaying
                               * rotations.  No non-empty flow can go into the decaying
                               * rotation, so they can't get deficits
                               */
                              if (flow->set == CAKE_SET_SPARSE) {
  0.24          ldrb   r3, [r8, #52]   ; 0x34
  1.66          cmp    r3, #1   
  0.04          ldrne  r3, [sp, #52]   ; 0x34
  0.97          addne  r3, r3, #16384  ; 0x4000
  0.30        ↓ bne    408      
                                      if (flow->head) {
                ldr    r3, [r8, #-8]
  0.02          cmp    r3, #0   
              ↓ beq    3f8      
                                              b->sparse_flow_count--;
                                              b->bulk_flow_count++;
               
                                              if (cake_dsrc(q->flow_mode))
                ldr    r3, [sp, #52]   ; 0x34
                                              b->sparse_flow_count--;
                movw   ip, #2094       ; 0x82e
                                              b->bulk_flow_count++;
                movw   r0, #2092       ; 0x82c
                                              b->sparse_flow_count--;
                ldrh   r1, [r9, ip]
                                              b->bulk_flow_count++;
  0.02          ldrh   r2, [r9, r0]
                                              if (cake_dsrc(q->flow_mode))
                add    r3, r3, #16384  ; 0x4000
                                              b->sparse_flow_count--;
                sub    r1, r1, #1
                strh   r1, [r9, ip]
                                              b->bulk_flow_count++;
                add    r2, r2, #1
                strh   r2, [r9, r0]
                                              if (cake_dsrc(q->flow_mode))
                ldrb   r2, [r3, #273]  ; 0x111
                and    r1, r2, #5
                cmp    r1, #5   
              ↓ bne    3c0      
                                                      srchost->srchost_bulk_flow_count++;
                ldr    r1, [sp, #88]   ; 0x58
                mov    r2, #12  
                ldr    r0, [sp, #60]   ; 0x3c
                mla    r2, r2, r1, r0
                add    r2, r2, #75776  ; 0x12800
                ldrh   r1, [r2, #8]
                add    r1, r1, #1
                strh   r1, [r2, #8]
                ldrb   r2, [r3, #273]  ; 0x111
               
                                              if (cake_ddst(q->flow_mode))
  0.02   3c0:   and    r2, r2, #6
                cmp    r2, #6   
              ↓ bne    3ec      
                                                      dsthost->dsthost_bulk_flow_count++;
                ldr    r1, [sp, #92]   ; 0x5c
                mov    r2, #12  
  0.01          ldr    r0, [sp, #60]   ; 0x3c
                mla    r2, r2, r1, r0
                add    r2, r2, #75776  ; 0x12800
                ldrh   r1, [r2, #10]
                add    r1, r1, #1
                strh   r1, [r2, #10]
               
                                              flow->set = CAKE_SET_BULK;
         3ec:   mov    r2, #3   
  0.01          strb   r2, [r8, #52]   ; 0x34
  0.01        ↓ b      408      
  0.03   3f8:   ldr    r3, [sp, #52]   ; 0x34
                                      } else {
                                              /* we've moved it to the bulk rotation for
                                               * correct deficit accounting but we still want
                                               * to count it as a sparse flow, not a bulk one.
                                               */
                                              flow->set = CAKE_SET_SPARSE_WAIT;
                mov    r2, #2   
                strb   r2, [r8, #52]   ; 0x34
                add    r3, r3, #16384  ; 0x4000
                                      }
                              }
               
                              if (cake_dsrc(q->flow_mode))
  1.22   408:   ldrb   r3, [r3, #273]  ; 0x111
  0.11          and    r2, r3, #5
  1.42          and    r3, r3, #6
  0.09          cmp    r2, #5   
  1.00        ↓ bne    10cc     
                                      host_load = max(host_load, srchost->srchost_bulk_flow_count);
                ldr    r1, [sp, #88]   ; 0x58
                mov    r2, #12  
                ldr    r0, [sp, #60]   ; 0x3c
                mla    r2, r2, r1, r0
                add    r2, r2, #75776  ; 0x12800
                ldrh   r2, [r2, #8]
                cmp    r2, #0   
              ↓ beq    10cc     
               
                              if (cake_ddst(q->flow_mode))
                cmp    r3, #6   
                movne  r4, r2   
              ↓ bne    474      
              ↓ b      450      
                      host_load = 1;
  0.20   44c:   mov    r2, #1   
                                      host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
  2.19   450:   ldr    r1, [sp, #92]   ; 0x5c
  0.10          mov    r3, #12  
  4.08          ldr    r0, [sp, #60]   ; 0x3c
  0.08          mla    r3, r3, r1, r0
  3.43          add    r3, r3, #75776  ; 0x12800
  1.75          ldrh   r4, [r3, #10]
                cmp    r2, r4   
  1.00          movcs  r4, r2   
                uxth   r2, r4   
               
                              WARN_ON(host_load > CAKE_QUEUES);
  0.99   474:   cmp    r2, #1024       ; 0x400
              ↓ bls    48c      
                movw   r0, #0   
                movw   r1, #2141       ; 0x85d
                movt   r0, #0   
              → bl     qdisc_peek_dequeued
               
                              /* The shifted prandom_u32() is a way to apply dithering to
                               * avoid accumulating roundoff errors
                               */
                              flow->deficit += (b->flow_quantum * quantum_div[host_load] +
  1.91   48c:   ldr    r2, [sp, #44]   ; 0x2c
                lsl    r4, r4, #1
  1.11          movw   r3, #2052       ; 0x804
                ldrh   r3, [r9, r3]
  2.62          ldrh   r4, [r2, r4]
                mul    r4, r4, r3
                                                (prandom_u32() >> 16)) >> 16;
  3.53        → bl     qdisc_peek_dequeued
              __list_del_entry():
              static inline void __list_del_entry(struct list_head *entry)
              {           
                      if (!__list_del_entry_valid(entry))
                              return;
               
                      __list_del(entry->prev, entry->next);
  2.75          ldr    ip, [r8, #4]
  0.04          ldr    lr, [r8] 
              cake_dequeue():
                      first_flow = false;
  1.31          mov    r1, #0   
                              flow->deficit += (b->flow_quantum * quantum_div[host_load] +
  0.10          ldr    r2, [r8, #8]
  0.90          add    r0, r4, r0, lsr #16
                              list_move_tail(&flow->flowchain, &b->old_flows);
                ldr    r3, [sp, #76]   ; 0x4c
                              flow->deficit += (b->flow_quantum * quantum_div[host_load] +
  1.98          add    r2, r2, r0, lsr #16
  0.01          str    r2, [r8, #8]
              __list_del():
                      next->prev = prev;
  1.88          str    ip, [lr, #4]
              cake_dequeue():
                              list_move_tail(&flow->flowchain, &b->old_flows);
  0.02          add    r3, r3, #64     ; 0x40
              __write_once_size():
              static __always_inline void __write_once_size(volatile void *p, void *res, int size)
              {           
                      switch (size) {
                      case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
                      case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
                      case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
  0.92          str    lr, [ip] 
              list_add_tail():
                      __list_add(new, head->prev, head);
  0.02          ldr    r2, [r9, #2116] ; 0x844
              __list_add():
                      next->prev = new;
  0.77          str    r8, [r9, #2116] ; 0x844
                      new->next = next;
  0.07          str    r3, [r8] 
                      new->prev = prev;
  0.86          str    r2, [r8, #4]
              __write_once_size():
  0.02          str    r8, [r2] 
  1.66        ↑ b      2bc      
              cake_dequeue():
                              goto retry;
                      }   
               
                      /* Retrieve a packet via the AQM */
                      while (1) {
                              skb = cake_dequeue_one(sch);
  1.11   4ec:   ldr    r0, [sp, #52]   ; 0x34
  0.06        → bl     cake_dequeue_one
                              if (!skb) {
  0.33          subs   r3, r0, #0
  0.18          str    r3, [sp, #48]   ; 0x30
  0.08        ↓ bne    840      
              cobalt_queue_empty():
                      if (vars->p_drop &&
  0.01          ldr    ip, [sl, #40]   ; 0x28
  0.04          mov    r8, sl   
                mov    r9, fp   
  0.06          cmp    ip, #0   
              ↓ beq    1118     
                          ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
                ldrd   r0, [sl, #32]
                add    r3, fp, #2064   ; 0x810
                ldrd   r6, [sp, #24]
                      if (vars->p_drop &&
                ldrd   r2, [r3] 
                          ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
                subs   r4, r6, r0
                sbc    r5, r7, r1
                      if (vars->p_drop &&
                cmp    r5, r3   
                          ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
                mov    r0, r4   
                      if (vars->p_drop &&
                cmpeq  r4, r2   
              ↓ bls    112c     
                              if (vars->p_drop < p->p_dec)
                ldr    r3, [fp, #2084] ; 0x824
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
                ldr    r1, [sl, #16]
                              if (vars->p_drop < p->p_dec)
                cmp    ip, r3   
              ↓ bcs    570      
                                      vars->p_drop = 0;
                ldr    r3, [sp, #48]   ; 0x30
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
                cmp    r1, #0   
                              vars->blue_timer = now;
                mov    r4, r6   
                mov    r5, r7   
                strd   r4, [sl, #32]
                                      vars->p_drop = 0;
                str    r3, [sl, #40]   ; 0x28
                      vars->dropping = false;
                strb   r3, [sl, #44]   ; 0x2c
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
              ↓ bne    10e8     
              ↓ b      638      
                                      vars->p_drop -= p->p_dec;
         570:   sub    r3, ip, r3
                      vars->dropping = false;
                ldr    r0, [sp, #48]   ; 0x30
                clz    ip, r3   
                                      vars->p_drop -= p->p_dec;
                str    r3, [sl, #40]   ; 0x28
                              vars->blue_timer = now;
                ldrd   r2, [sp, #24]
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
                cmp    r1, #0   
                lsr    ip, ip, #5
                      vars->dropping = false;
                strb   r0, [sl, #44]   ; 0x2c
                              vars->blue_timer = now;
                strd   r2, [sl, #32]
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
              ↓ beq    630      
                ldrd   r6, [sl, #24]
                subs   r2, r2, r6
                sbc    r3, r3, r7
                cmp    r2, #0   
                sbcs   r3, r3, #0
              ↓ blt    630      
                              vars->count--;
  0.01   5b0:   sub    r2, r1, #1
  0.02          str    r2, [r8, #16]
              cobalt_invsqrt():
                      if (vars->count < REC_INV_SQRT_CACHE)
                cmp    r2, #15  
              ↓ bhi    5d4      
                              vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
                ldr    r3, [sp, #44]   ; 0x2c
                add    r2, r3, r2, lsl #2
                ldr    r1, [r2, #2052] ; 0x804
  0.00          str    r1, [r8, #20]
              ↓ b      610      
              cobalt_newton_step():
                      invsqrt = vars->rec_inv_sqrt;
         5d4:   ldr    r0, [r8, #20]
                      invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
                umull  r4, r5, r0, r0
                      val = (3LL << 32) - ((u64)vars->count * invsqrt2);
                mov    r4, #0   
                umull  r2, r3, r5, r2
                mov    r5, #3   
                subs   r4, r4, r2
                sbc    r5, r5, r3
                      val >>= 2; /* avoid overflow in following multiply */
                lsr    r2, r4, #2
                orr    r2, r2, r5, lsl #30
                lsr    r1, r5, #2
                      val = (val * invsqrt) >> (32 - 2 + 1);
                umull  r2, r3, r2, r0
                mla    r3, r0, r1, r3
                lsr    r1, r2, #31
                orr    r1, r1, r3, lsl #1
                      vars->rec_inv_sqrt = val;
                str    r1, [r8, #20]
              reciprocal_scale():
               *          
               * Return: a result based on @val in interval [0, @ep_ro).
               */         
              static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
              {           
                      return (u32)(((u64) val * ep_ro) >> 32);
         610:   ldr    r2, [r9, #2056] ; 0x808
  0.02          mov    r0, #0   
                mov    fp, r0   
                umull  r2, r3, r2, r1
                mla    sl, r1, r0, r3
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                adds   r4, sl, r6
                adc    r5, r0, r7
              cobalt_queue_empty():
                              vars->drop_next = cobalt_control(vars->drop_next,
                strd   r4, [r8, #24]
              cake_dequeue():
                                      /* this queue was actually empty */
                                      if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
         630:   cmp    ip, #0   
              ↓ beq    648      
                                              b->unresponsive_flow_count--;
         638:   movw   r2, #2098       ; 0x832
                ldrh   r3, [r9, r2]
                sub    r3, r3, #1
                strh   r3, [r9, r2]
  0.04   648:   ldr    ip, [r8, #40]   ; 0x28
               
                                      if (flow->cvars.p_drop || flow->cvars.count ||
  0.04   64c:   cmp    ip, #0   
                ldr    r0, [r8, #4]
                ldr    r1, [r8] 
              ↓ bne    67c      
  0.26   65c:   ldr    r3, [r8, #16]
                cmp    r3, #0   
  0.04        ↓ bne    67c      
              ktime_compare():
                      if (cmp1 < cmp2)
  0.01          ldrd   r2, [r8, #24]
  0.10          ldrd   r4, [sp, #24]
                cmp    r4, r2   
  0.25          sbcs   r3, r5, r3
              ↓ bge    76c      
              __list_del():
                      next->prev = prev;
  0.27   67c:   str    r0, [r1, #4]
              __list_add():
                      new->next = next;
                ldr    r2, [sp, #84]   ; 0x54
              __write_once_size():
  0.03          str    r1, [r0] 
              list_add_tail():
                      __list_add(new, head->prev, head);
                ldr    r3, [r9, #2124] ; 0x84c
              __list_add():
                      next->prev = new;
  0.21          str    r8, [r9, #2124] ; 0x84c
                      new->prev = prev;
  0.01          strd   r2, [r8] 
              __write_once_size():
  0.13          str    r8, [r3] 
              cake_dequeue():
                                              /* keep in the flowchain until the state has
                                               * decayed to rest
                                               */
                                              list_move_tail(&flow->flowchain,
                                                             &b->decaying_flows);
                                              if (flow->set == CAKE_SET_BULK) {
                ldrb   r3, [r8, #52]   ; 0x34
  0.05          cmp    r3, #3   
              ↓ bne    730      
                                                      b->bulk_flow_count--;
               
                                                      if (cake_dsrc(q->flow_mode))
  0.04          ldr    r3, [sp, #52]   ; 0x34
                                                      b->bulk_flow_count--;
                movw   r1, #2092       ; 0x82c
  0.01          ldrh   r2, [r9, r1]
                                                      if (cake_dsrc(q->flow_mode))
                add    r3, r3, #16384  ; 0x4000
                                                      b->bulk_flow_count--;
  0.02          sub    r2, r2, #1
                strh   r2, [r9, r1]
                                                      if (cake_dsrc(q->flow_mode))
                ldrb   r2, [r3, #273]  ; 0x111
                and    r1, r2, #5
  0.01          cmp    r1, #5   
              ↓ bne    6f0      
                                                              srchost->srchost_bulk_flow_count--;
                ldr    r1, [sp, #88]   ; 0x58
                mov    r2, #12  
                ldr    r0, [sp, #60]   ; 0x3c
                mla    r2, r2, r1, r0
                add    r2, r2, #75776  ; 0x12800
                ldrh   r1, [r2, #8]
                sub    r1, r1, #1
                strh   r1, [r2, #8]
                ldrb   r2, [r3, #273]  ; 0x111
               
                                                      if (cake_ddst(q->flow_mode))
         6f0:   and    r2, r2, #6
                cmp    r2, #6   
              ↓ bne    71c      
                                                              dsthost->dsthost_bulk_flow_count--;
  0.01          ldr    r2, [sp, #92]   ; 0x5c
                mov    r3, #12  
                ldr    r1, [sp, #60]   ; 0x3c
                mla    r3, r3, r2, r1
                add    r3, r3, #75776  ; 0x12800
                ldrh   r2, [r3, #10]
                sub    r2, r2, #1
                strh   r2, [r3, #10]
               
                                                      b->decaying_flow_count++;
         71c:   add    r2, r9, #2096   ; 0x830
  0.01          ldrh   r3, [r2] 
                add    r3, r3, #1
  0.01          strh   r3, [r2] 
              ↓ b      75c      
                                              } else if (flow->set == CAKE_SET_SPARSE ||
  0.11   730:   sub    r3, r3, #1
  0.22          cmp    r3, #1   
              ↓ bhi    75c      
                                                         flow->set == CAKE_SET_SPARSE_WAIT) {
                                                      b->sparse_flow_count--;
  0.02          movw   r1, #2094       ; 0x82e
                                                      b->decaying_flow_count++;
  0.01          add    r2, r9, #2096   ; 0x830
                                                      b->sparse_flow_count--;
  0.01          ldrh   r3, [r9, r1]
  0.04          sub    r3, r3, #1
  0.00          strh   r3, [r9, r1]
                                                      b->decaying_flow_count++;
  0.01          ldrh   r3, [r2] 
  0.01          add    r3, r3, #1
  0.01          strh   r3, [r2] 
                                              }
                                              flow->set = CAKE_SET_DECAYING;
  0.37   75c:   mov    r3, #4   
                      first_flow = false;
  0.01          mov    r1, #0   
                                              flow->set = CAKE_SET_DECAYING;
  0.01          strb   r3, [r8, #52]   ; 0x34
              ↑ b      4c       
              __list_del():
                      next->prev = prev;
  0.03   76c:   str    r0, [r1, #4]
              __write_once_size():
                str    r1, [r0] 
              cake_dequeue():
                                      } else {
                                              /* remove empty queue from the flowchain */
                                              list_del_init(&flow->flowchain);
                                              if (flow->set == CAKE_SET_SPARSE ||
  0.03          ldrb   r3, [r8, #52]   ; 0x34
              __write_once_size():
                str    r8, [r8] 
              cake_dequeue():
                sub    r2, r3, #1
              INIT_LIST_HEAD():
                      list->prev = list;
                str    r8, [r8, #4]
              cake_dequeue():
                cmp    r2, #1   
              ↓ bhi    7a0      
                                                  flow->set == CAKE_SET_SPARSE_WAIT)
                                                      b->sparse_flow_count--;
                movw   r2, #2094       ; 0x82e
                ldrh   r3, [r9, r2]
                sub    r3, r3, #1
                strh   r3, [r9, r2]
              ↓ b      834      
                                              else if (flow->set == CAKE_SET_BULK) {
         7a0:   cmp    r3, #3   
  0.01        ↓ bne    824      
                                                      b->bulk_flow_count--;
               
                                                      if (cake_dsrc(q->flow_mode))
                ldr    r3, [sp, #52]   ; 0x34
                                                      b->bulk_flow_count--;
                movw   r1, #2092       ; 0x82c
                ldrh   r2, [r9, r1]
                                                      if (cake_dsrc(q->flow_mode))
                add    r3, r3, #16384  ; 0x4000
                                                      b->bulk_flow_count--;
                sub    r2, r2, #1
                strh   r2, [r9, r1]
                                                      if (cake_dsrc(q->flow_mode))
                ldrb   r2, [r3, #273]  ; 0x111
                and    r1, r2, #5
                cmp    r1, #5   
              ↓ bne    7f4      
                                                              srchost->srchost_bulk_flow_count--;
                ldr    r1, [sp, #88]   ; 0x58
                mov    r2, #12  
                ldr    r0, [sp, #60]   ; 0x3c
                mla    r2, r2, r1, r0
                add    r2, r2, #75776  ; 0x12800
                ldrh   r1, [r2, #8]
                sub    r1, r1, #1
                strh   r1, [r2, #8]
                ldrb   r2, [r3, #273]  ; 0x111
               
                                                      if (cake_ddst(q->flow_mode))
         7f4:   and    r2, r2, #6
                cmp    r2, #6   
              ↓ bne    834      
                                                              dsthost->dsthost_bulk_flow_count--;
                ldr    r2, [sp, #92]   ; 0x5c
                mov    r3, #12  
                ldr    r1, [sp, #60]   ; 0x3c
                mla    r3, r3, r2, r1
                add    r3, r3, #75776  ; 0x12800
                ldrh   r2, [r3, #10]
                sub    r2, r2, #1
                strh   r2, [r3, #10]
              ↓ b      834      
               
                                              } else
                                                      b->decaying_flow_count--;
         824:   add    r2, r9, #2096   ; 0x830
                ldrh   r3, [r2] 
  0.01          sub    r3, r3, #1
                strh   r3, [r2] 
               
                                              flow->set = CAKE_SET_NONE;
  0.01   834:   mov    r1, #0   
                strb   r1, [r8, #52]   ; 0x34
              ↑ b      4c       
              cobalt_should_drop():
                      sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
  0.21   840:   ldr    r3, [sp, #48]   ; 0x30
                      over_target = sojourn > p->target &&
  0.01          add    r0, fp, #2064   ; 0x810
                      schedule = ktime_sub(now, vars->drop_next);
  0.03          ldrd   r4, [sl, #24]
  0.01          ldrd   r6, [sp, #24]
                      sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
  0.21          ldrd   r2, [r3, #32]
                      schedule = ktime_sub(now, vars->drop_next);
  0.03          subs   r8, r6, r4
  0.18          ldr    r1, [sl, #16]
                sbc    r9, r7, r5
                      sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
  0.05          subs   r6, r6, r2
  0.01          sbc    r7, r7, r3
                      schedule = ktime_sub(now, vars->drop_next);
  0.04          strd   r4, [sp, #32]
                cmp    r1, #0   
                                    sojourn > p->mtu_time * bulk_flows * 2 &&
  0.05          ldrd   r4, [r0] 
  0.01          mvn    lr, r9   
                      sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
  0.07          strd   r6, [sp, #16]
                lsr    lr, lr, #31
  0.12          moveq  lr, #0   
                                    sojourn > p->mtu_time * bulk_flows * 2 &&
  0.01          cmp    r7, r5   
  0.06          ldrb   ip, [sl, #44]   ; 0x2c
                cmpeq  r6, r4   
  0.46        ↓ bls    8f8      
              cake_dequeue():
                              }
               
                              /* Last packet in queue may be marked, shouldn't be dropped */
                              if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
                                                      (b->bulk_flow_count *
                                                       !!(q->rate_flags &
  0.01          ldr    r4, [sp, #56]   ; 0x38
                                                      (b->bulk_flow_count *
  0.01          movw   r5, #2092       ; 0x82c
                ldrh   r5, [fp, r5]
              cobalt_should_drop():
                                    sojourn > p->mtu_time * bulk_flows * 2 &&
  0.01          ldrd   r6, [r0, #8]
              cake_dequeue():
                                                       !!(q->rate_flags &
                ldrh   r4, [r4, #56]   ; 0x38
              cobalt_should_drop():
                                    sojourn > p->mtu_time * bulk_flows * 2 &&
  0.04          adds   r2, r6, r6
              cake_dequeue():
                                                       !!(q->rate_flags &
                ubfx   r4, r4, #2, #1
              cobalt_should_drop():
                                    sojourn > p->mtu_time * bulk_flows * 2 &&
  0.04          adc    r3, r7, r7
  0.02          mov    r6, r2   
              cake_dequeue():
                                                      (b->bulk_flow_count *
                mul    r4, r5, r4
              cobalt_should_drop():
                                    sojourn > p->mtu_time * bulk_flows * 2 &&
                mov    r7, r3   
  0.01          strd   r6, [sp, #64]   ; 0x40
                umull  r6, r7, r4, r2
  0.04          ldrd   r2, [sp, #64]   ; 0x40
  0.03          mla    r7, r4, r3, r7
                      over_target = sojourn > p->target &&
                ldrd   r4, [sp, #16]
  0.05          cmp    r5, r7   
                cmpeq  r4, r6   
  0.01        ↓ bls    8f8      
                                    sojourn > p->mtu_time * 4;
  0.01          adds   r4, r2, r2
  0.01          adc    r5, r3, r3
                                    sojourn > p->mtu_time * bulk_flows * 2 &&
                ldrd   r2, [sp, #16]
  0.05          cmp    r3, r5   
                cmpeq  r2, r4   
  0.02        ↓ bhi    90c      
                      } else if (vars->dropping) {
  0.21   8f8:   cmp    ip, #0   
                      vars->ecn_marked = false;
  0.04          mov    r3, #0   
                strb   r3, [sl, #45]   ; 0x2d
                      } else if (vars->dropping) {
  0.03        ↓ bne    964      
              ↓ b      974      
                              if (!vars->dropping) {
  0.01   90c:   cmp    ip, #0   
                      vars->ecn_marked = false;
                mov    r3, #0   
  0.03          strb   r3, [sl, #45]   ; 0x2d
                              if (!vars->dropping) {
  0.01        ↓ bne    954      
                                      vars->dropping = true;
  0.03          mov    r3, #1   
                strb   r3, [sl, #44]   ; 0x2c
              reciprocal_scale():
                ldrd   r2, [r0, #-8]
                mov    r5, #0   
                ldr    r0, [sl, #20]
                mov    r3, r5   
  0.07          umull  r4, r5, r2, r0
                mla    r6, r0, r3, r5
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                ldrd   r2, [sp, #24]
  0.00          adds   r2, r2, r6
                adc    r3, r3, ip
  0.01          mov    r6, r2   
                mov    r7, r3   
              cobalt_should_drop():
                                      vars->drop_next = cobalt_control(now,
                strd   r6, [sl, #24]
                              if (!vars->count)
  0.02   954:   cmp    r1, #0   
                                      vars->count = 1;
                moveq  r3, #1   
  0.03          streq  r3, [sl, #16]
  0.01        ↓ b      974      
                      if (next_due && vars->dropping) {
  0.01   964:   cmp    lr, #0   
                              vars->dropping = false;
                strb   r3, [sl, #44]   ; 0x2c
                      if (next_due && vars->dropping) {
              ↓ bne    b98      
              ↓ b      cbc      
  0.07   974:   cmp    lr, #0   
              ↓ beq    cbc      
  0.02          ldrb   r3, [sl, #44]   ; 0x2c
                cmp    r3, #0   
                ldrdeq r2, [sl, #24]
                ldreq  r1, [sl, #16]
                strdeq r2, [sp, #32]
              ↓ beq    b98      
              INET_ECN_set_ce():
                      ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
              }           
               
              static inline int INET_ECN_set_ce(struct sk_buff *skb)
              {           
                      switch (skb->protocol) {
  0.02          ldr    r3, [sp, #48]   ; 0x30
                ldrh   r3, [r3, #148]  ; 0x94
                cmp    r3, #8   
              ↓ beq    9b4      
                movw   r2, #56710      ; 0xdd86
                cmp    r3, r2   
              ↓ beq    a3c      
              ↓ b      ad0      
              skb_network_header():
                      skb->transport_header += offset;
              }           
               
              static inline unsigned char *skb_network_header(const struct sk_buff *skb)
              {           
                      return skb->head + skb->network_header;
  0.01   9b4:   ldr    r0, [sp, #48]   ; 0x30
                ldrh   r2, [r0, #152]  ; 0x98
                ldr    r1, [r0, #164]  ; 0xa4
              INET_ECN_set_ce():
                      case cpu_to_be16(ETH_P_IP):
                              if (skb_network_header(skb) + sizeof(struct iphdr) <=
                add    r3, r2, #20
                ldr    r0, [r0, #156]  ; 0x9c
                add    r3, r1, r3
                cmp    r0, r3   
              ↓ bcc    ad0      
              skb_network_header():
                add    r2, r1, r2
              IP_ECN_set_ce():
                      u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
                ldrb   r1, [r2, #1]
                add    r3, r1, #1
                      if (!(ecn & 2))
                tst    r3, #2   
                      u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
  0.03          and    r3, r3, #3
                      if (!(ecn & 2))
              ↓ bne    a04      
                              return !ecn;
  0.01          cmp    r3, #0   
                moveq  r2, #1   
                movne  r2, #0   
                movne  r4, #1   
                moveq  r4, #0   
              ↓ b      adc      
                      u32 check = (__force u32)iph->check;
  0.01   a04:   ldrh   r0, [r2, #10]
                      iph->tos |= INET_ECN_CE;
                orr    r1, r1, #3
                      check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn);
                rev16  r3, r3   
                      iph->tos |= INET_ECN_CE;
                strb   r1, [r2, #1]
                add    r1, r0, #64256  ; 0xfb00
                mov    r4, #0   
                add    r1, r1, #255    ; 0xff
                      iph->check = (__force __sum16)(check + (check>=0xFFFF));
                movw   r0, #65534      ; 0xfffe
                      check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn);
  0.04          uxtah  r3, r1, r3
                      iph->check = (__force __sum16)(check + (check>=0xFFFF));
                cmp    r3, r0   
                addhi  r3, r3, #1
                strh   r3, [r2, #10]
                      iph->tos |= INET_ECN_CE;
                mov    r2, #1   
              ↓ b      adc      
              skb_network_header():
         a3c:   ldr    lr, [sp, #48]   ; 0x30
                ldrh   r1, [lr, #152]  ; 0x98
                ldr    r0, [lr, #164]  ; 0xa4
              INET_ECN_set_ce():
                                  skb_tail_pointer(skb))
                                      return IP_ECN_set_ce(ip_hdr(skb));
                              break;
               
                      case cpu_to_be16(ETH_P_IPV6):
                              if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
                add    r3, r1, #40     ; 0x28
                ldr    r2, [lr, #156]  ; 0x9c
                add    r3, r0, r3
                cmp    r2, r3   
              ↓ bcc    ad0      
              ipv6_get_dsfield():
              }           
               
               
              static inline __u8 ipv6_get_dsfield(const struct ipv6hdr *ipv6h)
              {           
                      return ntohs(*(const __be16 *)ipv6h) >> 4;
                ldrh   r2, [r0, r1]
                rev16  r2, r2   
              IP6_ECN_set_ce():
                      if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
                ubfx   r2, r2, #4, #2
                cmp    r2, #0   
                moveq  r4, #1   
              ↓ beq    adc      
                      from = *(__be32 *)iph;
                ldr    r2, [r0, r1]
                      to = from | htonl(INET_ECN_CE << 20);
                orr    ip, r2, #12288  ; 0x3000
                      *(__be32 *)iph = to;
                str    ip, [r0, r1]
                      if (skb->ip_summed == CHECKSUM_COMPLETE)
                ldrb   r3, [lr, #104]  ; 0x68
                and    r3, r3, #96     ; 0x60
                cmp    r3, #64 ; 0x40
                movne  r4, #0   
                movne  r2, #1   
              ↓ bne    adc      
                ldr    r3, [lr, #112]  ; 0x70
              csum_sub(): 
              }           
              #endif      
               
              static inline __wsum csum_sub(__wsum csum, __wsum addend)
              {           
                      return csum_add(csum, ~addend);
                mvn    r2, r2   
                mov    r1, #0   
              IP6_ECN_set_ce():
                              skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
                mov    r4, #0   
                adds   r3, r3, r2
                mov    r2, #1   
              csum_add(): 
                      res += (__force u32)addend;
                add    r3, r3, ip
                movcs  r1, #1   
                add    r3, r3, r1
              IP6_ECN_set_ce():
                ldr    r1, [sp, #48]   ; 0x30
              csum_add(): 
                      return (__force __wsum)(res + (res < (__force u32)addend));
                cmp    ip, r3   
                addhi  r3, r3, #1
              IP6_ECN_set_ce():
                str    r3, [r1, #112]  ; 0x70
              ↓ b      adc      
              INET_ECN_set_ce():
                              if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
         ad0:   mov    r4, #1   
                mov    r2, #0   
              ↓ b      adc      
              cobalt_should_drop():
                              vars->count++;
  0.03   adc:   ldr    r3, [sl, #16]
                              drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
                strb   r2, [sl, #45]   ; 0x2d
                              vars->count++;
                add    r3, r3, #1
                str    r3, [sl, #16]
                              if (!vars->count)
                cmp    r3, #0   
                                      vars->count--;
                mvneq  r2, #0   
                moveq  ip, r3   
                streq  r2, [sl, #16]
              ↓ beq    b24      
              cobalt_invsqrt():
                      if (vars->count < REC_INV_SQRT_CACHE)
                cmp    r3, #15  
              ↓ bhi    b1c      
                              vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
                ldr    r2, [sp, #44]   ; 0x2c
                add    r3, r2, r3, lsl #2
                ldr    ip, [r3, #2052] ; 0x804
  0.01          str    ip, [sl, #20]
              ↓ b      b68      
         b1c:   mov    r2, r3   
                mov    ip, #0   
              cobalt_newton_step():
                      invsqrt = vars->rec_inv_sqrt;
         b24:   ldr    lr, [sl, #20]
                      invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
                umull  r0, r1, lr, lr
                      val = (3LL << 32) - ((u64)vars->count * invsqrt2);
                mov    r0, #0   
                umull  r2, r3, r1, r2
                subs   r0, r0, r2
                      val >>= 2; /* avoid overflow in following multiply */
                lsr    r2, r0, #2
                      val = (3LL << 32) - ((u64)vars->count * invsqrt2);
                mla    r3, ip, r1, r3
                mov    r1, #3   
                sbc    r1, r1, r3
                mov    r7, r1   
                      val >>= 2; /* avoid overflow in following multiply */
                lsr    r1, r1, #2
                orr    r2, r2, r7, lsl #30
                      val = (val * invsqrt) >> (32 - 2 + 1);
                umull  r2, r3, r2, lr
                mla    r3, lr, r1, r3
                lsr    ip, r2, #31
                orr    ip, ip, r3, lsl #1
                      vars->rec_inv_sqrt = val;
                str    ip, [sl, #20]
              reciprocal_scale():
         b68:   ldr    r0, [fp, #2056] ; 0x808
                mov    lr, #0   
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                ldrd   r6, [sl, #24]
              reciprocal_scale():
                umull  r0, r1, r0, ip
                mla    r8, ip, lr, r1
              cobalt_control():
                adds   r2, r8, r6
              cobalt_should_drop():
                              schedule = ktime_sub(now, vars->drop_next);
                ldrd   r8, [sp, #24]
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                adc    r3, lr, r7
              cobalt_should_drop():
                              vars->drop_next = cobalt_control(vars->drop_next,
                strd   r2, [sl, #24]
                              schedule = ktime_sub(now, vars->drop_next);
                subs   r8, r8, r2
                sbc    r9, r9, r3
              ↓ b      cc0      
                                      vars->drop_next = cobalt_control(vars->drop_next,
  0.03   b98:   add    ip, fp, #2064   ; 0x810
              cobalt_newton_step():
                      invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
                mov    lr, #0   
              cobalt_should_drop():
                                      vars->count--;
         ba0:   sub    r1, r1, #1
              cobalt_newton_step():
                      val = (val * invsqrt) >> (32 - 2 + 1);
                mov    r3, #0   
                mov    r2, #0   
              cobalt_invsqrt():
                      if (vars->count < REC_INV_SQRT_CACHE)
                cmp    r1, #15  
              cobalt_newton_step():
                      val = (val * invsqrt) >> (32 - 2 + 1);
                strd   r2, [sp, #16]
              cobalt_invsqrt():
                              vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
                ldr    r3, [sp, #44]   ; 0x2c
              cobalt_should_drop():
                                      vars->count--;
                str    r1, [sl, #16]
              cobalt_invsqrt():
                              vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
                add    r0, r3, r1, lsl #2
                      if (vars->count < REC_INV_SQRT_CACHE)
              ↓ bhi    c1c      
                              vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
                ldr    r0, [r0, #2052] ; 0x804
              reciprocal_scale():
                mov    r9, #0   
              cobalt_invsqrt():
                str    r0, [sl, #20]
              reciprocal_scale():
                ldrd   r2, [ip, #-8]
                umull  r6, r7, r2, r0
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                ldrd   r2, [sp, #32]
              reciprocal_scale():
                mla    r4, r0, r9, r7
              cobalt_should_drop():
                                      schedule = ktime_sub(now, vars->drop_next);
                ldrd   r8, [sp, #24]
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                adds   r2, r2, r4
                adc    r3, r3, lr
              cobalt_should_drop():
                                      schedule = ktime_sub(now, vars->drop_next);
                subs   r8, r8, r2
                sbc    r9, r9, r3
                                      next_due = vars->count && ktime_to_ns(schedule) >= 0;
                cmp    r1, #0   
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                mov    r4, r2   
  0.01          mov    r5, r3   
              cobalt_should_drop():
                                      vars->drop_next = cobalt_control(vars->drop_next,
                strd   r4, [sl, #24]
                                      next_due = vars->count && ktime_to_ns(schedule) >= 0;
              ↓ bne    ca0      
                      if (vars->p_drop)
  0.01          ldr    r3, [sl, #40]   ; 0x28
                      bool next_due, over_target, drop = false;
  0.01          mov    r4, r1   
                      if (vars->p_drop)
                cmp    r3, #0   
  0.01        ↓ bne    ccc      
              ↓ b      ce8      
              cobalt_newton_step():
                      invsqrt = vars->rec_inv_sqrt;
         c1c:   ldr    r0, [sl, #20]
                      val = (3LL << 32) - ((u64)vars->count * invsqrt2);
                mov    r2, #0   
                mov    r3, #3   
                      invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
                umull  r4, r5, r0, r0
                      val = (3LL << 32) - ((u64)vars->count * invsqrt2);
                umull  r6, r7, r5, r1
                subs   r2, r2, r6
                sbc    r3, r3, r7
              reciprocal_scale():
                mov    r7, #0   
              cobalt_newton_step():
                      val >>= 2; /* avoid overflow in following multiply */
                lsr    r4, r2, #2
                orr    r4, r4, r3, lsl #30
                lsr    r6, r3, #2
                      val = (val * invsqrt) >> (32 - 2 + 1);
                umull  r4, r5, r4, r0
                mla    r5, r0, r6, r5
                lsr    r3, r4, #31
                orr    r3, r3, r5, lsl #1
                str    r3, [sp, #16]
                lsr    r3, r5, #31
                str    r3, [sp, #20]
                      vars->rec_inv_sqrt = val;
                ldrd   r2, [sp, #16]
              reciprocal_scale():
                mov    r3, #0   
              cobalt_newton_step():
                str    r2, [sl, #20]
              reciprocal_scale():
                ldrd   r4, [ip, #-8]
                mul    r0, r4, r3
                mla    r0, r2, r7, r0
                umull  r2, r3, r4, r2
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                ldrd   r4, [sp, #32]
              reciprocal_scale():
                add    r8, r0, r3
              cobalt_control():
                adds   r4, r4, r8
              cobalt_should_drop():
                                      schedule = ktime_sub(now, vars->drop_next);
                ldrd   r8, [sp, #24]
              cobalt_control():
                      return ktime_add_ns(t, reciprocal_scale(interval,
                adc    r5, r5, lr
              cobalt_should_drop():
                                      vars->drop_next = cobalt_control(vars->drop_next,
                strd   r4, [sl, #24]
                                      schedule = ktime_sub(now, vars->drop_next);
                subs   r8, r8, r4
                sbc    r9, r9, r5
                              while (next_due) {
  0.01   ca0:   cmp    r8, #0   
                sbcs   r3, r9, #0
                ldrdge r2, [sl, #24]
                strdge r2, [sp, #32]
              ↑ bge    ba0      
                      bool next_due, over_target, drop = false;
                mov    r4, #0   
              ↓ b      cc0      
  0.16   cbc:   mov    r4, lr   
                      if (vars->p_drop)
  0.02   cc0:   ldr    r3, [sl, #40]   ; 0x28
  0.02          cmp    r3, #0   
              ↓ beq    cdc      
                              drop |= (prandom_u32() < vars->p_drop);
         ccc: → bl     qdisc_peek_dequeued
                ldr    r3, [sl, #40]   ; 0x28
                cmp    r0, r3   
                orrcc  r4, r4, #1
                      if (!vars->count)
  0.56   cdc:   ldr    r3, [sl, #16]
                cmp    r3, #0   
  0.06        ↓ bne    d0c      
                              vars->drop_next = ktime_add_ns(now, p->interval);
  0.13   ce8:   add    r3, fp, #2064   ; 0x810
  0.05          ldrd   r0, [sp, #24]
                ldrd   r2, [r3, #-8]
  0.10          adds   r0, r0, r2
                adc    r1, r1, r3
  0.06          mov    r2, r0   
                mov    r3, r1   
  0.03          strd   r2, [sl, #24]
              ↓ b      d3c      
                      else if (ktime_to_ns(schedule) > 0 && !drop)
  0.17   d0c:   cmp    r8, #1   
                sbcs   r3, r9, #0
  0.09          eor    r3, r4, #1
                andge  r3, r3, #1
  0.03          movlt  r3, #0   
                cmp    r3, #0   
  0.04        ↓ beq    d3c      
                              vars->drop_next = now;
                ldrd   r2, [sp, #24]
                mov    r9, sl   
                mov    r8, fp   
                strd   r2, [sl, #24]
              ↓ b      dfc      
              cake_dequeue():
                              if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
  0.65   d3c:   cmp    r4, #0   
              ↓ beq    df4      
                                                          CAKE_FLAG_INGRESS))) ||
                ldr    r3, [sl, #-8]
                cmp    r3, #0   
              ↓ beq    df4      
                                  !flow->head)
                                      break;
               
                              /* drop this packet, get another one */
                              if (q->rate_flags & CAKE_FLAG_INGRESS) {
                ldr    r3, [sp, #56]   ; 0x38
  0.01          ldrh   r3, [r3, #56]   ; 0x38
                tst    r3, #4   
              ↓ beq    d98      
                                      len = cake_advance_shaper(q, b, skb,
                ldrd   r2, [sp, #24]
                ldr    r1, [sp, #60]   ; 0x3c
                ldr    r0, [sp, #72]   ; 0x48
                strd   r2, [sp] 
                mov    r3, #1   
  0.01          ldr    r2, [sp, #48]   ; 0x30
                str    r3, [sp, #8]
  0.01        → bl     cake_advance_shaper
                                                                now, true);
                                      flow->deficit -= len;
                ldr    r3, [sl, #8]
                sub    r3, r3, r0
                str    r3, [sl, #8]
                                      b->tin_deficit -= len;
                ldr    r3, [fp, #2160] ; 0x870
                sub    r0, r3, r0
                str    r0, [fp, #2160] ; 0x870
                              }
                              flow->dropped++;
         d98:   ldr    r3, [sl, #12]
                              b->tin_dropped++;
                              qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
                mov    r1, #1   
                ldr    r4, [sp, #52]   ; 0x34
                              flow->dropped++;
                add    r3, r3, r1
                              qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
                ldr    r5, [sp, #48]   ; 0x30
                              flow->dropped++;
                str    r3, [sl, #12]
                              b->tin_dropped++;
                ldr    r3, [fp, #2168] ; 0x878
                              qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
                mov    r0, r4   
                              b->tin_dropped++;
                add    r3, r3, r1
                str    r3, [fp, #2168] ; 0x878
                              qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
                ldr    r2, [r5, #24]
              → bl     qdisc_peek_dequeued
              qstats_drop_inc():
                      sch->qstats.drops += count;
              }           
               
              static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
              {           
                      qstats->drops++;
                ldr    r3, [r4, #108]  ; 0x6c
              cake_dequeue():
              #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
                              qdisc_drop(skb, sch);
              #else       
                              qdisc_qstats_drop(sch);
                              kfree_skb(skb);
                mov    r0, r5   
              qstats_drop_inc():
                add    r3, r3, #1
                str    r3, [r4, #108]  ; 0x6c
              cake_dequeue():
              → bl     qdisc_peek_dequeued
                      first_flow = false;
                mov    r1, #0   
              #endif      
                              if (q->rate_flags & CAKE_FLAG_INGRESS)
                ldr    r3, [sp, #56]   ; 0x38
                ldrh   r3, [r3, #56]   ; 0x38
                tst    r3, #4   
              ↑ beq    4ec      
              ↑ b      2b0      
  0.30   df4:   mov    r9, sl   
                mov    r8, fp   
                                      goto retry;
                      }   
               
                      b->tin_ecn_mark += !!flow->cvars.ecn_marked;
  0.02   dfc:   ldrb   r2, [r9, #45]   ; 0x2d
              bstats_update():
                      _bstats_update(bstats,
                mov    r7, #0   
              cake_dequeue():
  0.08          ldr    r3, [r8, #2172] ; 0x87c
              _bstats_update():
                      bstats->bytes += bytes;
                ldr    lr, [sp, #52]   ; 0x34
              cake_dequeue():
                add    r3, r3, r2
              skb_end_pointer():
                      return skb->end;
                ldr    r2, [sp, #48]   ; 0x30
              cake_dequeue():
  0.33          str    r3, [r8, #2172] ; 0x87c
              _bstats_update():
                ldr    r4, [lr, #88]   ; 0x58
              skb_end_pointer():
  0.03          ldr    r3, [r2, #160]  ; 0xa0
              bstats_update():
                      _bstats_update(bstats,
                ldr    r6, [r2, #24]
              cake_dequeue():
                      qdisc_bstats_update(sch, skb);
               
                      /* collect delay stats */
                      delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
                      b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
  0.05          ldr    sl, [sp, #76]   ; 0x4c
              bstats_update():
                ldrh   r2, [r3, #4]
  0.02          cmp    r2, #0   
                ldrhne r2, [r3, #6]
  0.32          moveq  r2, #1   
              _bstats_update():
                      bstats->bytes += bytes;
                ldr    r3, [lr, #84]   ; 0x54
  0.05          str    r3, [sp, #32]
                      bstats->packets += packets;
                ldr    r3, [lr, #92]   ; 0x5c
                      bstats->bytes += bytes;
  0.11          str    r4, [sp, #36]   ; 0x24
                      bstats->packets += packets;
                add    r3, r3, r2
                      bstats->bytes += bytes;
  0.07          ldrd   r4, [sp, #32]
                      bstats->packets += packets;
                str    r3, [lr, #92]   ; 0x5c
              cake_dequeue():
                      delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
  0.20          ldr    r3, [sp, #48]   ; 0x30
              _bstats_update():
                      bstats->bytes += bytes;
                adds   r4, r4, r6
  0.05          adc    r5, r5, r7
                str    r4, [lr, #84]   ; 0x54
  0.08          str    r5, [lr, #88]   ; 0x58
              cake_dequeue():
                ldrd   r4, [r3, #32]
                      b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
  0.03          ldrd   r2, [sl, #152]  ; 0x98
                      delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
                ldrd   r6, [sp, #24]
              cake_ewma():
                      avg -= avg >> shift;
  0.64          lsr    r0, r2, #8
              cake_dequeue():
                      delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
                subs   r6, r6, r4
              cake_ewma():
                      avg -= avg >> shift;
  0.01          orr    r0, r0, r3, lsl #24
              cake_dequeue():
                      delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
                sbc    r7, r7, r5
              cake_ewma():
                      avg -= avg >> shift;
  0.13          lsr    r1, r3, #8
                subs   r2, r2, r0
  0.02          sbc    r3, r3, r1
                      avg += sample >> shift;
                lsr    lr, r6, #8
  0.10          orr    lr, lr, r7, lsl #24
                str    lr, [sp, #16]
  0.16          lsr    lr, r7, #8
                str    lr, [sp, #20]
                      avg -= avg >> shift;
  0.06          mov    r4, r2   
                mov    r5, r3   
                      avg += sample >> shift;
  0.04          ldrd   r2, [sp, #16]
                adds   r2, r2, r4
  0.10          adc    r3, r3, r5
              cake_dequeue():
                      b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
                strd   r2, [sl, #152]  ; 0x98
                      b->peak_delay = cake_ewma(b->peak_delay, delay,
  0.08          ldrd   r2, [sl, #160]  ; 0xa0
                cmp    r3, r7   
  0.09          cmpeq  r2, r6   
              ↓ bcs    ee8      
  0.02          mov    r1, #2   
                lsr    r0, r6, r1
                orr    r0, r0, r7, lsl #30
                mov    sl, r0   
                lsr    r0, r7, r1
                mov    fp, r0   
              ↓ b      ef0      
  0.06   ee8:   ldrd   sl, [sp, #16]
  0.01          mov    r1, #8   
                                                delay > b->peak_delay ? 2 : 8);
                      b->base_delay = cake_ewma(b->base_delay, delay,
  0.08   ef0:   ldr    lr, [sp, #76]   ; 0x4c
              cake_ewma():
                      avg -= avg >> shift;
  0.09          rsb    r0, r1, #32
  0.01          lsr    r4, r2, r1
  0.01          sub    ip, r1, #32
  0.04          orr    r4, r4, r3, lsl r0
                      avg += sample >> shift;
  0.02          adds   sl, sl, r2
                      avg -= avg >> shift;
  0.03          orr    r4, r4, r3, lsr ip
              cake_dequeue():
                      b->base_delay = cake_ewma(b->base_delay, delay,
  0.05          add    r0, lr, #176    ; 0xb0
              cake_ewma():
                      avg += sample >> shift;
  0.05          adc    fp, fp, r3
                      avg -= avg >> shift;
  0.05          lsr    r5, r3, r1
                      avg += sample >> shift;
  0.03          subs   r2, sl, r4
  0.01          sbc    r3, fp, r5
              cake_dequeue():
                      b->peak_delay = cake_ewma(b->peak_delay, delay,
  0.01          strd   r2, [lr, #160]  ; 0xa0
                      b->base_delay = cake_ewma(b->base_delay, delay,
  0.01          ldrd   r4, [r0, #-8]
  0.08          cmp    r5, r7   
  0.03          cmpeq  r4, r6   
  0.02          movhi  ip, #2   
  0.08          lsrhi  r3, r6, ip
  0.02          orrhi  r3, r3, r7, lsl #30
  0.03          movls  ip, #8   
  0.02          strhi  r3, [sp, #16]
              cake_ewma():
                      avg -= avg >> shift;
  0.04          rsb    lr, ip, #32
                lsrhi  r3, r7, ip
  0.02          lsr    r2, r4, ip
  0.04          strhi  r3, [sp, #20]
  0.08          sub    r1, ip, #32
                      avg += sample >> shift;
  0.02          ldrd   sl, [sp, #16]
                      avg -= avg >> shift;
  0.02          orr    r2, r2, r5, lsl lr
                orr    r2, r2, r5, lsr r1
  0.02          lsr    r3, r5, ip
                      avg += sample >> shift;
  0.03          adds   sl, sl, r4
              cake_dequeue():
                                                delay < b->base_delay ? 2 : 8);
               
                      len = cake_advance_shaper(q, b, skb, now, false);
                ldr    r1, [sp, #60]   ; 0x3c
              cake_ewma():
                      avg += sample >> shift;
  0.02          adc    fp, fp, r5
  0.01          subs   r6, sl, r2
  0.05          sbc    r7, fp, r3
              cake_dequeue():
                      len = cake_advance_shaper(q, b, skb, now, false);
  0.03          ldrd   r4, [sp, #24]
                      b->base_delay = cake_ewma(b->base_delay, delay,
  0.03          strd   r6, [r0, #-8]
                      len = cake_advance_shaper(q, b, skb, now, false);
  0.02          mov    ip, #0   
  0.07          ldr    r6, [sp, #72]   ; 0x48
  0.02          ldr    r2, [sp, #48]   ; 0x30
  0.02          strd   r4, [sp] 
  0.09          mov    r0, r6   
  0.03          str    ip, [sp, #8]
  0.03        → bl     cake_advance_shaper
                      flow->deficit -= len;
  0.22          ldr    r3, [r9, #8]
                      b->tin_deficit -= len;
               
                      if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
  0.01          movw   r2, #16416      ; 0x4020
  0.05          add    r1, r6, r2
                      flow->deficit -= len;
  0.04          sub    r3, r3, r0
  0.04          str    r3, [r9, #8]
                      b->tin_deficit -= len;
  0.22          ldr    r3, [r8, #2160] ; 0x870
                sub    r3, r3, r0
  0.02          str    r3, [r8, #2160] ; 0x870
                      if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
                ldrd   r0, [r1, #-8]
              ktime_compare():
                      if (cmp1 > cmp2)
  0.01          cmp    r4, r0   
                sbcs   r3, r5, r1
              cake_dequeue():
  0.04          ldr    r3, [sp, #52]   ; 0x34
                ldr    r3, [r3, #76]   ; 0x4c
              ktime_compare():
  0.14        ↓ bge    1010     
              cake_dequeue():
                cmp    r3, #0   
  0.05        ↓ beq    1070     
                              u64 next = min(ktime_to_ns(q->time_next_packet),
  0.17          ldr    r3, [sp, #72]   ; 0x48
  0.02          add    r2, r3, r2
                ldrd   r2, [r2] 
  0.05          cmp    r0, r2   
                sbcs   ip, r1, r3
  0.15          movlt  r3, r1   
                                             ktime_to_ns(q->failsafe_next_packet));
               
                              qdisc_watchdog_schedule_ns(&q->watchdog, next);
                ldr    r1, [sp, #72]   ; 0x48
                              u64 next = min(ktime_to_ns(q->time_next_packet),
                movlt  r2, r0   
                              qdisc_watchdog_schedule_ns(&q->watchdog, next);
  0.01          add    r0, r1, #16384  ; 0x4000
  0.02          add    r0, r0, #104    ; 0x68
              → bl     qdisc_peek_dequeued
                      if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
              ↓ b      1090     
                      } else if (!sch->q.qlen) {
        1010:   cmp    r3, #0   
              ↓ bne    1090     
              ↓ b      1070     
                              int i;
               
                              for (i = 0; i < q->tin_cnt; i++) {
                                      if (q->tins[i].decaying_flow_count) {
  0.05  101c:   ldr    r2, [lr, #264]  ; 0x108
                add    r2, r2, r3
  0.01          add    r3, r3, #88064  ; 0x15800
                add    r1, r2, #88064  ; 0x15800
  0.03          add    r3, r3, #192    ; 0xc0
                add    r1, r1, #48     ; 0x30
  0.05          ldrh   r1, [r1] 
                cmp    r1, #0   
  0.03        ↓ beq    1084     
                                              ktime_t next = \
                                                      ktime_add_ns(now,
                                                                   q->tins[i].cparams.target);
               
                                              qdisc_watchdog_schedule_ns(&q->watchdog,
                ldr    r3, [sp, #72]   ; 0x48
                                                      ktime_add_ns(now,
  0.05          add    r2, r2, #88064  ; 0x15800
                                              qdisc_watchdog_schedule_ns(&q->watchdog,
                ldrd   r4, [sp, #24]
  0.05          add    r0, r3, #16384  ; 0x4000
                                                      ktime_add_ns(now,
                ldrd   r2, [r2, #16]
                                              qdisc_watchdog_schedule_ns(&q->watchdog,
  0.02          add    r0, r0, #104    ; 0x68
                adds   r4, r4, r2
  0.01          adc    r5, r5, r3
                mov    r2, r4   
  0.03          mov    r3, r5   
              → bl     qdisc_peek_dequeued
                                                                         ktime_to_ns(next));
                                              break;
              ↓ b      1090     
                              for (i = 0; i < q->tin_cnt; i++) {
  0.11  1070:   ldr    r3, [sp, #56]   ; 0x38
  0.03          ldr    lr, [sp, #52]   ; 0x34
                ldrh   ip, [r3, #14]
  0.02          mov    r3, #0   
                mov    r0, r3   
  0.07  1084:   cmp    ip, r0   
                add    r0, r0, #1
  0.04        ↑ bne    101c     
                                      }
                              }
                      }   
               
                      if (q->overflow_timeout)
  0.63  1090:   ldr    r2, [sp, #56]   ; 0x38
  0.07          ldrh   r3, [r2, #12]
  0.01          cmp    r3, #0   
                              q->overflow_timeout--;
  0.09          subne  r3, r3, #1
  0.03          strhne r3, [r2, #12]
  0.03        ↓ b      114c     
              cobalt_queue_empty():
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
  0.11  10a8:   ldrd   r6, [r8, #24]
  0.01          ldrd   r2, [sp, #24]
                subs   r2, r2, r6
  0.02          sbc    r3, r3, r7
                cmp    r2, #0   
  0.01          sbcs   r3, r3, #0
                movge  ip, #0   
              ↑ bge    5b0      
              ↑ b      64c      
              cake_dequeue():
                              if (cake_ddst(q->flow_mode))
  0.15  10cc:   cmp    r3, #6   
  2.80          movne  r4, #1   
  0.17        ↑ bne    48c      
  0.98        ↑ b      44c      
                                              b->tin_deficit += b->tin_quantum_band;
        10dc:   ldrh   r3, [r9, r7]
                str    r3, [r9, #2160] ; 0x870
              ↑ b      144      
              cobalt_queue_empty():
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
        10e8:   ldrd   r6, [sl, #24]
                ldrd   r2, [sp, #24]
                subs   r2, r2, r6
                sbc    r3, r3, r7
                cmp    r2, #0   
                sbcs   r3, r3, #0
                movge  ip, #1   
              ↑ bge    5b0      
              ↑ b      638      
  0.25  110c:   ldr    r0, [r8, #4]
  0.02          ldr    r1, [r8] 
  0.02        ↑ b      65c      
  0.34  1118:   ldr    r1, [sl, #16]
                      vars->dropping = false;
  0.04          strb   ip, [sl, #44]   ; 0x2c
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
  0.01          cmp    r1, #0   
  0.05        ↑ bne    10a8     
  0.05        ↑ b      110c     
        112c:   ldr    r1, [sl, #16]
                      vars->dropping = false;
                ldr    r3, [sp, #48]   ; 0x30
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
                cmp    r1, #0   
                      vars->dropping = false;
                strb   r3, [sl, #44]   ; 0x2c
                ldreq  r0, [sl, #4]
                ldreq  r1, [sl] 
                      if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
              ↑ beq    67c      
              ↑ b      10a8     
              cake_dequeue():
               
                      return skb;
              }           
  0.46  114c:   ldr    r0, [sp, #48]   ; 0x30
  0.02          add    sp, sp, #100    ; 0x64
  0.10          pop    {r4, r5, r6, r7, r8, r9, sl, fp, pc}

[-- Attachment #3: perf.hist --]
[-- Type: application/octet-stream, Size: 1683 bytes --]

  74.68%  swapper      0x2678             K [k] cake_dequeue
  10.61%  swapper      0x5ad0             K [k] cake_enqueue
   6.35%  swapper      0x382c             K [k] cake_hash
   3.17%  swapper      0x478              K [k] cake_dequeue_one
   1.98%  swapper      0x40c              K [k] cake_advance_shaper
   0.85%  swapper      0x3cfc             K [k] cake_overhead
   0.69%  swapper      0x138              K [k] cake_calc_overhead
   0.39%  swapper      0x49e8             K [k] cake_ack_filter
   0.25%  sh           0x2630             K [k] cake_dequeue
   0.24%  sh           0x54b8             K [k] cake_enqueue
   0.15%  dhcpv6.scri  0x26b4             K [k] cake_dequeue
   0.12%  fw3          0x5018             K [k] cake_enqueue
   0.06%  ksoftirqd/1  0x26b8             K [k] cake_dequeue
   0.06%  ubusd        0x2ba4             K [k] cake_dequeue
   0.06%  ubusd        0x4f2c             K [k] cake_enqueue
   0.06%  sh           0x478              K [k] cake_dequeue_one
   0.06%  sh           0x198              K [k] cake_calc_overhead
   0.05%  sh           0x3bb0             K [k] cake_hash
   0.05%  perf         0x23b4             K [k] cake_dequeue
   0.03%  odhcp6c      0x5630             K [k] cake_enqueue
   0.03%  dnsmasq      0x2418             K [k] cake_dequeue
   0.01%  ksoftirqd/1  0x198              K [k] cake_calc_overhead
   0.01%  dnsmasq      0x36cc             K [k] cake_hash
   0.01%  hostapd      0x276c             K [k] cake_dequeue
   0.01%  swapper      0xb0               K [k] cake_get_tcpopt
   0.01%  hostapd      0x4c8              K [k] cake_dequeue_one
   0.00%  hostapd      0x3d00             K [k] cake_overhead

  reply	other threads:[~2019-03-08 21:01 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-08 20:58 Georgios Amanakis
2019-03-08 21:01 ` Georgios Amanakis [this message]
2019-03-09 16:03   ` Toke Høiland-Jørgensen
2019-03-11 14:49     ` Adrian Popescu
2019-03-11 15:53       ` Jonathan Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://lists.bufferbloat.net/postorius/lists/cake.lists.bufferbloat.net/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CACvFP_hF5BMTrVFNxLZqeVmyLUHbFcDvMxbxmKfO=_T6zqF+GQ@mail.gmail.com' \
    --to=gamanakis@gmail.com \
    --cc=cake@lists.bufferbloat.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox