Subject: Re: [Cake] profiling using perf
From: Georgios Amanakis @ 2019-03-08 21:01 UTC
To: Cake List
[-- Attachment #1: Type: text/plain, Size: 423 bytes --]
Dear List,
I made an effort to profile the performance of cake with perf on
OpenWrt. perf was run on a WRT1900ACS router while a LAN client was
downloading an archlinux.iso via torrent. Attached you will find the
perf annotation of cake_dequeue() from sch_cake.ko (interleaved source
and disassembly) as well as a histogram of the sch_cake symbols
(percentages are relative to sch_cake as a whole). Hopefully people can
take a look and see whether anything stands out as a performance
concern.
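For reference, the data was gathered with perf on the router itself;
the invocation was along these lines (a sketch from memory, so treat
the exact flags and duration as an assumption, not a recipe):

    # sample cycles system-wide while the torrent runs
    perf record -a -e cycles:ppp -- sleep 30
    # per-symbol breakdown (attachment #3)
    perf report
    # interleaved source/disassembly of the hot function (attachment #2)
    perf annotate --stdio cake_dequeue > cake_dequeue.annotation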
Best,
George
[-- Attachment #2: cake_dequeue.annotation --]
[-- Type: application/octet-stream, Size: 85930 bytes --]
cake_dequeue() /lib/modules/4.14.104/sch_cake.ko
Event: cycles:ppp
Percent
Disassembly of section .text:
0000230c <cake_dequeue>:
cake_dequeue():
while (!!(skb = cake_dequeue_one(sch)))
kfree_skb(skb);
}
static struct sk_buff *cake_dequeue(struct Qdisc *sch)
{
0.83 push {r4, r5, r6, r7, r8, r9, sl, fp, lr}
struct cake_sched_data *q = qdisc_priv(sch);
struct cake_tin_data *b = &q->tins[q->cur_tin];
0.09 add r1, r0, #16640 ; 0x4100
{
0.01 sub sp, sp, #100 ; 0x64
struct cake_tin_data *b = &q->tins[q->cur_tin];
0.13 ldr r2, [r0, #264] ; 0x108
movw r3, #22720 ; 0x58c0
0.49 movt r3, #1
0.02 str r1, [sp, #56] ; 0x38
0.10 ldrh r1, [r1, #96] ; 0x60
{
str r0, [sp, #52] ; 0x34
qdisc_priv():
#define QDISC_ALIGNTO 64
#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
static inline void *qdisc_priv(struct Qdisc *q)
{
return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc));
0.15 add r0, r0, #256 ; 0x100
0.07 str r0, [sp, #72] ; 0x48
cobalt_invsqrt():
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
0.10 movw r0, #0
cake_dequeue():
struct cake_tin_data *b = &q->tins[q->cur_tin];
0.01 mla r3, r3, r1, r2
cobalt_invsqrt():
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
0.34 movt r0, #0
0.16 str r0, [sp, #44] ; 0x2c
cake_dequeue():
struct cake_tin_data *b = &q->tins[q->cur_tin];
0.07 str r3, [sp, #60] ; 0x3c
struct cake_host *srchost, *dsthost;
ktime_t now = ktime_get();
0.06 → bl qdisc_peek_dequeued
strd r0, [sp, #24]
struct cake_flow *flow;
struct list_head *head;
bool first_flow = true;
0.35 mov r1, #1
u16 host_load;
u64 delay;
u32 len;
begin:
if (!sch->q.qlen)
0.03 4c: ldr r3, [sp, #52] ; 0x34
0.30 ldr r3, [r3, #76] ; 0x4c
0.11 cmp r3, #0
0.22 ↓ bne 68
return NULL;
0.01 5c: mov r3, #0
str r3, [sp, #48] ; 0x30
↓ b 114c
/* global hard shaper */
if (ktime_after(q->time_next_packet, now) &&
0.21 68: ldr r3, [sp, #72] ; 0x48
0.09 movw r0, #16416 ; 0x4020
ktime_compare():
*/
static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
{
if (cmp1 < cmp2)
return -1;
if (cmp1 > cmp2)
0.06 ldrd r4, [sp, #24]
cake_dequeue():
0.24 add r3, r3, r0
0.01 ldrd r2, [r3, #-8]
ktime_compare():
0.24 cmp r4, r2
0.02 sbcs ip, r5, r3
0.29 ↓ bge dc
cake_dequeue():
ktime_after(q->failsafe_next_packet, now)) {
0.08 ldr ip, [sp, #72] ; 0x48
ktime_compare():
0.04 ldrd r6, [sp, #24]
cake_dequeue():
ldrd r4, [ip, r0]
ktime_compare():
0.19 cmp r6, r4
sbcs r0, r7, r5
0.09 ↓ bge dc
cake_dequeue():
u64 next = min(ktime_to_ns(q->time_next_packet),
ktime_to_ns(q->failsafe_next_packet));
sch->qstats.overlimits++;
ldr r0, [sp, #52] ; 0x34
u64 next = min(ktime_to_ns(q->time_next_packet),
0.03 cmp r4, r2
sbcs ip, r5, r3
sch->qstats.overlimits++;
0.08 ldr r1, [r0, #116] ; 0x74
u64 next = min(ktime_to_ns(q->time_next_packet),
movlt r2, r4
0.08 movlt r3, r5
sch->qstats.overlimits++;
add r1, r1, #1
0.08 str r1, [r0, #116] ; 0x74
qdisc_watchdog_schedule_ns(&q->watchdog, next);
ldr r1, [sp, #72] ; 0x48
0.07 add r0, r1, #16384 ; 0x4000
return NULL;
mov r1, #0
qdisc_watchdog_schedule_ns(&q->watchdog, next);
0.05 add r0, r0, #104 ; 0x68
return NULL;
str r1, [sp, #48] ; 0x30
qdisc_watchdog_schedule_ns(&q->watchdog, next);
0.10 → bl qdisc_peek_dequeued
return NULL;
0.03 ↓ b 114c
}
/* Choose a class to work on. */
if (!q->rate_ns) {
0.14 dc: ldr r3, [sp, #72] ; 0x48
0.07 add r3, r3, #16384 ; 0x4000
0.05 add r3, r3, #48 ; 0x30
0.03 ldrd r2, [r3, #-8]
0.03 orrs r3, r2, r3
0.35 ↓ beq 18c
0.09 ldr r3, [sp, #52] ; 0x34
/* In shaped mode, choose:
* - Highest-priority tin with queue and meeting schedule, or
* - The earliest-scheduled tin with queue.
*/
ktime_t best_time = ns_to_ktime(KTIME_MAX);
int tin, best_tin = 0;
0.05 mov r6, #0
for (tin = 0; tin < q->tin_cnt; tin++) {
0.05 mov ip, r6
ktime_t best_time = ns_to_ktime(KTIME_MAX);
0.05 mvn r4, #0
0.05 mvn r5, #-2147483648 ; 0x80000000
0.23 ldrd r8, [sp, #24]
0.04 ldr r7, [r3, #264] ; 0x108
for (tin = 0; tin < q->tin_cnt; tin++) {
0.26 ldr r3, [sp, #56] ; 0x38
0.11 add r0, r7, #88064 ; 0x15800
0.05 add r0, r0, #44 ; 0x2c
0.08 ldrh lr, [r3, #14]
0.04 ↓ b 240
if (b->tin_deficit <= 0)
124: cmp r8, #0
↓ bne 144
↓ b 10dc
empty = false;
130: cmp r3, #0
b->tin_deficit += b->tin_quantum_band;
ldrh r3, [r9, r7]
empty = false;
movne r5, #0
b->tin_deficit += b->tin_quantum_band;
add r8, r3, r8
str r8, [r9, #2160] ; 0x870
q->cur_tin++;
144: ldrh r3, [r0, #96] ; 0x60
if (q->cur_tin >= q->tin_cnt) {
ldrh r8, [r0, #14]
q->cur_tin++;
add r3, r3, #1
uxth r3, r3
strh r3, [r0, #96] ; 0x60
if (q->cur_tin >= q->tin_cnt) {
cmp r8, r3
b++;
addhi r2, r2, #88064 ; 0x15800
addhi r2, r2, #192 ; 0xc0
if (q->cur_tin >= q->tin_cnt) {
↓ bhi 1ac
b = q->tins;
ldr r3, [sp, #52] ; 0x34
if (wrapped) {
cmp r4, #0
q->cur_tin = 0;
strh r6, [r0, #96] ; 0x60
b = q->tins;
ldr r2, [r3, #264] ; 0x108
if (wrapped) {
↓ beq 184
if (empty)
cmp r5, #0
↑ bne 5c
wrapped = true;
184: mov r4, #1
↓ b 1ac
b->tin_deficit += b->tin_quantum_band;
18c: ldr r2, [sp, #60] ; 0x3c
bool wrapped = false, empty = true;
mov r4, #0
b->tin_deficit += b->tin_quantum_band;
ldr r0, [sp, #56] ; 0x38
bool wrapped = false, empty = true;
mov r5, #1
q->cur_tin = 0;
mov r6, r4
movw lr, #2094 ; 0x82e
movw ip, #2092 ; 0x82c
b->tin_deficit += b->tin_quantum_band;
movw r7, #2156 ; 0x86c
while (b->tin_deficit < 0 ||
1ac: add r9, r2, #86016 ; 0x15000
ldr r8, [r9, #2160] ; 0x870
ldrh r3, [r9, lr]
ldrh sl, [r9, ip]
cmp r8, #0
add r3, r3, sl
↑ blt 130
cmp r3, #0
↑ beq 124
str r2, [sp, #60] ; 0x3c
↓ b 27c
b = q->tins + tin;
if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
0.07 1d8: ldrh r3, [r0, #2]
0.20 ldrh r2, [r0]
0.09 cmn r3, r2
0.18 ↓ beq 234
ktime_t time_to_pkt = \
0.15 ldrd sl, [r0, #36] ; 0x24
0.07 subs sl, sl, r8
0.01 sbc fp, fp, r9
ktime_compare():
if (cmp1 < cmp2)
0.19 cmp sl, r4
0.05 sbcs r3, fp, r5
0.12 movlt r3, #1
0.04 movge r3, #0
cake_dequeue():
ktime_sub(b->time_next_packet, now);
if (ktime_to_ns(time_to_pkt) <= 0 ||
0.14 cmp sl, #1
0.01 sbcs r2, fp, #0
ktime_compare():
0.28 orrlt r3, r3, #1
if (cmp1 > cmp2)
cmp r4, sl
0.36 sbcs r2, r5, fp
if (cmp1 < cmp2)
0.01 eor r3, r3, #1
if (cmp1 > cmp2)
0.11 andlt r3, r3, #1
0.02 movge r3, #0
0.05 cmp r3, #0
cake_dequeue():
ktime_t time_to_pkt = \
0.01 moveq r4, sl
0.06 moveq r5, fp
ktime_compare():
moveq r6, ip
0.23 234: add r0, r0, #88064 ; 0x15800
cake_dequeue():
for (tin = 0; tin < q->tin_cnt; tin++) {
0.09 add ip, ip, #1
0.08 add r0, r0, #192 ; 0xc0
0.19 240: cmp lr, ip
0.29 ↑ bne 1d8
}
}
}
q->cur_tin = best_tin;
b = q->tins + best_tin;
0.34 movw r3, #22720 ; 0x58c0
0.09 movt r3, #1
q->cur_tin = best_tin;
ldr r2, [sp, #56] ; 0x38
/* No point in going further if no packets to deliver. */
if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
0.30 movw r0, #2094 ; 0x82e
b = q->tins + best_tin;
0.01 mla r3, r3, r6, r7
q->cur_tin = best_tin;
0.07 strh r6, [r2, #96] ; 0x60
if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
0.26 movw r2, #2092 ; 0x82c
0.01 add r9, r3, #86016 ; 0x15000
b = q->tins + best_tin;
0.22 str r3, [sp, #60] ; 0x3c
if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
ldrh r2, [r9, r2]
0.10 ldrh r3, [r9, r0]
0.08 cmn r3, r2
0.06 ↑ beq 5c
return NULL;
}
retry:
/* service this class */
head = &b->decaying_flows;
0.06 27c: ldr r3, [sp, #60] ; 0x3c
if (!first_flow || list_empty(head)) {
0.04 cmp r1, #0
head = &b->decaying_flows;
0.01 add r3, r3, #88064 ; 0x15800
0.11 str r3, [sp, #76] ; 0x4c
0.02 add r3, r3, #72 ; 0x48
0.10 str r3, [sp, #80] ; 0x50
if (!first_flow || list_empty(head)) {
0.13 ↓ beq 2b4
__read_once_size():
})
static __always_inline
void __read_once_size(const volatile void *p, void *res, int size)
{
__READ_ONCE_SIZE;
0.14 ldr r2, [r9, #2120] ; 0x848
cake_dequeue():
0.03 cmp r3, r2
__read_once_size():
0.00 str r2, [sp, #84] ; 0x54
cake_dequeue():
moveq r1, #1
0.02 ↓ beq 2bc
0.01 ↓ b 2fc
0.01 2b0: mov r9, fp
head = &b->decaying_flows;
0.29 2b4: ldr r3, [sp, #80] ; 0x50
str r3, [sp, #84] ; 0x54
head = &b->new_flows;
0.11 2bc: ldr r3, [sp, #76] ; 0x4c
__read_once_size():
0.80 ldr r2, [r9, #2104] ; 0x838
cake_dequeue():
0.09 add r3, r3, #56 ; 0x38
if (list_empty(head)) {
1.12 cmp r3, r2
0.20 ↓ bne 304
head = &b->old_flows;
2.02 ldr r3, [sp, #76] ; 0x4c
__read_once_size():
0.18 ldr r2, [r9, #2112] ; 0x840
cake_dequeue():
1.14 add r3, r3, #64 ; 0x40
if (unlikely(list_empty(head))) {
0.05 cmp r3, r2
1.00 ↓ bne 304
__read_once_size():
ldr r3, [r9, #2120] ; 0x848
cake_dequeue():
head = &b->decaying_flows;
if (unlikely(list_empty(head)))
ldr r2, [sp, #84] ; 0x54
cmp r2, r3
↑ beq 4c
mov r3, r2
↓ b 304
0.12 2fc: ldr r3, [sp, #80] ; 0x50
str r3, [sp, #84] ; 0x54
goto begin;
}
}
}
flow = list_first_entry(head, struct cake_flow, flowchain);
0.26 304: ldr r8, [r3]
q->cur_flow = flow - b->flows;
0.89 ldr r2, [sp, #60] ; 0x3c
flow = list_first_entry(head, struct cake_flow, flowchain);
0.09 sub r3, r8, #8
q->cur_flow = flow - b->flows;
0.84 sub r3, r3, r2
0.24 ldr r2, [sp, #56] ; 0x38
1.82 asr r3, r3, #6
0.10 strh r3, [r2, #98] ; 0x62
srchost = &b->hosts[flow->srchost];
dsthost = &b->hosts[flow->dsthost];
host_load = 1;
/* flow isolation (DRR++) */
if (flow->deficit <= 0) {
1.01 ldr r3, [r8, #8]
srchost = &b->hosts[flow->srchost];
0.13 ldrh r2, [r8, #48] ; 0x30
if (flow->deficit <= 0) {
2.08 cmp r3, #0
dsthost = &b->hosts[flow->dsthost];
0.14 ldrh r3, [r8, #50] ; 0x32
srchost = &b->hosts[flow->srchost];
1.63 str r2, [sp, #88] ; 0x58
0.27 movgt sl, r8
dsthost = &b->hosts[flow->dsthost];
0.98 str r3, [sp, #92] ; 0x5c
0.17 movgt fp, r9
if (flow->deficit <= 0) {
3.57 ↓ bgt 4ec
/* Keep all flows with deficits out of the sparse and decaying
* rotations. No non-empty flow can go into the decaying
* rotation, so they can't get deficits
*/
if (flow->set == CAKE_SET_SPARSE) {
0.24 ldrb r3, [r8, #52] ; 0x34
1.66 cmp r3, #1
0.04 ldrne r3, [sp, #52] ; 0x34
0.97 addne r3, r3, #16384 ; 0x4000
0.30 ↓ bne 408
if (flow->head) {
ldr r3, [r8, #-8]
0.02 cmp r3, #0
↓ beq 3f8
b->sparse_flow_count--;
b->bulk_flow_count++;
if (cake_dsrc(q->flow_mode))
ldr r3, [sp, #52] ; 0x34
b->sparse_flow_count--;
movw ip, #2094 ; 0x82e
b->bulk_flow_count++;
movw r0, #2092 ; 0x82c
b->sparse_flow_count--;
ldrh r1, [r9, ip]
b->bulk_flow_count++;
0.02 ldrh r2, [r9, r0]
if (cake_dsrc(q->flow_mode))
add r3, r3, #16384 ; 0x4000
b->sparse_flow_count--;
sub r1, r1, #1
strh r1, [r9, ip]
b->bulk_flow_count++;
add r2, r2, #1
strh r2, [r9, r0]
if (cake_dsrc(q->flow_mode))
ldrb r2, [r3, #273] ; 0x111
and r1, r2, #5
cmp r1, #5
↓ bne 3c0
srchost->srchost_bulk_flow_count++;
ldr r1, [sp, #88] ; 0x58
mov r2, #12
ldr r0, [sp, #60] ; 0x3c
mla r2, r2, r1, r0
add r2, r2, #75776 ; 0x12800
ldrh r1, [r2, #8]
add r1, r1, #1
strh r1, [r2, #8]
ldrb r2, [r3, #273] ; 0x111
if (cake_ddst(q->flow_mode))
0.02 3c0: and r2, r2, #6
cmp r2, #6
↓ bne 3ec
dsthost->dsthost_bulk_flow_count++;
ldr r1, [sp, #92] ; 0x5c
mov r2, #12
0.01 ldr r0, [sp, #60] ; 0x3c
mla r2, r2, r1, r0
add r2, r2, #75776 ; 0x12800
ldrh r1, [r2, #10]
add r1, r1, #1
strh r1, [r2, #10]
flow->set = CAKE_SET_BULK;
3ec: mov r2, #3
0.01 strb r2, [r8, #52] ; 0x34
0.01 ↓ b 408
0.03 3f8: ldr r3, [sp, #52] ; 0x34
} else {
/* we've moved it to the bulk rotation for
* correct deficit accounting but we still want
* to count it as a sparse flow, not a bulk one.
*/
flow->set = CAKE_SET_SPARSE_WAIT;
mov r2, #2
strb r2, [r8, #52] ; 0x34
add r3, r3, #16384 ; 0x4000
}
}
if (cake_dsrc(q->flow_mode))
1.22 408: ldrb r3, [r3, #273] ; 0x111
0.11 and r2, r3, #5
1.42 and r3, r3, #6
0.09 cmp r2, #5
1.00 ↓ bne 10cc
host_load = max(host_load, srchost->srchost_bulk_flow_count);
ldr r1, [sp, #88] ; 0x58
mov r2, #12
ldr r0, [sp, #60] ; 0x3c
mla r2, r2, r1, r0
add r2, r2, #75776 ; 0x12800
ldrh r2, [r2, #8]
cmp r2, #0
↓ beq 10cc
if (cake_ddst(q->flow_mode))
cmp r3, #6
movne r4, r2
↓ bne 474
↓ b 450
host_load = 1;
0.20 44c: mov r2, #1
host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
2.19 450: ldr r1, [sp, #92] ; 0x5c
0.10 mov r3, #12
4.08 ldr r0, [sp, #60] ; 0x3c
0.08 mla r3, r3, r1, r0
3.43 add r3, r3, #75776 ; 0x12800
1.75 ldrh r4, [r3, #10]
cmp r2, r4
1.00 movcs r4, r2
uxth r2, r4
WARN_ON(host_load > CAKE_QUEUES);
0.99 474: cmp r2, #1024 ; 0x400
↓ bls 48c
movw r0, #0
movw r1, #2141 ; 0x85d
movt r0, #0
→ bl qdisc_peek_dequeued
/* The shifted prandom_u32() is a way to apply dithering to
* avoid accumulating roundoff errors
*/
flow->deficit += (b->flow_quantum * quantum_div[host_load] +
1.91 48c: ldr r2, [sp, #44] ; 0x2c
lsl r4, r4, #1
1.11 movw r3, #2052 ; 0x804
ldrh r3, [r9, r3]
2.62 ldrh r4, [r2, r4]
mul r4, r4, r3
(prandom_u32() >> 16)) >> 16;
3.53 → bl qdisc_peek_dequeued
__list_del_entry():
static inline void __list_del_entry(struct list_head *entry)
{
if (!__list_del_entry_valid(entry))
return;
__list_del(entry->prev, entry->next);
2.75 ldr ip, [r8, #4]
0.04 ldr lr, [r8]
cake_dequeue():
first_flow = false;
1.31 mov r1, #0
flow->deficit += (b->flow_quantum * quantum_div[host_load] +
0.10 ldr r2, [r8, #8]
0.90 add r0, r4, r0, lsr #16
list_move_tail(&flow->flowchain, &b->old_flows);
ldr r3, [sp, #76] ; 0x4c
flow->deficit += (b->flow_quantum * quantum_div[host_load] +
1.98 add r2, r2, r0, lsr #16
0.01 str r2, [r8, #8]
__list_del():
next->prev = prev;
1.88 str ip, [lr, #4]
cake_dequeue():
list_move_tail(&flow->flowchain, &b->old_flows);
0.02 add r3, r3, #64 ; 0x40
__write_once_size():
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
0.92 str lr, [ip]
list_add_tail():
__list_add(new, head->prev, head);
0.02 ldr r2, [r9, #2116] ; 0x844
__list_add():
next->prev = new;
0.77 str r8, [r9, #2116] ; 0x844
new->next = next;
0.07 str r3, [r8]
new->prev = prev;
0.86 str r2, [r8, #4]
__write_once_size():
0.02 str r8, [r2]
1.66 ↑ b 2bc
cake_dequeue():
goto retry;
}
/* Retrieve a packet via the AQM */
while (1) {
skb = cake_dequeue_one(sch);
1.11 4ec: ldr r0, [sp, #52] ; 0x34
0.06 → bl cake_dequeue_one
if (!skb) {
0.33 subs r3, r0, #0
0.18 str r3, [sp, #48] ; 0x30
0.08 ↓ bne 840
cobalt_queue_empty():
if (vars->p_drop &&
0.01 ldr ip, [sl, #40] ; 0x28
0.04 mov r8, sl
mov r9, fp
0.06 cmp ip, #0
↓ beq 1118
ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
ldrd r0, [sl, #32]
add r3, fp, #2064 ; 0x810
ldrd r6, [sp, #24]
if (vars->p_drop &&
ldrd r2, [r3]
ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
subs r4, r6, r0
sbc r5, r7, r1
if (vars->p_drop &&
cmp r5, r3
ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
mov r0, r4
if (vars->p_drop &&
cmpeq r4, r2
↓ bls 112c
if (vars->p_drop < p->p_dec)
ldr r3, [fp, #2084] ; 0x824
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
ldr r1, [sl, #16]
if (vars->p_drop < p->p_dec)
cmp ip, r3
↓ bcs 570
vars->p_drop = 0;
ldr r3, [sp, #48] ; 0x30
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
cmp r1, #0
vars->blue_timer = now;
mov r4, r6
mov r5, r7
strd r4, [sl, #32]
vars->p_drop = 0;
str r3, [sl, #40] ; 0x28
vars->dropping = false;
strb r3, [sl, #44] ; 0x2c
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
↓ bne 10e8
↓ b 638
vars->p_drop -= p->p_dec;
570: sub r3, ip, r3
vars->dropping = false;
ldr r0, [sp, #48] ; 0x30
clz ip, r3
vars->p_drop -= p->p_dec;
str r3, [sl, #40] ; 0x28
vars->blue_timer = now;
ldrd r2, [sp, #24]
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
cmp r1, #0
lsr ip, ip, #5
vars->dropping = false;
strb r0, [sl, #44] ; 0x2c
vars->blue_timer = now;
strd r2, [sl, #32]
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
↓ beq 630
ldrd r6, [sl, #24]
subs r2, r2, r6
sbc r3, r3, r7
cmp r2, #0
sbcs r3, r3, #0
↓ blt 630
vars->count--;
0.01 5b0: sub r2, r1, #1
0.02 str r2, [r8, #16]
cobalt_invsqrt():
if (vars->count < REC_INV_SQRT_CACHE)
cmp r2, #15
↓ bhi 5d4
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
ldr r3, [sp, #44] ; 0x2c
add r2, r3, r2, lsl #2
ldr r1, [r2, #2052] ; 0x804
0.00 str r1, [r8, #20]
↓ b 610
cobalt_newton_step():
invsqrt = vars->rec_inv_sqrt;
5d4: ldr r0, [r8, #20]
invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
umull r4, r5, r0, r0
val = (3LL << 32) - ((u64)vars->count * invsqrt2);
mov r4, #0
umull r2, r3, r5, r2
mov r5, #3
subs r4, r4, r2
sbc r5, r5, r3
val >>= 2; /* avoid overflow in following multiply */
lsr r2, r4, #2
orr r2, r2, r5, lsl #30
lsr r1, r5, #2
val = (val * invsqrt) >> (32 - 2 + 1);
umull r2, r3, r2, r0
mla r3, r0, r1, r3
lsr r1, r2, #31
orr r1, r1, r3, lsl #1
vars->rec_inv_sqrt = val;
str r1, [r8, #20]
reciprocal_scale():
*
* Return: a result based on @val in interval [0, @ep_ro).
*/
static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
{
return (u32)(((u64) val * ep_ro) >> 32);
610: ldr r2, [r9, #2056] ; 0x808
0.02 mov r0, #0
mov fp, r0
umull r2, r3, r2, r1
mla sl, r1, r0, r3
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
adds r4, sl, r6
adc r5, r0, r7
cobalt_queue_empty():
vars->drop_next = cobalt_control(vars->drop_next,
strd r4, [r8, #24]
cake_dequeue():
/* this queue was actually empty */
if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
630: cmp ip, #0
↓ beq 648
b->unresponsive_flow_count--;
638: movw r2, #2098 ; 0x832
ldrh r3, [r9, r2]
sub r3, r3, #1
strh r3, [r9, r2]
0.04 648: ldr ip, [r8, #40] ; 0x28
if (flow->cvars.p_drop || flow->cvars.count ||
0.04 64c: cmp ip, #0
ldr r0, [r8, #4]
ldr r1, [r8]
↓ bne 67c
0.26 65c: ldr r3, [r8, #16]
cmp r3, #0
0.04 ↓ bne 67c
ktime_compare():
if (cmp1 < cmp2)
0.01 ldrd r2, [r8, #24]
0.10 ldrd r4, [sp, #24]
cmp r4, r2
0.25 sbcs r3, r5, r3
↓ bge 76c
__list_del():
next->prev = prev;
0.27 67c: str r0, [r1, #4]
__list_add():
new->next = next;
ldr r2, [sp, #84] ; 0x54
__write_once_size():
0.03 str r1, [r0]
list_add_tail():
__list_add(new, head->prev, head);
ldr r3, [r9, #2124] ; 0x84c
__list_add():
next->prev = new;
0.21 str r8, [r9, #2124] ; 0x84c
new->prev = prev;
0.01 strd r2, [r8]
__write_once_size():
0.13 str r8, [r3]
cake_dequeue():
/* keep in the flowchain until the state has
* decayed to rest
*/
list_move_tail(&flow->flowchain,
&b->decaying_flows);
if (flow->set == CAKE_SET_BULK) {
ldrb r3, [r8, #52] ; 0x34
0.05 cmp r3, #3
↓ bne 730
b->bulk_flow_count--;
if (cake_dsrc(q->flow_mode))
0.04 ldr r3, [sp, #52] ; 0x34
b->bulk_flow_count--;
movw r1, #2092 ; 0x82c
0.01 ldrh r2, [r9, r1]
if (cake_dsrc(q->flow_mode))
add r3, r3, #16384 ; 0x4000
b->bulk_flow_count--;
0.02 sub r2, r2, #1
strh r2, [r9, r1]
if (cake_dsrc(q->flow_mode))
ldrb r2, [r3, #273] ; 0x111
and r1, r2, #5
0.01 cmp r1, #5
↓ bne 6f0
srchost->srchost_bulk_flow_count--;
ldr r1, [sp, #88] ; 0x58
mov r2, #12
ldr r0, [sp, #60] ; 0x3c
mla r2, r2, r1, r0
add r2, r2, #75776 ; 0x12800
ldrh r1, [r2, #8]
sub r1, r1, #1
strh r1, [r2, #8]
ldrb r2, [r3, #273] ; 0x111
if (cake_ddst(q->flow_mode))
6f0: and r2, r2, #6
cmp r2, #6
↓ bne 71c
dsthost->dsthost_bulk_flow_count--;
0.01 ldr r2, [sp, #92] ; 0x5c
mov r3, #12
ldr r1, [sp, #60] ; 0x3c
mla r3, r3, r2, r1
add r3, r3, #75776 ; 0x12800
ldrh r2, [r3, #10]
sub r2, r2, #1
strh r2, [r3, #10]
b->decaying_flow_count++;
71c: add r2, r9, #2096 ; 0x830
0.01 ldrh r3, [r2]
add r3, r3, #1
0.01 strh r3, [r2]
↓ b 75c
} else if (flow->set == CAKE_SET_SPARSE ||
0.11 730: sub r3, r3, #1
0.22 cmp r3, #1
↓ bhi 75c
flow->set == CAKE_SET_SPARSE_WAIT) {
b->sparse_flow_count--;
0.02 movw r1, #2094 ; 0x82e
b->decaying_flow_count++;
0.01 add r2, r9, #2096 ; 0x830
b->sparse_flow_count--;
0.01 ldrh r3, [r9, r1]
0.04 sub r3, r3, #1
0.00 strh r3, [r9, r1]
b->decaying_flow_count++;
0.01 ldrh r3, [r2]
0.01 add r3, r3, #1
0.01 strh r3, [r2]
}
flow->set = CAKE_SET_DECAYING;
0.37 75c: mov r3, #4
first_flow = false;
0.01 mov r1, #0
flow->set = CAKE_SET_DECAYING;
0.01 strb r3, [r8, #52] ; 0x34
↑ b 4c
__list_del():
next->prev = prev;
0.03 76c: str r0, [r1, #4]
__write_once_size():
str r1, [r0]
cake_dequeue():
} else {
/* remove empty queue from the flowchain */
list_del_init(&flow->flowchain);
if (flow->set == CAKE_SET_SPARSE ||
0.03 ldrb r3, [r8, #52] ; 0x34
__write_once_size():
str r8, [r8]
cake_dequeue():
sub r2, r3, #1
INIT_LIST_HEAD():
list->prev = list;
str r8, [r8, #4]
cake_dequeue():
cmp r2, #1
↓ bhi 7a0
flow->set == CAKE_SET_SPARSE_WAIT)
b->sparse_flow_count--;
movw r2, #2094 ; 0x82e
ldrh r3, [r9, r2]
sub r3, r3, #1
strh r3, [r9, r2]
↓ b 834
else if (flow->set == CAKE_SET_BULK) {
7a0: cmp r3, #3
0.01 ↓ bne 824
b->bulk_flow_count--;
if (cake_dsrc(q->flow_mode))
ldr r3, [sp, #52] ; 0x34
b->bulk_flow_count--;
movw r1, #2092 ; 0x82c
ldrh r2, [r9, r1]
if (cake_dsrc(q->flow_mode))
add r3, r3, #16384 ; 0x4000
b->bulk_flow_count--;
sub r2, r2, #1
strh r2, [r9, r1]
if (cake_dsrc(q->flow_mode))
ldrb r2, [r3, #273] ; 0x111
and r1, r2, #5
cmp r1, #5
↓ bne 7f4
srchost->srchost_bulk_flow_count--;
ldr r1, [sp, #88] ; 0x58
mov r2, #12
ldr r0, [sp, #60] ; 0x3c
mla r2, r2, r1, r0
add r2, r2, #75776 ; 0x12800
ldrh r1, [r2, #8]
sub r1, r1, #1
strh r1, [r2, #8]
ldrb r2, [r3, #273] ; 0x111
if (cake_ddst(q->flow_mode))
7f4: and r2, r2, #6
cmp r2, #6
↓ bne 834
dsthost->dsthost_bulk_flow_count--;
ldr r2, [sp, #92] ; 0x5c
mov r3, #12
ldr r1, [sp, #60] ; 0x3c
mla r3, r3, r2, r1
add r3, r3, #75776 ; 0x12800
ldrh r2, [r3, #10]
sub r2, r2, #1
strh r2, [r3, #10]
↓ b 834
} else
b->decaying_flow_count--;
824: add r2, r9, #2096 ; 0x830
ldrh r3, [r2]
0.01 sub r3, r3, #1
strh r3, [r2]
flow->set = CAKE_SET_NONE;
0.01 834: mov r1, #0
strb r1, [r8, #52] ; 0x34
↑ b 4c
cobalt_should_drop():
sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
0.21 840: ldr r3, [sp, #48] ; 0x30
over_target = sojourn > p->target &&
0.01 add r0, fp, #2064 ; 0x810
schedule = ktime_sub(now, vars->drop_next);
0.03 ldrd r4, [sl, #24]
0.01 ldrd r6, [sp, #24]
sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
0.21 ldrd r2, [r3, #32]
schedule = ktime_sub(now, vars->drop_next);
0.03 subs r8, r6, r4
0.18 ldr r1, [sl, #16]
sbc r9, r7, r5
sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
0.05 subs r6, r6, r2
0.01 sbc r7, r7, r3
schedule = ktime_sub(now, vars->drop_next);
0.04 strd r4, [sp, #32]
cmp r1, #0
sojourn > p->mtu_time * bulk_flows * 2 &&
0.05 ldrd r4, [r0]
0.01 mvn lr, r9
sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
0.07 strd r6, [sp, #16]
lsr lr, lr, #31
0.12 moveq lr, #0
sojourn > p->mtu_time * bulk_flows * 2 &&
0.01 cmp r7, r5
0.06 ldrb ip, [sl, #44] ; 0x2c
cmpeq r6, r4
0.46 ↓ bls 8f8
cake_dequeue():
}
/* Last packet in queue may be marked, shouldn't be dropped */
if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
(b->bulk_flow_count *
!!(q->rate_flags &
0.01 ldr r4, [sp, #56] ; 0x38
(b->bulk_flow_count *
0.01 movw r5, #2092 ; 0x82c
ldrh r5, [fp, r5]
cobalt_should_drop():
sojourn > p->mtu_time * bulk_flows * 2 &&
0.01 ldrd r6, [r0, #8]
cake_dequeue():
!!(q->rate_flags &
ldrh r4, [r4, #56] ; 0x38
cobalt_should_drop():
sojourn > p->mtu_time * bulk_flows * 2 &&
0.04 adds r2, r6, r6
cake_dequeue():
!!(q->rate_flags &
ubfx r4, r4, #2, #1
cobalt_should_drop():
sojourn > p->mtu_time * bulk_flows * 2 &&
0.04 adc r3, r7, r7
0.02 mov r6, r2
cake_dequeue():
(b->bulk_flow_count *
mul r4, r5, r4
cobalt_should_drop():
sojourn > p->mtu_time * bulk_flows * 2 &&
mov r7, r3
0.01 strd r6, [sp, #64] ; 0x40
umull r6, r7, r4, r2
0.04 ldrd r2, [sp, #64] ; 0x40
0.03 mla r7, r4, r3, r7
over_target = sojourn > p->target &&
ldrd r4, [sp, #16]
0.05 cmp r5, r7
cmpeq r4, r6
0.01 ↓ bls 8f8
sojourn > p->mtu_time * 4;
0.01 adds r4, r2, r2
0.01 adc r5, r3, r3
sojourn > p->mtu_time * bulk_flows * 2 &&
ldrd r2, [sp, #16]
0.05 cmp r3, r5
cmpeq r2, r4
0.02 ↓ bhi 90c
} else if (vars->dropping) {
0.21 8f8: cmp ip, #0
vars->ecn_marked = false;
0.04 mov r3, #0
strb r3, [sl, #45] ; 0x2d
} else if (vars->dropping) {
0.03 ↓ bne 964
↓ b 974
if (!vars->dropping) {
0.01 90c: cmp ip, #0
vars->ecn_marked = false;
mov r3, #0
0.03 strb r3, [sl, #45] ; 0x2d
if (!vars->dropping) {
0.01 ↓ bne 954
vars->dropping = true;
0.03 mov r3, #1
strb r3, [sl, #44] ; 0x2c
reciprocal_scale():
ldrd r2, [r0, #-8]
mov r5, #0
ldr r0, [sl, #20]
mov r3, r5
0.07 umull r4, r5, r2, r0
mla r6, r0, r3, r5
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
ldrd r2, [sp, #24]
0.00 adds r2, r2, r6
adc r3, r3, ip
0.01 mov r6, r2
mov r7, r3
cobalt_should_drop():
vars->drop_next = cobalt_control(now,
strd r6, [sl, #24]
if (!vars->count)
0.02 954: cmp r1, #0
vars->count = 1;
moveq r3, #1
0.03 streq r3, [sl, #16]
0.01 ↓ b 974
if (next_due && vars->dropping) {
0.01 964: cmp lr, #0
vars->dropping = false;
strb r3, [sl, #44] ; 0x2c
if (next_due && vars->dropping) {
↓ bne b98
↓ b cbc
0.07 974: cmp lr, #0
↓ beq cbc
0.02 ldrb r3, [sl, #44] ; 0x2c
cmp r3, #0
ldrdeq r2, [sl, #24]
ldreq r1, [sl, #16]
strdeq r2, [sp, #32]
↓ beq b98
INET_ECN_set_ce():
ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
}
static inline int INET_ECN_set_ce(struct sk_buff *skb)
{
switch (skb->protocol) {
0.02 ldr r3, [sp, #48] ; 0x30
ldrh r3, [r3, #148] ; 0x94
cmp r3, #8
↓ beq 9b4
movw r2, #56710 ; 0xdd86
cmp r3, r2
↓ beq a3c
↓ b ad0
skb_network_header():
skb->transport_header += offset;
}
static inline unsigned char *skb_network_header(const struct sk_buff *skb)
{
return skb->head + skb->network_header;
0.01 9b4: ldr r0, [sp, #48] ; 0x30
ldrh r2, [r0, #152] ; 0x98
ldr r1, [r0, #164] ; 0xa4
INET_ECN_set_ce():
case cpu_to_be16(ETH_P_IP):
if (skb_network_header(skb) + sizeof(struct iphdr) <=
add r3, r2, #20
ldr r0, [r0, #156] ; 0x9c
add r3, r1, r3
cmp r0, r3
↓ bcc ad0
skb_network_header():
add r2, r1, r2
IP_ECN_set_ce():
u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
ldrb r1, [r2, #1]
add r3, r1, #1
if (!(ecn & 2))
tst r3, #2
u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
0.03 and r3, r3, #3
if (!(ecn & 2))
↓ bne a04
return !ecn;
0.01 cmp r3, #0
moveq r2, #1
movne r2, #0
movne r4, #1
moveq r4, #0
↓ b adc
u32 check = (__force u32)iph->check;
0.01 a04: ldrh r0, [r2, #10]
iph->tos |= INET_ECN_CE;
orr r1, r1, #3
check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn);
rev16 r3, r3
iph->tos |= INET_ECN_CE;
strb r1, [r2, #1]
add r1, r0, #64256 ; 0xfb00
mov r4, #0
add r1, r1, #255 ; 0xff
iph->check = (__force __sum16)(check + (check>=0xFFFF));
movw r0, #65534 ; 0xfffe
check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn);
0.04 uxtah r3, r1, r3
iph->check = (__force __sum16)(check + (check>=0xFFFF));
cmp r3, r0
addhi r3, r3, #1
strh r3, [r2, #10]
iph->tos |= INET_ECN_CE;
mov r2, #1
↓ b adc
skb_network_header():
a3c: ldr lr, [sp, #48] ; 0x30
ldrh r1, [lr, #152] ; 0x98
ldr r0, [lr, #164] ; 0xa4
INET_ECN_set_ce():
skb_tail_pointer(skb))
return IP_ECN_set_ce(ip_hdr(skb));
break;
case cpu_to_be16(ETH_P_IPV6):
if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
add r3, r1, #40 ; 0x28
ldr r2, [lr, #156] ; 0x9c
add r3, r0, r3
cmp r2, r3
↓ bcc ad0
ipv6_get_dsfield():
}
static inline __u8 ipv6_get_dsfield(const struct ipv6hdr *ipv6h)
{
return ntohs(*(const __be16 *)ipv6h) >> 4;
ldrh r2, [r0, r1]
rev16 r2, r2
IP6_ECN_set_ce():
if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
ubfx r2, r2, #4, #2
cmp r2, #0
moveq r4, #1
↓ beq adc
from = *(__be32 *)iph;
ldr r2, [r0, r1]
to = from | htonl(INET_ECN_CE << 20);
orr ip, r2, #12288 ; 0x3000
*(__be32 *)iph = to;
str ip, [r0, r1]
if (skb->ip_summed == CHECKSUM_COMPLETE)
ldrb r3, [lr, #104] ; 0x68
and r3, r3, #96 ; 0x60
cmp r3, #64 ; 0x40
movne r4, #0
movne r2, #1
↓ bne adc
ldr r3, [lr, #112] ; 0x70
csum_sub():
}
#endif
static inline __wsum csum_sub(__wsum csum, __wsum addend)
{
return csum_add(csum, ~addend);
mvn r2, r2
mov r1, #0
IP6_ECN_set_ce():
skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
mov r4, #0
adds r3, r3, r2
mov r2, #1
csum_add():
res += (__force u32)addend;
add r3, r3, ip
movcs r1, #1
add r3, r3, r1
IP6_ECN_set_ce():
ldr r1, [sp, #48] ; 0x30
csum_add():
return (__force __wsum)(res + (res < (__force u32)addend));
cmp ip, r3
addhi r3, r3, #1
IP6_ECN_set_ce():
str r3, [r1, #112] ; 0x70
↓ b adc
INET_ECN_set_ce():
if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
ad0: mov r4, #1
mov r2, #0
↓ b adc
cobalt_should_drop():
vars->count++;
0.03 adc: ldr r3, [sl, #16]
drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
strb r2, [sl, #45] ; 0x2d
vars->count++;
add r3, r3, #1
str r3, [sl, #16]
if (!vars->count)
cmp r3, #0
vars->count--;
mvneq r2, #0
moveq ip, r3
streq r2, [sl, #16]
↓ beq b24
cobalt_invsqrt():
if (vars->count < REC_INV_SQRT_CACHE)
cmp r3, #15
↓ bhi b1c
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
ldr r2, [sp, #44] ; 0x2c
add r3, r2, r3, lsl #2
ldr ip, [r3, #2052] ; 0x804
0.01 str ip, [sl, #20]
↓ b b68
b1c: mov r2, r3
mov ip, #0
cobalt_newton_step():
invsqrt = vars->rec_inv_sqrt;
b24: ldr lr, [sl, #20]
invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
umull r0, r1, lr, lr
val = (3LL << 32) - ((u64)vars->count * invsqrt2);
mov r0, #0
umull r2, r3, r1, r2
subs r0, r0, r2
val >>= 2; /* avoid overflow in following multiply */
lsr r2, r0, #2
val = (3LL << 32) - ((u64)vars->count * invsqrt2);
mla r3, ip, r1, r3
mov r1, #3
sbc r1, r1, r3
mov r7, r1
val >>= 2; /* avoid overflow in following multiply */
lsr r1, r1, #2
orr r2, r2, r7, lsl #30
val = (val * invsqrt) >> (32 - 2 + 1);
umull r2, r3, r2, lr
mla r3, lr, r1, r3
lsr ip, r2, #31
orr ip, ip, r3, lsl #1
vars->rec_inv_sqrt = val;
str ip, [sl, #20]
reciprocal_scale():
b68: ldr r0, [fp, #2056] ; 0x808
mov lr, #0
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
ldrd r6, [sl, #24]
reciprocal_scale():
umull r0, r1, r0, ip
mla r8, ip, lr, r1
cobalt_control():
adds r2, r8, r6
cobalt_should_drop():
schedule = ktime_sub(now, vars->drop_next);
ldrd r8, [sp, #24]
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
adc r3, lr, r7
cobalt_should_drop():
vars->drop_next = cobalt_control(vars->drop_next,
strd r2, [sl, #24]
schedule = ktime_sub(now, vars->drop_next);
subs r8, r8, r2
sbc r9, r9, r3
↓ b cc0
vars->drop_next = cobalt_control(vars->drop_next,
0.03 b98: add ip, fp, #2064 ; 0x810
cobalt_newton_step():
invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
mov lr, #0
cobalt_should_drop():
vars->count--;
ba0: sub r1, r1, #1
cobalt_newton_step():
val = (val * invsqrt) >> (32 - 2 + 1);
mov r3, #0
mov r2, #0
cobalt_invsqrt():
if (vars->count < REC_INV_SQRT_CACHE)
cmp r1, #15
cobalt_newton_step():
val = (val * invsqrt) >> (32 - 2 + 1);
strd r2, [sp, #16]
cobalt_invsqrt():
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
ldr r3, [sp, #44] ; 0x2c
cobalt_should_drop():
vars->count--;
str r1, [sl, #16]
cobalt_invsqrt():
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
add r0, r3, r1, lsl #2
if (vars->count < REC_INV_SQRT_CACHE)
↓ bhi c1c
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
ldr r0, [r0, #2052] ; 0x804
reciprocal_scale():
mov r9, #0
cobalt_invsqrt():
str r0, [sl, #20]
reciprocal_scale():
ldrd r2, [ip, #-8]
umull r6, r7, r2, r0
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
ldrd r2, [sp, #32]
reciprocal_scale():
mla r4, r0, r9, r7
cobalt_should_drop():
schedule = ktime_sub(now, vars->drop_next);
ldrd r8, [sp, #24]
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
adds r2, r2, r4
adc r3, r3, lr
cobalt_should_drop():
schedule = ktime_sub(now, vars->drop_next);
subs r8, r8, r2
sbc r9, r9, r3
next_due = vars->count && ktime_to_ns(schedule) >= 0;
cmp r1, #0
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
mov r4, r2
0.01 mov r5, r3
cobalt_should_drop():
vars->drop_next = cobalt_control(vars->drop_next,
strd r4, [sl, #24]
next_due = vars->count && ktime_to_ns(schedule) >= 0;
↓ bne ca0
if (vars->p_drop)
0.01 ldr r3, [sl, #40] ; 0x28
bool next_due, over_target, drop = false;
0.01 mov r4, r1
if (vars->p_drop)
cmp r3, #0
0.01 ↓ bne ccc
↓ b ce8
cobalt_newton_step():
invsqrt = vars->rec_inv_sqrt;
c1c: ldr r0, [sl, #20]
val = (3LL << 32) - ((u64)vars->count * invsqrt2);
mov r2, #0
mov r3, #3
invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
umull r4, r5, r0, r0
val = (3LL << 32) - ((u64)vars->count * invsqrt2);
umull r6, r7, r5, r1
subs r2, r2, r6
sbc r3, r3, r7
reciprocal_scale():
mov r7, #0
cobalt_newton_step():
val >>= 2; /* avoid overflow in following multiply */
lsr r4, r2, #2
orr r4, r4, r3, lsl #30
lsr r6, r3, #2
val = (val * invsqrt) >> (32 - 2 + 1);
umull r4, r5, r4, r0
mla r5, r0, r6, r5
lsr r3, r4, #31
orr r3, r3, r5, lsl #1
str r3, [sp, #16]
lsr r3, r5, #31
str r3, [sp, #20]
vars->rec_inv_sqrt = val;
ldrd r2, [sp, #16]
reciprocal_scale():
mov r3, #0
cobalt_newton_step():
str r2, [sl, #20]
reciprocal_scale():
ldrd r4, [ip, #-8]
mul r0, r4, r3
mla r0, r2, r7, r0
umull r2, r3, r4, r2
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
ldrd r4, [sp, #32]
reciprocal_scale():
add r8, r0, r3
cobalt_control():
adds r4, r4, r8
cobalt_should_drop():
schedule = ktime_sub(now, vars->drop_next);
ldrd r8, [sp, #24]
cobalt_control():
return ktime_add_ns(t, reciprocal_scale(interval,
adc r5, r5, lr
cobalt_should_drop():
vars->drop_next = cobalt_control(vars->drop_next,
strd r4, [sl, #24]
schedule = ktime_sub(now, vars->drop_next);
subs r8, r8, r4
sbc r9, r9, r5
while (next_due) {
0.01 ca0: cmp r8, #0
sbcs r3, r9, #0
ldrdge r2, [sl, #24]
strdge r2, [sp, #32]
↑ bge ba0
bool next_due, over_target, drop = false;
mov r4, #0
↓ b cc0
0.16 cbc: mov r4, lr
if (vars->p_drop)
0.02 cc0: ldr r3, [sl, #40] ; 0x28
0.02 cmp r3, #0
↓ beq cdc
drop |= (prandom_u32() < vars->p_drop);
ccc: → bl qdisc_peek_dequeued
ldr r3, [sl, #40] ; 0x28
cmp r0, r3
orrcc r4, r4, #1
if (!vars->count)
0.56 cdc: ldr r3, [sl, #16]
cmp r3, #0
0.06 ↓ bne d0c
vars->drop_next = ktime_add_ns(now, p->interval);
0.13 ce8: add r3, fp, #2064 ; 0x810
0.05 ldrd r0, [sp, #24]
ldrd r2, [r3, #-8]
0.10 adds r0, r0, r2
adc r1, r1, r3
0.06 mov r2, r0
mov r3, r1
0.03 strd r2, [sl, #24]
↓ b d3c
else if (ktime_to_ns(schedule) > 0 && !drop)
0.17 d0c: cmp r8, #1
sbcs r3, r9, #0
0.09 eor r3, r4, #1
andge r3, r3, #1
0.03 movlt r3, #0
cmp r3, #0
0.04 ↓ beq d3c
vars->drop_next = now;
ldrd r2, [sp, #24]
mov r9, sl
mov r8, fp
strd r2, [sl, #24]
↓ b dfc
cake_dequeue():
if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
0.65 d3c: cmp r4, #0
↓ beq df4
CAKE_FLAG_INGRESS))) ||
ldr r3, [sl, #-8]
cmp r3, #0
↓ beq df4
!flow->head)
break;
/* drop this packet, get another one */
if (q->rate_flags & CAKE_FLAG_INGRESS) {
ldr r3, [sp, #56] ; 0x38
0.01 ldrh r3, [r3, #56] ; 0x38
tst r3, #4
↓ beq d98
len = cake_advance_shaper(q, b, skb,
ldrd r2, [sp, #24]
ldr r1, [sp, #60] ; 0x3c
ldr r0, [sp, #72] ; 0x48
strd r2, [sp]
mov r3, #1
0.01 ldr r2, [sp, #48] ; 0x30
str r3, [sp, #8]
0.01 → bl cake_advance_shaper
now, true);
flow->deficit -= len;
ldr r3, [sl, #8]
sub r3, r3, r0
str r3, [sl, #8]
b->tin_deficit -= len;
ldr r3, [fp, #2160] ; 0x870
sub r0, r3, r0
str r0, [fp, #2160] ; 0x870
}
flow->dropped++;
d98: ldr r3, [sl, #12]
b->tin_dropped++;
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
mov r1, #1
ldr r4, [sp, #52] ; 0x34
flow->dropped++;
add r3, r3, r1
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
ldr r5, [sp, #48] ; 0x30
flow->dropped++;
str r3, [sl, #12]
b->tin_dropped++;
ldr r3, [fp, #2168] ; 0x878
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
mov r0, r4
b->tin_dropped++;
add r3, r3, r1
str r3, [fp, #2168] ; 0x878
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
ldr r2, [r5, #24]
→ bl qdisc_peek_dequeued
qstats_drop_inc():
sch->qstats.drops += count;
}
static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
{
qstats->drops++;
ldr r3, [r4, #108] ; 0x6c
cake_dequeue():
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
qdisc_drop(skb, sch);
#else
qdisc_qstats_drop(sch);
kfree_skb(skb);
mov r0, r5
qstats_drop_inc():
add r3, r3, #1
str r3, [r4, #108] ; 0x6c
cake_dequeue():
→ bl qdisc_peek_dequeued
first_flow = false;
mov r1, #0
#endif
if (q->rate_flags & CAKE_FLAG_INGRESS)
ldr r3, [sp, #56] ; 0x38
ldrh r3, [r3, #56] ; 0x38
tst r3, #4
↑ beq 4ec
↑ b 2b0
0.30 df4: mov r9, sl
mov r8, fp
goto retry;
}
b->tin_ecn_mark += !!flow->cvars.ecn_marked;
0.02 dfc: ldrb r2, [r9, #45] ; 0x2d
bstats_update():
_bstats_update(bstats,
mov r7, #0
cake_dequeue():
0.08 ldr r3, [r8, #2172] ; 0x87c
_bstats_update():
bstats->bytes += bytes;
ldr lr, [sp, #52] ; 0x34
cake_dequeue():
add r3, r3, r2
skb_end_pointer():
return skb->end;
ldr r2, [sp, #48] ; 0x30
cake_dequeue():
0.33 str r3, [r8, #2172] ; 0x87c
_bstats_update():
ldr r4, [lr, #88] ; 0x58
skb_end_pointer():
0.03 ldr r3, [r2, #160] ; 0xa0
bstats_update():
_bstats_update(bstats,
ldr r6, [r2, #24]
cake_dequeue():
qdisc_bstats_update(sch, skb);
/* collect delay stats */
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
0.05 ldr sl, [sp, #76] ; 0x4c
bstats_update():
ldrh r2, [r3, #4]
0.02 cmp r2, #0
ldrhne r2, [r3, #6]
0.32 moveq r2, #1
_bstats_update():
bstats->bytes += bytes;
ldr r3, [lr, #84] ; 0x54
0.05 str r3, [sp, #32]
bstats->packets += packets;
ldr r3, [lr, #92] ; 0x5c
bstats->bytes += bytes;
0.11 str r4, [sp, #36] ; 0x24
bstats->packets += packets;
add r3, r3, r2
bstats->bytes += bytes;
0.07 ldrd r4, [sp, #32]
bstats->packets += packets;
str r3, [lr, #92] ; 0x5c
cake_dequeue():
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
0.20 ldr r3, [sp, #48] ; 0x30
_bstats_update():
bstats->bytes += bytes;
adds r4, r4, r6
0.05 adc r5, r5, r7
str r4, [lr, #84] ; 0x54
0.08 str r5, [lr, #88] ; 0x58
cake_dequeue():
ldrd r4, [r3, #32]
b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
0.03 ldrd r2, [sl, #152] ; 0x98
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
ldrd r6, [sp, #24]
cake_ewma():
avg -= avg >> shift;
0.64 lsr r0, r2, #8
cake_dequeue():
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
subs r6, r6, r4
cake_ewma():
avg -= avg >> shift;
0.01 orr r0, r0, r3, lsl #24
cake_dequeue():
delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
sbc r7, r7, r5
cake_ewma():
avg -= avg >> shift;
0.13 lsr r1, r3, #8
subs r2, r2, r0
0.02 sbc r3, r3, r1
avg += sample >> shift;
lsr lr, r6, #8
0.10 orr lr, lr, r7, lsl #24
str lr, [sp, #16]
0.16 lsr lr, r7, #8
str lr, [sp, #20]
avg -= avg >> shift;
0.06 mov r4, r2
mov r5, r3
avg += sample >> shift;
0.04 ldrd r2, [sp, #16]
adds r2, r2, r4
0.10 adc r3, r3, r5
cake_dequeue():
b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
strd r2, [sl, #152] ; 0x98
b->peak_delay = cake_ewma(b->peak_delay, delay,
0.08 ldrd r2, [sl, #160] ; 0xa0
cmp r3, r7
0.09 cmpeq r2, r6
↓ bcs ee8
0.02 mov r1, #2
lsr r0, r6, r1
orr r0, r0, r7, lsl #30
mov sl, r0
lsr r0, r7, r1
mov fp, r0
↓ b ef0
0.06 ee8: ldrd sl, [sp, #16]
0.01 mov r1, #8
delay > b->peak_delay ? 2 : 8);
b->base_delay = cake_ewma(b->base_delay, delay,
0.08 ef0: ldr lr, [sp, #76] ; 0x4c
cake_ewma():
avg -= avg >> shift;
0.09 rsb r0, r1, #32
0.01 lsr r4, r2, r1
0.01 sub ip, r1, #32
0.04 orr r4, r4, r3, lsl r0
avg += sample >> shift;
0.02 adds sl, sl, r2
avg -= avg >> shift;
0.03 orr r4, r4, r3, lsr ip
cake_dequeue():
b->base_delay = cake_ewma(b->base_delay, delay,
0.05 add r0, lr, #176 ; 0xb0
cake_ewma():
avg += sample >> shift;
0.05 adc fp, fp, r3
avg -= avg >> shift;
0.05 lsr r5, r3, r1
avg += sample >> shift;
0.03 subs r2, sl, r4
0.01 sbc r3, fp, r5
cake_dequeue():
b->peak_delay = cake_ewma(b->peak_delay, delay,
0.01 strd r2, [lr, #160] ; 0xa0
b->base_delay = cake_ewma(b->base_delay, delay,
0.01 ldrd r4, [r0, #-8]
0.08 cmp r5, r7
0.03 cmpeq r4, r6
0.02 movhi ip, #2
0.08 lsrhi r3, r6, ip
0.02 orrhi r3, r3, r7, lsl #30
0.03 movls ip, #8
0.02 strhi r3, [sp, #16]
cake_ewma():
avg -= avg >> shift;
0.04 rsb lr, ip, #32
lsrhi r3, r7, ip
0.02 lsr r2, r4, ip
0.04 strhi r3, [sp, #20]
0.08 sub r1, ip, #32
avg += sample >> shift;
0.02 ldrd sl, [sp, #16]
avg -= avg >> shift;
0.02 orr r2, r2, r5, lsl lr
orr r2, r2, r5, lsr r1
0.02 lsr r3, r5, ip
avg += sample >> shift;
0.03 adds sl, sl, r4
cake_dequeue():
delay < b->base_delay ? 2 : 8);
len = cake_advance_shaper(q, b, skb, now, false);
ldr r1, [sp, #60] ; 0x3c
cake_ewma():
avg += sample >> shift;
0.02 adc fp, fp, r5
0.01 subs r6, sl, r2
0.05 sbc r7, fp, r3
cake_dequeue():
len = cake_advance_shaper(q, b, skb, now, false);
0.03 ldrd r4, [sp, #24]
b->base_delay = cake_ewma(b->base_delay, delay,
0.03 strd r6, [r0, #-8]
len = cake_advance_shaper(q, b, skb, now, false);
0.02 mov ip, #0
0.07 ldr r6, [sp, #72] ; 0x48
0.02 ldr r2, [sp, #48] ; 0x30
0.02 strd r4, [sp]
0.09 mov r0, r6
0.03 str ip, [sp, #8]
0.03 → bl cake_advance_shaper
flow->deficit -= len;
0.22 ldr r3, [r9, #8]
b->tin_deficit -= len;
if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
0.01 movw r2, #16416 ; 0x4020
0.05 add r1, r6, r2
flow->deficit -= len;
0.04 sub r3, r3, r0
0.04 str r3, [r9, #8]
b->tin_deficit -= len;
0.22 ldr r3, [r8, #2160] ; 0x870
sub r3, r3, r0
0.02 str r3, [r8, #2160] ; 0x870
if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
ldrd r0, [r1, #-8]
ktime_compare():
if (cmp1 > cmp2)
0.01 cmp r4, r0
sbcs r3, r5, r1
cake_dequeue():
0.04 ldr r3, [sp, #52] ; 0x34
ldr r3, [r3, #76] ; 0x4c
ktime_compare():
0.14 ↓ bge 1010
cake_dequeue():
cmp r3, #0
0.05 ↓ beq 1070
u64 next = min(ktime_to_ns(q->time_next_packet),
0.17 ldr r3, [sp, #72] ; 0x48
0.02 add r2, r3, r2
ldrd r2, [r2]
0.05 cmp r0, r2
sbcs ip, r1, r3
0.15 movlt r3, r1
ktime_to_ns(q->failsafe_next_packet));
qdisc_watchdog_schedule_ns(&q->watchdog, next);
ldr r1, [sp, #72] ; 0x48
u64 next = min(ktime_to_ns(q->time_next_packet),
movlt r2, r0
qdisc_watchdog_schedule_ns(&q->watchdog, next);
0.01 add r0, r1, #16384 ; 0x4000
0.02 add r0, r0, #104 ; 0x68
→ bl qdisc_peek_dequeued
if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
↓ b 1090
} else if (!sch->q.qlen) {
1010: cmp r3, #0
↓ bne 1090
↓ b 1070
int i;
for (i = 0; i < q->tin_cnt; i++) {
if (q->tins[i].decaying_flow_count) {
0.05 101c: ldr r2, [lr, #264] ; 0x108
add r2, r2, r3
0.01 add r3, r3, #88064 ; 0x15800
add r1, r2, #88064 ; 0x15800
0.03 add r3, r3, #192 ; 0xc0
add r1, r1, #48 ; 0x30
0.05 ldrh r1, [r1]
cmp r1, #0
0.03 ↓ beq 1084
ktime_t next = \
ktime_add_ns(now,
q->tins[i].cparams.target);
qdisc_watchdog_schedule_ns(&q->watchdog,
ldr r3, [sp, #72] ; 0x48
ktime_add_ns(now,
0.05 add r2, r2, #88064 ; 0x15800
qdisc_watchdog_schedule_ns(&q->watchdog,
ldrd r4, [sp, #24]
0.05 add r0, r3, #16384 ; 0x4000
ktime_add_ns(now,
ldrd r2, [r2, #16]
qdisc_watchdog_schedule_ns(&q->watchdog,
0.02 add r0, r0, #104 ; 0x68
adds r4, r4, r2
0.01 adc r5, r5, r3
mov r2, r4
0.03 mov r3, r5
→ bl qdisc_peek_dequeued
ktime_to_ns(next));
break;
↓ b 1090
for (i = 0; i < q->tin_cnt; i++) {
0.11 1070: ldr r3, [sp, #56] ; 0x38
0.03 ldr lr, [sp, #52] ; 0x34
ldrh ip, [r3, #14]
0.02 mov r3, #0
mov r0, r3
0.07 1084: cmp ip, r0
add r0, r0, #1
0.04 ↑ bne 101c
}
}
}
if (q->overflow_timeout)
0.63 1090: ldr r2, [sp, #56] ; 0x38
0.07 ldrh r3, [r2, #12]
0.01 cmp r3, #0
q->overflow_timeout--;
0.09 subne r3, r3, #1
0.03 strhne r3, [r2, #12]
0.03 ↓ b 114c
cobalt_queue_empty():
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
0.11 10a8: ldrd r6, [r8, #24]
0.01 ldrd r2, [sp, #24]
subs r2, r2, r6
0.02 sbc r3, r3, r7
cmp r2, #0
0.01 sbcs r3, r3, #0
movge ip, #0
↑ bge 5b0
↑ b 64c
cake_dequeue():
if (cake_ddst(q->flow_mode))
0.15 10cc: cmp r3, #6
2.80 movne r4, #1
0.17 ↑ bne 48c
0.98 ↑ b 44c
b->tin_deficit += b->tin_quantum_band;
10dc: ldrh r3, [r9, r7]
str r3, [r9, #2160] ; 0x870
↑ b 144
cobalt_queue_empty():
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
10e8: ldrd r6, [sl, #24]
ldrd r2, [sp, #24]
subs r2, r2, r6
sbc r3, r3, r7
cmp r2, #0
sbcs r3, r3, #0
movge ip, #1
↑ bge 5b0
↑ b 638
0.25 110c: ldr r0, [r8, #4]
0.02 ldr r1, [r8]
0.02 ↑ b 65c
0.34 1118: ldr r1, [sl, #16]
vars->dropping = false;
0.04 strb ip, [sl, #44] ; 0x2c
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
0.01 cmp r1, #0
0.05 ↑ bne 10a8
0.05 ↑ b 110c
112c: ldr r1, [sl, #16]
vars->dropping = false;
ldr r3, [sp, #48] ; 0x30
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
cmp r1, #0
vars->dropping = false;
strb r3, [sl, #44] ; 0x2c
ldreq r0, [sl, #4]
ldreq r1, [sl]
if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
↑ beq 67c
↑ b 10a8
cake_dequeue():
return skb;
}
0.46 114c: ldr r0, [sp, #48] ; 0x30
0.02 add sp, sp, #100 ; 0x64
0.10 pop {r4, r5, r6, r7, r8, r9, sl, fp, pc}
[-- Attachment #3: perf.hist --]
[-- Type: application/octet-stream, Size: 1683 bytes --]
74.68% swapper 0x2678 K [k] cake_dequeue
10.61% swapper 0x5ad0 K [k] cake_enqueue
6.35% swapper 0x382c K [k] cake_hash
3.17% swapper 0x478 K [k] cake_dequeue_one
1.98% swapper 0x40c K [k] cake_advance_shaper
0.85% swapper 0x3cfc K [k] cake_overhead
0.69% swapper 0x138 K [k] cake_calc_overhead
0.39% swapper 0x49e8 K [k] cake_ack_filter
0.25% sh 0x2630 K [k] cake_dequeue
0.24% sh 0x54b8 K [k] cake_enqueue
0.15% dhcpv6.scri 0x26b4 K [k] cake_dequeue
0.12% fw3 0x5018 K [k] cake_enqueue
0.06% ksoftirqd/1 0x26b8 K [k] cake_dequeue
0.06% ubusd 0x2ba4 K [k] cake_dequeue
0.06% ubusd 0x4f2c K [k] cake_enqueue
0.06% sh 0x478 K [k] cake_dequeue_one
0.06% sh 0x198 K [k] cake_calc_overhead
0.05% sh 0x3bb0 K [k] cake_hash
0.05% perf 0x23b4 K [k] cake_dequeue
0.03% odhcp6c 0x5630 K [k] cake_enqueue
0.03% dnsmasq 0x2418 K [k] cake_dequeue
0.01% ksoftirqd/1 0x198 K [k] cake_calc_overhead
0.01% dnsmasq 0x36cc K [k] cake_hash
0.01% hostapd 0x276c K [k] cake_dequeue
0.01% swapper 0xb0 K [k] cake_get_tcpopt
0.01% hostapd 0x4c8 K [k] cake_dequeue_one
0.00% hostapd 0x3d00 K [k] cake_overhead