* [Cake] [RFC PATCH 1/2] Add support for cake with ack filtering
2017-11-17 21:24 [Cake] [RFC PATCH 0/2] iproute2 cake support Dave Taht
@ 2017-11-17 21:24 ` Dave Taht
2017-11-17 21:24 ` [Cake] [RFC PATCH 2/2] add cake man page Dave Taht
2017-11-17 21:32 ` [Cake] [RFC PATCH 0/2] iproute2 cake support Dave Taht
2 siblings, 0 replies; 4+ messages in thread
From: Dave Taht @ 2017-11-17 21:24 UTC (permalink / raw)
To: cake
---
tc/Makefile | 1 +
tc/q_cake.c | 732 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 733 insertions(+)
create mode 100644 tc/q_cake.c
diff --git a/tc/Makefile b/tc/Makefile
index 24bd3e2..7eb6d90 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -63,6 +63,7 @@ TCMODULES += em_meta.o
TCMODULES += q_mqprio.o
TCMODULES += q_codel.o
TCMODULES += q_fq_codel.o
+TCMODULES += q_cake.o
TCMODULES += q_fq.o
TCMODULES += q_pie.o
TCMODULES += q_hhf.o
diff --git a/tc/q_cake.c b/tc/q_cake.c
new file mode 100644
index 0000000..43478e3
--- /dev/null
+++ b/tc/q_cake.c
@@ -0,0 +1,732 @@
+/*
+ * Common Applications Kept Enhanced -- CAKE
+ *
+ * Copyright (C) 2014-2015 Jonathan Morton <chromatix99@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Write the cake usage summary to stderr; a trailing '*' marks a default. */
+static void explain(void)
+{
+	static const char usage[] =
+"Usage: ... cake [ bandwidth RATE | unlimited* | autorate_ingress ]\n"
+" [ rtt TIME | datacentre | lan | metro | regional |\n"
+" internet* | oceanic | satellite | interplanetary ]\n"
+" [ besteffort | diffserv8 | diffserv4 | diffserv-llt |\n"
+" diffserv3* ]\n"
+" [ flowblind | srchost | dsthost | hosts | flows |\n"
+" dual-srchost | dual-dsthost | triple-isolate* ]\n"
+" [ nat | nonat* ]\n"
+" [ wash | nowash * ]\n"
+" [ ack-filter | no-ack-filter * ]\n"
+" [ memlimit LIMIT ]\n"
+" [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n"
+" [ mpu N ] [ ingress | egress* ]\n"
+" (* marks defaults)\n";
+
+	/* The text holds no conversion specifiers, so it is emitted verbatim. */
+	fputs(usage, stderr);
+}
+
+/*
+ * Parse the cake keywords in argv and append them to the netlink request n.
+ *
+ * Each setting that was given on the command line is added as one attribute
+ * nested inside TCA_OPTIONS; settings that keep their "not given" sentinel
+ * (-1, 0 or false, depending on the variable) are left out of the message.
+ *
+ * Returns 0 on success, or -1 after printing a diagnostic (and, for "help"
+ * or an unknown keyword, the usage text) to stderr.
+ */
+static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+			  struct nlmsghdr *n)
+{
+	int unlimited = 0;
+	unsigned bandwidth = 0;
+	unsigned interval = 0;
+	unsigned target = 0;
+	unsigned diffserv = 0;
+	unsigned memlimit = 0;
+	int overhead = 0;
+	bool overhead_set = false;
+	bool overhead_override = false;
+	int mpu = 0;
+	int flowmode = -1;
+	int nat = -1;
+	int atm = -1;
+	int autorate = -1;
+	int wash = -1;
+	int ingress = -1;
+	int ack_filter = -1;
+	struct rtattr *tail;
+
+	while (argc > 0) {
+		/* Shaper rate */
+		if (strcmp(*argv, "bandwidth") == 0) {
+			NEXT_ARG();
+			if (get_rate(&bandwidth, *argv)) {
+				fprintf(stderr, "Illegal \"bandwidth\"\n");
+				return -1;
+			}
+			unlimited = 0;
+			autorate = 0;
+		} else if (strcmp(*argv, "unlimited") == 0) {
+			bandwidth = 0;
+			unlimited = 1;
+			autorate = 0;
+		} else if (strcmp(*argv, "autorate_ingress") == 0) {
+			autorate = 1;
+
+		/* RTT: explicit value or named preset (interval, target) */
+		} else if (strcmp(*argv, "rtt") == 0) {
+			NEXT_ARG();
+			if (get_time(&interval, *argv)) {
+				fprintf(stderr, "Illegal \"rtt\"\n");
+				return -1;
+			}
+			/* Target is 1/20 of the interval, but never zero. */
+			target = interval / 20;
+			if (!target)
+				target = 1;
+		} else if (strcmp(*argv, "datacentre") == 0) {
+			interval = 100;
+			target = 5;
+		} else if (strcmp(*argv, "lan") == 0) {
+			interval = 1000;
+			target = 50;
+		} else if (strcmp(*argv, "metro") == 0) {
+			interval = 10000;
+			target = 500;
+		} else if (strcmp(*argv, "regional") == 0) {
+			interval = 30000;
+			target = 1500;
+		} else if (strcmp(*argv, "internet") == 0) {
+			interval = 100000;
+			target = 5000;
+		} else if (strcmp(*argv, "oceanic") == 0) {
+			interval = 300000;
+			target = 15000;
+		} else if (strcmp(*argv, "satellite") == 0) {
+			interval = 1000000;
+			target = 50000;
+		} else if (strcmp(*argv, "interplanetary") == 0) {
+			interval = 3600000000U;
+			target = 5000;
+
+		/* Diffserv mode ("diffserv" is an alias of "diffserv4") */
+		} else if (strcmp(*argv, "besteffort") == 0) {
+			diffserv = 1;
+		} else if (strcmp(*argv, "precedence") == 0) {
+			diffserv = 2;
+		} else if (strcmp(*argv, "diffserv8") == 0) {
+			diffserv = 3;
+		} else if (strcmp(*argv, "diffserv4") == 0) {
+			diffserv = 4;
+		} else if (strcmp(*argv, "diffserv") == 0) {
+			diffserv = 4;
+		} else if (strcmp(*argv, "diffserv-llt") == 0) {
+			diffserv = 5;
+		} else if (strcmp(*argv, "diffserv3") == 0) {
+			diffserv = 6;
+
+		} else if (strcmp(*argv, "nowash") == 0) {
+			wash = 0;
+		} else if (strcmp(*argv, "wash") == 0) {
+			wash = 1;
+
+		/* Flow isolation mode */
+		} else if (strcmp(*argv, "flowblind") == 0) {
+			flowmode = 0;
+		} else if (strcmp(*argv, "srchost") == 0) {
+			flowmode = 1;
+		} else if (strcmp(*argv, "dsthost") == 0) {
+			flowmode = 2;
+		} else if (strcmp(*argv, "hosts") == 0) {
+			flowmode = 3;
+		} else if (strcmp(*argv, "flows") == 0) {
+			flowmode = 4;
+		} else if (strcmp(*argv, "dual-srchost") == 0) {
+			flowmode = 5;
+		} else if (strcmp(*argv, "dual-dsthost") == 0) {
+			flowmode = 6;
+		} else if (strcmp(*argv, "triple-isolate") == 0) {
+			flowmode = 7;
+
+		} else if (strcmp(*argv, "nat") == 0) {
+			nat = 1;
+		} else if (strcmp(*argv, "nonat") == 0) {
+			nat = 0;
+
+		/* Link-layer framing: 0 = none, 1 = ATM, 2 = PTM */
+		} else if (strcmp(*argv, "ptm") == 0) {
+			atm = 2;
+		} else if (strcmp(*argv, "atm") == 0) {
+			atm = 1;
+		} else if (strcmp(*argv, "noatm") == 0) {
+			atm = 0;
+
+		} else if (strcmp(*argv, "raw") == 0) {
+			atm = 0;
+			overhead = 0;
+			overhead_set = true;
+			overhead_override = true;
+		} else if (strcmp(*argv, "conservative") == 0) {
+			/*
+			 * Deliberately over-estimate overhead:
+			 * one whole ATM cell plus ATM framing.
+			 * A safe choice if the actual overhead is unknown.
+			 */
+			atm = 1;
+			overhead = 48;
+			overhead_set = true;
+
+		/* Various ADSL framing schemes, all over ATM cells */
+		} else if (strcmp(*argv, "ipoa-vcmux") == 0) {
+			atm = 1;
+			overhead += 8;
+			overhead_set = true;
+		} else if (strcmp(*argv, "ipoa-llcsnap") == 0) {
+			atm = 1;
+			overhead += 16;
+			overhead_set = true;
+		} else if (strcmp(*argv, "bridged-vcmux") == 0) {
+			atm = 1;
+			overhead += 24;
+			overhead_set = true;
+		} else if (strcmp(*argv, "bridged-llcsnap") == 0) {
+			atm = 1;
+			overhead += 32;
+			overhead_set = true;
+		} else if (strcmp(*argv, "pppoa-vcmux") == 0) {
+			atm = 1;
+			overhead += 10;
+			overhead_set = true;
+		} else if (strcmp(*argv, "pppoa-llc") == 0) {
+			atm = 1;
+			overhead += 14;
+			overhead_set = true;
+		} else if (strcmp(*argv, "pppoe-vcmux") == 0) {
+			atm = 1;
+			overhead += 32;
+			overhead_set = true;
+		} else if (strcmp(*argv, "pppoe-llcsnap") == 0) {
+			atm = 1;
+			overhead += 40;
+			overhead_set = true;
+
+		/* Typical VDSL2 framing schemes, both over PTM */
+		/* PTM has 64b/65b coding which absorbs some bandwidth */
+		} else if (strcmp(*argv, "pppoe-ptm") == 0) {
+			atm = 2;
+			overhead += 27;
+			overhead_set = true;
+		} else if (strcmp(*argv, "bridged-ptm") == 0) {
+			atm = 2;
+			overhead += 19;
+			overhead_set = true;
+
+		} else if (strcmp(*argv, "via-ethernet") == 0) {
+			/*
+			 * We used to use this flag to manually compensate for
+			 * Linux including the Ethernet header on Ethernet-type
+			 * interfaces, but not on IP-type interfaces.
+			 *
+			 * It is no longer needed, because Cake now adjusts for
+			 * that automatically, and is thus ignored.
+			 *
+			 * It would be deleted entirely, but it appears in the
+			 * stats output when the automatic compensation is active.
+			 */
+
+		} else if (strcmp(*argv, "ethernet") == 0) {
+			/* ethernet pre-amble & interframe gap & FCS
+			 * you may need to add vlan tag */
+			overhead += 38;
+			overhead_set = true;
+			mpu = 84;
+
+		/* Additional Ethernet-related overhead used by some ISPs */
+		} else if (strcmp(*argv, "ether-vlan") == 0) {
+			/* 802.1q VLAN tag - may be repeated */
+			overhead += 4;
+			overhead_set = true;
+
+		/*
+		 * DOCSIS cable shapers account for Ethernet frame with FCS,
+		 * but not interframe gap nor preamble.
+		 */
+		} else if (strcmp(*argv, "docsis") == 0) {
+			atm = 0;
+			overhead += 18;
+			overhead_set = true;
+			mpu = 64;
+
+		} else if (strcmp(*argv, "overhead") == 0) {
+			char *p = NULL;
+			long val;
+
+			NEXT_ARG();
+			val = strtol(*argv, &p, 10);
+			/*
+			 * Reject input with no digits or trailing junk, and
+			 * range-check the long before narrowing it to int so
+			 * that huge values cannot wrap into the valid range.
+			 */
+			if (p == *argv || *p || val < -64 || val > 256) {
+				fprintf(stderr, "Illegal \"overhead\", valid range is -64 to 256\n");
+				return -1;
+			}
+			overhead = val;
+			overhead_set = true;
+
+		} else if (strcmp(*argv, "mpu") == 0) {
+			char *p = NULL;
+			long val;
+
+			NEXT_ARG();
+			val = strtol(*argv, &p, 10);
+			/* Same validation as for "overhead" above. */
+			if (p == *argv || *p || val < 0 || val > 256) {
+				fprintf(stderr, "Illegal \"mpu\", valid range is 0 to 256\n");
+				return -1;
+			}
+			mpu = val;
+
+		} else if (strcmp(*argv, "ingress") == 0) {
+			ingress = 1;
+		} else if (strcmp(*argv, "egress") == 0) {
+			ingress = 0;
+
+		} else if (strcmp(*argv, "no-ack-filter") == 0) {
+			ack_filter = 0;
+		} else if (strcmp(*argv, "ack-filter") == 0) {
+			ack_filter = 1;
+
+		} else if (strcmp(*argv, "memlimit") == 0) {
+			NEXT_ARG();
+			if (get_size(&memlimit, *argv)) {
+				fprintf(stderr, "Illegal value for \"memlimit\": \"%s\"\n", *argv);
+				return -1;
+			}
+
+		} else if (strcmp(*argv, "help") == 0) {
+			explain();
+			return -1;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			explain();
+			return -1;
+		}
+		argc--; argv++;
+	}
+
+	/* Open the TCA_OPTIONS nest; its length is patched in at the end. */
+	tail = NLMSG_TAIL(n);
+	addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+	/* A zero rate is only sent when "unlimited" was requested explicitly. */
+	if (bandwidth || unlimited)
+		addattr_l(n, 1024, TCA_CAKE_BASE_RATE, &bandwidth, sizeof(bandwidth));
+	if (diffserv)
+		addattr_l(n, 1024, TCA_CAKE_DIFFSERV_MODE, &diffserv, sizeof(diffserv));
+	if (atm != -1)
+		addattr_l(n, 1024, TCA_CAKE_ATM, &atm, sizeof(atm));
+	if (flowmode != -1)
+		addattr_l(n, 1024, TCA_CAKE_FLOW_MODE, &flowmode, sizeof(flowmode));
+	if (overhead_set)
+		addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead));
+	/* "raw" additionally sends TCA_CAKE_ETHERNET with a value of zero. */
+	if (overhead_override) {
+		unsigned zero = 0;
+
+		addattr_l(n, 1024, TCA_CAKE_ETHERNET, &zero, sizeof(zero));
+	}
+	if (mpu > 0)
+		addattr_l(n, 1024, TCA_CAKE_MPU, &mpu, sizeof(mpu));
+	if (interval)
+		addattr_l(n, 1024, TCA_CAKE_RTT, &interval, sizeof(interval));
+	if (target)
+		addattr_l(n, 1024, TCA_CAKE_TARGET, &target, sizeof(target));
+	if (autorate != -1)
+		addattr_l(n, 1024, TCA_CAKE_AUTORATE, &autorate, sizeof(autorate));
+	if (memlimit)
+		addattr_l(n, 1024, TCA_CAKE_MEMORY, &memlimit, sizeof(memlimit));
+	if (nat != -1)
+		addattr_l(n, 1024, TCA_CAKE_NAT, &nat, sizeof(nat));
+	if (wash != -1)
+		addattr_l(n, 1024, TCA_CAKE_WASH, &wash, sizeof(wash));
+	if (ingress != -1)
+		addattr_l(n, 1024, TCA_CAKE_INGRESS, &ingress, sizeof(ingress));
+	if (ack_filter != -1)
+		addattr_l(n, 1024, TCA_CAKE_ACK_FILTER, &ack_filter, sizeof(ack_filter));
+
+	tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
+	return 0;
+}
+
+
+/*
+ * Print the cake configuration carried in the nested attribute opt to f, as
+ * a space-separated keyword list.
+ *
+ * Attributes that are absent or shorter than a __u32 are ignored and leave
+ * the corresponding local at zero. Always returns 0.
+ */
+static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+	struct rtattr *tb[TCA_CAKE_MAX + 1];
+	unsigned bandwidth = 0;
+	unsigned diffserv = 0;
+	unsigned flowmode = 0;
+	unsigned interval = 0;
+	unsigned memlimit = 0;
+	int overhead = 0;
+	int ethernet = 0;
+	int mpu = 0;
+	int atm = 0;
+	int nat = 0;
+	int autorate = 0;
+	int wash = 0;
+	int ingress = 0;
+	int ack_filter = 0;
+	SPRINT_BUF(b1);
+	SPRINT_BUF(b2);
+
+	if (opt == NULL)
+		return 0;
+
+	parse_rtattr_nested(tb, TCA_CAKE_MAX, opt);
+
+	if (tb[TCA_CAKE_BASE_RATE] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE]) >= sizeof(__u32)) {
+		bandwidth = rta_getattr_u32(tb[TCA_CAKE_BASE_RATE]);
+		if (bandwidth)
+			fprintf(f, "bandwidth %s ", sprint_rate(bandwidth, b1));
+		else
+			fprintf(f, "unlimited ");
+	}
+	if (tb[TCA_CAKE_AUTORATE] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_AUTORATE]) >= sizeof(__u32)) {
+		autorate = rta_getattr_u32(tb[TCA_CAKE_AUTORATE]);
+		if (autorate == 1)
+			fprintf(f, "autorate_ingress ");
+		else if (autorate)
+			fprintf(f, "(?autorate?) ");
+	}
+	if (tb[TCA_CAKE_DIFFSERV_MODE] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_DIFFSERV_MODE]) >= sizeof(__u32)) {
+		diffserv = rta_getattr_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+		switch (diffserv) {
+		case 1:
+			fprintf(f, "besteffort ");
+			break;
+		case 2:
+			fprintf(f, "precedence ");
+			break;
+		case 3:
+			fprintf(f, "diffserv8 ");
+			break;
+		case 4:
+			fprintf(f, "diffserv4 ");
+			break;
+		case 5:
+			fprintf(f, "diffserv-llt ");
+			break;
+		case 6:
+			fprintf(f, "diffserv3 ");
+			break;
+		default:
+			fprintf(f, "(?diffserv?) ");
+			break;
+		}
+	}
+	if (tb[TCA_CAKE_FLOW_MODE] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_FLOW_MODE]) >= sizeof(__u32)) {
+		flowmode = rta_getattr_u32(tb[TCA_CAKE_FLOW_MODE]);
+		/* The value 64 in the flow mode word is read as the NAT flag. */
+		nat = !!(flowmode & 64);
+		flowmode &= ~64;
+		switch (flowmode) {
+		case 0:
+			fprintf(f, "flowblind ");
+			break;
+		case 1:
+			fprintf(f, "srchost ");
+			break;
+		case 2:
+			fprintf(f, "dsthost ");
+			break;
+		case 3:
+			fprintf(f, "hosts ");
+			break;
+		case 4:
+			fprintf(f, "flows ");
+			break;
+		case 5:
+			fprintf(f, "dual-srchost ");
+			break;
+		case 6:
+			fprintf(f, "dual-dsthost ");
+			break;
+		case 7:
+			fprintf(f, "triple-isolate ");
+			break;
+		default:
+			fprintf(f, "(?flowmode?) ");
+			break;
+		}
+
+		if (nat)
+			fprintf(f, "nat ");
+	}
+	if (tb[TCA_CAKE_WASH] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) {
+		wash = rta_getattr_u32(tb[TCA_CAKE_WASH]);
+	}
+	if (tb[TCA_CAKE_ATM] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_ATM]) >= sizeof(__u32)) {
+		atm = rta_getattr_u32(tb[TCA_CAKE_ATM]);
+	}
+	if (tb[TCA_CAKE_OVERHEAD] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) {
+		overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]);
+	}
+	if (tb[TCA_CAKE_MPU] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_MPU]) >= sizeof(__u32)) {
+		mpu = rta_getattr_u32(tb[TCA_CAKE_MPU]);
+	}
+	if (tb[TCA_CAKE_INGRESS] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_INGRESS]) >= sizeof(__u32)) {
+		ingress = rta_getattr_u32(tb[TCA_CAKE_INGRESS]);
+	}
+	if (tb[TCA_CAKE_ACK_FILTER] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_ACK_FILTER]) >= sizeof(__u32)) {
+		ack_filter = rta_getattr_u32(tb[TCA_CAKE_ACK_FILTER]);
+	}
+	if (tb[TCA_CAKE_ETHERNET] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) {
+		ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]);
+	}
+	if (tb[TCA_CAKE_RTT] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) {
+		interval = rta_getattr_u32(tb[TCA_CAKE_RTT]);
+	}
+	/*
+	 * Without this read memlimit stays 0 and the "memlimit" output below
+	 * can never appear.
+	 * NOTE(review): assumes the kernel reports the limit under the same
+	 * TCA_CAKE_MEMORY attribute the parser sends - confirm.
+	 */
+	if (tb[TCA_CAKE_MEMORY] &&
+	    RTA_PAYLOAD(tb[TCA_CAKE_MEMORY]) >= sizeof(__u32)) {
+		memlimit = rta_getattr_u32(tb[TCA_CAKE_MEMORY]);
+	}
+
+	if (wash)
+		fprintf(f,"wash ");
+
+	if (ingress)
+		fprintf(f,"ingress ");
+
+	if (ack_filter)
+		fprintf(f,"ack-filter ");
+
+	if (interval)
+		fprintf(f, "rtt %s ", sprint_time(interval, b2));
+
+	/* "raw": no ATM/PTM framing and overhead equal to the ethernet value. */
+	if (!atm && overhead == ethernet) {
+		fprintf(f, "raw ");
+	} else {
+		if (atm == 1)
+			fprintf(f, "atm ");
+		else if (atm == 2)
+			fprintf(f, "ptm ");
+		else
+			fprintf(f, "noatm ");
+
+		fprintf(f, "overhead %d ", overhead);
+
+		/*
+		 * This is actually the *amount* of automatic compensation,
+		 * but we only report its presence as a boolean for now.
+		 */
+		if (ethernet)
+			fprintf(f, "via-ethernet ");
+	}
+
+	if (mpu) {
+		fprintf(f, "mpu %d ", mpu);
+	}
+
+	if (memlimit)
+		fprintf(f, "memlimit %s", sprint_size(memlimit, b1));
+
+	return 0;
+}
+
+static int cake_print_xstats(struct qdisc_util *qu, FILE *f,
+ struct rtattr *xstats)
+{
+ /* fq_codel stats format borrowed */
+ struct tc_fq_codel_xstats *st;
+ struct tc_cake_xstats *stnc;
+ SPRINT_BUF(b1);
+ SPRINT_BUF(b2);
+
+ if (xstats == NULL)
+ return 0;
+
+ if (RTA_PAYLOAD(xstats) < sizeof(st->type))
+ return -1;
+
+ st = RTA_DATA(xstats);
+ stnc = RTA_DATA(xstats);
+
+ if (st->type == TCA_FQ_CODEL_XSTATS_QDISC && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
+ fprintf(f, " maxpacket %u drop_overlimit %u new_flow_count %u ecn_mark %u",
+ st->qdisc_stats.maxpacket,
+ st->qdisc_stats.drop_overlimit,
+ st->qdisc_stats.new_flow_count,
+ st->qdisc_stats.ecn_mark);
+ fprintf(f, "\n new_flows_len %u old_flows_len %u",
+ st->qdisc_stats.new_flows_len,
+ st->qdisc_stats.old_flows_len);
+ } else if (st->type == TCA_FQ_CODEL_XSTATS_CLASS && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
+ fprintf(f, " deficit %d count %u lastcount %u ldelay %s",
+ st->class_stats.deficit,
+ st->class_stats.count,
+ st->class_stats.lastcount,
+ sprint_time(st->class_stats.ldelay, b1));
+ if (st->class_stats.dropping) {
+ fprintf(f, " dropping");
+ if (st->class_stats.drop_next < 0)
+ fprintf(f, " drop_next -%s",
+ sprint_time(-st->class_stats.drop_next, b1));
+ else
+ fprintf(f, " drop_next %s",
+ sprint_time(st->class_stats.drop_next, b1));
+ }
+ } else if (stnc->version >= 1 && stnc->version < 0xFF
+ && stnc->max_tins == TC_CAKE_MAX_TINS
+ && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, capacity_estimate))
+ {
+ int i;
+
+ if(stnc->version >= 3)
+ fprintf(f, " memory used: %s of %s\n", sprint_size(stnc->memory_used, b1), sprint_size(stnc->memory_limit, b2));
+
+ if(stnc->version >= 2)
+ fprintf(f, " capacity estimate: %s\n", sprint_rate(stnc->capacity_estimate, b1));
+
+ switch(stnc->tin_cnt) {
+ case 3:
+ fprintf(f, " Bulk Best Effort Voice\n");
+ break;
+
+ case 4:
+ fprintf(f, " Bulk Best Effort Video Voice\n");
+ break;
+
+ case 5:
+ fprintf(f, " Low Loss Best Effort Low Delay Bulk Net Control\n");
+ break;
+
+ default:
+ fprintf(f, " ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, " Tin %u", i);
+ fprintf(f, "\n");
+ };
+
+ fprintf(f, " thresh ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_rate(stnc->threshold_rate[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " target ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->target_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " interval");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->interval_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " pk_delay");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->peak_delay_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " av_delay");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->avge_delay_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " sp_delay");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->base_delay_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " pkts ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->sent[i].packets);
+ fprintf(f, "\n");
+
+ fprintf(f, " bytes ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12llu", stnc->sent[i].bytes);
+ fprintf(f, "\n");
+
+ fprintf(f, " way_inds");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->way_indirect_hits[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " way_miss");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->way_misses[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " way_cols");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->way_collisions[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " drops ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->dropped[i].packets);
+ fprintf(f, "\n");
+
+ fprintf(f, " marks ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->ecn_marked[i].packets);
+ fprintf(f, "\n");
+
+ if(stnc->version >= 5) {
+ fprintf(f, " ack_drop");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->ack_drops[i].packets);
+ fprintf(f, "\n");
+ }
+
+ fprintf(f, " sp_flows");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->sparse_flows[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " bk_flows");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->bulk_flows[i]);
+ fprintf(f, "\n");
+
+ if(stnc->version >= 4) {
+ fprintf(f, " un_flows");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->unresponse_flows[i]);
+ fprintf(f, "\n");
+ }
+
+ fprintf(f, " max_len ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->max_skblen[i]);
+ fprintf(f, "\n");
+ } else {
+ return -1;
+ }
+ return 0;
+}
+
+/* Ties the "cake" qdisc id to the option parser and the two printers above. */
+struct qdisc_util cake_qdisc_util = {
+	.id		= "cake",
+	.parse_qopt	= cake_parse_opt,
+	.print_qopt	= cake_print_opt,
+	.print_xstats	= cake_print_xstats,
+};
--
2.7.4
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Cake] [RFC PATCH 2/2] add cake man page
2017-11-17 21:24 [Cake] [RFC PATCH 0/2] iproute2 cake support Dave Taht
2017-11-17 21:24 ` [Cake] [RFC PATCH 1/2] Add support for cake with ack filtering Dave Taht
@ 2017-11-17 21:24 ` Dave Taht
2017-11-17 21:32 ` [Cake] [RFC PATCH 0/2] iproute2 cake support Dave Taht
2 siblings, 0 replies; 4+ messages in thread
From: Dave Taht @ 2017-11-17 21:24 UTC (permalink / raw)
To: cake
---
man/man8/tc-cake.8 | 530 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 530 insertions(+)
create mode 100644 man/man8/tc-cake.8
diff --git a/man/man8/tc-cake.8 b/man/man8/tc-cake.8
new file mode 100644
index 0000000..92b4e41
--- /dev/null
+++ b/man/man8/tc-cake.8
@@ -0,0 +1,530 @@
+.TH CAKE 8 "15 October 2015" "iproute2" "Linux"
+.SH NAME
+CAKE \- Common Applications Kept Enhanced (CAKE)
+.SH SYNOPSIS
+.B tc qdisc ... cake
+.br
+ [
+.B bandwidth RATE
+|
+.B unlimited
+|
+.B autorate_ingress
+]
+.br
+ [
+.B rtt
+TIME |
+.B datacentre
+|
+.B lan
+|
+.B metro
+|
+.B regional
+|
+.B internet
+|
+.B oceanic
+|
+.B satellite
+|
+.B interplanetary
+]
+.br
+ [
+.B besteffort
+|
+.B squash
+|
+.B precedence
+|
+.B diffserv8
+|
+.B diffserv4
+|
+.B diffserv3
+|
+.B diffserv-llt
+]
+.br
+ [
+.B flowblind
+|
+.B srchost
+|
+.B dsthost
+|
+.B hosts
+|
+.B flows
+|
+.B dual-srchost
+|
+.B dual-dsthost
+|
+.B triple-isolate
+]
+.br
+ [
+.B nat
+|
+.B nonat
+]
+.br
+ [
+.B ptm
+|
+.B atm
+|
+.B noatm
+]
+.br
+ [
+.B overhead BYTES
+|
+.B conservative
+|
+.B raw
+]
+.br
+ [
+.B mpu
+BYTES
+]
+.br
+ [
+.B memlimit
+SIZE
+]
+
+.SH DESCRIPTION
+CAKE (Common Applications Kept Enhanced) is a shaping queue discipline which uses both AQM and FQ.
+It combines COBALT, which is an AQM algorithm combining Codel and BLUE,
+a shaper which operates in deficit mode, and a variant of DRR for flow isolation.
+An 8-way set-associative hashing is used to virtually eliminate hash collisions.
+Priority queuing is available through a simplified diffserv implementation.
+Overhead compensation for various encapsulation schemes is tightly integrated.
+
+All settings are optional; the default settings are chosen to be sensible in most common deployments.
+Most people will only need to set the
+.B bandwidth
+parameter to get useful results, but reading the
+.B Overhead Compensation
+and
+.B Round Trip Time
+sections is strongly encouraged.
+
+
+.SH SHAPER PARAMETERS
+CAKE uses a deficit-mode shaper, which does not exhibit the initial burst typical of token-bucket shapers.
+It will automatically burst precisely as much as required to maintain the configured throughput.
+As such, it is very straightforward to configure.
+.PP
+.B unlimited
+(default)
+.br
+ No limit on the bandwidth.
+.PP
+.B bandwidth
+RATE
+.br
+ Set the shaper bandwidth. See
+.BR tc (8)
+or examples below for details of the RATE value.
+.PP
+.B autorate_ingress
+.br
+ Automatic capacity estimation based on traffic arriving at this qdisc.
+This is most likely to be useful with cellular links, which tend to change quality randomly. A
+.B bandwidth
+parameter can be used in conjunction to specify an initial estimate.
+The shaper will periodically be set to a bandwidth slightly below the estimated rate.
+This estimator cannot estimate the bandwidth of links downstream of itself.
+
+.SH OVERHEAD COMPENSATION PARAMETERS
+The size of each packet on the wire may differ from that seen by Linux.
+The following parameters allow CAKE to compensate for this difference by internally considering
+each packet to be bigger than Linux informs it.
+To assist users who are not expert network engineers, keywords have been provided
+to represent a number of common link technologies.
+
+.SS Manual Overhead Specification
+.B overhead
+BYTES
+.br
+ Adds BYTES to the size of each packet. BYTES may be negative; values between -64 and 256 (inclusive) are accepted.
+.PP
+.B mpu
+BYTES
+.br
+ Rounds each packet (including overhead) up to a minimum length BYTES. BYTES may not be negative; values between 0 and 256 (inclusive) are accepted.
+.PP
+.B atm
+.br
+ Compensates for ATM cell framing, which is normally found on ADSL links. This is performed after the
+.B overhead
+parameter above. ATM uses fixed 53-byte cells, each of which can carry 48 bytes payload.
+.PP
+.B ptm
+.br
+ Compensates for PTM encoding, which is normally found on VDSL2 links and uses a 64b/65b encoding scheme.
+.PP
+.B noatm
+.br
+ Disables ATM and PTM compensation.
+
+.SS Failsafe Overhead Keywords
+These two keywords are provided for quick-and-dirty setup. Use them if you can't be bothered to read the rest of this section.
+.PP
+.B raw
+(default)
+.br
+ Turns off all overhead compensation in CAKE. The packet size reported by Linux will be used directly.
+.PP
+ Other overhead keywords may be added after "raw". The effect of this is to make the overhead compensation operate
+relative to the reported packet size, not the underlying IP packet size.
+.PP
+.B conservative
+.br
+ Compensates for more overhead than is likely to occur on any widely-deployed link technology.
+.br
+ Equivalent to
+.B overhead 48 atm.
+
+.SS ADSL Overhead Keywords
+Most ADSL modems have a way to check which framing scheme is in use.
+Often this is also specified in the settings document provided by the ISP.
+The keywords in this section are intended to correspond with these sources of information.
+All of them implicitly set the
+.B atm
+flag.
+.PP
+.B pppoa-vcmux
+.br
+ Equivalent to
+.B overhead 10 atm
+.PP
+.B pppoa-llc
+.br
+ Equivalent to
+.B overhead 14 atm
+.PP
+.B pppoe-vcmux
+.br
+ Equivalent to
+.B overhead 32 atm
+.PP
+.B pppoe-llcsnap
+.br
+ Equivalent to
+.B overhead 40 atm
+.PP
+.B bridged-vcmux
+.br
+ Equivalent to
+.B overhead 24 atm
+.PP
+.B bridged-llcsnap
+.br
+ Equivalent to
+.B overhead 32 atm
+.PP
+.B ipoa-vcmux
+.br
+ Equivalent to
+.B overhead 8 atm
+.PP
+.B ipoa-llcsnap
+.br
+ Equivalent to
+.B overhead 16 atm
+.PP
+See also the Ethernet Overhead Keywords section below.
+
+.SS VDSL2 Overhead Keywords
+ATM was dropped from VDSL2 in favour of PTM, which is a much more straightforward framing scheme.
+Some ISPs retained PPPoE for compatibility with their existing back-end systems.
+.PP
+.B pppoe-ptm
+.br
+ Equivalent to
+.B overhead 27 ptm
+.PP
+.B bridged-ptm
+.br
+ Equivalent to
+.B overhead 19 ptm
+.PP
+See also the Ethernet Overhead Keywords section below.
+
+.SS DOCSIS Cable Overhead Keyword
+DOCSIS is the universal standard for providing Internet service over cable-TV infrastructure.
+
+In this case, the actual on-wire overhead is less important than the packet size the head-end equipment uses for shaping and metering.
+This is specified to be an Ethernet frame including the CRC (aka FCS).
+.PP
+.B docsis
+.br
+ Equivalent to
+.B overhead 18 mpu 64 noatm
+
+.SS Ethernet Overhead Keywords
+.PP
+.B ethernet
+.br
+ Accounts for Ethernet's preamble, inter-frame gap, and Frame Check Sequence. Use this keyword when the bottleneck being shaped for is an actual Ethernet cable.
+.br
+ Equivalent to
+.B overhead 38 mpu 84 noatm
+.PP
+.B ether-vlan
+.br
+ Adds 4 bytes to the overhead compensation, accounting for a VLAN header appended to the Ethernet frame header. NB: Some ISPs use one or even two of these within PPPoE; this keyword may be repeated as necessary to express this.
+
+.SH ROUND TRIP TIME PARAMETERS
+Active Queue Management (AQM) consists of embedding congestion signals in the packet flow, which
+receivers use to instruct senders to slow down when the queue is persistently occupied. CAKE
+uses ECN signalling when available, and packet drops otherwise, according to a combination of the
+Codel and BLUE AQM algorithms called COBALT.
+
+Very short latencies require a very rapid AQM response to adequately control latency.
+However, such a rapid response tends to impair throughput when the actual RTT is relatively long.
+CAKE allows specifying the RTT it assumes for tuning various parameters. Actual RTTs within
+an order of magnitude of this will generally work well for both throughput and latency management.
+.PP
+.B rtt
+TIME
+.br
+ Manually specify an RTT.
+.PP
+.B datacentre
+.br
+ For extremely high-performance networks only. Equivalent to
+.B rtt 100us.
+.PP
+.B lan
+.br
+ For typical Ethernet and Wi-Fi networks, at home or in the office. Don't use this
+when shaping for an Internet access link. Equivalent to
+.B rtt 1ms.
+.PP
+.B metro
+.br
+ For traffic mostly within a single city. Equivalent to
+.B rtt 10ms.
+.PP
+.B regional
+.br
+ For traffic mostly within a European-sized country. Equivalent to
+.B rtt 30ms.
+.PP
+.B internet
+(default)
+.br
+ This is suitable for typical Internet traffic. Equivalent to
+.B rtt 100ms.
+.PP
+.B oceanic
+.br
+ For Internet traffic with generally above-average latency, such as that suffered by Australasian residents. Equivalent to
+.B rtt 300ms.
+.PP
+.B satellite
+.br
+ For traffic via geostationary satellites. Equivalent to
+.B rtt 1000ms.
+.PP
+.B interplanetary
+.br
+ So named because Jupiter is about 1 light-hour from Earth. Use this to (almost) completely disable AQM actions. Equivalent to
+.B rtt 3600s.
+
+.SH FLOW ISOLATION PARAMETERS
+With flow isolation enabled, CAKE places packets from different flows into different queues,
+each of which carries its own AQM state. Packets from each queue are then delivered fairly,
+according to a DRR++ algorithm which minimises latency for "sparse" flows. CAKE uses a set-associative
+hashing algorithm to minimise flow collisions.
+
+These keywords specify whether fairness based on source address, destination address,
+individual flows, or any combination of those is desired.
+.PP
+.B flowblind
+.br
+ Disables flow isolation; all traffic passes through a single queue for each tin.
+.PP
+.B srchost
+.br
+ Flows are defined only by source address. Could be useful on the egress path of an ISP backhaul.
+.PP
+.B dsthost
+.br
+ Flows are defined only by destination address. Could be useful on the ingress path of an ISP backhaul.
+.PP
+.B hosts
+.br
+ Flows are defined by source-destination host pairs. This is host isolation, rather than flow isolation.
+.PP
+.B flows
+.br
+ Flows are defined by the entire 5-tuple of source address, destination address, transport
+protocol, source port and destination port. This is the type of flow isolation performed by
+SFQ and fq_codel.
+.PP
+.B dual-srchost
+.br
+ Flows are defined by the 5-tuple, and fairness is applied first over source addresses, then
+over individual flows. Good for use on egress traffic from a LAN to the internet, where it'll
+prevent any one LAN host from monopolising the uplink, regardless of the number of flows they use.
+.PP
+.B dual-dsthost
+.br
+ Flows are defined by the 5-tuple, and fairness is applied first over destination addresses, then
+over individual flows. Good for use on ingress traffic to a LAN from the internet, where it'll
+prevent any one LAN host from monopolising the downlink, regardless of the number of flows they use.
+.PP
+.B triple-isolate
+(default)
+.br
+ Flows are defined by the 5-tuple, and fairness is applied over source *and* destination addresses
+intelligently (i.e. not merely by host-pairs), and also over individual flows. Use this if you're
+not certain whether to use dual-srchost or dual-dsthost; it'll do both jobs at once, preventing
+any one host on *either* side of the link from monopolising it with a large number of flows.
+.PP
+.B nat
+.br
+ Instructs Cake to perform a NAT lookup before applying flow-isolation rules, to determine the true
+addresses and port numbers of the packet, to improve fairness between hosts "inside" the NAT. This
+has no practical effect in "flowblind" or "flows" modes, or if NAT is performed on a different host.
+.PP
+.B nonat
+(default)
+.br
+ Cake will not perform a NAT lookup. Flow isolation will be performed using the addresses and
+port numbers directly visible to the interface Cake is attached to.
+
+.SH PRIORITY QUEUE PARAMETERS
+CAKE can divide traffic into "tins" based on the Diffserv field. Each tin has its own independent
+set of flow-isolation queues, and is serviced based on a WRR algorithm. To avoid perverse Diffserv
+marking incentives, tin weights have a "priority sharing" value when bandwidth used by that tin is
+below a threshold, and a lower "bandwidth sharing" value when above. Bandwidth is compared against
+the threshold using the same algorithm as the deficit-mode shaper.
+
+Detailed customisation of tin parameters is not provided. The following presets perform all necessary
+tuning, relative to the current shaper bandwidth and RTT settings.
+.PP
+.B besteffort
+.br
+ Disables priority queuing by placing all traffic in one tin.
+.PP
+.B precedence
+.br
+ Enables the legacy interpretation of the TOS "Precedence" field. Use of this preset on the modern Internet is firmly discouraged.
+.PP
+.B diffserv-llt
+.br
+ Provides a "Latency-Loss Tradeoff" implementation with five tins:
+.br
+ Low Loss (TOS1, TOS2), 100% threshold, increased Codel target.
+.br
+ Best Effort (general), 100% threshold, normal Codel target & interval.
+.br
+ Low Latency (TOS4, TOS5, VA, EF), 100% threshold, reduced Codel interval.
+.br
+ Bulk (CS1), 6.25% threshold, normal Codel target & interval.
+.br
+ Net Control (CS6, CS7), 6.25% threshold, increased Codel target & interval.
+.PP
+.B diffserv4
+.br
+ Provides a general-purpose Diffserv implementation with four tins:
+.br
+ Bulk (CS1), 6.25% threshold, generally low priority.
+.br
+ Best Effort (general), 100% threshold.
+.br
+ Video (AF4x, AF3x, CS3, AF2x, CS2, TOS4, TOS1), 50% threshold.
+.br
+ Voice (CS7, CS6, EF, VA, CS5, CS4), 25% threshold.
+.PP
+.B diffserv3
+(default)
+.br
+ Provides a simple, general-purpose Diffserv implementation with three tins:
+.br
+ Bulk (CS1), 6.25% threshold, generally low priority.
+.br
+ Best Effort (general), 100% threshold.
+.br
+ Voice (CS7, CS6, EF, VA, TOS4), 25% threshold, reduced Codel interval.
+
+.SH OTHER PARAMETERS
+.B memlimit
+LIMIT
+.br
+ Limit the memory consumed by Cake to LIMIT bytes. Note that this does not
+translate directly to queue size, as there is some overhead in the data
+structures containing the packets, especially for small packets.
+
+ By default, the limit is calculated based on the bandwidth and RTT settings.
+
+
+.SH EXAMPLES
+#tc qdisc add dev enp3s0 root cake bandwidth 30mbit
+.br
+#tc -s qdisc show dev enp3s0
+.br
+qdisc cake 8001: root refcnt 2 bandwidth 30Mbit diffserv4 flows rtt 100.0ms raw
+ Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+ capacity estimate: 30Mbit
+.br
+ Tin 0 Tin 1 Tin 2 Tin 3
+.br
+ thresh 30Mbit 28125Kbit 22500Kbit 7500Kbit
+.br
+ target 5.0ms 5.0ms 5.0ms 5.0ms
+.br
+interval 100.0ms 100.0ms 100.0ms 100.0ms
+.br
+Pk-delay 0us 0us 0us 0us
+.br
+Av-delay 0us 0us 0us 0us
+.br
+Sp-delay 0us 0us 0us 0us
+.br
+ pkts 0 0 0 0
+.br
+ bytes 0 0 0 0
+.br
+way-inds 0 0 0 0
+.br
+way-miss 0 0 0 0
+.br
+way-cols 0 0 0 0
+.br
+ drops 0 0 0 0
+.br
+ marks 0 0 0 0
+.br
+Sp-flows 0 0 0 0
+.br
+Bk-flows 0 0 0 0
+.br
+last-len 0 0 0 0
+.br
+max-len 0 0 0 0
+.br
+
+
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-codel (8),
+.BR tc-fq_codel (8),
+.BR tc-red (8)
+
+.SH AUTHORS
+CAKE was implemented by Jonathan Morton, with contributions from Dave Taht,
+Kevin Darbyshire-Bryant, Toke Hoiland-Jorgensen and Loganaden Velvindron.
+This manual page was written by Loganaden Velvindron. Please report corrections
+to the Linux Networking mailing list <netdev@vger.kernel.org>.
--
2.7.4
^ permalink raw reply [flat|nested] 4+ messages in thread