Cake - FQ_codel the next generation
 help / color / mirror / Atom feed
* [Cake] act_conndscp
@ 2019-03-19 20:08 Kevin Darbyshire-Bryant
  2019-03-19 21:24 ` Ryan Mounce
  0 siblings, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-19 20:08 UTC (permalink / raw)
  To: Cake List

[-- Attachment #1: Type: text/plain, Size: 273 bytes --]

I’m not looking forward to the response/s from upstream but we shall see :-)

Here are the patches for a new tc action conndscp - both the kernel module and tc’s control of it.




Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A

[-- Attachment #2: 0001-initial-experimental-support-for-act_conndscp.patch --]
[-- Type: application/octet-stream, Size: 6961 bytes --]

From dd6785c1068131b0ba636713147228d1be53ffd9 Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Fri, 15 Mar 2019 09:35:37 +0000
Subject: [PATCH] initial experimental support for act_conndscp

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
 include/uapi/linux/tc_act/tc_conndscp.h |  33 ++++
 tc/Makefile                             |   1 +
 tc/m_conndscp.c                         | 203 ++++++++++++++++++++++++
 3 files changed, 237 insertions(+)
 create mode 100644 include/uapi/linux/tc_act/tc_conndscp.h
 create mode 100644 tc/m_conndscp.c

diff --git a/include/uapi/linux/tc_act/tc_conndscp.h b/include/uapi/linux/tc_act/tc_conndscp.h
new file mode 100644
index 00000000..e857833b
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_conndscp.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CONNDSCP_H
+#define __UAPI_TC_CONNDSCP_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_CONNDSCP 99
+
+struct tc_conndscp {
+	tc_gen;
+	__u16 zone;
+	__u32 mask;
+	__u32 statemask;
+	__u8 mode;
+	__u8 maskshift;
+};
+
+enum {
+	TCA_CONNDSCP_UNSPEC,
+	TCA_CONNDSCP_PARMS,
+	TCA_CONNDSCP_TM,
+	TCA_CONNDSCP_PAD,
+	__TCA_CONNDSCP_MAX
+};
+#define TCA_CONNDSCP_MAX (__TCA_CONNDSCP_MAX - 1)
+
+enum {
+	CONNDSCP_FLAG_GETDSCP	= BIT(0),
+	CONNDSCP_FLAG_SETDSCP	= BIT(1)
+};
+
+#endif
diff --git a/tc/Makefile b/tc/Makefile
index 2edaf2c8..6ab64f0f 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -47,6 +47,7 @@ TCMODULES += m_skbmod.o
 TCMODULES += m_csum.o
 TCMODULES += m_simple.o
 TCMODULES += m_vlan.o
+TCMODULES += m_conndscp.o
 TCMODULES += m_connmark.o
 TCMODULES += m_bpf.o
 TCMODULES += m_tunnel_key.o
diff --git a/tc/m_conndscp.c b/tc/m_conndscp.c
new file mode 100644
index 00000000..56c7a555
--- /dev/null
+++ b/tc/m_conndscp.c
@@ -0,0 +1,203 @@
+/*
+ * m_conndscp.c		netfilter conndscp dscp<->conntrack mark action
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include "utils.h"
+#include "tc_util.h"
+#include <linux/tc_act/tc_conndscp.h>
+
+static const char * conndscp_modes[] = {
+	"?invalid",
+	"get",
+	"set",
+	"both",
+};
+
+static void
+explain(void)
+{
+	fprintf(stderr, "Usage: ... conndscp mask MASK statemask STATEMASK mode set/get/both [zone ZONE] [CONTROL] [index <INDEX>]\n");
+	fprintf(stderr, "where :\n"
+		"\tMASK is the bitmask to store/restore DSCP\n"
+		"\tSTATEMASK is the bitmask to determine conditional storing/restoring\n"
+		"\tMODE get (typically ingress) set (typically egress)\n"
+		"\tZONE is the conntrack zone\n"
+		"\tCONTROL := reclassify | pipe | drop | continue | ok |\n"
+		"\t           goto chain <CHAIN_INDEX>\n");
+}
+
+static void
+usage(void)
+{
+	explain();
+	exit(-1);
+}
+
+static int
+parse_conndscp(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
+	      struct nlmsghdr *n)
+{
+	struct tc_conndscp sel = {};
+	char **argv = *argv_p;
+	int argc = *argc_p;
+	int ok = 0;
+	struct rtattr *tail;
+
+	while (argc > 0) {
+		if (matches(*argv, "conndscp") == 0) {
+			ok = 1;
+			argc--;
+			argv++;
+		} else if (matches(*argv, "help") == 0) {
+			usage();
+		} else {
+			break;
+		}
+
+	}
+
+	if (!ok) {
+		explain();
+		return -1;
+	}
+
+	if (argc) {
+		if (matches(*argv, "mask") == 0) {
+			NEXT_ARG();
+			if (get_u32(&sel.mask, *argv, 0)) {
+				fprintf(stderr, "conndscp: Illegal \"mask\"\n");
+				return -1;
+			}
+			argc--;
+			argv++;
+		}
+	}
+
+	if (argc) {
+		if (matches(*argv, "statemask") == 0) {
+			NEXT_ARG();
+			if (get_u32(&sel.statemask, *argv, 0)) {
+				fprintf(stderr, "conndscp: Illegal \"statemask\"\n");
+				return -1;
+			}
+			argc--;
+			argv++;
+		}
+	}
+
+	if (argc) {
+		if (matches(*argv, "mode") == 0) {
+			NEXT_ARG();
+			if (matches(*argv, "set") == 0)
+				sel.mode |= CONNDSCP_FLAG_SETDSCP;
+			else if (matches(*argv, "get") == 0)
+				sel.mode |= CONNDSCP_FLAG_GETDSCP;
+			else if (matches(*argv, "both") == 0)
+				sel.mode |= (CONNDSCP_FLAG_GETDSCP | CONNDSCP_FLAG_SETDSCP);
+			else {
+				fprintf(stderr, "conndscp: Illegal \"mode\"\n");
+				return -1;
+			}
+			argc--;
+			argv++;
+		}
+	}
+
+	if (argc) {
+		if (matches(*argv, "zone") == 0) {
+			NEXT_ARG();
+			if (get_u16(&sel.zone, *argv, 10)) {
+				fprintf(stderr, "conndscp: Illegal \"zone\"\n");
+				return -1;
+			}
+			argc--;
+			argv++;
+		}
+	}
+
+	parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_PIPE);
+
+	if (argc) {
+		if (matches(*argv, "index") == 0) {
+			NEXT_ARG();
+			if (get_u32(&sel.index, *argv, 10)) {
+				fprintf(stderr, "conndscp: Illegal \"index\"\n");
+				return -1;
+			}
+			argc--;
+			argv++;
+		}
+	}
+
+	tail = addattr_nest(n, MAX_MSG, tca_id);
+	addattr_l(n, MAX_MSG, TCA_CONNDSCP_PARMS, &sel, sizeof(sel));
+	addattr_nest_end(n, tail);
+
+	*argc_p = argc;
+	*argv_p = argv;
+	return 0;
+}
+
+static int print_conndscp(struct action_util *au, FILE *f, struct rtattr *arg)
+{
+	struct rtattr *tb[TCA_CONNDSCP_MAX + 1];
+	struct tc_conndscp *ci;
+
+	if (arg == NULL)
+		return -1;
+
+	parse_rtattr_nested(tb, TCA_CONNDSCP_MAX, arg);
+	if (tb[TCA_CONNDSCP_PARMS] == NULL) {
+		print_string(PRINT_FP, NULL, "%s", "[NULL conndscp parameters]");
+		return -1;
+	}
+
+	ci = RTA_DATA(tb[TCA_CONNDSCP_PARMS]);
+
+	print_string(PRINT_ANY, "kind", "%s ", "conndscp");
+	print_uint(PRINT_ANY, "zone", "zone %u", ci->zone);
+	print_action_control(f, " ", ci->action, "");
+
+	print_string(PRINT_FP, NULL, "%s", _SL_);
+	print_uint(PRINT_ANY, "index", "\t index %u", ci->index);
+	print_int(PRINT_ANY, "ref", " ref %d", ci->refcnt);
+	print_int(PRINT_ANY, "bind", " bind %d", ci->bindcnt);
+	print_uint(PRINT_ANY, "mask", " mask 0x%08x", ci->mask);
+	print_uint(PRINT_ANY, "statemask", " statemask 0x%08x", ci->statemask);
+	print_string(PRINT_ANY, "mode", " mode %s", conndscp_modes[ci->mode & 0x3]);
+
+	if (show_stats) {
+		if (tb[TCA_CONNDSCP_TM]) {
+			struct tcf_t *tm = RTA_DATA(tb[TCA_CONNDSCP_TM]);
+
+			print_tm(f, tm);
+		}
+	}
+	print_string(PRINT_FP, NULL, "%s", _SL_);
+
+	return 0;
+}
+
+struct action_util conndscp_action_util = {
+	.id = "conndscp",
+	.parse_aopt = parse_conndscp,
+	.print_aopt = print_conndscp,
+};
-- 
2.17.2 (Apple Git-113)


[-- Attachment #3: 0001-net-sched-Introduce-conndscp-action-5.0.patch --]
[-- Type: application/octet-stream, Size: 15340 bytes --]

From 27caa3468b241be86e3220d45e6297741d5d50b6 Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Wed, 13 Mar 2019 20:54:49 +0000
Subject: [PATCH] net: sched: Introduce conndscp action

Conndscp is a new tc filter action module.  It is designed to copy DSCPs
to conntrack marks and the reverse operation of conntrack mark contained
DSCPs to the diffserv field of suitable skbs.

The feature is intended for use and has been found useful for restoring
ingress classifications based on egress classifications across links
that bleach or otherwise change DSCP, typically home ISP Internet links.
Restoring DSCP on ingress on the WAN link allows qdiscs such as CAKE to
shape inbound packets according to policies that are easier to implement
on egress.

Ingress classification is traditionally a challenging task since
iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT
lookups, hence are unable to see internal IPv4 addresses as used on the
typical home masquerading gateway.

conndscp understands the following parameters:

mask - a 32 bit mask of at least 6 contiguous bits where conndscp will
place the DSCP in conntrack mark.  The DSCP is left-shifted by the
number of unset lower bits of the mask before storing into the mark
field.

statemask - a 32 bit mask of (usually) 1 bit length, outside the area
specified by mask.  This represents a conditional operation flag - get
will only store the DSCP if the flag is unset.  set will only restore
the DSCP if the flag is set.  This is useful to implement a 'one shot'
iptables based classification where the 'complicated' iptables rules are
only run once to classify the connection on initial (egress) packet and
subsequent packets are all marked/restored with the same DSCP.  A mask
of zero disables the conditional behaviour.

mode - get/set/both - get stores the DSCP into the mark, set restores
the DSCP into the diffserv field from the mark, both 'gets' the mark and
then 'sets' it in that order.

optional parameters:

zone - conntrack zone

control - action related control (reclassify | pipe | drop | continue |
ok | goto chain <CHAIN_INDEX>

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
 include/net/tc_act/tc_conndscp.h          |  19 ++
 include/uapi/linux/tc_act/tc_conndscp.h   |  33 +++
 net/sched/Kconfig                         |  13 +
 net/sched/Makefile                        |   1 +
 net/sched/act_conndscp.c                  | 333 ++++++++++++++++++++++
 tools/testing/selftests/tc-testing/config |   1 +
 6 files changed, 400 insertions(+)
 create mode 100644 include/net/tc_act/tc_conndscp.h
 create mode 100644 include/uapi/linux/tc_act/tc_conndscp.h
 create mode 100644 net/sched/act_conndscp.c

diff --git a/include/net/tc_act/tc_conndscp.h b/include/net/tc_act/tc_conndscp.h
new file mode 100644
index 000000000000..4cb328fc487d
--- /dev/null
+++ b/include/net/tc_act/tc_conndscp.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_CONNDSCP_H
+#define __NET_TC_CONNDSCP_H
+
+#include <net/act_api.h>
+
+struct tcf_conndscp_info {
+	struct tc_action common;
+	struct net *net;
+	u16 zone;
+	u32 mask;
+	u32 statemask;
+	u8 mode;
+	u8 maskshift;
+};
+
+#define to_conndscp(a) ((struct tcf_conndscp_info *)a)
+
+#endif /* __NET_TC_CONNDSCP_H */
diff --git a/include/uapi/linux/tc_act/tc_conndscp.h b/include/uapi/linux/tc_act/tc_conndscp.h
new file mode 100644
index 000000000000..0897b5d6b0ce
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_conndscp.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CONNDSCP_H
+#define __UAPI_TC_CONNDSCP_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_CONNDSCP 27
+
+struct tc_conndscp {
+	tc_gen;
+	__u16 zone;
+	__u32 mask;
+	__u32 statemask;
+	__u8 mode;
+	__u8 maskshift;
+};
+
+enum {
+	TCA_CONNDSCP_UNSPEC,
+	TCA_CONNDSCP_PARMS,
+	TCA_CONNDSCP_TM,
+	TCA_CONNDSCP_PAD,
+	__TCA_CONNDSCP_MAX
+};
+#define TCA_CONNDSCP_MAX (__TCA_CONNDSCP_MAX - 1)
+
+enum {
+	CONNDSCP_FLAG_GETDSCP	= BIT(0),
+	CONNDSCP_FLAG_SETDSCP	= BIT(1)
+};
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1b9afdee5ba9..f43788b9d332 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -865,6 +865,19 @@ config NET_ACT_BPF
 	  To compile this code as a module, choose M here: the
 	  module will be called act_bpf.
 
+config NET_ACT_CONNDSCP
+        tristate "DSCP to Netfilter Connection Mark Store/Retriever"
+        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+        depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+        ---help---
+	  Say Y here to allow storing of DSCP into conn mark
+	  and vice verca
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_connmark.
+
 config NET_ACT_CONNMARK
         tristate "Netfilter Connection Mark Retriever"
         depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8a40431d7b5c..b78198944618 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
+obj-$(CONFIG_NET_ACT_CONNDSCP)	+= act_conndscp.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
 obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
diff --git a/net/sched/act_conndscp.c b/net/sched/act_conndscp.c
new file mode 100644
index 000000000000..8ee87e2ab814
--- /dev/null
+++ b/net/sched/act_conndscp.c
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* net/sched/act_conndscp.c  netfilter conndscp dscp<->connmark action
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <uapi/linux/tc_act/tc_conndscp.h>
+#include <net/tc_act/tc_conndscp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static unsigned int conndscp_net_id;
+static struct tc_action_ops act_conndscp_ops;
+
+static void tcf_conndscp_get(struct nf_conn *ct, struct tcf_conndscp_info *ca,
+			     struct sk_buff *skb, int proto)
+{
+	u32 newmark;
+	u8 dscp;
+
+	/* mark does not contain DSCP so store DSCP bits into c->mark */
+	switch (proto) {
+	case NFPROTO_IPV4:
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+		break;
+	case NFPROTO_IPV6:
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+		break;
+	default:
+		dscp = 0;
+		break;
+	}
+	newmark = ct->mark & ~(ca->mask | ca->statemask);
+	newmark |= (dscp << ca->maskshift) | ca->statemask;
+	if (ct->mark != newmark) {
+		/* using requeues stats to count how many connmark updates */
+		ca->tcf_qstats.requeues++;
+		ct->mark = newmark;
+		nf_conntrack_event_cache(IPCT_MARK, ct);
+	}
+}
+
+static void tcf_conndscp_set(struct nf_conn *ct, struct tcf_conndscp_info *ca,
+			     struct sk_buff *skb, int proto)
+{
+	u8 newdscp;
+
+	newdscp = (((ct->mark & ca->mask) >> ca->maskshift) << 2) &
+		     ~INET_ECN_MASK;
+
+	/* mark contains DSCP so restore DSCP bits from c->mark into diffserv */
+	/* using overlimits stats to count how many DSCP updates */
+	switch (proto) {
+	case NFPROTO_IPV4:
+		if ((ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK) !=
+		     newdscp) {
+			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK,
+					    newdscp);
+			ca->tcf_qstats.overlimits++;
+		}
+		break;
+	case NFPROTO_IPV6:
+		if ((ipv6_get_dsfield(ipv6_hdr(skb)) &
+		     ~INET_ECN_MASK) != newdscp) {
+			ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK,
+					    newdscp);
+			ca->tcf_qstats.overlimits++;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static int tcf_conndscp_act(struct sk_buff *skb, const struct tc_action *a,
+			    struct tcf_result *res)
+{
+	const struct nf_conntrack_tuple_hash *thash;
+	struct nf_conntrack_tuple tuple;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_conndscp_info *ca = to_conndscp(a);
+	struct nf_conntrack_zone zone;
+	struct nf_conn *ct;
+	int proto;
+
+	spin_lock(&ca->tcf_lock);
+	tcf_lastuse_update(&ca->tcf_tm);
+	bstats_update(&ca->tcf_bstats, skb);
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (skb->len < sizeof(struct iphdr))
+			goto out;
+
+		proto = NFPROTO_IPV4;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (skb->len < sizeof(struct ipv6hdr))
+			goto out;
+
+		proto = NFPROTO_IPV6;
+	} else {
+		goto out;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		if (ca->mode & CONNDSCP_FLAG_SETDSCP &&
+		    (!ca->statemask || (ct->mark & ca->statemask)))
+			tcf_conndscp_set(ct, ca, skb, proto);
+		else if (ca->mode & CONNDSCP_FLAG_GETDSCP &&
+			 (!ca->statemask || !(ct->mark & ca->statemask)))
+			tcf_conndscp_get(ct, ca, skb, proto);
+		goto out;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       proto, ca->net, &tuple))
+		goto out;
+
+	zone.id = ca->zone;
+	zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+	thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
+	if (!thash)
+		goto out;
+
+	ct = nf_ct_tuplehash_to_ctrack(thash);
+	if (ca->mode & CONNDSCP_FLAG_SETDSCP &&
+	    (!ca->statemask || (ct->mark & ca->statemask)))
+		tcf_conndscp_set(ct, ca, skb, proto);
+	else if (ca->mode & CONNDSCP_FLAG_GETDSCP &&
+		 (!ca->statemask || !(ct->mark & ca->statemask)))
+		tcf_conndscp_get(ct, ca, skb, proto);
+	nf_ct_put(ct);
+
+out:
+	spin_unlock(&ca->tcf_lock);
+	return ca->tcf_action;
+}
+
+static const struct nla_policy conndscp_policy[TCA_CONNDSCP_MAX + 1] = {
+	[TCA_CONNDSCP_PARMS] = { .len = sizeof(struct tc_conndscp) },
+};
+
+static void conndscp_parmset(struct tcf_conndscp_info *ci,
+			     struct tc_conndscp *parm)
+{
+	ci->tcf_action = parm->action;
+	ci->zone = parm->zone;
+	ci->mask = parm->mask;
+	ci->maskshift = ci->mask ? __ffs(ci->mask) : 0;
+	ci->statemask = parm->statemask;
+	ci->mode = parm->mode;
+
+	/* let's not trust userspace entirely */
+	/* need at least contiguous 6 bit mask */
+	if ((0x3f & (ci->mask >> ci->maskshift)) != 0x3f)
+		ci->mode = 0;
+	if (ci->mask & ci->statemask)
+		ci->mode = 0;
+}
+
+static int tcf_conndscp_init(struct net *net, struct nlattr *nla,
+			     struct nlattr *est, struct tc_action **a,
+			     int ovr, int bind, bool rtnl_held,
+			     struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+	struct nlattr *tb[TCA_CONNDSCP_MAX + 1];
+	struct tcf_conndscp_info *ci;
+	struct tc_conndscp *parm;
+	int ret = 0;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_CONNDSCP_MAX, nla, conndscp_policy,
+			       NULL);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_CONNDSCP_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_CONNDSCP_PARMS]);
+
+	ret = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!ret) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_conndscp_ops, bind, false);
+		if (ret)
+			return ret;
+
+		ci = to_conndscp(*a);
+		ci->net = net;
+		conndscp_parmset(ci, parm);
+
+		tcf_idr_insert(tn, *a);
+		ret = ACT_P_CREATED;
+	} else if (ret > 0) {
+		ci = to_conndscp(*a);
+		if (bind)
+			return 0;
+		tcf_idr_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+		/* replacing action and zone */
+		spin_lock_bh(&ci->tcf_lock);
+		conndscp_parmset(ci, parm);
+		spin_unlock_bh(&ci->tcf_lock);
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static inline int tcf_conndscp_dump(struct sk_buff *skb, struct tc_action *a,
+				    int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_conndscp_info *ci = to_conndscp(a);
+	struct tc_conndscp opt = {
+		.index   = ci->tcf_index,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
+	};
+	struct tcf_t t;
+
+	spin_lock_bh(&ci->tcf_lock);
+	opt.action = ci->tcf_action;
+	opt.zone = ci->zone;
+	opt.mask = ci->mask;
+	opt.statemask = ci->statemask;
+	opt.mode = ci->mode;
+
+	if (nla_put(skb, TCA_CONNDSCP_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &ci->tcf_tm);
+	if (nla_put_64bit(skb, TCA_CONNDSCP_TM, sizeof(t), &t,
+			  TCA_CONNDSCP_PAD))
+		goto nla_put_failure;
+	spin_unlock_bh(&ci->tcf_lock);
+
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&ci->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_conndscp_walker(struct net *net, struct sk_buff *skb,
+			       struct netlink_callback *cb, int type,
+			       const struct tc_action_ops *ops,
+			       struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_conndscp_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_conndscp_ops = {
+	.kind		=	"conndscp",
+	.type		=	TCA_ACT_CONNDSCP,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_conndscp_act,
+	.dump		=	tcf_conndscp_dump,
+	.init		=	tcf_conndscp_init,
+	.walk		=	tcf_conndscp_walker,
+	.lookup		=	tcf_conndscp_search,
+	.size		=	sizeof(struct tcf_conndscp_info),
+};
+
+static __net_init int conndscp_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+
+	return tc_action_net_init(tn, &act_conndscp_ops);
+}
+
+static void __net_exit conndscp_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, conndscp_net_id);
+}
+
+static struct pernet_operations conndscp_net_ops = {
+	.init = conndscp_init_net,
+	.exit_batch = conndscp_exit_net,
+	.id   = &conndscp_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+static int __init conndscp_init_module(void)
+{
+	return tcf_register_action(&act_conndscp_ops, &conndscp_net_ops);
+}
+
+static void __exit conndscp_cleanup_module(void)
+{
+	tcf_unregister_action(&act_conndscp_ops, &conndscp_net_ops);
+}
+
+module_init(conndscp_init_module);
+module_exit(conndscp_cleanup_module);
+MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>");
+MODULE_DESCRIPTION("DSCP to Connection tracking mark storing/restoring");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 203302065458..9d1fddcfb887 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -37,6 +37,7 @@ CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_ACT_BPF=m
+CONFIG_NET_ACT_CONNDSCP=m
 CONFIG_NET_ACT_CONNMARK=m
 CONFIG_NET_ACT_SKBMOD=m
 CONFIG_NET_ACT_IFE=m
-- 
2.17.2 (Apple Git-113)


[-- Attachment #4: 0001-net-sched-Introduce-conndscp-action-14.4.patch --]
[-- Type: application/octet-stream, Size: 14578 bytes --]

From 526ad7dd1221a176afea28d0b45e469f5cfc46c7 Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Wed, 13 Mar 2019 20:54:49 +0000
Subject: [PATCH] net: sched: Introduce conndscp action

With nervousness and trepidation I'm submitting the attached RFC patch
for 'conndscp'.

Conndscp is a new tc filter action module.  It is designed to copy DSCPs
to conntrack marks and the reverse operation of conntrack mark contained
DSCPs to the diffserv field of suitable skbs.

The feature is intended for use and has been found useful for restoring
ingress classifications based on egress classifications across links
that bleach or otherwise change DSCP, typically home ISP Internet links.
Restoring DSCP on ingress on the WAN link allows qdiscs such as CAKE to
shape inbound packets according to policies that are easier to implement
on egress.

Ingress classification is traditionally a challenging task since
iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT
lookups, hence are unable to see internal IPv4 addresses as used on the
typical home masquerading gateway.

conndscp understands the following parameters:

mask - a 32 bit mask of at least 6 contiguous bits where conndscp will
place the DSCP in conntrack mark.  The DSCP is left-shifted by the
number of unset lower bits of the mask before storing into the mark
field.

statemask - a 32 bit mask of (usually) 1 bit length, outside the area
specified by mask.  This represents a conditional operation flag - get
will only store the DSCP if the flag is unset.  set will only restore
the DSCP if the flag is set.  This is useful to implement a 'one shot'
iptables based classification where the 'complicated' iptables rules are
only run once to classify the connection on initial (egress) packet and
subsequent packets are all marked/restored with the same DSCP.  A mask
of zero disables the conditional behaviour.

mode - get/set/both - get stores the DSCP into the mark, set restores
the DSCP into the diffserv field from the mark, both 'gets' the mark and
then 'sets' it in that order.

optional parameters:

zone - conntrack zone

control - action related control (reclassify | pipe | drop | continue |
ok | goto chain <CHAIN_INDEX>

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
 include/net/tc_act/tc_conndscp.h        |  19 ++
 include/uapi/linux/tc_act/tc_conndscp.h |  33 +++
 net/sched/Kconfig                       |  13 +
 net/sched/Makefile                      |   1 +
 net/sched/act_conndscp.c                | 327 ++++++++++++++++++++++++
 5 files changed, 393 insertions(+)
 create mode 100644 include/net/tc_act/tc_conndscp.h
 create mode 100644 include/uapi/linux/tc_act/tc_conndscp.h
 create mode 100644 net/sched/act_conndscp.c

diff --git a/include/net/tc_act/tc_conndscp.h b/include/net/tc_act/tc_conndscp.h
new file mode 100644
index 000000000000..4cb328fc487d
--- /dev/null
+++ b/include/net/tc_act/tc_conndscp.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_CONNDSCP_H
+#define __NET_TC_CONNDSCP_H
+
+#include <net/act_api.h>
+
+struct tcf_conndscp_info {
+	struct tc_action common;
+	struct net *net;
+	u16 zone;
+	u32 mask;
+	u32 statemask;
+	u8 mode;
+	u8 maskshift;
+};
+
+#define to_conndscp(a) ((struct tcf_conndscp_info *)a)
+
+#endif /* __NET_TC_CONNDSCP_H */
diff --git a/include/uapi/linux/tc_act/tc_conndscp.h b/include/uapi/linux/tc_act/tc_conndscp.h
new file mode 100644
index 000000000000..e857833b87bd
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_conndscp.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CONNDSCP_H
+#define __UAPI_TC_CONNDSCP_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_CONNDSCP 99
+
+struct tc_conndscp {
+	tc_gen;
+	__u16 zone;
+	__u32 mask;
+	__u32 statemask;
+	__u8 mode;
+	__u8 maskshift;
+};
+
+enum {
+	TCA_CONNDSCP_UNSPEC,
+	TCA_CONNDSCP_PARMS,
+	TCA_CONNDSCP_TM,
+	TCA_CONNDSCP_PAD,
+	__TCA_CONNDSCP_MAX
+};
+#define TCA_CONNDSCP_MAX (__TCA_CONNDSCP_MAX - 1)
+
+enum {
+	CONNDSCP_FLAG_GETDSCP	= BIT(0),
+	CONNDSCP_FLAG_SETDSCP	= BIT(1)
+};
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e70ed26485a2..48dc5243f196 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -796,6 +796,19 @@ config NET_ACT_BPF
 	  To compile this code as a module, choose M here: the
 	  module will be called act_bpf.
 
+config NET_ACT_CONNDSCP
+        tristate "DSCP to Netfilter Connection Mark Store/Retriever"
+        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+        depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+        ---help---
+	  Say Y here to allow storing of DSCP into conn mark
+	  and vice verca
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_connmark.
+
 config NET_ACT_CONNMARK
         tristate "Netfilter Connection Mark Retriever"
         depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 9e43a4721ef8..ae701bae8b2f 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
+obj-$(CONFIG_NET_ACT_CONNDSCP)	+= act_conndscp.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
 obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
diff --git a/net/sched/act_conndscp.c b/net/sched/act_conndscp.c
new file mode 100644
index 000000000000..78bd1d8ac25b
--- /dev/null
+++ b/net/sched/act_conndscp.c
@@ -0,0 +1,327 @@
+/*
+ * net/sched/act_conndscp.c  netfilter conndscp dscp<->connmark action
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <uapi/linux/tc_act/tc_conndscp.h>
+#include <net/tc_act/tc_conndscp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static unsigned int conndscp_net_id;
+static struct tc_action_ops act_conndscp_ops;
+
+static void tcf_conndscp_get(struct nf_conn *ct, struct tcf_conndscp_info *ca,
+			     struct sk_buff *skb, int proto)
+{
+	u32 newmark;
+	u8 dscp;
+
+	/* mark does not contain DSCP so store DSCP bits into c->mark */
+	switch (proto) {
+	case NFPROTO_IPV4:
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+		break;
+	case NFPROTO_IPV6:
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+		break;
+	default:
+		dscp = 0;
+		break;
+	}
+	newmark = ct->mark & ~(ca->mask | ca->statemask);
+	newmark |= (dscp << ca->maskshift) | ca->statemask;
+	if (ct->mark != newmark) {
+		/* using requeues stats to count how many connmark updates */
+		ca->tcf_qstats.requeues++;
+		ct->mark = newmark;
+		nf_conntrack_event_cache(IPCT_MARK, ct);
+	}
+}
+
+static void tcf_conndscp_set(struct nf_conn *ct, struct tcf_conndscp_info *ca,
+			     struct sk_buff *skb, int proto)
+{
+	u8 newdscp;
+
+	newdscp = (((ct->mark & ca->mask) >> ca->maskshift) << 2) &
+		     ~INET_ECN_MASK;
+
+	/* mark contains DSCP so restore DSCP bits from c->mark into diffserv */
+	/* using overlimits stats to count how many DSCP updates */
+	switch (proto) {
+	case NFPROTO_IPV4:
+		if ((ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK) !=
+		     newdscp) {
+			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK,
+					    newdscp);
+			ca->tcf_qstats.overlimits++;
+		}
+		break;
+	case NFPROTO_IPV6:
+		if ((ipv6_get_dsfield(ipv6_hdr(skb)) &
+		     ~INET_ECN_MASK) != newdscp) {
+			ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK,
+					    newdscp);
+			ca->tcf_qstats.overlimits++;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static int tcf_conndscp_act(struct sk_buff *skb, const struct tc_action *a,
+			    struct tcf_result *res)
+{
+	const struct nf_conntrack_tuple_hash *thash;
+	struct nf_conntrack_tuple tuple;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_conndscp_info *ca = to_conndscp(a);
+	struct nf_conntrack_zone zone;
+	struct nf_conn *ct;
+	int proto;
+
+	spin_lock(&ca->tcf_lock);
+	tcf_lastuse_update(&ca->tcf_tm);
+	bstats_update(&ca->tcf_bstats, skb);
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (skb->len < sizeof(struct iphdr))
+			goto out;
+
+		proto = NFPROTO_IPV4;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (skb->len < sizeof(struct ipv6hdr))
+			goto out;
+
+		proto = NFPROTO_IPV6;
+	} else {
+		goto out;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		if (ca->mode & CONNDSCP_FLAG_SETDSCP &&
+		    (!ca->statemask || (ct->mark & ca->statemask)))
+			tcf_conndscp_set(ct, ca, skb, proto);
+		else if (ca->mode & CONNDSCP_FLAG_GETDSCP &&
+			 (!ca->statemask || !(ct->mark & ca->statemask)))
+			tcf_conndscp_get(ct, ca, skb, proto);
+		goto out;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       proto, ca->net, &tuple))
+		goto out;
+
+	zone.id = ca->zone;
+	zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+	thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
+	if (!thash)
+		goto out;
+
+	ct = nf_ct_tuplehash_to_ctrack(thash);
+	if (ca->mode & CONNDSCP_FLAG_SETDSCP &&
+	    (!ca->statemask || (ct->mark & ca->statemask)))
+		tcf_conndscp_set(ct, ca, skb, proto);
+	else if (ca->mode & CONNDSCP_FLAG_GETDSCP &&
+		 (!ca->statemask || !(ct->mark & ca->statemask)))
+		tcf_conndscp_get(ct, ca, skb, proto);
+	nf_ct_put(ct);
+
+out:
+	spin_unlock(&ca->tcf_lock);
+	return ca->tcf_action;
+}
+
+static const struct nla_policy conndscp_policy[TCA_CONNDSCP_MAX + 1] = {
+	[TCA_CONNDSCP_PARMS] = { .len = sizeof(struct tc_conndscp) },
+};
+
+static void conndscp_parmset(struct tcf_conndscp_info *ci,
+			     struct tc_conndscp *parm)
+{
+	ci->tcf_action = parm->action;
+	ci->zone = parm->zone;
+	ci->mask = parm->mask;
+	ci->maskshift = ci->mask ? __ffs(ci->mask) : 0;
+	ci->statemask = parm->statemask;
+	ci->mode = parm->mode;
+
+	/* let's not trust userspace entirely */
+	/* need at least contiguous 6 bit mask */
+	if ((0x3f & (ci->mask >> ci->maskshift)) != 0x3f)
+		ci->mode = 0;
+	if (ci->mask & ci->statemask)
+		ci->mode = 0;
+}
+
+static int tcf_conndscp_init(struct net *net, struct nlattr *nla,
+			     struct nlattr *est, struct tc_action **a,
+			     int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+	struct nlattr *tb[TCA_CONNDSCP_MAX + 1];
+	struct tcf_conndscp_info *ci;
+	struct tc_conndscp *parm;
+	int ret = 0;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_CONNDSCP_MAX, nla, conndscp_policy,
+			       NULL);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_CONNDSCP_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_CONNDSCP_PARMS]);
+
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_conndscp_ops, bind, false);
+		if (ret)
+			return ret;
+
+		ci = to_conndscp(*a);
+		ci->net = net;
+		conndscp_parmset(ci, parm);
+
+		tcf_idr_insert(tn, *a);
+		ret = ACT_P_CREATED;
+	} else if (ret > 0) {
+		ci = to_conndscp(*a);
+		if (bind)
+			return 0;
+		tcf_idr_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+		/* replacing action and zone */
+		conndscp_parmset(ci, parm);
+	}
+
+	return ret;
+}
+
+static inline int tcf_conndscp_dump(struct sk_buff *skb, struct tc_action *a,
+				    int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_conndscp_info *ci = to_conndscp(a);
+
+	struct tc_conndscp opt = {
+		.index   = ci->tcf_index,
+		.refcnt  = ci->tcf_refcnt - ref,
+		.bindcnt = ci->tcf_bindcnt - bind,
+		.action = ci->tcf_action,
+		.zone = ci->zone,
+		.mask = ci->mask,
+		.statemask = ci->statemask,
+		.mode = ci->mode,
+	};
+	struct tcf_t t;
+
+	if (nla_put(skb, TCA_CONNDSCP_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &ci->tcf_tm);
+	if (nla_put_64bit(skb, TCA_CONNDSCP_TM, sizeof(t), &t,
+			  TCA_CONNDSCP_PAD))
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&ci->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_conndscp_walker(struct net *net, struct sk_buff *skb,
+			       struct netlink_callback *cb, int type,
+			       const struct tc_action_ops *ops)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_conndscp_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_conndscp_ops = {
+	.kind		=	"conndscp",
+	.type		=	TCA_ACT_CONNDSCP,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_conndscp_act,
+	.dump		=	tcf_conndscp_dump,
+	.init		=	tcf_conndscp_init,
+	.walk		=	tcf_conndscp_walker,
+	.lookup		=	tcf_conndscp_search,
+	.size		=	sizeof(struct tcf_conndscp_info),
+};
+
+static __net_init int conndscp_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+
+	return tc_action_net_init(tn, &act_conndscp_ops);
+}
+
+static void __net_exit conndscp_exit_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, conndscp_net_id);
+
+	tc_action_net_exit(tn);
+}
+
+static struct pernet_operations conndscp_net_ops = {
+	.init = conndscp_init_net,
+	.exit = conndscp_exit_net,
+	.id   = &conndscp_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+static int __init conndscp_init_module(void)
+{
+	return tcf_register_action(&act_conndscp_ops, &conndscp_net_ops);
+}
+
+static void __exit conndscp_cleanup_module(void)
+{
+	tcf_unregister_action(&act_conndscp_ops, &conndscp_net_ops);
+}
+
+module_init(conndscp_init_module);
+module_exit(conndscp_cleanup_module);
+MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>");
+MODULE_DESCRIPTION("DSCP to Connection tracking mark storing/restoring");
+MODULE_LICENSE("GPL");
-- 
2.17.2 (Apple Git-113)


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-19 20:08 [Cake] act_conndscp Kevin Darbyshire-Bryant
@ 2019-03-19 21:24 ` Ryan Mounce
  2019-03-19 21:27   ` Kevin Darbyshire-Bryant
  0 siblings, 1 reply; 18+ messages in thread
From: Ryan Mounce @ 2019-03-19 21:24 UTC (permalink / raw)
  To: Kevin Darbyshire-Bryant; +Cc: Cake List

Hi Kevin,

I've finally applied your patches, compiled, and flashed on my router.
Could you share your tc filter action for conndscp to get me started?

-Ryan

On Wed, 20 Mar 2019 at 06:39, Kevin Darbyshire-Bryant
<kevin@darbyshire-bryant.me.uk> wrote:
>
> I’m not looking forward to the response/s from upstream but we shall see :-)
>
> Here are the patches for a new tc action conndscp - both the kernel module and tc’s control of it.
>
>
>
>
> Cheers,
>
> Kevin D-B
>
> gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A
> _______________________________________________
> Cake mailing list
> Cake@lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/cake

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-19 21:24 ` Ryan Mounce
@ 2019-03-19 21:27   ` Kevin Darbyshire-Bryant
  2019-03-19 21:41     ` Toke Høiland-Jørgensen
  2019-03-20  3:31     ` Ryan Mounce
  0 siblings, 2 replies; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-19 21:27 UTC (permalink / raw)
  To: Ryan Mounce; +Cc: Cake List

[-- Attachment #1: Type: text/plain, Size: 426 bytes --]



> On 19 Mar 2019, at 21:24, Ryan Mounce <ryan@mounce.com.au> wrote:
> 
> Hi Kevin,
> 
> I've finally applied your patches, compiled, and flashed on my router.
> Could you share your tc filter action for conndscp to get me started?

Ahh! Ooops yes knew I forgot something - here’s my hacked up sqm-scripts/my_layer_cake.qos




Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A

[-- Attachment #2: my_layer_cake.qos --]
[-- Type: application/octet-stream, Size: 6262 bytes --]

#!/bin/sh
# Cero3 Shaper
# A cake shaper and AQM solution that allows several diffserv marking schemes
# for ethernet gateways

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
#       Copyright (C) 2012-5 Michael D. Taht, Toke Høiland-Jørgensen, Sebastian Moeller


#sm: TODO pass in the cake diffserv keyword

. ${SQM_LIB_DIR}/defaults.sh
QDISC=cake

# Default traffic classication is passed in INGRESS_CAKE_OPTS and EGRESS_CAKE_OPTS, defined in defaults.sh now

egress() {
    SILENT=1 $TC qdisc del dev $IFACE root
    $TC qdisc add dev $IFACE root handle cacf: $( get_stab_string ) cake \
        bandwidth ${UPLINK}kbit $( get_cake_lla_string ) ${EGRESS_CAKE_OPTS} ${EQDISC_OPTS}

    # put an action on the egress interface to get DSCP to connmark->mark
    # and to set DSCP from the stored connmark.
    # this seems counter intuitive but it ensures once the mark is set that all
    # subsequent egress packets have the same stored DSCP avoiding iptables rules
    # to mark every packet, conndscp does it for us and then CAKE is happy using the
    # DSCP
    $TC filter add dev $IFACE protocol all prio 10 u32 match u32 0 0 flowid 1:1 action \
	conndscp mask 0xfc000000 statemask 0x01000000 mode both

}


ingress() {

    SILENT=1 $TC qdisc del dev $IFACE handle ffff: ingress
    $TC qdisc add dev $IFACE handle ffff: ingress

    SILENT=1 $TC qdisc del dev $DEV root

    [ "$IGNORE_DSCP_INGRESS" -eq "1" ] && INGRESS_CAKE_OPTS="$INGRESS_CAKE_OPTS besteffort"
    [ "$ZERO_DSCP_INGRESS" -eq "1" ] && INGRESS_CAKE_OPTS="$INGRESS_CAKE_OPTS wash"

    $TC qdisc add dev $DEV root handle cace: $( get_stab_string ) cake \
        bandwidth ${DOWNLINK}kbit $( get_cake_lla_string ) ${INGRESS_CAKE_OPTS} ${IQDISC_OPTS}

    $IP link set dev $DEV up

    # redirect all IP packets arriving in $IFACE to ifb0
    # set DSCP from conntrack mark
    $TC filter add dev $IFACE parent ffff: protocol all prio 10 u32 \
	match u32 0 0 flowid 1:1 action \
	conndscp mask 0xfc000000 statemask 0x01000000 mode set \
	mirred egress redirect dev $DEV

    # Configure iptables chain to mark packets
    ipt -t mangle -N QOS_MARK_${IFACE}

    # Change DSCP of relevant hosts/packets - this will be picked up by cake+ and placed in the firewall connmark
    # also the DSCP is used as the tin selector.

iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.5 -m comment --comment "Skybox DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p udp -s 192.168.219.5 -m comment --comment "Skybox DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.10 -m comment --comment "Bluray DSCP CS3 Video" -j DSCP --set-dscp-class CS3
iptables -t mangle -A QOS_MARK_${IFACE} -p udp -s 192.168.219.10 -m comment --comment "Bluray DSCP CS3 Video" -j DSCP --set-dscp-class CS3
iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.12 -m tcp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p udp -s 192.168.219.12 -m udp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.12 -m tcp --dport 4443 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
#iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.12 -m tcp --dport 443 -m comment --comment "HTTPS uploads DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1

iptables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Bulk4  dst -j DSCP --set-dscp-class CS1 -m comment --comment "Bulk CS1 ipset"
iptables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Vid4   dst -j DSCP --set-dscp-class CS3 -m comment --comment "Vid CS3 ipset"
iptables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Voice4 dst -j DSCP --set-dscp-class CS4 -m comment --comment "Voice CS4 ipset"

ip6tables -t mangle -A QOS_MARK_${IFACE} -p tcp -s ::c/::ffff:ffff:ffff:ffff -m tcp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
ip6tables -t mangle -A QOS_MARK_${IFACE} -p udp -s ::c/::ffff:ffff:ffff:ffff -m udp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
ip6tables -t mangle -A QOS_MARK_${IFACE} -p tcp -s ::c/::ffff:ffff:ffff:ffff -m tcp --dport 4443 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
#ip6tables -t mangle -A QOS_MARK_${IFACE} -p tcp -s ::c/::ffff:ffff:ffff:ffff -m tcp --dport 443 -m comment --comment "HTTPS uploads DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1

ip6tables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Bulk6  dst -j DSCP --set-dscp-class CS1 -m comment --comment "Bulk CS1 ipset"
ip6tables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Vid6 dst -j DSCP --set-dscp-class CS3 -m comment --comment "Vid CS3 ipset"
ip6tables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Voice6 dst -j DSCP --set-dscp-class CS4 -m comment --comment "Voice CS4 ipset"

    # Send cake+ unmarked connections to the marking chain - Cake+ uses top byte as the
    # i've been marked & here's the dscp placeholder. 
    # top 6 bits are DSCP, LSB is DSCP is valid flag
#    ipt -t mangle -A PREROUTING  -i $IFACE -m connmark --mark 0x00000000/0x01000000 -g QOS_MARK_${IFACE}
    ipt -t mangle -A POSTROUTING -o $IFACE -m connmark --mark 0x00000000/0x01000000 -g QOS_MARK_${IFACE}

}

sqm_start() {
    [ -n "$IFACE" ] || return 1
    do_modules
    verify_qdisc $QDISC "cake" || return 1
    sqm_debug "Starting ${SCRIPT}"

    [ -z "$DEV" ] && DEV=$( get_ifb_for_if ${IFACE} )

    if [ "${UPLINK}" -ne 0 ];
    then
        egress
        sqm_debug "egress shaping activated"
    else
        sqm_debug "egress shaping deactivated"
        SILENT=1 $TC qdisc del dev ${IFACE} root
    fi
    if [ "${DOWNLINK}" -ne 0 ];
    then
	verify_qdisc ingress "ingress" || return 1
        ingress
        sqm_debug "ingress shaping activated"
    else
        sqm_debug "ingress shaping deactivated"
        SILENT=1 $TC qdisc del dev ${DEV} root
        SILENT=1 $TC qdisc del dev ${IFACE} ingress
    fi

    return 0
}

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-19 21:27   ` Kevin Darbyshire-Bryant
@ 2019-03-19 21:41     ` Toke Høiland-Jørgensen
  2019-03-19 21:51       ` Kevin Darbyshire-Bryant
  2019-03-20  3:31     ` Ryan Mounce
  1 sibling, 1 reply; 18+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-03-19 21:41 UTC (permalink / raw)
  To: Kevin Darbyshire-Bryant, Ryan Mounce; +Cc: Cake List

Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> writes:

>> On 19 Mar 2019, at 21:24, Ryan Mounce <ryan@mounce.com.au> wrote:
>> 
>> Hi Kevin,
>> 
>> I've finally applied your patches, compiled, and flashed on my router.
>> Could you share your tc filter action for conndscp to get me started?
>
> Ahh! Ooops yes knew I forgot something - here’s my hacked up
> sqm-scripts/my_layer_cake.qos

So this only works with your patched version of CAKE that interprets the
fwmark as a DSCP value, right?

-Toke

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-19 21:41     ` Toke Høiland-Jørgensen
@ 2019-03-19 21:51       ` Kevin Darbyshire-Bryant
  2019-03-19 21:59         ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-19 21:51 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: Ryan Mounce, Cake List



> On 19 Mar 2019, at 21:41, Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> 
> Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> writes:
> 
>>> On 19 Mar 2019, at 21:24, Ryan Mounce <ryan@mounce.com.au> wrote:
>>> 
>>> Hi Kevin,
>>> 
>>> I've finally applied your patches, compiled, and flashed on my router.
>>> Could you share your tc filter action for conndscp to get me started?
>> 
>> Ahh! Ooops yes knew I forgot something - here’s my hacked up
>> sqm-scripts/my_layer_cake.qos
> 
> So this only works with your patched version of CAKE that interprets the
> fwmark as a DSCP value, right?
> 

No.  It is completely qdisc agnostic/independent.

The tc conndscp action stores/restores DSCP to/from the conntrack mark.  CAKE is completely unmodified and looking at DSCP for tin selection.


Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-19 21:51       ` Kevin Darbyshire-Bryant
@ 2019-03-19 21:59         ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 18+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-03-19 21:59 UTC (permalink / raw)
  To: Kevin Darbyshire-Bryant; +Cc: Ryan Mounce, Cake List

Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> writes:

>> On 19 Mar 2019, at 21:41, Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>> 
>> Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> writes:
>> 
>>>> On 19 Mar 2019, at 21:24, Ryan Mounce <ryan@mounce.com.au> wrote:
>>>> 
>>>> Hi Kevin,
>>>> 
>>>> I've finally applied your patches, compiled, and flashed on my router.
>>>> Could you share your tc filter action for conndscp to get me started?
>>> 
>>> Ahh! Ooops yes knew I forgot something - here’s my hacked up
>>> sqm-scripts/my_layer_cake.qos
>> 
>> So this only works with your patched version of CAKE that interprets the
>> fwmark as a DSCP value, right?
>> 
>
> No.  It is completely qdisc agnostic/independent.
>
> The tc conndscp action stores/restores DSCP to/from the conntrack
> mark.  CAKE is completely unmodified and looking at DSCP for tin
> selection.

Ohh, right, silly me... It also goes the other way; of course :)

-Toke

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-19 21:27   ` Kevin Darbyshire-Bryant
  2019-03-19 21:41     ` Toke Høiland-Jørgensen
@ 2019-03-20  3:31     ` Ryan Mounce
  2019-03-20  8:25       ` Kevin Darbyshire-Bryant
  1 sibling, 1 reply; 18+ messages in thread
From: Ryan Mounce @ 2019-03-20  3:31 UTC (permalink / raw)
  To: Kevin Darbyshire-Bryant; +Cc: Cake List

[-- Attachment #1: Type: text/plain, Size: 1102 bytes --]

On Wed, 20 Mar 2019 at 07:57, Kevin Darbyshire-Bryant
<kevin@darbyshire-bryant.me.uk> wrote:
>
>
>
> > On 19 Mar 2019, at 21:24, Ryan Mounce <ryan@mounce.com.au> wrote:
> >
> > Hi Kevin,
> >
> > I've finally applied your patches, compiled, and flashed on my router.
> > Could you share your tc filter action for conndscp to get me started?
>
> Ahh! Ooops yes knew I forgot something - here’s my hacked up sqm-scripts/my_layer_cake.qos

Okay... I've just spent far longer than I'd like to admit relearning
the basics of tc filter in order to minify my scripts, but everything
is working now. See attached for my usage. I'm back to using the
Turris Omnia which has more than enough grunt to handle my 100/40
link, so I haven't put much thought into optimisation.

The only gotcha I ran into with your patch is the explanation

> MODE get (typically ingress) set (typically egress)

This is backwards, but it's confusing anyway. 'get' also sets bits in
the connmark while 'set' also gets bits from the connmark.

I'd suggest changing 'get' to 'save', and 'set' to 'restore'.

[-- Attachment #2: ryans-cake.txt --]
[-- Type: text/plain, Size: 2175 bytes --]

# /etc/rc.local

# EGRESS
tc qdisc del dev eth2 root
tc qdisc replace dev eth2 root handle 1111: cake \
	dual-srchost nat fwmark 0x03 wash ack-filter oceanic mpu 64 overhead 26 bandwidth 40Mbit
tc -s qdisc show dev eth2

tc filter del dev eth2 parent 1111:
tc filter replace dev eth2 parent 1111: matchall action \
	conndscp mask 0xfc000000 statemask 0x01000000 mode get
tc -s filter show dev eth2 parent 1111:


# INGRESS
ip link add name ibe2 type ifb
ip link set dev ibe2 up

tc qdisc del dev ibe2 root
tc qdisc replace dev ibe2 root cake \
	ingress dual-dsthost nat fwmark 0x03 ack-filter oceanic mpu 64 overhead 26 bandwidth 99Mbit
tc -s qdisc show dev ibe2

tc qdisc del dev eth2 ingress
tc qdisc replace dev eth2 ingress handle ffff:

tc filter del dev eth2 parent ffff:
tc filter replace dev eth2 parent ffff: matchall action \
	connmark \
	conndscp mask 0xfc000000 statemask 0x01000000 mode set \
	mirred egress redirect dev ibe2
tc -s filter show dev eth2 parent ffff:



# /etc/firewall.user

iptables  -t mangle -N mangle_forward_eth2
ip6tables -t mangle -N mangle_forward_eth2

iptables  -t mangle -A mangle_forward_eth2 -j CONNMARK --restore-mark --ctmask 0x03
ip6tables -t mangle -A mangle_forward_eth2 -j CONNMARK --restore-mark --ctmask 0x03
iptables  -t mangle -A mangle_forward_eth2 -m mark ! --mark 0 -j RETURN
ip6tables -t mangle -A mangle_forward_eth2 -m mark ! --mark 0 -j RETURN

# Put all traffic to/from this host in cake's bulk tin
iptables  -t mangle -A mangle_forward_eth2 -m mac --mac-source 01:23:45:67:89:ab -j MARK --set-mark 1
ip6tables -t mangle -A mangle_forward_eth2 -m mac --mac-source 01:23:45:67:89:ab -j MARK --set-mark 1

iptables  -t mangle -A mangle_forward_eth2 -m mark --mark 0 -j RETURN
ip6tables -t mangle -A mangle_forward_eth2 -m mark --mark 0 -j RETURN
iptables  -t mangle -A mangle_forward_eth2 -j CONNMARK --save-mark --ctmask 0x03 --nfmask 0x03
ip6tables -t mangle -A mangle_forward_eth2 -j CONNMARK --save-mark --ctmask 0x03 --nfmask 0x03

iptables  -t mangle -A FORWARD -o eth2 -j mangle_forward_eth2
ip6tables -t mangle -A FORWARD -o eth2 -j mangle_forward_eth2

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20  3:31     ` Ryan Mounce
@ 2019-03-20  8:25       ` Kevin Darbyshire-Bryant
  2019-03-20  8:38         ` Sebastian Moeller
  0 siblings, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-20  8:25 UTC (permalink / raw)
  To: Ryan Mounce; +Cc: Cake List



> On 20 Mar 2019, at 03:31, Ryan Mounce <ryan@mounce.com.au> wrote:
> 
> On Wed, 20 Mar 2019 at 07:57, Kevin Darbyshire-Bryant
> <kevin@darbyshire-bryant.me.uk> wrote:
>> 
>> 
>> 
>>> On 19 Mar 2019, at 21:24, Ryan Mounce <ryan@mounce.com.au> wrote:
>>> 
>>> Hi Kevin,
>>> 
>>> I've finally applied your patches, compiled, and flashed on my router.
>>> Could you share your tc filter action for conndscp to get me started?
>> 
>> Ahh! Ooops yes knew I forgot something - here’s my hacked up sqm-scripts/my_layer_cake.qos
> 
> Okay... I've just spent far longer than I'd like to admit relearning
> the basics of tc filter in order to minify my scripts, but everything
> is working now. See attached for my usage. I'm back to using the
> Turris Omnia which has more than enough grunt to handle my 100/40
> link, so I haven't put much thought into optimisation.
> 
> The only gotcha I ran into with your patch is the explanation
> 
>> MODE get (typically ingress) set (typically egress)
> 
> This is backwards, but it's confusing anyway. 'get' also sets bits in
> the connmark while 'set' also gets bits from the connmark.

Dammit!  And yes it shows how confusing and how easy it is to get confused with the get/set terminology.

> 
> I'd suggest changing 'get' to 'save', and 'set' to 'restore'.
> <ryans-cake.txt>

Fortunately the patch was sent as an RFC to netdev and I’m sure they’ll have other things to fix/clarify at the same time.

Thanks for putting your router/s in the testing firing line.  So that’s at least two of us doing fun DSCP shenanigans on our routers :-)



Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20  8:25       ` Kevin Darbyshire-Bryant
@ 2019-03-20  8:38         ` Sebastian Moeller
  2019-03-20  9:01           ` Kevin Darbyshire-Bryant
  2019-03-20  9:06           ` Kevin Darbyshire-Bryant
  0 siblings, 2 replies; 18+ messages in thread
From: Sebastian Moeller @ 2019-03-20  8:38 UTC (permalink / raw)
  To: cake, Kevin Darbyshire-Bryant, Ryan Mounce; +Cc: Cake List

[-- Attachment #1: Type: text/plain, Size: 3223 bytes --]

Hi Kevin,

Impressive! I had a look at your_layer_cake.qos, and with half the brain at my disposal currently, I am confused. I had thought the idea is to set dscp marks on internal hosts or the LAN interface ofva router and copy those to incoming packets of the same flow, but you seem to set dscps in ingress. What am missing?
I ask because I fully bought your cool-aid ;)  I want a "mode" for sqm scripts where easy to set and control egress dscp from internal hosts is also used for ingress packets of the same flows. I also bought your argument to preferably only do that once per flow hook line and sinker.

AFAICT this is one feature that would solve a lot of issues regarding dscps in home networks. Especially in the light of how easy it turned out to dscp mark packets on windows10, and a lot of the potential dscp users come from the gaming crowd and need something that works on Windows. Sidenote, I really like how easy win10 makes it to dscp marks all egress packets of a given binary, I wish I knew a similarly straightforward way to do this in Linux and macosx....

Thanks for this cool feature....

On March 20, 2019 9:25:31 AM GMT+01:00, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
>
>
>> On 20 Mar 2019, at 03:31, Ryan Mounce <ryan@mounce.com.au> wrote:
>> 
>> On Wed, 20 Mar 2019 at 07:57, Kevin Darbyshire-Bryant
>> <kevin@darbyshire-bryant.me.uk> wrote:
>>> 
>>> 
>>> 
>>>> On 19 Mar 2019, at 21:24, Ryan Mounce <ryan@mounce.com.au> wrote:
>>>> 
>>>> Hi Kevin,
>>>> 
>>>> I've finally applied your patches, compiled, and flashed on my
>router.
>>>> Could you share your tc filter action for conndscp to get me
>started?
>>> 
>>> Ahh! Ooops yes knew I forgot something - here’s my hacked up
>sqm-scripts/my_layer_cake.qos
>> 
>> Okay... I've just spent far longer than I'd like to admit relearning
>> the basics of tc filter in order to minify my scripts, but everything
>> is working now. See attached for my usage. I'm back to using the
>> Turris Omnia which has more than enough grunt to handle my 100/40
>> link, so I haven't put much thought into optimisation.
>> 
>> The only gotcha I ran into with your patch is the explanation
>> 
>>> MODE get (typically ingress) set (typically egress)
>> 
>> This is backwards, but it's confusing anyway. 'get' also sets bits in
>> the connmark while 'set' also gets bits from the connmark.
>
>Dammit!  And yes it shows how confusing and how easy it is to get
>confused with the get/set terminology.
>
>> 
>> I'd suggest changing 'get' to 'save', and 'set' to 'restore'.
>> <ryans-cake.txt>
>
>Fortunately the patch was sent as an RFC to netdev and I’m sure they’ll
>have other things to fix/clarify at the same time.
>
>Thanks for putting your router/s in the testing firing line.  So that’s
>at least two of us doing fun DSCP shenanigans on our routers :-)
>
>
>
>Cheers,
>
>Kevin D-B
>
>gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A
>
>_______________________________________________
>Cake mailing list
>Cake@lists.bufferbloat.net
>https://lists.bufferbloat.net/listinfo/cake

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

[-- Attachment #2: Type: text/html, Size: 4132 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20  8:38         ` Sebastian Moeller
@ 2019-03-20  9:01           ` Kevin Darbyshire-Bryant
  2019-03-20  9:54             ` Sebastian Moeller
  2019-03-20  9:06           ` Kevin Darbyshire-Bryant
  1 sibling, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-20  9:01 UTC (permalink / raw)
  To: Sebastian Moeller; +Cc: cake, Ryan Mounce



> On 20 Mar 2019, at 08:38, Sebastian Moeller <moeller0@gmx.de> wrote:
> 
> Hi Kevin,
> 
> Impressive! I had a look at your_layer_cake.qos, and with half the brain at my disposal currently, I am confused. I had thought the idea is to set dscp marks on internal hosts or the LAN interface ofva router and copy those to incoming packets of the same flow, but you seem to set dscps in ingress. What am missing?
> I ask because I fully bought your cool-aid ;) I want a "mode" for sqm scripts where easy to set and control egress dscp from internal hosts is also used for ingress packets of the same flows. I also bought your argument to preferably only do that once per flow hook line and sinker.
> 
> AFAICT this is one feature that would solve a lot of issues regarding dscps in home networks. Especially in the light of how easy it turned out to dscp mark packets on windows10, and a lot of the potential dscp users come from the gaming crowd and need something that works on Windows. Sidenote, I really like how easy win10 makes it to dscp marks all egress packets of a given binary, I wish I knew a similarly straightforward way to do this in Linux and macosx....
> 
> Thanks for this cool feature….

Ha, ok, probably not helped by my commit message having get & set swapped with regards to the typical usage comments.  I’ll try to go through it in context of my layer cake script.


Egress is packet leaving router on wan interface to ‘Internet’
Ingress is packet arriving at router on wan interface from ‘Internet’

Egress packet goes through iptables mangle table, postrouting.  It doesn’t have ’statemask’ bit set so is sent to the DSCP mangling rule where it may have had the DSCP changed..it doesn’t matter.  Then it will hit conndscp running in ‘both’ mode.  Internally conndscp will go through the ’set’ check first, where it will do nothing because the ’statemask’ bit is unset.  Then it will go through the ‘get’ check, which it will go through, storing the DSCP into the mark and setting the ’statemask’ bit.  This is then passed to cake as before which uses the DSCP to do tin selection.

The ‘reply’ packet will come in on the ingress path.  There it will hit conndscp which will find the conntrack entry and hence the mark.  Conndscp is in ’set’ mode, so it will look at the ’statemask’ bit which is set and restore the mark stored DSCP into the diffserv field on the packet.  The packet is passed on to the cake which uses the now restored DSCP to do tin selection.

Subsequent egress packets will take this path:  It goes through iptables mangle table, postrouting but this time the conntrack mark has the ’statemask’ bit set, so it is NOT sent to the DSCP mangling rules.  Then it will hit conndscp running in ‘both’ mode.  As before internally it look at the ’set’ code first and because the ’statemask’ bit is now set, it will restore the DSCP contained in the mark to the egress packet.  The get action won’t run because the statemask bit is set.  The packet is passed on to cake which will use the (restored) DSCP to do tin selection.

The ingress path is exactly the same as before.

I suspect the subtlety is the ‘both’ action and its internal order of set -> get which allows the ‘one off’/’set forget’ type operation.

Does that help?


Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20  8:38         ` Sebastian Moeller
  2019-03-20  9:01           ` Kevin Darbyshire-Bryant
@ 2019-03-20  9:06           ` Kevin Darbyshire-Bryant
  2019-03-20  9:24             ` Kevin Darbyshire-Bryant
  1 sibling, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-20  9:06 UTC (permalink / raw)
  To: Sebastian Moeller; +Cc: cake, Ryan Mounce

Addendum: If not obvious.  There are two separate instances of ‘conndscp’, one on the egress path (in ‘both’ mode) and one on the ingress path (in ’set’ mode)

Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20  9:06           ` Kevin Darbyshire-Bryant
@ 2019-03-20  9:24             ` Kevin Darbyshire-Bryant
  0 siblings, 0 replies; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-20  9:24 UTC (permalink / raw)
  To: Sebastian Moeller; +Cc: cake

And another - sorry! - some stats/info

overlimits counts the number of packets that have had their DSCP overwritten/restored/set
requeues counts the number of times the ’statemask’ bit has been SET.

root@Router:~# tc -s filter show dev eth0
filter parent cacf: protocol all pref 10 u32 chain 0 
filter parent cacf: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1 
filter parent cacf: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw 
  match 00000000/00000000 at 0
	action order 1: conndscp zone 0 pipe
	 index 1 ref 1 bind 1 mask 0xfc000000 statemask 0x01000000 mode both installed 218013 sec used 0 sec
	Action statistics:
	Sent 1048008900 bytes 7586620 pkt (dropped 0, overlimits 5263898 requeues 87634) 
	backlog 0b 0p requeues 87634

So the above shows that 5263898 packets had their DSCP values set based on a stored DSCP value and thus avoided going through the iptables rules.  87634 packets set that stored value.


root@Router:~# tc -s filter show dev eth0 ingress
filter parent ffff: protocol all pref 10 u32 chain 0 
filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1 
filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw 
  match 00000000/00000000 at 0
	action order 1: conndscp zone 0 pipe
	 index 2 ref 1 bind 1 mask 0xfc000000 statemask 0x01000000 mode set installed 218027 sec used 0 sec
	Action statistics:
	Sent 7942289574 bytes 9601486 pkt (dropped 0, overlimits 6153697 requeues 0) 
	backlog 0b 0p requeues 0

	action order 2: mirred (Egress Redirect to device ifb4eth0) stolen
 	index 1 ref 1 bind 1 installed 218027 sec used 0 sec
 	Action statistics:
	Sent 7942289574 bytes 9601486 pkt (dropped 0, overlimits 0 requeues 0) 
	backlog 0b 0p requeues 0

The above shows that 6153697 packets had their DSCP values restored from the stored mark value.  Note DSCPs are only restored if they’re actually different from the current stored value, so a default DSCP of 0 on an egress path is unlikely to generate a whole load of unnecessary DSCP overwriting on the ingress path.

Does any of this help?

Kevin

> On 20 Mar 2019, at 09:06, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
> 
> Addendum: If not obvious.  There are two separate instances of ‘conndscp’, one on the egress path (in ‘both’ mode) and one on the ingress path (in ’set’ mode)
> 
> Cheers,
> 
> Kevin D-B
> 
> gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A
> 
> _______________________________________________
> Cake mailing list
> Cake@lists.bufferbloat.net
> https://lists.bufferbloat.net/listinfo/cake


Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20  9:01           ` Kevin Darbyshire-Bryant
@ 2019-03-20  9:54             ` Sebastian Moeller
  2019-03-20 10:15               ` Kevin Darbyshire-Bryant
  0 siblings, 1 reply; 18+ messages in thread
From: Sebastian Moeller @ 2019-03-20  9:54 UTC (permalink / raw)
  To: Kevin Darbyshire-Bryant; +Cc: cake, Ryan Mounce

Hi Kevin,

thanks for the information!

> On Mar 20, 2019, at 10:01, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
> 
> 
> 
>> On 20 Mar 2019, at 08:38, Sebastian Moeller <moeller0@gmx.de> wrote:
>> 
>> Hi Kevin,
>> 
>> Impressive! I had a look at your_layer_cake.qos, and with half the brain at my disposal currently, I am confused. I had thought the idea is to set dscp marks on internal hosts or the LAN interface ofva router and copy those to incoming packets of the same flow, but you seem to set dscps in ingress. What am missing?
>> I ask because I fully bought your cool-aid ;) I want a "mode" for sqm scripts where easy to set and control egress dscp from internal hosts is also used for ingress packets of the same flows. I also bought your argument to preferably only do that once per flow hook line and sinker.
>> 
>> AFAICT this is one feature that would solve a lot of issues regarding dscps in home networks. Especially in the light of how easy it turned out to dscp mark packets on windows10, and a lot of the potential dscp users come from the gaming crowd and need something that works on Windows. Sidenote, I really like how easy win10 makes it to dscp marks all egress packets of a given binary, I wish I knew a similarly straightforward way to do this in Linux and macosx....
>> 
>> Thanks for this cool feature….
> 
> Ha, ok, probably not helped by my commit message having get & set swapped with regards to the typical usage comments.  I’ll try to go through it in context of my layer cake script.
> 
> 
> Egress is packet leaving router on wan interface to ‘Internet’
> Ingress is packet arriving at router on wan interface from ‘Internet’
> 
> Egress packet goes through iptables mangle table, postrouting.  It doesn’t have ’statemask’ bit set so is sent to the DSCP mangling rule where it may have had the DSCP changed..it doesn’t matter.  Then it will hit conndscp running in ‘both’ mode.  Internally conndscp will go through the ’set’ check first, where it will do nothing because the ’statemask’ bit is unset.  Then it will go through the ‘get’ check, which it will go through, storing the DSCP into the mark and setting the ’statemask’ bit.  This is then passed to cake as before which uses the DSCP to do tin selection.
> 
> The ‘reply’ packet will come in on the ingress path.  There it will hit conndscp which will find the conntrack entry and hence the mark.  Conndscp is in ’set’ mode, so it will look at the ’statemask’ bit which is set and restore the mark stored DSCP into the diffserv field on the packet.  The packet is passed on to the cake which uses the now restored DSCP to do tin selection.
> 
> Subsequent egress packets will take this path:  It goes through iptables mangle table, postrouting but this time the conntrack mark has the ’statemask’ bit set, so it is NOT sent to the DSCP mangling rules.  Then it will hit conndscp running in ‘both’ mode.  As before internally it look at the ’set’ code first and because the ’statemask’ bit is now set, it will restore the DSCP contained in the mark to the egress packet.  The get action won’t run because the statemask bit is set.  The packet is passed on to cake which will use the (restored) DSCP to do tin selection.

	Ah, but why is that necessary, why not simply keep the DSCP on the packet as is? Do you want to make sure that packet-captures on wan will show the effective DSCP in case that differs from the application set DSCP?


> 
> The ingress path is exactly the same as before.
> 
> I suspect the subtlety is the ‘both’ action and its internal order of set -> get which allows the ‘one off’/’set forget’ type operation.

	Much simpler, was/am puzzeled about lines like:
iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.5 -m comment --comment "Skybox DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1

in the ingress section. with -s (source?) 192.168.219.5  this looks like it is processed post-cake (due to ifb preceding iptables), so the packet looks like it already is in the internal network, as if you would override the DSCP mark just set by conndscp. That surely seems like a wrng interpretation, so I would appreciate if you could walk me through the subtleties here. Thank you very much in advance! Or am I just daft and this truly is intended to mark outgoing packets and simply kives inside the ingress() function because it does not really amtter as long as both shapers are set to rates >0?


> 
> Does that help?

	Yes, just not all the way, as I said half a brain ATM (aka a cold).

Best Regards
	Sebastian

> 
> 
> Cheers,
> 
> Kevin D-B
> 
> gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20  9:54             ` Sebastian Moeller
@ 2019-03-20 10:15               ` Kevin Darbyshire-Bryant
  2019-03-22 21:24                 ` Kevin Darbyshire-Bryant
  0 siblings, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-20 10:15 UTC (permalink / raw)
  To: Sebastian Moeller; +Cc: cake, Ryan Mounce



> On 20 Mar 2019, at 09:54, Sebastian Moeller <moeller0@gmx.de> wrote:
> 
> Hi Kevin,
> 
> thanks for the information!
> 
>> On Mar 20, 2019, at 10:01, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
>> 
>> 
>> 
>>> On 20 Mar 2019, at 08:38, Sebastian Moeller <moeller0@gmx.de> wrote:
>>> 
>>> Hi Kevin,
>>> 
>>> Impressive! I had a look at your_layer_cake.qos, and with half the brain at my disposal currently, I am confused. I had thought the idea is to set dscp marks on internal hosts or the LAN interface ofva router and copy those to incoming packets of the same flow, but you seem to set dscps in ingress. What am missing?
>>> I ask because I fully bought your cool-aid ;) I want a "mode" for sqm scripts where easy to set and control egress dscp from internal hosts is also used for ingress packets of the same flows. I also bought your argument to preferably only do that once per flow hook line and sinker.
>>> 
>>> AFAICT this is one feature that would solve a lot of issues regarding dscps in home networks. Especially in the light of how easy it turned out to dscp mark packets on windows10, and a lot of the potential dscp users come from the gaming crowd and need something that works on Windows. Sidenote, I really like how easy win10 makes it to dscp marks all egress packets of a given binary, I wish I knew a similarly straightforward way to do this in Linux and macosx....
>>> 
>>> Thanks for this cool feature….
>> 
>> Ha, ok, probably not helped by my commit message having get & set swapped with regards to the typical usage comments.  I’ll try to go through it in context of my layer cake script.
>> 
>> 
>> Egress is packet leaving router on wan interface to ‘Internet’
>> Ingress is packet arriving at router on wan interface from ‘Internet’
>> 
>> Egress packet goes through iptables mangle table, postrouting.  It doesn’t have ’statemask’ bit set so is sent to the DSCP mangling rule where it may have had the DSCP changed..it doesn’t matter.  Then it will hit conndscp running in ‘both’ mode.  Internally conndscp will go through the ’set’ check first, where it will do nothing because the ’statemask’ bit is unset.  Then it will go through the ‘get’ check, which it will go through, storing the DSCP into the mark and setting the ’statemask’ bit.  This is then passed to cake as before which uses the DSCP to do tin selection.
>> 
>> The ‘reply’ packet will come in on the ingress path.  There it will hit conndscp which will find the conntrack entry and hence the mark.  Conndscp is in ’set’ mode, so it will look at the ’statemask’ bit which is set and restore the mark stored DSCP into the diffserv field on the packet.  The packet is passed on to the cake which uses the now restored DSCP to do tin selection.
>> 
>> Subsequent egress packets will take this path:  It goes through iptables mangle table, postrouting but this time the conntrack mark has the ’statemask’ bit set, so it is NOT sent to the DSCP mangling rules.  Then it will hit conndscp running in ‘both’ mode.  As before internally it look at the ’set’ code first and because the ’statemask’ bit is now set, it will restore the DSCP contained in the mark to the egress packet.  The get action won’t run because the statemask bit is set.  The packet is passed on to cake which will use the (restored) DSCP to do tin selection.
> 
> 	Ah, but why is that necessary, why not simply keep the DSCP on the packet as is? Do you want to make sure that packet-captures on wan will show the effective DSCP in case that differs from the application set DSCP?

Because if you don’t do that then you have to send every egress packet through the DSCP marking chain.  It is a compromise between dynamic DSCP and having to go through a (possibly) complicated iptables mangle chain vs a ‘one shot DSCP set’ and not hitting iptables chains as much.

You can do ‘dynamic’ dscp if you like - use a statemask of ‘0’, that way every DSCP capable packet is stored into the mark and the last value would be restored.



> 
> 
>> 
>> The ingress path is exactly the same as before.
>> 
>> I suspect the subtlety is the ‘both’ action and its internal order of set -> get which allows the ‘one off’/’set forget’ type operation.
> 
> 	Much simpler, was/am puzzeled about lines like:
> iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.5 -m comment --comment "Skybox DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
> 
> in the ingress section. with -s (source?) 192.168.219.5  this looks like it is processed post-cake (due to ifb preceding iptables), so the packet looks like it already is in the internal network, as if you would override the DSCP mark just set by conndscp. That surely seems like a wrng interpretation, so I would appreciate if you could walk me through the subtleties here. Thank you very much in advance! Or am I just daft and this truly is intended to mark outgoing packets and simply kives inside the ingress() function because it does not really amtter as long as both shapers are set to rates >0?
> 

Bear in mind the ‘-o’ of ipt -t mangle -A POSTROUTING -o $IFACE -m connmark --mark 0x00000000/0x01000000 -g QOS_MARK_${IFACE}

AFAIUI the ingress packets will have an ‘-i’ incoming interface of eth0, not an ‘-o’ outgoing interface of eth0, so I don’t think these rules are hit.  Besides, the only thing that is going to set the ’statemask’ bit is conndscp on the egress path.  The mangle rules don’t set connmarks.  They only play with DSCP values.

The rule you point out is basically trapping my sky satellite RX into ‘Bulk’ since it doesn’t stream and all ‘on-demand’ services are downloads.  I don’t care how long it takes to download but I want it to lose out to any other normal priority downloads I’m doing on other systems at the time.

The point is to retain DSCP values as the key into CAKE’s tin selection (or any other qdisc for that matter) rather than abstract that into a CAKE only fwmark.  CAKE’s fwmark definitely still has its place and indeed I still think it should have an option to store its own tin selection in fwmarks - that would satisfy a lot of customers.  Conndscp is qdisc agnostic so has the potential of wider use/appeal.

> 
>> 
>> Does that help?
> 
> 	Yes, just not all the way, as I said half a brain ATM (aka a cold).

Happier now other than the cold?

> 
> Best Regards
> 	Sebastian
> 
>> 
>> 
>> Cheers,
>> 
>> Kevin D-B
>> 
>> gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A
> 


Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-20 10:15               ` Kevin Darbyshire-Bryant
@ 2019-03-22 21:24                 ` Kevin Darbyshire-Bryant
  2019-03-23 18:35                   ` Kevin Darbyshire-Bryant
  0 siblings, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-22 21:24 UTC (permalink / raw)
  To: Sebastian Moeller; +Cc: cake

It looks like act_conndscp has been shot down by the kernel people, at least in its current form.  Setting a conntrack mark from tc is regarded as “not sure if it is a good idea”.  The other way (conntrack to skb) is fine.  That’s sort of good news in that ingress is the hard bit as it’s problematic with iptables.

egress is within iptables coverage - ‘just’ need a way to store a DSCP & flag to conntrack mark.


Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-22 21:24                 ` Kevin Darbyshire-Bryant
@ 2019-03-23 18:35                   ` Kevin Darbyshire-Bryant
  2019-04-01 14:07                     ` Kevin Darbyshire-Bryant
  0 siblings, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-03-23 18:35 UTC (permalink / raw)
  To: Sebastian Moeller; +Cc: cake

[-- Attachment #1: Type: text/plain, Size: 1218 bytes --]



> On 22 Mar 2019, at 21:24, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
> 
> It looks like act_conndscp has been shot down by the kernel people, at least in its current form.  Setting a conntrack mark from tc is regarded as “not sure if it is a good idea”.  The other way (conntrack to skb) is fine.  That’s sort of good news in that ingress is the hard bit as it’s problematic with iptables.
> 
> egress is within iptables coverage - ‘just’ need a way to store a DSCP & flag to conntrack mark.

Never give in, never surrender.

Hacked together an iptables connmark extension that saves the DSCP (and optional status bit/s) to the conntrack mark ready for the ’set’ part of the tc conndscp action.  So we have the two parts of the operation happening across two different subsystems (iptables for the DSCP->connmark - tc action for the connmark -> DSCP)

Two patches - one kernel space and possibly tolerable.  One user space which is an iptables copy&paste abomination but it *does* work on my openwrt router.

And yet another version of ‘my_layer_cake’ showing how I use it.


Cheers,

Kevin D-B

gpg: 012C ACB2 28C6 C53E 9775  9123 B3A2 389B 9DE2 334A

[-- Attachment #2: 0001-xt_connmark-savedscp.patch --]
[-- Type: application/octet-stream, Size: 2533 bytes --]

From af04207aa32d88f7b2604134c00d97915cae04c1 Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Sat, 23 Mar 2019 09:29:49 +0000
Subject: [PATCH] xt_connmark savedscp

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
 include/uapi/linux/netfilter/xt_connmark.h |  3 +-
 net/netfilter/xt_connmark.c                | 34 +++++++++++++++++++++-
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h
index 408a9654f05c..e63ad3c89b92 100644
--- a/include/uapi/linux/netfilter/xt_connmark.h
+++ b/include/uapi/linux/netfilter/xt_connmark.h
@@ -16,7 +16,8 @@
 enum {
 	XT_CONNMARK_SET = 0,
 	XT_CONNMARK_SAVE,
-	XT_CONNMARK_RESTORE
+	XT_CONNMARK_RESTORE,
+	XT_CONNMARK_SAVEDSCP
 };
 
 struct xt_connmark_tginfo1 {
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index ec377cc6a369..73004a111055 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -42,6 +42,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	u_int32_t newmark;
+	u8 dscp, maskshift;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (ct == NULL)
@@ -57,7 +58,37 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		break;
 	case XT_CONNMARK_SAVE:
 		newmark = (ct->mark & ~info->ctmask) ^
-		          (skb->mark & info->nfmask);
+			  (skb->mark & info->nfmask);
+		if (ct->mark != newmark) {
+			ct->mark = newmark;
+			nf_conntrack_event_cache(IPCT_MARK, ct);
+		}
+		break;
+	case XT_CONNMARK_SAVEDSCP:
+		if (!info->ctmask)
+			goto out;
+
+		if (skb->protocol == htons(ETH_P_IP)) {
+			if (skb->len < sizeof(struct iphdr))
+				goto out;
+
+			dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+
+		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+			if (skb->len < sizeof(struct ipv6hdr))
+				goto out;
+
+			dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+		} else { /* protocol doesn't have diffserv - get out! */
+			goto out;
+		}
+
+		/* nfmask contains the mask shift value */
+		maskshift = info->nfmask & 0x1f;
+		newmark = (ct->mark & ~info->ctmask) ^
+			  (info->ctmark | (dscp << maskshift));
+
 		if (ct->mark != newmark) {
 			ct->mark = newmark;
 			nf_conntrack_event_cache(IPCT_MARK, ct);
@@ -70,6 +101,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		break;
 	}
 
+out:
 	return XT_CONTINUE;
 }
 
-- 
2.17.2 (Apple Git-113)


[-- Attachment #3: 0001-savedscp.patch --]
[-- Type: application/octet-stream, Size: 8256 bytes --]

From d8010d1c1f6a3bfc7180ffb1634bdc3d63e18c13 Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Sat, 23 Mar 2019 10:21:03 +0000
Subject: [PATCH] savedscp

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
 extensions/libxt_CONNMARK.c           | 75 ++++++++++++++++++++++++++-
 include/linux/netfilter/xt_connmark.h |  3 +-
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/extensions/libxt_CONNMARK.c b/extensions/libxt_CONNMARK.c
index 21e10913..72a91f23 100644
--- a/extensions/libxt_CONNMARK.c
+++ b/extensions/libxt_CONNMARK.c
@@ -49,6 +49,7 @@ enum {
 	O_CTMASK,
 	O_NFMASK,
 	O_MASK,
+	O_SAVEDSCP_MARK,
 	F_SET_MARK         = 1 << O_SET_MARK,
 	F_SAVE_MARK        = 1 << O_SAVE_MARK,
 	F_RESTORE_MARK     = 1 << O_RESTORE_MARK,
@@ -61,8 +62,10 @@ enum {
 	F_CTMASK           = 1 << O_CTMASK,
 	F_NFMASK           = 1 << O_NFMASK,
 	F_MASK             = 1 << O_MASK,
+	F_SAVEDSCP_MARK	   = 1 << O_SAVEDSCP_MARK,
 	F_OP_ANY           = F_SET_MARK | F_SAVE_MARK | F_RESTORE_MARK |
-	                     F_AND_MARK | F_OR_MARK | F_XOR_MARK | F_SET_XMARK,
+	                     F_AND_MARK | F_OR_MARK | F_XOR_MARK | F_SET_XMARK |
+			     F_SAVEDSCP_MARK,
 };
 
 static const char *const xt_connmark_shift_ops[] = {
@@ -75,6 +78,7 @@ static void CONNMARK_help(void)
 	printf(
 "CONNMARK target options:\n"
 "  --set-mark value[/mask]       Set conntrack mark value\n"
+"  --savedscp-mark value/mask    Save DSCP to conntrack mark value\n"
 "  --save-mark [--mask mask]     Save the packet nfmark in the connection\n"
 "  --restore-mark [--mask mask]  Restore saved nfmark value\n");
 }
@@ -83,6 +87,8 @@ static void CONNMARK_help(void)
 static const struct xt_option_entry CONNMARK_opts[] = {
 	{.name = "set-mark", .id = O_SET_MARK, .type = XTTYPE_MARKMASK32,
 	 .excl = F_OP_ANY},
+	{.name = "savedscp-mark", .id = O_SAVEDSCP_MARK, .type = XTTYPE_MARKMASK32,
+	 .excl = F_OP_ANY},
 	{.name = "save-mark", .id = O_SAVE_MARK, .type = XTTYPE_NONE,
 	 .excl = F_OP_ANY},
 	{.name = "restore-mark", .id = O_RESTORE_MARK, .type = XTTYPE_NONE,
@@ -98,6 +104,8 @@ static const struct xt_option_entry connmark_tg_opts[] = {
 	 .excl = F_OP_ANY},
 	{.name = "set-mark", .id = O_SET_MARK, .type = XTTYPE_MARKMASK32,
 	 .excl = F_OP_ANY},
+	{.name = "savedscp-mark", .id = O_SAVEDSCP_MARK, .type = XTTYPE_MARKMASK32,
+	 .excl = F_OP_ANY},
 	{.name = "and-mark", .id = O_AND_MARK, .type = XTTYPE_UINT32,
 	 .excl = F_OP_ANY},
 	{.name = "or-mark", .id = O_OR_MARK, .type = XTTYPE_UINT32,
@@ -124,6 +132,8 @@ static const struct xt_option_entry connmark_tg_opts_v2[] = {
 	 .excl = F_OP_ANY},
 	{.name = "set-mark", .id = O_SET_MARK, .type = XTTYPE_MARKMASK32,
 	 .excl = F_OP_ANY},
+	{.name = "savedscp-mark", .id = O_SAVEDSCP_MARK, .type = XTTYPE_MARKMASK32,
+	 .excl = F_OP_ANY},
 	{.name = "and-mark", .id = O_AND_MARK, .type = XTTYPE_UINT32,
 	 .excl = F_OP_ANY},
 	{.name = "or-mark", .id = O_OR_MARK, .type = XTTYPE_UINT32,
@@ -158,6 +168,7 @@ static void connmark_tg_help(void)
 "  --restore-mark [--ctmask mask] [--nfmask mask]\n"
 "                                Copy nfmark to ctmark using masks\n"
 "  --set-mark value[/mask]       Set conntrack mark value\n"
+"  --savedscp-mark value/mask    Save DSCP to conntrack mark value\n"
 "  --save-mark [--mask mask]     Save the packet nfmark in the connection\n"
 "  --restore-mark [--mask mask]  Restore saved nfmark value\n"
 "  --and-mark value              Binary AND the ctmark with bits\n"
@@ -210,6 +221,11 @@ static void CONNMARK_parse(struct xt_option_call *cb)
 		markinfo->mark = cb->val.mark;
 		markinfo->mask = cb->val.mask;
 		break;
+	case O_SAVEDSCP_MARK:
+		markinfo->mode = XT_CONNMARK_SAVEDSCP;
+		markinfo->mark = cb->val.mark;
+		markinfo->mask = cb->val.mask;
+		break;
 	case O_SAVE_MARK:
 		markinfo->mode = XT_CONNMARK_SAVE;
 		break;
@@ -238,6 +254,14 @@ static void connmark_tg_parse(struct xt_option_call *cb)
 		info->ctmark = cb->val.mark;
 		info->ctmask = cb->val.mark | cb->val.mask;
 		break;
+	case O_SAVEDSCP_MARK:
+		info->mode   = XT_CONNMARK_SAVEDSCP;
+		info->ctmark = cb->val.mark;
+		info->ctmask = cb->val.mask;
+		info->nfmask = info->ctmask ? ffs(info->ctmask) - 1 : 0;
+		if ((0x3f & (info->ctmask >> info->nfmask)) != 0x3f)
+			info->ctmask = 0;
+		break;
 	case O_AND_MARK:
 		info->mode   = XT_CONNMARK_SET;
 		info->ctmark = 0;
@@ -283,6 +307,14 @@ static void connmark_tg_parse_v2(struct xt_option_call *cb)
 		info->ctmark = cb->val.mark;
 		info->ctmask = cb->val.mark | cb->val.mask;
 		break;
+	case O_SAVEDSCP_MARK:
+		info->mode   = XT_CONNMARK_SAVEDSCP;
+		info->ctmark = cb->val.mark;
+		info->ctmask = cb->val.mask;
+		info->nfmask = info->ctmask ? ffs(info->ctmask) - 1 : 0;
+		if ((0x3f & (info->ctmask >> info->nfmask)) != 0x3f)
+			info->ctmask = 0;
+		break;
 	case O_AND_MARK:
 		info->mode   = XT_CONNMARK_SET;
 		info->ctmark = 0;
@@ -351,6 +383,11 @@ static void CONNMARK_print(const void *ip,
 	    print_mark(markinfo->mark);
 	    print_mask("/", markinfo->mask);
 	    break;
+	case XT_CONNMARK_SAVEDSCP:
+	    printf(" CONNMARK savedscp ");
+	    print_mark(markinfo->mark);
+	    print_mask("/", markinfo->mask);
+	    break;
 	case XT_CONNMARK_SAVE:
 	    printf(" CONNMARK save ");
 	    print_mask("mask ", markinfo->mask);
@@ -386,6 +423,20 @@ connmark_tg_print(const void *ip, const struct xt_entry_target *target,
 			printf(" CONNMARK xset 0x%x/0x%x",
 			       info->ctmark, info->ctmask);
 		break;
+	case XT_CONNMARK_SAVEDSCP: /* FIXME */
+		if (info->ctmark == 0)
+			printf(" CONNMARK DSCP and 0x%x",
+			       (unsigned int)(uint32_t)~info->ctmask);
+		else if (info->ctmark == info->ctmask)
+			printf(" CONNMARK DSCP or 0x%x", info->ctmark);
+		else if (info->ctmask == 0)
+			printf(" CONNMARK DSCP xor 0x%x", info->ctmark);
+		else if (info->ctmask == 0xFFFFFFFFU)
+			printf(" CONNMARK DSCP set 0x%x", info->ctmark);
+		else
+			printf(" CONNMARK DSCP xset 0x%x/0x%x",
+			       info->ctmark, info->ctmask);
+		break;
 	case XT_CONNMARK_SAVE:
 		if (info->nfmask == UINT32_MAX && info->ctmask == UINT32_MAX)
 			printf(" CONNMARK save");
@@ -433,6 +484,20 @@ connmark_tg_print_v2(const void *ip, const struct xt_entry_target *target,
 			printf(" CONNMARK xset 0x%x/0x%x",
 			       info->ctmark, info->ctmask);
 		break;
+	case XT_CONNMARK_SAVEDSCP:
+		if (info->ctmark == 0)
+			printf(" CONNMARK DSCP and 0x%x",
+			       (unsigned int)(uint32_t)~info->ctmask);
+		else if (info->ctmark == info->ctmask)
+			printf(" CONNMARK DSCP or 0x%x", info->ctmark);
+		else if (info->ctmask == 0)
+			printf(" CONNMARK DSCP xor 0x%x", info->ctmark);
+		else if (info->ctmask == 0xFFFFFFFFU)
+			printf(" CONNMARK DSCP set 0x%x", info->ctmark);
+		else
+			printf(" CONNMARK DSCP xset 0x%x/0x%x",
+			       info->ctmark, info->ctmask);
+		break;
 	case XT_CONNMARK_SAVE:
 		if (info->nfmask == UINT32_MAX && info->ctmask == UINT32_MAX)
 			printf(" CONNMARK save");
@@ -474,6 +539,11 @@ static void CONNMARK_save(const void *ip, const struct xt_entry_target *target)
 	    print_mark(markinfo->mark);
 	    print_mask("/", markinfo->mask);
 	    break;
+	case XT_CONNMARK_SAVEDSCP:
+	    printf(" --savedscp-mark ");
+	    print_mark(markinfo->mark);
+	    print_mask("/", markinfo->mask);
+	    break;
 	case XT_CONNMARK_SAVE:
 	    printf(" --save-mark ");
 	    print_mask("--mask ", markinfo->mask);
@@ -505,6 +575,9 @@ connmark_tg_save(const void *ip, const struct xt_entry_target *target)
 	case XT_CONNMARK_SET:
 		printf(" --set-xmark 0x%x/0x%x", info->ctmark, info->ctmask);
 		break;
+	case XT_CONNMARK_SAVEDSCP:
+		printf(" --savedscp-mark 0x%x/0x%x", info->ctmark, info->ctmask);
+		break;
 	case XT_CONNMARK_SAVE:
 		printf(" --save-mark --nfmask 0x%x --ctmask 0x%x",
 		       info->nfmask, info->ctmask);
diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h
index bbf2acc9..cf526101 100644
--- a/include/linux/netfilter/xt_connmark.h
+++ b/include/linux/netfilter/xt_connmark.h
@@ -15,7 +15,8 @@
 enum {
 	XT_CONNMARK_SET = 0,
 	XT_CONNMARK_SAVE,
-	XT_CONNMARK_RESTORE
+	XT_CONNMARK_RESTORE,
+	XT_CONNMARK_SAVEDSCP
 };
 
 struct xt_connmark_tginfo1 {
-- 
2.17.2 (Apple Git-113)


[-- Attachment #4: my_layer_cake.qos --]
[-- Type: application/octet-stream, Size: 6260 bytes --]

#!/bin/sh
# Cero3 Shaper
# A cake shaper and AQM solution that allows several diffserv marking schemes
# for ethernet gateways

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
#       Copyright (C) 2012-5 Michael D. Taht, Toke Høiland-Jørgensen, Sebastian Moeller


#sm: TODO pass in the cake diffserv keyword

. ${SQM_LIB_DIR}/defaults.sh
QDISC=cake

# Default traffic classication is passed in INGRESS_CAKE_OPTS and EGRESS_CAKE_OPTS, defined in defaults.sh now

egress() {
    SILENT=1 $TC qdisc del dev $IFACE root
    $TC qdisc add dev $IFACE root handle cacf: $( get_stab_string ) cake \
        bandwidth ${UPLINK}kbit $( get_cake_lla_string ) ${EGRESS_CAKE_OPTS} ${EQDISC_OPTS}

    # set DSCP from the stored connmark.
    # this seems counter intuitive but it ensures once the mark is set that all
    # subsequent egress packets have the same stored DSCP avoiding iptables rules
    # to mark every packet, conndscp does it for us and then CAKE is happy using the
    # DSCP
    $TC filter add dev $IFACE protocol all prio 10 u32 match u32 0 0 flowid 1:1 action \
	conndscp mask 0xfc000000 statemask 0x01000000 mode set
}


ingress() {

    SILENT=1 $TC qdisc del dev $IFACE handle ffff: ingress
    $TC qdisc add dev $IFACE handle ffff: ingress

    SILENT=1 $TC qdisc del dev $DEV root

    [ "$IGNORE_DSCP_INGRESS" -eq "1" ] && INGRESS_CAKE_OPTS="$INGRESS_CAKE_OPTS besteffort"
    [ "$ZERO_DSCP_INGRESS" -eq "1" ] && INGRESS_CAKE_OPTS="$INGRESS_CAKE_OPTS wash"

    $TC qdisc add dev $DEV root handle cace: $( get_stab_string ) cake \
        bandwidth ${DOWNLINK}kbit $( get_cake_lla_string ) ${INGRESS_CAKE_OPTS} ${IQDISC_OPTS}

    $IP link set dev $DEV up

    # redirect all IP packets arriving in $IFACE to ifb0
    # set DSCP from conntrack mark
    $TC filter add dev $IFACE parent ffff: protocol all prio 10 u32 \
	match u32 0 0 flowid 1:1 action \
	conndscp mask 0xfc000000 statemask 0x01000000 mode set \
	mirred egress redirect dev $DEV

    # Configure iptables chain to mark packets
    ipt -t mangle -N QOS_MARK_${IFACE}

    # Change DSCP of relevant hosts/packets - this will be picked up by cake+ and placed in the firewall connmark
    # also the DSCP is used as the tin selector.

iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.5 -m comment --comment "Skybox DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p udp -s 192.168.219.5 -m comment --comment "Skybox DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.10 -m comment --comment "Bluray DSCP CS3 Video" -j DSCP --set-dscp-class CS3
iptables -t mangle -A QOS_MARK_${IFACE} -p udp -s 192.168.219.10 -m comment --comment "Bluray DSCP CS3 Video" -j DSCP --set-dscp-class CS3
iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.12 -m tcp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p udp -s 192.168.219.12 -m udp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.12 -m tcp --dport 4443 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
#iptables -t mangle -A QOS_MARK_${IFACE} -p tcp -s 192.168.219.12 -m tcp --dport 443 -m comment --comment "HTTPS uploads DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1

iptables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Bulk4  dst -j DSCP --set-dscp-class CS1 -m comment --comment "Bulk CS1 ipset"
iptables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Vid4   dst -j DSCP --set-dscp-class CS3 -m comment --comment "Vid CS3 ipset"
iptables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Voice4 dst -j DSCP --set-dscp-class CS4 -m comment --comment "Voice CS4 ipset"

ip6tables -t mangle -A QOS_MARK_${IFACE} -p tcp -s ::c/::ffff:ffff:ffff:ffff -m tcp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
ip6tables -t mangle -A QOS_MARK_${IFACE} -p udp -s ::c/::ffff:ffff:ffff:ffff -m udp --sport 6981 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
ip6tables -t mangle -A QOS_MARK_${IFACE} -p tcp -s ::c/::ffff:ffff:ffff:ffff -m tcp --dport 4443 -m comment --comment "BT DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1
#ip6tables -t mangle -A QOS_MARK_${IFACE} -p tcp -s ::c/::ffff:ffff:ffff:ffff -m tcp --dport 443 -m comment --comment "HTTPS uploads DSCP CS1 Bulk" -j DSCP --set-dscp-class CS1

ip6tables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Bulk6  dst -j DSCP --set-dscp-class CS1 -m comment --comment "Bulk CS1 ipset"
ip6tables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Vid6 dst -j DSCP --set-dscp-class CS3 -m comment --comment "Vid CS3 ipset"
ip6tables -t mangle -A QOS_MARK_${IFACE} -m set --match-set Voice6 dst -j DSCP --set-dscp-class CS4 -m comment --comment "Voice CS4 ipset"

ipt -A QOS_MARK_eth0 -t mangle -j CONNMARK --savedscp-mark 0x01000000/0xfc000000
    # Send cake+ unmarked connections to the marking chain - Cake+ uses top byte as the
    # i've been marked & here's the dscp placeholder. 
    # top 6 bits are DSCP, LSB is DSCP is valid flag
#    ipt -t mangle -A PREROUTING  -i $IFACE -m connmark --mark 0x00000000/0x01000000 -g QOS_MARK_${IFACE}
    ipt -t mangle -A POSTROUTING -o $IFACE -m connmark --mark 0x00000000/0x01000000 -g QOS_MARK_${IFACE}

}

sqm_start() {
    [ -n "$IFACE" ] || return 1
    do_modules
    verify_qdisc $QDISC "cake" || return 1
    sqm_debug "Starting ${SCRIPT}"

    [ -z "$DEV" ] && DEV=$( get_ifb_for_if ${IFACE} )

    if [ "${UPLINK}" -ne 0 ];
    then
        egress
        sqm_debug "egress shaping activated"
    else
        sqm_debug "egress shaping deactivated"
        SILENT=1 $TC qdisc del dev ${IFACE} root
    fi
    if [ "${DOWNLINK}" -ne 0 ];
    then
	verify_qdisc ingress "ingress" || return 1
        ingress
        sqm_debug "ingress shaping activated"
    else
        sqm_debug "ingress shaping deactivated"
        SILENT=1 $TC qdisc del dev ${DEV} root
        SILENT=1 $TC qdisc del dev ${IFACE} ingress
    fi

    return 0
}

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-03-23 18:35                   ` Kevin Darbyshire-Bryant
@ 2019-04-01 14:07                     ` Kevin Darbyshire-Bryant
  2019-04-01 23:52                       ` Ryan Mounce
  0 siblings, 1 reply; 18+ messages in thread
From: Kevin Darbyshire-Bryant @ 2019-04-01 14:07 UTC (permalink / raw)
  To: Sebastian Moeller; +Cc: cake



> On 23 Mar 2019, at 18:35, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
> 
> 
> 
>> On 22 Mar 2019, at 21:24, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
>> 
>> It looks like act_conndscp has been shot down by the kernel people, at least in its current form.  Setting a conntrack mark from tc is regarded as “not sure if it is a good idea”.  The other way (conntrack to skb) is fine.  That’s sort of good news in that ingress is the hard bit as it’s problematic with iptables.
>> 
>> egress is within iptables coverage - ‘just’ need a way to store a DSCP & flag to conntrack mark.
> 
> Never give in, never surrender.
> 

Given in.  Surrendered.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Cake] act_conndscp
  2019-04-01 14:07                     ` Kevin Darbyshire-Bryant
@ 2019-04-01 23:52                       ` Ryan Mounce
  0 siblings, 0 replies; 18+ messages in thread
From: Ryan Mounce @ 2019-04-01 23:52 UTC (permalink / raw)
  To: Kevin Darbyshire-Bryant; +Cc: Sebastian Moeller, cake

On Tue, 2 Apr 2019 at 00:37, Kevin Darbyshire-Bryant
<kevin@darbyshire-bryant.me.uk> wrote:
>
>
>
> > On 23 Mar 2019, at 18:35, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
> >
> >
> >
> >> On 22 Mar 2019, at 21:24, Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> wrote:
> >>
> >> It looks like act_conndscp has been shot down by the kernel people, at least in its current form.  Setting a conntrack mark from tc is regarded as “not sure if it is a good idea”.  The other way (conntrack to skb) is fine.  That’s sort of good news in that ingress is the hard bit as it’s problematic with iptables.
> >>
> >> egress is within iptables coverage - ‘just’ need a way to store a DSCP & flag to conntrack mark.
> >
> > Never give in, never surrender.
> >
>
> Given in.  Surrendered.

:(

FWIW I've applied an updated patch with your iptables 'abomination'
and it's been working without issue.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2019-04-01 23:52 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-19 20:08 [Cake] act_conndscp Kevin Darbyshire-Bryant
2019-03-19 21:24 ` Ryan Mounce
2019-03-19 21:27   ` Kevin Darbyshire-Bryant
2019-03-19 21:41     ` Toke Høiland-Jørgensen
2019-03-19 21:51       ` Kevin Darbyshire-Bryant
2019-03-19 21:59         ` Toke Høiland-Jørgensen
2019-03-20  3:31     ` Ryan Mounce
2019-03-20  8:25       ` Kevin Darbyshire-Bryant
2019-03-20  8:38         ` Sebastian Moeller
2019-03-20  9:01           ` Kevin Darbyshire-Bryant
2019-03-20  9:54             ` Sebastian Moeller
2019-03-20 10:15               ` Kevin Darbyshire-Bryant
2019-03-22 21:24                 ` Kevin Darbyshire-Bryant
2019-03-23 18:35                   ` Kevin Darbyshire-Bryant
2019-04-01 14:07                     ` Kevin Darbyshire-Bryant
2019-04-01 23:52                       ` Ryan Mounce
2019-03-20  9:06           ` Kevin Darbyshire-Bryant
2019-03-20  9:24             ` Kevin Darbyshire-Bryant

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox