It is highly unlikely these patches would be accepted by upstream as allocating a skb in the fast path is probably a no-no. However at least part of them seem generically useful - instrumenting pfifo_fast in particular - and certainly helpful for debugging in general. <br>
<br>I had hoped it would be possible to do this without mucking with locking, tho. :(<br><br>In my own case I'd like to have a qdisc_drop_with_stats(skb,sch,"fq_codel",reason) that could be compiled in or out easily, sending the packet headers of the dropped packet to a listener much as you do here. This would be useful for monitoring, have potential as a routing metric, and make it easier to analyze the behavior of various qdisc types, without having to poll and lock via tc.<br>
<br>Also as a drop takes place only when the card is producing delays, it would be possible to get a bandwidth estimate over smaller intervals from it easily, and be less invasive overall...<br><br>So thanks for digging into netlink so much and having a sample program. I almost get it now...<br>
<br>A couple more comments below:<br><br><div class="gmail_quote">On Mon, Jun 10, 2013 at 5:39 AM, Toke Høiland-Jørgensen <span dir="ltr"><<a href="mailto:toke@toke.dk" target="_blank">toke@toke.dk</a>></span> wrote:<br>
<br>Email syntax for a RFC PATCH is RFC PATCH, not PATCH RFC<br><br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">---<br>
include/uapi/linux/rtnetlink.h | 15 ++++++---<br>
net/core/gen_stats.c | 6 ++--<br>
net/sched/Kconfig | 8 +++++<br>
net/sched/sch_generic.c | 71 ++++++++++++++++++++++++++++++++++++++++++<br>
4 files changed, 93 insertions(+), 7 deletions(-)<br>
<br>
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h<br>
index 7a2144e..e9d91c5 100644<br>
--- a/include/uapi/linux/rtnetlink.h<br>
+++ b/include/uapi/linux/rtnetlink.h<br>
@@ -68,6 +68,9 @@ enum {<br>
RTM_GETQDISC,<br>
#define RTM_GETQDISC RTM_GETQDISC<br>
<br>
+ RTM_QDISC_STATS,<br>
+#define RTM_QDISC_STATS RTM_QDISC_STATS<br>
+<br></blockquote><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
RTM_NEWTCLASS = 40,<br>
#define RTM_NEWTCLASS RTM_NEWTCLASS<br>
RTM_DELTCLASS,<br>
@@ -140,7 +143,7 @@ enum {<br>
#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2)<br>
#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2)<br>
<br>
-/*<br>
+/*<br>
Generic structure for encapsulation of optional route information.<br>
It is reminiscent of sockaddr, but with sa_family replaced<br>
with attribute type.<br>
@@ -180,7 +183,7 @@ struct rtmsg {<br>
<br>
unsigned char rtm_table; /* Routing table id */<br>
unsigned char rtm_protocol; /* Routing protocol; see below */<br>
- unsigned char rtm_scope; /* See below */<br>
+ unsigned char rtm_scope; /* See below */<br>
unsigned char rtm_type; /* See below */<br>
<br></blockquote><div><br>unnecessary change.<br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
unsigned rtm_flags;<br>
@@ -450,7 +453,7 @@ struct ifinfomsg {<br>
};<br>
<br>
/********************************************************************<br>
- * prefix information<br>
+ * prefix information<br>
****/<br>
<br></blockquote><div><br>unnecessary change. try to touch as little as possible... There is an emacs mode for linux indentation, btw<br>which helps avoid annoyingly messing up existing code.<br><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
struct prefixmsg {<br>
@@ -464,7 +467,7 @@ struct prefixmsg {<br>
unsigned char prefix_pad3;<br>
};<br>
<br>
-enum<br>
+enum<br>
{<br>
PREFIX_UNSPEC,<br>
PREFIX_ADDRESS,<br>
@@ -571,6 +574,8 @@ enum rtnetlink_groups {<br>
#define RTNLGRP_NEIGH RTNLGRP_NEIGH<br>
RTNLGRP_TC,<br>
#define RTNLGRP_TC RTNLGRP_TC<br>
+ RTNLGRP_TC_STATS,<br>
+#define RTNLGRP_TC_STATS RTNLGRP_TC_STATS<br>
RTNLGRP_IPV4_IFADDR,<br>
#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR<br>
RTNLGRP_IPV4_MROUTE,<br></blockquote><div><br>It's not clear to me where this stuff enters userspace or not, and in general it's<br>safer to append new enums/defines rather than insert them.<br> <br></div>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
@@ -625,7 +630,7 @@ struct tcamsg {<br>
};<br>
#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))<br>
#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))<br>
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */<br>
+#define TCA_ACT_TAB 1 /* attr type must be >=1 */<br>
#define TCAA_MAX 1<br></blockquote><div><br>Uneeeded change...<br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
/* New extended info filters for IFLA_EXT_MASK */<br>
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c<br>
index ddedf21..68df614 100644<br>
--- a/net/core/gen_stats.c<br>
+++ b/net/core/gen_stats.c<br>
@@ -61,7 +61,8 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,<br>
{<br>
memset(d, 0, sizeof(*d));<br>
<br>
- spin_lock_bh(lock);<br>
+ if(lock)<br>
+ spin_lock_bh(lock);<br></blockquote><div><br>So this acted up on a SMP kernel?<br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
d->lock = lock;<br>
if (type)<br>
d->tail = (struct nlattr *)skb_tail_pointer(skb);<br>
@@ -245,7 +246,8 @@ gnet_stats_finish_copy(struct gnet_dump *d)<br>
return -1;<br>
}<br>
<br>
- spin_unlock_bh(d->lock);<br>
+ if(d->lock)<br>
+ spin_unlock_bh(d->lock);<br>
return 0;<br>
}<br>
EXPORT_SYMBOL(gnet_stats_finish_copy);<br>
diff --git a/net/sched/Kconfig b/net/sched/Kconfig<br>
index 235e01a..4902812 100644<br>
--- a/net/sched/Kconfig<br>
+++ b/net/sched/Kconfig<br>
@@ -308,6 +308,14 @@ config NET_SCH_PLUG<br>
To compile this code as a module, choose M here: the<br>
module will be called sch_plug.<br>
<br>
+config NET_SCH_BROADCAST_STATS<br>
+ bool "Enable Qdisc statistics broadcast"<br>
+ ---help---<br>
+<br>
+ Select this option if you want to enable qdisc stats broadcast through<br>
+ netlink multicast. Broadcast happens on packet dequeue, limited to the<br>
+ interval set by the <something> sysctl parameter.<br>
+<br>
comment "Classification"<br></blockquote><div><br>QDISC_BROADCAST_DEQUEUE_STATS maybe? <br><br>Elsewhere you've defined the sysctl. Would argue for sysfs...<br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
config NET_CLS<br>
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c<br>
index eac7e0e..56490a1 100644<br>
--- a/net/sched/sch_generic.c<br>
+++ b/net/sched/sch_generic.c<br>
@@ -28,6 +28,7 @@<br>
#include <net/sch_generic.h><br>
#include <net/pkt_sched.h><br>
#include <net/dst.h><br>
+#include <net/netlink.h><br>
<br>
/* Main transmission queue. */<br>
<br>
@@ -148,6 +149,73 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,<br>
return ret;<br>
}<br>
<br>
+#ifdef CONFIG_NET_SCH_BROADCAST_STATS<br>
+int qdisc_broadcast_stats(struct Qdisc *q)<br>
+{<br>
+ struct tcmsg *tcm;<br>
+ struct nlmsghdr *nlh;<br>
+ struct gnet_dump d;<br>
+ struct sk_buff *skb;<br>
+ unsigned char *b;<br>
+<br>
+ if(!q->dev_queue || !q->dev_queue->dev)<br>
+ return 0;<br>
<br></blockquote><div><br>Is there a way to also merely detect if there is a listener present?<br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ printk(KERN_DEBUG "Packet dequeue, ifname %s(%d), qdisc %s, qlen %d\n",<br>
+ qdisc_dev(q)->name,<br>
+ qdisc_dev(q)->ifindex,<br>
+ q->ops->id,<br>
+ q->qstats.qlen);<br></blockquote><div><br>Useful for debugging, but invasive for general use, far more heavyweight than tossing<br>it into netlink. <br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ skb = alloc_skb(NLMSG_SPACE(1024), GFP_KERNEL);<br></blockquote><div><br>NLMSG_SPACE seems arbitrary, and perhaps this needs to be GFP_ATOMIC?<br>(But I am totally not an expert down here)<br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ if(!skb)<br>
+ return -ENOBUFS;<br>
+ b = skb_tail_pointer(skb);<br>
+<br>
+ nlh = nlmsg_put(skb, 0, 0, RTM_QDISC_STATS, sizeof(*tcm), NLM_F_MULTI);<br>
+ if (!nlh)<br>
+ goto out_nlmsg_trim;<br>
+<br>
+ tcm = nlmsg_data(nlh);<br>
+ tcm->tcm_family = AF_UNSPEC;<br>
+ tcm->tcm__pad1 = 0;<br>
+ tcm->tcm__pad2 = 0;<br>
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;<br>
+ tcm->tcm_parent = q->parent;<br>
+ tcm->tcm_handle = q->handle;<br>
+ tcm->tcm_info = atomic_read(&q->refcnt);<br>
+<br>
+ if (nla_put_string(skb, TCA_KIND, q->ops->id))<br>
+ goto nla_put_failure;<br>
+<br>
+ if (gnet_stats_start_copy(skb, TCA_STATS2, NULL, &d) < 0)<br>
+ goto nla_put_failure;<br>
+<br>
+ if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)<br>
+ goto nla_put_failure;<br>
+<br>
+ if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||<br>
+ gnet_stats_copy_queue(&d, &q->qstats) < 0)<br>
+ goto nla_put_failure;<br>
+<br>
+ if (gnet_stats_finish_copy(&d) < 0)<br>
+ goto nla_put_failure;<br>
+<br>
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;<br>
+<br>
+ rtnetlink_send(skb, qdisc_dev(q)->nd_net, 0, RTNLGRP_TC_STATS, 0);<br>
+<br>
+ return 0;<br>
+<br>
+nla_put_failure:<br>
+out_nlmsg_trim:<br>
+ nlmsg_trim(skb, b);<br>
+ return -1;<br>
+<br>
+}<br>
+#endif<br>
+<br>
+<br>
/*<br>
* NOTE: Called under qdisc_lock(q) with locally disabled BH.<br>
*<br>
@@ -173,6 +241,9 @@ static inline int qdisc_restart(struct Qdisc *q)<br>
struct net_device *dev;<br>
spinlock_t *root_lock;<br>
struct sk_buff *skb;<br>
+#ifdef CONFIG_NET_SCH_BROADCAST_STATS<br>
+ qdisc_broadcast_stats(q);<br>
+#endif<br></blockquote><div><br>Am thinking this could be a callback in the Qdisc_ops struct...<br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
/* Dequeue packet */<br>
skb = dequeue_skb(q);<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.8.3<br>
<br>
_______________________________________________<br>
Bloat-devel mailing list<br>
<a href="mailto:Bloat-devel@lists.bufferbloat.net">Bloat-devel@lists.bufferbloat.net</a><br>
<a href="https://lists.bufferbloat.net/listinfo/bloat-devel" target="_blank">https://lists.bufferbloat.net/listinfo/bloat-devel</a><br>
</font></span></blockquote></div><br><br clear="all"><br>-- <br>Dave Täht<br><br>Fixing bufferbloat with cerowrt: <a href="http://www.teklibre.com/cerowrt/subscribe.html" target="_blank">http://www.teklibre.com/cerowrt/subscribe.html</a>