From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by huchra.bufferbloat.net (Postfix, from userid 1000) id 10B8F200B1B; Thu, 3 May 2012 10:52:31 -0700 (PDT) From: =?UTF-8?q?Dave=20T=C3=A4ht?= To: codel@lists.bufferbloat.net Date: Thu, 3 May 2012 10:52:13 -0700 Message-Id: <1336067533-16923-2-git-send-email-dave.taht@bufferbloat.net> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1336067533-16923-1-git-send-email-dave.taht@bufferbloat.net> References: <1336067533-16923-1-git-send-email-dave.taht@bufferbloat.net> X-Mailman-Approved-At: Thu, 03 May 2012 10:53:46 -0700 Cc: =?UTF-8?q?Dave=20T=C3=A4ht?= Subject: [Codel] [PATCH] Preliminary codel implementation X-BeenThere: codel@lists.bufferbloat.net X-Mailman-Version: 2.1.13 Precedence: list List-Id: CoDel AQM discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 03 May 2012 17:52:32 -0000 --- include/linux/pkt_sched.h | 29 +++++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/sch_codel.c | 296 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 337 insertions(+) create mode 100644 net/sched/sch_codel.c diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 0d5b793..c21b720 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -633,4 +633,33 @@ struct tc_qfq_stats { __u32 lmax; }; +/* CODEL */ + +enum { + TCA_CODEL_UNSPEC, + TCA_CODEL_PARMS, + TCA_CODEL_TARGET, + TCA_CODEL_DEPTH, + TCA_CODEL_MINBYTES, + TCA_CODEL_INTERVAL, + __TCA_CODEL_MAX +}; + +#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1) +#define TC_CODEL_ECN 1 + +struct tc_codel_qopt { + __u32 flags; /* flags (e.g. ecn) */ + __u32 target; /* max delay, in us */ + __u32 depth; /* queue depth in packets */ + __u32 minbytes; /* MTU (usually) */ + __u32 interval; /* Sliding min time window width (us) */ +}; + +struct tc_codel_stats { + __u64 drops; + __u64 marks; +}; + + #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2590e91..8106c42 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -250,6 +250,17 @@ config NET_SCH_QFQ If unsure, say N. +config NET_SCH_CODEL + tristate "Controlled Delay AQM (CODEL)" + help + Say Y here if you want to use the Controlled Delay (CODEL) + packet scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_codel. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index dc5889c..41130b5 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o +obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c new file mode 100644 index 0000000..159df47 --- /dev/null +++ b/net/sched/sch_codel.c @@ -0,0 +1,296 @@ +/* + * net/sched/sch_codel.c A Codel implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ns2 simulation code presented by Kathie Nichols + * Authors: Dave Täht + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MS2TIME(a) (ns_to_ktime( (u64) a*1000000)) +#define DEFAULT_CODEL_DEPTH 1000 + +/* Per-queue state (codel_queue_t instance variables) */ + +struct codel_sched_data { + u32 flags; + u32 minbytes; + u32 count; /* packets dropped since we went into drop state */ + bool dropping; /* 1 if in drop state, might just add to flags */ + ktime_t target; + ktime_t interval; + /* time to declare above q->target (0 if below)*/ + ktime_t first_above_time; + ktime_t drop_next; /* time to drop next packet */ +}; + +struct codel_skb_cb { + ktime_t enqueue_time; + char data[16]; +}; + +static inline struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb) +{ + return (struct codel_skb_cb *)skb->cb; +} + +static inline ktime_t get_enqueue_time(const struct sk_buff *skb) { + struct codel_skb_cb *cb = get_codel_cb(skb); + return cb->enqueue_time; +} + +static inline ktime_t set_enqueue_time(struct sk_buff *skb, ktime_t t ) { + struct codel_skb_cb *cb = get_codel_cb(skb); + cb->enqueue_time = t; + return t; +} + +static inline ktime_t control_law(const struct codel_sched_data *q, ktime_t t) +{ + t.tv64 = t.tv64 + q->interval.tv64 / int_sqrt(q->count); + return t; +} + +/* +static int codel_prob_mark(const struct codel_sched_data *q) +{ + return q->flags & TC_CODEL_ECN; +} +*/ + + /* wrappers for ultimate statistics collection */ + +static int codel_drop(struct sk_buff *skb, struct Qdisc *sch) { + return qdisc_drop(skb,sch); +} + +/* +static int codel_queue_drop(struct Qdisc *sch) { + return qdisc_drop(skb,sch); +} +*/ + +struct sk_buff *codel_dequeue_head(struct Qdisc *sch) { + return(qdisc_dequeue_head(sch)); +} + +bool should_drop(struct sk_buff *skb, struct Qdisc *sch, ktime_t now) +{ + struct codel_sched_data *q = qdisc_priv(sch); + bool drop = 0; + if (skb == NULL) { + q->first_above_time.tv64 = 0; + } else { + ktime_t sojourn_time = ktime_sub(now, get_enqueue_time(skb)); + if (sojourn_time.tv64 < + q->target.tv64 || sch->qstats.backlog < q->minbytes) { +/* went below so we’ll stay below for at least q->interval */ + q->first_above_time.tv64 = 0; + } else { + if (q->first_above_time.tv64 == 0) { + +/* just went above from below. If we stay above + * for at least q->interval we’ll say it’s ok to drop + */ + q->first_above_time = + ktime_add(now,q->interval); + } else if (now.tv64 >= q->first_above_time.tv64) { + drop = 1; + } + } + } + return drop; +} + +static struct sk_buff *codel_dequeue(struct Qdisc *sch) +{ + struct codel_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb = codel_dequeue_head(sch); + ktime_t now; + bool drop; + if (skb == NULL) { + q->dropping = 0; + q->first_above_time.tv64 = 0; + return skb; + } + now = ktime_get(); + drop = should_drop(skb, sch, now); + if (q->dropping) { + if (drop) { +/* sojourn time below target - leave dropping state */ + q->dropping = 0; + } else if (now.tv64 >= q->drop_next.tv64) { +/* + * It’s time for the next drop. Drop the current packet and dequeue the next. + * The dequeue might take us out of dropping state. If not, schedule the + * next drop. A large backlog might result in drop rates so high that the next + * drop should happen now, hence the ‘while’ loop. + */ + while (now.tv64 >= q->drop_next.tv64 && q->dropping) { + codel_drop(skb, sch); + q->count++; + skb = codel_dequeue_head(sch); + if (should_drop(skb,sch,now)) { +/* leave dropping state */ + q->dropping = 0; + } else { +/* and schedule the next drop */ + q->drop_next = + control_law(q,q->drop_next); + } + } + } else if (drop && + ((now.tv64 - q->drop_next.tv64 < + 16 * q->interval.tv64) || + (now.tv64 - q->first_above_time.tv64 >= + 2 * q->interval.tv64))) { + codel_drop(skb, sch); + skb = codel_dequeue_head(sch); + /* engage state machine */ + drop = should_drop(skb,sch,now); + q->dropping = 1; + +/* if min went above target close to when we last went below it + * assume that the drop rate that controlled the queue on the + * last cycle is a good starting point to control it now. + */ + if (now.tv64 - q->drop_next.tv64 < + 16 * q->interval.tv64) { + int c = q->count - 1; + q->count = c < 1 ? 1 : c; + } else { + q->count = 1; + } + q->drop_next = control_law(q,now); + } + } + return skb; +} + + +static int codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + if (likely(skb_queue_len(&sch->q) < sch->limit)) { + set_enqueue_time(skb,ktime_get()); + return qdisc_enqueue_tail(skb, sch); + } + return qdisc_reshape_fail(skb, sch); +} + +static int codel_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct codel_sched_data *q = qdisc_priv(sch); + struct tc_codel_qopt *ctl = nla_data(opt); + unsigned int qlen; + + if (opt->nla_len < nla_attr_size(sizeof(*ctl))) + return -EINVAL; + if (ctl->depth && (ctl->depth < 2 || ctl->depth > 65536)) + return -EINVAL; + if (ctl->minbytes && (ctl->minbytes < 64 || ctl->minbytes > 65536)) + return -EINVAL; + sch_tree_lock(sch); + if (ctl->minbytes) q->minbytes = ctl->minbytes; + if (ctl->flags) q->flags = ctl->flags; + if (ctl->target) q->target = ns_to_ktime((u64) ctl->target * 1000); + if (ctl->interval) q->interval = + ns_to_ktime((u64) ctl->interval * 1000); + + /* something saner than this for depth is probably needed */ + + if (ctl->depth) sch->limit = ctl->depth; + qlen = sch->q.qlen; +// while (sch->q.qlen > ctl->depth) +// codel_drop(skb,sch); +// qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); //? + q->drop_next.tv64 = q->first_above_time.tv64 = 0; + q->dropping = 0; /* exit dropping state */ + sch_tree_unlock(sch); + return 0; +} + +static int codel_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct codel_sched_data *q = qdisc_priv(sch); + q->target = MS2TIME(5); + sch->limit = DEFAULT_CODEL_DEPTH; + q->minbytes = 1500; + q->interval = MS2TIME(100); + if (opt) { + int err = codel_change(sch, opt); + if (err) + return err; + } + +/* if (sch->limit >= 1) + sch->flags |= TCQ_F_CAN_BYPASS; + else + sch->flags &= ~TCQ_F_CAN_BYPASS; +*/ + return 0; +} + +static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct codel_sched_data *q = qdisc_priv(sch); + struct tc_codel_qopt opt; + opt.target = (u32) ktime_to_us(q->target); + opt.interval = (u32) ktime_to_us(q->interval); + opt.depth = sch->limit; + opt.flags = q->flags; + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + return skb->len; + +nla_put_failure: +// nlmsg_trim(skb, b); + return -1; +} + +static void +codel_reset(struct Qdisc *sch) +{ + struct sk_buff *skb; + + while ((skb = codel_dequeue(sch)) != NULL) + kfree_skb(skb); +} + +struct Qdisc_ops codel_qdisc_ops __read_mostly = { + .id = "codel", + .priv_size = sizeof(struct codel_sched_data), + .enqueue = codel_enqueue, + .dequeue = codel_dequeue, + .peek = qdisc_peek_head, +/* .drop = codel_queue_drop, */ + .init = codel_init, + .reset = codel_reset, + .change = codel_change, + .dump = codel_dump, + .owner = THIS_MODULE, +}; +EXPORT_SYMBOL(codel_qdisc_ops); + +static int __init codel_module_init(void) +{ + return register_qdisc(&codel_qdisc_ops); +} +static void __exit codel_module_exit(void) +{ + unregister_qdisc(&codel_qdisc_ops); +} +module_init(codel_module_init) +module_exit(codel_module_exit) +MODULE_LICENSE("GPL"); + -- 1.7.9.5