From 651f8c07ba4fb8e1ccdfaa9cc2eb94b51c902544 Mon Sep 17 00:00:00 2001 From: Elie Bouttier Date: Thu, 11 Dec 2014 19:02:17 +0100 Subject: [PATCH] implementing initial spreading using fq sched --- Documentation/networking/ip-sysctl.txt | 13 +++++++++++++ include/net/tcp.h | 2 ++ include/uapi/linux/sysctl.h | 2 ++ net/ipv4/sysctl_net_ipv4.c | 15 +++++++++++++++ net/ipv4/tcp_input.c | 33 ++++++++++++++++++++++++++++++++- net/sched/sch_fq.c | 5 +++-- 6 files changed, 67 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 8a984e9..cbe1c39 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -588,6 +588,19 @@ tcp_challenge_ack_limit - INTEGER in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) Default: 100 +tcp_initial_spreading_rate_min - INTEGER + Minimal rate value computed by initial spreading algorithm. + Disabled if set to zero. + See + https://tools.ietf.org/html/draft-sallantin-tcpm-initial-spreading-00. + We have the relation rate_min = MTU / T_spreading. + Default: 0 + +tcp_initial_spreading_debug - BOOLEAN + When non-zero, a kernel log message will print the rate computed by Initial + Spreading each time a TCP connection is initialized. 
+ Default: 0 + UDP variables: udp_mem - vector of 3 INTEGERs: min, pressure, max diff --git a/include/net/tcp.h b/include/net/tcp.h index 70e55d2..66c6305 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -282,6 +282,8 @@ extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; +extern int sysctl_tcp_initial_spreading_rate_min; +extern int sysctl_tcp_initial_spreading_debug; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 6d67213..355e741 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -425,6 +425,8 @@ enum NET_TCP_ALLOWED_CONG_CONTROL=123, NET_TCP_MAX_SSTHRESH=124, NET_TCP_FRTO_RESPONSE=125, + NET_TCP_INITIAL_SPREADING_RATE_MIN=126, + NET_TCP_INITIAL_SPREADING_DEBUG=127, }; enum { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3d69ec8..47827a4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -733,6 +733,21 @@ static struct ctl_table ipv4_table[] = { .extra2 = &gso_max_segs, }, { + .procname = "tcp_initial_spreading_rate_min", + .data = &sysctl_tcp_initial_spreading_rate_min, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_initial_spreading_debug", + .data = &sysctl_tcp_initial_spreading_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c53b7f3..f02f389 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -99,6 +99,9 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; +int 
sysctl_tcp_initial_spreading_rate_min __read_mostly = 0; +int sysctl_tcp_initial_spreading_debug __read_mostly = 0; + #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ @@ -763,6 +766,34 @@ static void tcp_update_pacing_rate(struct sock *sk) sk->sk_max_pacing_rate); } +/* Seed sk->sk_pacing_rate with mss * cwnd / srtt, raised to the rate_min sysctl + * floor and capped at sk_max_pacing_rate, so FQ can do TCP initial spreading. + * See + * https://tools.ietf.org/html/draft-sallantin-tcpm-initial-spreading-00 + */ +static void tcp_set_initial_pacing_rate(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u64 rate; + + /* 100 % of current rate (mss * cwnd / srtt); no division if srtt <= 8 + 2 */ + rate = (u64)tp->mss_cache * (HZ << 3) * tp->snd_cwnd; + + if (tp->srtt > 8 + 2) + do_div(rate, tp->srtt); + + /* rate_min = mtu / T_spreading (see draft); values <= 0 disable the floor */ + if (sysctl_tcp_initial_spreading_rate_min > 0) + rate = max_t(u64, rate, sysctl_tcp_initial_spreading_rate_min); + + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + + if (sysctl_tcp_initial_spreading_debug) + printk(KERN_INFO "tcp_pacing_rate: %llu\n", rate); + + ACCESS_ONCE(sk->sk_pacing_rate) = rate; +} + /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. 
*/ @@ -5767,7 +5798,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } else tcp_init_metrics(sk); - tcp_update_pacing_rate(sk); + tcp_set_initial_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 95d8439..b4063a5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -708,8 +708,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = 10000; q->flow_plimit = 100; - q->quantum = 2 * psched_mtu(qdisc_dev(sch)); - q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); + /* We change default values for initial spreading use-case */ + q->quantum = 1 * psched_mtu(qdisc_dev(sch)); + q->initial_quantum = 1 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; q->rate_enable = 1; -- 2.2.2