[Make-wifi-fast] [PATCH RFC/RFT 4/4] mac80211: Apply Airtime-based Queue Limit (AQL) on packet dequeue

Toke Høiland-Jørgensen toke at redhat.com
Thu Sep 19 08:22:48 EDT 2019


From: Toke Høiland-Jørgensen <toke at redhat.com>

Some devices have deep buffers in firmware and/or hardware which prevents
the FQ structure in mac80211 from effectively limiting bufferbloat on the
link. For Ethernet devices we have BQL to limit the lower-level queues, but
this cannot be applied to mac80211 because transmit rates can vary wildly
between packets depending on which station we are transmitting it to.

To overcome this, we can use airtime-based queue limiting (AQL), where we
estimate the transmission time for each packet before dequeueing it, and
use that to limit the amount of data in-flight to the hardware. This idea
was originally implemented as part of the out-of-tree airtime fairness
patch to ath10k[0] in chromiumos.

This patch ports that idea over to mac80211. The basic idea is simple
enough: Whenever we dequeue a packet from the TXQs and send it to the
driver, we estimate its airtime usage, based on the last recorded TX rate
of the station that packet is destined for. We keep a running per-AC total
of airtime queued for the whole device, and when that total climbs above 8
ms' worth of data (corresponding to two maximum-sized aggregates), we
simply throttle the queues until it drops down again.

The estimated airtime for each skb is stored in the tx_info, so we can
subtract the same amount from the running total when the skb is freed or
recycled. The throttling mechanism relies on this accounting to be
accurate (i.e., that we are not freeing skbs without subtracting any
airtime they were accounted for), so we put the subtraction into
ieee80211_report_used_skb().

This patch does *not* include any mechanism to wake a throttled TXQ again,
on the assumption that this will happen anyway as a side effect of whatever
freed the skb (most commonly a TX completion).

The throttling mechanism only kicks in if the queued airtime total goes
above the limit. Since mac80211 calculates the time based on the reported
last_tx_time from the driver, the whole throttling mechanism only kicks in
for drivers that actually report this value. With the exception of
multicast, where we always calculate an estimated tx time on the assumption
that multicast is transmitted at the lowest (6 Mbps) rate.

The throttling added in this patch is in addition to any throttling already
performed by the airtime fairness mechanism, and in principle the two
mechanisms are orthogonal (and currently also uses two different sources of
airtime). In the future, we could amend this, using the airtime estimates
calculated by this mechanism as a fallback input to the airtime fairness
scheduler, to enable airtime fairness even on drivers that don't have a
hardware source of airtime usage for each station.

[0] https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/588190/13/drivers/net/wireless-4.2/ath/ath10k/mac.c#3845

Signed-off-by: Toke Høiland-Jørgensen <toke at redhat.com>
---
 net/mac80211/debugfs.c     |   24 ++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |    7 +++++++
 net/mac80211/status.c      |   22 ++++++++++++++++++++++
 net/mac80211/tx.c          |   38 +++++++++++++++++++++++++++++++++++++-
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 568b3b276931..c846c6e7f3e3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -148,6 +148,29 @@ static const struct file_operations aqm_ops = {
 	.llseek = default_llseek,
 };
 
+static ssize_t airtime_queued_read(struct file *file,
+				   char __user *user_buf,
+				   size_t count,
+				   loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char buf[32 * IEEE80211_NUM_ACS], *p = buf;
+	u8 ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+		p += scnprintf(p, sizeof(buf)+buf-p, "AC%u: %u\n", ac,
+			       local->airtime_queued[ac]);
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+
+}
+
+static const struct file_operations airtime_queued_ops = {
+	.read = airtime_queued_read,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
 static ssize_t force_tx_status_read(struct file *file,
 				    char __user *user_buf,
 				    size_t count,
@@ -440,6 +463,7 @@ void debugfs_hw_add(struct ieee80211_local *local)
 
 	debugfs_create_u16("airtime_flags", 0600,
 			   phyd, &local->airtime_flags);
+	DEBUGFS_ADD(airtime_queued);
 
 	statsd = debugfs_create_dir("statistics", phyd);
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9de5390411ba..6bebfe80ed29 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -63,6 +63,12 @@ struct ieee80211_local;
 #define IEEE80211_RECIPROCAL_DIVISOR 0x100000000ULL
 #define IEEE80211_RECIPROCAL_SHIFT 32
 
+/* constants used for airtime queue limit */
+#define IEEE80211_AIRTIME_QUEUE_LIMIT 8000 /* 8 ms */
+#define IEEE80211_AIRTIME_OVERHEAD 100
+#define IEEE80211_AIRTIME_OVERHEAD_IFS 16
+#define IEEE80211_AIRTIME_MINRATE_RECIPROCAL (IEEE80211_RECIPROCAL_DIVISOR / 6000)
+
 /*
  * Some APs experience problems when working with U-APSD. Decreasing the
  * probability of that happening by using legacy mode for all ACs but VO isn't
@@ -1144,6 +1150,7 @@ struct ieee80211_local {
 	spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
 	struct list_head active_txqs[IEEE80211_NUM_ACS];
 	u16 schedule_round[IEEE80211_NUM_ACS];
+	u32 airtime_queued[IEEE80211_NUM_ACS];
 
 	u16 airtime_flags;
 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index ab8ba5835ca0..e63a70657050 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -711,6 +711,28 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
 		skb->wifi_acked = acked;
 	}
 
+	if (info->control.tx_time_est) {
+		u8 *qc, ac;
+		int tid;
+
+		if (ieee80211_is_data_qos(hdr->frame_control)) {
+			qc = ieee80211_get_qos_ctl(hdr);
+			tid = qc[0] & 0xf;
+			ac = ieee80211_ac_from_tid(tid);
+		} else {
+			ac = IEEE80211_AC_BE;
+		}
+
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		/* sanity check to make sure we don't underflow */
+		if (WARN_ON_ONCE(info->control.tx_time_est > local->airtime_queued[ac]))
+			local->airtime_queued[ac] = 0;
+		else
+			local->airtime_queued[ac] -= info->control.tx_time_est;
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+
+	}
+
 	ieee80211_led_tx(local);
 
 	if (skb_has_frag_list(skb)) {
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1fa422782905..2b8564621ecf 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3546,9 +3546,19 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result r;
 	struct ieee80211_vif *vif = txq->vif;
+	u32 airtime = 0, airtime_queued;
+	u8 ac = txq->ac;
+	u32 pktlen;
 
 	WARN_ON_ONCE(softirq_count() == 0);
 
+	spin_lock_bh(&local->active_txq_lock[ac]);
+	airtime_queued = local->airtime_queued[ac];
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+
+	if (airtime_queued > IEEE80211_AIRTIME_QUEUE_LIMIT)
+		return NULL;
+
 begin:
 	spin_lock_bh(&fq->lock);
 
@@ -3581,8 +3591,19 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	tx.skb = skb;
 	tx.sdata = vif_to_sdata(info->control.vif);
 
-	if (txq->sta)
+	pktlen = skb->len + 38;
+	if (txq->sta) {
 		tx.sta = container_of(txq->sta, struct sta_info, sta);
+		if (tx.sta->last_tx_bitrate) {
+			airtime = (pktlen * 8 * 1000 *
+				   tx.sta->last_tx_bitrate_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT;
+			airtime += IEEE80211_AIRTIME_OVERHEAD;
+		}
+	} else {
+		airtime = (pktlen * 8 * 1000 *
+			   IEEE80211_AIRTIME_MINRATE_RECIPROCAL) >> IEEE80211_RECIPROCAL_SHIFT;
+		airtime += IEEE80211_AIRTIME_OVERHEAD;
+	}
 
 	/*
 	 * The key can be removed while the packet was queued, so need to call
@@ -3659,6 +3680,15 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	}
 
 	IEEE80211_SKB_CB(skb)->control.vif = vif;
+
+	if (airtime) {
+		info->control.tx_time_est = airtime;
+
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		local->airtime_queued[ac] += airtime;
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
 	return skb;
 
 out:
@@ -3676,6 +3706,9 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
 
+	if (local->airtime_queued[ac] > IEEE80211_AIRTIME_QUEUE_LIMIT)
+		goto out;
+
  begin:
 	txqi = list_first_entry_or_null(&local->active_txqs[ac],
 					struct txq_info,
@@ -3753,6 +3786,9 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
 
+	if (local->airtime_queued[ac] > IEEE80211_AIRTIME_QUEUE_LIMIT)
+		goto out;
+
 	if (!txqi->txq.sta)
 		goto out;
 



More information about the Make-wifi-fast mailing list