I am encouraged... more discussion on the netdev list...

---------- Forwarded message ----------
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Sat, Sep 17, 2011 at 1:04 AM
Subject: [net-next 11/13] igb: Make Tx budget for NAPI user adjustable
To: davem@davemloft.net
Cc: Alexander Duyck <alexander.h.duyck@intel.com>, netdev@vger.kernel.org, gospo@redhat.com, Jeff Kirsher <jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to make the NAPI budget limits for transmit
adjustable. By doing this it is possible to tune the value for optimal
performance with applications such as routing.
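
[As a quick illustration of how this knob is meant to be driven from
userspace (not part of the patch itself): the limit rides on the standard
ethtool coalescing interface as tx_max_coalesced_frames_irq, which a
reasonably recent ethtool should expose as "ethtool -C ethX tx-frames-irq N".
A minimal sketch of poking the same field directly through the SIOCETHTOOL
ioctl might look like the following; the interface name "eth0" and the
value 64 are illustrative assumptions, not anything the patch mandates.

/* sketch: adjust the igb Tx cleanup budget via the ethtool coalesce API */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_coalesce ecmd = { .cmd = ETHTOOL_GCOALESCE };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* assumed name */
	ifr.ifr_data = (void *)&ecmd;

	/* read the current settings so only one field is changed */
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GCOALESCE");
		close(fd);
		return 1;
	}

	/* the field igb_set_coalesce() now honors; 64 is an example */
	ecmd.tx_max_coalesced_frames_irq = 64;
	ecmd.cmd = ETHTOOL_SCOALESCE;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("ETHTOOL_SCOALESCE");

	close(fd);
	return 0;
}

Note that igb_set_coalesce() below only picks the value up when
tx_max_coalesced_frames_irq is non-zero, so passing 0 leaves the existing
limit (default 128 per the new define) untouched.]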

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h         |    3 +
 drivers/net/ethernet/intel/igb/igb_ethtool.c |    6 +
 drivers/net/ethernet/intel/igb/igb_main.c    |  136 ++++++++++++++++----------
 3 files changed, 92 insertions(+), 53 deletions(-)
<br>
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h<br>
index b725937..cfa590c8 100644<br>
--- a/drivers/net/ethernet/intel/igb/igb.h<br>
+++ b/drivers/net/ethernet/intel/igb/igb.h<br>
@@ -47,6 +47,7 @@ struct igb_adapter;<br>
<br>
/* TX/RX descriptor defines */<br>
#define IGB_DEFAULT_TXD 256<br>
+#define IXGBE_DEFAULT_TX_WORK 128<br>
#define IGB_MIN_TXD 80<br>
#define IGB_MAX_TXD 4096<br>
<br>
@@ -177,6 +178,7 @@ struct igb_q_vector {<br>
<br>
u32 eims_value;<br>
u16 cpu;<br>
+ u16 tx_work_limit;<br>
<br>
u16 itr_val;<br>
u8 set_itr;<br>
@@ -266,6 +268,7 @@ struct igb_adapter {<br>
u16 rx_itr;<br>
<br>
/* TX */<br>
+ u16 tx_work_limit;<br>
u32 tx_timeout_count;<br>
int num_tx_queues;<br>
struct igb_ring *tx_ring[16];<br>
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index f231d82..f6da820 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1989,6 +1989,9 @@ static int igb_set_coalesce(struct net_device *netdev,
 	if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
 		return -EINVAL;
 
+	if (ec->tx_max_coalesced_frames_irq)
+		adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
 	/* If ITR is disabled, disable DMAC */
 	if (ec->rx_coalesce_usecs == 0) {
 		if (adapter->flags & IGB_FLAG_DMAC)
@@ -2011,6 +2014,7 @@ static int igb_set_coalesce(struct net_device *netdev,
 
 	for (i = 0; i < adapter->num_q_vectors; i++) {
 		struct igb_q_vector *q_vector = adapter->q_vector[i];
+		q_vector->tx_work_limit = adapter->tx_work_limit;
 		if (q_vector->rx_ring)
 			q_vector->itr_val = adapter->rx_itr_setting;
 		else
@@ -2033,6 +2037,8 @@ static int igb_get_coalesce(struct net_device *netdev,
 	else
 		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
 
+	ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
+
 	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) {
 		if (adapter->tx_itr_setting <= 3)
 			ec->tx_coalesce_usecs = adapter->tx_itr_setting;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 7ad25e8..989e774 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -136,8 +136,8 @@ static irqreturn_t igb_msix_ring(int irq, void *);
 static void igb_update_dca(struct igb_q_vector *);
 static void igb_setup_dca(struct igb_adapter *);
 #endif /* CONFIG_IGB_DCA */
-static bool igb_clean_tx_irq(struct igb_q_vector *);
 static int igb_poll(struct napi_struct *, int);
+static bool igb_clean_tx_irq(struct igb_q_vector *);
 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
@@ -1120,6 +1120,7 @@ static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
 	q_vector->tx_ring = adapter->tx_ring[ring_idx];
 	q_vector->tx_ring->q_vector = q_vector;
 	q_vector->itr_val = adapter->tx_itr_setting;
+	q_vector->tx_work_limit = adapter->tx_work_limit;
 	if (q_vector->itr_val && q_vector->itr_val <= 3)
 		q_vector->itr_val = IGB_START_ITR;
 }
@@ -2388,11 +2389,17 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
 
 	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
 
+	/* set default ring sizes */
 	adapter->tx_ring_count = IGB_DEFAULT_TXD;
 	adapter->rx_ring_count = IGB_DEFAULT_RXD;
+
+	/* set default ITR values */
 	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
 	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
 
+	/* set default work limits */
+	adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
+
 	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
 				  VLAN_HLEN;
 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
@@ -5496,7 +5503,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
 		igb_update_dca(q_vector);
 #endif
 	if (q_vector->tx_ring)
-		clean_complete = !!igb_clean_tx_irq(q_vector);
+		clean_complete = igb_clean_tx_irq(q_vector);
 
 	if (q_vector->rx_ring)
 		clean_complete &= igb_clean_rx_irq(q_vector, budget);
@@ -5578,64 +5585,69 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct igb_ring *tx_ring = q_vector->tx_ring;
-	struct net_device *netdev = tx_ring->netdev;
-	struct e1000_hw *hw = &adapter->hw;
-	struct igb_buffer *buffer_info;
-	union e1000_adv_tx_desc *tx_desc, *eop_desc;
+	struct igb_buffer *tx_buffer;
+	union e1000_adv_tx_desc *tx_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
-	unsigned int i, eop, count = 0;
-	bool cleaned = false;
+	unsigned int budget = q_vector->tx_work_limit;
+	u16 i = tx_ring->next_to_clean;
 
-	i = tx_ring->next_to_clean;
-	eop = tx_ring->buffer_info[i].next_to_watch;
-	eop_desc = IGB_TX_DESC(tx_ring, eop);
+	if (test_bit(__IGB_DOWN, &adapter->state))
+		return true;
 
-	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
-	       (count < tx_ring->count)) {
-		rmb(); /* read buffer_info after eop_desc status */
-		for (cleaned = false; !cleaned; count++) {
-			tx_desc = IGB_TX_DESC(tx_ring, i);
-			buffer_info = &tx_ring->buffer_info[i];
-			cleaned = (i == eop);
+	tx_buffer = &tx_ring->buffer_info[i];
+	tx_desc = IGB_TX_DESC(tx_ring, i);
 
-			if (buffer_info->skb) {
-				total_bytes += buffer_info->bytecount;
-				/* gso_segs is currently only valid for tcp */
-				total_packets += buffer_info->gso_segs;
-				igb_tx_hwtstamp(q_vector, buffer_info);
-			}
+	for (; budget; budget--) {
+		u16 eop = tx_buffer->next_to_watch;
+		union e1000_adv_tx_desc *eop_desc;
+
+		eop_desc = IGB_TX_DESC(tx_ring, eop);
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* prevent any other reads prior to eop_desc being verified */
+		rmb();
 
-			igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
+		do {
 			tx_desc->wb.status = 0;
+			if (likely(tx_desc == eop_desc)) {
+				eop_desc = NULL;
+
+				total_bytes += tx_buffer->bytecount;
+				total_packets += tx_buffer->gso_segs;
+				igb_tx_hwtstamp(q_vector, tx_buffer);
+			}
+
+			igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
 
+			tx_buffer++;
+			tx_desc++;
 			i++;
-			if (i == tx_ring->count)
+			if (unlikely(i == tx_ring->count)) {
 				i = 0;
-		}
-		eop = tx_ring->buffer_info[i].next_to_watch;
-		eop_desc = IGB_TX_DESC(tx_ring, eop);
+				tx_buffer = tx_ring->buffer_info;
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+			}
+		} while (eop_desc);
 	}
 
 	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->tx_syncp);
+	tx_ring->tx_stats.bytes += total_bytes;
+	tx_ring->tx_stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->tx_syncp);
+	tx_ring->total_bytes += total_bytes;
+	tx_ring->total_packets += total_packets;
 
-	if (unlikely(count &&
-		     netif_carrier_ok(netdev) &&
-		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
-		/* Make sure that anybody stopping the queue after this
-		 * sees the new next_to_clean.
-		 */
-		smp_mb();
-		if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
-		    !(test_bit(__IGB_DOWN, &adapter->state))) {
-			netif_wake_subqueue(netdev, tx_ring->queue_index);
+	if (tx_ring->detect_tx_hung) {
+		struct e1000_hw *hw = &adapter->hw;
+		u16 eop = tx_ring->buffer_info[i].next_to_watch;
+		union e1000_adv_tx_desc *eop_desc;
 
-			u64_stats_update_begin(&tx_ring->tx_syncp);
-			tx_ring->tx_stats.restart_queue++;
-			u64_stats_update_end(&tx_ring->tx_syncp);
-		}
-	}
+		eop_desc = IGB_TX_DESC(tx_ring, eop);
 
-	if (tx_ring->detect_tx_hung) {
 		/* Detect a transmit hang in hardware, this serializes the
 		 * check with the clearing of time_stamp and movement of i */
 		tx_ring->detect_tx_hung = false;
@@ -5666,16 +5678,34 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 			eop,
 			jiffies,
 			eop_desc->wb.status);
-		netif_stop_subqueue(netdev, tx_ring->queue_index);
+		netif_stop_subqueue(tx_ring->netdev,
+				    tx_ring->queue_index);
+
+		/* we are about to reset, no point in enabling stuff */
+		return true;
 		}
 	}
-	tx_ring->total_bytes += total_bytes;
-	tx_ring->total_packets += total_packets;
-	u64_stats_update_begin(&tx_ring->tx_syncp);
-	tx_ring->tx_stats.bytes += total_bytes;
-	tx_ring->tx_stats.packets += total_packets;
-	u64_stats_update_end(&tx_ring->tx_syncp);
-	return count < tx_ring->count;
+
+	if (unlikely(total_packets &&
+		     netif_carrier_ok(tx_ring->netdev) &&
+		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !(test_bit(__IGB_DOWN, &adapter->state))) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			u64_stats_update_begin(&tx_ring->tx_syncp);
+			tx_ring->tx_stats.restart_queue++;
+			u64_stats_update_end(&tx_ring->tx_syncp);
+		}
+	}
+
+	return !!budget;
 }
 
 static inline void igb_rx_checksum(struct igb_ring *ring,
--
1.7.6
--
Dave Täht
SKYPE: davetaht
US Tel: 1-239-829-5608
http://the-edge.blogspot.com