net: Add byte queue limits (bql) for reduced buffer-bloat

[prerequisite] net: new counter for tx_timeout errors in sysfs
bnx2,bnx2x,e1000e,forcedeth,igb,ixgbe,r8169,sfc,skge,sky2,tg3: Add support for bql


svn path=/dists/sid/linux/; revision=19307
Ben Hutchings 2012-08-04 13:32:28 +00:00
parent 608bf2b1f8
commit e398cff2ef
33 changed files with 2876 additions and 0 deletions

debian/changelog

@@ -52,6 +52,10 @@ linux (3.2.25-1) UNRELEASED; urgency=low
(Closes: #682726)
* debugfs: Add mode, uid and gid mount options; set default mode to 700
(Closes: #681418)
+ * net: new counter for tx_timeout errors in sysfs
+ * net: Add byte queue limits (bql) for reduced buffer-bloat
+ * bnx2,bnx2x,e1000e,forcedeth,igb,ixgbe,r8169,sfc,skge,sky2,tg3:
+ Add support for bql
-- Ben Hutchings <ben@decadent.org.uk> Tue, 24 Jul 2012 02:20:37 +0100


@@ -0,0 +1,60 @@
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 29 Nov 2011 11:53:05 +0000
Subject: bnx2: Support for byte queue limits
commit e98319098885859e34c23cc8a130b6b8668a6abe upstream.
Changes to bnx2 to use byte queue limits.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/broadcom/bnx2.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index d573169..787e175 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2810,6 +2810,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
struct bnx2_tx_ring_info *txr = &bnapi->tx_ring;
u16 hw_cons, sw_cons, sw_ring_cons;
int tx_pkt = 0, index;
+ unsigned int tx_bytes = 0;
struct netdev_queue *txq;
index = (bnapi - bp->bnx2_napi);
@@ -2864,6 +2865,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
sw_cons = NEXT_TX_BD(sw_cons);
+ tx_bytes += skb->len;
dev_kfree_skb(skb);
tx_pkt++;
if (tx_pkt == budget)
@@ -2873,6 +2875,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
hw_cons = bnx2_get_hw_tx_cons(bnapi);
}
+ netdev_tx_completed_queue(txq, tx_pkt, tx_bytes);
txr->hw_tx_cons = hw_cons;
txr->tx_cons = sw_cons;
@@ -5393,6 +5396,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp)
}
dev_kfree_skb(skb);
}
+ netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, i));
}
}
@@ -6546,6 +6550,8 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
txbd->tx_bd_vlan_tag_flags |= TX_BD_FLAGS_END;
+ netdev_tx_sent_queue(txq, skb->len);
+
prod = NEXT_TX_BD(prod);
txr->tx_prod_bseq += skb->len;
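
The hooks added above follow the three-call pattern that every driver conversion in this series repeats: account bytes when descriptors are posted, report completions once per clean pass, and reset the counters when the ring is flushed. A minimal sketch of that pattern, with hypothetical my_* driver names (only the netdev_tx_* calls from <linux/netdevice.h> are the real API):

/* Sketch of the BQL driver pattern used throughout this series.
 * "my_*" names are hypothetical; the netdev_tx_* hooks are real. */
static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

    /* ... post descriptors to the hardware ring ... */
    netdev_tx_sent_queue(txq, skb->len);    /* account bytes at send time */
    return NETDEV_TX_OK;
}

static void my_tx_clean(struct net_device *dev)
{
    struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
    unsigned int pkts = 0, bytes = 0;

    /* ... walk completed descriptors, summing pkts and bytes ... */
    netdev_tx_completed_queue(txq, pkts, bytes); /* once per clean pass */
}

static void my_free_tx_ring(struct net_device *dev)
{
    /* ... drop any skbs still on the ring, then resync the counters ... */
    netdev_tx_reset_queue(netdev_get_tx_queue(dev, 0));
}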


@@ -0,0 +1,40 @@
From: Dmitry Kravkov <dmitry@broadcom.com>
Date: Tue, 6 Dec 2011 02:05:12 +0000
Subject: bnx2x: fix crash while ethtool -t
commit 73dbb5e1627a35c8ab81f3813c096e9e7aaabaaa upstream.
commit 2df1a70aaf70e8dff11b89b938a5f317556ee640 "bnx2x: Support
for byte queue limits" has introduced an asymmetry in usage of
netdev_tx_completed_queue and netdev_tx_sent_queue. Missing
call to netdev_tx_sent_queue causes the crash during ethtool -t.
The patch adds the missing call.
Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[bwh: Backported to 3.2: adjust context]
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1740,6 +1740,7 @@
struct sw_rx_bd *rx_buf;
u16 len;
int rc = -ENODEV;
+ struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
/* check the loopback mode */
switch (loopback_mode) {
@@ -1784,6 +1785,8 @@
tx_start_idx = le16_to_cpu(*txdata->tx_cons_sb);
rx_start_idx = le16_to_cpu(*fp_rx->rx_cons_sb);
+ netdev_tx_sent_queue(txq, skb->len);
+
pkt_prod = txdata->tx_pkt_prod++;
tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
tx_buf->first_bd = txdata->tx_bd_prod;


@@ -0,0 +1,32 @@
From: Dmitry Kravkov <dmitry@broadcom.com>
Date: Sun, 13 Nov 2011 04:34:23 +0000
Subject: bnx2x: remove unused variable
commit ad756594a8d88ffc048d14b8d5c02971e08856ce upstream.
Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 1ace946..f946a6e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1094,13 +1094,11 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp)
for_each_cos_in_tx_queue(fp, cos) {
struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
- u16 bd_cons = txdata->tx_bd_cons;
u16 sw_prod = txdata->tx_pkt_prod;
u16 sw_cons = txdata->tx_pkt_cons;
while (sw_cons != sw_prod) {
- bd_cons = bnx2x_free_tx_pkt(bp, txdata,
- TX_BD(sw_cons));
+ bnx2x_free_tx_pkt(bp, txdata, TX_BD(sw_cons));
sw_cons++;
}
}


@@ -0,0 +1,111 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:37 +0000
Subject: bnx2x: Support for byte queue limits
commit 2df1a70aaf70e8dff11b89b938a5f317556ee640 upstream.
Changes to bnx2x to use byte queue limits.
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 26 +++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8336c78..42ce566 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -102,7 +102,8 @@ int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
* return idx of last bd freed
*/
static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
- u16 idx)
+ u16 idx, unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
{
struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
struct eth_tx_start_bd *tx_start_bd;
@@ -159,6 +160,10 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
/* release skb */
WARN_ON(!skb);
+ if (skb) {
+ (*pkts_compl)++;
+ (*bytes_compl) += skb->len;
+ }
dev_kfree_skb_any(skb);
tx_buf->first_bd = 0;
tx_buf->skb = NULL;
@@ -170,6 +175,7 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
{
struct netdev_queue *txq;
u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
+ unsigned int pkts_compl = 0, bytes_compl = 0;
#ifdef BNX2X_STOP_ON_ERROR
if (unlikely(bp->panic))
@@ -189,10 +195,14 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
" pkt_cons %u\n",
txdata->txq_index, hw_cons, sw_cons, pkt_cons);
- bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);
+ bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons,
+ &pkts_compl, &bytes_compl);
+
sw_cons++;
}
+ netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
+
txdata->tx_pkt_cons = sw_cons;
txdata->tx_bd_cons = bd_cons;
@@ -1077,14 +1087,18 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp)
struct bnx2x_fastpath *fp = &bp->fp[i];
for_each_cos_in_tx_queue(fp, cos) {
struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
+ unsigned pkts_compl = 0, bytes_compl = 0;
u16 sw_prod = txdata->tx_pkt_prod;
u16 sw_cons = txdata->tx_pkt_cons;
while (sw_cons != sw_prod) {
- bnx2x_free_tx_pkt(bp, txdata, TX_BD(sw_cons));
+ bnx2x_free_tx_pkt(bp, txdata, TX_BD(sw_cons),
+ &pkts_compl, &bytes_compl);
sw_cons++;
}
+ netdev_tx_reset_queue(
+ netdev_get_tx_queue(bp->dev, txdata->txq_index));
}
}
}
@@ -2788,6 +2802,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0,
skb_frag_size(frag), DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
+ unsigned int pkts_compl = 0, bytes_compl = 0;
DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
"dropping packet...\n");
@@ -2799,7 +2814,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
*/
first_bd->nbd = cpu_to_le16(nbd);
bnx2x_free_tx_pkt(bp, txdata,
- TX_BD(txdata->tx_pkt_prod));
+ TX_BD(txdata->tx_pkt_prod),
+ &pkts_compl, &bytes_compl);
return NETDEV_TX_OK;
}
@@ -2860,6 +2876,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
pbd_e2->parsing_data);
DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod);
+ netdev_tx_sent_queue(txq, skb->len);
+
txdata->tx_pkt_prod++;
/*
* Make sure that the BD data is updated before updating the producer


@@ -0,0 +1,55 @@
From: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Date: Wed, 30 May 2012 12:25:37 +0000
Subject: bql: Avoid possible inconsistent calculation.
commit 914bec1011a25f65cdc94988a6f974bfb9a3c10d upstream.
dql->num_queued could change while processing dql_completed().
To provide consistent calculation, added an on stack variable.
Signed-off-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
lib/dynamic_queue_limits.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index 0fafa77..0777c5a 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -17,16 +17,18 @@
void dql_completed(struct dql *dql, unsigned int count)
{
unsigned int inprogress, prev_inprogress, limit;
- unsigned int ovlimit, completed;
+ unsigned int ovlimit, completed, num_queued;
bool all_prev_completed;
+ num_queued = ACCESS_ONCE(dql->num_queued);
+
/* Can't complete more than what's in queue */
- BUG_ON(count > dql->num_queued - dql->num_completed);
+ BUG_ON(count > num_queued - dql->num_completed);
completed = dql->num_completed + count;
limit = dql->limit;
- ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
- inprogress = dql->num_queued - completed;
+ ovlimit = POSDIFF(num_queued - dql->num_completed, limit);
+ inprogress = num_queued - completed;
prev_inprogress = dql->prev_num_queued - dql->num_completed;
all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued);
@@ -106,7 +108,7 @@ void dql_completed(struct dql *dql, unsigned int count)
dql->prev_ovlimit = ovlimit;
dql->prev_last_obj_cnt = dql->last_obj_cnt;
dql->num_completed = completed;
- dql->prev_num_queued = dql->num_queued;
+ dql->prev_num_queued = num_queued;
}
EXPORT_SYMBOL(dql_completed);
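
The race being fixed: dql_queued() may run concurrently under a different lock, so dql->num_queued can advance between two reads inside dql_completed(), leaving ovlimit and inprogress derived from different snapshots. A standalone, single-threaded illustration with made-up numbers (userspace stand-in, not kernel code):

#include <stdio.h>

/* Minimal stand-in for struct dql, just enough to show the hazard. */
struct dql { unsigned int num_queued, num_completed, limit; };

#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)

int main(void)
{
    struct dql dql = { .num_queued = 1000, .num_completed = 0, .limit = 500 };
    unsigned int count = 1000;

    /* first read of num_queued: 1000 queued, 500 over the limit */
    unsigned int ovlimit = POSDIFF(dql.num_queued - dql.num_completed, dql.limit);

    /* a concurrent dql_queued() advances the counter "here" */
    dql.num_queued += 300;

    /* second read sees the newer value */
    unsigned int completed = dql.num_completed + count;
    unsigned int inprogress = dql.num_queued - completed;

    /* ovlimit=500 yet inprogress=300: derived from two different
     * snapshots, the values describe no single queue state; the fix
     * reads num_queued once via ACCESS_ONCE() and reuses it */
    printf("ovlimit=%u inprogress=%u\n", ovlimit, inprogress);
    return 0;
}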


@@ -0,0 +1,70 @@
From: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Date: Wed, 30 May 2012 12:25:19 +0000
Subject: bql: Avoid unneeded limit decrement.
commit 25426b794efdc70dde7fd3134dc56fac3e7d562d upstream.
When below pattern is observed,
TIME
dql_queued() dql_completed() |
a) initial state |
|
b) X bytes queued V
c) Y bytes queued
d) X bytes completed
e) Z bytes queued
f) Y bytes completed
a) dql->limit has already some value and there is no in-flight packet.
b) X bytes queued.
c) Y bytes queued and excess limit.
d) X bytes completed and dql->prev_ovlimit is set and also
dql->prev_num_queued is set Y.
e) Z bytes queued.
f) Y bytes completed. inprogress and prev_inprogress are true.
At f), according to the comment, all_prev_completed becomes
true and limit should be increased. But POSDIFF() ignores
(completed == dql->prev_num_queued) case, so limit is decreased.
Signed-off-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Denys Fedoryshchenko <denys@visp.net.lb>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
lib/dynamic_queue_limits.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index c87eb76..0fafa77 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -11,12 +11,14 @@
#include <linux/dynamic_queue_limits.h>
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
+#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
/* Records completed count and recalculates the queue limit */
void dql_completed(struct dql *dql, unsigned int count)
{
unsigned int inprogress, prev_inprogress, limit;
- unsigned int ovlimit, all_prev_completed, completed;
+ unsigned int ovlimit, completed;
+ bool all_prev_completed;
/* Can't complete more than what's in queue */
BUG_ON(count > dql->num_queued - dql->num_completed);
@@ -26,7 +28,7 @@ void dql_completed(struct dql *dql, unsigned int count)
ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
inprogress = dql->num_queued - completed;
prev_inprogress = dql->prev_num_queued - dql->num_completed;
- all_prev_completed = POSDIFF(completed, dql->prev_num_queued);
+ all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued);
if ((ovlimit && !inprogress) ||
(dql->prev_ovlimit && all_prev_completed)) {
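
The off-by-one is at the equality boundary: when completed == prev_num_queued, everything queued in the previous interval has in fact completed, yet POSDIFF() yields 0 (false), so the limit was wrongly decreased. A standalone illustration:

#include <stdio.h>
#include <stdbool.h>

#define POSDIFF(A, B)  ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)

int main(void)
{
    unsigned int completed = 3000, prev_num_queued = 3000;

    /* old test: 0 at equality, i.e. "not all previous work completed" */
    bool old_test = POSDIFF(completed, prev_num_queued);

    /* new test treats equality as "all previous work completed" */
    bool new_test = AFTER_EQ(completed, prev_num_queued);

    printf("old=%d new=%d\n", old_test, new_test);  /* old=0 new=1 */
    return 0;
}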


@@ -0,0 +1,325 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:09 +0000
Subject: bql: Byte queue limits
commit 114cf5802165ee93e3ab461c9c505cd94a08b800 upstream.
Networking stack support for byte queue limits, uses dynamic queue
limits library. Byte queue limits are maintained per transmit queue,
and a dql structure has been added to netdev_queue structure for this
purpose.
Configuration of bql is in the tx-<n> sysfs directory for the queue
under the byte_queue_limits directory. Configuration includes:
limit_min, bql minimum limit
limit_max, bql maximum limit
hold_time, bql slack hold time
Also under the directory are:
limit, current byte limit
inflight, current number of bytes on the queue
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[bwh: Adjust context in net/Kconfig for 3.2]
---
include/linux/netdevice.h | 32 ++++++++++-
net/Kconfig | 6 ++
net/core/dev.c | 3 +
net/core/net-sysfs.c | 140 ++++++++++++++++++++++++++++++++++++++++++---
4 files changed, 172 insertions(+), 9 deletions(-)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,6 +43,7 @@
#include <linux/rculist.h>
#include <linux/dmaengine.h>
#include <linux/workqueue.h>
+#include <linux/dynamic_queue_limits.h>
#include <linux/ethtool.h>
#include <net/net_namespace.h>
@@ -540,7 +541,6 @@
*/
struct net_device *dev;
struct Qdisc *qdisc;
- unsigned long state;
struct Qdisc *qdisc_sleeping;
#ifdef CONFIG_SYSFS
struct kobject kobj;
@@ -563,6 +563,12 @@
* (/sys/class/net/DEV/Q/trans_timeout)
*/
unsigned long trans_timeout;
+
+ unsigned long state;
+
+#ifdef CONFIG_BQL
+ struct dql dql;
+#endif
} ____cacheline_aligned_in_smp;
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1926,6 +1932,15 @@
static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
unsigned int bytes)
{
+#ifdef CONFIG_BQL
+ dql_queued(&dev_queue->dql, bytes);
+ if (unlikely(dql_avail(&dev_queue->dql) < 0)) {
+ set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
+ if (unlikely(dql_avail(&dev_queue->dql) >= 0))
+ clear_bit(__QUEUE_STATE_STACK_XOFF,
+ &dev_queue->state);
+ }
+#endif
}
static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
@@ -1936,6 +1951,18 @@
static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
unsigned pkts, unsigned bytes)
{
+#ifdef CONFIG_BQL
+ if (likely(bytes)) {
+ dql_completed(&dev_queue->dql, bytes);
+ if (unlikely(test_bit(__QUEUE_STATE_STACK_XOFF,
+ &dev_queue->state) &&
+ dql_avail(&dev_queue->dql) >= 0)) {
+ if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF,
+ &dev_queue->state))
+ netif_schedule_queue(dev_queue);
+ }
+ }
+#endif
}
static inline void netdev_completed_queue(struct net_device *dev,
@@ -1946,6 +1973,9 @@
static inline void netdev_tx_reset_queue(struct netdev_queue *q)
{
+#ifdef CONFIG_BQL
+ dql_reset(&q->dql);
+#endif
}
static inline void netdev_reset_queue(struct net_device *dev_queue)
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,12 @@
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
+config BQL
+ boolean
+ depends on SYSFS
+ select DQL
+ default y
+
config BPF_JIT
bool "enable BPF Just In Time compiler"
depends on HAVE_BPF_JIT
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5524,6 +5524,9 @@
queue->xmit_lock_owner = -1;
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
+#ifdef CONFIG_BQL
+ dql_init(&queue->dql, HZ);
+#endif
}
static int netif_alloc_netdev_queues(struct net_device *dev)
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
#include <linux/wireless.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
+#include <linux/jiffies.h>
#include <net/wext.h>
#include "net-sysfs.h"
@@ -845,6 +846,116 @@
static struct netdev_queue_attribute queue_trans_timeout =
__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+#ifdef CONFIG_BQL
+/*
+ * Byte queue limits sysfs structures and functions.
+ */
+static ssize_t bql_show(char *buf, unsigned int value)
+{
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t bql_set(const char *buf, const size_t count,
+ unsigned int *pvalue)
+{
+ unsigned int value;
+ int err;
+
+ if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+ value = DQL_MAX_LIMIT;
+ else {
+ err = kstrtouint(buf, 10, &value);
+ if (err < 0)
+ return err;
+ if (value > DQL_MAX_LIMIT)
+ return -EINVAL;
+ }
+
+ *pvalue = value;
+
+ return count;
+}
+
+static ssize_t bql_show_hold_time(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr,
+ char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
+}
+
+static ssize_t bql_set_hold_time(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct dql *dql = &queue->dql;
+ unsigned value;
+ int err;
+
+ err = kstrtouint(buf, 10, &value);
+ if (err < 0)
+ return err;
+
+ dql->slack_hold_time = msecs_to_jiffies(value);
+
+ return len;
+}
+
+static struct netdev_queue_attribute bql_hold_time_attribute =
+ __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
+ bql_set_hold_time);
+
+static ssize_t bql_show_inflight(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr,
+ char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
+}
+
+static struct netdev_queue_attribute bql_inflight_attribute =
+ __ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL);
+
+#define BQL_ATTR(NAME, FIELD) \
+static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
+ struct netdev_queue_attribute *attr, \
+ char *buf) \
+{ \
+ return bql_show(buf, queue->dql.FIELD); \
+} \
+ \
+static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
+ struct netdev_queue_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ return bql_set(buf, len, &queue->dql.FIELD); \
+} \
+ \
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \
+ __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \
+ bql_set_ ## NAME);
+
+BQL_ATTR(limit, limit)
+BQL_ATTR(limit_max, max_limit)
+BQL_ATTR(limit_min, min_limit)
+
+static struct attribute *dql_attrs[] = {
+ &bql_limit_attribute.attr,
+ &bql_limit_max_attribute.attr,
+ &bql_limit_min_attribute.attr,
+ &bql_hold_time_attribute.attr,
+ &bql_inflight_attribute.attr,
+ NULL
+};
+
+static struct attribute_group dql_group = {
+ .name = "byte_queue_limits",
+ .attrs = dql_attrs,
+};
+#endif /* CONFIG_BQL */
+
#ifdef CONFIG_XPS
static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
@@ -1096,17 +1207,17 @@
NULL
};
-#ifdef CONFIG_XPS
static void netdev_queue_release(struct kobject *kobj)
{
struct netdev_queue *queue = to_netdev_queue(kobj);
+#ifdef CONFIG_XPS
xps_queue_release(queue);
+#endif
memset(kobj, 0, sizeof(*kobj));
dev_put(queue->dev);
}
-#endif /* CONFIG_XPS */
static struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
@@ -1125,14 +1236,21 @@
kobj->kset = net->queues_kset;
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
"tx-%u", index);
- if (error) {
- kobject_put(kobj);
- return error;
- }
+ if (error)
+ goto exit;
+
+#ifdef CONFIG_BQL
+ error = sysfs_create_group(kobj, &dql_group);
+ if (error)
+ goto exit;
+#endif
kobject_uevent(kobj, KOBJ_ADD);
dev_hold(queue->dev);
+ return 0;
+exit:
+ kobject_put(kobj);
return error;
}
#endif /* CONFIG_SYSFS */
@@ -1152,8 +1270,14 @@
}
}
- while (--i >= new_num)
- kobject_put(&net->_tx[i].kobj);
+ while (--i >= new_num) {
+ struct netdev_queue *queue = net->_tx + i;
+
+#ifdef CONFIG_BQL
+ sysfs_remove_group(&queue->kobj, &dql_group);
+#endif
+ kobject_put(&queue->kobj);
+ }
return error;
#else
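
The stop decision in netdev_tx_sent_queue() above hinges on dql_avail(), which is adj_limit (limit + num_completed) minus num_queued; a negative value means the queue is over its byte budget. A standalone stand-in for the two fields involved (illustrative only; mirrors dql_queued()/dql_avail() from this series):

#include <stdio.h>

/* Userspace stand-in for the struct dql fields the stack consults. */
struct dql { unsigned int num_queued, adj_limit; };

static void dql_queued(struct dql *dql, unsigned int count)
{
    dql->num_queued += count;
}

static int dql_avail(const struct dql *dql)
{
    return dql->adj_limit - dql->num_queued;
}

int main(void)
{
    /* limit 3000 bytes, nothing completed yet: adj_limit == limit */
    struct dql dql = { .num_queued = 0, .adj_limit = 3000 };

    dql_queued(&dql, 1500);
    printf("avail=%d\n", dql_avail(&dql)); /* 1500: queue keeps running */

    dql_queued(&dql, 2000);
    /* negative: netdev_tx_sent_queue() sets __QUEUE_STATE_STACK_XOFF,
     * then re-checks dql_avail() in case a completion raced with it */
    printf("avail=%d\n", dql_avail(&dql)); /* -500: stack stops the queue */
    return 0;
}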


@@ -0,0 +1,29 @@
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 4 Dec 2011 12:38:00 +0000
Subject: bql: fix CONFIG_XPS=n build
commit b474ae77609b725098d5a7cc8f69c1c528710d53 upstream.
netdev_queue_release() should be called even if CONFIG_XPS=n
to properly release device reference.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/core/net-sysfs.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3bf72b6..9d13463 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1221,9 +1221,7 @@ static void netdev_queue_release(struct kobject *kobj)
static struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
-#ifdef CONFIG_XPS
.release = netdev_queue_release,
-#endif
.default_attrs = netdev_queue_default_attrs,
};


@@ -0,0 +1,30 @@
From: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Date: Sat, 14 Jan 2012 07:10:21 +0000
Subject: bql: Fix inconsistency between file mode and attr method.
commit 795d9a2538b205d9329f34798ec96503a07b7919 upstream.
There is no store() method for inflight attribute in the
tx-<n>/byte_queue_limits sysfs directory.
So remove S_IWUSR bit.
Signed-off-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/core/net-sysfs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f3dbd4f..a1727cd 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -929,7 +929,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_inflight_attribute =
- __ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL);
+ __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \


@@ -0,0 +1,31 @@
From: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Date: Wed, 30 May 2012 12:24:39 +0000
Subject: bql: Fix POSDIFF() to integer overflow aware.
commit 0cfd32b736ae0c36b42697584811042726c07cba upstream.
POSDIFF() fails to take into account integer overflow case.
Signed-off-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Denys Fedoryshchenko <denys@visp.net.lb>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
lib/dynamic_queue_limits.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index 6ab4587..c87eb76 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -10,7 +10,7 @@
#include <linux/jiffies.h>
#include <linux/dynamic_queue_limits.h>
-#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
+#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
/* Records completed count and recalculates the queue limit */
void dql_completed(struct dql *dql, unsigned int count)
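
num_queued and num_completed are free-running counters that wrap past UINT_MAX, so differences must be taken modulo 2^32 before the sign test. A standalone illustration of the case the old macro mishandles:

#include <stdio.h>

#define POSDIFF_OLD(A, B) ((A) > (B) ? (A) - (B) : 0)
#define POSDIFF_NEW(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)

int main(void)
{
    /* the counter has wrapped to 100 while the other is still near the
     * top of the range; the true difference is 200 */
    unsigned int a = 100, b = 4294967196u;  /* b == UINT_MAX - 99 */

    printf("old: %u\n", POSDIFF_OLD(a, b)); /* 0: 100 > 4294967196 is false */
    printf("new: %u\n", POSDIFF_NEW(a, b)); /* 200: wraparound-aware */
    return 0;
}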


@@ -0,0 +1,328 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:35 +0000
Subject: dql: Dynamic queue limits
commit 75957ba36c05b979701e9ec64b37819adc12f830 upstream.
Implementation of dynamic queue limits (dql). This is a library which
allows a queue limit to be dynamically managed. The goal of dql is
to set the queue limit (the number of objects that may be queued) to the
minimum needed without allowing the queue to be starved.
dql would be used with a queue which has these properties:
1) Objects are queued up to some limit which can be expressed as a
count of objects.
2) Periodically a completion process executes which retires consumed
objects.
3) Starvation occurs when limit has been reached, all queued data has
actually been consumed but completion processing has not yet run,
so queuing new data is blocked.
4) Minimizing the amount of queued data is desirable.
A canonical example of such a queue would be a NIC HW transmit queue.
The queue limit is dynamic, it will increase or decrease over time
depending on the workload. The queue limit is recalculated each time
completion processing is done. Increases occur when the queue is
starved and can exponentially increase over successive intervals.
Decreases occur when more data is being maintained in the queue than
needed to prevent starvation. The number of extra objects, or "slack",
is measured over successive intervals, and to avoid hysteresis the
limit is only reduced by the minimum slack seen over a configurable
time period.
dql API provides routines to manage the queue:
- dql_init is called to initialize the dql structure
- dql_reset is called to reset dynamic values
- dql_queued called when objects are being enqueued
- dql_avail returns availability in the queue
- dql_completed is called when objects have been consumed in the queue
Configuration consists of:
- max_limit, maximum limit
- min_limit, minimum limit
- slack_hold_time, time to measure instances of slack before reducing
queue limit
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/linux/dynamic_queue_limits.h | 97 +++++++++++++++++++++++++
lib/Kconfig | 3 +
lib/Makefile | 2 +
lib/dynamic_queue_limits.c | 133 ++++++++++++++++++++++++++++++++++
4 files changed, 235 insertions(+)
create mode 100644 include/linux/dynamic_queue_limits.h
create mode 100644 lib/dynamic_queue_limits.c
diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
new file mode 100644
index 0000000..5621547
--- /dev/null
+++ b/include/linux/dynamic_queue_limits.h
@@ -0,0 +1,97 @@
+/*
+ * Dynamic queue limits (dql) - Definitions
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ *
+ * This header file contains the definitions for dynamic queue limits (dql).
+ * dql would be used in conjunction with a producer/consumer type queue
+ * (possibly a HW queue). Such a queue would have these general properties:
+ *
+ * 1) Objects are queued up to some limit specified as number of objects.
+ * 2) Periodically a completion process executes which retires consumed
+ * objects.
+ * 3) Starvation occurs when limit has been reached, all queued data has
+ * actually been consumed, but completion processing has not yet run
+ * so queuing new data is blocked.
+ * 4) Minimizing the amount of queued data is desirable.
+ *
+ * The goal of dql is to calculate the limit as the minimum number of objects
+ * needed to prevent starvation.
+ *
+ * The primary functions of dql are:
+ * dql_queued - called when objects are enqueued to record number of objects
+ * dql_avail - returns how many objects are available to be queued based
+ * on the object limit and how many objects are already enqueued
+ * dql_completed - called at completion time to indicate how many objects
+ * were retired from the queue
+ *
+ * The dql implementation does not implement any locking for the dql data
+ * structures, the higher layer should provide this. dql_queued should
+ * be serialized to prevent concurrent execution of the function; this
+ * is also true for dql_completed. However, dql_queued and dql_completed can
+ * be executed concurrently (i.e. they can be protected by different locks).
+ */
+
+#ifndef _LINUX_DQL_H
+#define _LINUX_DQL_H
+
+#ifdef __KERNEL__
+
+struct dql {
+ /* Fields accessed in enqueue path (dql_queued) */
+ unsigned int num_queued; /* Total ever queued */
+ unsigned int adj_limit; /* limit + num_completed */
+ unsigned int last_obj_cnt; /* Count at last queuing */
+
+ /* Fields accessed only by completion path (dql_completed) */
+
+ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */
+ unsigned int num_completed; /* Total ever completed */
+
+ unsigned int prev_ovlimit; /* Previous over limit */
+ unsigned int prev_num_queued; /* Previous queue total */
+ unsigned int prev_last_obj_cnt; /* Previous queuing cnt */
+
+ unsigned int lowest_slack; /* Lowest slack found */
+ unsigned long slack_start_time; /* Time slacks seen */
+
+ /* Configuration */
+ unsigned int max_limit; /* Max limit */
+ unsigned int min_limit; /* Minimum limit */
+ unsigned int slack_hold_time; /* Time to measure slack */
+};
+
+/* Set some static maximums */
+#define DQL_MAX_OBJECT (UINT_MAX / 16)
+#define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT)
+
+/*
+ * Record number of objects queued. Assumes that caller has already checked
+ * availability in the queue with dql_avail.
+ */
+static inline void dql_queued(struct dql *dql, unsigned int count)
+{
+ BUG_ON(count > DQL_MAX_OBJECT);
+
+ dql->num_queued += count;
+ dql->last_obj_cnt = count;
+}
+
+/* Returns how many objects can be queued, < 0 indicates over limit. */
+static inline int dql_avail(const struct dql *dql)
+{
+ return dql->adj_limit - dql->num_queued;
+}
+
+/* Record number of completed objects and recalculate the limit. */
+void dql_completed(struct dql *dql, unsigned int count);
+
+/* Reset dql state */
+void dql_reset(struct dql *dql);
+
+/* Initialize dql state */
+int dql_init(struct dql *dql, unsigned hold_time);
+
+#endif /* _KERNEL_ */
+
+#endif /* _LINUX_DQL_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 32f3e5a..63b5782 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -244,6 +244,9 @@ config CPU_RMAP
bool
depends on SMP
+config DQL
+ bool
+
#
# Netlink attribute parsing support is select'ed if needed
#
diff --git a/lib/Makefile b/lib/Makefile
index a4da283..ff00d4d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
obj-$(CONFIG_CORDIC) += cordic.o
+obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+
hostprogs-y := gen_crc32table
clean-files := crc32table.h
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
new file mode 100644
index 0000000..3d1bdcd
--- /dev/null
+++ b/lib/dynamic_queue_limits.c
@@ -0,0 +1,133 @@
+/*
+ * Dynamic byte queue limits. See include/linux/dynamic_queue_limits.h
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/dynamic_queue_limits.h>
+
+#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
+
+/* Records completed count and recalculates the queue limit */
+void dql_completed(struct dql *dql, unsigned int count)
+{
+ unsigned int inprogress, prev_inprogress, limit;
+ unsigned int ovlimit, all_prev_completed, completed;
+
+ /* Can't complete more than what's in queue */
+ BUG_ON(count > dql->num_queued - dql->num_completed);
+
+ completed = dql->num_completed + count;
+ limit = dql->limit;
+ ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
+ inprogress = dql->num_queued - completed;
+ prev_inprogress = dql->prev_num_queued - dql->num_completed;
+ all_prev_completed = POSDIFF(completed, dql->prev_num_queued);
+
+ if ((ovlimit && !inprogress) ||
+ (dql->prev_ovlimit && all_prev_completed)) {
+ /*
+ * Queue considered starved if:
+ * - The queue was over-limit in the last interval,
+ * and there is no more data in the queue.
+ * OR
+ * - The queue was over-limit in the previous interval and
+ * when enqueuing it was possible that all queued data
+ * had been consumed. This covers the case when queue
+ * may have become starved between completion processing
+ * running and next time enqueue was scheduled.
+ *
+ * When queue is starved increase the limit by the amount
+ * of bytes both sent and completed in the last interval,
+ * plus any previous over-limit.
+ */
+ limit += POSDIFF(completed, dql->prev_num_queued) +
+ dql->prev_ovlimit;
+ dql->slack_start_time = jiffies;
+ dql->lowest_slack = UINT_MAX;
+ } else if (inprogress && prev_inprogress && !all_prev_completed) {
+ /*
+ * Queue was not starved, check if the limit can be decreased.
+ * A decrease is only considered if the queue has been busy in
+ * the whole interval (the check above).
+ *
+ * If there is slack, the amount of excess data queued above
+ * the amount needed to prevent starvation, the queue limit
+ * can be decreased. To avoid hysteresis we consider the
+ * minimum amount of slack found over several iterations of the
+ * completion routine.
+ */
+ unsigned int slack, slack_last_objs;
+
+ /*
+ * Slack is the maximum of
+ * - The queue limit plus previous over-limit minus twice
+ * the number of objects completed. Note that two times
+ * number of completed bytes is a basis for an upper bound
+ * of the limit.
+ * - Portion of objects in the last queuing operation that
+ * was not part of non-zero previous over-limit. That is
+ * "round down" by non-overlimit portion of the last
+ * queueing operation.
+ */
+ slack = POSDIFF(limit + dql->prev_ovlimit,
+ 2 * (completed - dql->num_completed));
+ slack_last_objs = dql->prev_ovlimit ?
+ POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
+
+ slack = max(slack, slack_last_objs);
+
+ if (slack < dql->lowest_slack)
+ dql->lowest_slack = slack;
+
+ if (time_after(jiffies,
+ dql->slack_start_time + dql->slack_hold_time)) {
+ limit = POSDIFF(limit, dql->lowest_slack);
+ dql->slack_start_time = jiffies;
+ dql->lowest_slack = UINT_MAX;
+ }
+ }
+
+ /* Enforce bounds on limit */
+ limit = clamp(limit, dql->min_limit, dql->max_limit);
+
+ if (limit != dql->limit) {
+ dql->limit = limit;
+ ovlimit = 0;
+ }
+
+ dql->adj_limit = limit + completed;
+ dql->prev_ovlimit = ovlimit;
+ dql->prev_last_obj_cnt = dql->last_obj_cnt;
+ dql->num_completed = completed;
+ dql->prev_num_queued = dql->num_queued;
+}
+EXPORT_SYMBOL(dql_completed);
+
+void dql_reset(struct dql *dql)
+{
+ /* Reset all dynamic values */
+ dql->limit = 0;
+ dql->num_queued = 0;
+ dql->num_completed = 0;
+ dql->last_obj_cnt = 0;
+ dql->prev_num_queued = 0;
+ dql->prev_last_obj_cnt = 0;
+ dql->prev_ovlimit = 0;
+ dql->lowest_slack = UINT_MAX;
+ dql->slack_start_time = jiffies;
+}
+EXPORT_SYMBOL(dql_reset);
+
+int dql_init(struct dql *dql, unsigned hold_time)
+{
+ dql->max_limit = DQL_MAX_LIMIT;
+ dql->min_limit = 0;
+ dql->slack_hold_time = hold_time;
+ dql_reset(dql);
+ return 0;
+}
+EXPORT_SYMBOL(dql_init);
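
For reference, the call sequence a queue owner is expected to follow, sketched against the API added above. The my_ring structure and function names are hypothetical; in the networking stack these calls are driven by the netdev_tx_* wrappers added later in this series rather than used directly by drivers:

/* Sketch only: hypothetical ring structure; the dql_* calls are the
 * real API from <linux/dynamic_queue_limits.h> as added above. */
#include <linux/dynamic_queue_limits.h>
#include <linux/errno.h>

struct my_ring {
    struct dql dql;
    /* ... descriptors, producer/consumer indices ... */
};

static int my_ring_setup(struct my_ring *ring)
{
    return dql_init(&ring->dql, HZ);   /* measure slack over ~1 second */
}

static int my_enqueue(struct my_ring *ring, unsigned int bytes)
{
    if (dql_avail(&ring->dql) < 0)
        return -EBUSY;                 /* over limit; caller stops the queue */
    dql_queued(&ring->dql, bytes);     /* serialize against other enqueues */
    return 0;
}

static void my_complete(struct my_ring *ring, unsigned int bytes)
{
    /* serialize against other completions; may run concurrently with
     * my_enqueue() under a different lock */
    dql_completed(&ring->dql, bytes);
}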


@@ -0,0 +1,28 @@
From: Tom Herbert <therbert@google.com>
Date: Sun, 11 Mar 2012 19:59:43 -0700
Subject: dql: Fix undefined jiffies
commit 930c514f6960454a5082f7e533bd2867df6fe9cb upstream.
In some configurations, jiffies may be undefined in
lib/dynamic_queue_limits.c. Adding include of jiffies.h to avoid
this.
Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
lib/dynamic_queue_limits.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index 3d1bdcd..6ab4587 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
+#include <linux/jiffies.h>
#include <linux/dynamic_queue_limits.h>
#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)


@@ -0,0 +1,63 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:16 +0000
Subject: e1000e: Support for byte queue limits
commit 3f0cfa3bc11e7f00c9994e0f469cbc0e7da7b00c upstream.
Changes to e1000e to use byte queue limits.
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/intel/e1000e/netdev.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a5bd7a3..c6e9763 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1079,6 +1079,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
unsigned int i, eop;
unsigned int count = 0;
unsigned int total_tx_bytes = 0, total_tx_packets = 0;
+ unsigned int bytes_compl = 0, pkts_compl = 0;
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
@@ -1096,6 +1097,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
if (cleaned) {
total_tx_packets += buffer_info->segs;
total_tx_bytes += buffer_info->bytecount;
+ if (buffer_info->skb) {
+ bytes_compl += buffer_info->skb->len;
+ pkts_compl++;
+ }
}
e1000_put_txbuf(adapter, buffer_info);
@@ -1114,6 +1119,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
tx_ring->next_to_clean = i;
+ netdev_completed_queue(netdev, pkts_compl, bytes_compl);
+
#define TX_WAKE_THRESHOLD 32
if (count && netif_carrier_ok(netdev) &&
e1000_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD) {
@@ -2240,6 +2247,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter)
e1000_put_txbuf(adapter, buffer_info);
}
+ netdev_reset_queue(adapter->netdev);
size = sizeof(struct e1000_buffer) * tx_ring->count;
memset(tx_ring->buffer_info, 0, size);
@@ -5027,6 +5035,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
/* if count is 0 then mapping error has occurred */
count = e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss);
if (count) {
+ netdev_sent_queue(netdev, skb->len);
e1000_tx_queue(adapter, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
e1000_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 2);


@@ -0,0 +1,39 @@
From: Igor Maravic <igorm@etf.rs>
Date: Thu, 1 Dec 2011 23:48:20 +0000
Subject: forcedeath: Fix bql support for forcedeath
commit 7505afe28c16a8d386624930a018d0052c75d687 upstream.
Moved netdev_completed_queue() out of the while loop in nv_tx_done_optimized().
Because this call was inside the while loop,
BUG_ON(count > dql->num_queued - dql->num_completed)
was hit in dql_completed().
Signed-off-by: Igor Maravic <igorm@etf.rs>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/nvidia/forcedeth.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 5245dac..4c4e7f4 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -2561,13 +2561,14 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
nv_tx_flip_ownership(dev);
}
- netdev_completed_queue(np->dev, tx_work, bytes_cleaned);
-
if (unlikely(np->get_tx.ex++ == np->last_tx.ex))
np->get_tx.ex = np->first_tx.ex;
if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
np->get_tx_ctx = np->first_tx_ctx;
}
+
+ netdev_completed_queue(np->dev, tx_work, bytes_cleaned);
+
if (unlikely((np->tx_stop == 1) && (np->get_tx.ex != orig_get_tx))) {
np->tx_stop = 0;
netif_wake_queue(dev);
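
The underlying rule: each call to netdev_completed_queue() must report only the delta since the previous call, but tx_work and bytes_cleaned are running totals, so the in-loop call re-reported bytes on every iteration and eventually claimed more completions than were ever queued. A standalone rendering of the failure with made-up numbers:

#include <stdio.h>

int main(void)
{
    unsigned int num_queued = 300, num_completed = 0;
    unsigned int bytes_cleaned = 0, lens[] = { 100, 100, 100 };

    for (int i = 0; i < 3; i++) {
        bytes_cleaned += lens[i];
        /* the in-loop call passes the running total each time: */
        unsigned int count = bytes_cleaned;
        if (count > num_queued - num_completed) {
            printf("iteration %d: BUG_ON(count > queued - completed)"
                   " -- count=%u outstanding=%u\n",
                   i, count, num_queued - num_completed);
            return 1;
        }
        num_completed += count;
    }
    return 0;
}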


@@ -0,0 +1,105 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:23 +0000
Subject: forcedeth: Support for byte queue limits
commit b8bfca9439d4ed03446bc9a3fdaef81b364d32dd upstream.
Changes to forcedeth to use byte queue limits.
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/nvidia/forcedeth.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1849,6 +1849,7 @@
np->last_tx.ex = &np->tx_ring.ex[np->tx_ring_size-1];
np->get_tx_ctx = np->put_tx_ctx = np->first_tx_ctx = np->tx_skb;
np->last_tx_ctx = &np->tx_skb[np->tx_ring_size-1];
+ netdev_reset_queue(np->dev);
np->tx_pkts_in_progress = 0;
np->tx_change_owner = NULL;
np->tx_end_flip = NULL;
@@ -2194,6 +2195,9 @@
/* set tx flags */
start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+
+ netdev_sent_queue(np->dev, skb->len);
+
np->put_tx.orig = put_tx;
spin_unlock_irqrestore(&np->lock, flags);
@@ -2338,6 +2342,9 @@
/* set tx flags */
start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+
+ netdev_sent_queue(np->dev, skb->len);
+
np->put_tx.ex = put_tx;
spin_unlock_irqrestore(&np->lock, flags);
@@ -2375,6 +2382,7 @@
u32 flags;
int tx_work = 0;
struct ring_desc *orig_get_tx = np->get_tx.orig;
+ unsigned int bytes_compl = 0;
while ((np->get_tx.orig != np->put_tx.orig) &&
!((flags = le32_to_cpu(np->get_tx.orig->flaglen)) & NV_TX_VALID) &&
@@ -2391,6 +2399,7 @@
dev->stats.tx_packets++;
dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
}
+ bytes_compl += np->get_tx_ctx->skb->len;
dev_kfree_skb_any(np->get_tx_ctx->skb);
np->get_tx_ctx->skb = NULL;
tx_work++;
@@ -2404,6 +2413,7 @@
dev->stats.tx_packets++;
dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
}
+ bytes_compl += np->get_tx_ctx->skb->len;
dev_kfree_skb_any(np->get_tx_ctx->skb);
np->get_tx_ctx->skb = NULL;
tx_work++;
@@ -2414,6 +2424,9 @@
if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
np->get_tx_ctx = np->first_tx_ctx;
}
+
+ netdev_completed_queue(np->dev, tx_work, bytes_compl);
+
if (unlikely((np->tx_stop == 1) && (np->get_tx.orig != orig_get_tx))) {
np->tx_stop = 0;
netif_wake_queue(dev);
@@ -2427,6 +2440,7 @@
u32 flags;
int tx_work = 0;
struct ring_desc_ex *orig_get_tx = np->get_tx.ex;
+ unsigned long bytes_cleaned = 0;
while ((np->get_tx.ex != np->put_tx.ex) &&
!((flags = le32_to_cpu(np->get_tx.ex->flaglen)) & NV_TX2_VALID) &&
@@ -2447,6 +2461,7 @@
dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
}
+ bytes_cleaned += np->get_tx_ctx->skb->len;
dev_kfree_skb_any(np->get_tx_ctx->skb);
np->get_tx_ctx->skb = NULL;
tx_work++;
@@ -2454,6 +2469,9 @@
if (np->tx_limit)
nv_tx_flip_ownership(dev);
}
+
+ netdev_completed_queue(np->dev, tx_work, bytes_cleaned);
+
if (unlikely(np->get_tx.ex++ == np->last_tx.ex))
np->get_tx.ex = np->first_tx.ex;
if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))


@@ -0,0 +1,65 @@
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 Jan 2012 20:23:36 +0000
Subject: igb: Add support for byte queue limits.
commit bdbc063129e811264cd6c311d8c2d9b95de01231 upstream.
This adds support for byte queue limits (BQL)
Since this driver collects bytes count in 'bytecount' field, use it also
in igb_tx_map()
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/intel/igb/igb.h | 5 +++++
drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++
2 files changed, 10 insertions(+)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index c69feeb..3d12e67 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -447,4 +447,9 @@ static inline s32 igb_get_phy_info(struct e1000_hw *hw)
return 0;
}
+static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
+{
+ return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+}
+
#endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 89d576c..dcc68cc 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3201,6 +3201,7 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
buffer_info = &tx_ring->tx_buffer_info[i];
igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
}
+ netdev_tx_reset_queue(txring_txq(tx_ring));
size = sizeof(struct igb_tx_buffer) * tx_ring->count;
memset(tx_ring->tx_buffer_info, 0, size);
@@ -4238,6 +4239,8 @@ static void igb_tx_map(struct igb_ring *tx_ring,
frag++;
}
+ netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
/* write last descriptor with RS and EOP bits */
cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
tx_desc->read.cmd_type_len = cmd_type;
@@ -5777,6 +5780,8 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
}
}
+ netdev_tx_completed_queue(txring_txq(tx_ring),
+ total_packets, total_bytes);
i += tx_ring->count;
tx_ring->next_to_clean = i;
u64_stats_update_begin(&tx_ring->tx_syncp);


@@ -0,0 +1,56 @@
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 19 Jan 2012 18:31:34 +0000
Subject: igb: fix ethtool offline test
commit 51a76c30929cc8b7d541f51e634f146e54ea9bb7 upstream.
A bug was introduced with the following patch:
Commit bdbc063129e811264cd6c311d8c2d9b95de01231
Author: Eric Dumazet <eric.dumazet@gmail.com>
igb: Add support for byte queue limits.
The ethtool offline tests will cause a perpetual link flap, this
is because the tests also need to account for byte queue limits (BQL).
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
---
drivers/net/ethernet/intel/igb/igb_ethtool.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index aa399a8..e10821a 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1577,7 +1577,9 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc;
struct igb_rx_buffer *rx_buffer_info;
struct igb_tx_buffer *tx_buffer_info;
+ struct netdev_queue *txq;
u16 rx_ntc, tx_ntc, count = 0;
+ unsigned int total_bytes = 0, total_packets = 0;
/* initialize next to clean and descriptor values */
rx_ntc = rx_ring->next_to_clean;
@@ -1601,6 +1603,8 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
/* unmap buffer on tx side */
tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
+ total_bytes += tx_buffer_info->bytecount;
+ total_packets += tx_buffer_info->gso_segs;
igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
/* increment rx/tx next to clean counters */
@@ -1615,6 +1619,9 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
}
+ txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+ netdev_tx_completed_queue(txq, total_packets, total_bytes);
+
/* re-map buffers to ring, store next to clean values */
igb_alloc_rx_buffers(rx_ring, count);
rx_ring->next_to_clean = rx_ntc;


@@ -0,0 +1,79 @@
From: John Fastabend <john.r.fastabend@intel.com>
Date: Mon, 23 Apr 2012 12:22:39 +0000
Subject: igb, ixgbe: netdev_tx_reset_queue incorrectly called from tx init
path
commit dad8a3b3eaa0c2ca25368a0b9f65edca84e27a40 upstream.
igb and ixgbe incorrectly call netdev_tx_reset_queue() from
i{gb|xgbe}_clean_tx_ring() this sort of works in most cases except
when the number of real tx queues changes. When the number of real
tx queues changes netdev_tx_reset_queue() only gets called on the
new number of queues so when we reduce the number of queues we risk
triggering the watchdog timer and repeated device resets.
So this is not only a cosmetic issue but causes real bugs. For
example enabling/disabling DCB or FCoE in ixgbe will trigger this.
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: John Bishop <johnx.bishop@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/igb/igb_main.c | 4 ++--
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 ++
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++--
3 files changed, 6 insertions(+), 4 deletions(-)
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2711,8 +2711,6 @@
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
-
- netdev_tx_reset_queue(txring_txq(ring));
}
/**
@@ -3206,6 +3204,8 @@
igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
}
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
size = sizeof(struct igb_tx_buffer) * tx_ring->count;
memset(tx_ring->tx_buffer_info, 0, size);
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1764,6 +1764,8 @@
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
}
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
/* re-map buffers to ring, store next to clean values */
ixgbe_alloc_rx_buffers(rx_ring, count);
rx_ring->next_to_clean = rx_ntc;
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2411,8 +2411,6 @@
/* enable queue */
IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl);
- netdev_tx_reset_queue(txring_txq(ring));
-
/* TXDCTL.EN will return 0 on 82598 if link is down, so skip it */
if (hw->mac.type == ixgbe_mac_82598EB &&
!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
@@ -3930,6 +3928,8 @@
ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
}
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
memset(tx_ring->tx_buffer_info, 0, size);
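
The placement rule this fix establishes: reset BQL state on the path that runs for every ring being torn down, not on the enable path, which after a queue-count reduction runs only for the surviving rings. A sketch with a hypothetical driver structure (netdev_tx_reset_queue() is the real hook):

struct my_ring {
    struct netdev_queue *txq;   /* from netdev_get_tx_queue() */
    /* ... descriptors ... */
};

/* Runs for every ring at teardown, including rings being removed. */
static void my_clean_tx_ring(struct my_ring *ring)
{
    /* ... free pending skbs ... */
    netdev_tx_reset_queue(ring->txq);   /* correct place */
}

/* After a queue-count reduction this runs only for surviving rings, so
 * resetting BQL here would leave stale in-flight counts on the removed
 * queues and trigger the watchdog. */
static void my_configure_tx_ring(struct my_ring *ring)
{
    /* ... program and enable the hardware queue ... */
}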


@@ -0,0 +1,59 @@
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 7 Feb 2012 08:14:33 +0000
Subject: ixgbe: add support for byte queue limits
commit b2d96e0ac07cf4929c6b0eb13121672048368117 upstream.
This adds support for byte queue limits (BQL).
Based on patch from Eric Dumazet for igb.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe.h | 5 +++++
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++++++
2 files changed, 12 insertions(+)
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -629,4 +629,9 @@
extern int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type);
#endif /* IXGBE_FCOE */
+static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring)
+{
+ return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+
#endif /* _IXGBE_H_ */
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -832,6 +832,9 @@
return true;
}
+ netdev_tx_completed_queue(txring_txq(tx_ring),
+ total_packets, total_bytes);
+
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
(ixgbe_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
@@ -2408,6 +2411,8 @@
/* enable queue */
IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl);
+ netdev_tx_reset_queue(txring_txq(ring));
+
/* TXDCTL.EN will return 0 on 82598 if link is down, so skip it */
if (hw->mac.type == ixgbe_mac_82598EB &&
!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
@@ -6581,6 +6586,8 @@
tx_buffer_info->gso_segs = gso_segs;
tx_buffer_info->skb = skb;
+ netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer_info->bytecount);
+
/* set the timestamp */
first->time_stamp = jiffies;


@@ -0,0 +1,56 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:52 +0000
Subject: net: Add netdev interfaces for recording sends/comp
commit c5d67bd78c5dc540e3461c36fb3d389fbe0de4c3 upstream.
Add interfaces for drivers to call for recording number of packets and
bytes at send time and transmit completion. Also, added a function to
"reset" a queue. These will be used by Byte Queue Limits.
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/linux/netdevice.h | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d19f932..9b24cc7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1859,6 +1859,34 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu
return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
}
+static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+ unsigned int bytes)
+{
+}
+
+static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
+{
+ netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes);
+}
+
+static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
+ unsigned pkts, unsigned bytes)
+{
+}
+
+static inline void netdev_completed_queue(struct net_device *dev,
+ unsigned pkts, unsigned bytes)
+{
+ netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
+}
+
+static inline void netdev_tx_reset_queue(struct netdev_queue *q)
+{
+}
+
+static inline void netdev_reset_queue(struct net_device *dev_queue)
+{
+ netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0));
+}
/**


@@ -0,0 +1,279 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:44 +0000
Subject: net: Add queue state xoff flag for stack
commit 7346649826382b769cfadf4a2fe8a84d060c55e9 upstream.
Create separate queue state flags so that either the stack or drivers
can turn on XOFF. Added a set of functions used in the stack to determine
if a queue is really stopped (either by stack or driver)
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/linux/netdevice.h | 41 ++++++++++++++++++++++++++++++-----------
net/core/dev.c | 4 ++--
net/core/netpoll.c | 4 ++--
net/core/pktgen.c | 2 +-
net/sched/sch_generic.c | 8 ++++----
net/sched/sch_multiq.c | 6 ++++--
net/sched/sch_teql.c | 6 +++---
7 files changed, 46 insertions(+), 25 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ac9a4b9..d19f932 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -517,11 +517,23 @@ static inline void napi_synchronize(const struct napi_struct *n)
#endif
enum netdev_queue_state_t {
- __QUEUE_STATE_XOFF,
+ __QUEUE_STATE_DRV_XOFF,
+ __QUEUE_STATE_STACK_XOFF,
__QUEUE_STATE_FROZEN,
-#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF) | \
- (1 << __QUEUE_STATE_FROZEN))
+#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \
+ (1 << __QUEUE_STATE_STACK_XOFF))
+#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \
+ (1 << __QUEUE_STATE_FROZEN))
};
+/*
+ * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The
+ * netif_tx_* functions below are used to manipulate this flag. The
+ * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit
+ * queue independently. The netif_xmit_*stopped functions below are called
+ * to check if the queue has been stopped by the driver or stack (either
+ * of the XOFF bits are set in the state). Drivers should not need to call
+ * netif_xmit*stopped functions, they should only be using netif_tx_*.
+ */
struct netdev_queue {
/*
@@ -1718,7 +1730,7 @@ extern void __netif_schedule(struct Qdisc *q);
static inline void netif_schedule_queue(struct netdev_queue *txq)
{
- if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
+ if (!(txq->state & QUEUE_STATE_ANY_XOFF))
__netif_schedule(txq->qdisc);
}
@@ -1732,7 +1744,7 @@ static inline void netif_tx_schedule_all(struct net_device *dev)
static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
{
- clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+ clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}
/**
@@ -1764,7 +1776,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
return;
}
#endif
- if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state))
+ if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
__netif_schedule(dev_queue->qdisc);
}
@@ -1796,7 +1808,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
return;
}
- set_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}
/**
@@ -1823,7 +1835,7 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev)
static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
{
- return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+ return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}
/**
@@ -1837,9 +1849,16 @@ static inline int netif_queue_stopped(const struct net_device *dev)
return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
}
-static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue)
+static inline int netif_xmit_stopped(const struct netdev_queue *dev_queue)
{
- return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN;
+ return dev_queue->state & QUEUE_STATE_ANY_XOFF;
+}
+
+static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue)
+{
+ return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
}
/**
@@ -1926,7 +1945,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
if (netpoll_trap())
return;
#endif
- if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state))
+ if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
__netif_schedule(txq->qdisc);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index c7ef6c5..cb8f753 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2270,7 +2270,7 @@ gso:
return rc;
}
txq_trans_update(txq);
- if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+ if (unlikely(netif_xmit_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -2558,7 +2558,7 @@ int dev_queue_xmit(struct sk_buff *skb)
HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_tx_queue_stopped(txq)) {
+ if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
__this_cpu_dec(xmit_recursion);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 1a7d8e2..0d38808 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,7 +76,7 @@ static void queue_process(struct work_struct *work)
local_irq_save(flags);
__netif_tx_lock(txq, smp_processor_id());
- if (netif_tx_queue_frozen_or_stopped(txq) ||
+ if (netif_xmit_frozen_or_stopped(txq) ||
ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
__netif_tx_unlock(txq);
@@ -317,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
- if (!netif_tx_queue_stopped(txq)) {
+ if (!netif_xmit_stopped(txq)) {
status = ops->ndo_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index aa53a35..449fe0f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3342,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
__netif_tx_lock_bh(txq);
- if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
+ if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
ret = NETDEV_TX_BUSY;
pkt_dev->last_ok = 0;
goto unlock;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 79ac145..67fc573 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
/* check the reason of requeuing without tx lock first */
txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
- if (!netif_tx_queue_frozen_or_stopped(txq)) {
+ if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
q->q.qlen--;
} else
@@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
spin_unlock(root_lock);
HARD_TX_LOCK(dev, txq, smp_processor_id());
- if (!netif_tx_queue_frozen_or_stopped(txq))
+ if (!netif_xmit_frozen_or_stopped(txq))
ret = dev_hard_start_xmit(skb, dev, txq);
HARD_TX_UNLOCK(dev, txq);
@@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
ret = dev_requeue_skb(skb, q);
}
- if (ret && netif_tx_queue_frozen_or_stopped(txq))
+ if (ret && netif_xmit_frozen_or_stopped(txq))
ret = 0;
return ret;
@@ -242,7 +242,7 @@ static void dev_watchdog(unsigned long arg)
* old device drivers set dev->trans_start
*/
trans_start = txq->trans_start ? : dev->trans_start;
- if (netif_tx_queue_stopped(txq) &&
+ if (netif_xmit_stopped(txq) &&
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
some_queue_timedout = 1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index edc1950..49131d7 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
/* Check that target subqueue is available before
* pulling an skb to avoid head-of-line blocking.
*/
- if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+ if (!netif_xmit_stopped(
+ netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
qdisc = q->queues[q->curband];
skb = qdisc->dequeue(qdisc);
if (skb) {
@@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
/* Check that target subqueue is available before
* pulling an skb to avoid head-of-line blocking.
*/
- if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+ if (!netif_xmit_stopped(
+ netdev_get_tx_queue(qdisc_dev(sch), curband))) {
qdisc = q->queues[curband];
skb = qdisc->ops->peek(qdisc);
if (skb)
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index a3b7120..283bfe3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -301,7 +301,7 @@ restart:
if (slave_txq->qdisc_sleeping != q)
continue;
- if (__netif_subqueue_stopped(slave, subq) ||
+ if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
!netif_running(slave)) {
busy = 1;
continue;
@@ -312,7 +312,7 @@ restart:
if (__netif_tx_trylock(slave_txq)) {
unsigned int length = qdisc_pkt_len(skb);
- if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
+ if (!netif_xmit_frozen_or_stopped(slave_txq) &&
slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
txq_trans_update(slave_txq);
__netif_tx_unlock(slave_txq);
@@ -324,7 +324,7 @@ restart:
}
__netif_tx_unlock(slave_txq);
}
- if (netif_queue_stopped(dev))
+ if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
busy = 1;
break;
case 1:
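Splitting the old __QUEUE_STATE_XOFF bit into DRV_XOFF and STACK_XOFF is what lets BQL throttle a queue behind the driver's back while netif_tx_* keeps its old meaning. A userspace toy, not kernel code, that mirrors the bit logic under the same assumed layout:

#include <stdio.h>

#define QS_DRV_XOFF	(1UL << 0)	/* driver stopped the queue */
#define QS_STACK_XOFF	(1UL << 1)	/* stack (BQL) stopped the queue */
#define QS_ANY_XOFF	(QS_DRV_XOFF | QS_STACK_XOFF)

static int tx_queue_stopped(unsigned long s)	/* the driver's view */
{
	return (s & QS_DRV_XOFF) != 0;
}

static int xmit_stopped(unsigned long s)	/* the stack's view */
{
	return (s & QS_ANY_XOFF) != 0;
}

int main(void)
{
	unsigned long state = QS_STACK_XOFF;	/* BQL throttled, driver idle */

	printf("netif_tx_queue_stopped analogue: %d\n", tx_queue_stopped(state));
	printf("netif_xmit_stopped analogue:     %d\n", xmit_stopped(state));
	return 0;
}

With only STACK_XOFF set the driver still believes its queue is running (first line prints 0), yet no packet may be handed to it (second line prints 1), which is why the core transmit paths above switch to the netif_xmit_* checks.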


@ -0,0 +1,58 @@
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 7 Feb 2012 02:29:01 +0000
Subject: net: Fix issue with netdev_tx_reset_queue not resetting queue from
XOFF state
commit 5c4903549c05bbb373479e0ce2992573c120654a upstream.
We are seeing dev_watchdog hangs on several drivers. I suspect this is due
to the __QUEUE_STATE_STACK_XOFF bit being set prior to a reset for link
change, and then not being cleared by netdev_tx_reset_queue. This change
corrects that.
In addition we were seeing dev_watchdog hangs on igb after running the
ethtool tests. We found this to be due to the fact that the ethtool test
runs the same logic as ndo_start_xmit, but we were never clearing the XOFF
flag since the loopback test in ethtool does not do byte queue accounting.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Stephen Ko <stephen.s.ko@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/igb/igb_main.c | 3 ++-
include/linux/netdevice.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index fda8247..e96cef8 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2752,6 +2752,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
wr32(E1000_TXDCTL(reg_idx), txdctl);
+
+ netdev_tx_reset_queue(txring_txq(ring));
}
/**
@@ -3244,7 +3246,6 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
buffer_info = &tx_ring->tx_buffer_info[i];
igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
}
- netdev_tx_reset_queue(txring_txq(tx_ring));
size = sizeof(struct igb_tx_buffer) * tx_ring->count;
memset(tx_ring->tx_buffer_info, 0, size);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b195a34..4bf314f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1939,6 +1939,7 @@ static inline void netdev_completed_queue(struct net_device *dev,
static inline void netdev_tx_reset_queue(struct netdev_queue *q)
{
#ifdef CONFIG_BQL
+ clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state);
dql_reset(&q->dql);
#endif
}
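The igb hunks encode a general rule: any path that feeds frames to the hardware while bypassing BQL accounting, such as the ethtool offline loopback test, must reset the queue before normal transmission resumes, otherwise __QUEUE_STATE_STACK_XOFF can remain set with nothing left in flight to clear it. A hedged sketch of that pattern (foo_run_loopback is hypothetical):

#include <linux/netdevice.h>

/* hypothetical: transmits test frames directly, with no BQL accounting */
static int foo_run_loopback(struct net_device *dev);

static int foo_offline_test(struct net_device *dev)
{
	int err = foo_run_loopback(dev);

	/* restart the dql window and clear __QUEUE_STATE_STACK_XOFF */
	netdev_tx_reset_queue(netdev_get_tx_queue(dev, 0));
	return err;
}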


@ -0,0 +1,189 @@
From: david decotigny <david.decotigny@google.com>
Date: Wed, 16 Nov 2011 12:15:10 +0000
Subject: net: new counter for tx_timeout errors in sysfs
commit ccf5ff69fbbd8d877377f5786369cf5aa78a15fc upstream.
This adds the /sys/class/net/DEV/queues/Q/tx_timeout attribute
containing the total number of timeout events on the given queue. It
is always available with CONFIG_SYSFS, independently of
CONFIG_RPS/XPS.
Credits to Stephen Hemminger for a preliminary version of this patch.
Tested:
without CONFIG_SYSFS (compilation only)
with sysfs and without CONFIG_RPS & CONFIG_XPS
with sysfs and without CONFIG_RPS
with sysfs and without CONFIG_XPS
with defaults
Signed-off-by: David Decotigny <david.decotigny@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/linux/netdevice.h | 12 ++++++++++--
net/core/net-sysfs.c | 37 +++++++++++++++++++++++++++++++------
net/sched/sch_generic.c | 1 +
3 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 31da3bb..4d5698a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -532,7 +532,7 @@ struct netdev_queue {
struct Qdisc *qdisc;
unsigned long state;
struct Qdisc *qdisc_sleeping;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
struct kobject kobj;
#endif
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
@@ -547,6 +547,12 @@ struct netdev_queue {
* please use this field instead of dev->trans_start
*/
unsigned long trans_start;
+
+ /*
+ * Number of TX timeouts for this queue
+ * (/sys/class/net/DEV/Q/trans_timeout)
+ */
+ unsigned long trans_timeout;
} ____cacheline_aligned_in_smp;
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1109,9 +1115,11 @@ struct net_device {
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
struct kset *queues_kset;
+#endif
+#ifdef CONFIG_RPS
struct netdev_rx_queue *_rx;
/* Number of RX queues allocated at register_netdev() time */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a64382f..602b141 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -780,7 +780,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
#endif
}
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
/*
* netdev_queue sysfs structures and functions.
*/
@@ -826,6 +826,23 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
+static ssize_t show_trans_timeout(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ char *buf)
+{
+ unsigned long trans_timeout;
+
+ spin_lock_irq(&queue->_xmit_lock);
+ trans_timeout = queue->trans_timeout;
+ spin_unlock_irq(&queue->_xmit_lock);
+
+ return sprintf(buf, "%lu", trans_timeout);
+}
+
+static struct netdev_queue_attribute queue_trans_timeout =
+ __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+
+#ifdef CONFIG_XPS
static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
@@ -1020,12 +1037,17 @@ error:
static struct netdev_queue_attribute xps_cpus_attribute =
__ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+#endif /* CONFIG_XPS */
static struct attribute *netdev_queue_default_attrs[] = {
+ &queue_trans_timeout.attr,
+#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
+#endif
NULL
};
+#ifdef CONFIG_XPS
static void netdev_queue_release(struct kobject *kobj)
{
struct netdev_queue *queue = to_netdev_queue(kobj);
@@ -1076,10 +1098,13 @@ static void netdev_queue_release(struct kobject *kobj)
memset(kobj, 0, sizeof(*kobj));
dev_put(queue->dev);
}
+#endif /* CONFIG_XPS */
static struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
+#ifdef CONFIG_XPS
.release = netdev_queue_release,
+#endif
.default_attrs = netdev_queue_default_attrs,
};
@@ -1102,12 +1127,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
return error;
}
-#endif /* CONFIG_XPS */
+#endif /* CONFIG_SYSFS */
int
netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
int i;
int error = 0;
@@ -1125,14 +1150,14 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
return error;
#else
return 0;
-#endif
+#endif /* CONFIG_SYSFS */
}
static int register_queue_kobjects(struct net_device *net)
{
int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
net->queues_kset = kset_create_and_add("queues",
NULL, &net->dev.kobj);
if (!net->queues_kset)
@@ -1173,7 +1198,7 @@ static void remove_queue_kobjects(struct net_device *net)
net_rx_queue_update_kobjects(net, real_rx, 0);
netdev_queue_update_kobjects(net, real_tx, 0);
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
kset_unregister(net->queues_kset);
#endif
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca27..79ac145 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -246,6 +246,7 @@ static void dev_watchdog(unsigned long arg)
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
some_queue_timedout = 1;
+ txq->trans_timeout++;
break;
}
}
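With the attribute in place, the per-queue counter can be sampled from userspace. A small check program, assuming an eth0 whose TX queue kobjects carry the tx-<n> names that net-sysfs assigns:

#include <stdio.h>

int main(void)
{
	unsigned long timeouts;
	FILE *f = fopen("/sys/class/net/eth0/queues/tx-0/tx_timeout", "r");

	if (!f) {
		perror("tx_timeout");
		return 1;
	}
	if (fscanf(f, "%lu", &timeouts) == 1)	/* attribute prints "%lu" */
		printf("eth0 tx-0 watchdog timeouts: %lu\n", timeouts);
	fclose(f);
	return 0;
}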


@ -0,0 +1,85 @@
From: Igor Maravic <igorm@etf.rs>
Date: Mon, 5 Mar 2012 00:01:25 +0100
Subject: r8169: add byte queue limit support.
commit 036dafa28da1e2565a8529de2ae663c37b7a0060 upstream.
Nothing fancy:
- sent bytes count is notified in the start_xmit path right before
updating the owner bit in the hardware Tx descriptor (E. Dumazet)
- avoid useless tp->dev dereferencing in start_xmit (E. Dumazet)
Use of netdev_reset_queue is favored over proper accounting in
rtl8169_tx_clear_range since the latter would need more work for the
same result (nb: said accounting degenerates to nothing in xmit_frags).
Signed-off-by: Igor Maravic <igorm@etf.rs>
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
[bwh: Backported to 3.2:
- Adjust context
- Don't use 64-bit stats]
---
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -5340,6 +5340,7 @@
{
rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC);
tp->cur_tx = tp->dirty_tx = 0;
+ netdev_reset_queue(tp->dev);
}
static void rtl8169_schedule_work(struct net_device *dev, work_func_t task)
@@ -5552,6 +5553,8 @@
txd->opts2 = cpu_to_le32(opts[1]);
+ netdev_sent_queue(dev, skb->len);
+
wmb();
/* Anti gcc 2.95.3 bugware (sic) */
@@ -5644,11 +5647,17 @@
rtl8169_schedule_work(dev, rtl8169_reinit_task);
}
+struct rtl_txc {
+ int packets;
+ int bytes;
+};
+
static void rtl8169_tx_interrupt(struct net_device *dev,
struct rtl8169_private *tp,
void __iomem *ioaddr)
{
unsigned int dirty_tx, tx_left;
+ struct rtl_txc txc = { 0, 0 };
dirty_tx = tp->dirty_tx;
smp_rmb();
@@ -5667,15 +5676,22 @@
rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
tp->TxDescArray + entry);
if (status & LastFrag) {
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += tx_skb->skb->len;
- dev_kfree_skb(tx_skb->skb);
+ struct sk_buff *skb = tx_skb->skb;
+
+ txc.packets++;
+ txc.bytes += skb->len;
+ dev_kfree_skb(skb);
tx_skb->skb = NULL;
}
dirty_tx++;
tx_left--;
}
+ dev->stats.tx_packets += txc.packets;
+ dev->stats.tx_bytes += txc.bytes;
+
+ netdev_completed_queue(dev, txc.packets, txc.bytes);
+
if (tp->dirty_tx != dirty_tx) {
tp->dirty_tx = dirty_tx;
/* Sync with rtl8169_start_xmit:


@ -0,0 +1,33 @@
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 30 Nov 2011 17:12:27 -0500
Subject: sfc: fix race in efx_enqueue_skb_tso()
commit 449fa023bca5b53bd924d91a27ffd34807fdeb80 upstream.
As soon as skb is pushed to hardware, it can be completed and freed, so
we should not dereference skb anymore.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/sfc/tx.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index ab4c635..e0e00b3 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -1173,11 +1173,11 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
goto mem_err;
}
+ netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
+
/* Pass off to hardware */
efx_nic_push_buffers(tx_queue);
- netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
-
tx_queue->tso_bursts++;
return NETDEV_TX_OK;
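The rule behind this fix generalizes to every BQL conversion: once the doorbell is written, the NIC may complete and free the skb on another CPU, so anything the driver still needs from the skb must be read first. A condensed sketch under hypothetical foo_* names:

#include <linux/netdevice.h>

struct foo_tx_queue {
	struct netdev_queue *core_txq;
	/* ... ring state ... */
};

/* hypothetical: doorbell write handing descriptors to the NIC */
static void foo_push_buffers(struct foo_tx_queue *txq);

static netdev_tx_t foo_enqueue(struct foo_tx_queue *txq, struct sk_buff *skb)
{
	unsigned int len = skb->len;	/* capture before the doorbell */

	netdev_tx_sent_queue(txq->core_txq, len);
	foo_push_buffers(txq);		/* the NIC may free skb from here on */
	return NETDEV_TX_OK;
}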


@ -0,0 +1,121 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:43 +0000
Subject: sfc: Support for byte queue limits
commit c3940999b29ca7d6ad9b37b827a058c90fd51992 upstream.
Changes to sfc to use byte queue limits.
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/sfc/tx.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index df88c543..ab4c635 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -31,7 +31,9 @@
#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
- struct efx_tx_buffer *buffer)
+ struct efx_tx_buffer *buffer,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
{
if (buffer->unmap_len) {
struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
@@ -48,6 +50,8 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
}
if (buffer->skb) {
+ (*pkts_compl)++;
+ (*bytes_compl) += buffer->skb->len;
dev_kfree_skb_any((struct sk_buff *) buffer->skb);
buffer->skb = NULL;
netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
@@ -250,6 +254,8 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
buffer->skb = skb;
buffer->continuation = false;
+ netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
+
/* Pass off to hardware */
efx_nic_push_buffers(tx_queue);
@@ -267,10 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
unwind:
/* Work backwards until we hit the original insert pointer value */
while (tx_queue->insert_count != tx_queue->write_count) {
+ unsigned int pkts_compl = 0, bytes_compl = 0;
--tx_queue->insert_count;
insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
buffer = &tx_queue->buffer[insert_ptr];
- efx_dequeue_buffer(tx_queue, buffer);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
buffer->len = 0;
}
@@ -293,7 +300,9 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
* specified index.
*/
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
- unsigned int index)
+ unsigned int index,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
{
struct efx_nic *efx = tx_queue->efx;
unsigned int stop_index, read_ptr;
@@ -311,7 +320,7 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
return;
}
- efx_dequeue_buffer(tx_queue, buffer);
+ efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
buffer->continuation = true;
buffer->len = 0;
@@ -422,10 +431,12 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned fill_level;
struct efx_nic *efx = tx_queue->efx;
+ unsigned int pkts_compl = 0, bytes_compl = 0;
EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
- efx_dequeue_buffers(tx_queue, index);
+ efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+ netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);
/* See if we need to restart the netif queue. This barrier
* separates the update of read_count from the test of the
@@ -515,13 +526,15 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
/* Free any buffers left in the ring */
while (tx_queue->read_count != tx_queue->write_count) {
+ unsigned int pkts_compl = 0, bytes_compl = 0;
buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
- efx_dequeue_buffer(tx_queue, buffer);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
buffer->continuation = true;
buffer->len = 0;
++tx_queue->read_count;
}
+ netdev_tx_reset_queue(tx_queue->core_txq);
}
void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
@@ -1163,6 +1176,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
/* Pass off to hardware */
efx_nic_push_buffers(tx_queue);
+ netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
+
tx_queue->tso_bursts++;
return NETDEV_TX_OK;


@ -0,0 +1,104 @@
From: stephen hemminger <shemminger@vyatta.com>
Date: Sun, 22 Jan 2012 09:40:40 +0000
Subject: skge: add byte queue limit support
commit da057fb7d272c7e7609465a54bcac8ec8072ead5 upstream.
This also changes the cleanup logic slightly to aggregate
completed notifications for multiple packets.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/marvell/skge.c | 37 +++++++++++++++++++++++------------
1 file changed, 24 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 299c33b..edb9bda 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -2817,6 +2817,8 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb,
td->control = BMU_OWN | BMU_SW | BMU_STF | control | len;
wmb();
+ netdev_sent_queue(dev, skb->len);
+
skge_write8(hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_START);
netif_printk(skge, tx_queued, KERN_DEBUG, skge->netdev,
@@ -2858,11 +2860,9 @@ mapping_error:
/* Free resources associated with this reing element */
-static void skge_tx_free(struct skge_port *skge, struct skge_element *e,
- u32 control)
+static inline void skge_tx_unmap(struct pci_dev *pdev, struct skge_element *e,
+ u32 control)
{
- struct pci_dev *pdev = skge->hw->pdev;
-
/* skb header vs. fragment */
if (control & BMU_STF)
pci_unmap_single(pdev, dma_unmap_addr(e, mapaddr),
@@ -2872,13 +2872,6 @@ static void skge_tx_free(struct skge_port *skge, struct skge_element *e,
pci_unmap_page(pdev, dma_unmap_addr(e, mapaddr),
dma_unmap_len(e, maplen),
PCI_DMA_TODEVICE);
-
- if (control & BMU_EOF) {
- netif_printk(skge, tx_done, KERN_DEBUG, skge->netdev,
- "tx done slot %td\n", e - skge->tx_ring.start);
-
- dev_kfree_skb(e->skb);
- }
}
/* Free all buffers in transmit ring */
@@ -2889,10 +2882,15 @@ static void skge_tx_clean(struct net_device *dev)
for (e = skge->tx_ring.to_clean; e != skge->tx_ring.to_use; e = e->next) {
struct skge_tx_desc *td = e->desc;
- skge_tx_free(skge, e, td->control);
+
+ skge_tx_unmap(skge->hw->pdev, e, td->control);
+
+ if (td->control & BMU_EOF)
+ dev_kfree_skb(e->skb);
td->control = 0;
}
+ netdev_reset_queue(dev);
skge->tx_ring.to_clean = e;
}
@@ -3157,6 +3155,7 @@ static void skge_tx_done(struct net_device *dev)
struct skge_port *skge = netdev_priv(dev);
struct skge_ring *ring = &skge->tx_ring;
struct skge_element *e;
+ unsigned int bytes_compl = 0, pkts_compl = 0;
skge_write8(skge->hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F);
@@ -3166,8 +3165,20 @@ static void skge_tx_done(struct net_device *dev)
if (control & BMU_OWN)
break;
- skge_tx_free(skge, e, control);
+ skge_tx_unmap(skge->hw->pdev, e, control);
+
+ if (control & BMU_EOF) {
+ netif_printk(skge, tx_done, KERN_DEBUG, skge->netdev,
+ "tx done slot %td\n",
+ e - skge->tx_ring.start);
+
+ pkts_compl++;
+ bytes_compl += e->skb->len;
+
+ dev_kfree_skb(e->skb);
+ }
}
+ netdev_completed_queue(dev, pkts_compl, bytes_compl);
skge->tx_ring.to_clean = e;
/* Can run lockless until we need to synchronize to restart queue. */
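Batching the per-packet statistics into pkts_compl/bytes_compl and notifying once, as above, is not just a stats cleanup: it also means the dql update and any resulting queue wake-up in netdev_completed_queue() run once per interrupt rather than once per packet. The same shape recurs in sky2 and tg3; generically (foo_next_completed_skb is hypothetical):

#include <linux/netdevice.h>

static struct sk_buff *foo_next_completed_skb(struct net_device *dev);

static void foo_tx_done(struct net_device *dev)
{
	unsigned int pkts = 0, bytes = 0;
	struct sk_buff *skb;

	while ((skb = foo_next_completed_skb(dev)) != NULL) {
		pkts++;
		bytes += skb->len;	/* read before freeing */
		dev_kfree_skb(skb);
	}
	netdev_completed_queue(dev, pkts, bytes);	/* one update per batch */
}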


@ -0,0 +1,72 @@
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 29 Nov 2011 15:15:33 +0000
Subject: sky2: add bql support
commit ec2a5466b3ce680c92e8e05617b020fd825854b9 upstream.
This adds support for byte queue limits and aggregates statistics
update (suggestion from Eric).
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/marvell/sky2.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 29adc78..760c2b1 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -1110,6 +1110,7 @@ static void tx_init(struct sky2_port *sky2)
sky2->tx_prod = sky2->tx_cons = 0;
sky2->tx_tcpsum = 0;
sky2->tx_last_mss = 0;
+ netdev_reset_queue(sky2->netdev);
le = get_tx_le(sky2, &sky2->tx_prod);
le->addr = 0;
@@ -1971,6 +1972,7 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb,
if (tx_avail(sky2) <= MAX_SKB_TX_LE)
netif_stop_queue(dev);
+ netdev_sent_queue(dev, skb->len);
sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod);
return NETDEV_TX_OK;
@@ -2002,7 +2004,8 @@ mapping_error:
static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
{
struct net_device *dev = sky2->netdev;
- unsigned idx;
+ u16 idx;
+ unsigned int bytes_compl = 0, pkts_compl = 0;
BUG_ON(done >= sky2->tx_ring_size);
@@ -2017,10 +2020,8 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
netif_printk(sky2, tx_done, KERN_DEBUG, dev,
"tx done %u\n", idx);
- u64_stats_update_begin(&sky2->tx_stats.syncp);
- ++sky2->tx_stats.packets;
- sky2->tx_stats.bytes += skb->len;
- u64_stats_update_end(&sky2->tx_stats.syncp);
+ pkts_compl++;
+ bytes_compl += skb->len;
re->skb = NULL;
dev_kfree_skb_any(skb);
@@ -2031,6 +2032,13 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
sky2->tx_cons = idx;
smp_mb();
+
+ netdev_completed_queue(dev, pkts_compl, bytes_compl);
+
+ u64_stats_update_begin(&sky2->tx_stats.syncp);
+ sky2->tx_stats.packets += pkts_compl;
+ sky2->tx_stats.bytes += bytes_compl;
+ u64_stats_update_end(&sky2->tx_stats.syncp);
}
static void sky2_tx_reset(struct sky2_hw *hw, unsigned port)


@ -0,0 +1,49 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 5 Mar 2012 19:53:50 +0000
Subject: tg3: Fix to use multi queue BQL interfaces
commit 5cb917bc4f3882ecee87064483111023086757d3 upstream.
Fix tg3 to use BQL multi queue related netdev interfaces since the
device supports multi queue.
Signed-off-by: Tom Herbert <therbert@google.com>
Reported-by: Christoph Lameter <cl@gentwo.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/broadcom/tg3.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 423d023..35c2a20 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -5352,7 +5352,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
}
}
- netdev_completed_queue(tp->dev, pkts_compl, bytes_compl);
+ netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
tnapi->tx_cons = sw_idx;
@@ -6793,7 +6793,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
- netdev_sent_queue(tp->dev, skb->len);
+ netdev_tx_sent_queue(txq, skb->len);
/* Packets are ready, update Tx producer idx local and on card. */
tw32_tx_mbox(tnapi->prodmbox, entry);
@@ -7275,8 +7275,8 @@ static void tg3_free_rings(struct tg3 *tp)
dev_kfree_skb_any(skb);
}
+ netdev_tx_reset_queue(netdev_get_tx_queue(tp->dev, j));
}
- netdev_reset_queue(tp->dev);
}
/* Initialize tx/rx rings for packet processing.
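The underlying rule: BQL state lives in each struct netdev_queue, so a multi-queue driver must apply the netdev_tx_* variants to the queue the skb was actually mapped to; the plain netdev_* wrappers silently operate on queue 0 and would charge every queue's traffic to it. A sketch of the corrected call site (hypothetical driver):

#include <linux/netdevice.h>

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq =
		netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	/* ... post skb to this queue's ring ... */
	netdev_tx_sent_queue(txq, skb->len);	/* not netdev_sent_queue(dev, ...) */
	return NETDEV_TX_OK;
}

Completion and teardown must address the same queue, hence the matching moves to netdev_tx_completed_queue() and a per-ring netdev_tx_reset_queue() in the hunks above.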


@ -0,0 +1,62 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:30 +0000
Subject: tg3: Support for byte queue limits
commit 298376d3e8f00147548c426959ce79efc47b669a upstream.
Changes to tg3 to use byte queue limits.
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/broadcom/tg3.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index aa413d6..cf36312 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -5302,6 +5302,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
u32 sw_idx = tnapi->tx_cons;
struct netdev_queue *txq;
int index = tnapi - tp->napi;
+ unsigned int pkts_compl = 0, bytes_compl = 0;
if (tg3_flag(tp, ENABLE_TSS))
index--;
@@ -5352,6 +5353,9 @@ static void tg3_tx(struct tg3_napi *tnapi)
sw_idx = NEXT_TX(sw_idx);
}
+ pkts_compl++;
+ bytes_compl += skb->len;
+
dev_kfree_skb(skb);
if (unlikely(tx_bug)) {
@@ -5360,6 +5364,8 @@ static void tg3_tx(struct tg3_napi *tnapi)
}
}
+ netdev_completed_queue(tp->dev, pkts_compl, bytes_compl);
+
tnapi->tx_cons = sw_idx;
/* Need to make the tx_cons update visible to tg3_start_xmit()
@@ -6804,6 +6810,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
+ netdev_sent_queue(tp->dev, skb->len);
/* Packets are ready, update Tx producer idx local and on card. */
tw32_tx_mbox(tnapi->prodmbox, entry);
@@ -7286,6 +7293,7 @@ static void tg3_free_rings(struct tg3 *tp)
dev_kfree_skb_any(skb);
}
}
+ netdev_reset_queue(tp->dev);
}
/* Initialize tx/rx rings for packet processing.


@ -0,0 +1,126 @@
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:02 +0000
Subject: xps: Add xps_queue_release function
commit 927fbec13e40648d3c87cbb1daaac5b1fb9c8775 upstream.
This patch moves the xps specific parts in netdev_queue_release into
its own function which netdev_queue_release can call. This allows
netdev_queue_release to be more generic (for adding new attributes
to tx queues).
Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/core/net-sysfs.c | 89 ++++++++++++++++++++++++++------------------------
1 file changed, 47 insertions(+), 42 deletions(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index db6c2f8..b17c14a 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -910,6 +910,52 @@ static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P) \
rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+static void xps_queue_release(struct netdev_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ unsigned long index;
+ int i, pos, nonempty = 0;
+
+ index = get_netdev_queue_index(queue);
+
+ mutex_lock(&xps_map_mutex);
+ dev_maps = xmap_dereference(dev->xps_maps);
+
+ if (dev_maps) {
+ for_each_possible_cpu(i) {
+ map = xmap_dereference(dev_maps->cpu_map[i]);
+ if (!map)
+ continue;
+
+ for (pos = 0; pos < map->len; pos++)
+ if (map->queues[pos] == index)
+ break;
+
+ if (pos < map->len) {
+ if (map->len > 1)
+ map->queues[pos] =
+ map->queues[--map->len];
+ else {
+ RCU_INIT_POINTER(dev_maps->cpu_map[i],
+ NULL);
+ kfree_rcu(map, rcu);
+ map = NULL;
+ }
+ }
+ if (map)
+ nonempty = 1;
+ }
+
+ if (!nonempty) {
+ RCU_INIT_POINTER(dev->xps_maps, NULL);
+ kfree_rcu(dev_maps, rcu);
+ }
+ }
+ mutex_unlock(&xps_map_mutex);
+}
+
static ssize_t store_xps_map(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute,
const char *buf, size_t len)
@@ -1054,49 +1100,8 @@ static struct attribute *netdev_queue_default_attrs[] = {
static void netdev_queue_release(struct kobject *kobj)
{
struct netdev_queue *queue = to_netdev_queue(kobj);
- struct net_device *dev = queue->dev;
- struct xps_dev_maps *dev_maps;
- struct xps_map *map;
- unsigned long index;
- int i, pos, nonempty = 0;
-
- index = get_netdev_queue_index(queue);
-
- mutex_lock(&xps_map_mutex);
- dev_maps = xmap_dereference(dev->xps_maps);
- if (dev_maps) {
- for_each_possible_cpu(i) {
- map = xmap_dereference(dev_maps->cpu_map[i]);
- if (!map)
- continue;
-
- for (pos = 0; pos < map->len; pos++)
- if (map->queues[pos] == index)
- break;
-
- if (pos < map->len) {
- if (map->len > 1)
- map->queues[pos] =
- map->queues[--map->len];
- else {
- RCU_INIT_POINTER(dev_maps->cpu_map[i],
- NULL);
- kfree_rcu(map, rcu);
- map = NULL;
- }
- }
- if (map)
- nonempty = 1;
- }
-
- if (!nonempty) {
- RCU_INIT_POINTER(dev->xps_maps, NULL);
- kfree_rcu(dev_maps, rcu);
- }
- }
-
- mutex_unlock(&xps_map_mutex);
+ xps_queue_release(queue);
memset(kobj, 0, sizeof(*kobj));
dev_put(queue->dev);

33
debian/patches/series vendored

@ -357,3 +357,36 @@ bugfix/all/cipso-don-t-follow-a-NULL-pointer-when-setsockopt-is.patch
features/all/debugfs-add-mode-uid-and-gid-options.patch
debian/debugfs-set-default-mode-to-700.patch
# Byte Queue Limits from 3.3; fixes and additional driver support from 3.4, 3.5
features/all/bql/net-new-counter-for-tx_timeout-errors-in-sysfs.patch
features/all/bql/dql-dynamic-queue-limits.patch
features/all/bql/net-add-queue-state-xoff-flag-for-stack.patch
features/all/bql/net-add-netdev-interfaces-for-recording-sends-comp.patch
features/all/bql/xps-add-xps_queue_release-function.patch
features/all/bql/bql-byte-queue-limits.patch
features/all/bql/bql-fix-config_xps-n-build.patch
features/all/bql/bql-fix-inconsistency-between-file-mode-and-attr-method.patch
features/all/bql/dql-fix-undefined-jiffies.patch
features/all/bql/bql-fix-posdiff-to-integer-overflow-aware.patch
features/all/bql/bql-avoid-unneeded-limit-decrement.patch
features/all/bql/bql-avoid-possible-inconsistent-calculation.patch
features/all/bql/e1000e-support-for-byte-queue-limits.patch
features/all/bql/forcedeth-support-for-byte-queue-limits.patch
features/all/bql/forcedeath-fix-bql-support-for-forcedeath.patch
features/all/bql/tg3-support-for-byte-queue-limits.patch
features/all/bql/tg3-fix-to-use-multi-queue-bql-interfaces.patch
features/all/bql/bnx2x-remove-unused-variable.patch
features/all/bql/bnx2x-support-for-byte-queue-limits.patch
features/all/bql/bnx2x-fix-crash-while-ethtool-t.patch
features/all/bql/sfc-support-for-byte-queue-limits.patch
features/all/bql/sfc-fix-race-in-efx_enqueue_skb_tso.patch
features/all/bql/sky2-add-bql-support.patch
features/all/bql/bnx2-support-for-byte-queue-limits.patch
features/all/bql/igb-add-support-for-byte-queue-limits.patch
features/all/bql/igb-fix-ethtool-offline-test.patch
features/all/bql/net-fix-issue-with-netdev_tx_reset_queue-not-resetting-queue-from.patch
features/all/bql/ixgbe-add-support-for-byte-queue-limits.patch
features/all/bql/igb-ixgbe-netdev_tx_reset_queue-incorrectly-called-from-tx-init.patch
features/all/bql/skge-add-byte-queue-limit-support.patch
features/all/bql/r8169-add-byte-queue-limit-support.patch