[xen] Backport various netback fixes (Closes: #767261).

svn path=/dists/sid/linux/; revision=22060
This commit is contained in:
Ian Campbell 2014-11-09 10:50:36 +00:00
parent c5a1eeca82
commit 534a88e05a
16 changed files with 2282 additions and 0 deletions

3
debian/changelog vendored
View File

@ -5,6 +5,9 @@ linux (3.16.7-3) UNRELEASED; urgency=medium
- PCI: Suspend/resume quirks for Apple thunderbolt
- Enable THUNDERBOLT as module
[ Ian Campbell ]
* [xen] Backport various netback fixes (Closes: #767261).
-- Ben Hutchings <ben@decadent.org.uk> Sun, 09 Nov 2014 10:13:09 +0000
linux (3.16.7-2) unstable; urgency=medium

View File

@ -0,0 +1,316 @@
From 5fe1b16f4eedda3a3bdceb2a0f65a0e7816555ab Mon Sep 17 00:00:00 2001
From: Zoltan Kiss <zoltan.kiss@citrix.com>
Date: Tue, 8 Jul 2014 19:49:14 +0100
Subject: [PATCH 01/14] xen-netback: Adding debugfs "io_ring_qX" files
Origin: https://git.kernel.org/linus/f51de24356e49e4dcb5095e87717065580912120
This patch adds debugfs capabilities to netback. There used to be a similar
patch floating around for classic kernel, but it used procfs. It is based on a
very similar blkback patch.
It creates xen-netback/[vifname]/io_ring_q[queueno] files; reading them outputs
various ring variables etc. Writing "kick" into one of them imitates an
interrupt, which can be useful to check whether the ring is just stalled due to
a missed interrupt.
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xenproject.org
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit f51de24356e49e4dcb5095e87717065580912120)
---
drivers/net/xen-netback/common.h | 11 +++
drivers/net/xen-netback/interface.c | 2 +-
drivers/net/xen-netback/netback.c | 11 +++
drivers/net/xen-netback/xenbus.c | 178 ++++++++++++++++++++++++++++++++++-
4 files changed, 200 insertions(+), 2 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 2532ce8..28c9822 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -44,6 +44,7 @@
#include <xen/interface/grant_table.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
+#include <linux/debugfs.h>
typedef unsigned int pending_ring_idx_t;
#define INVALID_PENDING_RING_IDX (~0U)
@@ -224,6 +225,10 @@ struct xenvif {
struct xenvif_queue *queues;
unsigned int num_queues; /* active queues, resource allocated */
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *xenvif_dbg_root;
+#endif
+
/* Miscellaneous private stuff. */
struct net_device *dev;
};
@@ -297,10 +302,16 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
/* Callback from stack when TX packet can be released */
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+irqreturn_t xenvif_interrupt(int irq, void *dev_id);
+
extern bool separate_tx_rx_irq;
extern unsigned int rx_drain_timeout_msecs;
extern unsigned int rx_drain_timeout_jiffies;
extern unsigned int xenvif_max_queues;
+#ifdef CONFIG_DEBUG_FS
+extern struct dentry *xen_netback_dbg_root;
+#endif
+
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 9e97c7c..ef75b45 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -102,7 +102,7 @@ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
xenvif_tx_interrupt(irq, dev_id);
xenvif_rx_interrupt(irq, dev_id);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index c65b636..769e553 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -2027,6 +2027,13 @@ static int __init netback_init(void)
rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+#ifdef CONFIG_DEBUG_FS
+ xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
+ if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+ pr_warn("Init of debugfs returned %ld!\n",
+ PTR_ERR(xen_netback_dbg_root));
+#endif /* CONFIG_DEBUG_FS */
+
return 0;
failed_init:
@@ -2037,6 +2044,10 @@ module_init(netback_init);
static void __exit netback_fini(void)
{
+#ifdef CONFIG_DEBUG_FS
+ if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
+ debugfs_remove_recursive(xen_netback_dbg_root);
+#endif /* CONFIG_DEBUG_FS */
xenvif_xenbus_fini();
}
module_exit(netback_fini);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 3d85acd..580517d 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -44,6 +44,175 @@ static void unregister_hotplug_status_watch(struct backend_info *be);
static void set_backend_state(struct backend_info *be,
enum xenbus_state state);
+#ifdef CONFIG_DEBUG_FS
+struct dentry *xen_netback_dbg_root = NULL;
+
+static int xenvif_read_io_ring(struct seq_file *m, void *v)
+{
+ struct xenvif_queue *queue = m->private;
+ struct xen_netif_tx_back_ring *tx_ring = &queue->tx;
+ struct xen_netif_rx_back_ring *rx_ring = &queue->rx;
+
+ if (tx_ring->sring) {
+ struct xen_netif_tx_sring *sring = tx_ring->sring;
+
+ seq_printf(m, "Queue %d\nTX: nr_ents %u\n", queue->id,
+ tx_ring->nr_ents);
+ seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n",
+ sring->req_prod,
+ sring->req_prod - sring->rsp_prod,
+ tx_ring->req_cons,
+ tx_ring->req_cons - sring->rsp_prod,
+ sring->req_event,
+ sring->req_event - sring->rsp_prod);
+ seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n",
+ sring->rsp_prod,
+ tx_ring->rsp_prod_pvt,
+ tx_ring->rsp_prod_pvt - sring->rsp_prod,
+ sring->rsp_event,
+ sring->rsp_event - sring->rsp_prod);
+ seq_printf(m, "pending prod %u pending cons %u nr_pending_reqs %u\n",
+ queue->pending_prod,
+ queue->pending_cons,
+ nr_pending_reqs(queue));
+ seq_printf(m, "dealloc prod %u dealloc cons %u dealloc_queue %u\n\n",
+ queue->dealloc_prod,
+ queue->dealloc_cons,
+ queue->dealloc_prod - queue->dealloc_cons);
+ }
+
+ if (rx_ring->sring) {
+ struct xen_netif_rx_sring *sring = rx_ring->sring;
+
+ seq_printf(m, "RX: nr_ents %u\n", rx_ring->nr_ents);
+ seq_printf(m, "req prod %u (%d) cons %u (%d) event %u (%d)\n",
+ sring->req_prod,
+ sring->req_prod - sring->rsp_prod,
+ rx_ring->req_cons,
+ rx_ring->req_cons - sring->rsp_prod,
+ sring->req_event,
+ sring->req_event - sring->rsp_prod);
+ seq_printf(m, "rsp prod %u (base) pvt %u (%d) event %u (%d)\n\n",
+ sring->rsp_prod,
+ rx_ring->rsp_prod_pvt,
+ rx_ring->rsp_prod_pvt - sring->rsp_prod,
+ sring->rsp_event,
+ sring->rsp_event - sring->rsp_prod);
+ }
+
+ seq_printf(m, "NAPI state: %lx NAPI weight: %d TX queue len %u\n"
+ "Credit timer_pending: %d, credit: %lu, usec: %lu\n"
+ "remaining: %lu, expires: %lu, now: %lu\n",
+ queue->napi.state, queue->napi.weight,
+ skb_queue_len(&queue->tx_queue),
+ timer_pending(&queue->credit_timeout),
+ queue->credit_bytes,
+ queue->credit_usec,
+ queue->remaining_credit,
+ queue->credit_timeout.expires,
+ jiffies);
+
+ return 0;
+}
+
+#define XENVIF_KICK_STR "kick"
+
+static ssize_t
+xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct xenvif_queue *queue =
+ ((struct seq_file *)filp->private_data)->private;
+ int len;
+ char write[sizeof(XENVIF_KICK_STR)];
+
+ /* don't allow partial writes and check the length */
+ if (*ppos != 0)
+ return 0;
+ if (count < sizeof(XENVIF_KICK_STR) - 1)
+ return -ENOSPC;
+
+ len = simple_write_to_buffer(write,
+ sizeof(write),
+ ppos,
+ buf,
+ count);
+ if (len < 0)
+ return len;
+
+ if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
+ xenvif_interrupt(0, (void *)queue);
+ else {
+ pr_warn("Unknown command to io_ring_q%d. Available: kick\n",
+ queue->id);
+ count = -EINVAL;
+ }
+ return count;
+}
+
+static int xenvif_dump_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+ void *queue = NULL;
+
+ if (inode->i_private)
+ queue = inode->i_private;
+ ret = single_open(filp, xenvif_read_io_ring, queue);
+ filp->f_mode |= FMODE_PWRITE;
+ return ret;
+}
+
+static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
+ .owner = THIS_MODULE,
+ .open = xenvif_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = xenvif_write_io_ring,
+};
+
+static void xenvif_debugfs_addif(struct xenvif_queue *queue)
+{
+ struct dentry *pfile;
+ struct xenvif *vif = queue->vif;
+ int i;
+
+ if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+ return;
+
+ vif->xenvif_dbg_root = debugfs_create_dir(vif->dev->name,
+ xen_netback_dbg_root);
+ if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root)) {
+ for (i = 0; i < vif->num_queues; ++i) {
+ char filename[sizeof("io_ring_q") + 4];
+
+ snprintf(filename, sizeof(filename), "io_ring_q%d", i);
+ pfile = debugfs_create_file(filename,
+ S_IRUSR | S_IWUSR,
+ vif->xenvif_dbg_root,
+ &vif->queues[i],
+ &xenvif_dbg_io_ring_ops_fops);
+ if (IS_ERR_OR_NULL(pfile))
+ pr_warn("Creation of io_ring file returned %ld!\n",
+ PTR_ERR(pfile));
+ }
+ } else
+ netdev_warn(vif->dev,
+ "Creation of vif debugfs dir returned %ld!\n",
+ PTR_ERR(vif->xenvif_dbg_root));
+}
+
+static void xenvif_debugfs_delif(struct xenvif *vif)
+{
+ if (IS_ERR_OR_NULL(xen_netback_dbg_root))
+ return;
+
+ if (!IS_ERR_OR_NULL(vif->xenvif_dbg_root))
+ debugfs_remove_recursive(vif->xenvif_dbg_root);
+ vif->xenvif_dbg_root = NULL;
+}
+#endif /* CONFIG_DEBUG_FS */
+
static int netback_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
@@ -246,8 +415,12 @@ static void backend_create_xenvif(struct backend_info *be)
static void backend_disconnect(struct backend_info *be)
{
- if (be->vif)
+ if (be->vif) {
+#ifdef CONFIG_DEBUG_FS
+ xenvif_debugfs_delif(be->vif);
+#endif /* CONFIG_DEBUG_FS */
xenvif_disconnect(be->vif);
+ }
}
static void backend_connect(struct backend_info *be)
@@ -560,6 +733,9 @@ static void connect(struct backend_info *be)
be->vif->num_queues = queue_index;
goto err;
}
+#ifdef CONFIG_DEBUG_FS
+ xenvif_debugfs_addif(queue);
+#endif /* CONFIG_DEBUG_FS */
}
/* Initialisation completed, tell core driver the number of
--
1.7.10.4

View File

@ -0,0 +1,43 @@
From 4b3437eeaea2f8d27974aa8615b425b232076e15 Mon Sep 17 00:00:00 2001
From: Zoltan Kiss <zoltan.kiss@citrix.com>
Date: Mon, 11 Aug 2014 13:01:44 +0100
Subject: [PATCH 05/14] xen-netback: Don't deschedule NAPI when carrier off
Origin: https://git.kernel.org/linus/2561cc15e3816e4323f9e79a6890bff94c0bbec2
In the patch called "xen-netback: Turn off the carrier if the guest is not able
to receive" NAPI was descheduled when the carrier was set off. That's
not what most of the drivers do, and we don't have any specific reason to do so
either, so revert that change.
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xenproject.org
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 2561cc15e3816e4323f9e79a6890bff94c0bbec2)
---
drivers/net/xen-netback/interface.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index b41ddbf..04696fc 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -78,12 +78,8 @@ int xenvif_poll(struct napi_struct *napi, int budget)
/* This vif is rogue, we pretend we've there is nothing to do
* for this vif to deschedule it from NAPI. But this interface
* will be turned off in thread context later.
- * Also, if a guest doesn't post enough slots to receive data on one of
- * its queues, the carrier goes down and NAPI is descheduled here so
- * the guest can't send more packets until it's ready to receive.
*/
- if (unlikely(queue->vif->disabled ||
- !netif_carrier_ok(queue->vif->dev))) {
+ if (unlikely(queue->vif->disabled)) {
napi_complete(napi);
return 0;
}
--
1.7.10.4

View File

@ -0,0 +1,46 @@
From f1bbbc1036f3dd10252aecc4088ac50ad1f57399 Mon Sep 17 00:00:00 2001
From: Zoltan Kiss <zoltan.kiss@citrix.com>
Date: Thu, 7 Aug 2014 13:38:38 +0100
Subject: [PATCH 04/14] xen-netback: Fix vif->disable handling
Origin: https://git.kernel.org/linus/743b0a92b92a0e1b6a68497ccd18a0d60a4b6082
In the patch called "xen-netback: Turn off the carrier if the guest is not able
to receive" new branches were introduced to this if statement, risking that a
queue with non-zero id can reenable the disabled interface.
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xenproject.org
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 743b0a92b92a0e1b6a68497ccd18a0d60a4b6082)
---
drivers/net/xen-netback/netback.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index aa20933..4734472 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -2025,9 +2025,15 @@ int xenvif_kthread_guest_rx(void *data)
* context so we defer it here, if this thread is
* associated with queue 0.
*/
- if (unlikely(queue->vif->disabled && queue->id == 0))
+ if (unlikely(queue->vif->disabled && queue->id == 0)) {
xenvif_carrier_off(queue->vif);
- else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
+ } else if (unlikely(queue->vif->disabled)) {
+ /* kthread_stop() would be called upon this thread soon,
+ * be a bit proactive
+ */
+ skb_queue_purge(&queue->rx_queue);
+ queue->rx_last_skb_slots = 0;
+ } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
&queue->status))) {
xenvif_rx_purge_event(queue);
} else if (!netif_carrier_ok(queue->vif->dev)) {
--
1.7.10.4

View File

@ -0,0 +1,318 @@
From f1c53c504eec39ad31bcfdeeb307c85075037a40 Mon Sep 17 00:00:00 2001
From: Zoltan Kiss <zoltan.kiss@citrix.com>
Date: Mon, 4 Aug 2014 16:20:58 +0100
Subject: [PATCH 03/14] xen-netback: Turn off the carrier if the guest is not
able to receive
Origin: https://git.kernel.org/linus/f34a4cf9c9b4fd35ba7f9a596cedb011879a1a4d
Currently when the guest is not able to receive more packets, qdisc layer starts
a timer, and when it goes off, qdisc is started again to deliver a packet again.
This is a very slow way to drain the queues, consumes unnecessary resources and
slows down other guests shutdown.
This patch changes the behaviour by turning the carrier off when that timer
fires, so all the packets which were stuck waiting for that vif are freed up.
Instead of the rx_queue_purge bool it uses the QUEUE_STATUS_RX_PURGE_EVENT bit to
signal the thread that either the timeout happened or an RX interrupt arrived,
so the thread can check what it should do. It also disables NAPI, so the guest
can't transmit, but leaves the interrupts on, so it can resurrect.
Only the queues which brought down the interface can enable it again, the bit
QUEUE_STATUS_RX_STALLED makes sure of that.
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xenproject.org
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit f34a4cf9c9b4fd35ba7f9a596cedb011879a1a4d)
---
drivers/net/xen-netback/common.h | 15 ++++--
drivers/net/xen-netback/interface.c | 49 ++++++++++--------
drivers/net/xen-netback/netback.c | 97 ++++++++++++++++++++++++++++++-----
3 files changed, 123 insertions(+), 38 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 4a92fc1..ef3026f 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -176,9 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;
RING_IDX rx_last_skb_slots;
- bool rx_queue_purge;
+ unsigned long status;
- struct timer_list wake_queue;
+ struct timer_list rx_stalled;
struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
@@ -200,7 +200,16 @@ struct xenvif_queue { /* Per-queue data for xenvif */
enum state_bit_shift {
/* This bit marks that the vif is connected */
- VIF_STATUS_CONNECTED
+ VIF_STATUS_CONNECTED,
+ /* This bit signals the RX thread that queuing was stopped (in
+ * start_xmit), and either the timer fired or an RX interrupt came
+ */
+ QUEUE_STATUS_RX_PURGE_EVENT,
+ /* This bit tells the interrupt handler that this queue was the reason
+ * for the carrier off, so it should kick the thread. Only queues which
+ * brought it down can turn on the carrier.
+ */
+ QUEUE_STATUS_RX_STALLED
};
struct xenvif {
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 67ebe35..b41ddbf 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -78,8 +78,12 @@ int xenvif_poll(struct napi_struct *napi, int budget)
/* This vif is rogue, we pretend we've there is nothing to do
* for this vif to deschedule it from NAPI. But this interface
* will be turned off in thread context later.
+ * Also, if a guest doesn't post enough slots to receive data on one of
+ * its queues, the carrier goes down and NAPI is descheduled here so
+ * the guest can't send more packets until it's ready to receive.
*/
- if (unlikely(queue->vif->disabled)) {
+ if (unlikely(queue->vif->disabled ||
+ !netif_carrier_ok(queue->vif->dev))) {
napi_complete(napi);
return 0;
}
@@ -97,7 +101,16 @@ int xenvif_poll(struct napi_struct *napi, int budget)
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ struct netdev_queue *net_queue =
+ netdev_get_tx_queue(queue->vif->dev, queue->id);
+ /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
+ * the carrier went down and this queue was previously blocked
+ */
+ if (unlikely(netif_tx_queue_stopped(net_queue) ||
+ (!netif_carrier_ok(queue->vif->dev) &&
+ test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
+ set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue);
return IRQ_HANDLED;
@@ -125,16 +138,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
}
-/* Callback to wake the queue and drain it on timeout */
-static void xenvif_wake_queue_callback(unsigned long data)
+/* Callback to wake the queue's thread and turn the carrier off on timeout */
+static void xenvif_rx_stalled(unsigned long data)
{
struct xenvif_queue *queue = (struct xenvif_queue *)data;
if (xenvif_queue_stopped(queue)) {
- netdev_err(queue->vif->dev, "draining TX queue\n");
- queue->rx_queue_purge = true;
+ set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue);
- xenvif_wake_queue(queue);
}
}
@@ -183,11 +194,11 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
* drain.
*/
if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
- queue->wake_queue.function = xenvif_wake_queue_callback;
- queue->wake_queue.data = (unsigned long)queue;
+ queue->rx_stalled.function = xenvif_rx_stalled;
+ queue->rx_stalled.data = (unsigned long)queue;
xenvif_stop_queue(queue);
- mod_timer(&queue->wake_queue,
- jiffies + rx_drain_timeout_jiffies);
+ mod_timer(&queue->rx_stalled,
+ jiffies + rx_drain_timeout_jiffies);
}
skb_queue_tail(&queue->rx_queue, skb);
@@ -515,7 +526,7 @@ int xenvif_init_queue(struct xenvif_queue *queue)
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
}
- init_timer(&queue->wake_queue);
+ init_timer(&queue->rx_stalled);
netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
XENVIF_NAPI_WEIGHT);
@@ -666,7 +677,7 @@ void xenvif_disconnect(struct xenvif *vif)
queue = &vif->queues[queue_index];
if (queue->task) {
- del_timer_sync(&queue->wake_queue);
+ del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
queue->task = NULL;
}
@@ -708,16 +719,12 @@ void xenvif_free(struct xenvif *vif)
/* Here we want to avoid timeout messages if an skb can be legitimately
* stuck somewhere else. Realistically this could be an another vif's
* internal or QDisc queue. That another vif also has this
- * rx_drain_timeout_msecs timeout, but the timer only ditches the
- * internal queue. After that, the QDisc queue can put in worst case
- * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
- * internal queue, so we need several rounds of such timeouts until we
- * can be sure that no another vif should have skb's from us. We are
- * not sending more skb's, so newly stuck packets are not interesting
- * for us here.
+ * rx_drain_timeout_msecs timeout, so give it time to drain out.
+ * Although if that other guest wakes up just before its timeout happens
+ * and takes only one skb from QDisc, it can hold onto other skbs for a
+ * longer period.
*/
- unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
- DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+ unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
unregister_netdev(vif->dev);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 6c4cc0f..aa20933 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1869,8 +1869,7 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
static inline int rx_work_todo(struct xenvif_queue *queue)
{
return (!skb_queue_empty(&queue->rx_queue) &&
- xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)) ||
- queue->rx_queue_purge;
+ xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
}
static inline int tx_work_todo(struct xenvif_queue *queue)
@@ -1935,6 +1934,75 @@ static void xenvif_start_queue(struct xenvif_queue *queue)
xenvif_wake_queue(queue);
}
+/* Only called from the queue's thread, it handles the situation when the guest
+ * doesn't post enough requests on the receiving ring.
+ * First xenvif_start_xmit disables QDisc and start a timer, and then either the
+ * timer fires, or the guest send an interrupt after posting new request. If it
+ * is the timer, the carrier is turned off here.
+ * */
+static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+{
+ /* Either the last unsuccesful skb or at least 1 slot should fit */
+ int needed = queue->rx_last_skb_slots ?
+ queue->rx_last_skb_slots : 1;
+
+ /* It is assumed that if the guest post new slots after this, the RX
+ * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
+ * the thread again
+ */
+ set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
+ if (!xenvif_rx_ring_slots_available(queue, needed)) {
+ rtnl_lock();
+ if (netif_carrier_ok(queue->vif->dev)) {
+ /* Timer fired and there are still no slots. Turn off
+ * everything except the interrupts
+ */
+ netif_carrier_off(queue->vif->dev);
+ skb_queue_purge(&queue->rx_queue);
+ queue->rx_last_skb_slots = 0;
+ if (net_ratelimit())
+ netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
+ } else {
+ /* Probably an another queue already turned the carrier
+ * off, make sure nothing is stucked in the internal
+ * queue of this queue
+ */
+ skb_queue_purge(&queue->rx_queue);
+ queue->rx_last_skb_slots = 0;
+ }
+ rtnl_unlock();
+ } else if (!netif_carrier_ok(queue->vif->dev)) {
+ unsigned int num_queues = queue->vif->num_queues;
+ unsigned int i;
+ /* The carrier was down, but an interrupt kicked
+ * the thread again after new requests were
+ * posted
+ */
+ clear_bit(QUEUE_STATUS_RX_STALLED,
+ &queue->status);
+ rtnl_lock();
+ netif_carrier_on(queue->vif->dev);
+ netif_tx_wake_all_queues(queue->vif->dev);
+ rtnl_unlock();
+
+ for (i = 0; i < num_queues; i++) {
+ struct xenvif_queue *temp = &queue->vif->queues[i];
+
+ xenvif_napi_schedule_or_enable_events(temp);
+ }
+ if (net_ratelimit())
+ netdev_err(queue->vif->dev, "Carrier on again\n");
+ } else {
+ /* Queuing were stopped, but the guest posted
+ * new requests and sent an interrupt
+ */
+ clear_bit(QUEUE_STATUS_RX_STALLED,
+ &queue->status);
+ del_timer_sync(&queue->rx_stalled);
+ xenvif_start_queue(queue);
+ }
+}
+
int xenvif_kthread_guest_rx(void *data)
{
struct xenvif_queue *queue = data;
@@ -1944,8 +2012,12 @@ int xenvif_kthread_guest_rx(void *data)
wait_event_interruptible(queue->wq,
rx_work_todo(queue) ||
queue->vif->disabled ||
+ test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
/* This frontend is found to be rogue, disable it in
* kthread context. Currently this is only set when
* netback finds out frontend sends malformed packet,
@@ -1955,24 +2027,21 @@ int xenvif_kthread_guest_rx(void *data)
*/
if (unlikely(queue->vif->disabled && queue->id == 0))
xenvif_carrier_off(queue->vif);
-
- if (kthread_should_stop())
- break;
-
- if (queue->rx_queue_purge) {
+ else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
+ &queue->status))) {
+ xenvif_rx_purge_event(queue);
+ } else if (!netif_carrier_ok(queue->vif->dev)) {
+ /* Another queue stalled and turned the carrier off, so
+ * purge the internal queue of queues which were not
+ * blocked
+ */
skb_queue_purge(&queue->rx_queue);
- queue->rx_queue_purge = false;
+ queue->rx_last_skb_slots = 0;
}
if (!skb_queue_empty(&queue->rx_queue))
xenvif_rx_action(queue);
- if (skb_queue_empty(&queue->rx_queue) &&
- xenvif_queue_stopped(queue)) {
- del_timer_sync(&queue->wake_queue);
- xenvif_start_queue(queue);
- }
-
cond_resched();
}
--
1.7.10.4

View File

@ -0,0 +1,132 @@
From 08cf39923b6a5728f0e1f8789f5f746c0ce0456d Mon Sep 17 00:00:00 2001
From: Zoltan Kiss <zoltan.kiss@citrix.com>
Date: Mon, 4 Aug 2014 16:20:57 +0100
Subject: [PATCH 02/14] xen-netback: Using a new state bit instead of carrier
Origin: https://git.kernel.org/linus/3d1af1df9762e56e563e8fd088a1b4ce2bcfaf8b
This patch introduces a new state bit VIF_STATUS_CONNECTED to track whether the
vif is in a connected state. Using carrier will not work with the next patch
in this series, which aims to turn the carrier temporarily off if the guest
doesn't seem to be able to receive packets.
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xenproject.org
v2:
- rename the bitshift type to "enum state_bit_shift" here, not in the next patch
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3d1af1df9762e56e563e8fd088a1b4ce2bcfaf8b)
---
drivers/net/xen-netback/common.h | 6 ++++++
drivers/net/xen-netback/interface.c | 19 +++++++++++--------
drivers/net/xen-netback/netback.c | 2 +-
3 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 28c9822..4a92fc1 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -198,6 +198,11 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xenvif_stats stats;
};
+enum state_bit_shift {
+ /* This bit marks that the vif is connected */
+ VIF_STATUS_CONNECTED
+};
+
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
@@ -220,6 +225,7 @@ struct xenvif {
* frontend is rogue.
*/
bool disabled;
+ unsigned long status;
/* Queues */
struct xenvif_queue *queues;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index ef75b45..67ebe35 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -55,7 +55,8 @@ static inline void xenvif_stop_queue(struct xenvif_queue *queue)
int xenvif_schedulable(struct xenvif *vif)
{
- return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
+ return netif_running(vif->dev) &&
+ test_bit(VIF_STATUS_CONNECTED, &vif->status);
}
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
@@ -267,7 +268,7 @@ static void xenvif_down(struct xenvif *vif)
static int xenvif_open(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
- if (netif_carrier_ok(dev))
+ if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
xenvif_up(vif);
netif_tx_start_all_queues(dev);
return 0;
@@ -276,7 +277,7 @@ static int xenvif_open(struct net_device *dev)
static int xenvif_close(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
- if (netif_carrier_ok(dev))
+ if (test_bit(VIF_STATUS_CONNECTED, &vif->status))
xenvif_down(vif);
netif_tx_stop_all_queues(dev);
return 0;
@@ -528,6 +529,7 @@ void xenvif_carrier_on(struct xenvif *vif)
if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
dev_set_mtu(vif->dev, ETH_DATA_LEN);
netdev_update_features(vif->dev);
+ set_bit(VIF_STATUS_CONNECTED, &vif->status);
netif_carrier_on(vif->dev);
if (netif_running(vif->dev))
xenvif_up(vif);
@@ -625,9 +627,11 @@ void xenvif_carrier_off(struct xenvif *vif)
struct net_device *dev = vif->dev;
rtnl_lock();
- netif_carrier_off(dev); /* discard queued packets */
- if (netif_running(dev))
- xenvif_down(vif);
+ if (test_and_clear_bit(VIF_STATUS_CONNECTED, &vif->status)) {
+ netif_carrier_off(dev); /* discard queued packets */
+ if (netif_running(dev))
+ xenvif_down(vif);
+ }
rtnl_unlock();
}
@@ -656,8 +660,7 @@ void xenvif_disconnect(struct xenvif *vif)
unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- if (netif_carrier_ok(vif->dev))
- xenvif_carrier_off(vif);
+ xenvif_carrier_off(vif);
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 769e553..6c4cc0f 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1953,7 +1953,7 @@ int xenvif_kthread_guest_rx(void *data)
* context so we defer it here, if this thread is
* associated with queue 0.
*/
- if (unlikely(queue->vif->disabled && netif_carrier_ok(queue->vif->dev) && queue->id == 0))
+ if (unlikely(queue->vif->disabled && queue->id == 0))
xenvif_carrier_off(queue->vif);
if (kthread_should_stop())
--
1.7.10.4

View File

@ -0,0 +1,163 @@
From 69f5afd56eae674e6c9332fff53b674930a852df Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Tue, 12 Aug 2014 11:48:07 +0100
Subject: [PATCH 09/14] xen-netback: don't stop dealloc kthread too early
Origin: https://git.kernel.org/linus/a64bd934528e26e8956112e43a279fba2ee0634e
Reference count the number of packets in host stack, so that we don't
stop the deallocation thread too early. If not, we can end up with
xenvif_free permanently waiting for deallocation thread to unmap grefs.
Reported-by: Thomas Leonard <talex5@gmail.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit a64bd934528e26e8956112e43a279fba2ee0634e)
---
drivers/net/xen-netback/common.h | 5 +++++
drivers/net/xen-netback/interface.c | 18 ++++++++++++++++++
drivers/net/xen-netback/netback.c | 26 +++++++++++++++++++-------
3 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ef3026f..d4eb8d2 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
u16 dealloc_ring[MAX_PENDING_REQS];
struct task_struct *dealloc_task;
wait_queue_head_t dealloc_wq;
+ atomic_t inflight_packets;
/* Use kthread for guest RX */
struct task_struct *task;
@@ -329,4 +330,8 @@ extern unsigned int xenvif_max_queues;
extern struct dentry *xen_netback_dbg_root;
#endif
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb);
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
+
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 23702ea..428c57c 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,23 @@
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
+/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
+ * increasing the inflight counter. We need to increase the inflight
+ * counter because core driver calls into xenvif_zerocopy_callback
+ * which calls xenvif_skb_zerocopy_complete.
+ */
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+ struct sk_buff *skb)
+{
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ atomic_inc(&queue->inflight_packets);
+}
+
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+{
+ atomic_dec(&queue->inflight_packets);
+}
+
static inline void xenvif_stop_queue(struct xenvif_queue *queue)
{
struct net_device *dev = queue->vif->dev;
@@ -557,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
init_waitqueue_head(&queue->wq);
init_waitqueue_head(&queue->dealloc_wq);
+ atomic_set(&queue->inflight_packets, 0);
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4734472..08f6599 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1525,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
/* remove traces of mapped pages and frag_list */
skb_frag_list_init(skb);
uarg = skb_shinfo(skb)->destructor_arg;
+ /* increase inflight counter to offset decrement in callback */
+ atomic_inc(&queue->inflight_packets);
uarg->callback(uarg, true);
skb_shinfo(skb)->destructor_arg = NULL;
- skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, nskb);
kfree_skb(nskb);
return 0;
@@ -1589,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
if (net_ratelimit())
netdev_err(queue->vif->dev,
"Not enough memory to consolidate frag_list!\n");
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1609,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
"Can't setup checksum in net_tx_action\n");
/* We have to set this flag to trigger the callback */
if (skb_shinfo(skb)->destructor_arg)
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb);
continue;
}
@@ -1641,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
* skb. E.g. the __pskb_pull_tail earlier can do such thing.
*/
if (skb_shinfo(skb)->destructor_arg) {
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ xenvif_skb_zerocopy_prepare(queue, skb);
queue->stats.tx_zerocopy_sent++;
}
@@ -1681,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
queue->stats.tx_zerocopy_success++;
else
queue->stats.tx_zerocopy_fail++;
+ xenvif_skb_zerocopy_complete(queue);
}
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
@@ -2058,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data)
return 0;
}
+static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
+{
+ /* Dealloc thread must remain running until all inflight
+ * packets complete.
+ */
+ return kthread_should_stop() &&
+ !atomic_read(&queue->inflight_packets);
+}
+
int xenvif_dealloc_kthread(void *data)
{
struct xenvif_queue *queue = data;
- while (!kthread_should_stop()) {
+ for (;;) {
wait_event_interruptible(queue->dealloc_wq,
tx_dealloc_work_todo(queue) ||
- kthread_should_stop());
- if (kthread_should_stop())
+ xenvif_dealloc_kthread_should_stop(queue));
+ if (xenvif_dealloc_kthread_should_stop(queue))
break;
xenvif_tx_dealloc_action(queue);
--
1.7.10.4

View File

@ -0,0 +1,51 @@
From d78ae2894c307cabcf454877f6a45ed442778d02 Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Tue, 12 Aug 2014 11:59:30 +0100
Subject: [PATCH 07/14] xen-netback: fix debugfs entry creation
Origin: https://git.kernel.org/linus/628fa76b09d7b0923c142631fc25b6affbfb868d
The original code is bogus. The function gets called in a loop which
leaks entries created in previous rounds.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Zoltan Kiss <zoltan.kiss@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 628fa76b09d7b0923c142631fc25b6affbfb868d)
---
drivers/net/xen-netback/xenbus.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 4c9041e..9c47b89 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -174,10 +174,9 @@ static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
.write = xenvif_write_io_ring,
};
-static void xenvif_debugfs_addif(struct xenvif_queue *queue)
+static void xenvif_debugfs_addif(struct xenvif *vif)
{
struct dentry *pfile;
- struct xenvif *vif = queue->vif;
int i;
if (IS_ERR_OR_NULL(xen_netback_dbg_root))
@@ -736,10 +735,11 @@ static void connect(struct backend_info *be)
be->vif->num_queues = queue_index;
goto err;
}
+ }
+
#ifdef CONFIG_DEBUG_FS
- xenvif_debugfs_addif(queue);
+ xenvif_debugfs_addif(be->vif);
#endif /* CONFIG_DEBUG_FS */
- }
/* Initialisation completed, tell core driver the number of
* active queues.
--
1.7.10.4

View File

@ -0,0 +1,65 @@
From 588d54039ef4c17c7bba79abe057daf5b69b2330 Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Tue, 12 Aug 2014 11:59:29 +0100
Subject: [PATCH 06/14] xen-netback: fix debugfs write length check
Origin: https://git.kernel.org/linus/5c807005fa60deef2db6616d9b7b24fc4c436be9
Enlarge buffer size and check input length properly, so that we don't
misuse -ENOSPC.
Note that a command like "kickXXXX" is still allowed, that's one patch for
another day if we really want to be very strict on this.
Reported-by: SeeChen Ng <seechen81@gmail.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Zoltan Kiss <zoltan.kiss@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 5c807005fa60deef2db6616d9b7b24fc4c436be9)
---
drivers/net/xen-netback/xenbus.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 580517d..4c9041e 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -116,6 +116,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
}
#define XENVIF_KICK_STR "kick"
+#define BUFFER_SIZE 32
static ssize_t
xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
@@ -124,22 +125,24 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
struct xenvif_queue *queue =
((struct seq_file *)filp->private_data)->private;
int len;
- char write[sizeof(XENVIF_KICK_STR)];
+ char write[BUFFER_SIZE];
/* don't allow partial writes and check the length */
if (*ppos != 0)
return 0;
- if (count < sizeof(XENVIF_KICK_STR) - 1)
+ if (count >= sizeof(write))
return -ENOSPC;
len = simple_write_to_buffer(write,
- sizeof(write),
+ sizeof(write) - 1,
ppos,
buf,
count);
if (len < 0)
return len;
+ write[len] = '\0';
+
if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
xenvif_interrupt(0, (void *)queue);
else {
--
1.7.10.4

View File

@ -0,0 +1,614 @@
From 8a73fe2999810038cde046462258a3cd5afe170e Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Wed, 22 Oct 2014 14:08:54 +0100
Subject: [PATCH 13/14] xen-netback: fix unlimited guest Rx internal queue and
carrier flapping
Origin: https://git.kernel.org/linus/f48da8b14d04ca87ffcffe68829afd45f926ec6a
Netback needs to discard old to-guest skb's (guest Rx queue drain) and
it needs to detect guest Rx stalls (to disable the carrier so packets are
discarded earlier), but the current implementation is very broken.
1. The check in hard_start_xmit of the slot availability did not
consider the number of packets that were already in the guest Rx
queue. This could allow the queue to grow without bound.
The guest stops consuming packets and the ring was allowed to fill,
leaving S slots free. Netback queues a packet requiring more than S
slots (ensuring that the ring stays with S slots free). Netback then
queues packets indefinitely, provided that they require S or fewer
slots.
2. The Rx stall detection is not triggered in this case since the
(host) Tx queue is not stopped.
3. If the Tx queue is stopped and a guest Rx interrupt occurs, netback
will consider this an Rx purge event which may result in it taking
the carrier down unnecessarily. It also considers a queue with
only 1 slot free as unstalled (even though the next packet might
not fit in this).
The internal guest Rx queue is limited by a byte length (to 512 KiB,
enough for half the ring). The (host) Tx queue is stopped and started
based on this limit. This sets an upper bound on the amount of memory
used by packets on the internal queue.
This allows the estimation of the number of slots for an skb to be
removed (it wasn't a very good estimate anyway). Instead, the guest
Rx thread just waits for enough free slots for a maximum sized packet.
skbs queued on the internal queue have an 'expires' time (set to the
current time plus the drain timeout). The guest Rx thread will detect
when the skb at the head of the queue has expired and discard expired
skbs. This sets a clear upper bound on the length of time an skb can
be queued for. For a guest being destroyed the maximum time needed to
wait for all the packets it sent to be dropped is still the drain
timeout (10 s) since it will not be sending new packets.
Rx stall detection is reintroduced in a later commit.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit f48da8b14d04ca87ffcffe68829afd45f926ec6a)
---
drivers/net/xen-netback/common.h | 29 +++--
drivers/net/xen-netback/interface.c | 59 ++-------
drivers/net/xen-netback/netback.c | 243 ++++++++++++++++++-----------------
drivers/net/xen-netback/xenbus.c | 8 ++
4 files changed, 161 insertions(+), 178 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 93ca77c..c264240 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -176,10 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */
char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;
- RING_IDX rx_last_skb_slots;
- unsigned long status;
- struct timer_list rx_stalled;
+ unsigned int rx_queue_max;
+ unsigned int rx_queue_len;
struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
@@ -199,18 +198,14 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xenvif_stats stats;
};
+/* Maximum number of Rx slots a to-guest packet may use, including the
+ * slot needed for GSO meta-data.
+ */
+#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1)
+
enum state_bit_shift {
/* This bit marks that the vif is connected */
VIF_STATUS_CONNECTED,
- /* This bit signals the RX thread that queuing was stopped (in
- * start_xmit), and either the timer fired or an RX interrupt came
- */
- QUEUE_STATUS_RX_PURGE_EVENT,
- /* This bit tells the interrupt handler that this queue was the reason
- * for the carrier off, so it should kick the thread. Only queues which
- * brought it down can turn on the carrier.
- */
- QUEUE_STATUS_RX_STALLED
};
struct xenvif {
@@ -246,6 +241,14 @@ struct xenvif {
struct net_device *dev;
};
+struct xenvif_rx_cb {
+ unsigned long expires;
+ int meta_slots_used;
+ bool full_coalesce;
+};
+
+#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
+
static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
{
return to_xenbus_device(vif->dev->dev.parent);
@@ -291,6 +294,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue);
int xenvif_dealloc_kthread(void *data);
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+
/* Determine whether the needed number of slots (req) are available,
* and set req_event if not.
*/
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 6879251..9e8af0b 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,9 @@
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
+/* Number of bytes allowed on the internal guest Rx queue. */
+#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE)
+
/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
* increasing the inflight counter. We need to increase the inflight
* counter because core driver calls into xenvif_zerocopy_callback
@@ -63,7 +66,8 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
int xenvif_schedulable(struct xenvif *vif)
{
return netif_running(vif->dev) &&
- test_bit(VIF_STATUS_CONNECTED, &vif->status);
+ test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
+ !vif->disabled;
}
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
@@ -104,16 +108,7 @@ int xenvif_poll(struct napi_struct *napi, int budget)
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
- struct netdev_queue *net_queue =
- netdev_get_tx_queue(queue->vif->dev, queue->id);
- /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
- * the carrier went down and this queue was previously blocked
- */
- if (unlikely(netif_tx_queue_stopped(net_queue) ||
- (!netif_carrier_ok(queue->vif->dev) &&
- test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
- set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue);
return IRQ_HANDLED;
@@ -141,24 +136,13 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
}
-/* Callback to wake the queue's thread and turn the carrier off on timeout */
-static void xenvif_rx_stalled(unsigned long data)
-{
- struct xenvif_queue *queue = (struct xenvif_queue *)data;
-
- if (xenvif_queue_stopped(queue)) {
- set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
- xenvif_kick_thread(queue);
- }
-}
-
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
unsigned int num_queues = vif->num_queues;
u16 index;
- int min_slots_needed;
+ struct xenvif_rx_cb *cb;
BUG_ON(skb->dev != dev);
@@ -181,30 +165,10 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
!xenvif_schedulable(vif))
goto drop;
- /* At best we'll need one slot for the header and one for each
- * frag.
- */
- min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;
+ cb = XENVIF_RX_CB(skb);
+ cb->expires = jiffies + rx_drain_timeout_jiffies;
- /* If the skb is GSO then we'll also need an extra slot for the
- * metadata.
- */
- if (skb_is_gso(skb))
- min_slots_needed++;
-
- /* If the skb can't possibly fit in the remaining slots
- * then turn off the queue to give the ring a chance to
- * drain.
- */
- if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
- queue->rx_stalled.function = xenvif_rx_stalled;
- queue->rx_stalled.data = (unsigned long)queue;
- netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
- mod_timer(&queue->rx_stalled,
- jiffies + rx_drain_timeout_jiffies);
- }
-
- skb_queue_tail(&queue->rx_queue, skb);
+ xenvif_rx_queue_tail(queue, skb);
xenvif_kick_thread(queue);
return NETDEV_TX_OK;
@@ -498,6 +462,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
init_timer(&queue->credit_timeout);
queue->credit_window_start = get_jiffies_64();
+ queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES;
+
skb_queue_head_init(&queue->rx_queue);
skb_queue_head_init(&queue->tx_queue);
@@ -529,8 +495,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
}
- init_timer(&queue->rx_stalled);
-
return 0;
}
@@ -664,7 +628,6 @@ void xenvif_disconnect(struct xenvif *vif)
netif_napi_del(&queue->napi);
if (queue->task) {
- del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
queue->task = NULL;
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 08f6599..57aa3b5 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -55,8 +55,8 @@
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
-/* When guest ring is filled up, qdisc queues the packets for us, but we have
- * to timeout them, otherwise other guests' packets can get stuck there
+/* The time that packets can stay on the guest Rx internal queue
+ * before they are dropped.
*/
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
@@ -83,7 +83,6 @@ static void make_tx_response(struct xenvif_queue *queue,
s8 st);
static inline int tx_work_todo(struct xenvif_queue *queue);
-static inline int rx_work_todo(struct xenvif_queue *queue);
static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
u16 id,
@@ -163,6 +162,69 @@ bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
return false;
}
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+ __skb_queue_tail(&queue->rx_queue, skb);
+
+ queue->rx_queue_len += skb->len;
+ if (queue->rx_queue_len > queue->rx_queue_max)
+ netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+}
+
+static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+
+ spin_lock_irq(&queue->rx_queue.lock);
+
+ skb = __skb_dequeue(&queue->rx_queue);
+ if (skb)
+ queue->rx_queue_len -= skb->len;
+
+ spin_unlock_irq(&queue->rx_queue.lock);
+
+ return skb;
+}
+
+static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
+{
+ spin_lock_irq(&queue->rx_queue.lock);
+
+ if (queue->rx_queue_len < queue->rx_queue_max)
+ netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+ spin_unlock_irq(&queue->rx_queue.lock);
+}
+
+
+static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+ while ((skb = xenvif_rx_dequeue(queue)) != NULL)
+ kfree_skb(skb);
+}
+
+static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+
+ for(;;) {
+ skb = skb_peek(&queue->rx_queue);
+ if (!skb)
+ break;
+ if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
+ break;
+ xenvif_rx_dequeue(queue);
+ kfree_skb(skb);
+ }
+}
+
/*
* Returns true if we should start a new receive buffer instead of
* adding 'size' bytes to a buffer which currently contains 'offset'
@@ -237,13 +299,6 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
return meta;
}
-struct xenvif_rx_cb {
- int meta_slots_used;
- bool full_coalesce;
-};
-
-#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
-
/*
* Set up the grant operations for this fragment. If it's a flipping
* interface, we also set up the unmap request from here.
@@ -587,7 +642,8 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
skb_queue_head_init(&rxq);
- while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
+ while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
+ && (skb = xenvif_rx_dequeue(queue)) != NULL) {
RING_IDX max_slots_needed;
RING_IDX old_req_cons;
RING_IDX ring_slots_used;
@@ -634,15 +690,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
max_slots_needed++;
- /* If the skb may not fit then bail out now */
- if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
- skb_queue_head(&queue->rx_queue, skb);
- need_to_notify = true;
- queue->rx_last_skb_slots = max_slots_needed;
- break;
- } else
- queue->rx_last_skb_slots = 0;
-
old_req_cons = queue->rx.req_cons;
XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
ring_slots_used = queue->rx.req_cons - old_req_cons;
@@ -1869,12 +1916,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
}
}
-static inline int rx_work_todo(struct xenvif_queue *queue)
-{
- return (!skb_queue_empty(&queue->rx_queue) &&
- xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
-}
-
static inline int tx_work_todo(struct xenvif_queue *queue)
{
if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
@@ -1931,92 +1972,64 @@ err:
return err;
}
-static void xenvif_start_queue(struct xenvif_queue *queue)
+static bool xenvif_have_rx_work(struct xenvif_queue *queue)
{
- if (xenvif_schedulable(queue->vif))
- xenvif_wake_queue(queue);
+ return (!skb_queue_empty(&queue->rx_queue)
+ && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+ || kthread_should_stop()
+ || queue->vif->disabled;
}
-/* Only called from the queue's thread, it handles the situation when the guest
- * doesn't post enough requests on the receiving ring.
- * First xenvif_start_xmit disables QDisc and start a timer, and then either the
- * timer fires, or the guest send an interrupt after posting new request. If it
- * is the timer, the carrier is turned off here.
- * */
-static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
{
- /* Either the last unsuccesful skb or at least 1 slot should fit */
- int needed = queue->rx_last_skb_slots ?
- queue->rx_last_skb_slots : 1;
+ struct sk_buff *skb;
+ long timeout;
- /* It is assumed that if the guest post new slots after this, the RX
- * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
- * the thread again
- */
- set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
- if (!xenvif_rx_ring_slots_available(queue, needed)) {
- rtnl_lock();
- if (netif_carrier_ok(queue->vif->dev)) {
- /* Timer fired and there are still no slots. Turn off
- * everything except the interrupts
- */
- netif_carrier_off(queue->vif->dev);
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- if (net_ratelimit())
- netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
- } else {
- /* Probably an another queue already turned the carrier
- * off, make sure nothing is stucked in the internal
- * queue of this queue
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- }
- rtnl_unlock();
- } else if (!netif_carrier_ok(queue->vif->dev)) {
- unsigned int num_queues = queue->vif->num_queues;
- unsigned int i;
- /* The carrier was down, but an interrupt kicked
- * the thread again after new requests were
- * posted
- */
- clear_bit(QUEUE_STATUS_RX_STALLED,
- &queue->status);
- rtnl_lock();
- netif_carrier_on(queue->vif->dev);
- netif_tx_wake_all_queues(queue->vif->dev);
- rtnl_unlock();
+ skb = skb_peek(&queue->rx_queue);
+ if (!skb)
+ return MAX_SCHEDULE_TIMEOUT;
- for (i = 0; i < num_queues; i++) {
- struct xenvif_queue *temp = &queue->vif->queues[i];
+ timeout = XENVIF_RX_CB(skb)->expires - jiffies;
+ return timeout < 0 ? 0 : timeout;
+}
- xenvif_napi_schedule_or_enable_events(temp);
- }
- if (net_ratelimit())
- netdev_err(queue->vif->dev, "Carrier on again\n");
- } else {
- /* Queuing were stopped, but the guest posted
- * new requests and sent an interrupt
- */
- clear_bit(QUEUE_STATUS_RX_STALLED,
- &queue->status);
- del_timer_sync(&queue->rx_stalled);
- xenvif_start_queue(queue);
+/* Wait until the guest Rx thread has work.
+ *
+ * The timeout needs to be adjusted based on the current head of the
+ * queue (and not just the head at the beginning). In particular, if
+ * the queue is initially empty an infinite timeout is used and this
+ * needs to be reduced when a skb is queued.
+ *
+ * This cannot be done with wait_event_timeout() because it only
+ * calculates the timeout once.
+ */
+static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
+{
+ DEFINE_WAIT(wait);
+
+ if (xenvif_have_rx_work(queue))
+ return;
+
+ for (;;) {
+ long ret;
+
+ prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
+ if (xenvif_have_rx_work(queue))
+ break;
+ ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
+ if (!ret)
+ break;
}
+ finish_wait(&queue->wq, &wait);
}
int xenvif_kthread_guest_rx(void *data)
{
struct xenvif_queue *queue = data;
- struct sk_buff *skb;
+ struct xenvif *vif = queue->vif;
- while (!kthread_should_stop()) {
- wait_event_interruptible(queue->wq,
- rx_work_todo(queue) ||
- queue->vif->disabled ||
- test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
- kthread_should_stop());
+ for (;;) {
+ xenvif_wait_for_rx_work(queue);
if (kthread_should_stop())
break;
@@ -2028,35 +2041,29 @@ int xenvif_kthread_guest_rx(void *data)
* context so we defer it here, if this thread is
* associated with queue 0.
*/
- if (unlikely(queue->vif->disabled && queue->id == 0)) {
- xenvif_carrier_off(queue->vif);
- } else if (unlikely(queue->vif->disabled)) {
- /* kthread_stop() would be called upon this thread soon,
- * be a bit proactive
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
- &queue->status))) {
- xenvif_rx_purge_event(queue);
- } else if (!netif_carrier_ok(queue->vif->dev)) {
- /* Another queue stalled and turned the carrier off, so
- * purge the internal queue of queues which were not
- * blocked
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
+ if (unlikely(vif->disabled && queue->id == 0)) {
+ xenvif_carrier_off(vif);
+ xenvif_rx_queue_purge(queue);
+ continue;
}
if (!skb_queue_empty(&queue->rx_queue))
xenvif_rx_action(queue);
+ /* Queued packets may have foreign pages from other
+ * domains. These cannot be queued indefinitely as
+ * this would starve guests of grant refs and transmit
+ * slots.
+ */
+ xenvif_rx_queue_drop_expired(queue);
+
+ xenvif_rx_queue_maybe_wake(queue);
+
cond_resched();
}
/* Bin any remaining skbs */
- while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
- dev_kfree_skb(skb);
+ xenvif_rx_queue_purge(queue);
return 0;
}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 5e5cca1..54b5f24 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -52,6 +52,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
struct xenvif_queue *queue = m->private;
struct xen_netif_tx_back_ring *tx_ring = &queue->tx;
struct xen_netif_rx_back_ring *rx_ring = &queue->rx;
+ struct netdev_queue *dev_queue;
if (tx_ring->sring) {
struct xen_netif_tx_sring *sring = tx_ring->sring;
@@ -112,6 +113,13 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
queue->credit_timeout.expires,
jiffies);
+ dev_queue = netdev_get_tx_queue(queue->vif->dev, queue->id);
+
+ seq_printf(m, "\nRx internal queue: len %u max %u pkts %u %s\n",
+ queue->rx_queue_len, queue->rx_queue_max,
+ skb_queue_len(&queue->rx_queue),
+ netif_tx_queue_stopped(dev_queue) ? "stopped" : "running");
+
return 0;
}
--
1.7.10.4

View File

@ -0,0 +1,103 @@
From 63350994825046216104c9c4c99db9e7a2715a97 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Wed, 22 Oct 2014 14:08:53 +0100
Subject: [PATCH 12/14] xen-netback: make feature-rx-notify mandatory
Origin: https://git.kernel.org/linus/bc96f648df1bbc2729abbb84513cf4f64273a1f1
Frontends that do not provide feature-rx-notify may stall because
netback depends on the notification from frontend to wake the guest Rx
thread (even if can_queue is false).
This could be fixed but feature-rx-notify was introduced in 2006 and I
am not aware of any frontends that do not implement this.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit bc96f648df1bbc2729abbb84513cf4f64273a1f1)
---
drivers/net/xen-netback/common.h | 5 -----
drivers/net/xen-netback/interface.c | 12 +-----------
drivers/net/xen-netback/xenbus.c | 13 ++++---------
3 files changed, 5 insertions(+), 25 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index d4eb8d2..93ca77c 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -228,9 +228,6 @@ struct xenvif {
u8 ip_csum:1;
u8 ipv6_csum:1;
- /* Internal feature information. */
- u8 can_queue:1; /* can queue packets for receiver? */
-
/* Is this interface disabled? True when backend discovers
* frontend is rogue.
*/
@@ -272,8 +269,6 @@ void xenvif_xenbus_fini(void);
int xenvif_schedulable(struct xenvif *vif);
-int xenvif_must_stop_queue(struct xenvif_queue *queue);
-
int xenvif_queue_stopped(struct xenvif_queue *queue);
void xenvif_wake_queue(struct xenvif_queue *queue);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 18cdc81..6879251 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -60,16 +60,6 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
atomic_dec(&queue->inflight_packets);
}
-static inline void xenvif_stop_queue(struct xenvif_queue *queue)
-{
- struct net_device *dev = queue->vif->dev;
-
- if (!queue->vif->can_queue)
- return;
-
- netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
-}
-
int xenvif_schedulable(struct xenvif *vif)
{
return netif_running(vif->dev) &&
@@ -209,7 +199,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
queue->rx_stalled.function = xenvif_rx_stalled;
queue->rx_stalled.data = (unsigned long)queue;
- xenvif_stop_queue(queue);
+ netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
mod_timer(&queue->rx_stalled,
jiffies + rx_drain_timeout_jiffies);
}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 9c47b89..5e5cca1 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -873,15 +873,10 @@ static int read_xenbus_vif_flags(struct backend_info *be)
if (!rx_copy)
return -EOPNOTSUPP;
- if (vif->dev->tx_queue_len != 0) {
- if (xenbus_scanf(XBT_NIL, dev->otherend,
- "feature-rx-notify", "%d", &val) < 0)
- val = 0;
- if (val)
- vif->can_queue = 1;
- else
- /* Must be non-zero for pfifo_fast to work. */
- vif->dev->tx_queue_len = 1;
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0 || val == 0) {
+ xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
+ return -EINVAL;
}
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
--
1.7.10.4

View File

@ -0,0 +1,65 @@
From 7b37ef7c88a7d4e20e268c02b65980b03281ddce Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Tue, 12 Aug 2014 11:48:06 +0100
Subject: [PATCH 08/14] xen-netback: move NAPI add/remove calls
Origin: https://git.kernel.org/linus/ea2c5e134237eadc9924ce821ded678750024549
Originally netif_napi_add was in xenvif_init_queue and netif_napi_del
was in xenvif_deinit_queue, while kthreads were handled in
xenvif_connect and xenvif_disconnect. Move netif_napi_add and
netif_napi_del to xenvif_connect and xenvif_disconnect so that they
reside together with kthread operations.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit ea2c5e134237eadc9924ce821ded678750024549)
---
drivers/net/xen-netback/interface.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 04696fc..23702ea 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -524,9 +524,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
init_timer(&queue->rx_stalled);
- netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
- XENVIF_NAPI_WEIGHT);
-
return 0;
}
@@ -614,6 +611,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
wake_up_process(queue->task);
wake_up_process(queue->dealloc_task);
+ netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+ XENVIF_NAPI_WEIGHT);
+
return 0;
err_rx_unbind:
@@ -672,6 +672,8 @@ void xenvif_disconnect(struct xenvif *vif)
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
+ netif_napi_del(&queue->napi);
+
if (queue->task) {
del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
@@ -704,7 +706,6 @@ void xenvif_disconnect(struct xenvif *vif)
void xenvif_deinit_queue(struct xenvif_queue *queue)
{
free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
- netif_napi_del(&queue->napi);
}
void xenvif_free(struct xenvif *vif)
--
1.7.10.4

View File

@ -0,0 +1,57 @@
From 7ffe40b3013c6c4d66ebbc9a946173013a590a61 Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Mon, 25 Aug 2014 16:44:00 +0100
Subject: [PATCH 11/14] xen-netback: move netif_napi_add before binding
interrupt
Origin: https://git.kernel.org/linus/e24f8191cc35ae3780b4656a6befae8b8657edc2
Interrupt is enabled when bind_interdomain_evtchn_to_irqhandler returns.
If there's an interrupt pending, the interrupt handler is invoked.
NAPI needs to be initialised before binding interrupt otherwise the
interrupt handler will try to schedule a NAPI instance that is not
initialised yet, resulting in kernel OOPS.
This fixes a regression introduced in ea2c5e13 ("xen-netback: move NAPI
add/remove calls").
Ideally function calls to create kthreads should also be moved before
binding but I intend to fix this regression with minimal changes and
refactor the code with another patch.
Reported-by: Thomas Leonard <talex5@gmail.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit e24f8191cc35ae3780b4656a6befae8b8657edc2)
---
drivers/net/xen-netback/interface.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 10d832a..18cdc81 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -576,6 +576,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
init_waitqueue_head(&queue->dealloc_wq);
atomic_set(&queue->inflight_packets, 0);
+ netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+ XENVIF_NAPI_WEIGHT);
+
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
err = bind_interdomain_evtchn_to_irqhandler(
@@ -629,9 +632,6 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
wake_up_process(queue->task);
wake_up_process(queue->dealloc_task);
- netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
- XENVIF_NAPI_WEIGHT);
-
return 0;
err_rx_unbind:
--
1.7.10.4

View File

@ -0,0 +1,218 @@
From bd54d3b8d2a15e230f81965041169ba0dc2210c8 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Wed, 22 Oct 2014 14:08:55 +0100
Subject: [PATCH 14/14] xen-netback: reintroduce guest Rx stall detection
Origin: https://git.kernel.org/linus/ecf08d2dbb96d5a4b4bcc53a39e8d29cc8fef02e
If a frontend is not receiving packets it is useful to detect this and
turn off the carrier so packets are dropped early instead of being
queued and drained when they expire.
A to-guest queue is stalled if it doesn't have enough free slots for
an extended period of time (default 60 s).
If at least one queue is stalled, the carrier is turned off (in the
expectation that the other queues will soon stall as well). The
carrier is only turned on once all queues are ready.
When the frontend connects, all the queues start in the stalled state
and only become ready once the frontend queues enough Rx requests.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit ecf08d2dbb96d5a4b4bcc53a39e8d29cc8fef02e)
---
drivers/net/xen-netback/common.h | 5 +++
drivers/net/xen-netback/interface.c | 5 ++-
drivers/net/xen-netback/netback.c | 76 +++++++++++++++++++++++++++++++++++
drivers/net/xen-netback/xenbus.c | 1 +
4 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index c264240..083ecc9 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -179,6 +179,8 @@ struct xenvif_queue { /* Per-queue data for xenvif */
unsigned int rx_queue_max;
unsigned int rx_queue_len;
+ unsigned long last_rx_time;
+ bool stalled;
struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
@@ -232,6 +234,9 @@ struct xenvif {
/* Queues */
struct xenvif_queue *queues;
unsigned int num_queues; /* active queues, resource allocated */
+ unsigned int stalled_queues;
+
+ spinlock_t lock;
#ifdef CONFIG_DEBUG_FS
struct dentry *xenvif_dbg_root;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 9e8af0b..21d8db8 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -419,6 +419,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif->queues = NULL;
vif->num_queues = 0;
+ spin_lock_init(&vif->lock);
+
dev->netdev_ops = &xenvif_netdev_ops;
dev->hw_features = NETIF_F_SG |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -505,7 +507,6 @@ void xenvif_carrier_on(struct xenvif *vif)
dev_set_mtu(vif->dev, ETH_DATA_LEN);
netdev_update_features(vif->dev);
set_bit(VIF_STATUS_CONNECTED, &vif->status);
- netif_carrier_on(vif->dev);
if (netif_running(vif->dev))
xenvif_up(vif);
rtnl_unlock();
@@ -565,6 +566,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
disable_irq(queue->rx_irq);
}
+ queue->stalled = true;
+
task = kthread_create(xenvif_kthread_guest_rx,
(void *)queue, "%s-guest-rx", queue->name);
if (IS_ERR(task)) {
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 57aa3b5..6563f07 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -62,6 +62,13 @@ unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies;
+/* The length of time before the frontend is considered unresponsive
+ * because it isn't providing Rx slots.
+ */
+static unsigned int rx_stall_timeout_msecs = 60000;
+module_param(rx_stall_timeout_msecs, uint, 0444);
+static unsigned int rx_stall_timeout_jiffies;
+
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
@@ -649,6 +656,8 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
RING_IDX ring_slots_used;
int i;
+ queue->last_rx_time = jiffies;
+
/* We need a cheap worse case estimate for the number of
* slots we'll use.
*/
@@ -1972,10 +1981,67 @@ err:
return err;
}
+static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
+{
+ struct xenvif *vif = queue->vif;
+
+ queue->stalled = true;
+
+ /* At least one queue has stalled? Disable the carrier. */
+ spin_lock(&vif->lock);
+ if (vif->stalled_queues++ == 0) {
+ netdev_info(vif->dev, "Guest Rx stalled");
+ netif_carrier_off(vif->dev);
+ }
+ spin_unlock(&vif->lock);
+}
+
+static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
+{
+ struct xenvif *vif = queue->vif;
+
+ queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
+ queue->stalled = false;
+
+ /* All queues are ready? Enable the carrier. */
+ spin_lock(&vif->lock);
+ if (--vif->stalled_queues == 0) {
+ netdev_info(vif->dev, "Guest Rx ready");
+ netif_carrier_on(vif->dev);
+ }
+ spin_unlock(&vif->lock);
+}
+
+static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
+{
+ RING_IDX prod, cons;
+
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
+
+ return !queue->stalled
+ && prod - cons < XEN_NETBK_RX_SLOTS_MAX
+ && time_after(jiffies,
+ queue->last_rx_time + rx_stall_timeout_jiffies);
+}
+
+static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+{
+ RING_IDX prod, cons;
+
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
+
+ return queue->stalled
+ && prod - cons >= XEN_NETBK_RX_SLOTS_MAX;
+}
+
static bool xenvif_have_rx_work(struct xenvif_queue *queue)
{
return (!skb_queue_empty(&queue->rx_queue)
&& xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+ || xenvif_rx_queue_stalled(queue)
+ || xenvif_rx_queue_ready(queue)
|| kthread_should_stop()
|| queue->vif->disabled;
}
@@ -2050,6 +2116,15 @@ int xenvif_kthread_guest_rx(void *data)
if (!skb_queue_empty(&queue->rx_queue))
xenvif_rx_action(queue);
+ /* If the guest hasn't provided any Rx slots for a
+ * while it's probably not responsive, drop the
+ * carrier so packets are dropped earlier.
+ */
+ if (xenvif_rx_queue_stalled(queue))
+ xenvif_queue_carrier_off(queue);
+ else if (xenvif_rx_queue_ready(queue))
+ xenvif_queue_carrier_on(queue);
+
/* Queued packets may have foreign pages from other
* domains. These cannot be queued indefinitely as
* this would starve guests of grant refs and transmit
@@ -2120,6 +2195,7 @@ static int __init netback_init(void)
goto failed_init;
rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+ rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
#ifdef CONFIG_DEBUG_FS
xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 54b5f24..396b3d9 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -711,6 +711,7 @@ static void connect(struct backend_info *be)
be->vif->queues = vzalloc(requested_num_queues *
sizeof(struct xenvif_queue));
be->vif->num_queues = requested_num_queues;
+ be->vif->stalled_queues = requested_num_queues;
for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
queue = &be->vif->queues[queue_index];
--
1.7.10.4

View File

@ -0,0 +1,74 @@
From db6a4a063a4f3f8069fb7f95d07bbc1d0d6fd1bd Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Tue, 12 Aug 2014 11:48:08 +0100
Subject: [PATCH 10/14] xen-netback: remove loop waiting function
Origin: https://git.kernel.org/linus/b1252858213f39700dac1bc3295b6e88f6cce24b
The original implementation relies on a loop to check if all inflight
packets are freed. Now that we have proper reference counting, there's no
need to use a loop anymore.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit b1252858213f39700dac1bc3295b6e88f6cce24b)
---
drivers/net/xen-netback/interface.c | 29 -----------------------------
1 file changed, 29 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 428c57c..10d832a 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -660,25 +660,6 @@ void xenvif_carrier_off(struct xenvif *vif)
rtnl_unlock();
}
-static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
- unsigned int worst_case_skb_lifetime)
-{
- int i, unmap_timeout = 0;
-
- for (i = 0; i < MAX_PENDING_REQS; ++i) {
- if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
- unmap_timeout++;
- schedule_timeout(msecs_to_jiffies(1000));
- if (unmap_timeout > worst_case_skb_lifetime &&
- net_ratelimit())
- netdev_err(queue->vif->dev,
- "Page still granted! Index: %x\n",
- i);
- i = -1;
- }
- }
-}
-
void xenvif_disconnect(struct xenvif *vif)
{
struct xenvif_queue *queue = NULL;
@@ -731,21 +712,11 @@ void xenvif_free(struct xenvif *vif)
struct xenvif_queue *queue = NULL;
unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- /* Here we want to avoid timeout messages if an skb can be legitimately
- * stuck somewhere else. Realistically this could be an another vif's
- * internal or QDisc queue. That another vif also has this
- * rx_drain_timeout_msecs timeout, so give it time to drain out.
- * Although if that other guest wakes up just before its timeout happens
- * and takes only one skb from QDisc, it can hold onto other skbs for a
- * longer period.
- */
- unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
unregister_netdev(vif->dev);
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
queue = &vif->queues[queue_index];
- xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
xenvif_deinit_queue(queue);
}
--
1.7.10.4

14
debian/patches/series vendored
View File

@ -146,6 +146,20 @@ bugfix/all/net-sctp-fix-panic-on-duplicate-ASCONF-chunks.patch
bugfix/all/net-sctp-fix-remote-memory-pressure-from-excessive-q.patch
bugfix/all/mnt-Prevent-pivot_root-from-creating-a-loop-in-the-m.patch
bugfix/all/net-mv643xx-disable-tso-by-default.patch
bugfix/all/xen-netback-Adding-debugfs-io_ring_qX-files.patch
bugfix/all/xen-netback-Using-a-new-state-bit-instead-of-carrier.patch
bugfix/all/xen-netback-Turn-off-the-carrier-if-the-guest-is-not.patch
bugfix/all/xen-netback-Fix-vif-disable-handling.patch
bugfix/all/xen-netback-Don-t-deschedule-NAPI-when-carrier-off.patch
bugfix/all/xen-netback-fix-debugfs-write-length-check.patch
bugfix/all/xen-netback-fix-debugfs-entry-creation.patch
bugfix/all/xen-netback-move-NAPI-add-remove-calls.patch
bugfix/all/xen-netback-don-t-stop-dealloc-kthread-too-early.patch
bugfix/all/xen-netback-remove-loop-waiting-function.patch
bugfix/all/xen-netback-move-netif_napi_add-before-binding-inter.patch
bugfix/all/xen-netback-make-feature-rx-notify-mandatory.patch
bugfix/all/xen-netback-fix-unlimited-guest-Rx-internal-queue-an.patch
bugfix/all/xen-netback-reintroduce-guest-Rx-stall-detection.patch
# memfd_create() & kdbus backport
features/all/kdbus/mm-allow-drivers-to-prevent-new-writable-mappings.patch