Cherry-pick some of the more important fixes from 3.10.4-rc1

svn path=/dists/sid/linux/; revision=20412
This commit is contained in:
Ben Hutchings 2013-07-27 22:33:06 +00:00
parent e4a48208f7
commit 8680744925
12 changed files with 742 additions and 0 deletions

11
debian/changelog vendored
View File

@ -82,6 +82,17 @@ linux (3.10.3-1) UNRELEASED; urgency=low
/usr/src/linux-config-$UPSTREAMVERSION directory
* ata: Disable SATA_INIC162X - this driver corrupts data and is not
expected to be fixed (Closes: #714295)
* writeback: Fix periodic writeback after fs mount
* sfc: Fix memory leak when discarding scattered packets
* neighbour: fix a race in neigh_destroy()
* virtio: support unlocked queue poll
* virtio_net: fix race in RX VQ processing
* vhost-net: fix use-after-free in vhost_net_flush
* tuntap: correctly linearize skb when zerocopy is used
* macvtap: correctly linearize skb when zerocopy is used
* ipv6: in case of link failure remove route directly instead of
letting it expire
* 9p: fix off by one causing access violations and memory corruption
-- Arnaud Patard <arnaud.patard@rtp-net.org> Tue, 16 Jul 2013 08:19:29 +0200

View File

@ -0,0 +1,73 @@
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 11 Jul 2013 13:16:54 -0400
Subject: 9p: fix off by one causing access violations and memory corruption
Origin: https://git.kernel.org/linus/110ecd69a9feea82a152bbf9b12aba57e6396883
p9_release_pages() would attempt to dereference one value past the end of
pages[]. This would cause the following crashes:
[ 6293.171817] BUG: unable to handle kernel paging request at ffff8807c96f3000
[ 6293.174146] IP: [<ffffffff8412793b>] p9_release_pages+0x3b/0x60
[ 6293.176447] PGD 79c5067 PUD 82c1e3067 PMD 82c197067 PTE 80000007c96f3060
[ 6293.180060] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[ 6293.180060] Modules linked in:
[ 6293.180060] CPU: 62 PID: 174043 Comm: modprobe Tainted: G W 3.10.0-next-20130710-sasha #3954
[ 6293.180060] task: ffff8807b803b000 ti: ffff880787dde000 task.ti: ffff880787dde000
[ 6293.180060] RIP: 0010:[<ffffffff8412793b>] [<ffffffff8412793b>] p9_release_pages+0x3b/0x60
[ 6293.214316] RSP: 0000:ffff880787ddfc28 EFLAGS: 00010202
[ 6293.214316] RAX: 0000000000000001 RBX: ffff8807c96f2ff8 RCX: 0000000000000000
[ 6293.222017] RDX: ffff8807b803b000 RSI: 0000000000000001 RDI: ffffea001c7e3d40
[ 6293.222017] RBP: ffff880787ddfc48 R08: 0000000000000000 R09: 0000000000000000
[ 6293.222017] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001
[ 6293.222017] R13: 0000000000000001 R14: ffff8807cc50c070 R15: ffff8807cc50c070
[ 6293.222017] FS: 00007f572641d700(0000) GS:ffff8807f3600000(0000) knlGS:0000000000000000
[ 6293.256784] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 6293.256784] CR2: ffff8807c96f3000 CR3: 00000007c8e81000 CR4: 00000000000006e0
[ 6293.256784] Stack:
[ 6293.256784] ffff880787ddfcc8 ffff880787ddfcc8 0000000000000000 ffff880787ddfcc8
[ 6293.256784] ffff880787ddfd48 ffffffff84128be8 ffff880700000002 0000000000000001
[ 6293.256784] ffff8807b803b000 ffff880787ddfce0 0000100000000000 0000000000000000
[ 6293.256784] Call Trace:
[ 6293.256784] [<ffffffff84128be8>] p9_virtio_zc_request+0x598/0x630
[ 6293.256784] [<ffffffff8115c610>] ? wake_up_bit+0x40/0x40
[ 6293.256784] [<ffffffff841209b1>] p9_client_zc_rpc+0x111/0x3a0
[ 6293.256784] [<ffffffff81174b78>] ? sched_clock_cpu+0x108/0x120
[ 6293.256784] [<ffffffff84122a21>] p9_client_read+0xe1/0x2c0
[ 6293.256784] [<ffffffff81708a90>] v9fs_file_read+0x90/0xc0
[ 6293.256784] [<ffffffff812bd073>] vfs_read+0xc3/0x130
[ 6293.256784] [<ffffffff811a78bd>] ? trace_hardirqs_on+0xd/0x10
[ 6293.256784] [<ffffffff812bd5a2>] SyS_read+0x62/0xa0
[ 6293.256784] [<ffffffff841a1a00>] tracesys+0xdd/0xe2
[ 6293.256784] Code: 66 90 48 89 fb 41 89 f5 48 8b 3f 48 85 ff 74 29 85 f6 74 25 45 31 e4 66 0f 1f 84 00 00 00 00 00 e8 eb 14 12 fd 41 ff c4 49 63 c4 <48> 8b 3c c3 48 85 ff 74 05 45 39 e5 75 e7 48 83 c4 08 5b 41 5c
[ 6293.256784] RIP [<ffffffff8412793b>] p9_release_pages+0x3b/0x60
[ 6293.256784] RSP <ffff880787ddfc28>
[ 6293.256784] CR2: ffff8807c96f3000
[ 6293.256784] ---[ end trace 50822ee72cd360fc ]---
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/9p/trans_common.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index de8df95..2ee3879 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -24,11 +24,11 @@
*/
void p9_release_pages(struct page **pages, int nr_pages)
{
- int i = 0;
- while (pages[i] && nr_pages--) {
- put_page(pages[i]);
- i++;
- }
+ int i;
+
+ for (i = 0; i < nr_pages; i++)
+ if (pages[i])
+ put_page(pages[i]);
}
EXPORT_SYMBOL(p9_release_pages);

View File

@ -0,0 +1,95 @@
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 10 Jul 2013 23:00:57 +0200
Subject: ipv6: in case of link failure remove route directly instead of
letting it expire
Origin: https://git.kernel.org/linus/1eb4f758286884e7566627164bca4c4a16952a83
We could end up expiring a route which is part of an ecmp route set. Doing
so would invalidate the rt->rt6i_nsiblings calculations and could provoke
the following panic:
[ 80.144667] ------------[ cut here ]------------
[ 80.145172] kernel BUG at net/ipv6/ip6_fib.c:733!
[ 80.145172] invalid opcode: 0000 [#1] SMP
[ 80.145172] Modules linked in: 8021q nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables
+snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer virtio_balloon snd soundcore i2c_piix4 i2c_core virtio_net virtio_blk
[ 80.145172] CPU: 1 PID: 786 Comm: ping6 Not tainted 3.10.0+ #118
[ 80.145172] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 80.145172] task: ffff880117fa0000 ti: ffff880118770000 task.ti: ffff880118770000
[ 80.145172] RIP: 0010:[<ffffffff815f3b5d>] [<ffffffff815f3b5d>] fib6_add+0x75d/0x830
[ 80.145172] RSP: 0018:ffff880118771798 EFLAGS: 00010202
[ 80.145172] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011350e480
[ 80.145172] RDX: ffff88011350e238 RSI: 0000000000000004 RDI: ffff88011350f738
[ 80.145172] RBP: ffff880118771848 R08: ffff880117903280 R09: 0000000000000001
[ 80.145172] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011350f680
[ 80.145172] R13: ffff880117903280 R14: ffff880118771890 R15: ffff88011350ef90
[ 80.145172] FS: 00007f02b5127740(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000
[ 80.145172] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 80.145172] CR2: 00007f981322a000 CR3: 00000001181b1000 CR4: 00000000000006e0
[ 80.145172] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 80.145172] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 80.145172] Stack:
[ 80.145172] 0000000000000001 ffff880100000000 ffff880100000000 ffff880117903280
[ 80.145172] 0000000000000000 ffff880119a4cf00 0000000000000400 00000000000007fa
[ 80.145172] 0000000000000000 0000000000000000 0000000000000000 ffff88011350f680
[ 80.145172] Call Trace:
[ 80.145172] [<ffffffff815eeceb>] ? rt6_bind_peer+0x4b/0x90
[ 80.145172] [<ffffffff815ed985>] __ip6_ins_rt+0x45/0x70
[ 80.145172] [<ffffffff815eee35>] ip6_ins_rt+0x35/0x40
[ 80.145172] [<ffffffff815ef1e4>] ip6_pol_route.isra.44+0x3a4/0x4b0
[ 80.145172] [<ffffffff815ef34a>] ip6_pol_route_output+0x2a/0x30
[ 80.145172] [<ffffffff81616077>] fib6_rule_action+0xd7/0x210
[ 80.145172] [<ffffffff815ef320>] ? ip6_pol_route_input+0x30/0x30
[ 80.145172] [<ffffffff81553026>] fib_rules_lookup+0xc6/0x140
[ 80.145172] [<ffffffff81616374>] fib6_rule_lookup+0x44/0x80
[ 80.145172] [<ffffffff815ef320>] ? ip6_pol_route_input+0x30/0x30
[ 80.145172] [<ffffffff815edea3>] ip6_route_output+0x73/0xb0
[ 80.145172] [<ffffffff815dfdf3>] ip6_dst_lookup_tail+0x2c3/0x2e0
[ 80.145172] [<ffffffff813007b1>] ? list_del+0x11/0x40
[ 80.145172] [<ffffffff81082a4c>] ? remove_wait_queue+0x3c/0x50
[ 80.145172] [<ffffffff815dfe4d>] ip6_dst_lookup_flow+0x3d/0xa0
[ 80.145172] [<ffffffff815fda77>] rawv6_sendmsg+0x267/0xc20
[ 80.145172] [<ffffffff815a8a83>] inet_sendmsg+0x63/0xb0
[ 80.145172] [<ffffffff8128eb93>] ? selinux_socket_sendmsg+0x23/0x30
[ 80.145172] [<ffffffff815218d6>] sock_sendmsg+0xa6/0xd0
[ 80.145172] [<ffffffff81524a68>] SYSC_sendto+0x128/0x180
[ 80.145172] [<ffffffff8109825c>] ? update_curr+0xec/0x170
[ 80.145172] [<ffffffff81041d09>] ? kvm_clock_get_cycles+0x9/0x10
[ 80.145172] [<ffffffff810afd1e>] ? __getnstimeofday+0x3e/0xd0
[ 80.145172] [<ffffffff8152509e>] SyS_sendto+0xe/0x10
[ 80.145172] [<ffffffff8164efd9>] system_call_fastpath+0x16/0x1b
[ 80.145172] Code: fe ff ff 41 f6 45 2a 06 0f 85 ca fe ff ff 49 8b 7e 08 4c 89 ee e8 94 ef ff ff e9 b9 fe ff ff 48 8b 82 28 05 00 00 e9 01 ff ff ff <0f> 0b 49 8b 54 24 30 0d 00 00 40 00 89 83 14 01 00 00 48 89 53
[ 80.145172] RIP [<ffffffff815f3b5d>] fib6_add+0x75d/0x830
[ 80.145172] RSP <ffff880118771798>
[ 80.387413] ---[ end trace 02f20b7a8b81ed95 ]---
[ 80.390154] Kernel panic - not syncing: Fatal exception in interrupt
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/ipv6/route.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bd5fd70..5b127e0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1080,10 +1080,13 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
- if (rt->rt6i_flags & RTF_CACHE)
- rt6_update_expires(rt, 0);
- else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+ if (rt->rt6i_flags & RTF_CACHE) {
+ dst_hold(&rt->dst);
+ if (ip6_del_rt(rt))
+ dst_free(&rt->dst);
+ } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
rt->rt6i_node->fn_sernum = -1;
+ }
}
}

View File

@ -0,0 +1,53 @@
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 10 Jul 2013 13:43:28 +0800
Subject: macvtap: correctly linearize skb when zerocopy is used
Origin: https://git.kernel.org/linus/61d46bf979d5cd7c164709a80ad5676a35494aae
Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to
linearize parts of the skb to let the rest of iov to be fit in
the frags, we need count copylen into linear when calling macvtap_alloc_skb()
instead of partly counting it into data_len. Since this breaks
zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should
be zero at beginning. This cause nr_frags to be increased wrongly without
setting the correct frags.
This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73
(macvtap: zerocopy: validate vectors before building skb).
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/macvtap.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index f2c4a3b..876c722 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -712,6 +712,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
int vnet_hdr_len = 0;
int copylen = 0;
bool zerocopy = false;
+ size_t linear;
if (q->flags & IFF_VNET_HDR) {
vnet_hdr_len = q->vnet_hdr_sz;
@@ -766,11 +767,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
copylen = vnet_hdr.hdr_len;
if (!copylen)
copylen = GOODCOPY_LEN;
- } else
+ linear = copylen;
+ } else {
copylen = len;
+ linear = vnet_hdr.hdr_len;
+ }
skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen,
- vnet_hdr.hdr_len, noblock, &err);
+ linear, noblock, &err);
if (!skb)
goto err;

View File

@ -0,0 +1,75 @@
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 28 Jun 2013 02:37:42 -0700
Subject: neighbour: fix a race in neigh_destroy()
Origin: https://git.kernel.org/linus/c9ab4d85de222f3390c67aedc9c18a50e767531e
There is a race in neighbour code, because neigh_destroy() uses
skb_queue_purge(&neigh->arp_queue) without holding neighbour lock,
while other parts of the code assume neighbour rwlock is what
protects arp_queue
Convert all skb_queue_purge() calls to the __skb_queue_purge() variant
Use __skb_queue_head_init() instead of skb_queue_head_init()
to make clear we do not use arp_queue.lock
And hold neigh->lock in neigh_destroy() to close the race.
Reported-by: Joe Jin <joe.jin@oracle.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/core/neighbour.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2569ab2..b7de821 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
we must kill timers etc. and move
it to safe state.
*/
- skb_queue_purge(&n->arp_queue);
+ __skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
n->output = neigh_blackhole;
if (n->nud_state & NUD_VALID)
@@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
if (!n)
goto out_entries;
- skb_queue_head_init(&n->arp_queue);
+ __skb_queue_head_init(&n->arp_queue);
rwlock_init(&n->lock);
seqlock_init(&n->ha_lock);
n->updated = n->used = now;
@@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh)
if (neigh_del_timer(neigh))
pr_warn("Impossible event\n");
- skb_queue_purge(&neigh->arp_queue);
+ write_lock_bh(&neigh->lock);
+ __skb_queue_purge(&neigh->arp_queue);
+ write_unlock_bh(&neigh->lock);
neigh->arp_queue_len_bytes = 0;
if (dev->netdev_ops->ndo_neigh_destroy)
@@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh)
neigh->ops->error_report(neigh, skb);
write_lock(&neigh->lock);
}
- skb_queue_purge(&neigh->arp_queue);
+ __skb_queue_purge(&neigh->arp_queue);
neigh->arp_queue_len_bytes = 0;
}
@@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
write_lock_bh(&neigh->lock);
}
- skb_queue_purge(&neigh->arp_queue);
+ __skb_queue_purge(&neigh->arp_queue);
neigh->arp_queue_len_bytes = 0;
}
out:

View File

@ -0,0 +1,83 @@
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 4 Jul 2013 23:48:46 +0100
Subject: sfc: Fix memory leak when discarding scattered packets
Origin: https://git.kernel.org/linus/734d4e159b283a4ae4d007b7e7a91d84398ccb92
Commit 2768935a4660 ('sfc: reuse pages to avoid DMA mapping/unmapping
costs') did not fully take account of DMA scattering which was
introduced immediately before. If a received packet is invalid and
must be discarded, we only drop a reference to the first buffer's
page, but we need to drop a reference for each buffer the packet
used.
I think this bug was missed partly because efx_recycle_rx_buffers()
was not renamed and so no longer does what its name says. It does not
change the state of buffers, but only prepares the underlying pages
for recycling. Rename it accordingly.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/sfc/rx.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 65646cd..6af9cfd 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -282,9 +282,9 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
}
/* Recycle the pages that are used by buffers that have just been received. */
-static void efx_recycle_rx_buffers(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf,
- unsigned int n_frags)
+static void efx_recycle_rx_pages(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
{
struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
@@ -294,6 +294,20 @@ static void efx_recycle_rx_buffers(struct efx_channel *channel,
} while (--n_frags);
}
+static void efx_discard_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
+ efx_recycle_rx_pages(channel, rx_buf, n_frags);
+
+ do {
+ efx_free_rx_buffer(rx_buf);
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--n_frags);
+}
+
/**
* efx_fast_push_rx_descriptors - push new RX descriptors quickly
* @rx_queue: RX descriptor queue
@@ -533,8 +547,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
*/
if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
efx_rx_flush_packet(channel);
- put_page(rx_buf->page);
- efx_recycle_rx_buffers(channel, rx_buf, n_frags);
+ efx_discard_rx_packet(channel, rx_buf, n_frags);
return;
}
@@ -570,9 +583,9 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
}
- /* All fragments have been DMA-synced, so recycle buffers and pages. */
+ /* All fragments have been DMA-synced, so recycle pages. */
rx_buf = efx_rx_buffer(rx_queue, index);
- efx_recycle_rx_buffers(channel, rx_buf, n_frags);
+ efx_recycle_rx_pages(channel, rx_buf, n_frags);
/* Pipeline receives so that we give time for packet headers to be
* prefetched into cache.

View File

@ -0,0 +1,53 @@
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 10 Jul 2013 13:43:27 +0800
Subject: tuntap: correctly linearize skb when zerocopy is used
Origin: https://git.kernel.org/linus/3dd5c3308e8b671e8e8882ba972f51cefbe9fd0d
Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to
linearize parts of the skb to let the rest of iov to be fit in
the frags, we need count copylen into linear when calling tun_alloc_skb()
instead of partly counting it into data_len. Since this breaks
zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should
be zero at beginning. This cause nr_frags to be increased wrongly without
setting the correct frags.
This bug were introduced from 0690899b4d4501b3505be069b9a687e68ccbe15b
(tun: experimental zero copy tx support)
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/tun.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7eab5fc..5cdcf92 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1042,7 +1042,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
{
struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
struct sk_buff *skb;
- size_t len = total_len, align = NET_SKB_PAD;
+ size_t len = total_len, align = NET_SKB_PAD, linear;
struct virtio_net_hdr gso = { 0 };
int offset = 0;
int copylen;
@@ -1106,10 +1106,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
copylen = gso.hdr_len;
if (!copylen)
copylen = GOODCOPY_LEN;
- } else
+ linear = copylen;
+ } else {
copylen = len;
+ linear = gso.hdr_len;
+ }
- skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock);
+ skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
if (IS_ERR(skb)) {
if (PTR_ERR(skb) != -EAGAIN)
tun->dev->stats.rx_dropped++;

View File

@ -0,0 +1,56 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 25 Jun 2013 17:29:46 +0300
Subject: vhost-net: fix use-after-free in vhost_net_flush
Origin: https://git.kernel.org/linus/c38e39c378f46f00ce922dd40a91043a9925c28d
vhost_net_ubuf_put_and_wait has a confusing name:
it will actually also free it's argument.
Thus since commit 1280c27f8e29acf4af2da914e80ec27c3dbd5c01
"vhost-net: flush outstanding DMAs on memory change"
vhost_net_flush tries to use the argument after passing it
to vhost_net_ubuf_put_and_wait, this results
in use after free.
To fix, don't free the argument in vhost_net_ubuf_put_and_wait,
add an new API for callers that want to free ubufs.
Acked-by: Asias He <asias@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/net.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f80d3dd..8ca5ac7 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -150,6 +150,11 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs)
{
kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal);
wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+}
+
+static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs)
+{
+ vhost_net_ubuf_put_and_wait(ubufs);
kfree(ubufs);
}
@@ -948,7 +953,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
mutex_unlock(&vq->mutex);
if (oldubufs) {
- vhost_net_ubuf_put_and_wait(oldubufs);
+ vhost_net_ubuf_put_wait_and_free(oldubufs);
mutex_lock(&vq->mutex);
vhost_zerocopy_signal_used(n, vq);
mutex_unlock(&vq->mutex);
@@ -966,7 +971,7 @@ err_used:
rcu_assign_pointer(vq->private_data, oldsock);
vhost_net_enable_vq(n, vq);
if (ubufs)
- vhost_net_ubuf_put_and_wait(ubufs);
+ vhost_net_ubuf_put_wait_and_free(ubufs);
err_ubufs:
fput(sock->file);
err_vq:

View File

@ -0,0 +1,119 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 9 Jul 2013 13:19:18 +0300
Subject: virtio: support unlocked queue poll
Origin: https://git.kernel.org/linus/cc229884d3f77ec3b1240e467e0236c3e0647c0c
This adds a way to check ring empty state after enable_cb outside any
locks. Will be used by virtio_net.
Note: there's room for more optimization: caller is likely to have a
memory barrier already, which means we might be able to get rid of a
barrier here. Deferring this optimization until we do some
benchmarking.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/virtio/virtio_ring.c | 56 ++++++++++++++++++++++++++++++++++----------
include/linux/virtio.h | 4 ++++
2 files changed, 48 insertions(+), 12 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5217baf..37d58f8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -607,19 +607,21 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
/**
- * virtqueue_enable_cb - restart callbacks after disable_cb.
+ * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
* @vq: the struct virtqueue we're talking about.
*
- * This re-enables callbacks; it returns "false" if there are pending
- * buffers in the queue, to detect a possible race between the driver
- * checking for more work, and enabling callbacks.
+ * This re-enables callbacks; it returns current queue state
+ * in an opaque unsigned value. This value should be later tested by
+ * virtqueue_poll, to detect a possible race between the driver checking for
+ * more work, and enabling callbacks.
*
* Caller must ensure we don't call this with other virtqueue
* operations at the same time (except where noted).
*/
-bool virtqueue_enable_cb(struct virtqueue *_vq)
+unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ u16 last_used_idx;
START_USE(vq);
@@ -629,15 +631,45 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
- vring_used_event(&vq->vring) = vq->last_used_idx;
+ vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx;
+ END_USE(vq);
+ return last_used_idx;
+}
+EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
+
+/**
+ * virtqueue_poll - query pending used buffers
+ * @vq: the struct virtqueue we're talking about.
+ * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
+ *
+ * Returns "true" if there are pending used buffers in the queue.
+ *
+ * This does not need to be serialized.
+ */
+bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
virtio_mb(vq->weak_barriers);
- if (unlikely(more_used(vq))) {
- END_USE(vq);
- return false;
- }
+ return (u16)last_used_idx != vq->vring.used->idx;
+}
+EXPORT_SYMBOL_GPL(virtqueue_poll);
- END_USE(vq);
- return true;
+/**
+ * virtqueue_enable_cb - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks; it returns "false" if there are pending
+ * buffers in the queue, to detect a possible race between the driver
+ * checking for more work, and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
+bool virtqueue_enable_cb(struct virtqueue *_vq)
+{
+ unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
+ return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 9ff8645..72398ee 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -70,6 +70,10 @@ void virtqueue_disable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb(struct virtqueue *vq);
+unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq);
+
+bool virtqueue_poll(struct virtqueue *vq, unsigned);
+
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
void *virtqueue_detach_unused_buf(struct virtqueue *vq);

View File

@ -0,0 +1,56 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 9 Jul 2013 08:13:04 +0300
Subject: virtio_net: fix race in RX VQ processing
Origin: https://git.kernel.org/linus/cbdadbbf0c790f79350a8f36029208944c5487d0
virtio net called virtqueue_enable_cq on RX path after napi_complete, so
with NAPI_STATE_SCHED clear - outside the implicit napi lock.
This violates the requirement to synchronize virtqueue_enable_cq wrt
virtqueue_add_buf. In particular, used event can move backwards,
causing us to lose interrupts.
In a debug build, this can trigger panic within START_USE.
Jason Wang reports that he can trigger the races artificially,
by adding udelay() in virtqueue_enable_cb() after virtio_mb().
However, we must call napi_complete to clear NAPI_STATE_SCHED before
polling the virtqueue for used buffers, otherwise napi_schedule_prep in
a callback will fail, causing us to lose RX events.
To fix, call virtqueue_enable_cb_prepare with NAPI_STATE_SCHED
set (under napi lock), later call virtqueue_poll with
NAPI_STATE_SCHED clear (outside the lock).
Reported-by: Jason Wang <jasowang@redhat.com>
Tested-by: Jason Wang <jasowang@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/virtio_net.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c9e0038..42d670a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -602,7 +602,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
container_of(napi, struct receive_queue, napi);
struct virtnet_info *vi = rq->vq->vdev->priv;
void *buf;
- unsigned int len, received = 0;
+ unsigned int r, len, received = 0;
again:
while (received < budget &&
@@ -619,8 +619,9 @@ again:
/* Out of packets? */
if (received < budget) {
+ r = virtqueue_enable_cb_prepare(rq->vq);
napi_complete(napi);
- if (unlikely(!virtqueue_enable_cb(rq->vq)) &&
+ if (unlikely(virtqueue_poll(rq->vq, r)) &&
napi_schedule_prep(napi)) {
virtqueue_disable_cb(rq->vq);
__napi_schedule(napi);

View File

@ -0,0 +1,55 @@
From: Jan Kara <jack@suse.cz>
Date: Fri, 28 Jun 2013 16:04:02 +0200
Subject: writeback: Fix periodic writeback after fs mount
Origin: https://git.kernel.org/linus/a5faeaf9109578e65e1a32e2a3e76c8b47e7dcb6
Code in blkdev.c moves a device inode to default_backing_dev_info when
the last reference to the device is put and moves the device inode back
to its bdi when the first reference is acquired. This includes moving to
wb.b_dirty list if the device inode is dirty. The code however doesn't
setup timer to wake corresponding flusher thread and while wb.b_dirty
list is non-empty __mark_inode_dirty() will not set it up either. Thus
periodic writeback is effectively disabled until a sync(2) call which can
lead to unexpected data loss in case of crash or power failure.
Fix the problem by setting up a timer for periodic writeback in case we
add the first dirty inode to wb.b_dirty list in bdev_inode_switch_bdi().
Reported-by: Bert De Jonghe <Bert.DeJonghe@amplidata.com>
CC: stable@vger.kernel.org # >= 3.0
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/block_dev.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2091db8..85f5c85 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode,
struct backing_dev_info *dst)
{
struct backing_dev_info *old = inode->i_data.backing_dev_info;
+ bool wakeup_bdi = false;
if (unlikely(dst == old)) /* deadlock avoidance */
return;
bdi_lock_two(&old->wb, &dst->wb);
spin_lock(&inode->i_lock);
inode->i_data.backing_dev_info = dst;
- if (inode->i_state & I_DIRTY)
+ if (inode->i_state & I_DIRTY) {
+ if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
+ wakeup_bdi = true;
list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+ }
spin_unlock(&inode->i_lock);
spin_unlock(&old->wb.list_lock);
spin_unlock(&dst->wb.list_lock);
+
+ if (wakeup_bdi)
+ bdi_wakeup_thread_delayed(dst);
}
/* Kill _all_ buffers and pagecache , dirty or not.. */

13
debian/patches/series vendored
View File

@ -104,8 +104,21 @@ bugfix/all/alx-fix-MAC-address-alignment-problem.patch
bugfix/all/alx-fix-ethtool-support-code.patch
bugfix/all/alx-remove-WoL-support.patch
bugfix/all/alx-fix-lockdep-annotation.patch
bugfix/all/xen-blkback-Check-device-permissions-before-allowing.patch
bugfix/all/be2net-Fix-to-avoid-hardware-workaround-when-not-nee.patch
features/all/iwlwifi-mvm-support-BSS-only.patch
features/all/iwlwifi-mvm-adjust-firmware-D3-configuration-API.patch
features/all/iwlwifi-bump-required-firmware-API-version-for-3160-.patch
# Cherry-picked fixes from 3.10.4-rc1
bugfix/all/writeback-Fix-periodic-writeback-after-fs-mount.patch
bugfix/all/sfc-Fix-memory-leak-when-discarding-scattered-packet.patch
bugfix/all/neighbour-fix-a-race-in-neigh_destroy.patch
bugfix/all/virtio-support-unlocked-queue-poll.patch
bugfix/all/virtio_net-fix-race-in-RX-VQ-processing.patch
bugfix/all/vhost-net-fix-use-after-free-in-vhost_net_flush.patch
bugfix/all/tuntap-correctly-linearize-skb-when-zerocopy-is-used.patch
bugfix/all/macvtap-correctly-linearize-skb-when-zerocopy-is-use.patch
bugfix/all/ipv6-in-case-of-link-failure-remove-route-directly-i.patch
bugfix/all/9p-fix-off-by-one-causing-access-violations-and-memo.patch