From 86807449251de0efdb28f7c513dc4da7b82cc45c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 27 Jul 2013 22:33:06 +0000 Subject: [PATCH] Cherry-pick some of the more important fixes from 3.10.4-rc1 svn path=/dists/sid/linux/; revision=20412 --- debian/changelog | 11 ++ ...e-causing-access-violations-and-memo.patch | 73 +++++++++++ ...link-failure-remove-route-directly-i.patch | 95 ++++++++++++++ ...y-linearize-skb-when-zerocopy-is-use.patch | 53 ++++++++ ...eighbour-fix-a-race-in-neigh_destroy.patch | 75 +++++++++++ ...eak-when-discarding-scattered-packet.patch | 83 ++++++++++++ ...-linearize-skb-when-zerocopy-is-used.patch | 53 ++++++++ ...ix-use-after-free-in-vhost_net_flush.patch | 56 +++++++++ .../virtio-support-unlocked-queue-poll.patch | 119 ++++++++++++++++++ ...tio_net-fix-race-in-RX-VQ-processing.patch | 56 +++++++++ ...ix-periodic-writeback-after-fs-mount.patch | 55 ++++++++ debian/patches/series | 13 ++ 12 files changed, 742 insertions(+) create mode 100644 debian/patches/bugfix/all/9p-fix-off-by-one-causing-access-violations-and-memo.patch create mode 100644 debian/patches/bugfix/all/ipv6-in-case-of-link-failure-remove-route-directly-i.patch create mode 100644 debian/patches/bugfix/all/macvtap-correctly-linearize-skb-when-zerocopy-is-use.patch create mode 100644 debian/patches/bugfix/all/neighbour-fix-a-race-in-neigh_destroy.patch create mode 100644 debian/patches/bugfix/all/sfc-Fix-memory-leak-when-discarding-scattered-packet.patch create mode 100644 debian/patches/bugfix/all/tuntap-correctly-linearize-skb-when-zerocopy-is-used.patch create mode 100644 debian/patches/bugfix/all/vhost-net-fix-use-after-free-in-vhost_net_flush.patch create mode 100644 debian/patches/bugfix/all/virtio-support-unlocked-queue-poll.patch create mode 100644 debian/patches/bugfix/all/virtio_net-fix-race-in-RX-VQ-processing.patch create mode 100644 debian/patches/bugfix/all/writeback-Fix-periodic-writeback-after-fs-mount.patch diff --git a/debian/changelog b/debian/changelog index 3b3f90a05..cc3543533 100644 --- a/debian/changelog +++ b/debian/changelog @@ -82,6 +82,17 @@ linux (3.10.3-1) UNRELEASED; urgency=low /usr/src/linux-config-$UPSTREAMVERSION directory * ata: Disable SATA_INIC162X - this driver corrupts data and is not expected to be fixed (Closes: #714295) + * writeback: Fix periodic writeback after fs mount + * sfc: Fix memory leak when discarding scattered packets + * neighbour: fix a race in neigh_destroy() + * virtio: support unlocked queue poll + * virtio_net: fix race in RX VQ processing + * vhost-net: fix use-after-free in vhost_net_flush + * tuntap: correctly linearize skb when zerocopy is used + * macvtap: correctly linearize skb when zerocopy is used + * ipv6: in case of link failure remove route directly instead of + letting it expire + * 9p: fix off by one causing access violations and memory corruption -- Arnaud Patard Tue, 16 Jul 2013 08:19:29 +0200 diff --git a/debian/patches/bugfix/all/9p-fix-off-by-one-causing-access-violations-and-memo.patch b/debian/patches/bugfix/all/9p-fix-off-by-one-causing-access-violations-and-memo.patch new file mode 100644 index 000000000..c00d03afc --- /dev/null +++ b/debian/patches/bugfix/all/9p-fix-off-by-one-causing-access-violations-and-memo.patch @@ -0,0 +1,73 @@ +From: Sasha Levin +Date: Thu, 11 Jul 2013 13:16:54 -0400 +Subject: 9p: fix off by one causing access violations and memory corruption +Origin: https://git.kernel.org/linus/110ecd69a9feea82a152bbf9b12aba57e6396883 + +p9_release_pages() would attempt to dereference one value past the end of +pages[]. This would cause the following crashes: + +[ 6293.171817] BUG: unable to handle kernel paging request at ffff8807c96f3000 +[ 6293.174146] IP: [] p9_release_pages+0x3b/0x60 +[ 6293.176447] PGD 79c5067 PUD 82c1e3067 PMD 82c197067 PTE 80000007c96f3060 +[ 6293.180060] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC +[ 6293.180060] Modules linked in: +[ 6293.180060] CPU: 62 PID: 174043 Comm: modprobe Tainted: G W 3.10.0-next-20130710-sasha #3954 +[ 6293.180060] task: ffff8807b803b000 ti: ffff880787dde000 task.ti: ffff880787dde000 +[ 6293.180060] RIP: 0010:[] [] p9_release_pages+0x3b/0x60 +[ 6293.214316] RSP: 0000:ffff880787ddfc28 EFLAGS: 00010202 +[ 6293.214316] RAX: 0000000000000001 RBX: ffff8807c96f2ff8 RCX: 0000000000000000 +[ 6293.222017] RDX: ffff8807b803b000 RSI: 0000000000000001 RDI: ffffea001c7e3d40 +[ 6293.222017] RBP: ffff880787ddfc48 R08: 0000000000000000 R09: 0000000000000000 +[ 6293.222017] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 +[ 6293.222017] R13: 0000000000000001 R14: ffff8807cc50c070 R15: ffff8807cc50c070 +[ 6293.222017] FS: 00007f572641d700(0000) GS:ffff8807f3600000(0000) knlGS:0000000000000000 +[ 6293.256784] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +[ 6293.256784] CR2: ffff8807c96f3000 CR3: 00000007c8e81000 CR4: 00000000000006e0 +[ 6293.256784] Stack: +[ 6293.256784] ffff880787ddfcc8 ffff880787ddfcc8 0000000000000000 ffff880787ddfcc8 +[ 6293.256784] ffff880787ddfd48 ffffffff84128be8 ffff880700000002 0000000000000001 +[ 6293.256784] ffff8807b803b000 ffff880787ddfce0 0000100000000000 0000000000000000 +[ 6293.256784] Call Trace: +[ 6293.256784] [] p9_virtio_zc_request+0x598/0x630 +[ 6293.256784] [] ? wake_up_bit+0x40/0x40 +[ 6293.256784] [] p9_client_zc_rpc+0x111/0x3a0 +[ 6293.256784] [] ? sched_clock_cpu+0x108/0x120 +[ 6293.256784] [] p9_client_read+0xe1/0x2c0 +[ 6293.256784] [] v9fs_file_read+0x90/0xc0 +[ 6293.256784] [] vfs_read+0xc3/0x130 +[ 6293.256784] [] ? trace_hardirqs_on+0xd/0x10 +[ 6293.256784] [] SyS_read+0x62/0xa0 +[ 6293.256784] [] tracesys+0xdd/0xe2 +[ 6293.256784] Code: 66 90 48 89 fb 41 89 f5 48 8b 3f 48 85 ff 74 29 85 f6 74 25 45 31 e4 66 0f 1f 84 00 00 00 00 00 e8 eb 14 12 fd 41 ff c4 49 63 c4 <48> 8b 3c c3 48 85 ff 74 05 45 39 e5 75 e7 48 83 c4 08 5b 41 5c +[ 6293.256784] RIP [] p9_release_pages+0x3b/0x60 +[ 6293.256784] RSP +[ 6293.256784] CR2: ffff8807c96f3000 +[ 6293.256784] ---[ end trace 50822ee72cd360fc ]--- + +Signed-off-by: Sasha Levin +Signed-off-by: David S. Miller +--- + net/9p/trans_common.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c +index de8df95..2ee3879 100644 +--- a/net/9p/trans_common.c ++++ b/net/9p/trans_common.c +@@ -24,11 +24,11 @@ + */ + void p9_release_pages(struct page **pages, int nr_pages) + { +- int i = 0; +- while (pages[i] && nr_pages--) { +- put_page(pages[i]); +- i++; +- } ++ int i; ++ ++ for (i = 0; i < nr_pages; i++) ++ if (pages[i]) ++ put_page(pages[i]); + } + EXPORT_SYMBOL(p9_release_pages); + diff --git a/debian/patches/bugfix/all/ipv6-in-case-of-link-failure-remove-route-directly-i.patch b/debian/patches/bugfix/all/ipv6-in-case-of-link-failure-remove-route-directly-i.patch new file mode 100644 index 000000000..cfbe20d20 --- /dev/null +++ b/debian/patches/bugfix/all/ipv6-in-case-of-link-failure-remove-route-directly-i.patch @@ -0,0 +1,95 @@ +From: Hannes Frederic Sowa +Date: Wed, 10 Jul 2013 23:00:57 +0200 +Subject: ipv6: in case of link failure remove route directly instead of + letting it expire +Origin: https://git.kernel.org/linus/1eb4f758286884e7566627164bca4c4a16952a83 + +We could end up expiring a route which is part of an ecmp route set. Doing +so would invalidate the rt->rt6i_nsiblings calculations and could provoke +the following panic: + +[ 80.144667] ------------[ cut here ]------------ +[ 80.145172] kernel BUG at net/ipv6/ip6_fib.c:733! +[ 80.145172] invalid opcode: 0000 [#1] SMP +[ 80.145172] Modules linked in: 8021q nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables ++snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer virtio_balloon snd soundcore i2c_piix4 i2c_core virtio_net virtio_blk +[ 80.145172] CPU: 1 PID: 786 Comm: ping6 Not tainted 3.10.0+ #118 +[ 80.145172] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 +[ 80.145172] task: ffff880117fa0000 ti: ffff880118770000 task.ti: ffff880118770000 +[ 80.145172] RIP: 0010:[] [] fib6_add+0x75d/0x830 +[ 80.145172] RSP: 0018:ffff880118771798 EFLAGS: 00010202 +[ 80.145172] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011350e480 +[ 80.145172] RDX: ffff88011350e238 RSI: 0000000000000004 RDI: ffff88011350f738 +[ 80.145172] RBP: ffff880118771848 R08: ffff880117903280 R09: 0000000000000001 +[ 80.145172] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011350f680 +[ 80.145172] R13: ffff880117903280 R14: ffff880118771890 R15: ffff88011350ef90 +[ 80.145172] FS: 00007f02b5127740(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 +[ 80.145172] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +[ 80.145172] CR2: 00007f981322a000 CR3: 00000001181b1000 CR4: 00000000000006e0 +[ 80.145172] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 80.145172] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +[ 80.145172] Stack: +[ 80.145172] 0000000000000001 ffff880100000000 ffff880100000000 ffff880117903280 +[ 80.145172] 0000000000000000 ffff880119a4cf00 0000000000000400 00000000000007fa +[ 80.145172] 0000000000000000 0000000000000000 0000000000000000 ffff88011350f680 +[ 80.145172] Call Trace: +[ 80.145172] [] ? rt6_bind_peer+0x4b/0x90 +[ 80.145172] [] __ip6_ins_rt+0x45/0x70 +[ 80.145172] [] ip6_ins_rt+0x35/0x40 +[ 80.145172] [] ip6_pol_route.isra.44+0x3a4/0x4b0 +[ 80.145172] [] ip6_pol_route_output+0x2a/0x30 +[ 80.145172] [] fib6_rule_action+0xd7/0x210 +[ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 +[ 80.145172] [] fib_rules_lookup+0xc6/0x140 +[ 80.145172] [] fib6_rule_lookup+0x44/0x80 +[ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 +[ 80.145172] [] ip6_route_output+0x73/0xb0 +[ 80.145172] [] ip6_dst_lookup_tail+0x2c3/0x2e0 +[ 80.145172] [] ? list_del+0x11/0x40 +[ 80.145172] [] ? remove_wait_queue+0x3c/0x50 +[ 80.145172] [] ip6_dst_lookup_flow+0x3d/0xa0 +[ 80.145172] [] rawv6_sendmsg+0x267/0xc20 +[ 80.145172] [] inet_sendmsg+0x63/0xb0 +[ 80.145172] [] ? selinux_socket_sendmsg+0x23/0x30 +[ 80.145172] [] sock_sendmsg+0xa6/0xd0 +[ 80.145172] [] SYSC_sendto+0x128/0x180 +[ 80.145172] [] ? update_curr+0xec/0x170 +[ 80.145172] [] ? kvm_clock_get_cycles+0x9/0x10 +[ 80.145172] [] ? __getnstimeofday+0x3e/0xd0 +[ 80.145172] [] SyS_sendto+0xe/0x10 +[ 80.145172] [] system_call_fastpath+0x16/0x1b +[ 80.145172] Code: fe ff ff 41 f6 45 2a 06 0f 85 ca fe ff ff 49 8b 7e 08 4c 89 ee e8 94 ef ff ff e9 b9 fe ff ff 48 8b 82 28 05 00 00 e9 01 ff ff ff <0f> 0b 49 8b 54 24 30 0d 00 00 40 00 89 83 14 01 00 00 48 89 53 +[ 80.145172] RIP [] fib6_add+0x75d/0x830 +[ 80.145172] RSP +[ 80.387413] ---[ end trace 02f20b7a8b81ed95 ]--- +[ 80.390154] Kernel panic - not syncing: Fatal exception in interrupt + +Cc: Nicolas Dichtel +Cc: YOSHIFUJI Hideaki +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +--- + net/ipv6/route.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index bd5fd70..5b127e0 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1080,10 +1080,13 @@ static void ip6_link_failure(struct sk_buff *skb) + + rt = (struct rt6_info *) skb_dst(skb); + if (rt) { +- if (rt->rt6i_flags & RTF_CACHE) +- rt6_update_expires(rt, 0); +- else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) ++ if (rt->rt6i_flags & RTF_CACHE) { ++ dst_hold(&rt->dst); ++ if (ip6_del_rt(rt)) ++ dst_free(&rt->dst); ++ } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { + rt->rt6i_node->fn_sernum = -1; ++ } + } + } + diff --git a/debian/patches/bugfix/all/macvtap-correctly-linearize-skb-when-zerocopy-is-use.patch b/debian/patches/bugfix/all/macvtap-correctly-linearize-skb-when-zerocopy-is-use.patch new file mode 100644 index 000000000..858f65bf0 --- /dev/null +++ b/debian/patches/bugfix/all/macvtap-correctly-linearize-skb-when-zerocopy-is-use.patch @@ -0,0 +1,53 @@ +From: Jason Wang +Date: Wed, 10 Jul 2013 13:43:28 +0800 +Subject: macvtap: correctly linearize skb when zerocopy is used +Origin: https://git.kernel.org/linus/61d46bf979d5cd7c164709a80ad5676a35494aae + +Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to +linearize parts of the skb to let the rest of iov to be fit in +the frags, we need count copylen into linear when calling macvtap_alloc_skb() +instead of partly counting it into data_len. Since this breaks +zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should +be zero at beginning. This cause nr_frags to be increased wrongly without +setting the correct frags. + +This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73 +(macvtap: zerocopy: validate vectors before building skb). + +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +--- + drivers/net/macvtap.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c +index f2c4a3b..876c722 100644 +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -712,6 +712,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + int vnet_hdr_len = 0; + int copylen = 0; + bool zerocopy = false; ++ size_t linear; + + if (q->flags & IFF_VNET_HDR) { + vnet_hdr_len = q->vnet_hdr_sz; +@@ -766,11 +767,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + copylen = vnet_hdr.hdr_len; + if (!copylen) + copylen = GOODCOPY_LEN; +- } else ++ linear = copylen; ++ } else { + copylen = len; ++ linear = vnet_hdr.hdr_len; ++ } + + skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, +- vnet_hdr.hdr_len, noblock, &err); ++ linear, noblock, &err); + if (!skb) + goto err; + diff --git a/debian/patches/bugfix/all/neighbour-fix-a-race-in-neigh_destroy.patch b/debian/patches/bugfix/all/neighbour-fix-a-race-in-neigh_destroy.patch new file mode 100644 index 000000000..102aef379 --- /dev/null +++ b/debian/patches/bugfix/all/neighbour-fix-a-race-in-neigh_destroy.patch @@ -0,0 +1,75 @@ +From: Eric Dumazet +Date: Fri, 28 Jun 2013 02:37:42 -0700 +Subject: neighbour: fix a race in neigh_destroy() +Origin: https://git.kernel.org/linus/c9ab4d85de222f3390c67aedc9c18a50e767531e + +There is a race in neighbour code, because neigh_destroy() uses +skb_queue_purge(&neigh->arp_queue) without holding neighbour lock, +while other parts of the code assume neighbour rwlock is what +protects arp_queue + +Convert all skb_queue_purge() calls to the __skb_queue_purge() variant + +Use __skb_queue_head_init() instead of skb_queue_head_init() +to make clear we do not use arp_queue.lock + +And hold neigh->lock in neigh_destroy() to close the race. + +Reported-by: Joe Jin +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +--- + net/core/neighbour.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/net/core/neighbour.c b/net/core/neighbour.c +index 2569ab2..b7de821 100644 +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) + we must kill timers etc. and move + it to safe state. + */ +- skb_queue_purge(&n->arp_queue); ++ __skb_queue_purge(&n->arp_queue); + n->arp_queue_len_bytes = 0; + n->output = neigh_blackhole; + if (n->nud_state & NUD_VALID) +@@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device + if (!n) + goto out_entries; + +- skb_queue_head_init(&n->arp_queue); ++ __skb_queue_head_init(&n->arp_queue); + rwlock_init(&n->lock); + seqlock_init(&n->ha_lock); + n->updated = n->used = now; +@@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh) + if (neigh_del_timer(neigh)) + pr_warn("Impossible event\n"); + +- skb_queue_purge(&neigh->arp_queue); ++ write_lock_bh(&neigh->lock); ++ __skb_queue_purge(&neigh->arp_queue); ++ write_unlock_bh(&neigh->lock); + neigh->arp_queue_len_bytes = 0; + + if (dev->netdev_ops->ndo_neigh_destroy) +@@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh) + neigh->ops->error_report(neigh, skb); + write_lock(&neigh->lock); + } +- skb_queue_purge(&neigh->arp_queue); ++ __skb_queue_purge(&neigh->arp_queue); + neigh->arp_queue_len_bytes = 0; + } + +@@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, + + write_lock_bh(&neigh->lock); + } +- skb_queue_purge(&neigh->arp_queue); ++ __skb_queue_purge(&neigh->arp_queue); + neigh->arp_queue_len_bytes = 0; + } + out: diff --git a/debian/patches/bugfix/all/sfc-Fix-memory-leak-when-discarding-scattered-packet.patch b/debian/patches/bugfix/all/sfc-Fix-memory-leak-when-discarding-scattered-packet.patch new file mode 100644 index 000000000..9ae03aa19 --- /dev/null +++ b/debian/patches/bugfix/all/sfc-Fix-memory-leak-when-discarding-scattered-packet.patch @@ -0,0 +1,83 @@ +From: Ben Hutchings +Date: Thu, 4 Jul 2013 23:48:46 +0100 +Subject: sfc: Fix memory leak when discarding scattered packets +Origin: https://git.kernel.org/linus/734d4e159b283a4ae4d007b7e7a91d84398ccb92 + +Commit 2768935a4660 ('sfc: reuse pages to avoid DMA mapping/unmapping +costs') did not fully take account of DMA scattering which was +introduced immediately before. If a received packet is invalid and +must be discarded, we only drop a reference to the first buffer's +page, but we need to drop a reference for each buffer the packet +used. + +I think this bug was missed partly because efx_recycle_rx_buffers() +was not renamed and so no longer does what its name says. It does not +change the state of buffers, but only prepares the underlying pages +for recycling. Rename it accordingly. + +Signed-off-by: Ben Hutchings +Signed-off-by: David S. Miller +--- + drivers/net/ethernet/sfc/rx.c | 27 ++++++++++++++++++++------- + 1 file changed, 20 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c +index 65646cd..6af9cfd 100644 +--- a/drivers/net/ethernet/sfc/rx.c ++++ b/drivers/net/ethernet/sfc/rx.c +@@ -282,9 +282,9 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + } + + /* Recycle the pages that are used by buffers that have just been received. */ +-static void efx_recycle_rx_buffers(struct efx_channel *channel, +- struct efx_rx_buffer *rx_buf, +- unsigned int n_frags) ++static void efx_recycle_rx_pages(struct efx_channel *channel, ++ struct efx_rx_buffer *rx_buf, ++ unsigned int n_frags) + { + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + +@@ -294,6 +294,20 @@ static void efx_recycle_rx_buffers(struct efx_channel *channel, + } while (--n_frags); + } + ++static void efx_discard_rx_packet(struct efx_channel *channel, ++ struct efx_rx_buffer *rx_buf, ++ unsigned int n_frags) ++{ ++ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); ++ ++ efx_recycle_rx_pages(channel, rx_buf, n_frags); ++ ++ do { ++ efx_free_rx_buffer(rx_buf); ++ rx_buf = efx_rx_buf_next(rx_queue, rx_buf); ++ } while (--n_frags); ++} ++ + /** + * efx_fast_push_rx_descriptors - push new RX descriptors quickly + * @rx_queue: RX descriptor queue +@@ -533,8 +547,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, + */ + if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { + efx_rx_flush_packet(channel); +- put_page(rx_buf->page); +- efx_recycle_rx_buffers(channel, rx_buf, n_frags); ++ efx_discard_rx_packet(channel, rx_buf, n_frags); + return; + } + +@@ -570,9 +583,9 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, + efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); + } + +- /* All fragments have been DMA-synced, so recycle buffers and pages. */ ++ /* All fragments have been DMA-synced, so recycle pages. */ + rx_buf = efx_rx_buffer(rx_queue, index); +- efx_recycle_rx_buffers(channel, rx_buf, n_frags); ++ efx_recycle_rx_pages(channel, rx_buf, n_frags); + + /* Pipeline receives so that we give time for packet headers to be + * prefetched into cache. diff --git a/debian/patches/bugfix/all/tuntap-correctly-linearize-skb-when-zerocopy-is-used.patch b/debian/patches/bugfix/all/tuntap-correctly-linearize-skb-when-zerocopy-is-used.patch new file mode 100644 index 000000000..9afd969fc --- /dev/null +++ b/debian/patches/bugfix/all/tuntap-correctly-linearize-skb-when-zerocopy-is-used.patch @@ -0,0 +1,53 @@ +From: Jason Wang +Date: Wed, 10 Jul 2013 13:43:27 +0800 +Subject: tuntap: correctly linearize skb when zerocopy is used +Origin: https://git.kernel.org/linus/3dd5c3308e8b671e8e8882ba972f51cefbe9fd0d + +Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to +linearize parts of the skb to let the rest of iov to be fit in +the frags, we need count copylen into linear when calling tun_alloc_skb() +instead of partly counting it into data_len. Since this breaks +zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should +be zero at beginning. This cause nr_frags to be increased wrongly without +setting the correct frags. + +This bug were introduced from 0690899b4d4501b3505be069b9a687e68ccbe15b +(tun: experimental zero copy tx support) + +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +--- + drivers/net/tun.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/tun.c b/drivers/net/tun.c +index 7eab5fc..5cdcf92 100644 +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1042,7 +1042,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, + { + struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; + struct sk_buff *skb; +- size_t len = total_len, align = NET_SKB_PAD; ++ size_t len = total_len, align = NET_SKB_PAD, linear; + struct virtio_net_hdr gso = { 0 }; + int offset = 0; + int copylen; +@@ -1106,10 +1106,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, + copylen = gso.hdr_len; + if (!copylen) + copylen = GOODCOPY_LEN; +- } else ++ linear = copylen; ++ } else { + copylen = len; ++ linear = gso.hdr_len; ++ } + +- skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock); ++ skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); + if (IS_ERR(skb)) { + if (PTR_ERR(skb) != -EAGAIN) + tun->dev->stats.rx_dropped++; diff --git a/debian/patches/bugfix/all/vhost-net-fix-use-after-free-in-vhost_net_flush.patch b/debian/patches/bugfix/all/vhost-net-fix-use-after-free-in-vhost_net_flush.patch new file mode 100644 index 000000000..864eb8c62 --- /dev/null +++ b/debian/patches/bugfix/all/vhost-net-fix-use-after-free-in-vhost_net_flush.patch @@ -0,0 +1,56 @@ +From: "Michael S. Tsirkin" +Date: Tue, 25 Jun 2013 17:29:46 +0300 +Subject: vhost-net: fix use-after-free in vhost_net_flush +Origin: https://git.kernel.org/linus/c38e39c378f46f00ce922dd40a91043a9925c28d + +vhost_net_ubuf_put_and_wait has a confusing name: +it will actually also free it's argument. +Thus since commit 1280c27f8e29acf4af2da914e80ec27c3dbd5c01 + "vhost-net: flush outstanding DMAs on memory change" +vhost_net_flush tries to use the argument after passing it +to vhost_net_ubuf_put_and_wait, this results +in use after free. +To fix, don't free the argument in vhost_net_ubuf_put_and_wait, +add an new API for callers that want to free ubufs. + +Acked-by: Asias He +Acked-by: Jason Wang +Signed-off-by: Michael S. Tsirkin +--- + drivers/vhost/net.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c +index f80d3dd..8ca5ac7 100644 +--- a/drivers/vhost/net.c ++++ b/drivers/vhost/net.c +@@ -150,6 +150,11 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) + { + kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal); + wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); ++} ++ ++static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) ++{ ++ vhost_net_ubuf_put_and_wait(ubufs); + kfree(ubufs); + } + +@@ -948,7 +953,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) + mutex_unlock(&vq->mutex); + + if (oldubufs) { +- vhost_net_ubuf_put_and_wait(oldubufs); ++ vhost_net_ubuf_put_wait_and_free(oldubufs); + mutex_lock(&vq->mutex); + vhost_zerocopy_signal_used(n, vq); + mutex_unlock(&vq->mutex); +@@ -966,7 +971,7 @@ err_used: + rcu_assign_pointer(vq->private_data, oldsock); + vhost_net_enable_vq(n, vq); + if (ubufs) +- vhost_net_ubuf_put_and_wait(ubufs); ++ vhost_net_ubuf_put_wait_and_free(ubufs); + err_ubufs: + fput(sock->file); + err_vq: diff --git a/debian/patches/bugfix/all/virtio-support-unlocked-queue-poll.patch b/debian/patches/bugfix/all/virtio-support-unlocked-queue-poll.patch new file mode 100644 index 000000000..d01dc02db --- /dev/null +++ b/debian/patches/bugfix/all/virtio-support-unlocked-queue-poll.patch @@ -0,0 +1,119 @@ +From: "Michael S. Tsirkin" +Date: Tue, 9 Jul 2013 13:19:18 +0300 +Subject: virtio: support unlocked queue poll +Origin: https://git.kernel.org/linus/cc229884d3f77ec3b1240e467e0236c3e0647c0c + +This adds a way to check ring empty state after enable_cb outside any +locks. Will be used by virtio_net. + +Note: there's room for more optimization: caller is likely to have a +memory barrier already, which means we might be able to get rid of a +barrier here. Deferring this optimization until we do some +benchmarking. + +Signed-off-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +--- + drivers/virtio/virtio_ring.c | 56 ++++++++++++++++++++++++++++++++++---------- + include/linux/virtio.h | 4 ++++ + 2 files changed, 48 insertions(+), 12 deletions(-) + +diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c +index 5217baf..37d58f8 100644 +--- a/drivers/virtio/virtio_ring.c ++++ b/drivers/virtio/virtio_ring.c +@@ -607,19 +607,21 @@ void virtqueue_disable_cb(struct virtqueue *_vq) + EXPORT_SYMBOL_GPL(virtqueue_disable_cb); + + /** +- * virtqueue_enable_cb - restart callbacks after disable_cb. ++ * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * +- * This re-enables callbacks; it returns "false" if there are pending +- * buffers in the queue, to detect a possible race between the driver +- * checking for more work, and enabling callbacks. ++ * This re-enables callbacks; it returns current queue state ++ * in an opaque unsigned value. This value should be later tested by ++ * virtqueue_poll, to detect a possible race between the driver checking for ++ * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +-bool virtqueue_enable_cb(struct virtqueue *_vq) ++unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) + { + struct vring_virtqueue *vq = to_vvq(_vq); ++ u16 last_used_idx; + + START_USE(vq); + +@@ -629,15 +631,45 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; +- vring_used_event(&vq->vring) = vq->last_used_idx; ++ vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; ++ END_USE(vq); ++ return last_used_idx; ++} ++EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); ++ ++/** ++ * virtqueue_poll - query pending used buffers ++ * @vq: the struct virtqueue we're talking about. ++ * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). ++ * ++ * Returns "true" if there are pending used buffers in the queue. ++ * ++ * This does not need to be serialized. ++ */ ++bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) ++{ ++ struct vring_virtqueue *vq = to_vvq(_vq); ++ + virtio_mb(vq->weak_barriers); +- if (unlikely(more_used(vq))) { +- END_USE(vq); +- return false; +- } ++ return (u16)last_used_idx != vq->vring.used->idx; ++} ++EXPORT_SYMBOL_GPL(virtqueue_poll); + +- END_USE(vq); +- return true; ++/** ++ * virtqueue_enable_cb - restart callbacks after disable_cb. ++ * @vq: the struct virtqueue we're talking about. ++ * ++ * This re-enables callbacks; it returns "false" if there are pending ++ * buffers in the queue, to detect a possible race between the driver ++ * checking for more work, and enabling callbacks. ++ * ++ * Caller must ensure we don't call this with other virtqueue ++ * operations at the same time (except where noted). ++ */ ++bool virtqueue_enable_cb(struct virtqueue *_vq) ++{ ++ unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); ++ return !virtqueue_poll(_vq, last_used_idx); + } + EXPORT_SYMBOL_GPL(virtqueue_enable_cb); + +diff --git a/include/linux/virtio.h b/include/linux/virtio.h +index 9ff8645..72398ee 100644 +--- a/include/linux/virtio.h ++++ b/include/linux/virtio.h +@@ -70,6 +70,10 @@ void virtqueue_disable_cb(struct virtqueue *vq); + + bool virtqueue_enable_cb(struct virtqueue *vq); + ++unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); ++ ++bool virtqueue_poll(struct virtqueue *vq, unsigned); ++ + bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + + void *virtqueue_detach_unused_buf(struct virtqueue *vq); diff --git a/debian/patches/bugfix/all/virtio_net-fix-race-in-RX-VQ-processing.patch b/debian/patches/bugfix/all/virtio_net-fix-race-in-RX-VQ-processing.patch new file mode 100644 index 000000000..4492a279e --- /dev/null +++ b/debian/patches/bugfix/all/virtio_net-fix-race-in-RX-VQ-processing.patch @@ -0,0 +1,56 @@ +From: "Michael S. Tsirkin" +Date: Tue, 9 Jul 2013 08:13:04 +0300 +Subject: virtio_net: fix race in RX VQ processing +Origin: https://git.kernel.org/linus/cbdadbbf0c790f79350a8f36029208944c5487d0 + +virtio net called virtqueue_enable_cq on RX path after napi_complete, so +with NAPI_STATE_SCHED clear - outside the implicit napi lock. +This violates the requirement to synchronize virtqueue_enable_cq wrt +virtqueue_add_buf. In particular, used event can move backwards, +causing us to lose interrupts. +In a debug build, this can trigger panic within START_USE. + +Jason Wang reports that he can trigger the races artificially, +by adding udelay() in virtqueue_enable_cb() after virtio_mb(). + +However, we must call napi_complete to clear NAPI_STATE_SCHED before +polling the virtqueue for used buffers, otherwise napi_schedule_prep in +a callback will fail, causing us to lose RX events. + +To fix, call virtqueue_enable_cb_prepare with NAPI_STATE_SCHED +set (under napi lock), later call virtqueue_poll with +NAPI_STATE_SCHED clear (outside the lock). + +Reported-by: Jason Wang +Tested-by: Jason Wang +Acked-by: Jason Wang +Signed-off-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +--- + drivers/net/virtio_net.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index c9e0038..42d670a 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -602,7 +602,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget) + container_of(napi, struct receive_queue, napi); + struct virtnet_info *vi = rq->vq->vdev->priv; + void *buf; +- unsigned int len, received = 0; ++ unsigned int r, len, received = 0; + + again: + while (received < budget && +@@ -619,8 +619,9 @@ again: + + /* Out of packets? */ + if (received < budget) { ++ r = virtqueue_enable_cb_prepare(rq->vq); + napi_complete(napi); +- if (unlikely(!virtqueue_enable_cb(rq->vq)) && ++ if (unlikely(virtqueue_poll(rq->vq, r)) && + napi_schedule_prep(napi)) { + virtqueue_disable_cb(rq->vq); + __napi_schedule(napi); diff --git a/debian/patches/bugfix/all/writeback-Fix-periodic-writeback-after-fs-mount.patch b/debian/patches/bugfix/all/writeback-Fix-periodic-writeback-after-fs-mount.patch new file mode 100644 index 000000000..9e4ea09cd --- /dev/null +++ b/debian/patches/bugfix/all/writeback-Fix-periodic-writeback-after-fs-mount.patch @@ -0,0 +1,55 @@ +From: Jan Kara +Date: Fri, 28 Jun 2013 16:04:02 +0200 +Subject: writeback: Fix periodic writeback after fs mount +Origin: https://git.kernel.org/linus/a5faeaf9109578e65e1a32e2a3e76c8b47e7dcb6 + +Code in blkdev.c moves a device inode to default_backing_dev_info when +the last reference to the device is put and moves the device inode back +to its bdi when the first reference is acquired. This includes moving to +wb.b_dirty list if the device inode is dirty. The code however doesn't +setup timer to wake corresponding flusher thread and while wb.b_dirty +list is non-empty __mark_inode_dirty() will not set it up either. Thus +periodic writeback is effectively disabled until a sync(2) call which can +lead to unexpected data loss in case of crash or power failure. + +Fix the problem by setting up a timer for periodic writeback in case we +add the first dirty inode to wb.b_dirty list in bdev_inode_switch_bdi(). + +Reported-by: Bert De Jonghe +CC: stable@vger.kernel.org # >= 3.0 +Signed-off-by: Jan Kara +Signed-off-by: Jens Axboe +--- + fs/block_dev.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/fs/block_dev.c b/fs/block_dev.c +index 2091db8..85f5c85 100644 +--- a/fs/block_dev.c ++++ b/fs/block_dev.c +@@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, + struct backing_dev_info *dst) + { + struct backing_dev_info *old = inode->i_data.backing_dev_info; ++ bool wakeup_bdi = false; + + if (unlikely(dst == old)) /* deadlock avoidance */ + return; + bdi_lock_two(&old->wb, &dst->wb); + spin_lock(&inode->i_lock); + inode->i_data.backing_dev_info = dst; +- if (inode->i_state & I_DIRTY) ++ if (inode->i_state & I_DIRTY) { ++ if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) ++ wakeup_bdi = true; + list_move(&inode->i_wb_list, &dst->wb.b_dirty); ++ } + spin_unlock(&inode->i_lock); + spin_unlock(&old->wb.list_lock); + spin_unlock(&dst->wb.list_lock); ++ ++ if (wakeup_bdi) ++ bdi_wakeup_thread_delayed(dst); + } + + /* Kill _all_ buffers and pagecache , dirty or not.. */ diff --git a/debian/patches/series b/debian/patches/series index d77807dc0..03bc07cd7 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -104,8 +104,21 @@ bugfix/all/alx-fix-MAC-address-alignment-problem.patch bugfix/all/alx-fix-ethtool-support-code.patch bugfix/all/alx-remove-WoL-support.patch bugfix/all/alx-fix-lockdep-annotation.patch + bugfix/all/xen-blkback-Check-device-permissions-before-allowing.patch bugfix/all/be2net-Fix-to-avoid-hardware-workaround-when-not-nee.patch features/all/iwlwifi-mvm-support-BSS-only.patch features/all/iwlwifi-mvm-adjust-firmware-D3-configuration-API.patch features/all/iwlwifi-bump-required-firmware-API-version-for-3160-.patch + +# Cherry-picked fixes from 3.10.4-rc1 +bugfix/all/writeback-Fix-periodic-writeback-after-fs-mount.patch +bugfix/all/sfc-Fix-memory-leak-when-discarding-scattered-packet.patch +bugfix/all/neighbour-fix-a-race-in-neigh_destroy.patch +bugfix/all/virtio-support-unlocked-queue-poll.patch +bugfix/all/virtio_net-fix-race-in-RX-VQ-processing.patch +bugfix/all/vhost-net-fix-use-after-free-in-vhost_net_flush.patch +bugfix/all/tuntap-correctly-linearize-skb-when-zerocopy-is-used.patch +bugfix/all/macvtap-correctly-linearize-skb-when-zerocopy-is-use.patch +bugfix/all/ipv6-in-case-of-link-failure-remove-route-directly-i.patch +bugfix/all/9p-fix-off-by-one-causing-access-violations-and-memo.patch