diff --git a/debian/changelog b/debian/changelog index 8291d3378..3b46b76c7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,74 @@ -linux (4.3.1-2) UNRELEASED; urgency=medium +linux (4.3.3-1) UNRELEASED; urgency=medium + * New upstream stable update: + https://www.kernel.org/pub/linux/kernel/v4.x/ChangeLog-4.3.2 + - X.509: Fix the time validation [ver #2] + https://www.kernel.org/pub/linux/kernel/v4.x/ChangeLog-4.3.3 + - r8169: fix kasan reported skb use-after-free. (regression in 4.3) + - af-unix: fix use-after-free with concurrent readers while splicing + (regression in 4.2) + - af_unix: don't append consumed skbs to sk_receive_queue + (regression in 4.2) + - af_unix: take receive queue lock while appending new skb + (regression in 4.2) + - af-unix: passcred support for sendpage (regression in 4.2) + - ipv6: Avoid creating RTF_CACHE from a rt that is not managed by fib6 tree + (regression in 4.2) + - ipv6: Check expire on DST_NOCACHE route + - ipv6: Check rt->dst.from for the DST_NOCACHE route (regression in 4.3) + - Revert "ipv6: ndisc: inherit metadata dst when creating ndisc requests" + (regression in 4.3) + - packet: only allow extra vlan len on ethernet devices + - packet: infer protocol from ethernet header if unset + - packet: fix tpacket_snd max frame len + - sctp: translate host order to network order when setting a hmacid + - net/mlx5e: Added self loopback prevention (regression in 4.3) + - net/mlx4_core: Fix sleeping while holding spinlock at rem_slave_counters + (regression in 4.2) + - ip_tunnel: disable preemption when updating per-cpu tstats + - net/ip6_tunnel: fix dst leak (regression in 4.3) + - tcp: disable Fast Open on timeouts after handshake + - tcp: fix potential huge kmalloc() calls in TCP_REPAIR + - tcp: initialize tp->copied_seq in case of cross SYN connection + - net, scm: fix PaX detected msg_controllen overflow in scm_detach_fds + - net: ipmr: fix static mfc/dev leaks on table destruction + - net: ip6mr: fix static mfc/dev leaks on table destruction + - vrf: fix double free and memory corruption on register_netdevice failure + - tipc: fix error handling of expanding buffer headroom (regression in 4.3) + - ipv6: distinguish frag queues by device for multicast and link-local + packets + - bpf, array: fix heap out-of-bounds access when updating elements + - ipv6: add complete rcu protection around np->opt + - net/neighbour: fix crash at dumping device-agnostic proxy entries + - ipv6: sctp: implement sctp_v6_destroy_sock() + - openvswitch: fix hangup on vxlan/gre/geneve device deletion + - net_sched: fix qdisc_tree_decrease_qlen() races + - btrfs: fix resending received snapshot with parent (regression in 4.2) + - Btrfs: fix file corruption and data loss after cloning inline extents + - Btrfs: fix regression when running delayed references (regression in 4.2) + - Btrfs: fix race leading to incorrect item deletion when dropping extents + - Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow + - Btrfs: fix race when listing an inode's xattrs + - rbd: don't put snap_context twice in rbd_queue_workfn() + - ext4 crypto: fix memory leak in ext4_bio_write_page() + - ext4 crypto: fix bugs in ext4_encrypted_zeroout() + - ext4: fix potential use after free in __ext4_journal_stop + (regression in 4.2) + - ext4, jbd2: ensure entering into panic after recording an error in + superblock + - nfsd: serialize state seqid morphing operations + - nfsd: eliminate sending duplicate and repeated delegations + - nfs4: start callback_ident at idr 1 + - nfs4: resend LAYOUTGET when there is a race that changes the seqid + - nfs: if we have no valid attrs, then don't declare the attribute cache + valid + - ocfs2: fix umask ignored issue + - block: fix segment split (regression in 4.3) + - ceph: fix message length computation + - Btrfs: fix regression running delayed references when using qgroups + (regression in 4.2) + + [ Ben Hutchings ] * net: add validation for the socket syscall protocol argument (CVE-2015-8543) * [armel/kirkwood] udeb: Override inclusion of gpio_keys in input-modules (fixes FTBFS) diff --git a/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch b/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch deleted file mode 100644 index 7cd84014d..000000000 --- a/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch +++ /dev/null @@ -1,283 +0,0 @@ -From: Filipe Manana -Date: Fri, 16 Oct 2015 12:34:25 +0100 -Subject: Btrfs: fix truncation of compressed and inlined extents -Origin: https://git.kernel.org/linus/0305cd5f7fca85dae392b9ba85b116896eb7c1c7 - -When truncating a file to a smaller size which consists of an inline -extent that is compressed, we did not discard (or made unusable) the -data between the new file size and the old file size, wasting metadata -space and allowing for the truncated data to be leaked and the data -corruption/loss mentioned below. -We were also not correctly decrementing the number of bytes used by the -inode, we were setting it to zero, giving a wrong report for callers of -the stat(2) syscall. The fsck tool also reported an error about a mismatch -between the nbytes of the file versus the real space used by the file. - -Now because we weren't discarding the truncated region of the file, it -was possible for a caller of the clone ioctl to actually read the data -that was truncated, allowing for a security breach without requiring root -access to the system, using only standard filesystem operations. The -scenario is the following: - - 1) User A creates a file which consists of an inline and compressed - extent with a size of 2000 bytes - the file is not accessible to - any other users (no read, write or execution permission for anyone - else); - - 2) The user truncates the file to a size of 1000 bytes; - - 3) User A makes the file world readable; - - 4) User B creates a file consisting of an inline extent of 2000 bytes; - - 5) User B issues a clone operation from user A's file into its own - file (using a length argument of 0, clone the whole range); - - 6) User B now gets to see the 1000 bytes that user A truncated from - its file before it made its file world readbale. User B also lost - the bytes in the range [1000, 2000[ bytes from its own file, but - that might be ok if his/her intention was reading stale data from - user A that was never supposed to be public. - -Note that this contrasts with the case where we truncate a file from 2000 -bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In -this case reading any byte from the range [1000, 2000[ will return a value -of 0x00, instead of the original data. - -This problem exists since the clone ioctl was added and happens both with -and without my recent data loss and file corruption fixes for the clone -ioctl (patch "Btrfs: fix file corruption and data loss after cloning -inline extents"). - -So fix this by truncating the compressed inline extents as we do for the -non-compressed case, which involves decompressing, if the data isn't already -in the page cache, compressing the truncated version of the extent, writing -the compressed content into the inline extent and then truncate it. - -The following test case for fstests reproduces the problem. In order for -the test to pass both this fix and my previous fix for the clone ioctl -that forbids cloning a smaller inline extent into a larger one, -which is titled "Btrfs: fix file corruption and data loss after cloning -inline extents", are needed. Without that other fix the test fails in a -different way that does not leak the truncated data, instead part of -destination file gets replaced with zeroes (because the destination file -has a larger inline extent than the source). - - seq=`basename $0` - seqres=$RESULT_DIR/$seq - echo "QA output created by $seq" - tmp=/tmp/$$ - status=1 # failure is the default! - trap "_cleanup; exit \$status" 0 1 2 3 15 - - _cleanup() - { - rm -f $tmp.* - } - - # get standard environment, filters and checks - . ./common/rc - . ./common/filter - - # real QA test starts here - _need_to_be_root - _supported_fs btrfs - _supported_os Linux - _require_scratch - _require_cloner - - rm -f $seqres.full - - _scratch_mkfs >>$seqres.full 2>&1 - _scratch_mount "-o compress" - - # Create our test files. File foo is going to be the source of a clone operation - # and consists of a single inline extent with an uncompressed size of 512 bytes, - # while file bar consists of a single inline extent with an uncompressed size of - # 256 bytes. For our test's purpose, it's important that file bar has an inline - # extent with a size smaller than foo's inline extent. - $XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128" \ - -c "pwrite -S 0x2a 128 384" \ - $SCRATCH_MNT/foo | _filter_xfs_io - $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io - - # Now durably persist all metadata and data. We do this to make sure that we get - # on disk an inline extent with a size of 512 bytes for file foo. - sync - - # Now truncate our file foo to a smaller size. Because it consists of a - # compressed and inline extent, btrfs did not shrink the inline extent to the - # new size (if the extent was not compressed, btrfs would shrink it to 128 - # bytes), it only updates the inode's i_size to 128 bytes. - $XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo - - # Now clone foo's inline extent into bar. - # This clone operation should fail with errno EOPNOTSUPP because the source - # file consists only of an inline extent and the file's size is smaller than - # the inline extent of the destination (128 bytes < 256 bytes). However the - # clone ioctl was not prepared to deal with a file that has a size smaller - # than the size of its inline extent (something that happens only for compressed - # inline extents), resulting in copying the full inline extent from the source - # file into the destination file. - # - # Note that btrfs' clone operation for inline extents consists of removing the - # inline extent from the destination inode and copy the inline extent from the - # source inode into the destination inode, meaning that if the destination - # inode's inline extent is larger (N bytes) than the source inode's inline - # extent (M bytes), some bytes (N - M bytes) will be lost from the destination - # file. Btrfs could copy the source inline extent's data into the destination's - # inline extent so that we would not lose any data, but that's currently not - # done due to the complexity that would be needed to deal with such cases - # (specially when one or both extents are compressed), returning EOPNOTSUPP, as - # it's normally not a very common case to clone very small files (only case - # where we get inline extents) and copying inline extents does not save any - # space (unlike for normal, non-inlined extents). - $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar - - # Now because the above clone operation used to succeed, and due to foo's inline - # extent not being shinked by the truncate operation, our file bar got the whole - # inline extent copied from foo, making us lose the last 128 bytes from bar - # which got replaced by the bytes in range [128, 256[ from foo before foo was - # truncated - in other words, data loss from bar and being able to read old and - # stale data from foo that should not be possible to read anymore through normal - # filesystem operations. Contrast with the case where we truncate a file from a - # size N to a smaller size M, truncate it back to size N and then read the range - # [M, N[, we should always get the value 0x00 for all the bytes in that range. - - # We expected the clone operation to fail with errno EOPNOTSUPP and therefore - # not modify our file's bar data/metadata. So its content should be 256 bytes - # long with all bytes having the value 0xbb. - # - # Without the btrfs bug fix, the clone operation succeeded and resulted in - # leaking truncated data from foo, the bytes that belonged to its range - # [128, 256[, and losing data from bar in that same range. So reading the - # file gave us the following content: - # - # 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 - # * - # 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a - # * - # 0000400 - echo "File bar's content after the clone operation:" - od -t x1 $SCRATCH_MNT/bar - - # Also because the foo's inline extent was not shrunk by the truncate - # operation, btrfs' fsck, which is run by the fstests framework everytime a - # test completes, failed reporting the following error: - # - # root 5 inode 257 errors 400, nbytes wrong - - status=0 - exit - -Cc: stable@vger.kernel.org -Signed-off-by: Filipe Manana ---- - fs/btrfs/inode.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 68 insertions(+), 14 deletions(-) - ---- a/fs/btrfs/inode.c -+++ b/fs/btrfs/inode.c -@@ -4184,6 +4184,47 @@ static int truncate_space_check(struct b - - } - -+static int truncate_inline_extent(struct inode *inode, -+ struct btrfs_path *path, -+ struct btrfs_key *found_key, -+ const u64 item_end, -+ const u64 new_size) -+{ -+ struct extent_buffer *leaf = path->nodes[0]; -+ int slot = path->slots[0]; -+ struct btrfs_file_extent_item *fi; -+ u32 size = (u32)(new_size - found_key->offset); -+ struct btrfs_root *root = BTRFS_I(inode)->root; -+ -+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); -+ -+ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { -+ loff_t offset = new_size; -+ loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); -+ -+ /* -+ * Zero out the remaining of the last page of our inline extent, -+ * instead of directly truncating our inline extent here - that -+ * would be much more complex (decompressing all the data, then -+ * compressing the truncated data, which might be bigger than -+ * the size of the inline extent, resize the extent, etc). -+ * We release the path because to get the page we might need to -+ * read the extent item from disk (data not in the page cache). -+ */ -+ btrfs_release_path(path); -+ return btrfs_truncate_page(inode, offset, page_end - offset, 0); -+ } -+ -+ btrfs_set_file_extent_ram_bytes(leaf, fi, size); -+ size = btrfs_file_extent_calc_inline_size(size); -+ btrfs_truncate_item(root, path, size, 1); -+ -+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) -+ inode_sub_bytes(inode, item_end + 1 - new_size); -+ -+ return 0; -+} -+ - /* - * this can truncate away extent items, csum items and directory items. - * It starts at a high offset and removes keys until it can't find -@@ -4378,27 +4419,40 @@ search_again: - * special encodings - */ - if (!del_item && -- btrfs_file_extent_compression(leaf, fi) == 0 && - btrfs_file_extent_encryption(leaf, fi) == 0 && - btrfs_file_extent_other_encoding(leaf, fi) == 0) { -- u32 size = new_size - found_key.offset; -- -- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) -- inode_sub_bytes(inode, item_end + 1 - -- new_size); - - /* -- * update the ram bytes to properly reflect -- * the new size of our item -+ * Need to release path in order to truncate a -+ * compressed extent. So delete any accumulated -+ * extent items so far. - */ -- btrfs_set_file_extent_ram_bytes(leaf, fi, size); -- size = -- btrfs_file_extent_calc_inline_size(size); -- btrfs_truncate_item(root, path, size, 1); -+ if (btrfs_file_extent_compression(leaf, fi) != -+ BTRFS_COMPRESS_NONE && pending_del_nr) { -+ err = btrfs_del_items(trans, root, path, -+ pending_del_slot, -+ pending_del_nr); -+ if (err) { -+ btrfs_abort_transaction(trans, -+ root, -+ err); -+ goto error; -+ } -+ pending_del_nr = 0; -+ } -+ -+ err = truncate_inline_extent(inode, path, -+ &found_key, -+ item_end, -+ new_size); -+ if (err) { -+ btrfs_abort_transaction(trans, -+ root, err); -+ goto error; -+ } - } else if (test_bit(BTRFS_ROOT_REF_COWS, - &root->state)) { -- inode_sub_bytes(inode, item_end + 1 - -- found_key.offset); -+ inode_sub_bytes(inode, item_end + 1 - new_size); - } - } - delete: diff --git a/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch b/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch deleted file mode 100644 index 299242e21..000000000 --- a/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch +++ /dev/null @@ -1,69 +0,0 @@ -From: Quentin Casasnovas -Subject: RDS: fix race condition when sending a message on unbound socket. -Date: Fri, 16 Oct 2015 17:11:42 +0200 -Origin: https://lkml.org/lkml/2015/10/16/530 - -Sasha's found a NULL pointer dereference in the RDS connection code when -sending a message to an apparently unbound socket. The problem is caused -by the code checking if the socket is bound in rds_sendmsg(), which checks -the rs_bound_addr field without taking a lock on the socket. This opens a -race where rs_bound_addr is temporarily set but where the transport is not -in rds_bind(), leading to a NULL pointer dereference when trying to -dereference 'trans' in __rds_conn_create(). - -Vegard wrote a reproducer for this issue, so kindly ask him to share if -you're interested. - -I cannot reproduce the NULL pointer dereference using Vegard's reproducer -with this patch, whereas I could without. - -Complete earlier incomplete fix to CVE-2015-6937: - - 74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection") - -Signed-off-by: Quentin Casasnovas -Reviewed-by: Vegard Nossum -Reviewed-by: Sasha Levin -Cc: Vegard Nossum -Cc: Sasha Levin -Cc: Chien Yen -Cc: Santosh Shilimkar -Cc: David S. Miller -Cc: stable@vger.kernel.org ---- - net/rds/connection.c | 6 ------ - net/rds/send.c | 4 +++- - 2 files changed, 3 insertions(+), 7 deletions(-) - ---- a/net/rds/connection.c -+++ b/net/rds/connection.c -@@ -190,12 +190,6 @@ new_conn: - } - } - -- if (trans == NULL) { -- kmem_cache_free(rds_conn_slab, conn); -- conn = ERR_PTR(-ENODEV); -- goto out; -- } -- - conn->c_trans = trans; - - ret = trans->conn_alloc(conn, gfp); ---- a/net/rds/send.c -+++ b/net/rds/send.c -@@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, str - release_sock(sk); - } - -- /* racing with another thread binding seems ok here */ -+ lock_sock(sk); - if (daddr == 0 || rs->rs_bound_addr == 0) { -+ release_sock(sk); - ret = -ENOTCONN; /* XXX not a great errno */ - goto out; - } -+ release_sock(sk); - - if (payload_len > rds_sk_sndbuf(rs)) { - ret = -EMSGSIZE; diff --git a/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch b/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch deleted file mode 100644 index 6fb47c798..000000000 --- a/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch +++ /dev/null @@ -1,325 +0,0 @@ -From: Rainer Weikusat -Date: Fri, 20 Nov 2015 22:07:23 +0000 -Subject: unix: avoid use-after-free in ep_remove_wait_queue -Origin: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git//commit?id=7d267278a9ece963d77eefec61630223fce08c6c - -Rainer Weikusat writes: -An AF_UNIX datagram socket being the client in an n:1 association with -some server socket is only allowed to send messages to the server if the -receive queue of this socket contains at most sk_max_ack_backlog -datagrams. This implies that prospective writers might be forced to go -to sleep despite none of the message presently enqueued on the server -receive queue were sent by them. In order to ensure that these will be -woken up once space becomes again available, the present unix_dgram_poll -routine does a second sock_poll_wait call with the peer_wait wait queue -of the server socket as queue argument (unix_dgram_recvmsg does a wake -up on this queue after a datagram was received). This is inherently -problematic because the server socket is only guaranteed to remain alive -for as long as the client still holds a reference to it. In case the -connection is dissolved via connect or by the dead peer detection logic -in unix_dgram_sendmsg, the server socket may be freed despite "the -polling mechanism" (in particular, epoll) still has a pointer to the -corresponding peer_wait queue. There's no way to forcibly deregister a -wait queue with epoll. - -Based on an idea by Jason Baron, the patch below changes the code such -that a wait_queue_t belonging to the client socket is enqueued on the -peer_wait queue of the server whenever the peer receive queue full -condition is detected by either a sendmsg or a poll. A wake up on the -peer queue is then relayed to the ordinary wait queue of the client -socket via wake function. The connection to the peer wait queue is again -dissolved if either a wake up is about to be relayed or the client -socket reconnects or a dead peer is detected or the client socket is -itself closed. This enables removing the second sock_poll_wait from -unix_dgram_poll, thus avoiding the use-after-free, while still ensuring -that no blocked writer sleeps forever. - -Signed-off-by: Rainer Weikusat -Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets") -Reviewed-by: Jason Baron -Signed-off-by: David S. Miller -[bwh: Backported to 4.2: adjust context] ---- - include/net/af_unix.h | 1 + - net/unix/af_unix.c | 183 ++++++++++++++++++++++++++++++++++++++++++++------ - 2 files changed, 165 insertions(+), 19 deletions(-) - ---- a/include/net/af_unix.h -+++ b/include/net/af_unix.h -@@ -62,6 +62,7 @@ struct unix_sock { - #define UNIX_GC_CANDIDATE 0 - #define UNIX_GC_MAYBE_CYCLE 1 - struct socket_wq peer_wq; -+ wait_queue_t peer_wake; - }; - - static inline struct unix_sock *unix_sk(const struct sock *sk) ---- a/net/unix/af_unix.c -+++ b/net/unix/af_unix.c -@@ -326,6 +326,118 @@ found: - return s; - } - -+/* Support code for asymmetrically connected dgram sockets -+ * -+ * If a datagram socket is connected to a socket not itself connected -+ * to the first socket (eg, /dev/log), clients may only enqueue more -+ * messages if the present receive queue of the server socket is not -+ * "too large". This means there's a second writeability condition -+ * poll and sendmsg need to test. The dgram recv code will do a wake -+ * up on the peer_wait wait queue of a socket upon reception of a -+ * datagram which needs to be propagated to sleeping would-be writers -+ * since these might not have sent anything so far. This can't be -+ * accomplished via poll_wait because the lifetime of the server -+ * socket might be less than that of its clients if these break their -+ * association with it or if the server socket is closed while clients -+ * are still connected to it and there's no way to inform "a polling -+ * implementation" that it should let go of a certain wait queue -+ * -+ * In order to propagate a wake up, a wait_queue_t of the client -+ * socket is enqueued on the peer_wait queue of the server socket -+ * whose wake function does a wake_up on the ordinary client socket -+ * wait queue. This connection is established whenever a write (or -+ * poll for write) hit the flow control condition and broken when the -+ * association to the server socket is dissolved or after a wake up -+ * was relayed. -+ */ -+ -+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, -+ void *key) -+{ -+ struct unix_sock *u; -+ wait_queue_head_t *u_sleep; -+ -+ u = container_of(q, struct unix_sock, peer_wake); -+ -+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, -+ q); -+ u->peer_wake.private = NULL; -+ -+ /* relaying can only happen while the wq still exists */ -+ u_sleep = sk_sleep(&u->sk); -+ if (u_sleep) -+ wake_up_interruptible_poll(u_sleep, key); -+ -+ return 0; -+} -+ -+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) -+{ -+ struct unix_sock *u, *u_other; -+ int rc; -+ -+ u = unix_sk(sk); -+ u_other = unix_sk(other); -+ rc = 0; -+ spin_lock(&u_other->peer_wait.lock); -+ -+ if (!u->peer_wake.private) { -+ u->peer_wake.private = other; -+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake); -+ -+ rc = 1; -+ } -+ -+ spin_unlock(&u_other->peer_wait.lock); -+ return rc; -+} -+ -+static void unix_dgram_peer_wake_disconnect(struct sock *sk, -+ struct sock *other) -+{ -+ struct unix_sock *u, *u_other; -+ -+ u = unix_sk(sk); -+ u_other = unix_sk(other); -+ spin_lock(&u_other->peer_wait.lock); -+ -+ if (u->peer_wake.private == other) { -+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); -+ u->peer_wake.private = NULL; -+ } -+ -+ spin_unlock(&u_other->peer_wait.lock); -+} -+ -+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, -+ struct sock *other) -+{ -+ unix_dgram_peer_wake_disconnect(sk, other); -+ wake_up_interruptible_poll(sk_sleep(sk), -+ POLLOUT | -+ POLLWRNORM | -+ POLLWRBAND); -+} -+ -+/* preconditions: -+ * - unix_peer(sk) == other -+ * - association is stable -+ */ -+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) -+{ -+ int connected; -+ -+ connected = unix_dgram_peer_wake_connect(sk, other); -+ -+ if (unix_recvq_full(other)) -+ return 1; -+ -+ if (connected) -+ unix_dgram_peer_wake_disconnect(sk, other); -+ -+ return 0; -+} -+ - static inline int unix_writable(struct sock *sk) - { - return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; -@@ -430,6 +542,8 @@ static void unix_release_sock(struct soc - skpair->sk_state_change(skpair); - sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); - } -+ -+ unix_dgram_peer_wake_disconnect(sk, skpair); - sock_put(skpair); /* It may now die */ - unix_peer(sk) = NULL; - } -@@ -664,6 +778,7 @@ static struct sock *unix_create1(struct - INIT_LIST_HEAD(&u->link); - mutex_init(&u->readlock); /* single task reading lock */ - init_waitqueue_head(&u->peer_wait); -+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); - unix_insert_socket(unix_sockets_unbound(sk), sk); - out: - if (sk == NULL) -@@ -1031,6 +1146,8 @@ restart: - if (unix_peer(sk)) { - struct sock *old_peer = unix_peer(sk); - unix_peer(sk) = other; -+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); -+ - unix_state_double_unlock(sk, other); - - if (other != old_peer) -@@ -1470,6 +1587,7 @@ static int unix_dgram_sendmsg(struct soc - struct scm_cookie scm; - int max_level; - int data_len = 0; -+ int sk_locked; - - wait_for_unix_gc(); - err = scm_send(sock, msg, &scm, false); -@@ -1548,12 +1666,14 @@ restart: - goto out_free; - } - -+ sk_locked = 0; - unix_state_lock(other); -+restart_locked: - err = -EPERM; - if (!unix_may_send(sk, other)) - goto out_unlock; - -- if (sock_flag(other, SOCK_DEAD)) { -+ if (unlikely(sock_flag(other, SOCK_DEAD))) { - /* - * Check with 1003.1g - what should - * datagram error -@@ -1561,10 +1681,14 @@ restart: - unix_state_unlock(other); - sock_put(other); - -+ if (!sk_locked) -+ unix_state_lock(sk); -+ - err = 0; -- unix_state_lock(sk); - if (unix_peer(sk) == other) { - unix_peer(sk) = NULL; -+ unix_dgram_peer_wake_disconnect_wakeup(sk, other); -+ - unix_state_unlock(sk); - - unix_dgram_disconnected(sk, other); -@@ -1590,21 +1714,38 @@ restart: - goto out_unlock; - } - -- if (unix_peer(other) != sk && unix_recvq_full(other)) { -- if (!timeo) { -- err = -EAGAIN; -- goto out_unlock; -+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { -+ if (timeo) { -+ timeo = unix_wait_for_peer(other, timeo); -+ -+ err = sock_intr_errno(timeo); -+ if (signal_pending(current)) -+ goto out_free; -+ -+ goto restart; - } - -- timeo = unix_wait_for_peer(other, timeo); -+ if (!sk_locked) { -+ unix_state_unlock(other); -+ unix_state_double_lock(sk, other); -+ } - -- err = sock_intr_errno(timeo); -- if (signal_pending(current)) -- goto out_free; -+ if (unix_peer(sk) != other || -+ unix_dgram_peer_wake_me(sk, other)) { -+ err = -EAGAIN; -+ sk_locked = 1; -+ goto out_unlock; -+ } - -- goto restart; -+ if (!sk_locked) { -+ sk_locked = 1; -+ goto restart_locked; -+ } - } - -+ if (unlikely(sk_locked)) -+ unix_state_unlock(sk); -+ - if (sock_flag(other, SOCK_RCVTSTAMP)) - __net_timestamp(skb); - maybe_add_creds(skb, sock, other); -@@ -1618,6 +1759,8 @@ restart: - return len; - - out_unlock: -+ if (sk_locked) -+ unix_state_unlock(sk); - unix_state_unlock(other); - out_free: - kfree_skb(skb); -@@ -2453,14 +2596,16 @@ static unsigned int unix_dgram_poll(stru - return mask; - - writable = unix_writable(sk); -- other = unix_peer_get(sk); -- if (other) { -- if (unix_peer(other) != sk) { -- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); -- if (unix_recvq_full(other)) -- writable = 0; -- } -- sock_put(other); -+ if (writable) { -+ unix_state_lock(sk); -+ -+ other = unix_peer(sk); -+ if (other && unix_peer(other) != sk && -+ unix_recvq_full(other) && -+ unix_dgram_peer_wake_me(sk, other)) -+ writable = 0; -+ -+ unix_state_unlock(sk); - } - - if (writable) diff --git a/debian/patches/series b/debian/patches/series index 631542793..c45841638 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -85,16 +85,13 @@ bugfix/all/selftests-kprobe-choose-an-always-defined-function-t.patch bugfix/all/selftests-make-scripts-executable.patch bugfix/all/selftests-vm-try-harder-to-allocate-huge-pages.patch bugfix/all/selftests-breakpoints-actually-build-it.patch -bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch bugfix/all/media-media-vivid-osd-fix-info-leak-in-ioctl.patch bugfix/x86/kvm-svm-unconditionally-intercept-DB.patch bugfix/x86/kvm-x86-rename-update_db_bp_intercept-to-update_bp_i.patch bugfix/all/usbvision-fix-overflow-of-interfaces-array.patch bugfix/all/media-usbvision-fix-crash-on-detecting-device-with-i.patch -bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch bugfix/all/isdn_ppp-add-checks-for-allocation-failure-in-isdn_p.patch bugfix/all/ppp-slip-validate-vj-compression-slot-parameters-com.patch -bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch bugfix/x86/drm-i915-shut-up-gen8-sde-irq-dmesg-noise.patch bugfix/arm/arm-dts-kirkwood-fix-qnap-ts219-power-off.patch bugfix/x86/drm-i915-mark-uneven-memory-banks-on-gen4-desktop-as.patch