[linux] 03/04: Update to 4.3.3

debian-kernel at lists.debian.org debian-kernel at lists.debian.org
Tue Dec 15 17:45:21 UTC 2015


This is an automated email from the git hooks/post-receive script.

benh pushed a commit to branch sid
in repository linux.

commit c4e89babe4aef33abcd443d784bd6535d02401dd
Author: Ben Hutchings <ben at decadent.org.uk>
Date:   Tue Dec 15 17:40:55 2015 +0000

    Update to 4.3.3
    
    Drop 3 security fixes that were included in it.
---
 debian/changelog                                   |  71 ++++-
 ...runcation-of-compressed-and-inlined-exten.patch | 283 ------------------
 ...-when-sending-a-message-on-unbound-socket.patch |  69 -----
 ...id-use-after-free-in-ep_remove_wait_queue.patch | 325 ---------------------
 debian/patches/series                              |   3 -
 5 files changed, 70 insertions(+), 681 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 8291d33..3b46b76 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,74 @@
-linux (4.3.1-2) UNRELEASED; urgency=medium
+linux (4.3.3-1) UNRELEASED; urgency=medium
 
+  * New upstream stable update:
+    https://www.kernel.org/pub/linux/kernel/v4.x/ChangeLog-4.3.2
+    - X.509: Fix the time validation [ver #2]
+    https://www.kernel.org/pub/linux/kernel/v4.x/ChangeLog-4.3.3
+    - r8169: fix kasan reported skb use-after-free. (regression in 4.3)
+    - af-unix: fix use-after-free with concurrent readers while splicing
+      (regression in 4.2)
+    - af_unix: don't append consumed skbs to sk_receive_queue
+      (regression in 4.2)
+    - af_unix: take receive queue lock while appending new skb
+      (regression in 4.2)
+    - af-unix: passcred support for sendpage (regression in 4.2)
+    - ipv6: Avoid creating RTF_CACHE from a rt that is not managed by fib6 tree
+      (regression in 4.2)
+    - ipv6: Check expire on DST_NOCACHE route
+    - ipv6: Check rt->dst.from for the DST_NOCACHE route (regression in 4.3)
+    - Revert "ipv6: ndisc: inherit metadata dst when creating ndisc requests"
+      (regression in 4.3)
+    - packet: only allow extra vlan len on ethernet devices
+    - packet: infer protocol from ethernet header if unset
+    - packet: fix tpacket_snd max frame len
+    - sctp: translate host order to network order when setting a hmacid
+    - net/mlx5e: Added self loopback prevention (regression in 4.3)
+    - net/mlx4_core: Fix sleeping while holding spinlock at rem_slave_counters
+      (regression in 4.2)
+    - ip_tunnel: disable preemption when updating per-cpu tstats
+    - net/ip6_tunnel: fix dst leak (regression in 4.3)
+    - tcp: disable Fast Open on timeouts after handshake
+    - tcp: fix potential huge kmalloc() calls in TCP_REPAIR
+    - tcp: initialize tp->copied_seq in case of cross SYN connection
+    - net, scm: fix PaX detected msg_controllen overflow in scm_detach_fds
+    - net: ipmr: fix static mfc/dev leaks on table destruction
+    - net: ip6mr: fix static mfc/dev leaks on table destruction
+    - vrf: fix double free and memory corruption on register_netdevice failure
+    - tipc: fix error handling of expanding buffer headroom (regression in 4.3)
+    - ipv6: distinguish frag queues by device for multicast and link-local
+      packets
+    - bpf, array: fix heap out-of-bounds access when updating elements
+    - ipv6: add complete rcu protection around np->opt
+    - net/neighbour: fix crash at dumping device-agnostic proxy entries
+    - ipv6: sctp: implement sctp_v6_destroy_sock()
+    - openvswitch: fix hangup on vxlan/gre/geneve device deletion
+    - net_sched: fix qdisc_tree_decrease_qlen() races
+    - btrfs: fix resending received snapshot with parent (regression in 4.2)
+    - Btrfs: fix file corruption and data loss after cloning inline extents
+    - Btrfs: fix regression when running delayed references (regression in 4.2)
+    - Btrfs: fix race leading to incorrect item deletion when dropping extents
+    - Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow
+    - Btrfs: fix race when listing an inode's xattrs
+    - rbd: don't put snap_context twice in rbd_queue_workfn()
+    - ext4 crypto: fix memory leak in ext4_bio_write_page()
+    - ext4 crypto: fix bugs in ext4_encrypted_zeroout()
+    - ext4: fix potential use after free in __ext4_journal_stop
+      (regression in 4.2)
+    - ext4, jbd2: ensure entering into panic after recording an error in
+      superblock
+    - nfsd: serialize state seqid morphing operations
+    - nfsd: eliminate sending duplicate and repeated delegations
+    - nfs4: start callback_ident at idr 1
+    - nfs4: resend LAYOUTGET when there is a race that changes the seqid
+    - nfs: if we have no valid attrs, then don't declare the attribute cache
+      valid
+    - ocfs2: fix umask ignored issue
+    - block: fix segment split (regression in 4.3)
+    - ceph: fix message length computation
+    - Btrfs: fix regression running delayed references when using qgroups
+      (regression in 4.2)
+
+  [ Ben Hutchings ]
   * net: add validation for the socket syscall protocol argument (CVE-2015-8543)
   * [armel/kirkwood] udeb: Override inclusion of gpio_keys in input-modules
     (fixes FTBFS)
diff --git a/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch b/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
deleted file mode 100644
index 7cd8401..0000000
--- a/debian/patches/bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
+++ /dev/null
@@ -1,283 +0,0 @@
-From: Filipe Manana <fdmanana at suse.com>
-Date: Fri, 16 Oct 2015 12:34:25 +0100
-Subject: Btrfs: fix truncation of compressed and inlined extents
-Origin: https://git.kernel.org/linus/0305cd5f7fca85dae392b9ba85b116896eb7c1c7
-
-When truncating a file to a smaller size which consists of an inline
-extent that is compressed, we did not discard (or make unusable) the
-data between the new file size and the old file size, wasting metadata
-space and allowing for the truncated data to be leaked and the data
-corruption/loss mentioned below.
-We were also not correctly decrementing the number of bytes used by the
-inode, we were setting it to zero, giving a wrong report for callers of
-the stat(2) syscall. The fsck tool also reported an error about a mismatch
-between the nbytes of the file versus the real space used by the file.
-
-Now because we weren't discarding the truncated region of the file, it
-was possible for a caller of the clone ioctl to actually read the data
-that was truncated, allowing for a security breach without requiring root
-access to the system, using only standard filesystem operations. The
-scenario is the following:
-
-   1) User A creates a file which consists of an inline and compressed
-      extent with a size of 2000 bytes - the file is not accessible to
-      any other users (no read, write or execution permission for anyone
-      else);
-
-   2) The user truncates the file to a size of 1000 bytes;
-
-   3) User A makes the file world readable;
-
-   4) User B creates a file consisting of an inline extent of 2000 bytes;
-
-   5) User B issues a clone operation from user A's file into its own
-      file (using a length argument of 0, clone the whole range);
-
-   6) User B now gets to see the 1000 bytes that user A truncated from
-      its file before it made its file world readable. User B also lost
-      the bytes in the range [1000, 2000[ bytes from its own file, but
-      that might be ok if his/her intention was reading stale data from
-      user A that was never supposed to be public.
-
-Note that this contrasts with the case where we truncate a file from 2000
-bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In
-this case reading any byte from the range [1000, 2000[ will return a value
-of 0x00, instead of the original data.
-
-This problem exists since the clone ioctl was added and happens both with
-and without my recent data loss and file corruption fixes for the clone
-ioctl (patch "Btrfs: fix file corruption and data loss after cloning
-inline extents").
-
-So fix this by truncating the compressed inline extents as we do for the
-non-compressed case, which involves decompressing, if the data isn't already
-in the page cache, compressing the truncated version of the extent, writing
-the compressed content into the inline extent and then truncate it.
-
-The following test case for fstests reproduces the problem. In order for
-the test to pass both this fix and my previous fix for the clone ioctl
-that forbids cloning a smaller inline extent into a larger one,
-which is titled "Btrfs: fix file corruption and data loss after cloning
-inline extents", are needed. Without that other fix the test fails in a
-different way that does not leak the truncated data, instead part of
-destination file gets replaced with zeroes (because the destination file
-has a larger inline extent than the source).
-
-  seq=`basename $0`
-  seqres=$RESULT_DIR/$seq
-  echo "QA output created by $seq"
-  tmp=/tmp/$$
-  status=1	# failure is the default!
-  trap "_cleanup; exit \$status" 0 1 2 3 15
-
-  _cleanup()
-  {
-      rm -f $tmp.*
-  }
-
-  # get standard environment, filters and checks
-  . ./common/rc
-  . ./common/filter
-
-  # real QA test starts here
-  _need_to_be_root
-  _supported_fs btrfs
-  _supported_os Linux
-  _require_scratch
-  _require_cloner
-
-  rm -f $seqres.full
-
-  _scratch_mkfs >>$seqres.full 2>&1
-  _scratch_mount "-o compress"
-
-  # Create our test files. File foo is going to be the source of a clone operation
-  # and consists of a single inline extent with an uncompressed size of 512 bytes,
-  # while file bar consists of a single inline extent with an uncompressed size of
-  # 256 bytes. For our test's purpose, it's important that file bar has an inline
-  # extent with a size smaller than foo's inline extent.
-  $XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128"   \
-          -c "pwrite -S 0x2a 128 384" \
-          $SCRATCH_MNT/foo | _filter_xfs_io
-  $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io
-
-  # Now durably persist all metadata and data. We do this to make sure that we get
-  # on disk an inline extent with a size of 512 bytes for file foo.
-  sync
-
-  # Now truncate our file foo to a smaller size. Because it consists of a
-  # compressed and inline extent, btrfs did not shrink the inline extent to the
-  # new size (if the extent was not compressed, btrfs would shrink it to 128
-  # bytes), it only updates the inode's i_size to 128 bytes.
-  $XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo
-
-  # Now clone foo's inline extent into bar.
-  # This clone operation should fail with errno EOPNOTSUPP because the source
-  # file consists only of an inline extent and the file's size is smaller than
-  # the inline extent of the destination (128 bytes < 256 bytes). However the
-  # clone ioctl was not prepared to deal with a file that has a size smaller
-  # than the size of its inline extent (something that happens only for compressed
-  # inline extents), resulting in copying the full inline extent from the source
-  # file into the destination file.
-  #
-  # Note that btrfs' clone operation for inline extents consists of removing the
-  # inline extent from the destination inode and copy the inline extent from the
-  # source inode into the destination inode, meaning that if the destination
-  # inode's inline extent is larger (N bytes) than the source inode's inline
-  # extent (M bytes), some bytes (N - M bytes) will be lost from the destination
-  # file. Btrfs could copy the source inline extent's data into the destination's
-  # inline extent so that we would not lose any data, but that's currently not
-  # done due to the complexity that would be needed to deal with such cases
-  # (specially when one or both extents are compressed), returning EOPNOTSUPP, as
-  # it's normally not a very common case to clone very small files (only case
-  # where we get inline extents) and copying inline extents does not save any
-  # space (unlike for normal, non-inlined extents).
-  $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
-
-  # Now because the above clone operation used to succeed, and due to foo's inline
-  # extent not being shrunk by the truncate operation, our file bar got the whole
-  # inline extent copied from foo, making us lose the last 128 bytes from bar
-  # which got replaced by the bytes in range [128, 256[ from foo before foo was
-  # truncated - in other words, data loss from bar and being able to read old and
-  # stale data from foo that should not be possible to read anymore through normal
-  # filesystem operations. Contrast with the case where we truncate a file from a
-  # size N to a smaller size M, truncate it back to size N and then read the range
-  # [M, N[, we should always get the value 0x00 for all the bytes in that range.
-
-  # We expected the clone operation to fail with errno EOPNOTSUPP and therefore
-  # not modify our file's bar data/metadata. So its content should be 256 bytes
-  # long with all bytes having the value 0xbb.
-  #
-  # Without the btrfs bug fix, the clone operation succeeded and resulted in
-  # leaking truncated data from foo, the bytes that belonged to its range
-  # [128, 256[, and losing data from bar in that same range. So reading the
-  # file gave us the following content:
-  #
-  # 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1
-  # *
-  # 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a
-  # *
-  # 0000400
-  echo "File bar's content after the clone operation:"
-  od -t x1 $SCRATCH_MNT/bar
-
-  # Also because the foo's inline extent was not shrunk by the truncate
-  # operation, btrfs' fsck, which is run by the fstests framework every time a
-  # test completes, failed reporting the following error:
-  #
-  #  root 5 inode 257 errors 400, nbytes wrong
-
-  status=0
-  exit
-
-Cc: stable at vger.kernel.org
-Signed-off-by: Filipe Manana <fdmanana at suse.com>
----
- fs/btrfs/inode.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++----------
- 1 file changed, 68 insertions(+), 14 deletions(-)
-
---- a/fs/btrfs/inode.c
-+++ b/fs/btrfs/inode.c
-@@ -4184,6 +4184,47 @@ static int truncate_space_check(struct b
- 
- }
- 
-+static int truncate_inline_extent(struct inode *inode,
-+				  struct btrfs_path *path,
-+				  struct btrfs_key *found_key,
-+				  const u64 item_end,
-+				  const u64 new_size)
-+{
-+	struct extent_buffer *leaf = path->nodes[0];
-+	int slot = path->slots[0];
-+	struct btrfs_file_extent_item *fi;
-+	u32 size = (u32)(new_size - found_key->offset);
-+	struct btrfs_root *root = BTRFS_I(inode)->root;
-+
-+	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
-+
-+	if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
-+		loff_t offset = new_size;
-+		loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
-+
-+		/*
-+		 * Zero out the remaining of the last page of our inline extent,
-+		 * instead of directly truncating our inline extent here - that
-+		 * would be much more complex (decompressing all the data, then
-+		 * compressing the truncated data, which might be bigger than
-+		 * the size of the inline extent, resize the extent, etc).
-+		 * We release the path because to get the page we might need to
-+		 * read the extent item from disk (data not in the page cache).
-+		 */
-+		btrfs_release_path(path);
-+		return btrfs_truncate_page(inode, offset, page_end - offset, 0);
-+	}
-+
-+	btrfs_set_file_extent_ram_bytes(leaf, fi, size);
-+	size = btrfs_file_extent_calc_inline_size(size);
-+	btrfs_truncate_item(root, path, size, 1);
-+
-+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-+		inode_sub_bytes(inode, item_end + 1 - new_size);
-+
-+	return 0;
-+}
-+
- /*
-  * this can truncate away extent items, csum items and directory items.
-  * It starts at a high offset and removes keys until it can't find
-@@ -4378,27 +4419,40 @@ search_again:
- 			 * special encodings
- 			 */
- 			if (!del_item &&
--			    btrfs_file_extent_compression(leaf, fi) == 0 &&
- 			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
- 			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {
--				u32 size = new_size - found_key.offset;
--
--				if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
--					inode_sub_bytes(inode, item_end + 1 -
--							new_size);
- 
- 				/*
--				 * update the ram bytes to properly reflect
--				 * the new size of our item
-+				 * Need to release path in order to truncate a
-+				 * compressed extent. So delete any accumulated
-+				 * extent items so far.
- 				 */
--				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
--				size =
--				    btrfs_file_extent_calc_inline_size(size);
--				btrfs_truncate_item(root, path, size, 1);
-+				if (btrfs_file_extent_compression(leaf, fi) !=
-+				    BTRFS_COMPRESS_NONE && pending_del_nr) {
-+					err = btrfs_del_items(trans, root, path,
-+							      pending_del_slot,
-+							      pending_del_nr);
-+					if (err) {
-+						btrfs_abort_transaction(trans,
-+									root,
-+									err);
-+						goto error;
-+					}
-+					pending_del_nr = 0;
-+				}
-+
-+				err = truncate_inline_extent(inode, path,
-+							     &found_key,
-+							     item_end,
-+							     new_size);
-+				if (err) {
-+					btrfs_abort_transaction(trans,
-+								root, err);
-+					goto error;
-+				}
- 			} else if (test_bit(BTRFS_ROOT_REF_COWS,
- 					    &root->state)) {
--				inode_sub_bytes(inode, item_end + 1 -
--						found_key.offset);
-+				inode_sub_bytes(inode, item_end + 1 - new_size);
- 			}
- 		}
- delete:
diff --git a/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch b/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch
deleted file mode 100644
index 299242e..0000000
--- a/debian/patches/bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From: Quentin Casasnovas <quentin.casasnovas at oracle.com>
-Subject: RDS: fix race condition when sending a message on unbound socket.
-Date: Fri, 16 Oct 2015 17:11:42 +0200
-Origin: https://lkml.org/lkml/2015/10/16/530
-
-Sasha's found a NULL pointer dereference in the RDS connection code when
-sending a message to an apparently unbound socket.  The problem is caused
-by the code checking if the socket is bound in rds_sendmsg(), which checks
-the rs_bound_addr field without taking a lock on the socket.  This opens a
-race where rs_bound_addr is temporarily set but where the transport is not
-in rds_bind(), leading to a NULL pointer dereference when trying to
-dereference 'trans' in __rds_conn_create().
-
-Vegard wrote a reproducer for this issue, so kindly ask him to share if
-you're interested.
-
-I cannot reproduce the NULL pointer dereference using Vegard's reproducer
-with this patch, whereas I could without.
-
-Complete earlier incomplete fix to CVE-2015-6937:
-
-  74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection")
-
-Signed-off-by: Quentin Casasnovas <quentin.casasnovas at oracle.com>
-Reviewed-by: Vegard Nossum <vegard.nossum at oracle.com>
-Reviewed-by: Sasha Levin <sasha.levin at oracle.com>
-Cc: Vegard Nossum <vegard.nossum at oracle.com>
-Cc: Sasha Levin <sasha.levin at oracle.com>
-Cc: Chien Yen <chien.yen at oracle.com>
-Cc: Santosh Shilimkar <santosh.shilimkar at oracle.com>
-Cc: David S. Miller <davem at davemloft.net>
-Cc: stable at vger.kernel.org
----
- net/rds/connection.c | 6 ------
- net/rds/send.c       | 4 +++-
- 2 files changed, 3 insertions(+), 7 deletions(-)
-
---- a/net/rds/connection.c
-+++ b/net/rds/connection.c
-@@ -190,12 +190,6 @@ new_conn:
- 		}
- 	}
- 
--	if (trans == NULL) {
--		kmem_cache_free(rds_conn_slab, conn);
--		conn = ERR_PTR(-ENODEV);
--		goto out;
--	}
--
- 	conn->c_trans = trans;
- 
- 	ret = trans->conn_alloc(conn, gfp);
---- a/net/rds/send.c
-+++ b/net/rds/send.c
-@@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, str
- 		release_sock(sk);
- 	}
- 
--	/* racing with another thread binding seems ok here */
-+	lock_sock(sk);
- 	if (daddr == 0 || rs->rs_bound_addr == 0) {
-+		release_sock(sk);
- 		ret = -ENOTCONN; /* XXX not a great errno */
- 		goto out;
- 	}
-+	release_sock(sk);
- 
- 	if (payload_len > rds_sk_sndbuf(rs)) {
- 		ret = -EMSGSIZE;
diff --git a/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch b/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
deleted file mode 100644
index 6fb47c7..0000000
--- a/debian/patches/bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
+++ /dev/null
@@ -1,325 +0,0 @@
-From: Rainer Weikusat <rweikusat at mobileactivedefense.com>
-Date: Fri, 20 Nov 2015 22:07:23 +0000
-Subject: unix: avoid use-after-free in ep_remove_wait_queue
-Origin: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git//commit?id=7d267278a9ece963d77eefec61630223fce08c6c
-
-Rainer Weikusat <rweikusat at mobileactivedefense.com> writes:
-An AF_UNIX datagram socket being the client in an n:1 association with
-some server socket is only allowed to send messages to the server if the
-receive queue of this socket contains at most sk_max_ack_backlog
-datagrams. This implies that prospective writers might be forced to go
-to sleep despite none of the messages presently enqueued on the server
-receive queue were sent by them. In order to ensure that these will be
-woken up once space becomes again available, the present unix_dgram_poll
-routine does a second sock_poll_wait call with the peer_wait wait queue
-of the server socket as queue argument (unix_dgram_recvmsg does a wake
-up on this queue after a datagram was received). This is inherently
-problematic because the server socket is only guaranteed to remain alive
-for as long as the client still holds a reference to it. In case the
-connection is dissolved via connect or by the dead peer detection logic
-in unix_dgram_sendmsg, the server socket may be freed despite "the
-polling mechanism" (in particular, epoll) still has a pointer to the
-corresponding peer_wait queue. There's no way to forcibly deregister a
-wait queue with epoll.
-
-Based on an idea by Jason Baron, the patch below changes the code such
-that a wait_queue_t belonging to the client socket is enqueued on the
-peer_wait queue of the server whenever the peer receive queue full
-condition is detected by either a sendmsg or a poll. A wake up on the
-peer queue is then relayed to the ordinary wait queue of the client
-socket via wake function. The connection to the peer wait queue is again
-dissolved if either a wake up is about to be relayed or the client
-socket reconnects or a dead peer is detected or the client socket is
-itself closed. This enables removing the second sock_poll_wait from
-unix_dgram_poll, thus avoiding the use-after-free, while still ensuring
-that no blocked writer sleeps forever.
-
-Signed-off-by: Rainer Weikusat <rweikusat at mobileactivedefense.com>
-Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets")
-Reviewed-by: Jason Baron <jbaron at akamai.com>
-Signed-off-by: David S. Miller <davem at davemloft.net>
-[bwh: Backported to 4.2: adjust context]
----
- include/net/af_unix.h |   1 +
- net/unix/af_unix.c    | 183 ++++++++++++++++++++++++++++++++++++++++++++------
- 2 files changed, 165 insertions(+), 19 deletions(-)
-
---- a/include/net/af_unix.h
-+++ b/include/net/af_unix.h
-@@ -62,6 +62,7 @@ struct unix_sock {
- #define UNIX_GC_CANDIDATE	0
- #define UNIX_GC_MAYBE_CYCLE	1
- 	struct socket_wq	peer_wq;
-+	wait_queue_t		peer_wake;
- };
- 
- static inline struct unix_sock *unix_sk(const struct sock *sk)
---- a/net/unix/af_unix.c
-+++ b/net/unix/af_unix.c
-@@ -326,6 +326,118 @@ found:
- 	return s;
- }
- 
-+/* Support code for asymmetrically connected dgram sockets
-+ *
-+ * If a datagram socket is connected to a socket not itself connected
-+ * to the first socket (eg, /dev/log), clients may only enqueue more
-+ * messages if the present receive queue of the server socket is not
-+ * "too large". This means there's a second writeability condition
-+ * poll and sendmsg need to test. The dgram recv code will do a wake
-+ * up on the peer_wait wait queue of a socket upon reception of a
-+ * datagram which needs to be propagated to sleeping would-be writers
-+ * since these might not have sent anything so far. This can't be
-+ * accomplished via poll_wait because the lifetime of the server
-+ * socket might be less than that of its clients if these break their
-+ * association with it or if the server socket is closed while clients
-+ * are still connected to it and there's no way to inform "a polling
-+ * implementation" that it should let go of a certain wait queue
-+ *
-+ * In order to propagate a wake up, a wait_queue_t of the client
-+ * socket is enqueued on the peer_wait queue of the server socket
-+ * whose wake function does a wake_up on the ordinary client socket
-+ * wait queue. This connection is established whenever a write (or
-+ * poll for write) hit the flow control condition and broken when the
-+ * association to the server socket is dissolved or after a wake up
-+ * was relayed.
-+ */
-+
-+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
-+				      void *key)
-+{
-+	struct unix_sock *u;
-+	wait_queue_head_t *u_sleep;
-+
-+	u = container_of(q, struct unix_sock, peer_wake);
-+
-+	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
-+			    q);
-+	u->peer_wake.private = NULL;
-+
-+	/* relaying can only happen while the wq still exists */
-+	u_sleep = sk_sleep(&u->sk);
-+	if (u_sleep)
-+		wake_up_interruptible_poll(u_sleep, key);
-+
-+	return 0;
-+}
-+
-+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
-+{
-+	struct unix_sock *u, *u_other;
-+	int rc;
-+
-+	u = unix_sk(sk);
-+	u_other = unix_sk(other);
-+	rc = 0;
-+	spin_lock(&u_other->peer_wait.lock);
-+
-+	if (!u->peer_wake.private) {
-+		u->peer_wake.private = other;
-+		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
-+
-+		rc = 1;
-+	}
-+
-+	spin_unlock(&u_other->peer_wait.lock);
-+	return rc;
-+}
-+
-+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
-+					    struct sock *other)
-+{
-+	struct unix_sock *u, *u_other;
-+
-+	u = unix_sk(sk);
-+	u_other = unix_sk(other);
-+	spin_lock(&u_other->peer_wait.lock);
-+
-+	if (u->peer_wake.private == other) {
-+		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
-+		u->peer_wake.private = NULL;
-+	}
-+
-+	spin_unlock(&u_other->peer_wait.lock);
-+}
-+
-+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
-+						   struct sock *other)
-+{
-+	unix_dgram_peer_wake_disconnect(sk, other);
-+	wake_up_interruptible_poll(sk_sleep(sk),
-+				   POLLOUT |
-+				   POLLWRNORM |
-+				   POLLWRBAND);
-+}
-+
-+/* preconditions:
-+ *	- unix_peer(sk) == other
-+ *	- association is stable
-+ */
-+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
-+{
-+	int connected;
-+
-+	connected = unix_dgram_peer_wake_connect(sk, other);
-+
-+	if (unix_recvq_full(other))
-+		return 1;
-+
-+	if (connected)
-+		unix_dgram_peer_wake_disconnect(sk, other);
-+
-+	return 0;
-+}
-+
- static inline int unix_writable(struct sock *sk)
- {
- 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
-@@ -430,6 +542,8 @@ static void unix_release_sock(struct soc
- 			skpair->sk_state_change(skpair);
- 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
- 		}
-+
-+		unix_dgram_peer_wake_disconnect(sk, skpair);
- 		sock_put(skpair); /* It may now die */
- 		unix_peer(sk) = NULL;
- 	}
-@@ -664,6 +778,7 @@ static struct sock *unix_create1(struct
- 	INIT_LIST_HEAD(&u->link);
- 	mutex_init(&u->readlock); /* single task reading lock */
- 	init_waitqueue_head(&u->peer_wait);
-+	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
- 	unix_insert_socket(unix_sockets_unbound(sk), sk);
- out:
- 	if (sk == NULL)
-@@ -1031,6 +1146,8 @@ restart:
- 	if (unix_peer(sk)) {
- 		struct sock *old_peer = unix_peer(sk);
- 		unix_peer(sk) = other;
-+		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
-+
- 		unix_state_double_unlock(sk, other);
- 
- 		if (other != old_peer)
-@@ -1470,6 +1587,7 @@ static int unix_dgram_sendmsg(struct soc
- 	struct scm_cookie scm;
- 	int max_level;
- 	int data_len = 0;
-+	int sk_locked;
- 
- 	wait_for_unix_gc();
- 	err = scm_send(sock, msg, &scm, false);
-@@ -1548,12 +1666,14 @@ restart:
- 		goto out_free;
- 	}
- 
-+	sk_locked = 0;
- 	unix_state_lock(other);
-+restart_locked:
- 	err = -EPERM;
- 	if (!unix_may_send(sk, other))
- 		goto out_unlock;
- 
--	if (sock_flag(other, SOCK_DEAD)) {
-+	if (unlikely(sock_flag(other, SOCK_DEAD))) {
- 		/*
- 		 *	Check with 1003.1g - what should
- 		 *	datagram error
-@@ -1561,10 +1681,14 @@ restart:
- 		unix_state_unlock(other);
- 		sock_put(other);
- 
-+		if (!sk_locked)
-+			unix_state_lock(sk);
-+
- 		err = 0;
--		unix_state_lock(sk);
- 		if (unix_peer(sk) == other) {
- 			unix_peer(sk) = NULL;
-+			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
-+
- 			unix_state_unlock(sk);
- 
- 			unix_dgram_disconnected(sk, other);
-@@ -1590,21 +1714,38 @@ restart:
- 			goto out_unlock;
- 	}
- 
--	if (unix_peer(other) != sk && unix_recvq_full(other)) {
--		if (!timeo) {
--			err = -EAGAIN;
--			goto out_unlock;
-+	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
-+		if (timeo) {
-+			timeo = unix_wait_for_peer(other, timeo);
-+
-+			err = sock_intr_errno(timeo);
-+			if (signal_pending(current))
-+				goto out_free;
-+
-+			goto restart;
- 		}
- 
--		timeo = unix_wait_for_peer(other, timeo);
-+		if (!sk_locked) {
-+			unix_state_unlock(other);
-+			unix_state_double_lock(sk, other);
-+		}
- 
--		err = sock_intr_errno(timeo);
--		if (signal_pending(current))
--			goto out_free;
-+		if (unix_peer(sk) != other ||
-+		    unix_dgram_peer_wake_me(sk, other)) {
-+			err = -EAGAIN;
-+			sk_locked = 1;
-+			goto out_unlock;
-+		}
- 
--		goto restart;
-+		if (!sk_locked) {
-+			sk_locked = 1;
-+			goto restart_locked;
-+		}
- 	}
- 
-+	if (unlikely(sk_locked))
-+		unix_state_unlock(sk);
-+
- 	if (sock_flag(other, SOCK_RCVTSTAMP))
- 		__net_timestamp(skb);
- 	maybe_add_creds(skb, sock, other);
-@@ -1618,6 +1759,8 @@ restart:
- 	return len;
- 
- out_unlock:
-+	if (sk_locked)
-+		unix_state_unlock(sk);
- 	unix_state_unlock(other);
- out_free:
- 	kfree_skb(skb);
-@@ -2453,14 +2596,16 @@ static unsigned int unix_dgram_poll(stru
- 		return mask;
- 
- 	writable = unix_writable(sk);
--	other = unix_peer_get(sk);
--	if (other) {
--		if (unix_peer(other) != sk) {
--			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
--			if (unix_recvq_full(other))
--				writable = 0;
--		}
--		sock_put(other);
-+	if (writable) {
-+		unix_state_lock(sk);
-+
-+		other = unix_peer(sk);
-+		if (other && unix_peer(other) != sk &&
-+		    unix_recvq_full(other) &&
-+		    unix_dgram_peer_wake_me(sk, other))
-+			writable = 0;
-+
-+		unix_state_unlock(sk);
- 	}
- 
- 	if (writable)
diff --git a/debian/patches/series b/debian/patches/series
index 6315427..c458416 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -85,16 +85,13 @@ bugfix/all/selftests-kprobe-choose-an-always-defined-function-t.patch
 bugfix/all/selftests-make-scripts-executable.patch
 bugfix/all/selftests-vm-try-harder-to-allocate-huge-pages.patch
 bugfix/all/selftests-breakpoints-actually-build-it.patch
-bugfix/all/rds-fix-race-condition-when-sending-a-message-on-unbound-socket.patch
 bugfix/all/media-media-vivid-osd-fix-info-leak-in-ioctl.patch
 bugfix/x86/kvm-svm-unconditionally-intercept-DB.patch
 bugfix/x86/kvm-x86-rename-update_db_bp_intercept-to-update_bp_i.patch
 bugfix/all/usbvision-fix-overflow-of-interfaces-array.patch
 bugfix/all/media-usbvision-fix-crash-on-detecting-device-with-i.patch
-bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
 bugfix/all/isdn_ppp-add-checks-for-allocation-failure-in-isdn_p.patch
 bugfix/all/ppp-slip-validate-vj-compression-slot-parameters-com.patch
-bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
 bugfix/x86/drm-i915-shut-up-gen8-sde-irq-dmesg-noise.patch
 bugfix/arm/arm-dts-kirkwood-fix-qnap-ts219-power-off.patch
 bugfix/x86/drm-i915-mark-uneven-memory-banks-on-gen4-desktop-as.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/kernel/linux.git



More information about the Kernel-svn-changes mailing list