[linux] 03/07: mnt: Add a per mount namespace limit on the number of mounts (CVE-2016-6213)

debian-kernel at lists.debian.org debian-kernel at lists.debian.org
Tue Mar 14 14:16:59 UTC 2017


This is an automated email from the git hooks/post-receive script.

benh pushed a commit to branch jessie-security
in repository linux.

commit 0a8687976b3d41602055a04a9a9c5a6a870a39b4
Author: Ben Hutchings <ben at decadent.org.uk>
Date:   Tue Mar 14 01:06:51 2017 +0000

    mnt: Add a per mount namespace limit on the number of mounts (CVE-2016-6213)
---
 debian/changelog                                   |   1 +
 ...er-mount-namespace-limit-on-the-number-of.patch | 268 +++++++++++++++++++++
 .../vfs-fix-abi-change-for-cve-2016-6213-fix.patch |  23 ++
 debian/patches/series                              |   2 +
 4 files changed, 294 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index 4792e57..9b521aa 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ linux (3.16.39-1+deb8u3) UNRELEASED; urgency=medium
   * timer: Restrict timer_stats to initial PID namespace (CVE-2017-5967)
   * mbcache: Reschedule before restarting iteration in mb_cache_entry_alloc()
     (mitigates CVE-2015-8952)
+  * mnt: Add a per mount namespace limit on the number of mounts (CVE-2016-6213)
 
  -- Ben Hutchings <ben at decadent.org.uk>  Mon, 13 Mar 2017 23:29:39 +0000
 
diff --git a/debian/patches/bugfix/all/mnt-add-a-per-mount-namespace-limit-on-the-number-of.patch b/debian/patches/bugfix/all/mnt-add-a-per-mount-namespace-limit-on-the-number-of.patch
new file mode 100644
index 0000000..c22a77b
--- /dev/null
+++ b/debian/patches/bugfix/all/mnt-add-a-per-mount-namespace-limit-on-the-number-of.patch
@@ -0,0 +1,268 @@
+From: "Eric W. Biederman" <ebiederm at xmission.com>
+Date: Wed, 28 Sep 2016 00:27:17 -0500
+Subject: mnt: Add a per mount namespace limit on the number of mounts
+Origin: https://git.kernel.org/linus/d29216842a85c7970c536108e093963f02714498
+Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2016-6213
+
+CAI Qian <caiqian at redhat.com> pointed out that the semantics
+of shared subtrees make it possible to create an exponentially
+increasing number of mounts in a mount namespace.
+
+    mkdir /tmp/1 /tmp/2
+    mount --make-rshared /
+    for i in $(seq 1 20) ; do mount --bind /tmp/1 /tmp/2 ; done
+
+Will create 2^20 or 1048576 mounts, which is a practical problem
+as some people have managed to hit this by accident.
+
+As such CVE-2016-6213 was assigned.
+
+Ian Kent <raven at themaw.net> described the situation for autofs users
+as follows:
+
+> The number of mounts for direct mount maps is usually not very large because of
+> the way they are implemented, large direct mount maps can have performance
+> problems. There can be anywhere from a few (likely case a few hundred) to less
+> than 10000, plus mounts that have been triggered and not yet expired.
+>
+> Indirect mounts have one autofs mount at the root plus the number of mounts that
+> have been triggered and not yet expired.
+>
+> The number of autofs indirect map entries can range from a few to the common
+> case of several thousand and in rare cases up to between 30000 and 50000. I've
+> not heard of people with maps larger than 50000 entries.
+>
+> The larger the number of map entries the greater the possibility for a large
+> number of active mounts so it's not hard to expect cases of a 1000 or somewhat
+> more active mounts.
+
+So I am setting the default number of mounts allowed per mount
+namespace at 100,000.  This is more than enough for any use case I
+know of, but small enough to quickly stop an exponential increase
+in mounts.  Which should be perfect to catch misconfigurations and
+malfunctioning programs.
+
+For anyone who needs a higher limit this can be changed by writing
+to the new /proc/sys/fs/mount-max sysctl.
+
+Tested-by: CAI Qian <caiqian at redhat.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
+[bwh: Backported to 3.16:
+ - Use ACCESS_ONCE() instead of READ_ONCE()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ Documentation/sysctl/fs.txt |  7 +++++++
+ fs/mount.h                  |  2 ++
+ fs/namespace.c              | 49 ++++++++++++++++++++++++++++++++++++++++++++-
+ fs/pnode.c                  |  2 +-
+ fs/pnode.h                  |  1 +
+ include/linux/mount.h       |  2 ++
+ kernel/sysctl.c             |  9 +++++++++
+ 7 files changed, 70 insertions(+), 2 deletions(-)
+
+--- a/Documentation/sysctl/fs.txt
++++ b/Documentation/sysctl/fs.txt
+@@ -265,6 +265,13 @@ aio-nr can grow to.
+ 
+ ==============================================================
+ 
++mount-max:
++
++This denotes the maximum number of mounts that may exist
++in a mount namespace.
++
++==============================================================
++
+ 
+ 2. /proc/sys/fs/binfmt_misc
+ ----------------------------------------------------------
+--- a/fs/mount.h
++++ b/fs/mount.h
+@@ -11,6 +11,8 @@ struct mnt_namespace {
+ 	u64			seq;	/* Sequence number to prevent loops */
+ 	wait_queue_head_t poll;
+ 	u64 event;
++	unsigned int		mounts; /* # of mounts in the namespace */
++	unsigned int		pending_mounts;
+ };
+ 
+ struct mnt_pcp {
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -27,6 +27,9 @@
+ #include "pnode.h"
+ #include "internal.h"
+ 
++/* Maximum number of mounts in a mount namespace */
++unsigned int sysctl_mount_max __read_mostly = 100000;
++
+ static unsigned int m_hash_mask __read_mostly;
+ static unsigned int m_hash_shift __read_mostly;
+ static unsigned int mp_hash_mask __read_mostly;
+@@ -812,6 +815,9 @@ static void commit_tree(struct mount *mn
+ 
+ 	list_splice(&head, n->list.prev);
+ 
++	n->mounts += n->pending_mounts;
++	n->pending_mounts = 0;
++
+ 	attach_shadowed(mnt, parent, shadows);
+ 	touch_mnt_namespace(n);
+ }
+@@ -1285,9 +1291,14 @@ static void umount_tree(struct mount *mn
+ 		propagate_umount(&tmp_list);
+ 
+ 	hlist_for_each_entry(p, &tmp_list, mnt_hash) {
++		struct mnt_namespace *ns;
+ 		list_del_init(&p->mnt_expire);
+ 		list_del_init(&p->mnt_list);
+-		__touch_mnt_namespace(p->mnt_ns);
++		ns = p->mnt_ns;
++		if (ns) {
++			ns->mounts--;
++			__touch_mnt_namespace(ns);
++		}
+ 		p->mnt_ns = NULL;
+ 		if (how & UMOUNT_SYNC)
+ 			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
+@@ -1643,6 +1654,28 @@ static int invent_group_ids(struct mount
+ 	return 0;
+ }
+ 
++int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
++{
++	unsigned int max = ACCESS_ONCE(sysctl_mount_max);
++	unsigned int mounts = 0, old, pending, sum;
++	struct mount *p;
++
++	for (p = mnt; p; p = next_mnt(p, mnt))
++		mounts++;
++
++	old = ns->mounts;
++	pending = ns->pending_mounts;
++	sum = old + pending;
++	if ((old > sum) ||
++	    (pending > sum) ||
++	    (max < sum) ||
++	    (mounts > (max - sum)))
++		return -ENOSPC;
++
++	ns->pending_mounts = pending + mounts;
++	return 0;
++}
++
+ /*
+  *  @source_mnt : mount tree to be attached
+  *  @nd         : place the mount tree @source_mnt is attached
+@@ -1712,10 +1745,18 @@ static int attach_recursive_mnt(struct m
+ 			struct path *parent_path)
+ {
+ 	HLIST_HEAD(tree_list);
++	struct mnt_namespace *ns = dest_mnt->mnt_ns;
+ 	struct mount *child, *p;
+ 	struct hlist_node *n;
+ 	int err;
+ 
++	/* Is there space to add these mounts to the mount namespace? */
++	if (!parent_path) {
++		err = count_mounts(ns, source_mnt);
++		if (err)
++			goto out;
++	}
++
+ 	if (IS_MNT_SHARED(dest_mnt)) {
+ 		err = invent_group_ids(source_mnt, true);
+ 		if (err)
+@@ -1752,11 +1793,13 @@ static int attach_recursive_mnt(struct m
+  out_cleanup_ids:
+ 	while (!hlist_empty(&tree_list)) {
+ 		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
++		child->mnt_parent->mnt_ns->pending_mounts = 0;
+ 		umount_tree(child, UMOUNT_SYNC);
+ 	}
+ 	unlock_mount_hash();
+ 	cleanup_group_ids(source_mnt, NULL);
+  out:
++	ns->pending_mounts = 0;
+ 	return err;
+ }
+ 
+@@ -2588,6 +2631,8 @@ static struct mnt_namespace *alloc_mnt_n
+ 	init_waitqueue_head(&new_ns->poll);
+ 	new_ns->event = 0;
+ 	new_ns->user_ns = get_user_ns(user_ns);
++	new_ns->mounts = 0;
++	new_ns->pending_mounts = 0;
+ 	return new_ns;
+ }
+ 
+@@ -2637,6 +2682,7 @@ struct mnt_namespace *copy_mnt_ns(unsign
+ 	q = new;
+ 	while (p) {
+ 		q->mnt_ns = new_ns;
++		new_ns->mounts++;
+ 		if (new_fs) {
+ 			if (&p->mnt == new_fs->root.mnt) {
+ 				new_fs->root.mnt = mntget(&q->mnt);
+@@ -2675,6 +2721,7 @@ static struct mnt_namespace *create_mnt_
+ 		struct mount *mnt = real_mount(m);
+ 		mnt->mnt_ns = new_ns;
+ 		new_ns->root = mnt;
++		new_ns->mounts++;
+ 		list_add(&mnt->mnt_list, &new_ns->list);
+ 	} else {
+ 		mntput(m);
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -258,7 +258,7 @@ static int propagate_one(struct mount *m
+ 		read_sequnlock_excl(&mount_lock);
+ 	}
+ 	hlist_add_head(&child->mnt_hash, list);
+-	return 0;
++	return count_mounts(m->mnt_ns, child);
+ }
+ 
+ /*
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -50,4 +50,5 @@ void mnt_set_mountpoint(struct mount *,
+ struct mount *copy_tree(struct mount *, struct dentry *, int);
+ bool is_path_reachable(struct mount *, struct dentry *,
+ 			 const struct path *root);
++int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
+ #endif /* _LINUX_PNODE_H */
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -91,4 +91,6 @@ extern void mark_mounts_for_expiry(struc
+ 
+ extern dev_t name_to_dev_t(char *name);
+ 
++extern unsigned int sysctl_mount_max;
++
+ #endif /* _LINUX_MOUNT_H */
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -63,6 +63,7 @@
+ #include <linux/binfmts.h>
+ #include <linux/sched/sysctl.h>
+ #include <linux/kexec.h>
++#include <linux/mount.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/processor.h>
+@@ -1697,6 +1698,14 @@ static struct ctl_table fs_table[] = {
+ 		.mode		= 0644,
+ 		.proc_handler	= proc_doulongvec_minmax,
+ 	},
++	{
++		.procname	= "mount-max",
++		.data		= &sysctl_mount_max,
++		.maxlen		= sizeof(unsigned int),
++		.mode		= 0644,
++		.proc_handler	= proc_dointvec_minmax,
++		.extra1		= &one,
++	},
+ 	{ }
+ };
+ 
diff --git a/debian/patches/debian/vfs-fix-abi-change-for-cve-2016-6213-fix.patch b/debian/patches/debian/vfs-fix-abi-change-for-cve-2016-6213-fix.patch
new file mode 100644
index 0000000..4fd7d9a
--- /dev/null
+++ b/debian/patches/debian/vfs-fix-abi-change-for-cve-2016-6213-fix.patch
@@ -0,0 +1,23 @@
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Tue, 14 Mar 2017 03:13:58 +0000
+Subject: vfs: Fix ABI change for CVE-2016-6213 fix
+Forwarded: not-needed
+
+The fix for CVE-2016-6213 added two fields to struct mnt_namespace.
+This is defined in fs/mount.h so it's not exposed to OOT modules; it's
+also not used by in-tree modules.  Hide the change from genksyms.
+
+---
+--- a/fs/mount.h
++++ b/fs/mount.h
+@@ -11,8 +11,10 @@ struct mnt_namespace {
+ 	u64			seq;	/* Sequence number to prevent loops */
+ 	wait_queue_head_t poll;
+ 	u64 event;
++#ifndef __GENKSYMS__
+ 	unsigned int		mounts; /* # of mounts in the namespace */
+ 	unsigned int		pending_mounts;
++#endif
+ };
+ 
+ struct mnt_pcp {
diff --git a/debian/patches/series b/debian/patches/series
index ddd05d3..143f7b0 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -706,6 +706,7 @@ bugfix/x86/kvm-nvmx-allow-l1-to-intercept-software-exceptions-bp-and-of.patch
 bugfix/all/irda-fix-lockdep-annotations-in-hashbin_delete.patch
 bugfix/all/timer-restrict-timer_stats-to-initial-pid-namespace.patch
 bugfix/all/mbcache-reschedule-before-restarting-iteration-in-mb_cache_entry_alloc.patch
+bugfix/all/mnt-add-a-per-mount-namespace-limit-on-the-number-of.patch
 
 # Fix ABI changes
 debian/of-fix-abi-changes.patch
@@ -760,3 +761,4 @@ debian/revert-arm64-define-at_vector_size_arch-for-arch_dlinfo.patch
 debian/revert-s390-define-at_vector_size_arch-for-arch_dlinfo.patch
 debian/revert-block-fix-bdi-vs-gendisk-lifetime-mismatch.patch
 debian/net-fix-abi-change-for-sk_filter-changes.patch
+debian/vfs-fix-abi-change-for-cve-2016-6213-fix.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/kernel/linux.git



More information about the Kernel-svn-changes mailing list