[linux] 01/02: Apply fixes for CVE-2017-1000364

debian-kernel at lists.debian.org debian-kernel at lists.debian.org
Tue Jun 20 13:34:52 UTC 2017


This is an automated email from the git hooks/post-receive script.

benh pushed a commit to branch wheezy-security
in repository linux.

commit e98bbc21760b620b04c2a5b09b93e33043ae16c0
Author: Ben Hutchings <ben at decadent.org.uk>
Date:   Sun Jun 18 01:48:48 2017 +0100

    Apply fixes for CVE-2017-1000364
---
 debian/changelog                                   |   5 +
 .../all/mm-allow-to-configure-stack-gap-size.patch | 129 ++++++
 .../mm-do-not-collapse-stack-gap-into-thp.patch    |  56 +++
 ...row-the-stack-vma-just-because-of-an-over.patch |  74 ++++
 .../bugfix/all/mm-enlarge-stack-guard-gap.patch    | 489 +++++++++++++++++++++
 ...-the-stack-gap-for-unpopulated-growing-vm.patch |  45 ++
 debian/patches/series                              |   5 +
 7 files changed, 803 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index 31619ea..39420dd 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -81,6 +81,11 @@ linux (3.2.89-1) UNRELEASED; urgency=medium
   * net: add kfree_skb_list()
   * ipv6: Fix leak in ipv6_gso_segment().
   * Ignore ABI changes in IB, rds
+  * mm: do not grow the stack vma just because of an overrun on preceding vma
+  * mm: enlarge stack guard gap (CVE-2017-1000364)
+  * mm: allow to configure stack gap size
+  * mm, proc: cap the stack gap for unpopulated growing vmas
+  * mm: do not collapse stack gap into THP
 
  -- Ben Hutchings <ben at decadent.org.uk>  Wed, 31 May 2017 11:48:09 +0100
 
diff --git a/debian/patches/bugfix/all/mm-allow-to-configure-stack-gap-size.patch b/debian/patches/bugfix/all/mm-allow-to-configure-stack-gap-size.patch
new file mode 100644
index 0000000..5d4de23
--- /dev/null
+++ b/debian/patches/bugfix/all/mm-allow-to-configure-stack-gap-size.patch
@@ -0,0 +1,129 @@
+From: Michal Hocko <mhocko at suse.com>
+Date: Wed, 14 Jun 2017 08:17:02 +0200
+Subject: mm: allow to configure stack gap size
+Bug-Debian: https://security-tracker.debian.org/tracker/CVE-2017-1000364
+
+Add a kernel command line option (stack_guard_gap) to specify the stack
+gap size (in page units) and export the value in /proc/<pid>/smaps for
+stack vmas. This might be used for special applications like CRIU/RR.
+
+Suggested-by: Linus Torvalds <torvalds at linux-foundation.org>
+Signed-off-by: Michal Hocko <mhocko at suse.com>
+[carnil: backport to 3.16
+ - context adjustment
+ - adjust location for documentation
+ - is_stack -> vm_is_stack]
+[bwh: Backported to 3.2: fold in later change to use has_gap, as we don't
+ have vm_is_stack()]
+---
+ Documentation/kernel-parameters.txt |  7 +++++++
+ fs/proc/task_mmu.c                  | 21 ++++++++++++++++-----
+ mm/mmap.c                           | 13 +++++++++++++
+ 3 files changed, 36 insertions(+), 5 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2463,6 +2463,13 @@ bytes respectively. Such letter suffixes
+ 	spia_pedr=
+ 	spia_peddr=
+ 
++	stack_guard_gap=	[MM]
++			override the default stack gap protection. The value
++			is in page units and it defines how many pages prior
++			to (for stacks growing down) resp. after (for stacks
++			growing up) the main stack are reserved for no other
++			mapping. Default value is 256 pages.
++
+ 	stacktrace	[FTRACE]
+ 			Enabled the stack tracer on boot up.
+ 
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -210,7 +210,8 @@ static int do_maps_open(struct inode *in
+ 	return ret;
+ }
+ 
+-static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
++static void
++show_map_vma(struct seq_file *m, struct vm_area_struct *vma, bool *has_gap)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	struct file *file = vma->vm_file;
+@@ -232,11 +233,17 @@ static void show_map_vma(struct seq_file
+ 	start = vma->vm_start;
+ 	end = vma->vm_end;
+ 	if (vma->vm_flags & VM_GROWSDOWN) {
+-		if (stack_guard_area(vma, start))
++		if (stack_guard_area(vma, start)) {
+ 			start += stack_guard_gap;
++			if (has_gap)
++				*has_gap = true;
++		}
+ 	} else if (vma->vm_flags & VM_GROWSUP) {
+-		if (stack_guard_area(vma, end))
++		if (stack_guard_area(vma, end)) {
+ 			end -= stack_guard_gap;
++			if (has_gap)
++				*has_gap = true;
++		}
+ 	}
+ 
+ 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
+@@ -285,7 +292,7 @@ static int show_map(struct seq_file *m,
+ 	struct proc_maps_private *priv = m->private;
+ 	struct task_struct *task = priv->task;
+ 
+-	show_map_vma(m, vma);
++	show_map_vma(m, vma, NULL);
+ 
+ 	if (m->count < m->size)  /* vma is copied successfully */
+ 		m->version = (vma != get_gate_vma(task->mm))
+@@ -440,6 +447,7 @@ static int show_smap(struct seq_file *m,
+ 		.mm = vma->vm_mm,
+ 		.private = &mss,
+ 	};
++	bool has_gap = false;
+ 
+ 	memset(&mss, 0, sizeof mss);
+ 	mss.vma = vma;
+@@ -447,7 +455,7 @@ static int show_smap(struct seq_file *m,
+ 	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+ 		walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
+ 
+-	show_map_vma(m, vma);
++	show_map_vma(m, vma, &has_gap);
+ 
+ 	seq_printf(m,
+ 		   "Size:           %8lu kB\n"
+@@ -480,6 +488,9 @@ static int show_smap(struct seq_file *m,
+ 		   (vma->vm_flags & VM_LOCKED) ?
+ 			(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
+ 
++	if (has_gap)
++		seq_printf(m, "Stack_Gap:      %8lu kB\n", stack_guard_gap >>10);
++
+ 	if (m->count < m->size)  /* vma is copied successfully */
+ 		m->version = (vma != get_gate_vma(task->mm))
+ 			? vma->vm_start : 0;
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1799,6 +1799,19 @@ int expand_downwards(struct vm_area_stru
+ /* enforced gap between the expanding stack and other mappings. */
+ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+ 
++static int __init cmdline_parse_stack_guard_gap(char *p)
++{
++	unsigned long val;
++	char *endptr;
++
++	val = simple_strtoul(p, &endptr, 10);
++	if (!*endptr)
++		stack_guard_gap = val << PAGE_SHIFT;
++
++	return 0;
++}
++__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
++
+ /*
+  * Note how expand_stack() refuses to expand the stack all the way to
+  * abut the next virtual mapping, *unless* that mapping itself is also
diff --git a/debian/patches/bugfix/all/mm-do-not-collapse-stack-gap-into-thp.patch b/debian/patches/bugfix/all/mm-do-not-collapse-stack-gap-into-thp.patch
new file mode 100644
index 0000000..06b3134
--- /dev/null
+++ b/debian/patches/bugfix/all/mm-do-not-collapse-stack-gap-into-thp.patch
@@ -0,0 +1,56 @@
+From: Michal Hocko <mhocko at suse.com>
+Date: Wed, 14 Jun 2017 08:18:00 +0200
+Subject: mm: do not collapse stack gap into THP
+Bug-Debian: https://security-tracker.debian.org/tracker/CVE-2017-1000364
+
+Oleg has noticed that khugepaged will happily collapse stack vma (as
+long as it is not an early stack - see is_vma_temporary_stack) and
+it might effectively remove the stack gap area as well because a larger
+part of the stack vma is usually populated. The same applies to the
+page fault handler.
+
+Fix this by checking stack_guard_area when revalidating a VMA
+in hugepage_vma_revalidate.  We do not want to hook/replace
+is_vma_temporary_stack() check because THP might be still useful for
+stack, all we need is excluding the gap from collapsing into a THP.
+
+Also check the to-be-created THP in do_huge_pmd_anonymous_page to
+make sure it is completely outside of the gap area because we could
+create THP covering the gap area.
+
+Noticed-by: Oleg Nesterov <oleg at redhat.com>
+Signed-off-by: Michal Hocko <mhocko at suse.com>
+[carnil: backport to 3.16: move stack_guard_area check to huge_memory.c
+in collapse_huge_page]
+[bwh: Backported to 3.2:
+ - Use 'goto' rather than 'return VM_FAULT_FALLBACK' to fall back to PTEs
+ - Adjust context]
+---
+ mm/huge_memory.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index d6e6cafdb2c9..ef201fe46804 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -680,6 +680,9 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ 	pte_t *pte;
+ 
+ 	if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
++		if (stack_guard_area(vma, haddr) ||
++				stack_guard_area(vma, haddr + HPAGE_PMD_SIZE))
++			goto out;
+ 		if (unlikely(anon_vma_prepare(vma)))
+ 			return VM_FAULT_OOM;
+ 		if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
+@@ -1922,6 +1925,10 @@ static void collapse_huge_page(struct mm_struct *mm,
+ 		goto out;
+ 	if (!hugepage_vma_check(vma))
+ 		goto out;
++
++	/* never try to collapse stack gap */
++	if (stack_guard_area(vma, hstart) || stack_guard_area(vma, hend))
++		goto out;
+ 	pgd = pgd_offset(mm, address);
+ 	if (!pgd_present(*pgd))
+ 		goto out;
diff --git a/debian/patches/bugfix/all/mm-do-not-grow-the-stack-vma-just-because-of-an-over.patch b/debian/patches/bugfix/all/mm-do-not-grow-the-stack-vma-just-because-of-an-over.patch
new file mode 100644
index 0000000..44990aa
--- /dev/null
+++ b/debian/patches/bugfix/all/mm-do-not-grow-the-stack-vma-just-because-of-an-over.patch
@@ -0,0 +1,74 @@
+From: Linus Torvalds <torvalds at linux-foundation.org>
+Date: Wed, 27 Feb 2013 08:36:04 -0800
+Subject: mm: do not grow the stack vma just because of an overrun on preceding vma
+Origin: https://git.kernel.org/linus/09884964335e85e897876d17783c2ad33cf8a2e0
+
+The stack vma is designed to grow automatically (marked with VM_GROWSUP
+or VM_GROWSDOWN depending on architecture) when an access is made beyond
+the existing boundary.  However, particularly if you have not limited
+your stack at all ("ulimit -s unlimited"), this can cause the stack to
+grow even if the access was really just one past *another* segment.
+
+And that's wrong, especially since we first grow the segment, but then
+immediately later enforce the stack guard page on the last page of the
+segment.  So _despite_ first growing the stack segment as a result of
+the access, the kernel will then make the access cause a SIGSEGV anyway!
+
+So do the same logic as the guard page check does, and consider an
+access to within one page of the next segment to be a bad access, rather
+than growing the stack to abut the next segment.
+
+Reported-and-tested-by: Heiko Carstens <heiko.carstens at de.ibm.com>
+Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
+---
+ mm/mmap.c | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 94f4e3444ae5..e949a2026a72 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1802,9 +1802,28 @@ int expand_downwards(struct vm_area_struct *vma,
+ 	return error;
+ }
+ 
++/*
++ * Note how expand_stack() refuses to expand the stack all the way to
++ * abut the next virtual mapping, *unless* that mapping itself is also
++ * a stack mapping. We want to leave room for a guard page, after all
++ * (the guard page itself is not added here, that is done by the
++ * actual page faulting logic)
++ *
++ * This matches the behavior of the guard page logic (see mm/memory.c:
++ * check_stack_guard_page()), which only allows the guard page to be
++ * removed under these circumstances.
++ */
+ #ifdef CONFIG_STACK_GROWSUP
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
++	struct vm_area_struct *next;
++
++	address &= PAGE_MASK;
++	next = vma->vm_next;
++	if (next && next->vm_start == address + PAGE_SIZE) {
++		if (!(next->vm_flags & VM_GROWSUP))
++			return -ENOMEM;
++	}
+ 	return expand_upwards(vma, address);
+ }
+ 
+@@ -1827,6 +1846,14 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
+ #else
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
++	struct vm_area_struct *prev;
++
++	address &= PAGE_MASK;
++	prev = vma->vm_prev;
++	if (prev && prev->vm_end == address) {
++		if (!(prev->vm_flags & VM_GROWSDOWN))
++			return -ENOMEM;
++	}
+ 	return expand_downwards(vma, address);
+ }
+ 
diff --git a/debian/patches/bugfix/all/mm-enlarge-stack-guard-gap.patch b/debian/patches/bugfix/all/mm-enlarge-stack-guard-gap.patch
new file mode 100644
index 0000000..4f227dd
--- /dev/null
+++ b/debian/patches/bugfix/all/mm-enlarge-stack-guard-gap.patch
@@ -0,0 +1,489 @@
+From: Michal Hocko <mhocko at suse.com>
+Date: Wed, 14 Jun 2017 08:16:54 +0200
+Subject: mm: enlarge stack guard gap
+Bug-Debian: https://security-tracker.debian.org/tracker/CVE-2017-1000364
+
+Stack guard page is a useful feature to reduce a risk of stack smashing
+into a different mapping. We have been using a single page gap which
+is sufficient to prevent having stack adjacent to a different mapping.
+But this seems to be insufficient in the light of the stack usage in
+the userspace. E.g. glibc uses as large as 64kB alloca() in many
+commonly used functions. Others use constructs like gid_t
+buffer[NGROUPS_MAX] which is 256kB or stack strings with MAX_ARG_STRLEN.
+
+This will become especially dangerous for suid binaries and the default
+no limit for the stack size limit because those applications can be
+tricked to consume a large portion of the stack and a single glibc call
+could jump over the guard page. These attacks are not theoretical,
+unfortunately.
+
+Make those attacks less probable by increasing the stack guard gap
+to 1MB (on systems with 4k pages but make it depend on the page size
+because systems with larger base pages might cap stack allocations in
+the PAGE_SIZE units) which should cover larger alloca() and VLA stack
+allocations. It is obviously not a full fix because the problem is
+somehow inherent but it should reduce attack space a lot. One could
+argue that the gap size should be configurable from the userspace but
+that can be done later on top when somebody finds that the new 1MB is
+not suitable or even wrong for some special case applications.
+
+Implementation wise, get rid of check_stack_guard_page and move all the
+guard page specific code to expandable_stack_area which always tries to
+guarantee the gap. do_anonymous_page then just calls expand_stack. Also
+get rid of stack_guard_page_{start,end} and replace them with
+stack_guard_area to handle stack population and /proc/<pid>/[s]maps.
+
+This should clean up the code which is quite scattered currently
+and therefore justify the change.
+
+Signed-off-by: Michal Hocko <mhocko at suse.com>
+[carnil: backport for 3.16:
+ - vmf->address -> address;
+ - context adjustment;
+ - backport for pre-"FOLL_MLOCK"-"FOLL_POPULATE"-rename]
+[bwh: Backported to 3.2:
+ - stack_guard_area() replaces local stack_guard_page() function
+ - Use #ifndef instead of IS_ENABLED() to test CONFIG_STACK_GROWSUP
+ - Adjust filename, context]
+---
+ arch/ia64/mm/fault.c |   2 +-
+ fs/exec.c            |   9 ++-
+ fs/proc/task_mmu.c   |  11 ++--
+ include/linux/mm.h   |  40 +++----------
+ mm/memory.c          |  46 ++-------------
+ mm/mmap.c            | 158 +++++++++++++++++++++++++++++++++++++++++----------
+ 6 files changed, 158 insertions(+), 108 deletions(-)
+
+--- a/arch/ia64/mm/fault.c
++++ b/arch/ia64/mm/fault.c
+@@ -200,7 +200,7 @@ ia64_do_page_fault (unsigned long addres
+ 		 */
+ 		if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
+ 			goto bad_area;
+-		if (expand_upwards(vma, address))
++		if (expand_upwards(vma, address, 0))
+ 			goto bad_area;
+ 	}
+ 	goto good_area;
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -196,7 +196,7 @@ static struct page *get_arg_page(struct
+ 
+ #ifdef CONFIG_STACK_GROWSUP
+ 	if (write) {
+-		ret = expand_downwards(bprm->vma, pos);
++		ret = expand_downwards(bprm->vma, pos, 0);
+ 		if (ret < 0)
+ 			return NULL;
+ 	}
+@@ -210,6 +210,13 @@ static struct page *get_arg_page(struct
+ 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+ 		struct rlimit *rlim;
+ 
++		/*
++		 * GRWOSUP doesn't really have any gap at this stage because we grow
++		 * the stack down now. See the expand_downwards above.
++		 */
++#ifndef CONFIG_STACK_GROWSUP
++		size -= stack_guard_gap;
++#endif
+ 		acct_arg_size(bprm, size / PAGE_SIZE);
+ 
+ 		/*
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -230,11 +230,14 @@ static void show_map_vma(struct seq_file
+ 
+ 	/* We don't show the stack guard page in /proc/maps */
+ 	start = vma->vm_start;
+-	if (stack_guard_page_start(vma, start))
+-		start += PAGE_SIZE;
+ 	end = vma->vm_end;
+-	if (stack_guard_page_end(vma, end))
+-		end -= PAGE_SIZE;
++	if (vma->vm_flags & VM_GROWSDOWN) {
++		if (stack_guard_area(vma, start))
++			start += stack_guard_gap;
++	} else if (vma->vm_flags & VM_GROWSUP) {
++		if (stack_guard_area(vma, end))
++			end -= stack_guard_gap;
++	}
+ 
+ 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
+ 			start,
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1015,34 +1015,6 @@ int set_page_dirty(struct page *page);
+ int set_page_dirty_lock(struct page *page);
+ int clear_page_dirty_for_io(struct page *page);
+ 
+-/* Is the vma a continuation of the stack vma above it? */
+-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
+-{
+-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
+-}
+-
+-static inline int stack_guard_page_start(struct vm_area_struct *vma,
+-					     unsigned long addr)
+-{
+-	return (vma->vm_flags & VM_GROWSDOWN) &&
+-		(vma->vm_start == addr) &&
+-		!vma_growsdown(vma->vm_prev, addr);
+-}
+-
+-/* Is the vma a continuation of the stack vma below it? */
+-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
+-{
+-	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
+-}
+-
+-static inline int stack_guard_page_end(struct vm_area_struct *vma,
+-					   unsigned long addr)
+-{
+-	return (vma->vm_flags & VM_GROWSUP) &&
+-		(vma->vm_end == addr) &&
+-		!vma_growsup(vma->vm_next, addr);
+-}
+-
+ extern unsigned long move_page_tables(struct vm_area_struct *vma,
+ 		unsigned long old_addr, struct vm_area_struct *new_vma,
+ 		unsigned long new_addr, unsigned long len);
+@@ -1462,16 +1434,22 @@ unsigned long ra_submit(struct file_ra_s
+ 			struct address_space *mapping,
+ 			struct file *filp);
+ 
++extern unsigned long stack_guard_gap;
+ /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
++extern int stack_guard_area(struct vm_area_struct *vma, unsigned long address);
+ 
+ /* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */
+ extern int expand_downwards(struct vm_area_struct *vma,
+-		unsigned long address);
++		unsigned long address, unsigned long gap);
++unsigned long expandable_stack_area(struct vm_area_struct *vma,
++		unsigned long address, unsigned long *gap);
++
+ #if VM_GROWSUP
+-extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
++extern int expand_upwards(struct vm_area_struct *vma,
++		unsigned long address, unsigned long gap);
+ #else
+-  #define expand_upwards(vma, address) (0)
++  #define expand_upwards(vma, address, gap) (0)
+ #endif
+ 
+ /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1605,12 +1605,6 @@ no_page_table:
+ 	return page;
+ }
+ 
+-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
+-{
+-	return stack_guard_page_start(vma, addr) ||
+-	       stack_guard_page_end(vma, addr+PAGE_SIZE);
+-}
+-
+ /**
+  * __get_user_pages() - pin user pages in memory
+  * @tsk:	task_struct of target task
+@@ -1763,7 +1757,7 @@ int __get_user_pages(struct task_struct
+ 
+ 				/* For mlock, just skip the stack guard page. */
+ 				if (foll_flags & FOLL_MLOCK) {
+-					if (stack_guard_page(vma, start))
++					if (stack_guard_area(vma, start))
+ 						goto next_page;
+ 				}
+ 				if (foll_flags & FOLL_WRITE)
+@@ -3121,39 +3115,7 @@ out_release:
+ 	return ret;
+ }
+ 
+-/*
+- * This is like a special single-page "expand_{down|up}wards()",
+- * except we must first make sure that 'address{-|+}PAGE_SIZE'
+- * doesn't hit another vma.
+- */
+-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+-{
+-	address &= PAGE_MASK;
+-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
+-		struct vm_area_struct *prev = vma->vm_prev;
+ 
+-		/*
+-		 * Is there a mapping abutting this one below?
+-		 *
+-		 * That's only ok if it's the same stack mapping
+-		 * that has gotten split..
+-		 */
+-		if (prev && prev->vm_end == address)
+-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+-
+-		return expand_downwards(vma, address - PAGE_SIZE);
+-	}
+-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+-		struct vm_area_struct *next = vma->vm_next;
+-
+-		/* As VM_GROWSDOWN but s/below/above/ */
+-		if (next && next->vm_start == address + PAGE_SIZE)
+-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+-
+-		return expand_upwards(vma, address + PAGE_SIZE);
+-	}
+-	return 0;
+-}
+ 
+ /*
+  * We enter with non-exclusive mmap_sem (to exclude vma changes,
+@@ -3175,8 +3137,10 @@ static int do_anonymous_page(struct mm_s
+ 		return VM_FAULT_SIGBUS;
+ 
+ 	/* Check if we need to add a guard page to the stack */
+-	if (check_stack_guard_page(vma, address) < 0)
+-		return VM_FAULT_SIGSEGV;
++	if (stack_guard_area(vma, address)) {
++		if (expand_stack(vma, address) < 0)
++			return VM_FAULT_SIGSEGV;
++	}
+ 
+ 	/* Use the zero-page for reads */
+ 	if (!(flags & FAULT_FLAG_WRITE)) {
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1647,7 +1647,8 @@ out:
+  * update accounting. This is shared with both the
+  * grow-up and grow-down cases.
+  */
+-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
++static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow,
++		unsigned long gap)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	struct rlimit *rlim = current->signal->rlim;
+@@ -1660,7 +1661,7 @@ static int acct_stack_growth(struct vm_a
+ 	/* Stack limit test */
+ 	actual_size = size;
+ 	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
+-		actual_size -= PAGE_SIZE;
++		actual_size -= gap;
+ 	if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+ 		return -ENOMEM;
+ 
+@@ -1701,7 +1702,7 @@ static int acct_stack_growth(struct vm_a
+  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+  * vma is the last one with address > vma->vm_end.  Have to extend vma.
+  */
+-int expand_upwards(struct vm_area_struct *vma, unsigned long address)
++int expand_upwards(struct vm_area_struct *vma, unsigned long address, unsigned long gap)
+ {
+ 	int error;
+ 
+@@ -1720,14 +1721,7 @@ int expand_upwards(struct vm_area_struct
+ 	 * vma->vm_start/vm_end cannot change under us because the caller
+ 	 * is required to hold the mmap_sem in read mode.  We need the
+ 	 * anon_vma lock to serialize against concurrent expand_stacks.
+-	 * Also guard against wrapping around to address 0.
+ 	 */
+-	if (address < PAGE_ALIGN(address+4))
+-		address = PAGE_ALIGN(address+4);
+-	else {
+-		vma_unlock_anon_vma(vma);
+-		return -ENOMEM;
+-	}
+ 	error = 0;
+ 
+ 	/* Somebody else might have raced and expanded it already */
+@@ -1739,7 +1733,7 @@ int expand_upwards(struct vm_area_struct
+ 
+ 		error = -ENOMEM;
+ 		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
+-			error = acct_stack_growth(vma, size, grow);
++			error = acct_stack_growth(vma, size, grow, gap);
+ 			if (!error) {
+ 				vma->vm_end = address;
+ 				perf_event_mmap(vma);
+@@ -1756,7 +1750,7 @@ int expand_upwards(struct vm_area_struct
+  * vma is the first one with address < vma->vm_start.  Have to extend vma.
+  */
+ int expand_downwards(struct vm_area_struct *vma,
+-				   unsigned long address)
++				   unsigned long address, unsigned long gap)
+ {
+ 	int error;
+ 
+@@ -1789,7 +1783,7 @@ int expand_downwards(struct vm_area_stru
+ 
+ 		error = -ENOMEM;
+ 		if (grow <= vma->vm_pgoff) {
+-			error = acct_stack_growth(vma, size, grow);
++			error = acct_stack_growth(vma, size, grow, gap);
+ 			if (!error) {
+ 				vma->vm_start = address;
+ 				vma->vm_pgoff -= grow;
+@@ -1802,29 +1796,72 @@ int expand_downwards(struct vm_area_stru
+ 	return error;
+ }
+ 
++/* enforced gap between the expanding stack and other mappings. */
++unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
++
+ /*
+  * Note how expand_stack() refuses to expand the stack all the way to
+  * abut the next virtual mapping, *unless* that mapping itself is also
+- * a stack mapping. We want to leave room for a guard page, after all
++ * a stack mapping. We want to leave room for a guard area, after all
+  * (the guard page itself is not added here, that is done by the
+  * actual page faulting logic)
+- *
+- * This matches the behavior of the guard page logic (see mm/memory.c:
+- * check_stack_guard_page()), which only allows the guard page to be
+- * removed under these circumstances.
+  */
+ #ifdef CONFIG_STACK_GROWSUP
++unsigned long expandable_stack_area(struct vm_area_struct *vma,
++		unsigned long address, unsigned long *gap)
++{
++	struct vm_area_struct *next = vma->vm_next;
++	unsigned long guard_gap = stack_guard_gap;
++	unsigned long guard_addr;
++
++	address = ALIGN(address, PAGE_SIZE);;
++	if (!next)
++		goto out;
++
++	if (next->vm_flags & VM_GROWSUP) {
++		guard_gap = min(guard_gap, next->vm_start - address);
++		goto out;
++	}
++
++	if (next->vm_start - address < guard_gap)
++		return -ENOMEM;
++out:
++	if (TASK_SIZE - address < guard_gap)
++		guard_gap = TASK_SIZE - address;
++	guard_addr = address + guard_gap;
++	*gap = guard_gap;
++
++	return guard_addr;
++}
++
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
++	unsigned long gap;
++
++	address = expandable_stack_area(vma, address, &gap);
++	if (IS_ERR_VALUE(address))
++		return -ENOMEM;
++	return expand_upwards(vma, address, gap);
++}
++
++int stack_guard_area(struct vm_area_struct *vma, unsigned long address)
++{
+ 	struct vm_area_struct *next;
+ 
+-	address &= PAGE_MASK;
++	if (!(vma->vm_flags & VM_GROWSUP))
++		return 0;
++
++	/*
++	 * strictly speaking there is a guard gap between disjoint stacks
++	 * but the gap is not canonical (it might be smaller) and it is
++	 * reasonably safe to assume that we can ignore that gap for stack
++	 * POPULATE or /proc/<pid>[s]maps purposes
++	 */
+ 	next = vma->vm_next;
+-	if (next && next->vm_start == address + PAGE_SIZE) {
+-		if (!(next->vm_flags & VM_GROWSUP))
+-			return -ENOMEM;
+-	}
+-	return expand_upwards(vma, address);
++	if (next && next->vm_flags & VM_GROWSUP)
++		return 0;
++
++	return vma->vm_end - address <= stack_guard_gap;
+ }
+ 
+ struct vm_area_struct *
+@@ -1844,17 +1881,73 @@ find_extend_vma(struct mm_struct *mm, un
+ 	return prev;
+ }
+ #else
++unsigned long expandable_stack_area(struct vm_area_struct *vma,
++		unsigned long address, unsigned long *gap)
++{
++	struct vm_area_struct *prev = vma->vm_prev;
++	unsigned long guard_gap = stack_guard_gap;
++	unsigned long guard_addr;
++
++	address &= PAGE_MASK;
++	if (!prev)
++		goto out;
++
++	/*
++	 * Is there a mapping abutting this one below?
++	 *
++	 * That's only ok if it's the same stack mapping
++	 * that has gotten split or there is sufficient gap
++	 * between mappings
++	 */
++	if (prev->vm_flags & VM_GROWSDOWN) {
++		guard_gap = min(guard_gap, address - prev->vm_end);
++		goto out;
++	}
++
++	if (address - prev->vm_end < guard_gap)
++		return -ENOMEM;
++
++out:
++	/* make sure we won't underflow */
++	if (address < mmap_min_addr)
++		return -ENOMEM;
++	if (address - mmap_min_addr < guard_gap)
++		guard_gap = address - mmap_min_addr;
++
++	guard_addr = address - guard_gap;
++	*gap = guard_gap;
++
++	return guard_addr;
++}
++
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
++	unsigned long gap;
++
++	address = expandable_stack_area(vma, address, &gap);
++	if (IS_ERR_VALUE(address))
++		return -ENOMEM;
++	return expand_downwards(vma, address, gap);
++}
++
++int stack_guard_area(struct vm_area_struct *vma, unsigned long address)
++{
+ 	struct vm_area_struct *prev;
+ 
+-	address &= PAGE_MASK;
++	if (!(vma->vm_flags & VM_GROWSDOWN))
++		return 0;
++
++	/*
++	 * strictly speaking there is a guard gap between disjoint stacks
++	 * but the gap is not canonical (it might be smaller) and it is
++	 * reasonably safe to assume that we can ignore that gap for stack
++	 * POPULATE or /proc/<pid>[s]maps purposes
++	 */
+ 	prev = vma->vm_prev;
+-	if (prev && prev->vm_end == address) {
+-		if (!(prev->vm_flags & VM_GROWSDOWN))
+-			return -ENOMEM;
+-	}
+-	return expand_downwards(vma, address);
++	if (prev && prev->vm_flags & VM_GROWSDOWN)
++		return 0;
++
++	return address - vma->vm_start < stack_guard_gap;
+ }
+ 
+ struct vm_area_struct *
diff --git a/debian/patches/bugfix/all/mm-proc-cap-the-stack-gap-for-unpopulated-growing-vm.patch b/debian/patches/bugfix/all/mm-proc-cap-the-stack-gap-for-unpopulated-growing-vm.patch
new file mode 100644
index 0000000..046eda2
--- /dev/null
+++ b/debian/patches/bugfix/all/mm-proc-cap-the-stack-gap-for-unpopulated-growing-vm.patch
@@ -0,0 +1,45 @@
+From: Michal Hocko <mhocko at suse.com>
+Date: Wed, 14 Jun 2017 08:17:15 +0200
+Subject: mm, proc: cap the stack gap for unpopulated growing vmas
+Bug-Debian: https://security-tracker.debian.org/tracker/CVE-2017-1000364
+
+Oleg has noticed that show_map_vma has been overly eager to cut the
+vma range for growing VMAs. This wasn't a big deal with 4kB stack
+gap but now that the gap is much larger we can simply get a bogus VMA
+range in show_map_vma.
+To quote Oleg
+: On ppc PAGE_SIZE == 64K, so stack_guard_gap == 16M, the application does
+: mmap(..., length=4M, ... MAP_GROWSDOWN) and /proc/pid/maps happily reports
+:
+:       30001000000-30000400000 rw-p 00000000 00:00 0
+
+Let's cap the reported range and show an empty range for this peculiar
+case which is what we have been doing for a long time.  Note that the
+range will expand as soon as the first page fault happens on this range.
+
+Reported-by: Jan Stancek <jstancek at redhat.com>
+Signed-off-by: Michal Hocko <mhocko at suse.com>
+---
+ fs/proc/task_mmu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index 02859e5a8cfc..d37ddc215907 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -234,13 +234,13 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, bool *has_gap)
+ 	end = vma->vm_end;
+ 	if (vma->vm_flags & VM_GROWSDOWN) {
+ 		if (stack_guard_area(vma, start)) {
+-			start += stack_guard_gap;
++			start = min(end, start + stack_guard_gap);
+ 			if (has_gap)
+ 				*has_gap = true;
+ 		}
+ 	} else if (vma->vm_flags & VM_GROWSUP) {
+ 		if (stack_guard_area(vma, end)) {
+-			end -= stack_guard_gap;
++			end = max(start, end - stack_guard_gap);
+ 			if (has_gap)
+ 				*has_gap = true;
+ 		}
diff --git a/debian/patches/series b/debian/patches/series
index a1c18c3..46a35ed 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1110,6 +1110,11 @@ bugfix/all/timer-restrict-timer_stats-to-initial-pid-namespace.patch
 bugfix/all/ipv6-xfrm-handle-errors-reported-by-xfrm6_find_1stfr.patch
 features/all/net-add-kfree_skb_list.patch
 bugfix/all/ipv6-fix-leak-in-ipv6_gso_segment.patch
+bugfix/all/mm-do-not-grow-the-stack-vma-just-because-of-an-over.patch
+bugfix/all/mm-enlarge-stack-guard-gap.patch
+bugfix/all/mm-allow-to-configure-stack-gap-size.patch
+bugfix/all/mm-proc-cap-the-stack-gap-for-unpopulated-growing-vm.patch
+bugfix/all/mm-do-not-collapse-stack-gap-into-thp.patch
 
 # ABI maintenance
 debian/perf-hide-abi-change-in-3.2.30.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/kernel/linux.git



More information about the Kernel-svn-changes mailing list