[kernel] r17228 - in dists/squeeze/linux-2.6/debian: . patches/features/all/xen

Bastian Blank waldi@alioth.debian.org
Mon Apr 18 10:25:59 UTC 2011


Author: waldi
Date: Mon Apr 18 10:25:53 2011
New Revision: 17228

Log:
* debian/changelog: Update.
* debian/patches/features/all/xen/vmalloc-eagerly-clear-ptes-on-vunmap.patch:
  Replace the stand-in patch with the real upstream backport.

Modified:
   dists/squeeze/linux-2.6/debian/changelog
   dists/squeeze/linux-2.6/debian/patches/features/all/xen/vmalloc-eagerly-clear-ptes-on-vunmap.patch

Modified: dists/squeeze/linux-2.6/debian/changelog
==============================================================================
--- dists/squeeze/linux-2.6/debian/changelog	Mon Apr 18 02:33:27 2011	(r17227)
+++ dists/squeeze/linux-2.6/debian/changelog	Mon Apr 18 10:25:53 2011	(r17228)
@@ -12,6 +12,7 @@
   [ Bastian Blank ]
   * [x86] Revert "x86: Cleanup highmap after brk is concluded"
     (closes: #621072)
+  * [xen] Remove lazy vunmap completely. (closes: #613634)
 
 -- Ben Hutchings <ben@decadent.org.uk>  Fri, 08 Apr 2011 01:13:01 +0100
 

Modified: dists/squeeze/linux-2.6/debian/patches/features/all/xen/vmalloc-eagerly-clear-ptes-on-vunmap.patch
==============================================================================
--- dists/squeeze/linux-2.6/debian/patches/features/all/xen/vmalloc-eagerly-clear-ptes-on-vunmap.patch	Mon Apr 18 02:33:27 2011	(r17227)
+++ dists/squeeze/linux-2.6/debian/patches/features/all/xen/vmalloc-eagerly-clear-ptes-on-vunmap.patch	Mon Apr 18 10:25:53 2011	(r17228)
@@ -1,31 +1,110 @@
-From a8bf92faf72dba6e0dd2130256cb81c3e68f672b Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
-Date: Mon, 29 Nov 2010 11:35:04 -0800
-Subject: [PATCH] vmalloc: eagerly clear ptes on vunmap
-
-When unmapping a region in the vmalloc space, clear the ptes immediately.
-There's no point in deferring this because there's no amortization
-benefit.
-
-The TLBs are left dirty, and they are flushed lazily to amortize the
-cost of the IPIs.
-
-The specific motivation for this patch is a regression since 2.6.36 when
-using NFS under Xen, triggered by the NFS client's use of vm_map_ram()
-introduced in 56e4ebf877b6043c289bda32a5a7385b80c17dee.  XFS also uses
-vm_map_ram() and could cause similar problems.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
-Cc: Nick Piggin <npiggin@kernel.dk>
----
- mm/vmalloc.c |    8 +++++---
- 1 files changed, 5 insertions(+), 3 deletions(-)
-
+commit ae333e97552c81ab10395ad1ffc6d6daaadb144a
+Author: Jeremy Fitzhardinge <jeremy@goop.org>
+Date:   Thu Dec 2 14:31:18 2010 -0800
+
+    vmalloc: eagerly clear ptes on vunmap
+    
+    [ Backport from 64141da587241301ce8638cc945f8b67853156ec ]
+    
+    On stock 2.6.37-rc4, running:
+    
+      # mount lilith:/export /mnt/lilith
+      # find  /mnt/lilith/ -type f -print0 | xargs -0 file
+    
+    crashes the machine fairly quickly under Xen.  Often it results in oops
+    messages, but the couple of times I tried just now, it just hung quietly
+    and made Xen print some rude messages:
+    
+        (XEN) mm.c:2389:d80 Bad type (saw 7400000000000001 != exp
+        3000000000000000) for mfn 1d7058 (pfn 18fa7)
+        (XEN) mm.c:964:d80 Attempt to create linear p.t. with write perms
+        (XEN) mm.c:2389:d80 Bad type (saw 7400000000000010 != exp
+        1000000000000000) for mfn 1d2e04 (pfn 1d1fb)
+        (XEN) mm.c:2965:d80 Error while pinning mfn 1d2e04
+    
+    Which means the domain tried to map a pagetable page RW, which would
+    allow it to map arbitrary memory, so Xen stopped it.  This is because
+    vm_unmap_ram() left some pages mapped in the vmalloc area after NFS had
+    finished with them, and those pages got recycled as pagetable pages
+    while still having these RW aliases.
+    
+    Removing those mappings immediately removes the Xen-visible aliases, and
+    so it has no problem with those pages being reused as pagetable pages.
+    Deferring the TLB flush doesn't upset Xen because it can flush the TLB
+    itself as needed to maintain its invariants.
+    
+    When unmapping a region in the vmalloc space, clear the ptes
+    immediately.  There's no point in deferring this because there's no
+    amortization benefit.
+    
+    The TLBs are left dirty, and they are flushed lazily to amortize the
+    cost of the IPIs.
+    
+    The specific motivation for this patch is an oops-causing regression
+    since 2.6.36 when using NFS under Xen, triggered by the NFS client's use
+    of vm_map_ram() introduced in 56e4ebf877b60 ("NFS: readdir with vmapped
+    pages").  XFS also uses vm_map_ram() and could cause similar problems.
+    
+    Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+    Cc: Nick Piggin <npiggin@kernel.dk>
+    Cc: Bryan Schumaker <bjschuma@netapp.com>
+    Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
+    Cc: Alex Elder <aelder@sgi.com>
+    Cc: Dave Chinner <david@fromorbit.com>
+    Cc: Christoph Hellwig <hch@lst.de>
+    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
+index fa36ab8..204e3ba 100644
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -2430,8 +2430,6 @@ void __init xen_init_mmu_ops(void)
+ 	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
+ 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+ 	pv_mmu_ops = xen_mmu_ops;
+-
+-	vmap_lazy_unmap = false;
+ }
+ 
+ /* Protected by xen_reservation_lock. */
+diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
+index 1a2ba21..3c123c3 100644
+--- a/include/linux/vmalloc.h
++++ b/include/linux/vmalloc.h
+@@ -7,8 +7,6 @@
+ 
+ struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
+ 
+-extern bool vmap_lazy_unmap;
+-
+ /* bits in flags of vmalloc's vm_struct below */
+ #define VM_IOREMAP	0x00000001	/* ioremap() and friends */
+ #define VM_ALLOC	0x00000002	/* vmalloc() */
 diff --git a/mm/vmalloc.c b/mm/vmalloc.c
-index 0f551a4..370915c 100644
+index 4f701c2..80cbd7b 100644
 --- a/mm/vmalloc.c
 +++ b/mm/vmalloc.c
-@@ -547,7 +547,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
+@@ -31,8 +31,6 @@
+ #include <asm/tlbflush.h>
+ #include <asm/shmparam.h>
+ 
+-bool vmap_lazy_unmap __read_mostly = true;
+-
+ /*** Page table manipulation functions ***/
+ 
+ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+@@ -503,9 +501,6 @@ static unsigned long lazy_max_pages(void)
+ {
+ 	unsigned int log;
+ 
+-	if (!vmap_lazy_unmap)
+-		return 0;
+-
+ 	log = fls(num_online_cpus());
+ 
+ 	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
+@@ -566,7 +561,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
  			if (va->va_end > *end)
  				*end = va->va_end;
  			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
@@ -33,28 +112,57 @@
  			list_add_tail(&va->purge_list, &valist);
  			va->flags |= VM_LAZY_FREEING;
  			va->flags &= ~VM_LAZY_FREE;
-@@ -599,6 +598,8 @@ static void purge_vmap_area_lazy(void)
+@@ -612,10 +606,11 @@ static void purge_vmap_area_lazy(void)
+ }
+ 
+ /*
+- * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
+- * called for the correct range previously.
++ * Free a vmap area, caller ensuring that the area has been unmapped
++ * and flush_cache_vunmap had been called for the correct range
++ * previously.
   */
- static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+-static void free_unmap_vmap_area_noflush(struct vmap_area *va)
++static void free_vmap_area_noflush(struct vmap_area *va)
  {
-+	unmap_vmap_area(va);
-+
  	va->flags |= VM_LAZY_FREE;
  	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
- 	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
-@@ -869,8 +870,10 @@ static void vb_free(const void *addr, unsigned long size)
- 		BUG_ON(vb->free || !list_empty(&vb->free_list));
- 		spin_unlock(&vb->lock);
- 		free_vmap_block(vb);
--	} else
-+	} else {
- 		spin_unlock(&vb->lock);
-+		vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
-+	}
+@@ -624,6 +619,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+ }
+ 
+ /*
++ * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
++ * called for the correct range previously.
++ */
++static void free_unmap_vmap_area_noflush(struct vmap_area *va)
++{
++	unmap_vmap_area(va);
++	free_vmap_area_noflush(va);
++}
++
++/*
+  * Free and unmap a vmap area
+  */
+ static void free_unmap_vmap_area(struct vmap_area *va)
+@@ -799,7 +804,7 @@ static void free_vmap_block(struct vmap_block *vb)
+ 	spin_unlock(&vmap_block_tree_lock);
+ 	BUG_ON(tmp != vb);
+ 
+-	free_unmap_vmap_area_noflush(vb->va);
++	free_vmap_area_noflush(vb->va);
+ 	call_rcu(&vb->rcu_head, rcu_free_vb);
  }
  
- /**
-@@ -913,7 +916,6 @@ void vm_unmap_aliases(void)
+@@ -936,6 +941,8 @@ static void vb_free(const void *addr, unsigned long size)
+ 	rcu_read_unlock();
+ 	BUG_ON(!vb);
+ 
++	vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
++
+ 	spin_lock(&vb->lock);
+ 	BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
+ 
+@@ -988,7 +995,6 @@ void vm_unmap_aliases(void)
  
  				s = vb->va->va_start + (i << PAGE_SHIFT);
  				e = vb->va->va_start + (j << PAGE_SHIFT);
@@ -62,6 +170,3 @@
  				flush = 1;
  
  				if (s < start)
--- 
-1.7.4.1
-
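
For reference, the trigger for the bug fixed here is the transient mapping
API that the NFS readdir code started using in 2.6.36.  Below is a minimal
sketch of that call pattern; touch_pages() is a hypothetical helper invented
for illustration and is not part of this commit or of the kernel:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Map 'count' pages into a contiguous kernel virtual range, use the
 * mapping, then tear it down again. */
static int touch_pages(struct page **pages, unsigned int count)
{
	void *addr = vm_map_ram(pages, count, -1, PAGE_KERNEL);

	if (!addr)
		return -ENOMEM;

	/* ... read or write the pages through 'addr' ... */

	/*
	 * Before this patch, vm_unmap_ram() deferred clearing the ptes;
	 * a page could then be recycled as a Xen pagetable page while a
	 * stale RW alias still existed, and Xen would refuse to pin it.
	 * With the patch, the ptes are cleared right here and only the
	 * TLB flush stays lazy.
	 */
	vm_unmap_ram(addr, count);
	return 0;
}

The laziness that remains after this patch is the TLB flush, bounded by
lazy_max_pages() as seen in the hunk above: fls(num_online_cpus()) times
32MB worth of pages, e.g. fls(8) * 32MB / 4KB = 4 * 8192 = 32768 lazily
freed pages on an 8-CPU machine before a purge is forced.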


