[kernel] r16044 - in dists/sid/linux-2.6/debian: . config/featureset-xen patches/features/all/xen

Bastian Blank waldi at alioth.debian.org
Mon Aug 2 14:46:35 UTC 2010


Author: waldi
Date: Mon Aug  2 14:46:31 2010
New Revision: 16044

Log:
* debian/changelog: Update.
* debian/patches/features/all/xen/pvops.patch: Update to commit 78b55f90e723 of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git.
* debian/patches/features/all/xen/pvops-update.patch: Refresh so that it still applies on top of the updated pvops.patch.
* debian/config/featureset-xen/defines: Ignore all ABI changes.
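
Note: the updated pvops.patch introduces a xen_emul_unplug= kernel
parameter (see the Documentation/kernel-parameters.txt hunk below). As an
illustrative example only: an HVM guest that should hand its emulated IDE
disks and NICs over to the paravirtualised frontends could boot with

    xen_emul_unplug=ide-disks,nics

after which the Xen platform PCI driver unplugs the corresponding emulated
devices before the PV drivers attach.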

Modified:
   dists/sid/linux-2.6/debian/changelog
   dists/sid/linux-2.6/debian/config/featureset-xen/defines
   dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch
   dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
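
Among other things, the pvops.patch refresh below rewrites the x86
cmpxchg/xchg helpers so that the memory operand is declared read-write
("+m") instead of input-only ("m"); with a plain "m" input the compiler is
free to assume the location is unchanged across the asm. A minimal
self-contained sketch of the corrected pattern (illustrative only, not the
kernel's actual size-dispatching helper):

    static inline unsigned int cmpxchg_u32(volatile unsigned int *ptr,
                                           unsigned int old,
                                           unsigned int new)
    {
            unsigned int prev;

            /* "+m"(*ptr) declares the read-modify-write on memory */
            asm volatile("lock; cmpxchgl %2,%1"
                         : "=a" (prev), "+m" (*ptr)
                         : "r" (new), "0" (old)
                         : "memory");
            return prev;  /* previous value; equals 'old' on success */
    }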

Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog	Mon Aug  2 12:16:57 2010	(r16043)
+++ dists/sid/linux-2.6/debian/changelog	Mon Aug  2 14:46:31 2010	(r16044)
@@ -25,6 +25,10 @@
   [ dann frazier ]
   * [ia64] Fix crash when gcore reads gate area (Closes: #588574)
 
+  [ Bastian Blank ]
+  * Update Xen patch.
+    - Ignore ABI changes.
+
  -- Ben Hutchings <ben at decadent.org.uk>  Sat, 24 Jul 2010 00:41:51 +0100
 
 linux-2.6 (2.6.32-18) unstable; urgency=low

Modified: dists/sid/linux-2.6/debian/config/featureset-xen/defines
==============================================================================
--- dists/sid/linux-2.6/debian/config/featureset-xen/defines	Mon Aug  2 12:16:57 2010	(r16043)
+++ dists/sid/linux-2.6/debian/config/featureset-xen/defines	Mon Aug  2 14:46:31 2010	(r16044)
@@ -1,3 +1,6 @@
+[abi]
+ignore-changes: *
+
 [description]
 parts: xen
 part-long-xen: This kernel also runs on a Xen hypervisor.
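
The new [abi] stanza above switches off the ABI checker for the whole xen
featureset: with ignore-changes matching every exported symbol, the updated
patch may change symbol versions without forcing an ABI bump. For
comparison, a hypothetical defines excerpt (not part of this commit)
ignoring only selected symbols might look like:

    [abi]
    ignore-changes:
     xen_*
     module:drivers/xen/*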

Modified: dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch	Mon Aug  2 12:16:57 2010	(r16043)
+++ dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch	Mon Aug  2 14:46:31 2010	(r16044)
@@ -1,16 +1,27 @@
 diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index 70636d0..88262bb 100644
+index 99831c7..1930f64 100644
 --- a/drivers/xen/netback/xenbus.c
 +++ b/drivers/xen/netback/xenbus.c
-@@ -163,7 +163,6 @@ fail:
+@@ -162,17 +162,11 @@ fail:
+  */
  static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
  {
- 	struct backend_info *be = dev_get_drvdata(&xdev->dev);
--	struct xen_netif *netif = be->netif;
+-	struct backend_info *be;
+-	struct xen_netif *netif;
++	struct backend_info *be = dev_get_drvdata(&xdev->dev);
  	char *val;
  
  	DPRINTK("netback_uevent");
-@@ -182,7 +181,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+ 
+-	be = dev_get_drvdata(&xdev->dev);
+-	if (!be)
+-		return 0;
+-	netif = be->netif;
+-
+ 	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ 	if (IS_ERR(val)) {
+ 		int err = PTR_ERR(val);
+@@ -187,7 +181,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
  		kfree(val);
  	}
  

Modified: dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch	Mon Aug  2 12:16:57 2010	(r16043)
+++ dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch	Mon Aug  2 14:46:31 2010	(r16044)
@@ -1,6 +1,35 @@
-Patch based on commit f6fe6583b77a49b569eef1b66c3d761eec2e561b of
+Patch based on commit 78b55f90e72348e231092dbe3e50ac7414b9e1af of
 git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git.
 
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index 5f6aa11..3e30e60 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -113,6 +113,7 @@ parameter is applicable:
+ 			More X86-64 boot options can be found in
+ 			Documentation/x86/x86_64/boot-options.txt .
+ 	X86	Either 32bit or 64bit x86 (same as X86-32+X86-64)
++	XEN	Xen support is enabled
+ 
+ In addition, the following text indicates that the option:
+ 
+@@ -2760,6 +2761,16 @@ and is between 256 and 4096 characters. It is defined in the file
+ 	xd=		[HW,XT] Original XT pre-IDE (RLL encoded) disks.
+ 	xd_geo=		See header of drivers/block/xd.c.
+ 
++	xen_emul_unplug=		[HW,X86,XEN]
++			Unplug Xen emulated devices
++			Format: [unplug0,][unplug1]
++			ide-disks -- unplug primary master IDE devices
++			aux-ide-disks -- unplug non-primary-master IDE devices
++			nics -- unplug network devices
++			all -- unplug all emulated devices (NICs and IDE disks)
++			ignore -- continue loading the Xen platform PCI driver even
++				if the version check failed
++
+ 	xirc2ps_cs=	[NET,PCMCIA]
+ 			Format:
+ 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
 diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
 index 29a6ff8..81f9b94 100644
 --- a/Documentation/x86/x86_64/boot-options.txt
@@ -178,6 +207,332 @@
  static inline void detect_calgary(void) { return; }
  #endif
  
+diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
+index ee1931b..5af5051 100644
+--- a/arch/x86/include/asm/cmpxchg_32.h
++++ b/arch/x86/include/asm/cmpxchg_32.h
+@@ -34,12 +34,12 @@ static inline void __set_64bit(unsigned long long *ptr,
+ 			       unsigned int low, unsigned int high)
+ {
+ 	asm volatile("\n1:\t"
+-		     "movl (%0), %%eax\n\t"
+-		     "movl 4(%0), %%edx\n\t"
+-		     LOCK_PREFIX "cmpxchg8b (%0)\n\t"
++		     "movl (%1), %%eax\n\t"
++		     "movl 4(%1), %%edx\n\t"
++		     LOCK_PREFIX "cmpxchg8b %0\n\t"
+ 		     "jnz 1b"
+-		     : /* no outputs */
+-		     : "D"(ptr),
++		     : "=m"(*ptr)
++		     : "D" (ptr),
+ 		       "b"(low),
+ 		       "c"(high)
+ 		     : "ax", "dx", "memory");
+@@ -82,20 +82,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+ 	switch (size) {
+ 	case 1:
+ 		asm volatile("xchgb %b0,%1"
+-			     : "=q" (x)
+-			     : "m" (*__xg(ptr)), "0" (x)
++			     : "=q" (x), "+m" (*__xg(ptr))
++			     : "0" (x)
+ 			     : "memory");
+ 		break;
+ 	case 2:
+ 		asm volatile("xchgw %w0,%1"
+-			     : "=r" (x)
+-			     : "m" (*__xg(ptr)), "0" (x)
++			     : "=r" (x), "+m" (*__xg(ptr))
++			     : "0" (x)
+ 			     : "memory");
+ 		break;
+ 	case 4:
+ 		asm volatile("xchgl %0,%1"
+-			     : "=r" (x)
+-			     : "m" (*__xg(ptr)), "0" (x)
++			     : "=r" (x), "+m" (*__xg(ptr))
++			     : "0" (x)
+ 			     : "memory");
+ 		break;
+ 	}
+@@ -139,21 +139,21 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ 	unsigned long prev;
+ 	switch (size) {
+ 	case 1:
+-		asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+-			     : "=a"(prev)
+-			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "q"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 2:
+-		asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 4:
+-		asm volatile(LOCK_PREFIX "cmpxchgl %1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile(LOCK_PREFIX "cmpxchgl %2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	}
+@@ -172,21 +172,21 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ 	unsigned long prev;
+ 	switch (size) {
+ 	case 1:
+-		asm volatile("lock; cmpxchgb %b1,%2"
+-			     : "=a"(prev)
+-			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("lock; cmpxchgb %b2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "q"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 2:
+-		asm volatile("lock; cmpxchgw %w1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("lock; cmpxchgw %w2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 4:
+-		asm volatile("lock; cmpxchgl %1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("lock; cmpxchgl %2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	}
+@@ -200,21 +200,21 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ 	unsigned long prev;
+ 	switch (size) {
+ 	case 1:
+-		asm volatile("cmpxchgb %b1,%2"
+-			     : "=a"(prev)
+-			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("cmpxchgb %b2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "q"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 2:
+-		asm volatile("cmpxchgw %w1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("cmpxchgw %w2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 4:
+-		asm volatile("cmpxchgl %1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("cmpxchgl %2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	}
+@@ -226,11 +226,10 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr,
+ 					     unsigned long long new)
+ {
+ 	unsigned long long prev;
+-	asm volatile(LOCK_PREFIX "cmpxchg8b %3"
+-		     : "=A"(prev)
++	asm volatile(LOCK_PREFIX "cmpxchg8b %1"
++		     : "=A"(prev), "+m" (*__xg(ptr))
+ 		     : "b"((unsigned long)new),
+ 		       "c"((unsigned long)(new >> 32)),
+-		       "m"(*__xg(ptr)),
+ 		       "0"(old)
+ 		     : "memory");
+ 	return prev;
+@@ -241,11 +240,10 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
+ 						   unsigned long long new)
+ {
+ 	unsigned long long prev;
+-	asm volatile("cmpxchg8b %3"
+-		     : "=A"(prev)
++	asm volatile("cmpxchg8b %1"
++		     : "=A"(prev), "+m"(*__xg(ptr))
+ 		     : "b"((unsigned long)new),
+ 		       "c"((unsigned long)(new >> 32)),
+-		       "m"(*__xg(ptr)),
+ 		       "0"(old)
+ 		     : "memory");
+ 	return prev;
+diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
+index 52de72e..1871cb0 100644
+--- a/arch/x86/include/asm/cmpxchg_64.h
++++ b/arch/x86/include/asm/cmpxchg_64.h
+@@ -26,26 +26,26 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+ 	switch (size) {
+ 	case 1:
+ 		asm volatile("xchgb %b0,%1"
+-			     : "=q" (x)
+-			     : "m" (*__xg(ptr)), "0" (x)
++			     : "=q" (x), "+m" (*__xg(ptr))
++			     : "0" (x)
+ 			     : "memory");
+ 		break;
+ 	case 2:
+ 		asm volatile("xchgw %w0,%1"
+-			     : "=r" (x)
+-			     : "m" (*__xg(ptr)), "0" (x)
++			     : "=r" (x), "+m" (*__xg(ptr))
++			     : "0" (x)
+ 			     : "memory");
+ 		break;
+ 	case 4:
+ 		asm volatile("xchgl %k0,%1"
+-			     : "=r" (x)
+-			     : "m" (*__xg(ptr)), "0" (x)
++			     : "=r" (x), "+m" (*__xg(ptr))
++			     : "0" (x)
+ 			     : "memory");
+ 		break;
+ 	case 8:
+ 		asm volatile("xchgq %0,%1"
+-			     : "=r" (x)
+-			     : "m" (*__xg(ptr)), "0" (x)
++			     : "=r" (x), "+m" (*__xg(ptr))
++			     : "0" (x)
+ 			     : "memory");
+ 		break;
+ 	}
+@@ -66,27 +66,27 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ 	unsigned long prev;
+ 	switch (size) {
+ 	case 1:
+-		asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+-			     : "=a"(prev)
+-			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "q"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 2:
+-		asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 4:
+-		asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile(LOCK_PREFIX "cmpxchgl %k2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 8:
+-		asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile(LOCK_PREFIX "cmpxchgq %2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	}
+@@ -105,21 +105,27 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ 	unsigned long prev;
+ 	switch (size) {
+ 	case 1:
+-		asm volatile("lock; cmpxchgb %b1,%2"
+-			     : "=a"(prev)
+-			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("lock; cmpxchgb %b2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "q"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 2:
+-		asm volatile("lock; cmpxchgw %w1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("lock; cmpxchgw %w2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 4:
+-		asm volatile("lock; cmpxchgl %1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("lock; cmpxchgl %k2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
++			     : "memory");
++		return prev;
++	case 8:
++		asm volatile("lock; cmpxchgq %2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	}
+@@ -133,27 +139,27 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ 	unsigned long prev;
+ 	switch (size) {
+ 	case 1:
+-		asm volatile("cmpxchgb %b1,%2"
+-			     : "=a"(prev)
+-			     : "q"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("cmpxchgb %b2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "q"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 2:
+-		asm volatile("cmpxchgw %w1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("cmpxchgw %w2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 4:
+-		asm volatile("cmpxchgl %k1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("cmpxchgl %k2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	case 8:
+-		asm volatile("cmpxchgq %1,%2"
+-			     : "=a"(prev)
+-			     : "r"(new), "m"(*__xg(ptr)), "0"(old)
++		asm volatile("cmpxchgq %2,%1"
++			     : "=a"(prev), "+m"(*__xg(ptr))
++			     : "r"(new), "0"(old)
+ 			     : "memory");
+ 		return prev;
+ 	}
 diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
 index 6a25d5d..ac91eed 100644
 --- a/arch/x86/include/asm/dma-mapping.h
@@ -258,10 +613,10 @@
  #define hpet_readl(a) 0
  
 diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
-index 439a9ac..4cfd4de 100644
+index 439a9ac..bf88684 100644
 --- a/arch/x86/include/asm/hugetlb.h
 +++ b/arch/x86/include/asm/hugetlb.h
-@@ -36,16 +36,24 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+@@ -36,16 +36,28 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
  	free_pgd_range(tlb, addr, end, floor, ceiling);
  }
  
@@ -274,7 +629,11 @@
  				   pte_t *ptep, pte_t pte)
  {
 -	set_pte_at(mm, addr, ptep, pte);
-+	set_pmd((pmd_t *)ptep, __pmd(pte_val(pte)));
++#if PAGETABLE_LEVELS >= 3
++	set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte)));
++#else
++	set_pgd((pgd_t *)ptep, native_make_pgd(native_pte_val(pte)));
++#endif
  }
  
  static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
@@ -288,7 +647,7 @@
  }
  
  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-@@ -66,19 +74,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
+@@ -66,19 +78,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
  static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
  					   unsigned long addr, pte_t *ptep)
  {
@@ -386,10 +745,23 @@
  extern int force_iommu, no_iommu;
  extern int iommu_detected;
 diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
-index 6e90a04..451a45b 100644
+index 6e90a04..ba4dc7b 100644
 --- a/arch/x86/include/asm/irq_vectors.h
 +++ b/arch/x86/include/asm/irq_vectors.h
-@@ -157,6 +157,14 @@ static inline int invalid_vm86_irq(int irq)
+@@ -120,6 +120,12 @@
+  */
+ #define MCE_SELF_VECTOR			0xeb
+ 
++#ifdef CONFIG_XEN
++/* Xen vector callback to receive events in a HVM domain */
++#define XEN_HVM_EVTCHN_CALLBACK		0xe9
++#endif
++
++
+ /*
+  * First APIC vector available to drivers: (vectors 0x30-0xee) we
+  * start at 0x31(0x41) to spread out vectors evenly between priority
+@@ -157,6 +163,14 @@ static inline int invalid_vm86_irq(int irq)
  #define CPU_VECTOR_LIMIT		(  8 * NR_CPUS      )
  #define IO_APIC_VECTOR_LIMIT		( 32 * MAX_IO_APICS )
  
@@ -404,7 +776,7 @@
  #ifdef CONFIG_X86_IO_APIC
  # ifdef CONFIG_SPARSE_IRQ
  #  define NR_IRQS					\
-@@ -165,13 +173,13 @@ static inline int invalid_vm86_irq(int irq)
+@@ -165,13 +179,13 @@ static inline int invalid_vm86_irq(int irq)
  		(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
  # else
  #  if NR_CPUS < MAX_IO_APICS
@@ -454,7 +826,7 @@
  
  #ifdef CONFIG_SMP
 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
-index efb3899..63a55bc 100644
+index efb3899..e571db4 100644
 --- a/arch/x86/include/asm/paravirt.h
 +++ b/arch/x86/include/asm/paravirt.h
 @@ -330,11 +330,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
@@ -476,6 +848,67 @@
  /* The paravirtualized I/O functions */
  static inline void slow_down_io(void)
  {
+@@ -770,15 +777,28 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+ #define PV_RESTORE_REGS "popl %edx; popl %ecx;"
+ 
+ /* save and restore all caller-save registers, except return value */
+-#define PV_SAVE_ALL_CALLER_REGS		"pushl %ecx;"
+-#define PV_RESTORE_ALL_CALLER_REGS	"popl  %ecx;"
++#define __PV_SAVE_ALL_CALLER_REGS	"pushl %ecx;"
++#define __PV_RESTORE_ALL_CALLER_REGS	"popl  %ecx;"
++
++#ifdef CONFIG_FRAME_POINTER
++#define PV_SAVE_ALL_CALLER_REGS			\
++	"push %ebp;"				\
++	"mov %esp, %ebp;"			\
++	__PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS		\
++	__PV_RESTORE_ALL_CALLER_REGS		\
++	"leave;"
++#else
++#define PV_SAVE_ALL_CALLER_REGS		__PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS	__PV_RESTORE_ALL_CALLER_REGS
++#endif
+ 
+ #define PV_FLAGS_ARG "0"
+ #define PV_EXTRA_CLOBBERS
+ #define PV_VEXTRA_CLOBBERS
+ #else
+ /* save and restore all caller-save registers, except return value */
+-#define PV_SAVE_ALL_CALLER_REGS						\
++#define __PV_SAVE_ALL_CALLER_REGS					\
+ 	"push %rcx;"							\
+ 	"push %rdx;"							\
+ 	"push %rsi;"							\
+@@ -787,7 +807,7 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+ 	"push %r9;"							\
+ 	"push %r10;"							\
+ 	"push %r11;"
+-#define PV_RESTORE_ALL_CALLER_REGS					\
++#define __PV_RESTORE_ALL_CALLER_REGS					\
+ 	"pop %r11;"							\
+ 	"pop %r10;"							\
+ 	"pop %r9;"							\
+@@ -797,6 +817,19 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+ 	"pop %rdx;"							\
+ 	"pop %rcx;"
+ 
++#ifdef CONFIG_FRAME_POINTER
++#define PV_SAVE_ALL_CALLER_REGS			\
++	"push %rbp;"				\
++	"mov %rsp, %rbp;"			\
++	__PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS		\
++	__PV_RESTORE_ALL_CALLER_REGS		\
++	"leaveq;"
++#else
++#define PV_SAVE_ALL_CALLER_REGS		__PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS	__PV_RESTORE_ALL_CALLER_REGS
++#endif
++
+ /* We save some registers, but all of them, that's too much. We clobber all
+  * caller saved registers but the argument parameter */
+ #define PV_SAVE_REGS "pushq %%rdi;"
 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
 index 9357473..3202dcc 100644
 --- a/arch/x86/include/asm/paravirt_types.h
@@ -822,10 +1255,10 @@
  
  static inline void
 diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
-index d5b7e90..8d5e15a 100644
+index d5b7e90..396ff4c 100644
 --- a/arch/x86/include/asm/xen/hypervisor.h
 +++ b/arch/x86/include/asm/xen/hypervisor.h
-@@ -37,31 +37,10 @@
+@@ -37,31 +37,4 @@
  extern struct shared_info *HYPERVISOR_shared_info;
  extern struct start_info *xen_start_info;
  
@@ -835,14 +1268,12 @@
 -	XEN_HVM_DOMAIN,		/* running in a Xen hvm domain */
 -};
 -
- #ifdef CONFIG_XEN
+-#ifdef CONFIG_XEN
 -extern enum xen_domain_type xen_domain_type;
-+extern void xen_guest_init(void);
- #else
+-#else
 -#define xen_domain_type		XEN_NATIVE
-+#define xen_guest_init() do { } while (0)
- #endif
- 
+-#endif
+-
 -#define xen_domain()		(xen_domain_type != XEN_NATIVE)
 -#define xen_pv_domain()		(xen_domain() &&			\
 -				 xen_domain_type == XEN_PV_DOMAIN)
@@ -1260,7 +1691,7 @@
  #include "sleep.h"
  
 diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
-index c0ebc63..c8b5021 100644
+index f0fa7a1..0c1876b 100644
 --- a/arch/x86/kernel/amd_iommu.c
 +++ b/arch/x86/kernel/amd_iommu.c
 @@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
@@ -1462,7 +1893,7 @@
  	} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
  		   force_iommu ||
 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
-index dc4f486..dfb14f9 100644
+index dc4f486..7c954ff 100644
 --- a/arch/x86/kernel/apic/io_apic.c
 +++ b/arch/x86/kernel/apic/io_apic.c
 @@ -63,7 +63,12 @@
@@ -1514,7 +1945,7 @@
  	if (type == PCI_CAP_ID_MSI && nvec > 1)
  		return 1;
  
-+	if (xen_domain())
++	if (xen_pv_domain())
 +		return xen_pci_setup_msi_irqs(dev, nvec, type);
 +
  	node = dev_to_node(&dev->dev);
@@ -1887,8 +2318,35 @@
  #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
  
  static void kdump_nmi_callback(int cpu, struct die_args *args)
+diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
+index c097e7d..21feb03 100644
+--- a/arch/x86/kernel/entry_32.S
++++ b/arch/x86/kernel/entry_32.S
+@@ -1088,6 +1088,8 @@ ENTRY(xen_failsafe_callback)
+ .previous
+ ENDPROC(xen_failsafe_callback)
+ 
++BUILD_INTERRUPT(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK)
++
+ #endif	/* CONFIG_XEN */
+ 
+ #ifdef CONFIG_FUNCTION_TRACER
+diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
+index b5c061f..1bf0911 100644
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -1364,6 +1364,9 @@ ENTRY(xen_failsafe_callback)
+ 	CFI_ENDPROC
+ END(xen_failsafe_callback)
+ 
++apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
++	xen_hvm_callback_vector smp_xen_hvm_callback_vector
++
+ #endif /* CONFIG_XEN */
+ 
+ /*
 diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
-index 74f5a3f..b69c4e8 100644
+index 74f5a3f..9712ffc 100644
 --- a/arch/x86/kernel/hpet.c
 +++ b/arch/x86/kernel/hpet.c
 @@ -98,7 +98,7 @@ static int __init hpet_setup(char *str)
@@ -1900,6 +2358,33 @@
  {
  	boot_hpet_disable = 1;
  	return 1;
+@@ -949,16 +949,18 @@ fs_initcall(hpet_late_init);
+ 
+ void hpet_disable(void)
+ {
+-	if (is_hpet_capable()) {
+-		unsigned long cfg = hpet_readl(HPET_CFG);
++	unsigned int cfg;
+ 
+-		if (hpet_legacy_int_enabled) {
+-			cfg &= ~HPET_CFG_LEGACY;
+-			hpet_legacy_int_enabled = 0;
+-		}
+-		cfg &= ~HPET_CFG_ENABLE;
+-		hpet_writel(cfg, HPET_CFG);
++	if (!is_hpet_capable() || !hpet_address || !hpet_virt_address)
++		return;
++
++	cfg = hpet_readl(HPET_CFG);
++	if (hpet_legacy_int_enabled) {
++		cfg &= ~HPET_CFG_LEGACY;
++		hpet_legacy_int_enabled = 0;
+ 	}
++	cfg &= ~HPET_CFG_ENABLE;
++	hpet_writel(cfg, HPET_CFG);
+ }
+ 
+ #ifdef CONFIG_HPET_EMULATE_RTC
 diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
 index 99c4d30..919c1a8 100644
 --- a/arch/x86/kernel/ioport.c
@@ -2717,7 +3202,7 @@
  	}
  }
 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index d0ba107..0b4f9d1 100644
+index 5fd5b07..11d8667 100644
 --- a/arch/x86/kernel/process.c
 +++ b/arch/x86/kernel/process.c
 @@ -73,16 +73,12 @@ void exit_thread(void)
@@ -2787,10 +3272,18 @@
  }
  
 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
-index 8425f7e..abd6489 100644
+index d7a0888..594e324 100644
 --- a/arch/x86/kernel/setup.c
 +++ b/arch/x86/kernel/setup.c
-@@ -89,6 +89,7 @@
+@@ -70,6 +70,7 @@
+ #include <linux/tboot.h>
+ 
+ #include <video/edid.h>
++#include <xen/xen.h>
+ 
+ #include <asm/mtrr.h>
+ #include <asm/apic.h>
+@@ -89,6 +90,7 @@
  #include <asm/cacheflush.h>
  #include <asm/processor.h>
  #include <asm/bugs.h>
@@ -2798,15 +3291,7 @@
  
  #include <asm/system.h>
  #include <asm/vsyscall.h>
-@@ -102,6 +103,7 @@
- 
- #include <asm/paravirt.h>
- #include <asm/hypervisor.h>
-+#include <asm/xen/hypervisor.h>
- 
- #include <asm/percpu.h>
- #include <asm/topology.h>
-@@ -955,6 +957,9 @@ void __init setup_arch(char **cmdline_p)
+@@ -966,6 +968,9 @@ void __init setup_arch(char **cmdline_p)
  
  	initmem_init(0, max_pfn);
  
@@ -2816,11 +3301,11 @@
  #ifdef CONFIG_ACPI_SLEEP
  	/*
  	 * Reserve low memory region for sleep support.
-@@ -1023,6 +1028,7 @@ void __init setup_arch(char **cmdline_p)
+@@ -1034,6 +1039,7 @@ void __init setup_arch(char **cmdline_p)
  	probe_nr_irqs_gsi();
  
  	kvm_guest_init();
-+	xen_guest_init();
++	xen_hvm_guest_init();
  
  	e820_reserve_resources();
  	e820_mark_nosave_regions(max_low_pfn);
@@ -3282,10 +3767,21 @@
 +}
 +
 diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
-index b83e119..7675f9b 100644
+index b83e119..3db328f 100644
 --- a/arch/x86/xen/Kconfig
 +++ b/arch/x86/xen/Kconfig
-@@ -36,3 +36,40 @@ config XEN_DEBUG_FS
+@@ -29,6 +29,10 @@ config XEN_SAVE_RESTORE
+        depends on XEN && PM
+        default y
+ 
++config XEN_SCHED_CLOCK
++       bool
++       default n
++
+ config XEN_DEBUG_FS
+ 	bool "Enable Xen debug and tuning parameters in debugfs"
+ 	depends on XEN && DEBUG_FS
+@@ -36,3 +40,40 @@ config XEN_DEBUG_FS
  	help
  	  Enable statistics output and various tuning options in debugfs.
  	  Enabling this option may incur a significant performance overhead.
@@ -3327,10 +3823,16 @@
 +         Enable support for passing PCI devices through to
 +	 unprivileged domains. (COMPLETELY UNTESTED)
 diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
-index 3bb4fc2..08ac224 100644
+index 3bb4fc2..13ca65c 100644
 --- a/arch/x86/xen/Makefile
 +++ b/arch/x86/xen/Makefile
-@@ -17,4 +17,7 @@ obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
+@@ -12,9 +12,12 @@ CFLAGS_mmu.o			:= $(nostackp)
+ 
+ obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
+ 			time.o xen-asm.o xen-asm_$(BITS).o \
+-			grant-table.o suspend.o
++			grant-table.o suspend.o platform-pci-unplug.o
+ 
  obj-$(CONFIG_SMP)		+= smp.o
  obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
  obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
@@ -3380,10 +3882,18 @@
 +#endif
 +}
 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 3578688..7638cd6 100644
+index 3578688..b20e9c5 100644
 --- a/arch/x86/xen/enlighten.c
 +++ b/arch/x86/xen/enlighten.c
-@@ -28,12 +28,19 @@
+@@ -11,6 +11,7 @@
+  * Jeremy Fitzhardinge <jeremy at xensource.com>, XenSource Inc, 2007
+  */
+ 
++#include <linux/cpu.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+ #include <linux/smp.h>
+@@ -28,12 +29,15 @@
  #include <linux/highmem.h>
  #include <linux/console.h>
  
@@ -3393,17 +3903,21 @@
  #include <xen/interface/physdev.h>
  #include <xen/interface/vcpu.h>
 +#include <xen/interface/memory.h>
-+#include <xen/interface/hvm/hvm_op.h>
-+#include <xen/interface/hvm/params.h>
-+#include <xen/interface/platform_pci.h>
  #include <xen/features.h>
  #include <xen/page.h>
 +#include <xen/hvm.h>
-+#include <xen/events.h>
  #include <xen/hvc-console.h>
  
  #include <asm/paravirt.h>
-@@ -66,6 +73,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+@@ -53,6 +57,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/reboot.h>
+ #include <asm/stackprotector.h>
++#include <asm/hypervisor.h>
+ 
+ #include "xen-ops.h"
+ #include "mmu.h"
+@@ -66,6 +71,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
  enum xen_domain_type xen_domain_type = XEN_NATIVE;
  EXPORT_SYMBOL_GPL(xen_domain_type);
  
@@ -3415,17 +3929,48 @@
  struct start_info *xen_start_info;
  EXPORT_SYMBOL_GPL(xen_start_info);
  
-@@ -73,6 +85,9 @@ struct shared_info xen_dummy_shared_info;
+@@ -73,6 +83,9 @@ struct shared_info xen_dummy_shared_info;
  
  void *xen_initial_gdt;
  
-+int xen_have_vector_callback;
-+int unplug;
++__read_mostly int xen_have_vector_callback;
++EXPORT_SYMBOL_GPL(xen_have_vector_callback);
 +
  /*
   * Point at some empty memory to start with. We map the real shared_info
   * page as soon as fixmap is up and running.
-@@ -167,13 +182,16 @@ static void __init xen_banner(void)
+@@ -101,13 +114,17 @@ static void xen_vcpu_setup(int cpu)
+ 	struct vcpu_info *vcpup;
+ 
+ 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+-	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ 
+-	if (!have_vcpu_info_placement)
+-		return;		/* already tested, not available */
++	if (cpu < MAX_VIRT_CPUS)
++		per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ 
+-	vcpup = &per_cpu(xen_vcpu_info, cpu);
++	if (!have_vcpu_info_placement) {
++		if (cpu >= MAX_VIRT_CPUS && setup_max_cpus > MAX_VIRT_CPUS)
++			setup_max_cpus = MAX_VIRT_CPUS;
++		return;
++	}
+ 
++	vcpup = &per_cpu(xen_vcpu_info, cpu);
+ 	info.mfn = arbitrary_virt_to_mfn(vcpup);
+ 	info.offset = offset_in_page(vcpup);
+ 
+@@ -122,6 +139,8 @@ static void xen_vcpu_setup(int cpu)
+ 	if (err) {
+ 		printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
+ 		have_vcpu_info_placement = 0;
++		if (setup_max_cpus > MAX_VIRT_CPUS)
++			setup_max_cpus = MAX_VIRT_CPUS;
+ 	} else {
+ 		/* This cpu is using the registered vcpu info, even if
+ 		   later ones fail to. */
+@@ -167,13 +186,16 @@ static void __init xen_banner(void)
  
  	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
  	       pv_info.name);
@@ -3444,7 +3989,7 @@
  
  static void xen_cpuid(unsigned int *ax, unsigned int *bx,
  		      unsigned int *cx, unsigned int *dx)
-@@ -187,7 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+@@ -187,7 +209,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
  	 * unsupported kernel subsystems as possible.
  	 */
  	switch (*ax) {
@@ -3453,7 +3998,7 @@
  		maskecx = cpuid_leaf1_ecx_mask;
  		maskedx = cpuid_leaf1_edx_mask;
  		break;
-@@ -196,6 +214,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+@@ -196,6 +218,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
  		/* Suppress extended topology stuff */
  		maskebx = 0;
  		break;
@@ -3464,7 +4009,7 @@
  	}
  
  	asm(XEN_EMULATE_PREFIX "cpuid"
-@@ -215,13 +237,15 @@ static __init void xen_init_cpuid_mask(void)
+@@ -215,13 +241,15 @@ static __init void xen_init_cpuid_mask(void)
  	unsigned int ax, bx, cx, dx;
  
  	cpuid_leaf1_edx_mask =
@@ -3484,7 +4029,7 @@
  			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
  
  	ax = 1;
-@@ -406,7 +430,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+@@ -406,7 +434,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
  
  		pte = pfn_pte(pfn, PAGE_KERNEL_RO);
  
@@ -3493,7 +4038,7 @@
  			BUG();
  
  		frames[f] = mfn;
-@@ -517,13 +541,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
+@@ -517,13 +545,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
  		return 0;
  #ifdef CONFIG_X86_MCE
  	} else if (addr == (unsigned long)machine_check) {
@@ -3513,7 +4058,7 @@
  #endif	/* CONFIG_X86_64 */
  	info->address = addr;
  
-@@ -679,6 +703,18 @@ static void xen_set_iopl_mask(unsigned mask)
+@@ -679,6 +707,18 @@ static void xen_set_iopl_mask(unsigned mask)
  	HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
  }
  
@@ -3532,7 +4077,7 @@
  static void xen_io_delay(void)
  {
  }
-@@ -716,7 +752,7 @@ static u32 xen_safe_apic_wait_icr_idle(void)
+@@ -716,7 +756,7 @@ static u32 xen_safe_apic_wait_icr_idle(void)
          return 0;
  }
  
@@ -3541,7 +4086,7 @@
  {
  	apic->read = xen_apic_read;
  	apic->write = xen_apic_write;
-@@ -728,7 +764,6 @@ static void set_xen_basic_apic_ops(void)
+@@ -728,7 +768,6 @@ static void set_xen_basic_apic_ops(void)
  
  #endif
  
@@ -3549,7 +4094,7 @@
  static void xen_clts(void)
  {
  	struct multicall_space mcs;
-@@ -811,6 +846,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+@@ -811,6 +850,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
  		   Xen console noise. */
  		break;
  
@@ -3561,6 +4106,17 @@
  	default:
  		ret = native_write_msr_safe(msr, low, high);
  	}
+@@ -923,10 +967,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {
+ 	.patch = xen_patch,
+ };
+ 
+-static const struct pv_time_ops xen_time_ops __initdata = {
+-	.sched_clock = xen_sched_clock,
+-};
+-
+ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+ 	.cpuid = xen_cpuid,
+ 
 @@ -978,6 +1018,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
  	.load_sp0 = xen_load_sp0,
  
@@ -3593,7 +4149,7 @@
  	.shutdown = xen_machine_halt,
  	.crash_shutdown = xen_crash_shutdown,
  	.emergency_restart = xen_emergency_restart,
-@@ -1061,6 +1110,8 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1061,10 +1110,11 @@ asmlinkage void __init xen_start_kernel(void)
  
  	xen_domain_type = XEN_PV_DOMAIN;
  
@@ -3602,20 +4158,26 @@
  	/* Install Xen paravirt ops */
  	pv_info = xen_info;
  	pv_init_ops = xen_init_ops;
-@@ -1086,6 +1137,12 @@ asmlinkage void __init xen_start_kernel(void)
+-	pv_time_ops = xen_time_ops;
+ 	pv_cpu_ops = xen_cpu_ops;
+ 	pv_apic_ops = xen_apic_ops;
+ 
+@@ -1072,13 +1122,7 @@ asmlinkage void __init xen_start_kernel(void)
+ 	x86_init.oem.arch_setup = xen_arch_setup;
+ 	x86_init.oem.banner = xen_banner;
+ 
+-	x86_init.timers.timer_init = xen_time_init;
+-	x86_init.timers.setup_percpu_clockev = x86_init_noop;
+-	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+-
+-	x86_platform.calibrate_tsc = xen_tsc_khz;
+-	x86_platform.get_wallclock = xen_get_wallclock;
+-	x86_platform.set_wallclock = xen_set_wallclock;
++	xen_init_time_ops();
  
- 	xen_init_mmu_ops();
- 
-+	/*
-+	 * Prevent page tables from being allocated in highmem, even
-+	 * if CONFIG_HIGHPTE is enabled.
-+	 */
-+	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
-+
- 	/* Prevent unwanted bits from being set in PTEs. */
- 	__supported_pte_mask &= ~_PAGE_GLOBAL;
- 	if (!xen_initial_domain())
-@@ -1116,6 +1173,10 @@ asmlinkage void __init xen_start_kernel(void)
+ 	/*
+ 	 * Set up some pagetable state before starting to set any ptes.
+@@ -1116,6 +1160,10 @@ asmlinkage void __init xen_start_kernel(void)
  	 */
  	xen_setup_stackprotector();
  
@@ -3626,7 +4188,7 @@
  	xen_init_irq_ops();
  	xen_init_cpuid_mask();
  
-@@ -1144,6 +1205,8 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1144,6 +1192,8 @@ asmlinkage void __init xen_start_kernel(void)
  
  	pgd = (pgd_t *)xen_start_info->pt_base;
  
@@ -3635,7 +4197,7 @@
  	/* Don't do the full vcpu_info placement stuff until we have a
  	   possible map and a non-dummy shared_info. */
  	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-@@ -1153,6 +1216,7 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1153,6 +1203,7 @@ asmlinkage void __init xen_start_kernel(void)
  
  	xen_raw_console_write("mapping kernel into physical memory\n");
  	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
@@ -3643,7 +4205,7 @@
  
  	init_mm.pgd = pgd;
  
-@@ -1162,6 +1226,14 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1162,6 +1213,14 @@ asmlinkage void __init xen_start_kernel(void)
  	if (xen_feature(XENFEAT_supervisor_mode_kernel))
  		pv_info.kernel_rpl = 0;
  
@@ -3658,7 +4220,7 @@
  	/* set the limit of our address space */
  	xen_reserve_top();
  
-@@ -1184,6 +1256,16 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1184,6 +1243,16 @@ asmlinkage void __init xen_start_kernel(void)
  		add_preferred_console("xenboot", 0, NULL);
  		add_preferred_console("tty", 0, NULL);
  		add_preferred_console("hvc", 0, NULL);
@@ -3675,7 +4237,7 @@
  	}
  
  	xen_raw_console_write("about to get started...\n");
-@@ -1197,3 +1279,141 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1197,3 +1266,124 @@ asmlinkage void __init xen_start_kernel(void)
  	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
  #endif
  }
@@ -3687,9 +4249,9 @@
 +
 +	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
 +		cpuid(base, &eax, &ebx, &ecx, &edx);
-+		*(uint32_t*)(signature + 0) = ebx;
-+		*(uint32_t*)(signature + 4) = ecx;
-+		*(uint32_t*)(signature + 8) = edx;
++		*(uint32_t *)(signature + 0) = ebx;
++		*(uint32_t *)(signature + 4) = ecx;
++		*(uint32_t *)(signature + 8) = edx;
 +		signature[12] = 0;
 +
 +		if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
@@ -3729,8 +4291,9 @@
 +	return 0;
 +}
 +
-+static void init_shared_info(void)
++void xen_hvm_init_shared_info(void)
 +{
++	int cpu;
 +	struct xen_add_to_physmap xatp;
 +	static struct shared_info *shared_info_page = 0;
 +
@@ -3745,31 +4308,41 @@
 +
 +	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
 +
-+	/* Don't do the full vcpu_info placement stuff until we have a
-+	   possible map and a non-dummy shared_info. */
-+	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
++	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
++	 * page, we use it in the event channel upcall and in some pvclock
++	 * related functions. We don't need the vcpu_info placement
++	 * optimizations because we don't use any pv_mmu or pv_irq op on
++	 * HVM.
++	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
++	 * online but xen_hvm_init_shared_info is run at resume time too and
++	 * in that case multiple vcpus might be online. */
++	for_each_online_cpu(cpu) {
++		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
++	}
 +}
 +
-+int xen_set_callback_via(uint64_t via)
++static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
++				    unsigned long action, void *hcpu)
 +{
-+	struct xen_hvm_param a;
-+
-+	a.domid = DOMID_SELF;
-+	a.index = HVM_PARAM_CALLBACK_IRQ;
-+	a.value = via;
-+	return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
++	int cpu = (long)hcpu;
++	switch (action) {
++	case CPU_UP_PREPARE:
++		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
++		break;
++	default:
++		break;
++	}
++	return NOTIFY_OK;
 +}
 +
-+void do_hvm_pv_evtchn_intr(void)
-+{
-+	xen_hvm_evtchn_do_upcall(get_irq_regs());
-+}
++static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
++	.notifier_call	= xen_hvm_cpu_notify,
++};
 +
-+void xen_guest_init(void)
++void __init xen_hvm_guest_init(void)
 +{
 +	int r;
 +	int major, minor;
-+	uint64_t callback_via;
 +
 +	if (xen_pv_domain())
 +		return;
@@ -3778,47 +4351,19 @@
 +	if (r < 0)
 +		return;
 +
-+	init_shared_info();
++	xen_hvm_init_shared_info();
 +
-+	if (xen_feature(XENFEAT_hvm_callback_vector)) {
-+		callback_via = HVM_CALLBACK_VECTOR(GENERIC_INTERRUPT_VECTOR);
-+		xen_set_callback_via(callback_via);
-+		generic_interrupt_extension = do_hvm_pv_evtchn_intr;
++	if (xen_feature(XENFEAT_hvm_callback_vector))
 +		xen_have_vector_callback = 1;
-+	}
-+	if (unplug) {
-+		/* unplug emulated devices */
-+		outw(UNPLUG_ALL, XEN_IOPORT_UNPLUG);
-+	}
++	register_cpu_notifier(&xen_hvm_cpu_notifier);
++	xen_unplug_emulated_devices();
 +	have_vcpu_info_placement = 0;
 +	x86_init.irqs.intr_init = xen_init_IRQ;
++	xen_hvm_init_time_ops();
++	xen_hvm_init_mmu_ops();
 +}
-+
-+static int __init parse_unplug(char *arg)
-+{
-+	char *p, *q;
-+
-+	for (p = arg; p; p = q) {
-+		q = strchr(arg, ',');
-+		if (q)
-+			*q++ = '\0';
-+		if (!strcmp(p, "all"))
-+			unplug |= UNPLUG_ALL;
-+		else if (!strcmp(p, "ide-disks"))
-+			unplug |= UNPLUG_ALL_IDE_DISKS;
-+		else if (!strcmp(p, "aux-ide-disks"))
-+			unplug |= UNPLUG_AUX_IDE_DISKS;
-+		else if (!strcmp(p, "nics"))
-+			unplug |= UNPLUG_ALL_NICS;
-+		else
-+			printk(KERN_WARNING "unrecognised option '%s' "
-+				 "in module parameter 'dev_unplug'\n", p);
-+	}
-+	return 0;
-+}
-+early_param("xen_unplug", parse_unplug);
 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 350a3de..8c6a858 100644
+index 350a3de..74e284f 100644
 --- a/arch/x86/xen/mmu.c
 +++ b/arch/x86/xen/mmu.c
 @@ -42,6 +42,7 @@
@@ -3829,7 +4374,7 @@
  #include <linux/module.h>
  
  #include <asm/pgtable.h>
-@@ -50,7 +51,10 @@
+@@ -50,14 +51,19 @@
  #include <asm/mmu_context.h>
  #include <asm/setup.h>
  #include <asm/paravirt.h>
@@ -3840,15 +4385,16 @@
  
  #include <asm/xen/hypercall.h>
  #include <asm/xen/hypervisor.h>
-@@ -58,6 +62,7 @@
+ 
  #include <xen/page.h>
  #include <xen/interface/xen.h>
++#include <xen/interface/hvm/hvm_op.h>
  #include <xen/interface/version.h>
 +#include <xen/interface/memory.h>
  #include <xen/hvc-console.h>
  
  #include "multicalls.h"
-@@ -66,6 +71,13 @@
+@@ -66,6 +72,13 @@
  
  #define MMU_UPDATE_HISTO	30
  
@@ -3862,7 +4408,7 @@
  #ifdef CONFIG_XEN_DEBUG_FS
  
  static struct {
-@@ -184,6 +196,26 @@ static inline unsigned p2m_index(unsigned long pfn)
+@@ -184,6 +197,26 @@ static inline unsigned p2m_index(unsigned long pfn)
  	return pfn % P2M_ENTRIES_PER_PAGE;
  }
  
@@ -3889,7 +4435,7 @@
  /* Build the parallel p2m_top_mfn structures */
  void xen_build_mfn_list_list(void)
  {
-@@ -315,6 +347,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
+@@ -315,6 +348,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
  
  	return PFN_DOWN(maddr.maddr);
  }
@@ -3897,7 +4443,7 @@
  
  xmaddr_t arbitrary_virt_to_machine(void *vaddr)
  {
-@@ -376,6 +409,34 @@ static bool xen_page_pinned(void *ptr)
+@@ -376,6 +410,34 @@ static bool xen_page_pinned(void *ptr)
  	return PagePinned(page);
  }
  
@@ -3932,7 +4478,7 @@
  static void xen_extend_mmu_update(const struct mmu_update *update)
  {
  	struct multicall_space mcs;
-@@ -452,6 +513,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
+@@ -452,6 +514,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
  void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
  		    pte_t *ptep, pte_t pteval)
  {
@@ -3944,7 +4490,7 @@
  	ADD_STATS(set_pte_at, 1);
  //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
  	ADD_STATS(set_pte_at_current, mm == current->mm);
-@@ -522,9 +588,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
+@@ -522,9 +589,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
  	return val;
  }
  
@@ -3980,7 +4526,7 @@
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
  
-@@ -534,9 +625,62 @@ pgdval_t xen_pgd_val(pgd_t pgd)
+@@ -534,9 +626,62 @@ pgdval_t xen_pgd_val(pgd_t pgd)
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
  
@@ -4044,7 +4590,7 @@
  	return native_make_pte(pte);
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-@@ -592,6 +736,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
+@@ -592,6 +737,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
  
  void xen_set_pte(pte_t *ptep, pte_t pte)
  {
@@ -4056,7 +4602,7 @@
  	ADD_STATS(pte_update, 1);
  //	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
  	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-@@ -608,6 +757,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
+@@ -608,6 +758,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
  #ifdef CONFIG_X86_PAE
  void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
  {
@@ -4068,7 +4614,7 @@
  	set_64bit((u64 *)ptep, native_pte_val(pte));
  }
  
-@@ -934,8 +1088,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
+@@ -934,8 +1089,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
     read-only, and can be pinned. */
  static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
  {
@@ -4077,7 +4623,7 @@
  	xen_mc_batch();
  
  	if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
-@@ -1219,7 +1371,7 @@ void xen_exit_mmap(struct mm_struct *mm)
+@@ -1219,7 +1372,7 @@ void xen_exit_mmap(struct mm_struct *mm)
  	spin_lock(&mm->page_table_lock);
  
  	/* pgd may not be pinned in the error exit path of execve */
@@ -4086,7 +4632,7 @@
  		xen_pgd_unpin(mm);
  
  	spin_unlock(&mm->page_table_lock);
-@@ -1288,12 +1440,19 @@ static void xen_flush_tlb_single(unsigned long addr)
+@@ -1288,12 +1441,19 @@ static void xen_flush_tlb_single(unsigned long addr)
  	preempt_enable();
  }
  
@@ -4107,7 +4653,7 @@
  	} *args;
  	struct multicall_space mcs;
  
-@@ -1417,6 +1576,13 @@ static int xen_pgd_alloc(struct mm_struct *mm)
+@@ -1417,6 +1577,13 @@ static int xen_pgd_alloc(struct mm_struct *mm)
  	return ret;
  }
  
@@ -4121,7 +4667,7 @@
  static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
  {
  #ifdef CONFIG_X86_64
-@@ -1448,10 +1614,17 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
+@@ -1448,10 +1615,17 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
  #ifdef CONFIG_X86_32
  static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
  {
@@ -4141,7 +4687,7 @@
  
  	return pte;
  }
-@@ -1517,7 +1690,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
+@@ -1517,7 +1691,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
  	if (PagePinned(virt_to_page(mm->pgd))) {
  		SetPagePinned(page);
  
@@ -4149,7 +4695,7 @@
  		if (!PageHighMem(page)) {
  			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
  			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-@@ -1620,6 +1792,7 @@ static void *m2v(phys_addr_t maddr)
+@@ -1620,6 +1793,7 @@ static void *m2v(phys_addr_t maddr)
  	return __ka(m2p(maddr));
  }
  
@@ -4157,7 +4703,7 @@
  static void set_page_prot(void *addr, pgprot_t prot)
  {
  	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-@@ -1675,6 +1848,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+@@ -1675,6 +1849,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
  	set_page_prot(pmd, PAGE_KERNEL_RO);
  }
  
@@ -4178,7 +4724,7 @@
  #ifdef CONFIG_X86_64
  static void convert_pfn_mfn(void *v)
  {
-@@ -1766,6 +1953,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1766,6 +1954,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  					 unsigned long max_pfn)
  {
  	pmd_t *kernel_pmd;
@@ -4186,7 +4732,7 @@
  
  	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
  				  xen_start_info->nr_pt_frames * PAGE_SIZE +
-@@ -1777,6 +1965,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1777,6 +1966,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  	xen_map_identity_early(level2_kernel_pgt, max_pfn);
  
  	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
@@ -4207,7 +4753,7 @@
  	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
  			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
  
-@@ -1799,6 +2001,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1799,6 +2002,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  }
  #endif	/* CONFIG_X86_64 */
  
@@ -4216,7 +4762,7 @@
  static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  {
  	pte_t pte;
-@@ -1828,9 +2032,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1828,9 +2033,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  		pte = pfn_pte(phys, prot);
  		break;
  
@@ -4244,7 +4790,7 @@
  	}
  
  	__native_set_fixmap(idx, pte);
-@@ -1845,6 +2066,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1845,6 +2067,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  #endif
  }
  
@@ -4274,7 +4820,7 @@
  static __init void xen_post_allocator_init(void)
  {
  	pv_mmu_ops.set_pte = xen_set_pte;
-@@ -1960,7 +2204,270 @@ void __init xen_init_mmu_ops(void)
+@@ -1960,6 +2205,301 @@ void __init xen_init_mmu_ops(void)
  	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
  	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
  	pv_mmu_ops = xen_mmu_ops;
@@ -4540,11 +5086,52 @@
 +	flush_tlb_all();
 +
 +	return err;
- }
++}
 +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
++
++static void xen_hvm_exit_mmap(struct mm_struct *mm)
++{
++	struct xen_hvm_pagetable_dying a;
++	int rc;
++
++	a.domid = DOMID_SELF;
++	a.gpa = __pa(mm->pgd);
++	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
++	WARN_ON_ONCE(rc < 0);
++}
++
++static int is_pagetable_dying_supported(void)
++{
++	struct xen_hvm_pagetable_dying a;
++	int rc = 0;
++
++	a.domid = DOMID_SELF;
++	a.gpa = 0x00;
++	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
++	if (rc < 0) {
++		printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
++		return 0;
++	}
++	return 1;
++}
++
++void __init xen_hvm_init_mmu_ops(void)
++{
++	if (is_pagetable_dying_supported())
++		pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+ }
  
  #ifdef CONFIG_XEN_DEBUG_FS
- 
+diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
+index 5fe6bc7..fa938c4 100644
+--- a/arch/x86/xen/mmu.h
++++ b/arch/x86/xen/mmu.h
+@@ -60,4 +60,5 @@ void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ unsigned long xen_read_cr2_direct(void);
+ 
+ extern void xen_init_mmu_ops(void);
++extern void xen_hvm_init_mmu_ops(void);
+ #endif	/* _XEN_MMU_H */
 diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
 new file mode 100644
 index 0000000..4d55524
@@ -4605,7 +5192,7 @@
 +}
 diff --git a/arch/x86/xen/pci.c b/arch/x86/xen/pci.c
 new file mode 100644
-index 0000000..3def132
+index 0000000..8ca31f1
 --- /dev/null
 +++ b/arch/x86/xen/pci.c
 @@ -0,0 +1,296 @@
@@ -4634,7 +5221,7 @@
 +	int shareable = 0;
 +	char *name;
 +
-+	if (!xen_domain())
++	if (!xen_pv_domain())
 +		return -1;
 +
 +	if (triggering == ACPI_EDGE_SENSITIVE) {
@@ -4672,7 +5259,7 @@
 +	int rc, irq;
 +	struct physdev_setup_gsi setup_gsi;
 +
-+	if (!xen_domain())
++	if (!xen_pv_domain())
 +		return -1;
 +
 +	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
@@ -4905,6 +5492,147 @@
 +	return 0;
 +}
 +EXPORT_SYMBOL(xen_unregister_device_domain_owner);
+diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
+new file mode 100644
+index 0000000..2f7f3fb
+--- /dev/null
++++ b/arch/x86/xen/platform-pci-unplug.c
+@@ -0,0 +1,135 @@
++/******************************************************************************
++ * platform-pci-unplug.c
++ *
++ * Xen platform PCI device driver
++ * Copyright (c) 2010, Citrix
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++ * Place - Suite 330, Boston, MA 02111-1307 USA.
++ *
++ */
++
++#include <linux/init.h>
++#include <linux/io.h>
++#include <linux/module.h>
++
++#include <xen/platform_pci.h>
++
++#define XEN_PLATFORM_ERR_MAGIC -1
++#define XEN_PLATFORM_ERR_PROTOCOL -2
++#define XEN_PLATFORM_ERR_BLACKLIST -3
++
++/* store the value of xen_emul_unplug after the unplug is done */
++int xen_platform_pci_unplug;
++EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
++static int xen_emul_unplug;
++
++static int __init check_platform_magic(void)
++{
++	short magic;
++	char protocol;
++
++	magic = inw(XEN_IOPORT_MAGIC);
++	if (magic != XEN_IOPORT_MAGIC_VAL) {
++		printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
++		return XEN_PLATFORM_ERR_MAGIC;
++	}
++
++	protocol = inb(XEN_IOPORT_PROTOVER);
++
++	printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
++			protocol);
++
++	switch (protocol) {
++	case 1:
++		outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
++		outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
++		if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
++			printk(KERN_ERR "Xen Platform: blacklisted by host\n");
++			return XEN_PLATFORM_ERR_BLACKLIST;
++		}
++		break;
++	default:
++		printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version");
++		return XEN_PLATFORM_ERR_PROTOCOL;
++	}
++
++	return 0;
++}
++
++void __init xen_unplug_emulated_devices(void)
++{
++	int r;
++
++	/* check the version of the xen platform PCI device */
++	r = check_platform_magic();
++	/* If the version matches enable the Xen platform PCI driver.
++	 * Also enable the Xen platform PCI driver if the version is really old
++	 * and the user told us to ignore it. */
++	if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
++			(xen_emul_unplug & XEN_UNPLUG_IGNORE)))
++		return;
++	/* Set the default value of xen_emul_unplug depending on whether or
++	 * not the Xen PV frontends and the Xen platform PCI driver have
++	 * been compiled for this kernel (modules or built-in are both OK). */
++	if (!xen_emul_unplug) {
++		if (xen_must_unplug_nics()) {
++			printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
++					"been compiled for this kernel: unplug emulated NICs.\n");
++			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
++		}
++		if (xen_must_unplug_disks()) {
++			printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
++					"been compiled for this kernel: unplug emulated disks.\n"
++					"You might have to change the root device\n"
++					"from /dev/hd[a-d] to /dev/xvd[a-d]\n"
++					"in your root= kernel command line option\n");
++			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
++		}
++	}
++	/* Now unplug the emulated devices */
++	if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE))
++		outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
++	xen_platform_pci_unplug = xen_emul_unplug;
++}
++
++static int __init parse_xen_emul_unplug(char *arg)
++{
++	char *p, *q;
++	int l;
++
++	for (p = arg; p; p = q) {
++		q = strchr(p, ',');
++		if (q) {
++			l = q - p;
++			q++;
++		} else {
++			l = strlen(p);
++		}
++		if (!strncmp(p, "all", l))
++			xen_emul_unplug |= XEN_UNPLUG_ALL;
++		else if (!strncmp(p, "ide-disks", l))
++			xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
++		else if (!strncmp(p, "aux-ide-disks", l))
++			xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
++		else if (!strncmp(p, "nics", l))
++			xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
++		else if (!strncmp(p, "ignore", l))
++			xen_emul_unplug |= XEN_UNPLUG_IGNORE;
++		else
++			printk(KERN_WARNING "unrecognised option '%s' "
++				 "in parameter 'xen_emul_unplug'\n", p);
++	}
++	return 0;
++}
++early_param("xen_emul_unplug", parse_xen_emul_unplug);
 diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
 index ad0047f..f008629 100644
 --- a/arch/x86/xen/setup.c
@@ -5123,13 +5851,84 @@
  	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
  
  	/* make sure interrupts start blocked */
+diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
+index a9c6611..1d789d5 100644
+--- a/arch/x86/xen/suspend.c
++++ b/arch/x86/xen/suspend.c
+@@ -26,6 +26,18 @@ void xen_pre_suspend(void)
+ 		BUG();
+ }
+ 
++void xen_hvm_post_suspend(int suspend_cancelled)
++{
++	int cpu;
++	xen_hvm_init_shared_info();
++	xen_callback_vector();
++	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
++		for_each_online_cpu(cpu) {
++			xen_setup_runstate_info(cpu);
++		}
++	}
++}
++
+ void xen_post_suspend(int suspend_cancelled)
+ {
+ 	xen_build_mfn_list_list();
 diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
-index 9d1f853..af5463a 100644
+index 9d1f853..ca8efdb 100644
 --- a/arch/x86/xen/time.c
 +++ b/arch/x86/xen/time.c
-@@ -239,8 +239,22 @@ unsigned long xen_get_wallclock(void)
+@@ -19,6 +19,7 @@
+ #include <asm/xen/hypercall.h>
+ 
+ #include <xen/events.h>
++#include <xen/features.h>
+ #include <xen/interface/xen.h>
+ #include <xen/interface/vcpu.h>
+ 
+@@ -154,12 +155,13 @@ static void do_stolen_accounting(void)
+ 	account_idle_ticks(ticks);
+ }
+ 
++#ifdef CONFIG_XEN_SCHED_CLOCK
+ /*
+  * Xen sched_clock implementation.  Returns the number of unstolen
+  * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
+  * states.
+  */
+-unsigned long long xen_sched_clock(void)
++static unsigned long long xen_sched_clock(void)
+ {
+ 	struct vcpu_runstate_info state;
+ 	cycle_t now;
+@@ -191,10 +193,10 @@ unsigned long long xen_sched_clock(void)
+ 
+ 	return ret;
+ }
+-
++#endif
+ 
+ /* Get the TSC speed from Xen */
+-unsigned long xen_tsc_khz(void)
++static unsigned long xen_tsc_khz(void)
+ {
+ 	struct pvclock_vcpu_time_info *info =
+ 		&HYPERVISOR_shared_info->vcpu_info[0].time;
+@@ -229,7 +231,7 @@ static void xen_read_wallclock(struct timespec *ts)
+ 	put_cpu_var(xen_vcpu);
+ }
+ 
+-unsigned long xen_get_wallclock(void)
++static unsigned long xen_get_wallclock(void)
+ {
+ 	struct timespec ts;
  
- int xen_set_wallclock(unsigned long now)
+@@ -237,10 +239,24 @@ unsigned long xen_get_wallclock(void)
+ 	return ts.tv_sec;
+ }
+ 
+-int xen_set_wallclock(unsigned long now)
++static int xen_set_wallclock(unsigned long now)
  {
 +	struct xen_platform_op op;
 +	int rc;
@@ -5151,8 +5950,80 @@
  }
  
  static struct clocksource xen_clocksource __read_mostly = {
-diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
-new file mode 100644
+@@ -442,6 +458,8 @@ void xen_setup_timer(int cpu)
+ 
+ 	evt->cpumask = cpumask_of(cpu);
+ 	evt->irq = irq;
++
++	xen_setup_runstate_info(cpu);
+ }
+ 
+ void xen_teardown_timer(int cpu)
+@@ -472,7 +490,7 @@ void xen_timer_resume(void)
+ 	}
+ }
+ 
+-__init void xen_time_init(void)
++static __init void xen_time_init(void)
+ {
+ 	int cpu = smp_processor_id();
+ 
+@@ -496,3 +514,53 @@ __init void xen_time_init(void)
+ 	xen_setup_timer(cpu);
+ 	xen_setup_cpu_clockevents();
+ }
++
++static const struct pv_time_ops xen_time_ops __initdata = {
++#ifdef CONFIG_XEN_SCHED_CLOCK
++	.sched_clock = xen_sched_clock,
++#else
++	.sched_clock = xen_clocksource_read,
++#endif
++};
++
++__init void xen_init_time_ops(void)
++{
++	pv_time_ops = xen_time_ops;
++
++	x86_init.timers.timer_init = xen_time_init;
++	x86_init.timers.setup_percpu_clockev = x86_init_noop;
++	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
++
++	x86_platform.calibrate_tsc = xen_tsc_khz;
++	x86_platform.get_wallclock = xen_get_wallclock;
++	x86_platform.set_wallclock = xen_set_wallclock;
++}
++
++static void xen_hvm_setup_cpu_clockevents(void)
++{
++	int cpu = smp_processor_id();
++	xen_setup_runstate_info(cpu);
++	xen_setup_timer(cpu);
++	xen_setup_cpu_clockevents();
++}
++
++__init void xen_hvm_init_time_ops(void)
++{
++	/* A vector callback is needed, otherwise we cannot receive
++	 * interrupts on cpus other than 0. */
++	if (!xen_have_vector_callback && num_present_cpus() > 1)
++		return;
++	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
++		printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
++				"disable pv timer\n");
++		return;
++	}
++
++	pv_time_ops = xen_time_ops;
++	x86_init.timers.setup_percpu_clockev = xen_time_init;
++	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
++
++	x86_platform.calibrate_tsc = xen_tsc_khz;
++	x86_platform.get_wallclock = xen_get_wallclock;
++	x86_platform.set_wallclock = xen_set_wallclock;
++}
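
The PV path above replaces timer init outright, while the HVM path keeps the
native init and only redirects per-cpu clockevent setup, since the vector
callback must be in place first. A minimal userspace sketch of the ops-table
override pattern in use here (all names and return values are hypothetical
demo stand-ins):

#include <stdio.h>

struct platform_ops {
	unsigned long (*get_wallclock)(void);
	unsigned long (*calibrate_tsc)(void);
};

static unsigned long native_get_wallclock(void) { return 0; }
static unsigned long native_calibrate_tsc(void) { return 2000000; }

static unsigned long demo_xen_get_wallclock(void) { return 1280761595; }
static unsigned long demo_xen_tsc_khz(void)       { return 2400000; }

static struct platform_ops platform = {
	.get_wallclock = native_get_wallclock,
	.calibrate_tsc = native_calibrate_tsc,
};

/* analogous to xen_init_time_ops(): swap in the Xen implementations */
static void demo_init_time_ops(void)
{
	platform.get_wallclock = demo_xen_get_wallclock;
	platform.calibrate_tsc = demo_xen_tsc_khz;
}

int main(void)
{
	demo_init_time_ops();
	printf("wallclock=%lu tsc_khz=%lu\n",
	       platform.get_wallclock(), platform.calibrate_tsc());
	return 0;
}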
+diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
+new file mode 100644
 index 0000000..1cd7f4d
 --- /dev/null
 +++ b/arch/x86/xen/vga.c
@@ -5225,7 +6096,7 @@
 +	}
 +}
 diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
-index f9153a3..1c1eff4 100644
+index f9153a3..03e97f5 100644
 --- a/arch/x86/xen/xen-ops.h
 +++ b/arch/x86/xen/xen-ops.h
 @@ -30,6 +30,9 @@ void xen_setup_machphys_mapping(void);
@@ -5238,7 +6109,32 @@
  
  char * __init xen_memory_setup(void);
  void __init xen_arch_setup(void);
-@@ -82,6 +85,23 @@ static inline void xen_uninit_lock_cpu(int cpu)
+@@ -38,6 +41,10 @@ void xen_enable_sysenter(void);
+ void xen_enable_syscall(void);
+ void xen_vcpu_restore(void);
+ 
++void xen_callback_vector(void);
++void xen_hvm_init_shared_info(void);
++void __init xen_unplug_emulated_devices(void);
++
+ void __init xen_build_dynamic_phys_to_machine(void);
+ 
+ void xen_init_irq_ops(void);
+@@ -46,11 +53,8 @@ void xen_setup_runstate_info(int cpu);
+ void xen_teardown_timer(int cpu);
+ cycle_t xen_clocksource_read(void);
+ void xen_setup_cpu_clockevents(void);
+-unsigned long xen_tsc_khz(void);
+-void __init xen_time_init(void);
+-unsigned long xen_get_wallclock(void);
+-int xen_set_wallclock(unsigned long time);
+-unsigned long long xen_sched_clock(void);
++void __init xen_init_time_ops(void);
++void __init xen_hvm_init_time_ops(void);
+ 
+ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
+ 
+@@ -82,6 +86,23 @@ static inline void xen_uninit_lock_cpu(int cpu)
  }
  #endif
  
@@ -5373,7 +6269,7 @@
  	status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
  	if (ACPI_FAILURE(status)) {
 diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
-index ec742a4..4ccecf6 100644
+index ec742a4..492a899 100644
 --- a/drivers/acpi/processor_core.c
 +++ b/drivers/acpi/processor_core.c
 @@ -58,6 +58,7 @@
@@ -5432,7 +6328,23 @@
  {
  
  	if (acpi_device_dir(device)) {
-@@ -711,7 +710,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
+@@ -408,15 +407,6 @@ static int acpi_processor_remove_fs(struct acpi_device *device)
+ 
+ 	return 0;
+ }
+-#else
+-static inline int acpi_processor_add_fs(struct acpi_device *device)
+-{
+-	return 0;
+-}
+-static inline int acpi_processor_remove_fs(struct acpi_device *device)
+-{
+-	return 0;
+-}
+ #endif
+ 
+ /* Use the acpiid in MADT to map cpus in case of SMP */
+@@ -711,7 +701,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
  
  static DEFINE_PER_CPU(void *, processor_device_array);
  
@@ -5441,7 +6353,7 @@
  {
  	struct acpi_processor *pr = acpi_driver_data(device);
  	int saved;
-@@ -879,7 +878,7 @@ err_free_cpumask:
+@@ -879,7 +869,7 @@ err_free_cpumask:
  	return result;
  }
  
@@ -5450,7 +6362,7 @@
  {
  	struct acpi_processor *pr = NULL;
  
-@@ -1154,7 +1153,11 @@ static int __init acpi_processor_init(void)
+@@ -1154,7 +1144,11 @@ static int __init acpi_processor_init(void)
  	if (result < 0)
  		goto out_proc;
  
@@ -5463,7 +6375,7 @@
  	if (result < 0)
  		goto out_cpuidle;
  
-@@ -1190,7 +1193,10 @@ static void __exit acpi_processor_exit(void)
+@@ -1190,7 +1184,10 @@ static void __exit acpi_processor_exit(void)
  
  	acpi_processor_uninstall_hotplug_notify();
  
@@ -6179,7 +7091,7 @@
 +	acpi_bus_unregister_driver(&xen_acpi_processor_driver);
 +}
 diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
-index 7c85265..882ed92 100644
+index 9ed9292..3770a02 100644
 --- a/drivers/acpi/sleep.c
 +++ b/drivers/acpi/sleep.c
 @@ -19,6 +19,8 @@
@@ -6235,10 +7147,10 @@
  	  This driver implements the front-end of the Xen virtual
  	  block device driver.  It communicates with a back-end driver
 diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
-index b8578bb..75f730b 100644
+index b8578bb..89adac5 100644
 --- a/drivers/block/xen-blkfront.c
 +++ b/drivers/block/xen-blkfront.c
-@@ -42,6 +42,7 @@
+@@ -42,10 +42,12 @@
  #include <linux/module.h>
  #include <linux/scatterlist.h>
  
@@ -6246,7 +7158,12 @@
  #include <xen/xenbus.h>
  #include <xen/grant_table.h>
  #include <xen/events.h>
-@@ -76,6 +77,7 @@ static const struct block_device_operations xlvbd_block_fops;
+ #include <xen/page.h>
++#include <xen/platform_pci.h>
+ 
+ #include <xen/interface/grant_table.h>
+ #include <xen/interface/io/blkif.h>
+@@ -76,6 +78,7 @@ static const struct block_device_operations xlvbd_block_fops;
   */
  struct blkfront_info
  {
@@ -6254,7 +7171,7 @@
  	struct xenbus_device *xbdev;
  	struct gendisk *gd;
  	int vdevice;
-@@ -92,16 +94,14 @@ struct blkfront_info
+@@ -92,16 +95,14 @@ struct blkfront_info
  	unsigned long shadow_free;
  	int feature_barrier;
  	int is_ready;
@@ -6275,7 +7192,7 @@
  #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
  	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
  #define GRANT_INVALID_REF	0
-@@ -136,6 +136,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
+@@ -136,6 +137,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
  	info->shadow_free = id;
  }
  
@@ -6331,7 +7248,7 @@
  static void blkif_restart_queue_callback(void *arg)
  {
  	struct blkfront_info *info = (struct blkfront_info *)arg;
-@@ -416,9 +465,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+@@ -416,9 +466,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
  	if ((minor % nr_parts) == 0)
  		nr_minors = nr_parts;
  
@@ -6347,7 +7264,7 @@
  
  	offset = minor / nr_parts;
  
-@@ -449,7 +503,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+@@ -449,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
  
  	if (xlvbd_init_blk_queue(gd, sector_size)) {
  		del_gendisk(gd);
@@ -6356,7 +7273,7 @@
  	}
  
  	info->rq = gd->queue;
-@@ -469,10 +523,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+@@ -469,10 +524,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
  
  	return 0;
  
@@ -6402,7 +7319,7 @@
  static void kick_pending_request_queues(struct blkfront_info *info)
  {
  	if (!RING_FULL(&info->ring)) {
-@@ -650,7 +739,7 @@ fail:
+@@ -650,7 +740,7 @@ fail:
  
  
  /* Common code used when first setting up, and when resuming. */
@@ -6411,7 +7328,7 @@
  			   struct blkfront_info *info)
  {
  	const char *message = NULL;
-@@ -710,7 +799,6 @@ again:
+@@ -710,7 +800,6 @@ again:
  	return err;
  }
  
@@ -6419,7 +7336,29 @@
  /**
   * Entry point to this code when a new device is created.  Allocate the basic
   * structures and the ring buffer for communication with the backend, and
-@@ -742,6 +830,7 @@ static int blkfront_probe(struct xenbus_device *dev,
+@@ -736,12 +825,29 @@ static int blkfront_probe(struct xenbus_device *dev,
+ 		}
+ 	}
+ 
++	/* No unplug has been done: do not attach devices other than Xen vbds. */
++	if (xen_hvm_domain() && (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE)) {
++		int major;
++
++		if (!VDEV_IS_EXTENDED(vdevice))
++			major = BLKIF_MAJOR(vdevice);
++		else
++			major = XENVBD_MAJOR;
++
++		if (major != XENVBD_MAJOR) {
++			printk(KERN_INFO
++					"%s: HVM does not support vbd %d as xen block device\n",
++					__FUNCTION__, vdevice);
++			return -ENODEV;
++		}
++	}
+ 	info = kzalloc(sizeof(*info), GFP_KERNEL);
+ 	if (!info) {
+ 		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
  		return -ENOMEM;
  	}
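
The major check just added keeps blkfront from binding to emulated IDE
devices when no unplug was performed. A standalone sketch of the decision;
the macro definitions are reproduced for illustration and assumed to match
the frontend's (XENVBD_MAJOR is 202 in mainline):

#include <stdio.h>

#define XENVBD_MAJOR		202
#define VDEV_IS_EXTENDED(dev)	((dev) & (1 << 28))
#define BLKIF_MAJOR(dev)	((dev) >> 8)

static int hvm_accepts(int vdevice)
{
	int major = VDEV_IS_EXTENDED(vdevice) ? XENVBD_MAJOR
					      : BLKIF_MAJOR(vdevice);
	return major == XENVBD_MAJOR;
}

int main(void)
{
	printf("xvda (202 << 8): %d\n", hvm_accepts(202 << 8));	/* 1 */
	printf("hda  (  3 << 8): %d\n", hvm_accepts(3 << 8));	/* 0 */
	return 0;
}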
  
@@ -6427,7 +7366,7 @@
  	info->xbdev = dev;
  	info->vdevice = vdevice;
  	info->connected = BLKIF_STATE_DISCONNECTED;
-@@ -755,7 +844,7 @@ static int blkfront_probe(struct xenbus_device *dev,
+@@ -755,7 +861,7 @@ static int blkfront_probe(struct xenbus_device *dev,
  	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
  	dev_set_drvdata(&dev->dev, info);
  
@@ -6436,7 +7375,7 @@
  	if (err) {
  		kfree(info);
  		dev_set_drvdata(&dev->dev, NULL);
-@@ -850,13 +939,50 @@ static int blkfront_resume(struct xenbus_device *dev)
+@@ -850,13 +956,50 @@ static int blkfront_resume(struct xenbus_device *dev)
  
  	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
  
@@ -6488,7 +7427,7 @@
  
  /*
   * Invoked when the backend is finally 'ready' (and has told produced
-@@ -869,10 +995,29 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -869,10 +1012,29 @@ static void blkfront_connect(struct blkfront_info *info)
  	unsigned int binfo;
  	int err;
  
@@ -6520,7 +7459,7 @@
  	dev_dbg(&info->xbdev->dev, "%s:%s.\n",
  		__func__, info->xbdev->otherend);
  
-@@ -915,57 +1060,21 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -915,57 +1077,21 @@ static void blkfront_connect(struct blkfront_info *info)
  }
  
  /**
@@ -6582,7 +7521,7 @@
  	case XenbusStateUnknown:
  	case XenbusStateClosed:
  		break;
-@@ -975,35 +1084,56 @@ static void backend_changed(struct xenbus_device *dev,
+@@ -975,35 +1101,56 @@ static void backend_changed(struct xenbus_device *dev,
  		break;
  
  	case XenbusStateClosing:
@@ -6659,7 +7598,7 @@
  
  	return 0;
  }
-@@ -1012,30 +1142,68 @@ static int blkfront_is_ready(struct xenbus_device *dev)
+@@ -1012,30 +1159,68 @@ static int blkfront_is_ready(struct xenbus_device *dev)
  {
  	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
  
@@ -6742,7 +7681,7 @@
  	return 0;
  }
  
-@@ -1061,7 +1229,7 @@ static struct xenbus_driver blkfront = {
+@@ -1061,7 +1246,7 @@ static struct xenbus_driver blkfront = {
  	.probe = blkfront_probe,
  	.remove = blkfront_remove,
  	.resume = blkfront_resume,
@@ -7210,7 +8149,7 @@
  	help
  	  The network device frontend driver allows the kernel to
 diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
-index baa051d..ee7465a 100644
+index baa051d..328fe40 100644
 --- a/drivers/net/xen-netfront.c
 +++ b/drivers/net/xen-netfront.c
 @@ -42,6 +42,7 @@
@@ -7221,7 +8160,139 @@
  #include <xen/xenbus.h>
  #include <xen/events.h>
  #include <xen/page.h>
-@@ -1393,7 +1394,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
+@@ -58,6 +59,19 @@ struct netfront_cb {
+ 	unsigned offset;
+ };
+ 
++#define MICRO_SECOND 1000000UL
++#define NANO_SECOND 1000000000UL
++#define DEFAULT_SMART_POLL_FREQ   1000UL
++
++struct netfront_smart_poll {
++	struct hrtimer timer;
++	struct net_device *netdev;
++	unsigned int smart_poll_freq;
++	unsigned int feature_smart_poll;
++	unsigned int active;
++	unsigned long counter;
++};
++
+ #define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))
+ 
+ #define RX_COPY_THRESHOLD 256
+@@ -104,7 +118,7 @@ struct netfront_info {
+ 
+ 	/* Receive-ring batched refills. */
+ #define RX_MIN_TARGET 8
+-#define RX_DFL_MIN_TARGET 64
++#define RX_DFL_MIN_TARGET 80
+ #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+ 	unsigned rx_min_target, rx_max_target, rx_target;
+ 	struct sk_buff_head rx_batch;
+@@ -118,6 +132,8 @@ struct netfront_info {
+ 	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
+ 	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+ 	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++
++	struct netfront_smart_poll smart_poll;
+ };
+ 
+ struct netfront_rx_info {
+@@ -337,15 +353,17 @@ static int xennet_open(struct net_device *dev)
+ 	return 0;
+ }
+ 
+-static void xennet_tx_buf_gc(struct net_device *dev)
++static int xennet_tx_buf_gc(struct net_device *dev)
+ {
+ 	RING_IDX cons, prod;
++	RING_IDX cons_begin, cons_end;
+ 	unsigned short id;
+ 	struct netfront_info *np = netdev_priv(dev);
+ 	struct sk_buff *skb;
+ 
+ 	BUG_ON(!netif_carrier_ok(dev));
+ 
++	cons_begin = np->tx.rsp_cons;
+ 	do {
+ 		prod = np->tx.sring->rsp_prod;
+ 		rmb(); /* Ensure we see responses up to 'rp'. */
+@@ -390,7 +408,11 @@ static void xennet_tx_buf_gc(struct net_device *dev)
+ 		mb();		/* update shared area */
+ 	} while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
+ 
++	cons_end = np->tx.rsp_cons;
++
+ 	xennet_maybe_wake_tx(dev);
++
++	return (cons_begin == cons_end);
+ }
+ 
+ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+@@ -1305,6 +1327,50 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+ 	return 0;
+ }
+ 
++static enum hrtimer_restart smart_poll_function(struct hrtimer *timer)
++{
++	struct netfront_smart_poll *psmart_poll;
++	struct net_device *dev;
++	struct netfront_info *np;
++	unsigned long flags;
++	unsigned int tx_active = 0, rx_active = 0;
++
++	psmart_poll = container_of(timer, struct netfront_smart_poll, timer);
++	dev = psmart_poll->netdev;
++	np = netdev_priv(dev);
++
++	spin_lock_irqsave(&np->tx_lock, flags);
++	np->smart_poll.counter++;
++
++	if (likely(netif_carrier_ok(dev))) {
++		tx_active = !(xennet_tx_buf_gc(dev));
++		/* Under tx_lock: protects access to rx shared-ring indexes. */
++		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
++			rx_active = 1;
++			napi_schedule(&np->napi);
++		}
++	}
++
++	np->smart_poll.active |= (tx_active || rx_active);
++	if (np->smart_poll.counter %
++			(np->smart_poll.smart_poll_freq / 10) == 0) {
++		if (!np->smart_poll.active) {
++			np->rx.sring->private.netif.smartpoll_active = 0;
++			goto end;
++		}
++		np->smart_poll.active = 0;
++	}
++
++	if (np->rx.sring->private.netif.smartpoll_active)
++		hrtimer_start(timer,
++			ktime_set(0, NANO_SECOND/psmart_poll->smart_poll_freq),
++			HRTIMER_MODE_REL);
++
++end:
++	spin_unlock_irqrestore(&np->tx_lock, flags);
++	return HRTIMER_NORESTART;
++}
++
+ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
+ {
+ 	struct net_device *dev = dev_id;
+@@ -1320,6 +1386,11 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
+ 			napi_schedule(&np->napi);
+ 	}
+ 
++	if (np->smart_poll.feature_smart_poll)
++		hrtimer_start(&np->smart_poll.timer,
++			ktime_set(0, NANO_SECOND/np->smart_poll.smart_poll_freq),
++			HRTIMER_MODE_REL);
++
+ 	spin_unlock_irqrestore(&np->tx_lock, flags);
+ 
+ 	return IRQ_HANDLED;
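
With this change the interrupt handler arms an hrtimer that re-fires every
NANO_SECOND/smart_poll_freq ns (1 ms at the default 1000 Hz), and every
smart_poll_freq/10 ticks, i.e. every 100 ms, the handler falls back to
interrupts if neither ring saw work. A standalone model of that duty cycle
(the tick at which the ring goes idle is invented for the demo):

#include <stdio.h>

#define NANO_SECOND 1000000000UL

int main(void)
{
	unsigned long freq = 1000;	/* DEFAULT_SMART_POLL_FREQ */
	unsigned long counter = 0;
	int active = 0, polling = 1;

	while (polling && counter < 5000) {
		counter++;
		/* pretend tx/rx work stops after tick 1234 */
		int saw_work = counter < 1234;

		active |= saw_work;
		if (counter % (freq / 10) == 0) {
			if (!active)
				polling = 0;	/* smartpoll_active = 0 */
			active = 0;
		}
	}
	printf("timer period: %lu ns, stopped polling at tick %lu\n",
	       NANO_SECOND / freq, counter);
	return 0;
}

At 1000 Hz this stops at tick 1400: the first full 100-tick window with no
activity after the idle point.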
+@@ -1393,7 +1464,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
  }
  
  /* Common code used when first setting up, and when resuming. */
@@ -7230,16 +8301,45 @@
  			   struct netfront_info *info)
  {
  	const char *message;
-@@ -1543,7 +1544,7 @@ static int xennet_connect(struct net_device *dev)
+@@ -1456,6 +1527,12 @@ again:
+ 		goto abort_transaction;
+ 	}
+ 
++	err = xenbus_printf(xbt, dev->nodename, "feature-smart-poll", "%d", 1);
++	if (err) {
++		message = "writing feature-smart-poll";
++		goto abort_transaction;
++	}
++
+ 	err = xenbus_transaction_end(xbt, 0);
+ 	if (err) {
+ 		if (err == -EAGAIN)
+@@ -1543,7 +1620,23 @@ static int xennet_connect(struct net_device *dev)
  		return -ENODEV;
  	}
  
 -	err = talk_to_backend(np->xbdev, np);
++	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++			   "feature-smart-poll", "%u",
++			   &np->smart_poll.feature_smart_poll);
++	if (err != 1)
++		np->smart_poll.feature_smart_poll = 0;
++
++	if (np->smart_poll.feature_smart_poll) {
++		hrtimer_init(&np->smart_poll.timer, CLOCK_MONOTONIC,
++			     HRTIMER_MODE_REL);
++		np->smart_poll.timer.function = smart_poll_function;
++		np->smart_poll.netdev = dev;
++		np->smart_poll.smart_poll_freq = DEFAULT_SMART_POLL_FREQ;
++		np->smart_poll.active = 0;
++		np->smart_poll.counter = 0;
++	}
++
 +	err = talk_to_netback(np->xbdev, np);
  	if (err)
  		return err;
  
-@@ -1597,7 +1598,7 @@ static int xennet_connect(struct net_device *dev)
+@@ -1597,7 +1690,7 @@ static int xennet_connect(struct net_device *dev)
  /**
   * Callback received when the backend's state changes.
   */
@@ -7248,7 +8348,7 @@
  			    enum xenbus_state backend_state)
  {
  	struct netfront_info *np = dev_get_drvdata(&dev->dev);
-@@ -1608,6 +1609,8 @@ static void backend_changed(struct xenbus_device *dev,
+@@ -1608,6 +1701,8 @@ static void backend_changed(struct xenbus_device *dev,
  	switch (backend_state) {
  	case XenbusStateInitialising:
  	case XenbusStateInitialised:
@@ -7257,7 +8357,38 @@
  	case XenbusStateConnected:
  	case XenbusStateUnknown:
  	case XenbusStateClosed:
-@@ -1798,7 +1801,7 @@ static struct xenbus_driver netfront_driver = {
+@@ -1627,12 +1722,30 @@ static void backend_changed(struct xenbus_device *dev,
+ 	}
+ }
+ 
++static int xennet_get_coalesce(struct net_device *netdev,
++			       struct ethtool_coalesce *ec)
++{
++	struct netfront_info *np = netdev_priv(netdev);
++	ec->rx_coalesce_usecs = MICRO_SECOND / np->smart_poll.smart_poll_freq;
++	return 0;
++}
++
++static int xennet_set_coalesce(struct net_device *netdev,
++		struct ethtool_coalesce *ec)
++{
++	struct netfront_info *np = netdev_priv(netdev);
++	np->smart_poll.smart_poll_freq = MICRO_SECOND / ec->rx_coalesce_usecs;
++	return 0;
++}
++
+ static const struct ethtool_ops xennet_ethtool_ops =
+ {
+ 	.set_tx_csum = ethtool_op_set_tx_csum,
+ 	.set_sg = xennet_set_sg,
+ 	.set_tso = xennet_set_tso,
+ 	.get_link = ethtool_op_get_link,
++	.get_coalesce = xennet_get_coalesce,
++	.set_coalesce = xennet_set_coalesce,
+ };
+ 
+ #ifdef CONFIG_SYSFS
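
The coalesce hooks above map ethtool's rx_coalesce_usecs onto the poll
frequency as freq = MICRO_SECOND / usecs, so the default 1000 Hz corresponds
to 1000 us. Note that the set path divides by the user-supplied value, so a
value of 0 would need rejecting before the division. A quick standalone
check of the round trip:

#include <stdio.h>

#define MICRO_SECOND 1000000UL

int main(void)
{
	unsigned long freq  = 1000;			/* Hz */
	unsigned long usecs = MICRO_SECOND / freq;	/* -> 1000 us */
	unsigned long back  = MICRO_SECOND / usecs;	/* -> 1000 Hz */

	printf("freq=%lu Hz -> %lu us -> %lu Hz\n", freq, usecs, back);
	return 0;
}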
+@@ -1798,7 +1911,7 @@ static struct xenbus_driver netfront_driver = {
  	.probe = netfront_probe,
  	.remove = __devexit_p(xennet_remove),
  	.resume = netfront_resume,
@@ -7365,6 +8496,32 @@
  	dma_ops = &intel_dma_ops;
  
  	init_iommu_sysfs();
+diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
+index e03fe98..f9db891 100644
+--- a/drivers/pci/iov.c
++++ b/drivers/pci/iov.c
+@@ -706,6 +706,21 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+ }
+ EXPORT_SYMBOL_GPL(pci_sriov_migration);
+ 
++/**
++ * pci_num_vf - return number of VFs associated with a PF
++ * @dev: the PCI device
++ *
++ * Returns number of VFs, or 0 if SR-IOV is not enabled.
++ */
++int pci_num_vf(struct pci_dev *dev)
++{
++	if (!dev || !dev->is_physfn)
++		return 0;
++	else
++		return dev->sriov->nr_virtfn;
++}
++EXPORT_SYMBOL_GPL(pci_num_vf);
++
+ static int ats_alloc_one(struct pci_dev *dev, int ps)
+ {
+ 	int pos;
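
pci_num_vf() above is deliberately defensive: NULL pointers and VF devices
yield 0 rather than an error. A standalone mock of that calling contract,
with the structures reduced to the fields actually used (hypothetical demo
types, not the kernel's):

#include <stdio.h>

struct pci_sriov { int nr_virtfn; };
struct pci_dev   { int is_physfn; struct pci_sriov *sriov; };

static int demo_pci_num_vf(struct pci_dev *dev)
{
	if (!dev || !dev->is_physfn)
		return 0;
	return dev->sriov->nr_virtfn;
}

int main(void)
{
	struct pci_sriov sr = { .nr_virtfn = 4 };
	struct pci_dev pf = { .is_physfn = 1, .sriov = &sr };
	struct pci_dev vf = { .is_physfn = 0, .sriov = NULL };

	printf("PF: %d, VF: %d, NULL: %d\n",
	       demo_pci_num_vf(&pf), demo_pci_num_vf(&vf),
	       demo_pci_num_vf(NULL));
	return 0;
}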
 diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
 index f9cf317..a77a46f 100644
 --- a/drivers/pci/msi.c
@@ -7679,10 +8836,10 @@
 +
 diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
 new file mode 100644
-index 0000000..360eccf
+index 0000000..76d0bdd
 --- /dev/null
 +++ b/drivers/pci/xen-pcifront.c
-@@ -0,0 +1,1156 @@
+@@ -0,0 +1,1157 @@
 +/*
 + * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn)
 + *
@@ -7727,7 +8884,6 @@
 +struct pcifront_device {
 +	struct xenbus_device *xdev;
 +	struct list_head root_buses;
-+	spinlock_t dev_lock;
 +
 +	int evtchn;
 +	int gnt_ref;
@@ -8084,7 +9240,7 @@
 +		r = &dev->resource[i];
 +
 +		if (!r->parent && r->start && r->flags) {
-+			dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n",
++			dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
 +				pci_name(dev), i);
 +			if (pci_claim_resource(dev, i)) {
 +				dev_err(&pdev->xdev->dev, "Could not claim "
@@ -8098,6 +9254,36 @@
 +	return 0;
 +}
 +
++int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
++				unsigned int domain, unsigned int bus,
++				struct pci_bus *b)
++{
++	struct pci_dev *d;
++	unsigned int devfn;
++	int err;
++
++	/* Scan the bus for functions and add any newly found ones.
++	 * We omit handling of PCI bridge attachment because pciback prevents
++	 * bridges from being exported.
++	 */
++	for (devfn = 0; devfn < 0x100; devfn++) {
++		d = pci_get_slot(b, devfn);
++		if (d) {
++			/* Device is already known. */
++			pci_dev_put(d);
++			continue;
++		}
++
++		d = pci_scan_single_device(b, devfn);
++		if (d)
++			dev_info(&pdev->xdev->dev, "New device on "
++				 "%04x:%02x:%02x.%02x found.\n", domain, bus,
++				 PCI_SLOT(devfn), PCI_FUNC(devfn));
++	}
++
++	return 0;
++}
++
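
pcifront_scan_bus() walks all 256 devfn values per bus because devfn packs
the 5-bit slot and 3-bit function numbers into one byte. A standalone
illustration, with the PCI_SLOT/PCI_FUNC macros copied in to keep it
self-contained:

#include <stdio.h>

#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)

int main(void)
{
	unsigned int devfn;

	for (devfn = 0; devfn < 0x100; devfn += 0x41)
		printf("devfn %#04x -> slot %02x function %x\n",
		       devfn, PCI_SLOT(devfn), PCI_FUNC(devfn));
	return 0;
}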
 +int __devinit pcifront_scan_root(struct pcifront_device *pdev,
 +				 unsigned int domain, unsigned int bus)
 +{
@@ -8142,12 +9328,17 @@
 +
 +	list_add(&bus_entry->list, &pdev->root_buses);
 +
++	/* pci_scan_bus_parented skips devices which do not have a
++	 * devfn==0. pcifront_scan_bus enumerates every devfn. */
++	err = pcifront_scan_bus(pdev, domain, bus, b);
++
 +	/* Claim resources before going "live" with our devices */
 +	pci_walk_bus(b, pcifront_claim_resource, pdev);
 +
++	/* Create SysFS and notify udev of the devices. Aka: "going live" */
 +	pci_bus_add_devices(b);
 +
-+	return 0;
++	return err;
 +
 +err_out:
 +	kfree(bus_entry);
@@ -8159,10 +9350,8 @@
 +int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
 +				   unsigned int domain, unsigned int bus)
 +{
-+	struct pci_bus *b;
-+	struct pci_dev *d;
-+	unsigned int devfn;
 +	int err;
++	struct pci_bus *b;
 +
 +#ifndef CONFIG_PCI_DOMAINS
 +	if (domain != 0) {
@@ -8182,33 +9371,15 @@
 +		/* If the bus is unknown, create it. */
 +		return pcifront_scan_root(pdev, domain, bus);
 +
-+	/* Rescan the bus for newly attached functions and add.
-+	 * We omit handling of PCI bridge attachment because pciback prevents
-+	 * bridges from being exported.
-+	 */
-+	for (devfn = 0; devfn < 0x100; devfn++) {
-+		d = pci_get_slot(b, devfn);
-+		if (d) {
-+			/* Device is already known. */
-+			pci_dev_put(d);
-+			continue;
-+		}
++	err = pcifront_scan_bus(pdev, domain, bus, b);
 +
-+		d = pci_scan_single_device(b, devfn);
-+		if (d) {
-+			dev_info(&pdev->xdev->dev, "New device on "
-+				 "%04x:%02x:%02x.%02x found.\n", domain, bus,
-+				 PCI_SLOT(devfn), PCI_FUNC(devfn));
-+			err = pci_bus_add_device(d);
-+			if (err) {
-+				dev_err(&pdev->xdev->dev, "Failed to add "
-+				" device to bus.\n");
-+				return err;
-+			}
-+		}
-+	}
++	/* Claim resources before going "live" with our devices */
++	pci_walk_bus(b, pcifront_claim_resource, pdev);
 +
-+	return 0;
++	/* Create SysFS and notify udev of the devices. Aka: "going live" */
++	pci_bus_add_devices(b);
++
++	return err;
 +}
 +
 +static void free_root_bus_devs(struct pci_bus *bus)
@@ -8397,7 +9568,6 @@
 +
 +	INIT_LIST_HEAD(&pdev->root_buses);
 +
-+	spin_lock_init(&pdev->dev_lock);
 +	spin_lock_init(&pdev->sh_info_lock);
 +
 +	pdev->evtchn = INVALID_EVTCHN;
@@ -8508,7 +9678,6 @@
 +	char str[64];
 +	unsigned int domain, bus;
 +
-+	spin_lock(&pdev->dev_lock);
 +
 +	/* Only connect once */
 +	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
@@ -8564,11 +9733,8 @@
 +	}
 +
 +	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-+	if (err)
-+		goto out;
 +
 +out:
-+	spin_unlock(&pdev->dev_lock);
 +	return err;
 +}
 +
@@ -8577,7 +9743,6 @@
 +	int err = 0;
 +	enum xenbus_state prev_state;
 +
-+	spin_lock(&pdev->dev_lock);
 +
 +	prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
 +
@@ -8592,7 +9757,6 @@
 +	err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
 +
 +out:
-+	spin_unlock(&pdev->dev_lock);
 +
 +	return err;
 +}
@@ -8604,8 +9768,6 @@
 +	unsigned int domain, bus;
 +	char str[64];
 +
-+	spin_lock(&pdev->dev_lock);
-+
 +	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 +	    XenbusStateReconfiguring)
 +		goto out;
@@ -8654,7 +9816,6 @@
 +	xenbus_switch_state(pdev->xdev, XenbusStateConnected);
 +
 +out:
-+	spin_unlock(&pdev->dev_lock);
 +	return err;
 +}
 +
@@ -8667,8 +9828,6 @@
 +	struct pci_dev *pci_dev;
 +	char str[64];
 +
-+	spin_lock(&pdev->dev_lock);
-+
 +	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 +	    XenbusStateConnected)
 +		goto out;
@@ -8739,7 +9898,6 @@
 +	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
 +
 +out:
-+	spin_unlock(&pdev->dev_lock);
 +	return err;
 +}
 +
@@ -8948,7 +10106,7 @@
  
  	/* Nothing to do if running in dom0. */
 diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index cab100a..c63eeae 100644
+index cab100a..a3e1923 100644
 --- a/drivers/xen/Kconfig
 +++ b/drivers/xen/Kconfig
 @@ -28,6 +28,110 @@ config XEN_DEV_EVTCHN
@@ -9062,7 +10220,7 @@
  config XENFS
  	tristate "Xen filesystem"
  	depends on XEN
-@@ -60,4 +164,36 @@ config XEN_SYS_HYPERVISOR
+@@ -60,4 +164,37 @@ config XEN_SYS_HYPERVISOR
           Create entries under /sys/hypervisor describing the Xen
  	 hypervisor environment.  When running native or in another
  	 virtual environment, /sys/hypervisor will still be present,
@@ -9070,15 +10228,6 @@
 \ No newline at end of file
 +	 but will have no xen contents.
 +
-+config XEN_PLATFORM_PCI
-+	tristate "xen platform pci device driver"
-+	depends on XEN
-+	help
-+	  Driver for the Xen PCI Platform device: it is responsible for
-+	  initializing xenbus and grant_table when running in a Xen HVM
-+	  domain. As a consequence this driver is required to run any Xen PV
-+	  frontend on Xen HVM.
-+
 +config XEN_MCE
 +       def_bool y
 +       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
@@ -9101,8 +10250,18 @@
 +	   tristate
 +	   depends on XEN_DOM0 && ACPI_PROCESSOR && CPU_FREQ
 +	   default y
++
++config XEN_PLATFORM_PCI
++	tristate "xen platform pci device driver"
++	depends on XEN
++	default m
++	help
++	  Driver for the Xen PCI Platform device: it is responsible for
++	  initializing xenbus and grant_table when running in a Xen HVM
++	  domain. As a consequence this driver is required to run any Xen PV
++	  frontend on Xen HVM.
 diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index 7c28434..5771359 100644
+index 7c28434..ef1ea63 100644
 --- a/drivers/xen/Makefile
 +++ b/drivers/xen/Makefile
 @@ -1,12 +1,27 @@
@@ -9132,12 +10291,12 @@
 +obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
 +obj-$(CONFIG_XENFS)			+= xenfs/
 +obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-+obj-$(CONFIG_XEN_PLATFORM_PCI)		+= platform-pci.o
 +obj-$(CONFIG_XEN_MCE)		+= mce.o
 +
 +obj-$(CONFIG_XEN_S3)           += acpi.o
 +obj-$(CONFIG_ACPI_PROCESSOR_XEN) += acpi_processor.o
 +obj-$(CONFIG_ACPI_HOTPLUG_MEMORY)  += xen_acpi_memhotplug.o
++obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
 +
 +xen-evtchn-y				:= evtchn.o
 +xen-gntdev-y				:= gntdev.o
@@ -9594,7 +10753,7 @@
 +subsys_initcall(xen_acpi_processor_extcntl_init);
 +MODULE_LICENSE("GPL");
 diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
-index 4204336..d7c0eae 100644
+index 4204336..a5ac75b 100644
 --- a/drivers/xen/balloon.c
 +++ b/drivers/xen/balloon.c
 @@ -43,6 +43,7 @@
@@ -9764,8 +10923,9 @@
  static int decrease_reservation(unsigned long nr_pages)
  {
 -	unsigned long  pfn, i, flags;
+-	struct page   *page;
 +	unsigned long  pfn, lpfn, mfn, i, j, flags;
- 	struct page   *page;
++	struct page   *page = NULL;
  	int            need_sleep = 0;
 -	int ret;
 +	int		discontig, discontig_free;
@@ -9785,7 +10945,7 @@
  			nr_pages = i;
  			need_sleep = 1;
  			break;
-@@ -282,37 +321,50 @@ static int decrease_reservation(unsigned long nr_pages)
+@@ -282,37 +321,52 @@ static int decrease_reservation(unsigned long nr_pages)
  		frame_list[i] = pfn_to_mfn(pfn);
  
  		scrub_page(page);
@@ -9819,6 +10979,8 @@
 +				discontig_free = 1;
 +
 +			set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
++			page = pfn_to_page(lpfn);
++
 +			if (!PageHighMem(page)) {
 +				ret = HYPERVISOR_update_va_mapping(
 +					(unsigned long)__va(lpfn << PAGE_SHIFT),
@@ -9850,7 +11012,7 @@
  
  	return need_sleep;
  }
-@@ -379,7 +431,7 @@ static void watch_target(struct xenbus_watch *watch,
+@@ -379,7 +433,7 @@ static void watch_target(struct xenbus_watch *watch,
  	/* The given memory/target value is in KiB, so it needs converting to
  	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
  	 */
@@ -9859,22 +11021,22 @@
  }
  
  static int balloon_init_watcher(struct notifier_block *notifier,
-@@ -405,9 +457,12 @@ static int __init balloon_init(void)
+@@ -405,9 +459,12 @@ static int __init balloon_init(void)
  	if (!xen_pv_domain())
  		return -ENODEV;
  
 -	pr_info("xen_balloon: Initialising balloon driver.\n");
 +	pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
 +		balloon_order);
-+
-+	balloon_npages = 1 << balloon_order;
  
 -	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
++	balloon_npages = 1 << balloon_order;
++
 +	balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
  	balloon_stats.target_pages  = balloon_stats.current_pages;
  	balloon_stats.balloon_low   = 0;
  	balloon_stats.balloon_high  = 0;
-@@ -420,7 +475,7 @@ static int __init balloon_init(void)
+@@ -420,7 +477,7 @@ static int __init balloon_init(void)
  	register_balloon(&balloon_sysdev);
  
  	/* Initialise the balloon with excess memory space. */
@@ -9883,7 +11045,7 @@
  		page = pfn_to_page(pfn);
  		if (!PageReserved(page))
  			balloon_append(page);
-@@ -444,6 +499,121 @@ static void balloon_exit(void)
+@@ -444,6 +501,121 @@ static void balloon_exit(void)
  
  module_exit(balloon_exit);
  
@@ -10005,7 +11167,7 @@
  #define BALLOON_SHOW(name, format, args...)				\
  	static ssize_t show_##name(struct sys_device *dev,		\
  				   struct sysdev_attribute *attr,	\
-@@ -477,7 +647,7 @@ static ssize_t store_target_kb(struct sys_device *dev,
+@@ -477,7 +649,7 @@ static ssize_t store_target_kb(struct sys_device *dev,
  
  	target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
  
@@ -10014,7 +11176,7 @@
  
  	return count;
  }
-@@ -491,7 +661,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
+@@ -491,7 +663,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
  {
  	return sprintf(buf, "%llu\n",
  		       (unsigned long long)balloon_stats.target_pages
@@ -10023,7 +11185,7 @@
  }
  
  static ssize_t store_target(struct sys_device *dev,
-@@ -507,7 +677,7 @@ static ssize_t store_target(struct sys_device *dev,
+@@ -507,7 +679,7 @@ static ssize_t store_target(struct sys_device *dev,
  
  	target_bytes = memparse(buf, &endchar);
  
@@ -11410,10 +12572,10 @@
 +}
 diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c
 new file mode 100644
-index 0000000..c31e5c4
+index 0000000..a0534fc
 --- /dev/null
 +++ b/drivers/xen/blkback/xenbus.c
-@@ -0,0 +1,546 @@
+@@ -0,0 +1,553 @@
 +/*  Xenbus code for blkif backend
 +    Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
 +    Copyright (C) 2005 XenSource Ltd
@@ -11507,6 +12669,13 @@
 +		return;
 +	}
 +
++	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
++	if (err) {
++		xenbus_dev_error(blkif->be->dev, err, "block flush");
++		return;
++	}
++	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
++
 +	blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
 +	if (IS_ERR(blkif->xenblkd)) {
 +		err = PTR_ERR(blkif->xenblkd);
@@ -11962,19 +13131,19 @@
 +}
 diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile
 new file mode 100644
-index 0000000..99ff53c
+index 0000000..822b4e4
 --- /dev/null
 +++ b/drivers/xen/blktap/Makefile
 @@ -0,0 +1,3 @@
 +obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o
 +
-+blktap-objs := control.o ring.o wait_queue.o device.o request.o sysfs.o
++blktap-objs := control.o ring.o device.o request.o sysfs.o
 diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
 new file mode 100644
-index 0000000..db4cf02
+index 0000000..33603cd
 --- /dev/null
 +++ b/drivers/xen/blktap/blktap.h
-@@ -0,0 +1,253 @@
+@@ -0,0 +1,231 @@
 +#ifndef _BLKTAP_H_
 +#define _BLKTAP_H_
 +
@@ -11986,8 +13155,6 @@
 +#include <xen/blkif.h>
 +#include <xen/grant_table.h>
 +
-+//#define ENABLE_PASSTHROUGH
-+
 +extern int blktap_debug_level;
 +
 +#define BTPRINTK(level, tag, force, _f, _a...)				\
@@ -12008,26 +13175,17 @@
 +#define BLKTAP_RING_FD               2
 +#define BLKTAP_RING_VMA              3
 +#define BLKTAP_DEVICE                4
-+#define BLKTAP_PAUSE_REQUESTED       6
-+#define BLKTAP_PAUSED                7
 +#define BLKTAP_SHUTDOWN_REQUESTED    8
 +#define BLKTAP_PASSTHROUGH           9
-+#define BLKTAP_DEFERRED              10
 +
 +/* blktap IOCTLs: */
 +#define BLKTAP2_IOCTL_KICK_FE        1
 +#define BLKTAP2_IOCTL_ALLOC_TAP	     200
 +#define BLKTAP2_IOCTL_FREE_TAP       201
 +#define BLKTAP2_IOCTL_CREATE_DEVICE  202
-+#define BLKTAP2_IOCTL_SET_PARAMS     203
-+#define BLKTAP2_IOCTL_PAUSE          204
-+#define BLKTAP2_IOCTL_REOPEN         205
-+#define BLKTAP2_IOCTL_RESUME         206
 +
 +#define BLKTAP2_MAX_MESSAGE_LEN      256
 +
-+#define BLKTAP2_RING_MESSAGE_PAUSE   1
-+#define BLKTAP2_RING_MESSAGE_RESUME  2
 +#define BLKTAP2_RING_MESSAGE_CLOSE   3
 +
 +#define BLKTAP_REQUEST_FREE          0
@@ -12098,8 +13256,6 @@
 +	unsigned long                  ring_vstart;
 +	unsigned long                  user_vstart;
 +
-+	int                            response;
-+
 +	wait_queue_head_t              poll_wait;
 +
 +	dev_t                          devno;
@@ -12145,8 +13301,6 @@
 +
 +	struct blktap_params           params;
 +
-+	struct rw_semaphore            tap_sem;
-+
 +	struct blktap_ring             ring;
 +	struct blktap_device           device;
 +
@@ -12155,7 +13309,6 @@
 +	struct scatterlist             sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 +
 +	wait_queue_head_t              wq;
-+	struct list_head               deferred_queue;
 +
 +	struct blktap_statistics       stats;
 +};
@@ -12184,8 +13337,6 @@
 +int blktap_ring_free(void);
 +int blktap_ring_create(struct blktap *);
 +int blktap_ring_destroy(struct blktap *);
-+int blktap_ring_pause(struct blktap *);
-+int blktap_ring_resume(struct blktap *);
 +void blktap_ring_kick_user(struct blktap *);
 +
 +int blktap_sysfs_init(void);
@@ -12197,8 +13348,7 @@
 +void blktap_device_free(void);
 +int blktap_device_create(struct blktap *);
 +int blktap_device_destroy(struct blktap *);
-+int blktap_device_pause(struct blktap *);
-+int blktap_device_resume(struct blktap *);
++int blktap_device_run_queue(struct blktap *);
 +void blktap_device_restart(struct blktap *);
 +void blktap_device_finish_request(struct blktap *,
 +				  struct blkif_response *,
@@ -12209,9 +13359,6 @@
 +				     unsigned, unsigned);
 +#endif
 +
-+void blktap_defer(struct blktap *);
-+void blktap_run_deferred(void);
-+
 +int blktap_request_pool_init(void);
 +void blktap_request_pool_free(void);
 +int blktap_request_pool_grow(void);
@@ -12230,10 +13377,10 @@
 +#endif
 diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
 new file mode 100644
-index 0000000..a4852f7
+index 0000000..6a3f3e1
 --- /dev/null
 +++ b/drivers/xen/blktap/control.c
-@@ -0,0 +1,284 @@
+@@ -0,0 +1,266 @@
 +#include <linux/module.h>
 +#include <linux/sched.h>
 +#include <linux/miscdevice.h>
@@ -12256,7 +13403,6 @@
 +
 +	memset(tap, 0, sizeof(*tap));
 +	set_bit(BLKTAP_CONTROL, &tap->dev_inuse);
-+	init_rwsem(&tap->tap_sem);
 +	init_waitqueue_head(&tap->wq);
 +	atomic_set(&tap->refcnt, 0);
 +	sg_init_table(tap->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -12400,46 +13546,29 @@
 +blktap_control_destroy_device(struct blktap *tap)
 +{
 +	int err;
-+	unsigned long inuse;
 +
 +	if (!tap)
 +		return 0;
 +
 +	set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
 +
-+	for (;;) {
-+		inuse = tap->dev_inuse;
-+		err   = blktap_device_destroy(tap);
-+		if (err)
-+			goto wait;
++	err = blktap_device_destroy(tap);
++	if (err)
++		return err;
 +
-+		inuse = tap->dev_inuse;
-+		err   = blktap_ring_destroy(tap);
-+		if (err)
-+			goto wait;
++	err = blktap_sysfs_destroy(tap);
++	if (err)
++		return err;
 +
-+		inuse = tap->dev_inuse;
-+		err   = blktap_sysfs_destroy(tap);
-+		if (err)
-+			goto wait;
++	err = blktap_ring_destroy(tap);
++	if (err)
++		return err;
 +
-+		break;
++	clear_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
++	clear_bit(BLKTAP_CONTROL, &tap->dev_inuse);
++	wake_up(&tap->wq);
 +
-+	wait:
-+		BTDBG("inuse: 0x%lx, dev_inuse: 0x%lx\n",
-+		      inuse, tap->dev_inuse);
-+		if (wait_event_interruptible(tap->wq, tap->dev_inuse != inuse))
-+			break;
-+	}
-+
-+	clear_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
-+
-+	if (tap->dev_inuse == (1UL << BLKTAP_CONTROL)) {
-+		err = 0;
-+		clear_bit(BLKTAP_CONTROL, &tap->dev_inuse);
-+	}
-+
-+	return err;
++	return 0;
 +}
 +
 +static int __init
@@ -12485,7 +13614,7 @@
 +{
 +	int err;
 +
-+	if (!xen_domain())
++	if (!xen_pv_domain())
 +		return -ENODEV;
 +
 +	err = blktap_request_pool_init();
@@ -12520,10 +13649,10 @@
 +MODULE_LICENSE("Dual BSD/GPL");
 diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
 new file mode 100644
-index 0000000..a50b622
+index 0000000..3feaa03
 --- /dev/null
 +++ b/drivers/xen/blktap/device.c
-@@ -0,0 +1,1138 @@
+@@ -0,0 +1,931 @@
 +#include <linux/version.h> /* XXX Remove uses of VERSION instead. */
 +#include <linux/fs.h>
 +#include <linux/blkdev.h>
@@ -12591,7 +13720,7 @@
 +
 +	dev->users--;
 +	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
-+		blktap_device_destroy(tap);
++		blktap_control_destroy_device(tap);
 +
 +	return 0;
 +}
@@ -12623,26 +13752,6 @@
 +		      command, (long)argument, inode->i_rdev);
 +
 +	switch (command) {
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
-+	case HDIO_GETGEO: {
-+		struct hd_geometry geo;
-+		int ret;
-+
-+                if (!argument)
-+                        return -EINVAL;
-+
-+		geo.start = get_start_sect(bd);
-+		ret = blktap_device_getgeo(bd, &geo);
-+		if (ret)
-+			return ret;
-+
-+		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
-+				 sizeof(geo)))
-+                        return -EFAULT;
-+
-+                return 0;
-+	}
-+#endif
 +	case CDROMMULTISESSION:
 +		BTDBG("FIXME: support multisession CDs later\n");
 +		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
@@ -12675,9 +13784,7 @@
 +	.open      = blktap_device_open,
 +	.release   = blktap_device_release,
 +	.ioctl     = blktap_device_ioctl,
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
 +	.getgeo    = blktap_device_getgeo
-+#endif
 +};
 +
 +static int
@@ -12738,9 +13845,6 @@
 +	BUG_ON(ret);
 +}
 +
-+/*
-+ * tap->tap_sem held on entry
-+ */
 +static void
 +blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
 +{
@@ -12828,9 +13932,6 @@
 +			       request->nr_pages << PAGE_SHIFT, NULL);
 +}
 +
-+/*
-+ * tap->tap_sem held on entry
-+ */
 +static void
 +blktap_unmap(struct blktap *tap, struct blktap_request *request)
 +{
@@ -12838,7 +13939,6 @@
 +	unsigned long kvaddr;
 +
 +	usr_idx = request->usr_idx;
-+	down_write(&tap->ring.vma->vm_mm->mmap_sem);
 +
 +	for (i = 0; i < request->nr_pages; i++) {
 +		kvaddr = request_to_kaddr(request, i);
@@ -12856,13 +13956,17 @@
 +		}
 +	}
 +
-+	blktap_device_fast_flush(tap, request);
-+	up_write(&tap->ring.vma->vm_mm->mmap_sem);
++	if (blktap_active(tap)) {
++		down_write(&tap->ring.vma->vm_mm->mmap_sem);
++		blktap_device_fast_flush(tap, request);
++		up_write(&tap->ring.vma->vm_mm->mmap_sem);
++	}
 +}
 +
 +/*
 + * called if the tapdisk process dies unexpectedly.
 + * fail and release any pending requests and disable queue.
++ * may be called from non-tapdisk context.
 + */
 +void
 +blktap_device_fail_pending_requests(struct blktap *tap)
@@ -12875,8 +13979,6 @@
 +	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
 +		return;
 +
-+	down_write(&tap->tap_sem);
-+
 +	dev = &tap->device;
 +	for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
 +		request = tap->pending_requests[usr_idx];
@@ -12894,8 +13996,6 @@
 +		blktap_request_free(tap, request);
 +	}
 +
-+	up_write(&tap->tap_sem);
-+
 +	spin_lock_irq(&dev->lock);
 +
 +	/* fail any future requests */
@@ -12905,9 +14005,6 @@
 +	spin_unlock_irq(&dev->lock);
 +}
 +
-+/*
-+ * tap->tap_sem held on entry
-+ */
 +void
 +blktap_device_finish_request(struct blktap *tap,
 +			     struct blkif_response *res,
@@ -13116,9 +14213,6 @@
 +	err = -1;
 +	memset(&table, 0, sizeof(table));
 +
-+	if (!blktap_active(tap))
-+		goto out;
-+
 +	ring    = &tap->ring;
 +	usr_idx = request->usr_idx;
 +	blkif_req.id = usr_idx;
@@ -13207,142 +14301,43 @@
 +	return err;
 +}
 +
-+#ifdef ENABLE_PASSTHROUGH
-+#define rq_for_each_bio_safe(_bio, _tmp, _req)				\
-+	if ((_req)->bio)						\
-+		for (_bio = (_req)->bio;				\
-+		     _bio && ((_tmp = _bio->bi_next) || 1);		\
-+		     _bio = _tmp)
-+
-+static void
-+blktap_device_forward_request(struct blktap *tap, struct request *req)
-+{
-+	struct bio *bio, *tmp;
-+	struct blktap_device *dev;
-+
-+	dev = &tap->device;
-+
-+	rq_for_each_bio_safe(bio, tmp, req) {
-+		bio->bi_bdev = dev->bdev;
-+		submit_bio(bio->bi_rw, bio);
-+	}
-+}
-+
-+static void
-+blktap_device_close_bdev(struct blktap *tap)
-+{
-+	struct blktap_device *dev;
-+
-+	dev = &tap->device;
-+
-+	if (dev->bdev)
-+		blkdev_put(dev->bdev);
-+
-+	dev->bdev = NULL;
-+	clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
-+}
-+
-+static int
-+blktap_device_open_bdev(struct blktap *tap, u32 pdev)
-+{
-+	struct block_device *bdev;
-+	struct blktap_device *dev;
-+
-+	dev = &tap->device;
-+
-+	bdev = open_by_devnum(pdev, FMODE_WRITE);
-+	if (IS_ERR(bdev)) {
-+		BTERR("opening device %x:%x failed: %ld\n",
-+		      MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
-+		return PTR_ERR(bdev);
-+	}
-+
-+	if (!bdev->bd_disk) {
-+		BTERR("device %x:%x doesn't exist\n",
-+		      MAJOR(pdev), MINOR(pdev));
-+		blkdev_put(dev->bdev);
-+		return -ENOENT;
-+	}
-+
-+	dev->bdev = bdev;
-+	set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
-+
-+	/* TODO: readjust queue parameters */
-+
-+	BTINFO("set device %d to passthrough on %x:%x\n",
-+	       tap->minor, MAJOR(pdev), MINOR(pdev));
-+
-+	return 0;
-+}
-+
-+int
-+blktap_device_enable_passthrough(struct blktap *tap,
-+				 unsigned major, unsigned minor)
-+{
-+	u32 pdev;
-+	struct blktap_device *dev;
-+
-+	dev  = &tap->device;
-+	pdev = MKDEV(major, minor);
-+
-+	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+		return -EINVAL;
-+
-+	if (dev->bdev) {
-+		if (pdev)
-+			return -EINVAL;
-+		blktap_device_close_bdev(tap);
-+		return 0;
-+	}
-+
-+	return blktap_device_open_bdev(tap, pdev);
-+}
-+#endif
-+
 +/*
-+ * dev->lock held on entry
++ * called from tapdisk context
 + */
-+static void
++int
 +blktap_device_run_queue(struct blktap *tap)
 +{
-+	int queued, err;
++	int err, rv;
 +	struct request_queue *rq;
 +	struct request *req;
 +	struct blktap_ring *ring;
 +	struct blktap_device *dev;
 +	struct blktap_request *request;
 +
-+	queued = 0;
 +	ring   = &tap->ring;
 +	dev    = &tap->device;
 +	rq     = dev->gd->queue;
 +
 +	BTDBG("running queue for %d\n", tap->minor);
++	spin_lock_irq(&dev->lock);
 +
 +	while ((req = blk_peek_request(rq)) != NULL) {
 +		if (!blk_fs_request(req)) {
++			blk_start_request(req);
 +			__blk_end_request_cur(req, 0);
 +			continue;
 +		}
 +
 +		if (blk_barrier_rq(req)) {
++			blk_start_request(req);
 +			__blk_end_request_cur(req, 0);
 +			continue;
 +		}
 +
-+#ifdef ENABLE_PASSTHROUGH
-+		if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
-+			blkdev_dequeue_request(req);
-+			blktap_device_forward_request(tap, req);
-+			continue;
-+		}
-+#endif
-+
 +		if (RING_FULL(&ring->ring)) {
 +		wait:
 +			/* Avoid pointless unplugs. */
 +			blk_stop_queue(rq);
-+			blktap_defer(tap);
 +			break;
 +		}
 +
@@ -13362,27 +14357,26 @@
 +		blk_start_request(req);
 +
 +		spin_unlock_irq(&dev->lock);
-+		down_read(&tap->tap_sem);
 +
 +		err = blktap_device_process_request(tap, request, req);
-+		if (!err)
-+			queued++;
-+		else {
++		if (err) {
 +			blktap_device_end_dequeued_request(dev, req, -EIO);
 +			blktap_request_free(tap, request);
 +		}
 +
-+		up_read(&tap->tap_sem);
 +		spin_lock_irq(&dev->lock);
 +	}
 +
-+	if (queued)
-+		blktap_ring_kick_user(tap);
++	spin_unlock_irq(&dev->lock);
++
++	rv = ring->ring.req_prod_pvt -
++		ring->ring.sring->req_prod;
++
++	RING_PUSH_REQUESTS(&ring->ring);
++
++	return rv;
 +}
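
blktap_device_run_queue() now reports how much work it queued by diffing the
private producer index against the last published one just before
RING_PUSH_REQUESTS() makes the batch visible to the peer. A simplified
userspace model of that private/shared producer pattern (hypothetical types,
memory barriers elided):

#include <stdio.h>

struct sring { unsigned int req_prod; };	/* shared with the peer */
struct front { unsigned int req_prod_pvt; struct sring *sring; };

static void queue_request(struct front *f)
{
	/* a real frontend writes the request at req_prod_pvt first */
	f->req_prod_pvt++;
}

static unsigned int push_requests(struct front *f)
{
	unsigned int fresh = f->req_prod_pvt - f->sring->req_prod;

	/* a real implementation issues a write barrier here */
	f->sring->req_prod = f->req_prod_pvt;
	return fresh;
}

int main(void)
{
	struct sring s = { 0 };
	struct front f = { 0, &s };

	queue_request(&f);
	queue_request(&f);
	printf("pushed %u requests\n", push_requests(&f));
	return 0;
}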
 +
-+/*
-+ * dev->lock held on entry
-+ */
 +static void
 +blktap_device_do_request(struct request_queue *rq)
 +{
@@ -13398,17 +14392,11 @@
 +	if (!blktap_active(tap))
 +		goto fail;
 +
-+	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
-+	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
-+		blktap_defer(tap);
-+		return;
-+	}
-+
-+	blktap_device_run_queue(tap);
++	blktap_ring_kick_user(tap);
 +	return;
 +
 +fail:
-+	while ((req = blk_peek_request(rq))) {
++	while ((req = blk_fetch_request(rq))) {
 +		BTERR("device closed: failing secs %llu - %llu\n",
 +		      (unsigned long long)blk_rq_pos(req),
 +		      (unsigned long long)blk_rq_pos(req) + blk_rq_sectors(req));
@@ -13422,18 +14410,6 @@
 +	struct blktap_device *dev;
 +
 +	dev = &tap->device;
-+
-+	if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
-+		blktap_defer(tap);
-+		return;
-+	}
-+
-+	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
-+	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
-+		blktap_defer(tap);
-+		return;
-+	}
-+
 +	spin_lock_irq(&dev->lock);
 +
 +	/* Re-enable calldowns. */
@@ -13485,52 +14461,6 @@
 +}
 +
 +int
-+blktap_device_resume(struct blktap *tap)
-+{
-+	int err;
-+
-+	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
-+		return -ENODEV;
-+
-+	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+		return 0;
-+
-+	err = blktap_ring_resume(tap);
-+	if (err)
-+		return err;
-+
-+	/* device size may have changed */
-+	blktap_device_configure(tap);
-+
-+	BTDBG("restarting device\n");
-+	blktap_device_restart(tap);
-+
-+	return 0;
-+}
-+
-+int
-+blktap_device_pause(struct blktap *tap)
-+{
-+	unsigned long flags;
-+	struct blktap_device *dev = &tap->device;
-+
-+	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
-+		return -ENODEV;
-+
-+	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+		return 0;
-+
-+	spin_lock_irqsave(&dev->lock, flags);
-+
-+	blk_stop_queue(dev->gd->queue);
-+	set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
-+
-+	spin_unlock_irqrestore(&dev->lock, flags);
-+
-+	return blktap_ring_pause(tap);
-+}
-+
-+int
 +blktap_device_destroy(struct blktap *tap)
 +{
 +	struct blktap_device *dev = &tap->device;
@@ -13541,8 +14471,11 @@
 +
 +	BTINFO("destroy device %d users %d\n", tap->minor, dev->users);
 +
-+	if (dev->users)
++	if (dev->users) {
++		blktap_device_fail_pending_requests(tap);
++		blktap_device_restart(tap);
 +		return -EBUSY;
++	}
 +
 +	spin_lock_irq(&dev->lock);
 +	/* No more blktap_device_do_request(). */
@@ -13551,17 +14484,10 @@
 +	dev->gd = NULL;
 +	spin_unlock_irq(&dev->lock);
 +
-+#ifdef ENABLE_PASSTHROUGH
-+	if (dev->bdev)
-+		blktap_device_close_bdev(tap);
-+#endif
-+
 +	del_gendisk(gd);
 +	blk_cleanup_queue(gd->queue);
 +	put_disk(gd);
 +
-+	wake_up(&tap->wq);
-+
 +	return 0;
 +}
 +
@@ -13609,11 +14535,7 @@
 +	if (!rq)
 +		goto error;
 +
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
 +	elevator_init(rq, "noop");
-+#else
-+	elevator_init(rq, &elevator_noop);
-+#endif
 +
 +	gd->queue     = rq;
 +	rq->queuedata = dev;
@@ -13664,10 +14586,10 @@
 +}
 diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
 new file mode 100644
-index 0000000..770736a
+index 0000000..4efd013
 --- /dev/null
 +++ b/drivers/xen/blktap/request.c
-@@ -0,0 +1,297 @@
+@@ -0,0 +1,295 @@
 +#include <linux/spinlock.h>
 +#include <xen/balloon.h>
 +#include <linux/sched.h>
@@ -13908,12 +14830,10 @@
 +	list_add(&request->free_list, &pool.free_list);
 +	atomic_dec(&handle->bucket->reqs_in_use);
 +	free = atomic_dec_and_test(&pool.reqs_in_use);
++	tap->pending_cnt--;
 +
 +	spin_unlock_irqrestore(&pool.lock, flags);
 +
-+	if (--tap->pending_cnt == 0)
-+		wake_up_interruptible(&tap->wq);
-+
 +	if (free)
 +		wake_up(&pool.wait_queue);
 +}
@@ -13967,10 +14887,10 @@
 +}
 diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
 new file mode 100644
-index 0000000..74a7aa7
+index 0000000..d7d0c79
 --- /dev/null
 +++ b/drivers/xen/blktap/ring.c
-@@ -0,0 +1,615 @@
+@@ -0,0 +1,477 @@
 +#include <linux/module.h>
 +#include <linux/signal.h>
 +#include <linux/sched.h>
@@ -14003,7 +14923,7 @@
 +  */
 +#define RING_PAGES 1
 +
-+static int
++static void
 +blktap_read_ring(struct blktap *tap)
 +{
 +	/* This is called to read responses from the ring. */
@@ -14013,13 +14933,9 @@
 +	struct blktap_ring *ring;
 +	struct blktap_request *request;
 +
-+	down_read(&tap->tap_sem);
-+
 +	ring = &tap->ring;
-+	if (!ring->vma) {
-+		up_read(&tap->tap_sem);
-+		return 0;
-+	}
++	if (!ring->vma)
++		return;
 +
 +	/* for each outstanding message on the ring  */
 +	rp = ring->ring.sring->rsp_prod;
@@ -14027,7 +14943,6 @@
 +
 +	for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
 +		memcpy(&res, RING_GET_RESPONSE(&ring->ring, rc), sizeof(res));
-+		mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
 +		++ring->ring.rsp_cons;
 +
 +		usr_idx = (int)res.id;
@@ -14043,11 +14958,9 @@
 +		blktap_device_finish_request(tap, &res, request);
 +	}
 +
-+	up_read(&tap->tap_sem);
-+
-+	blktap_run_deferred();
 +
-+	return 0;
++	blktap_device_restart(tap);
++	return;
 +}
 +
 +static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -14136,51 +15049,22 @@
 +}
 +
 +static void
-+blktap_ring_vm_unmap(struct vm_area_struct *vma)
-+{
-+	struct blktap *tap = vma_to_blktap(vma);
-+
-+	down_write(&tap->tap_sem);
-+	clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
-+	clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
-+	clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
-+	up_write(&tap->tap_sem);
-+}
-+
-+static void
 +blktap_ring_vm_close(struct vm_area_struct *vma)
 +{
 +	struct blktap *tap = vma_to_blktap(vma);
 +	struct blktap_ring *ring = &tap->ring;
 +
-+	blktap_ring_vm_unmap(vma);                 /* fail future requests */
-+	blktap_device_fail_pending_requests(tap);  /* fail pending requests */
-+	blktap_device_restart(tap);                /* fail deferred requests */
-+
-+	down_write(&tap->tap_sem);
-+
-+	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
-+
-+	kfree(ring->foreign_map.map);
-+	ring->foreign_map.map = NULL;
-+
-+	/* Free the ring page. */
-+	ClearPageReserved(virt_to_page(ring->ring.sring));
-+	free_page((unsigned long)ring->ring.sring);
-+
 +	BTINFO("unmapping ring %d\n", tap->minor);
-+	ring->ring.sring = NULL;
++	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
++	clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
 +	ring->vma = NULL;
 +
-+	up_write(&tap->tap_sem);
-+
-+	wake_up(&tap->wq);
++	blktap_control_destroy_device(tap);
 +}
 +
 +static struct vm_operations_struct blktap_ring_vm_operations = {
 +	.close    = blktap_ring_vm_close,
-+	.unmap    = blktap_ring_vm_unmap,
-+	.fault   = blktap_ring_fault,
++	.fault    = blktap_ring_fault,
 +	.zap_pte  = blktap_ring_clear_pte,
 +};
 +
@@ -14203,6 +15087,9 @@
 +	if (!test_bit(BLKTAP_CONTROL, &tap->dev_inuse))
 +		return -ENODEV;
 +
++	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
++		return -EBUSY;
++
 +	/* Only one process can access ring at a time */
 +	if (test_and_set_bit(BLKTAP_RING_FD, &tap->dev_inuse))
 +		return -EBUSY;
@@ -14221,7 +15108,9 @@
 +	BTINFO("freeing device %d\n", tap->minor);
 +	clear_bit(BLKTAP_RING_FD, &tap->dev_inuse);
 +	filp->private_data = NULL;
-+	wake_up(&tap->wq);	
++
++	blktap_control_destroy_device(tap);
++
 +	return 0;
 +}
 +
@@ -14328,6 +15217,8 @@
 +	free_page((unsigned long)sring);
 +	kfree(map);
 +
++	clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
++
 +	return -ENOMEM;
 +}
 +
@@ -14336,10 +15227,8 @@
 +{
 +	struct blktap_ring *ring = &tap->ring;
 +
-+	down_read(&tap->tap_sem);
 +	if (ring->ring.sring)
-+		ring->ring.sring->pad[0] = msg;
-+	up_read(&tap->tap_sem);
++		ring->ring.sring->private.tapif_user.msg = msg;
 +}
 +
 +static int
@@ -14354,32 +15243,15 @@
 +	switch(cmd) {
 +	case BLKTAP2_IOCTL_KICK_FE:
 +		/* There are fe messages to process. */
-+		return blktap_read_ring(tap);
++		blktap_read_ring(tap);
++		return 0;
 +
 +	case BLKTAP2_IOCTL_CREATE_DEVICE:
 +		if (!arg)
 +			return -EINVAL;
 +
-+		if (copy_from_user(&params, (struct blktap_params __user *)arg,
-+				   sizeof(params))) {
-+			BTERR("failed to get params\n");
-+			return -EFAULT;
-+		}
-+
-+		if (blktap_validate_params(tap, &params)) {
-+			BTERR("invalid params\n");
-+			return -EINVAL;
-+		}
-+
-+		tap->params = params;
-+		return blktap_device_create(tap);
-+
-+	case BLKTAP2_IOCTL_SET_PARAMS:
-+		if (!arg)
-+			return -EINVAL;
-+
-+		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+			return -EINVAL;
++		if (!blktap_active(tap))
++			return -ENODEV;
 +
 +		if (copy_from_user(&params, (struct blktap_params __user *)arg,
 +				   sizeof(params))) {
@@ -14393,50 +15265,7 @@
 +		}
 +
 +		tap->params = params;
-+		return 0;
-+
-+	case BLKTAP2_IOCTL_PAUSE:
-+		if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
-+			return -EINVAL;
-+
-+		set_bit(BLKTAP_PAUSED, &tap->dev_inuse);
-+		clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
-+
-+		blktap_ring_set_message(tap, 0);
-+		wake_up_interruptible(&tap->wq);
-+
-+		return 0;
-+
-+
-+	case BLKTAP2_IOCTL_REOPEN:
-+		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+			return -EINVAL;
-+
-+		if (!arg)
-+			return -EINVAL;
-+
-+		if (copy_to_user((char __user *)arg,
-+				 tap->params.name,
-+				 strlen(tap->params.name) + 1))
-+			return -EFAULT;
-+
-+		blktap_ring_set_message(tap, 0);
-+		wake_up_interruptible(&tap->wq);
-+
-+		return 0;
-+
-+	case BLKTAP2_IOCTL_RESUME:
-+		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+			return -EINVAL;
-+
-+		tap->ring.response = (int)arg;
-+		if (!tap->ring.response)
-+			clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
-+
-+		blktap_ring_set_message(tap, 0);
-+		wake_up_interruptible(&tap->wq);
-+
-+		return 0;
++		return blktap_device_create(tap);
 +	}
 +
 +	return -ENOIOCTLCMD;
@@ -14446,13 +15275,26 @@
 +{
 +	struct blktap *tap = filp->private_data;
 +	struct blktap_ring *ring = &tap->ring;
++	int work = 0;
++
++	down_read(&current->mm->mmap_sem);
++
++	if (!blktap_active(tap)) {
++		up_read(&current->mm->mmap_sem);
++		force_sig(SIGSEGV, current);
++		return 0;
++	}
 +
 +	poll_wait(filp, &ring->poll_wait, wait);
-+	if (ring->ring.sring->pad[0] != 0 ||
-+	    ring->ring.req_prod_pvt != ring->ring.sring->req_prod) {
-+		RING_PUSH_REQUESTS(&ring->ring);
++
++	if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
++		work = blktap_device_run_queue(tap);
++
++	up_read(&current->mm->mmap_sem);
++
++	if (work ||
++	    ring->ring.sring->private.tapif_user.msg)
 +		return POLLIN | POLLRDNORM;
-+	}
 +
 +	return 0;
 +}
@@ -14473,66 +15315,6 @@
 +}
 +
 +int
-+blktap_ring_resume(struct blktap *tap)
-+{
-+	int err;
-+	struct blktap_ring *ring = &tap->ring;
-+
-+	if (!blktap_active(tap))
-+		return -ENODEV;
-+
-+	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+		return -EINVAL;
-+
-+	/* set shared flag for resume */
-+	ring->response = 0;
-+
-+	blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_RESUME);
-+	blktap_ring_kick_user(tap);
-+
-+	wait_event_interruptible(tap->wq, ring->response ||
-+				 !test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
-+
-+	err = ring->response;
-+	ring->response = 0;
-+
-+	BTDBG("err: %d\n", err);
-+
-+	if (err)
-+		return err;
-+
-+	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+		return -EAGAIN;
-+
-+	return 0;
-+}
-+
-+int
-+blktap_ring_pause(struct blktap *tap)
-+{
-+	if (!blktap_active(tap))
-+		return -ENODEV;
-+
-+	if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
-+		return -EINVAL;
-+
-+	BTDBG("draining queue\n");
-+	wait_event_interruptible(tap->wq, !tap->pending_cnt);
-+	if (tap->pending_cnt)
-+		return -EAGAIN;
-+
-+	blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_PAUSE);
-+	blktap_ring_kick_user(tap);
-+
-+	BTDBG("waiting for tapdisk response\n");
-+	wait_event_interruptible(tap->wq, test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
-+	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+		return -EAGAIN;
-+
-+	return 0;
-+}
-+
-+int
 +blktap_ring_destroy(struct blktap *tap)
 +{
 +	if (!test_bit(BLKTAP_RING_FD, &tap->dev_inuse) &&
@@ -14588,10 +15370,10 @@
 +}
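
For orientation: the ring code above drops the old pause/resume handshake, moves the kernel-to-tapdisk message word from the ring's pad[0] into sring->private.tapif_user.msg, and makes poll() run the device queue. A minimal sketch of the userspace loop this implies, assuming a tapdisk-like consumer (ring_fd, sring and process_requests() are illustrative; tapdisk itself is not part of this patch):

	struct pollfd pfd = { .fd = ring_fd, .events = POLLIN };

	while (poll(&pfd, 1, -1) > 0) {
		/* POLLIN means queued requests and/or a kernel message. */
		if (sring->private.tapif_user.msg == BLKTAP2_RING_MESSAGE_CLOSE)
			break;				/* unmap and close ring_fd */
		process_requests(sring);		/* illustrative helper */
		ioctl(ring_fd, BLKTAP2_IOCTL_KICK_FE, 0); /* ack to the kernel */
	}
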
 diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c
 new file mode 100644
-index 0000000..23a3a51
+index 0000000..e342d15
 --- /dev/null
 +++ b/drivers/xen/blktap/sysfs.c
-@@ -0,0 +1,451 @@
+@@ -0,0 +1,313 @@
 +#include <linux/types.h>
 +#include <linux/device.h>
 +#include <linux/module.h>
@@ -14632,12 +15414,6 @@
 +}
 +
 +#define CLASS_DEVICE_ATTR(a,b,c,d) DEVICE_ATTR(a,b,c,d)
-+
-+static ssize_t blktap_sysfs_pause_device(struct device *, struct device_attribute *, const char *, size_t);
-+CLASS_DEVICE_ATTR(pause, S_IWUSR, NULL, blktap_sysfs_pause_device);
-+static ssize_t blktap_sysfs_resume_device(struct device *, struct device_attribute *, const char *, size_t);
-+CLASS_DEVICE_ATTR(resume, S_IWUSR, NULL, blktap_sysfs_resume_device);
-+
 +static ssize_t
 +blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const char *buf, size_t size)
 +{
@@ -14651,12 +15427,6 @@
 +		err = -ENODEV;
 +		goto out;
 +	}
-+
-+	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+		err = -EPERM;
-+		goto out;
-+	}
-+
 +	if (size > BLKTAP2_MAX_MESSAGE_LEN) {
 +		err = -ENAMETOOLONG;
 +		goto out;
@@ -14702,8 +15472,8 @@
 +			   struct device_attribute *attr,
 +			   const char *buf, size_t size)
 +{
-+	int err;
 +	struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
++	struct blktap_ring *ring = &tap->ring;
 +
 +	if (!tap->ring.dev)
 +		return size;
@@ -14711,132 +15481,17 @@
 +	if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
 +		return -EBUSY;
 +
-+	err = blktap_control_destroy_device(tap);
++	BTDBG("sending tapdisk close message\n");
++	ring->ring.sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE;
++	blktap_ring_kick_user(tap);
++	wait_event_interruptible(tap->wq,
++				 !test_bit(BLKTAP_CONTROL, &tap->dev_inuse));
 +
-+	return (err ? : size);
++	return size;
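++	/* a successful store must consume the buffer, or userspace
++	 * write() will loop retrying */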
 +}
 +CLASS_DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
 +
 +static ssize_t
-+blktap_sysfs_pause_device(struct device *dev,
-+			  struct device_attribute *attr,
-+			  const char *buf, size_t size)
-+{
-+	int err;
-+	struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
-+
-+	blktap_sysfs_enter(tap);
-+
-+	BTDBG("pausing %u:%u: dev_inuse: %lu\n",
-+	      MAJOR(tap->ring.devno), MINOR(tap->ring.devno), tap->dev_inuse);
-+
-+	if (!tap->ring.dev ||
-+	    test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
-+		err = -ENODEV;
-+		goto out;
-+	}
-+
-+	if (test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
-+		err = -EBUSY;
-+		goto out;
-+	}
-+
-+	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+		err = 0;
-+		goto out;
-+	}
-+
-+	err = blktap_device_pause(tap);
-+	if (!err) {
-+		device_remove_file(dev, &dev_attr_pause);
-+		err = device_create_file(dev, &dev_attr_resume);
-+	}
-+
-+out:
-+	blktap_sysfs_exit(tap);
-+
-+	return (err ? err : size);
-+}
-+
-+static ssize_t
-+blktap_sysfs_resume_device(struct device *dev,
-+			   struct device_attribute *attr,
-+			   const char *buf, size_t size)
-+{
-+	int err;
-+	struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
-+
-+	blktap_sysfs_enter(tap);
-+
-+	if (!tap->ring.dev ||
-+	    test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
-+		err = -ENODEV;
-+		goto out;
-+	}
-+
-+	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+		err = -EINVAL;
-+		goto out;
-+	}
-+
-+	err = blktap_device_resume(tap);
-+	if (!err) {
-+		device_remove_file(dev, &dev_attr_resume);
-+		err = device_create_file(dev, &dev_attr_pause);
-+	}
-+
-+out:
-+	blktap_sysfs_exit(tap);
-+
-+	BTDBG("returning %zd\n", (err ? err : size));
-+	return (err ? err : size);
-+}
-+
-+#ifdef ENABLE_PASSTHROUGH
-+static ssize_t
-+blktap_sysfs_enable_passthrough(struct device *dev,
-+				const char *buf, size_t size)
-+{
-+	int err;
-+	unsigned major, minor;
-+	struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
-+
-+	BTINFO("passthrough request enabled\n");
-+
-+	blktap_sysfs_enter(tap);
-+
-+	if (!tap->ring.dev ||
-+	    test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
-+		err = -ENODEV;
-+		goto out;
-+	}
-+
-+	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+		err = -EINVAL;
-+		goto out;
-+	}
-+
-+	if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
-+		err = -EINVAL;
-+		goto out;
-+	}
-+
-+	err = sscanf(buf, "%x:%x", &major, &minor);
-+	if (err != 2) {
-+		err = -EINVAL;
-+		goto out;
-+	}
-+
-+	err = blktap_device_enable_passthrough(tap, major, minor);
-+
-+out:
-+	blktap_sysfs_exit(tap);
-+	BTDBG("returning %d\n", (err ? err : size));
-+	return (err ? err : size);
-+}
-+#endif
-+
-+static ssize_t
 +blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf)
 +{
 +	char *tmp;
@@ -14859,8 +15514,6 @@
 +		       "device users: %d\n", tap->params.capacity,
 +		       tap->params.sector_size, tap->device.users);
 +
-+	down_read(&tap->tap_sem);
-+
 +	tmp += sprintf(tmp, "pending requests: %d\n", tap->pending_cnt);
 +	for (i = 0; i < MAX_PENDING_REQS; i++) {
 +		struct blktap_request *req = tap->pending_requests[i];
@@ -14876,7 +15529,6 @@
 +			       req->time.tv_usec);
 +	}
 +
-+	up_read(&tap->tap_sem);
 +	ret = (tmp - buf) + 1;
 +
 +out:
@@ -14913,26 +15565,18 @@
 +	printk(KERN_CRIT "%s: adding attributes for dev %p\n", __func__, dev);
 +	err = device_create_file(dev, &dev_attr_name);
 +	if (err)
-+		goto out;
++		goto fail;
 +	err = device_create_file(dev, &dev_attr_remove);
 +	if (err)
-+		goto out_unregister_name;
-+	err = device_create_file(dev, &dev_attr_pause);
-+	if (err)
-+		goto out_unregister_remove;
++		goto fail;
 +	err = device_create_file(dev, &dev_attr_debug);
 +	if (err)
-+		goto out_unregister_pause;
++		goto fail;
 +
 +	return 0;
 +
-+out_unregister_pause:
-+	device_remove_file(dev, &dev_attr_pause);
-+out_unregister_remove:
-+	device_remove_file(dev, &dev_attr_remove);
-+out_unregister_name:
-+	device_remove_file(dev, &dev_attr_name);
-+out:
++fail:
++	device_unregister(dev);
 +	return err;
 +}
 +
@@ -15043,52 +15687,6 @@
 +	class_destroy(cls);
 +	return err;
 +}
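
With the pause/resume attributes gone, teardown via the sysfs remove attribute becomes a ring message plus a wait; in outline (every function named here appears in the hunks above):

	/*
	 * blktap_sysfs_remove_device()
	 *   sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE;
	 *   blktap_ring_kick_user(tap);            poke tapdisk
	 *   wait_event_interruptible(tap->wq, ...); until BLKTAP_CONTROL clears
	 *
	 * tapdisk then closes its ring fd:
	 *   blktap_ring_release()
	 *     clear_bit(BLKTAP_RING_FD, &tap->dev_inuse);
	 *     blktap_control_destroy_device(tap);   the actual teardown
	 */
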
-diff --git a/drivers/xen/blktap/wait_queue.c b/drivers/xen/blktap/wait_queue.c
-new file mode 100644
-index 0000000..f8995aa
---- /dev/null
-+++ b/drivers/xen/blktap/wait_queue.c
-@@ -0,0 +1,40 @@
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+
-+#include "blktap.h"
-+
-+static LIST_HEAD(deferred_work_queue);
-+static DEFINE_SPINLOCK(deferred_work_lock);
-+
-+void
-+blktap_run_deferred(void)
-+{
-+	LIST_HEAD(queue);
-+	struct blktap *tap;
-+	unsigned long flags;
-+
-+	spin_lock_irqsave(&deferred_work_lock, flags);
-+	list_splice_init(&deferred_work_queue, &queue);
-+	list_for_each_entry(tap, &queue, deferred_queue)
-+		clear_bit(BLKTAP_DEFERRED, &tap->dev_inuse);
-+	spin_unlock_irqrestore(&deferred_work_lock, flags);
-+
-+	while (!list_empty(&queue)) {
-+		tap = list_entry(queue.next, struct blktap, deferred_queue);
-+		list_del_init(&tap->deferred_queue);
-+		blktap_device_restart(tap);
-+	}
-+}
-+
-+void
-+blktap_defer(struct blktap *tap)
-+{
-+	unsigned long flags;
-+
-+	spin_lock_irqsave(&deferred_work_lock, flags);
-+	if (!test_bit(BLKTAP_DEFERRED, &tap->dev_inuse)) {
-+		set_bit(BLKTAP_DEFERRED, &tap->dev_inuse);
-+		list_add_tail(&tap->deferred_queue, &deferred_work_queue);
-+	}
-+	spin_unlock_irqrestore(&deferred_work_lock, flags);
-+}
 diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
 index bdfd584..6625ffe 100644
 --- a/drivers/xen/cpu_hotplug.c
@@ -15101,7 +15699,7 @@
  
  #include <asm/xen/hypervisor.h>
 diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index ce602dd..9c8ad5c 100644
+index ce602dd..b4a00bf 100644
 --- a/drivers/xen/events.c
 +++ b/drivers/xen/events.c
 @@ -16,7 +16,7 @@
@@ -15113,7 +15711,7 @@
   *
   * Jeremy Fitzhardinge <jeremy at xensource.com>, XenSource Inc, 2007
   */
-@@ -27,19 +27,28 @@
+@@ -27,18 +27,31 @@
  #include <linux/module.h>
  #include <linux/string.h>
  #include <linux/bootmem.h>
@@ -15122,6 +15720,7 @@
 +#include <linux/pci.h>
 +#include <linux/msi.h>
  
++#include <asm/desc.h>
  #include <asm/ptrace.h>
  #include <asm/irq.h>
  #include <asm/idle.h>
@@ -15131,18 +15730,20 @@
  #include <asm/xen/hypervisor.h>
 +#include <asm/xen/pci.h>
  
++#include <xen/xen.h>
 +#include <xen/hvm.h>
  #include <xen/xen-ops.h>
  #include <xen/events.h>
  #include <xen/interface/xen.h>
  #include <xen/interface/event_channel.h>
- 
-+#include "../pci/msi.h"
++#include <xen/interface/hvm/hvm_op.h>
++#include <xen/interface/hvm/params.h>
 +
++#include "../pci/msi.h"
+ 
  /*
   * This lock protects updates to the following mapping and reference-count
-  * arrays. The lock does not need to be acquired to read the mapping tables.
-@@ -67,7 +76,7 @@ enum xen_irq_type {
+@@ -67,7 +80,7 @@ enum xen_irq_type {
   * event channel - irq->event channel mapping
   * cpu - cpu this event channel is bound to
   * index - type-specific information:
@@ -15151,7 +15752,7 @@
   *    VIRQ - virq number
   *    IPI - IPI vector
   *    EVTCHN -
-@@ -83,20 +92,27 @@ struct irq_info
+@@ -83,20 +96,27 @@ struct irq_info
  		enum ipi_vector ipi;
  		struct {
  			unsigned short gsi;
@@ -15185,7 +15786,7 @@
  static inline unsigned long *cpu_evtchn_mask(int cpu)
  {
  	return cpu_evtchn_mask_p[cpu].bits;
-@@ -106,6 +122,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
+@@ -106,6 +126,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
  #define VALID_EVTCHN(chn)	((chn) != 0)
  
  static struct irq_chip xen_dynamic_chip;
@@ -15193,7 +15794,7 @@
  
  /* Constructor for packed IRQ information. */
  static struct irq_info mk_unbound_info(void)
-@@ -135,7 +152,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn,
+@@ -135,7 +156,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn,
  				    unsigned short gsi, unsigned short vector)
  {
  	return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
@@ -15203,7 +15804,7 @@
  }
  
  /*
-@@ -218,6 +236,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
+@@ -218,6 +240,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
  	return ret;
  }
  
@@ -15219,7 +15820,7 @@
  static inline unsigned long active_evtchns(unsigned int cpu,
  					   struct shared_info *sh,
  					   unsigned int idx)
-@@ -329,17 +356,42 @@ static void unmask_evtchn(int port)
+@@ -329,27 +360,372 @@ static void unmask_evtchn(int port)
  	put_cpu();
  }
  
@@ -15239,6 +15840,7 @@
  	int irq;
  	struct irq_desc *desc;
 +	int start = get_nr_hw_irqs();
++	void *chip_data;
  
 -	for (irq = 0; irq < nr_irqs; irq++)
 +	if (start == nr_irqs)
@@ -15265,8 +15867,12 @@
  
  	desc = irq_to_desc_alloc_node(irq, 0);
  	if (WARN_ON(desc == NULL))
-@@ -348,8 +400,324 @@ static int find_unbound_irq(void)
+ 		return -1;
+ 
++	/* save and restore chip_data */
++	chip_data = desc->chip_data;
  	dynamic_irq_init(irq);
++	desc->chip_data = chip_data;
  
  	return irq;
 +
@@ -15278,8 +15884,8 @@
 +{
 +	/* identity map all the hardware irqs */
 +	return irq < get_nr_hw_irqs();
-+}
-+
+ }
+ 
 +static void pirq_unmask_notify(int irq)
 +{
 +	struct irq_info *info = info_for_irq(irq);
@@ -15377,8 +15983,8 @@
 +static void enable_pirq(unsigned int irq)
 +{
 +	startup_pirq(irq);
- }
- 
++}
++
 +static void disable_pirq(unsigned int irq)
 +{
 +}
@@ -15590,7 +16196,7 @@
  int bind_evtchn_to_irq(unsigned int evtchn)
  {
  	int irq;
-@@ -409,8 +777,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+@@ -409,8 +785,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
  	return irq;
  }
  
@@ -15599,8 +16205,7 @@
 +{
 +        struct evtchn_bind_interdomain bind_interdomain;
 +        int err;
- 
--static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
++
 +        bind_interdomain.remote_dom  = remote_domain;
 +        bind_interdomain.remote_port = remote_port;
 +
@@ -15610,12 +16215,13 @@
 +        return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 +}
 +
-+
+ 
+-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 +int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
  {
  	struct evtchn_bind_virq bind_virq;
  	int evtchn, irq;
-@@ -504,6 +887,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
+@@ -504,6 +895,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
  }
  EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
  
@@ -15645,7 +16251,7 @@
  int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
  			    irq_handler_t handler,
  			    unsigned long irqflags, const char *devname, void *dev_id)
-@@ -535,6 +941,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+@@ -535,6 +949,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
  	if (irq < 0)
  		return irq;
  
@@ -15653,12 +16259,12 @@
  	retval = request_irq(irq, handler, irqflags, devname, dev_id);
  	if (retval != 0) {
  		unbind_from_irq(irq);
-@@ -616,17 +1023,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+@@ -616,17 +1031,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
   * a bitset of words which contain pending event bits.  The second
   * level is a bitset of pending events themselves.
   */
 -void xen_evtchn_do_upcall(struct pt_regs *regs)
-+void __xen_evtchn_do_upcall(struct pt_regs *regs)
++static void __xen_evtchn_do_upcall(struct pt_regs *regs)
  {
  	int cpu = get_cpu();
 -	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -15672,7 +16278,7 @@
  	do {
  		unsigned long pending_words;
  
-@@ -649,9 +1052,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
+@@ -649,9 +1060,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
  				int bit_idx = __ffs(pending_bits);
  				int port = (word_idx * BITS_PER_LONG) + bit_idx;
  				int irq = evtchn_to_irq[port];
@@ -15688,8 +16294,12 @@
  			}
  		}
  
-@@ -662,10 +1069,26 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
- 	} while(count != 1);
+@@ -659,14 +1074,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
+ 
+ 		count = __get_cpu_var(xed_nesting_count);
+ 		__get_cpu_var(xed_nesting_count) = 0;
+-	} while(count != 1);
++	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
  
  out:
 +
@@ -15710,22 +16320,28 @@
 +}
  
 -	put_cpu();
-+void xen_hvm_evtchn_do_upcall(struct pt_regs *regs)
++void xen_hvm_evtchn_do_upcall(void)
 +{
++	struct pt_regs *regs = get_irq_regs();
 +	__xen_evtchn_do_upcall(regs);
  }
++EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
  
  /* Rebind a new event channel to an existing irq. */
-@@ -703,7 +1126,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+ void rebind_evtchn_irq(int evtchn, int irq)
+@@ -703,7 +1136,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
  	struct evtchn_bind_vcpu bind_vcpu;
  	int evtchn = evtchn_from_irq(irq);
  
 -	if (!VALID_EVTCHN(evtchn))
-+	if (!VALID_EVTCHN(evtchn) || xen_hvm_domain())
++	/* events delivered via platform PCI interrupts are always
++	 * routed to vcpu 0 */
++	if (!VALID_EVTCHN(evtchn) ||
++		(xen_hvm_domain() && !xen_have_vector_callback))
  		return -1;
  
  	/* Send future instances of this interrupt to other vcpu. */
-@@ -855,7 +1278,7 @@ void xen_clear_irq_pending(int irq)
+@@ -855,7 +1291,7 @@ void xen_clear_irq_pending(int irq)
  	if (VALID_EVTCHN(evtchn))
  		clear_evtchn(evtchn);
  }
@@ -15734,7 +16350,7 @@
  void xen_set_irq_pending(int irq)
  {
  	int evtchn = evtchn_from_irq(irq);
-@@ -875,9 +1298,9 @@ bool xen_test_irq_pending(int irq)
+@@ -875,9 +1311,9 @@ bool xen_test_irq_pending(int irq)
  	return ret;
  }
  
@@ -15746,7 +16362,7 @@
  {
  	evtchn_port_t evtchn = evtchn_from_irq(irq);
  
-@@ -885,13 +1308,33 @@ void xen_poll_irq(int irq)
+@@ -885,13 +1321,33 @@ void xen_poll_irq(int irq)
  		struct sched_poll poll;
  
  		poll.nr_ports = 1;
@@ -15781,7 +16397,7 @@
  
  void xen_irq_resume(void)
  {
-@@ -928,13 +1371,38 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
+@@ -928,13 +1384,85 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
  	.retrigger	= retrigger_dynirq,
  };
  
@@ -15805,6 +16421,53 @@
 +	.retrigger	= retrigger_dynirq,
 +};
 +
++int xen_set_callback_via(uint64_t via)
++{
++	struct xen_hvm_param a;
++	a.domid = DOMID_SELF;
++	a.index = HVM_PARAM_CALLBACK_IRQ;
++	a.value = via;
++	return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
++}
++EXPORT_SYMBOL_GPL(xen_set_callback_via);
++
++void smp_xen_hvm_callback_vector(struct pt_regs *regs)
++{
++	struct pt_regs *old_regs = set_irq_regs(regs);
++
++	exit_idle();
++
++	irq_enter();
++
++	__xen_evtchn_do_upcall(regs);
++
++	irq_exit();
++
++	set_irq_regs(old_regs);
++}
++
++/* Vector callbacks are better than PCI interrupts for receiving event
++ * channel notifications because we can receive vector callbacks on any
++ * vcpu and we don't need PCI support or APIC interactions. */
++void xen_callback_vector(void)
++{
++	int rc;
++	uint64_t callback_via;
++	if (xen_have_vector_callback) {
++		callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
++		rc = xen_set_callback_via(callback_via);
++		if (rc) {
++			printk(KERN_ERR "Request for Xen HVM callback vector"
++					" failed.\n");
++			xen_have_vector_callback = 0;
++			return;
++		}
++		printk(KERN_INFO "Xen HVM callback vector for event delivery is "
++				"enabled\n");
++		alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
++	}
++}
++
  void __init xen_init_IRQ(void)
  {
  	int i;
@@ -15821,20 +16484,21 @@
  
  	init_evtchn_cpu_bindings();
  
-@@ -942,5 +1410,10 @@ void __init xen_init_IRQ(void)
+@@ -942,5 +1470,11 @@ void __init xen_init_IRQ(void)
  	for (i = 0; i < NR_EVENT_CHANNELS; i++)
  		mask_evtchn(i);
  
 -	irq_ctx_init(smp_processor_id());
-+	if (xen_hvm_domain())
++	if (xen_hvm_domain()) {
++		xen_callback_vector();
 +		native_init_IRQ();
-+	else
++	} else {
 +		irq_ctx_init(smp_processor_id());
-+
-+	xen_setup_pirqs();
++		xen_setup_pirqs();
++	}
  }
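
The upcall split above leaves one worker serving three entry points; schematically (a sketch of the control flow in the events.c hunks):

	/*
	 *  PV:             hypervisor upcall -> xen_evtchn_do_upcall(regs)
	 *  HVM, vector:    smp_xen_hvm_callback_vector(regs)
	 *  HVM, PCI intx:  platform driver   -> xen_hvm_evtchn_do_upcall()
	 *
	 * All three funnel into the common, now static,
	 * __xen_evtchn_do_upcall(); the PCI variant fetches its pt_regs
	 * via get_irq_regs() since it runs from an ordinary interrupt.
	 */
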
 diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
-index 79bedba..6a1c4a5 100644
+index 79bedba..b82666a 100644
 --- a/drivers/xen/evtchn.c
 +++ b/drivers/xen/evtchn.c
 @@ -48,6 +48,8 @@
@@ -15958,7 +16622,7 @@
  }
  
  static long evtchn_ioctl(struct file *file,
-@@ -332,7 +371,7 @@ static long evtchn_ioctl(struct file *file,
+@@ -332,15 +371,17 @@ static long evtchn_ioctl(struct file *file,
  		spin_lock_irq(&port_user_lock);
  
  		rc = -ENOTCONN;
@@ -15967,7 +16631,18 @@
  			spin_unlock_irq(&port_user_lock);
  			break;
  		}
-@@ -354,7 +393,7 @@ static long evtchn_ioctl(struct file *file,
+ 
+-		evtchn_unbind_from_user(u, unbind.port);
++		disable_irq(irq_from_evtchn(unbind.port));
+ 
+ 		spin_unlock_irq(&port_user_lock);
+ 
++		evtchn_unbind_from_user(u, unbind.port);
++
+ 		rc = 0;
+ 		break;
+ 	}
+@@ -354,7 +395,7 @@ static long evtchn_ioctl(struct file *file,
  
  		if (notify.port >= NR_EVENT_CHANNELS) {
  			rc = -EINVAL;
@@ -15976,7 +16651,7 @@
  			rc = -ENOTCONN;
  		} else {
  			notify_remote_via_evtchn(notify.port);
-@@ -443,10 +482,10 @@ static int evtchn_release(struct inode *inode, struct file *filp)
+@@ -443,14 +484,21 @@ static int evtchn_release(struct inode *inode, struct file *filp)
  	free_page((unsigned long)u->ring);
  
  	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
@@ -15985,11 +16660,31 @@
  			continue;
  
 -		evtchn_unbind_from_user(port_user[i], i);
-+		evtchn_unbind_from_user(get_port_user(i), i);
++		disable_irq(irq_from_evtchn(i));
  	}
  
  	spin_unlock_irq(&port_user_lock);
-@@ -480,8 +519,11 @@ static int __init evtchn_init(void)
+ 
++	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
++		if (get_port_user(i) != u)
++			continue;
++
++		evtchn_unbind_from_user(get_port_user(i), i);
++	}
++
+ 	kfree(u->name);
+ 	kfree(u);
+ 
+@@ -470,7 +518,7 @@ static const struct file_operations evtchn_fops = {
+ 
+ static struct miscdevice evtchn_miscdev = {
+ 	.minor        = MISC_DYNAMIC_MINOR,
+-	.name         = "evtchn",
++	.name         = "xen/evtchn",
+ 	.fops         = &evtchn_fops,
+ };
+ static int __init evtchn_init(void)
+@@ -480,8 +528,11 @@ static int __init evtchn_init(void)
  	if (!xen_domain())
  		return -ENODEV;
  
@@ -16002,7 +16697,7 @@
  
  	/* Create '/dev/misc/evtchn'. */
  	err = misc_register(&evtchn_miscdev);
-@@ -497,6 +539,9 @@ static int __init evtchn_init(void)
+@@ -497,6 +548,9 @@ static int __init evtchn_init(void)
  
  static void __exit evtchn_cleanup(void)
  {
@@ -16027,10 +16722,10 @@
  	int i, j;
 diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
 new file mode 100644
-index 0000000..ddc59cc
+index 0000000..a33e443
 --- /dev/null
 +++ b/drivers/xen/gntdev.c
-@@ -0,0 +1,626 @@
+@@ -0,0 +1,645 @@
 +/******************************************************************************
 + * gntdev.c
 + *
@@ -16061,7 +16756,7 @@
 +#include <linux/types.h>
 +#include <linux/uaccess.h>
 +#include <linux/sched.h>
-+#include <linux/rwsem.h>
++#include <linux/spinlock.h>
 +
 +#include <xen/xen.h>
 +#include <xen/grant_table.h>
@@ -16084,7 +16779,7 @@
 +	struct list_head maps;
 +	uint32_t used;
 +	uint32_t limit;
-+	struct rw_semaphore sem;
++	spinlock_t lock;
 +	struct mm_struct *mm;
 +	struct mmu_notifier mn;
 +};
@@ -16117,9 +16812,9 @@
 +		       map->index == text_index && text ? text : "");
 +}
 +
-+static struct grant_map *gntdev_add_map(struct gntdev_priv *priv, int count)
++static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 +{
-+	struct grant_map *map, *add;
++	struct grant_map *add;
 +
 +	add = kzalloc(sizeof(struct grant_map), GFP_KERNEL);
 +	if (NULL == add)
@@ -16140,6 +16835,20 @@
 +	if (add->count + priv->used > priv->limit)
 +		goto err;
 +
++	return add;
++
++err:
++	kfree(add->grants);
++	kfree(add->map_ops);
++	kfree(add->unmap_ops);
++	kfree(add);
++	return NULL;
++}
++
++static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
++{
++	struct grant_map *map;
++
 +	list_for_each_entry(map, &priv->maps, next) {
 +		if (add->index + add->count < map->index) {
 +			list_add_tail(&add->next, &map->next);
@@ -16153,14 +16862,6 @@
 +	priv->used += add->count;
 +	if (debug)
 +		gntdev_print_maps(priv, "[new]", add->index);
-+	return add;
-+
-+err:
-+	kfree(add->grants);
-+	kfree(add->map_ops);
-+	kfree(add->unmap_ops);
-+	kfree(add);
-+	return NULL;
 +}
 +
 +static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, int index,
@@ -16207,11 +16908,17 @@
 +
 +	map->priv->used -= map->count;
 +	list_del(&map->next);
++	return 0;
++}
++
++static void gntdev_free_map(struct grant_map *map)
++{
++	if (!map)
++		return;
 +	kfree(map->grants);
 +	kfree(map->map_ops);
 +	kfree(map->unmap_ops);
 +	kfree(map);
-+	return 0;
 +}
 +
 +/* ------------------------------------------------------------------ */
@@ -16310,7 +17017,7 @@
 +	unsigned long mstart, mend;
 +	int err;
 +
-+	down_read(&priv->sem);
++	spin_lock(&priv->lock);
 +	list_for_each_entry(map, &priv->maps, next) {
 +		if (!map->vma)
 +			continue;
@@ -16332,7 +17039,7 @@
 +					(mend - mstart) >> PAGE_SHIFT);
 +		WARN_ON(err);
 +	}
-+	up_read(&priv->sem);
++	spin_unlock(&priv->lock);
 +}
 +
 +static void mn_invl_page(struct mmu_notifier *mn,
@@ -16349,7 +17056,7 @@
 +	struct grant_map *map;
 +	int err;
 +
-+	down_read(&priv->sem);
++	spin_lock(&priv->lock);
 +	list_for_each_entry(map, &priv->maps, next) {
 +		if (!map->vma)
 +			continue;
@@ -16360,7 +17067,7 @@
 +		err = unmap_grant_pages(map, 0, map->count);
 +		WARN_ON(err);
 +	}
-+	up_read(&priv->sem);
++	spin_unlock(&priv->lock);
 +}
 +
 +struct mmu_notifier_ops gntdev_mmu_ops = {
@@ -16380,7 +17087,7 @@
 +		return -ENOMEM;
 +
 +	INIT_LIST_HEAD(&priv->maps);
-+	init_rwsem(&priv->sem);
++	spin_lock_init(&priv->lock);
 +	priv->limit = limit;
 +
 +	priv->mm = get_task_mm(current);
@@ -16408,13 +17115,16 @@
 +	if (debug)
 +		printk("%s: priv %p\n", __FUNCTION__, priv);
 +
-+	down_write(&priv->sem);
++	spin_lock(&priv->lock);
 +	while (!list_empty(&priv->maps)) {
 +		map = list_entry(priv->maps.next, struct grant_map, next);
 +		err = gntdev_del_map(map);
-+		WARN_ON(err);
++		if (WARN_ON(err))
++			gntdev_free_map(map);
++
 +	}
-+	up_write(&priv->sem);
++	spin_unlock(&priv->lock);
++
 +	mmu_notifier_unregister(&priv->mn, priv->mm);
 +	kfree(priv);
 +	return 0;
@@ -16437,27 +17147,29 @@
 +	if (unlikely(op.count > priv->limit))
 +		return -EINVAL;
 +
-+	down_write(&priv->sem);
 +	err = -ENOMEM;
-+	map = gntdev_add_map(priv, op.count);
++	map = gntdev_alloc_map(priv, op.count);
 +	if (!map)
-+		goto err_unlock;
-+
-+	err = -ENOMEM;
++		return err;
 +	if (copy_from_user(map->grants, &u->refs,
-+			   sizeof(map->grants[0]) * op.count) != 0)
-+		goto err_free;
++			   sizeof(map->grants[0]) * op.count) != 0) {
++		gntdev_free_map(map);
++		return err;
++	}
++
++	spin_lock(&priv->lock);
++	gntdev_add_map(priv, map);
 +	op.index = map->index << PAGE_SHIFT;
-+	if (copy_to_user(u, &op, sizeof(op)) != 0)
-+		goto err_free;
-+	up_write(&priv->sem);
-+	return 0;
++	spin_unlock(&priv->lock);
 +
-+err_free:
-+	gntdev_del_map(map);
-+err_unlock:
-+	up_write(&priv->sem);
-+	return err;
++	if (copy_to_user(u, &op, sizeof(op)) != 0) {
++		spin_lock(&priv->lock);
++		gntdev_del_map(map);
++		spin_unlock(&priv->lock);
++		gntdev_free_map(map);
++		return err;
++	}
++	return 0;
 +}
 +
 +static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
@@ -16473,11 +17185,13 @@
 +		printk("%s: priv %p, del %d+%d\n", __FUNCTION__, priv,
 +		       (int)op.index, (int)op.count);
 +
-+	down_write(&priv->sem);
++	spin_lock(&priv->lock);
 +	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
 +	if (map)
 +		err = gntdev_del_map(map);
-+	up_write(&priv->sem);
++	spin_unlock(&priv->lock);
++	if (!err)
++		gntdev_free_map(map);
 +	return err;
 +}
 +
@@ -16493,16 +17207,16 @@
 +		printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, priv,
 +		       (unsigned long)op.vaddr);
 +
-+	down_read(&priv->sem);
++	spin_lock(&priv->lock);
 +	map = gntdev_find_map_vaddr(priv, op.vaddr);
 +	if (map == NULL ||
 +	    map->vma->vm_start != op.vaddr) {
-+		up_read(&priv->sem);
++		spin_unlock(&priv->lock);
 +		return -EINVAL;
 +	}
 +	op.offset = map->index << PAGE_SHIFT;
 +	op.count = map->count;
-+	up_read(&priv->sem);
++	spin_unlock(&priv->lock);
 +
 +	if (copy_to_user(u, &op, sizeof(op)) != 0)
 +		return -EFAULT;
@@ -16521,9 +17235,9 @@
 +	if (op.count > limit)
 +		return -EINVAL;
 +
-+	down_write(&priv->sem);
++	spin_lock(&priv->lock);
 +	priv->limit = op.count;
-+	up_write(&priv->sem);
++	spin_unlock(&priv->lock);
 +	return 0;
 +}
 +
@@ -16571,7 +17285,7 @@
 +		printk("%s: map %d+%d at %lx (pgoff %lx)\n", __FUNCTION__,
 +		       index, count, vma->vm_start, vma->vm_pgoff);
 +
-+	down_read(&priv->sem);
++	spin_lock(&priv->lock);
 +	map = gntdev_find_map_index(priv, index, count);
 +	if (!map)
 +		goto unlock_out;
@@ -16613,7 +17327,7 @@
 +	map->is_mapped = 1;
 +
 +unlock_out:
-+	up_read(&priv->sem);
++	spin_unlock(&priv->lock);
 +	return err;
 +}
 +
@@ -16627,7 +17341,7 @@
 +
 +static struct miscdevice gntdev_miscdev = {
 +	.minor        = MISC_DYNAMIC_MINOR,
-+	.name         = "gntdev",
++	.name         = "xen/gntdev",
 +	.fops         = &gntdev_fops,
 +};
 +
@@ -16658,10 +17372,10 @@
 +
 +/* ------------------------------------------------------------------ */
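
gntdev's rw_semaphore is narrowed to a spinlock above; what makes this safe is that every sleeping operation is hoisted out of the critical section. The resulting pattern, in isolation (a sketch of the map/unmap ioctls as rewritten above):

	map = gntdev_alloc_map(priv, count);	/* kzalloc(GFP_KERNEL), may sleep */
	if (!map)
		return -ENOMEM;

	spin_lock(&priv->lock);
	gntdev_add_map(priv, map);		/* list insertion only */
	spin_unlock(&priv->lock);

	/* ... and on removal ... */
	spin_lock(&priv->lock);
	err = gntdev_del_map(map);		/* unlink, no freeing */
	spin_unlock(&priv->lock);
	if (!err)
		gntdev_free_map(map);		/* kfree outside the lock */
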
 diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
-index 7d8f531..8df6ae0 100644
+index 7d8f531..5a8ad45 100644
 --- a/drivers/xen/grant-table.c
 +++ b/drivers/xen/grant-table.c
-@@ -36,10 +36,14 @@
+@@ -36,10 +36,13 @@
  #include <linux/mm.h>
  #include <linux/vmalloc.h>
  #include <linux/uaccess.h>
@@ -16671,20 +17385,37 @@
  #include <xen/interface/xen.h>
  #include <xen/page.h>
  #include <xen/grant_table.h>
-+#include <xen/platform_pci.h>
 +#include <xen/interface/memory.h>
  #include <asm/xen/hypercall.h>
  
  #include <asm/pgtable.h>
-@@ -57,6 +61,7 @@ static unsigned int boot_max_nr_grant_frames;
+@@ -57,6 +60,8 @@ static unsigned int boot_max_nr_grant_frames;
  static int gnttab_free_count;
  static grant_ref_t gnttab_free_head;
  static DEFINE_SPINLOCK(gnttab_list_lock);
-+static unsigned long hvm_pv_resume_frames;
++unsigned long xen_hvm_resume_frames;
++EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
  
  static struct grant_entry *shared;
  
-@@ -447,6 +452,30 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+@@ -431,7 +436,7 @@ static unsigned int __max_nr_grant_frames(void)
+ 	return query.max_nr_frames;
+ }
+ 
+-static inline unsigned int max_nr_grant_frames(void)
++unsigned int gnttab_max_grant_frames(void)
+ {
+ 	unsigned int xen_max = __max_nr_grant_frames();
+ 
+@@ -439,6 +444,7 @@ static inline unsigned int max_nr_grant_frames(void)
+ 		return boot_max_nr_grant_frames;
+ 	return xen_max;
+ }
++EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
+ 
+ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+ {
+@@ -447,6 +453,30 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
  	unsigned int nr_gframes = end_idx + 1;
  	int rc;
  
@@ -16700,7 +17431,7 @@
 +			xatp.domid = DOMID_SELF;
 +			xatp.idx = i;
 +			xatp.space = XENMAPSPACE_grant_table;
-+			xatp.gpfn = (hvm_pv_resume_frames >> PAGE_SHIFT) + i;
++			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
 +			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
 +			if (rc != 0) {
 +				printk(KERN_WARNING
@@ -16715,7 +17446,16 @@
  	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
  	if (!frames)
  		return -ENOMEM;
-@@ -472,11 +501,135 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+@@ -463,7 +493,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+ 
+ 	BUG_ON(rc || setup.status);
+ 
+-	rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
++	rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
+ 				    &shared);
+ 	BUG_ON(rc);
+ 
+@@ -472,11 +502,134 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
  	return 0;
  }
  
@@ -16829,7 +17569,7 @@
 -	if (max_nr_grant_frames() < nr_grant_frames)
 +	unsigned int max_nr_gframes;
 +
-+	max_nr_gframes = max_nr_grant_frames();
++	max_nr_gframes = gnttab_max_grant_frames();
 +	if (max_nr_gframes < nr_grant_frames)
  		return -ENOSYS;
 -	return gnttab_map(0, nr_grant_frames - 1);
@@ -16837,12 +17577,11 @@
 +	if (xen_pv_domain())
 +		return gnttab_map(0, nr_grant_frames - 1);
 +
-+	if (!hvm_pv_resume_frames) {
-+		hvm_pv_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
-+		shared = ioremap(hvm_pv_resume_frames, PAGE_SIZE * max_nr_gframes);
++	if (!shared) {
++		shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
 +		if (shared == NULL) {
 +			printk(KERN_WARNING
-+					"Fail to ioremap gnttab share frames\n");
++					"Failed to ioremap gnttab shared frames!\n");
 +			return -ENOMEM;
 +		}
 +	}
@@ -16853,6 +17592,15 @@
  }
  
  int gnttab_suspend(void)
+@@ -493,7 +646,7 @@ static int gnttab_expand(unsigned int req_entries)
+ 	cur = nr_grant_frames;
+ 	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
+ 		 GREFS_PER_GRANT_FRAME);
+-	if (cur + extra > max_nr_grant_frames())
++	if (cur + extra > gnttab_max_grant_frames())
+ 		return -ENOSPC;
+ 
+ 	rc = gnttab_map(cur, cur + extra - 1);
 @@ -503,15 +656,12 @@ static int gnttab_expand(unsigned int req_entries)
  	return rc;
  }
@@ -16870,11 +17618,12 @@
  	nr_grant_frames = 1;
  	boot_max_nr_grant_frames = __max_nr_grant_frames();
  
-@@ -555,4 +705,16 @@ static int __devinit gnttab_init(void)
+@@ -554,5 +704,18 @@ static int __devinit gnttab_init(void)
+ 	kfree(gnttab_list);
  	return -ENOMEM;
  }
- 
--core_initcall(gnttab_init);
++EXPORT_SYMBOL_GPL(gnttab_init);
++
 +static int __devinit __gnttab_init(void)
 +{
 +	/* Delay grant-table initialization in the PV on HVM case */
@@ -16886,33 +17635,22 @@
 +
 +	return gnttab_init();
 +}
-+
+ 
+-core_initcall(gnttab_init);
 +core_initcall(__gnttab_init);
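
gnttab_init() and xen_hvm_resume_frames are exported above so that a PV-on-HVM platform driver can point the grant-table code at its MMIO region and then trigger the initialisation that __gnttab_init() defers for HVM. A sketch of the expected caller (the platform-pci driver is in a separate patch; the BAR used here is an assumption):

	/* in the platform PCI driver's probe routine -- illustrative only */
	xen_hvm_resume_frames = pci_resource_start(pdev, 1);	/* assumed BAR */
	ret = gnttab_init();	/* ioremaps the frames and maps them via
				 * XENMEM_add_to_physmap, as above */
	if (ret)
		return ret;
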
 diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
-index 5d42d55..3924018 100644
+index 5d42d55..0b50906 100644
 --- a/drivers/xen/manage.c
 +++ b/drivers/xen/manage.c
-@@ -7,15 +7,19 @@
- #include <linux/sysrq.h>
+@@ -8,6 +8,7 @@
  #include <linux/stop_machine.h>
  #include <linux/freezer.h>
-+#include <linux/pci.h>
-+#include <linux/cpumask.h>
  
++#include <xen/xen.h>
  #include <xen/xenbus.h>
  #include <xen/grant_table.h>
  #include <xen/events.h>
- #include <xen/hvc-console.h>
- #include <xen/xen-ops.h>
-+#include <xen/platform_pci.h>
- 
- #include <asm/xen/hypercall.h>
- #include <asm/xen/page.h>
-+#include <asm/xen/hypervisor.h>
- 
- enum shutdown_state {
- 	SHUTDOWN_INVALID = -1,
-@@ -32,10 +36,30 @@ enum shutdown_state {
+@@ -32,10 +33,30 @@ enum shutdown_state {
  static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
  
  #ifdef CONFIG_PM_SLEEP
@@ -16926,12 +17664,12 @@
 +
 +	*cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
 +
-+	xen_guest_init();
++	xen_hvm_post_suspend(*cancelled);
 +	gnttab_resume();
 +
 +	if (!*cancelled) {
 +		xen_irq_resume();
-+		platform_pci_resume();
++		xen_timer_resume();
 +	}
 +
 +	return 0;
@@ -16944,82 +17682,19 @@
  
  	BUG_ON(!irqs_disabled());
  
-@@ -72,6 +96,62 @@ static int xen_suspend(void *data)
- 	return 0;
- }
+@@ -111,7 +132,10 @@ static void do_suspend(void)
+ 		goto out_resume;
+ 	}
  
-+static void do_hvm_suspend(void)
-+{
-+	int err;
-+	int cancelled = 1;
-+
-+	shutting_down = SHUTDOWN_SUSPEND;
-+
-+	err = stop_machine_create();
-+	if (err) {
-+		printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
-+		goto out;
-+	}
-+
-+#ifdef CONFIG_PREEMPT
-+	/* If the kernel is preemptible, we need to freeze all the processes
-+	   to prevent them from being in the middle of a pagetable update
-+	   during suspend. */
-+	err = freeze_processes();
-+	if (err) {
-+		printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-+		goto out_destroy_sm;
-+	}
-+#endif
-+
-+	printk(KERN_DEBUG "suspending xenstore... ");
-+	xenbus_suspend();
-+	printk(KERN_DEBUG "xenstore suspended\n");
-+	platform_pci_disable_irq();
-+	
-+	err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
-+	if (err) {
-+		printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
-+		cancelled = 1;
-+	}
-+
-+	platform_pci_enable_irq();
-+
-+	if (!cancelled) {
-+		xen_arch_resume();
-+		xenbus_resume();
-+	} else
-+		xs_suspend_cancel();
-+
-+	/* Make sure timer events get retriggered on all CPUs */
-+	clock_was_set();
-+
-+out_destroy_sm:
-+	stop_machine_destroy();
-+
-+out:
-+#ifdef CONFIG_PREEMPT
-+	thaw_processes();
-+#endif
-+	shutting_down = SHUTDOWN_INVALID;
-+}
-+
- static void do_suspend(void)
- {
- 	int err;
-@@ -184,7 +264,10 @@ static void shutdown_handler(struct xenbus_watch *watch,
- 		ctrl_alt_del();
- #ifdef CONFIG_PM_SLEEP
- 	} else if (strcmp(str, "suspend") == 0) {
--		do_suspend();
-+		if (xen_hvm_domain())
-+			do_hvm_suspend();
-+		else
-+			do_suspend();
- #endif
- 	} else {
- 		printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
-@@ -260,7 +343,19 @@ static int shutdown_event(struct notifier_block *notifier,
+-	err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
++	if (xen_hvm_domain())
++		err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
++	else
++		err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+ 
+ 	dpm_resume_noirq(PMSG_RESUME);
+ 
+@@ -260,7 +284,19 @@ static int shutdown_event(struct notifier_block *notifier,
  	return NOTIFY_DONE;
  }
  
@@ -17040,9 +17715,11 @@
  {
  	static struct notifier_block xenstore_notifier = {
  		.notifier_call = shutdown_event
-@@ -270,4 +365,4 @@ static int __init setup_shutdown_event(void)
+@@ -269,5 +305,6 @@ static int __init setup_shutdown_event(void)
+ 
  	return 0;
  }
++EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
  
 -subsys_initcall(setup_shutdown_event);
 +subsys_initcall(__setup_shutdown_event);
@@ -17279,10 +17956,10 @@
 +xen-netback-y := netback.o xenbus.o interface.o
 diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
 new file mode 100644
-index 0000000..51f97c0
+index 0000000..b40ad72
 --- /dev/null
 +++ b/drivers/xen/netback/common.h
-@@ -0,0 +1,227 @@
+@@ -0,0 +1,329 @@
 +/******************************************************************************
 + * arch/xen/drivers/netif/backend/common.h
 + *
@@ -17343,6 +18020,7 @@
 +struct xen_netif {
 +	/* Unique identifier for this interface. */
 +	domid_t          domid;
++	int              group;
 +	unsigned int     handle;
 +
 +	u8               fe_dev_addr[6];
@@ -17360,15 +18038,22 @@
 +	struct vm_struct *tx_comms_area;
 +	struct vm_struct *rx_comms_area;
 +
-+	/* Set of features that can be turned on in dev->features. */
-+	int features;
++	/* Flags that must not be set in dev->features */
++	int features_disabled;
 +
-+	int smart_poll;
++	/* Frontend feature information. */
++	u8 can_sg:1;
++	u8 gso:1;
++	u8 gso_prefix:1;
++	u8 csum:1;
++	u8 smart_poll:1;
 +
 +	/* Internal feature information. */
-+	u8 can_queue:1;	/* can queue packets for receiver? */
++	u8 can_queue:1;	    /* can queue packets for receiver? */
 +
-+	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
++	/* Allow netif_be_start_xmit() to peek ahead in the rx request
++	 * ring.  This is a prediction of what rx_req_cons will be once
++	 * all queued skbs are put on the ring. */
 +	RING_IDX rx_req_cons_peek;
 +
 +	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
@@ -17470,6 +18155,7 @@
 +
 +void netif_disconnect(struct xen_netif *netif);
 +
++void netif_set_features(struct xen_netif *netif);
 +struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
 +int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
 +	      unsigned long rx_ring_ref, unsigned int evtchn);
@@ -17506,16 +18192,109 @@
 +static inline int netbk_can_sg(struct net_device *dev)
 +{
 +	struct xen_netif *netif = netdev_priv(dev);
-+	return netif->features & NETIF_F_SG;
++	return netif->can_sg;
 +}
 +
++struct pending_tx_info {
++	struct xen_netif_tx_request req;
++	struct xen_netif *netif;
++};
++typedef unsigned int pending_ring_idx_t;
++
++struct netbk_rx_meta {
++	int id;
++	int size;
++	int gso_size;
++};
++
++struct netbk_tx_pending_inuse {
++	struct list_head list;
++	unsigned long alloc_time;
++};
++
++#define MAX_PENDING_REQS 256
++
++#define MAX_BUFFER_OFFSET PAGE_SIZE
++
++/* extra field used in struct page */
++union page_ext {
++	struct {
++#if BITS_PER_LONG < 64
++#define IDX_WIDTH   8
++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
++		unsigned int group:GROUP_WIDTH;
++		unsigned int idx:IDX_WIDTH;
++#else
++		unsigned int group, idx;
++#endif
++	} e;
++	void *mapping;
++};
++
++struct xen_netbk {
++	union {
++		struct {
++			struct tasklet_struct net_tx_tasklet;
++			struct tasklet_struct net_rx_tasklet;
++		} tasklet;
++
++		struct {
++			wait_queue_head_t netbk_action_wq;
++			struct task_struct *task;
++		} kthread;
++	};
++
++	struct sk_buff_head rx_queue;
++	struct sk_buff_head tx_queue;
++
++	struct timer_list net_timer;
++	struct timer_list netbk_tx_pending_timer;
++
++	struct page **mmap_pages;
++
++	pending_ring_idx_t pending_prod;
++	pending_ring_idx_t pending_cons;
++	pending_ring_idx_t dealloc_prod;
++	pending_ring_idx_t dealloc_cons;
++
++	struct list_head pending_inuse_head;
++	struct list_head net_schedule_list;
++
++	/* Protect the net_schedule_list in netif. */
++	spinlock_t net_schedule_list_lock;
++
++	atomic_t netfront_count;
++
++	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++
++	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++	u16 pending_ring[MAX_PENDING_REQS];
++	u16 dealloc_ring[MAX_PENDING_REQS];
++
++	/*
++	 * Each head or fragment can be up to 4096 bytes. Given
++	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
++	 * head/fragment uses 2 copy operations.
++	 */
++	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
++	unsigned char rx_notify[NR_IRQS];
++	u16 notify_list[NET_RX_RING_SIZE];
++	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++};
++
++extern struct xen_netbk *xen_netbk;
++extern int xen_netbk_group_nr;
++
 +#endif /* __NETIF__BACKEND__COMMON_H__ */
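
The union page_ext above lets netback stash a (group, pending-index) pair in the otherwise unused page->mapping of its foreign pages; the helpers that use it appear in netback.c below. A sketch of the round trip (the +1 bias keeps an encoded value from ever looking like a NULL mapping):

	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
	pg->mapping = ext.mapping;		/* encode */

	ext.mapping = pg->mapping;		/* decode */
	group = ext.e.group - 1;
	idx   = ext.e.idx;
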
 diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
 new file mode 100644
-index 0000000..086d939
+index 0000000..2e8508a
 --- /dev/null
 +++ b/drivers/xen/netback/interface.c
-@@ -0,0 +1,410 @@
+@@ -0,0 +1,475 @@
 +/******************************************************************************
 + * arch/xen/drivers/netif/backend/interface.c
 + *
@@ -17572,8 +18351,33 @@
 +static unsigned long netbk_queue_length = 32;
 +module_param_named(queue_length, netbk_queue_length, ulong, 0644);
 +
++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
++			   struct xen_netif *netif)
++{
++	int i;
++	int min_netfront_count;
++	int min_group = 0;
++	min_netfront_count = atomic_read(&netbk[0].netfront_count);
++	for (i = 0; i < group_nr; i++) {
++		int netfront_count = atomic_read(&netbk[i].netfront_count);
++		if (netfront_count < min_netfront_count) {
++			min_group = i;
++			min_netfront_count = netfront_count;
++		}
++	}
++
++	netif->group = min_group;
++	atomic_inc(&netbk[netif->group].netfront_count);
++}
++
++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++{
++	atomic_dec(&netbk[netif->group].netfront_count);
++}
++
 +static void __netif_up(struct xen_netif *netif)
 +{
++	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
 +	enable_irq(netif->irq);
 +	netif_schedule_work(netif);
 +}
@@ -17582,6 +18386,7 @@
 +{
 +	disable_irq(netif->irq);
 +	netif_deschedule_work(netif);
++	netbk_remove_netif(xen_netbk, netif);
 +}
 +
 +static int net_open(struct net_device *dev)
@@ -17613,31 +18418,69 @@
 +	return 0;
 +}
 +
-+static int netbk_set_sg(struct net_device *dev, u32 data)
++void netif_set_features(struct xen_netif *netif)
 +{
-+	if (data) {
-+		struct xen_netif *netif = netdev_priv(dev);
++	struct net_device *dev = netif->dev;
++	int features = dev->features;
 +
-+		if (!(netif->features & NETIF_F_SG))
++	if (netif->can_sg)
++		features |= NETIF_F_SG;
++	if (netif->gso || netif->gso_prefix)
++		features |= NETIF_F_TSO;
++	if (netif->csum)
++		features |= NETIF_F_IP_CSUM;
++
++	features &= ~(netif->features_disabled);
++
++	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
++		dev->mtu = ETH_DATA_LEN;
++
++	dev->features = features;
++}
++
++static int netbk_set_tx_csum(struct net_device *dev, u32 data)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (data) {
++		if (!netif->csum)
 +			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_IP_CSUM;
++	} else {
++		netif->features_disabled |= NETIF_F_IP_CSUM;
 +	}
 +
-+	if (dev->mtu > ETH_DATA_LEN)
-+		dev->mtu = ETH_DATA_LEN;
++	netif_set_features(netif);
++	return 0;
++}
++
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (data) {
++		if (!netif->can_sg)
++			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_SG;
++	} else {
++		netif->features_disabled |= NETIF_F_SG;
++	}
 +
-+	return ethtool_op_set_sg(dev, data);
++	netif_set_features(netif);
++	return 0;
 +}
 +
 +static int netbk_set_tso(struct net_device *dev, u32 data)
 +{
++	struct xen_netif *netif = netdev_priv(dev);
 +	if (data) {
-+		struct xen_netif *netif = netdev_priv(dev);
-+
-+		if (!(netif->features & NETIF_F_TSO))
++		if (!netif->gso && !netif->gso_prefix)
 +			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_TSO;
++	} else {
++		netif->features_disabled |= NETIF_F_TSO;
 +	}
 +
-+	return ethtool_op_set_tso(dev, data);
++	netif_set_features(netif);
++	return 0;
 +}
 +
 +static void netbk_get_drvinfo(struct net_device *dev,
@@ -17692,7 +18535,7 @@
 +	.get_drvinfo = netbk_get_drvinfo,
 +
 +	.get_tx_csum = ethtool_op_get_tx_csum,
-+	.set_tx_csum = ethtool_op_set_tx_csum,
++	.set_tx_csum = netbk_set_tx_csum,
 +	.get_sg = ethtool_op_get_sg,
 +	.set_sg = netbk_set_sg,
 +	.get_tso = ethtool_op_get_tso,
@@ -17732,8 +18575,10 @@
 +	netif = netdev_priv(dev);
 +	memset(netif, 0, sizeof(*netif));
 +	netif->domid  = domid;
++	netif->group  = -1;
 +	netif->handle = handle;
-+	netif->features = NETIF_F_SG;
++	netif->can_sg = 1;
++	netif->csum = 1;
 +	atomic_set(&netif->refcnt, 1);
 +	init_waitqueue_head(&netif->waiting_to_free);
 +	netif->dev = dev;
@@ -17750,8 +18595,7 @@
 +	init_timer(&netif->tx_queue_timeout);
 +
 +	dev->netdev_ops	= &netback_ops;
-+	dev->features   = NETIF_F_IP_CSUM|NETIF_F_SG;
-+
++	netif_set_features(netif);
 +	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
 +
 +	dev->tx_queue_len = netbk_queue_length;
@@ -17928,10 +18772,10 @@
 +}
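
netbk_add_netif() above spreads new netfronts across the xen_netbk groups by picking the first group with the fewest attached netfronts. A small worked example:

	/* xen_netbk_group_nr == 4, netfront_count == {2, 1, 3, 1}:
	 * the scan keeps the first strict minimum, so min_group == 1
	 * and the counts become {2, 2, 3, 1}.  netbk_remove_netif()
	 * decrements the same counter when the interface goes down. */
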
 diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
 new file mode 100644
-index 0000000..5dc4f98
+index 0000000..4121062
 --- /dev/null
 +++ b/drivers/xen/netback/netback.c
-@@ -0,0 +1,1609 @@
+@@ -0,0 +1,1855 @@
 +/******************************************************************************
 + * drivers/xen/netback/netback.c
 + *
@@ -17972,6 +18816,7 @@
 +
 +#include <linux/tcp.h>
 +#include <linux/udp.h>
++#include <linux/kthread.h>
 +
 +#include <xen/balloon.h>
 +#include <xen/events.h>
@@ -17982,18 +18827,10 @@
 +
 +/*define NETBE_DEBUG_INTERRUPT*/
 +
-+struct netbk_rx_meta {
-+	skb_frag_t frag;
-+	int id;
-+};
-+
-+struct netbk_tx_pending_inuse {
-+	struct list_head list;
-+	unsigned long alloc_time;
-+};
++struct xen_netbk *xen_netbk;
++int xen_netbk_group_nr;
 +
-+
-+static void netif_idx_release(u16 pending_idx);
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
 +static void make_tx_response(struct xen_netif *netif,
 +			     struct xen_netif_tx_request *txp,
 +			     s8       st);
@@ -18004,47 +18841,44 @@
 +					     u16      size,
 +					     u16      flags);
 +
-+static void net_tx_action(unsigned long unused);
-+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
-+
-+static void net_rx_action(unsigned long unused);
-+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
-+
-+static struct timer_list net_timer;
-+static struct timer_list netbk_tx_pending_timer;
-+
-+#define MAX_PENDING_REQS 256
++static void net_tx_action(unsigned long data);
 +
-+static struct sk_buff_head rx_queue;
++static void net_rx_action(unsigned long data);
 +
-+static struct page **mmap_pages;
-+static inline unsigned long idx_to_pfn(unsigned int idx)
++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
++				       unsigned int idx)
 +{
-+	return page_to_pfn(mmap_pages[idx]);
++	return page_to_pfn(netbk->mmap_pages[idx]);
 +}
 +
-+static inline unsigned long idx_to_kaddr(unsigned int idx)
++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
++					 unsigned int idx)
 +{
-+	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
++	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
 +}
 +
 +/* extra field used in struct page */
-+static inline void netif_set_page_index(struct page *pg, unsigned int index)
++static inline void netif_set_page_ext(struct page *pg, unsigned int group,
++		unsigned int idx)
 +{
-+	*(unsigned long *)&pg->mapping = index + 1;
++	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
++
++	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
++	pg->mapping = ext.mapping;
 +}
 +
-+static inline int netif_page_index(struct page *pg)
++static inline unsigned int netif_page_group(const struct page *pg)
 +{
-+	unsigned long idx = (unsigned long)pg->mapping - 1;
++	union page_ext ext = { .mapping = pg->mapping };
 +
-+	if (!PageForeign(pg))
-+		return -1;
++	return ext.e.group - 1;
++}
 +
-+	if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
-+		return -1;
++static inline unsigned int netif_page_index(const struct page *pg)
++{
++	union page_ext ext = { .mapping = pg->mapping };
 +
-+	return idx;
++	return ext.e.idx;
 +}
 +
 +/*
@@ -18055,46 +18889,17 @@
 + */
 +#define PKT_PROT_LEN 72
 +
-+static struct pending_tx_info {
-+	struct xen_netif_tx_request req;
-+	struct xen_netif *netif;
-+} pending_tx_info[MAX_PENDING_REQS];
-+static u16 pending_ring[MAX_PENDING_REQS];
-+typedef unsigned int pending_ring_idx_t;
-+
 +static inline pending_ring_idx_t pending_index(unsigned i)
 +{
 +	return i & (MAX_PENDING_REQS-1);
 +}
 +
-+static pending_ring_idx_t pending_prod, pending_cons;
-+
-+static inline pending_ring_idx_t nr_pending_reqs(void)
++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
 +{
-+	return MAX_PENDING_REQS - pending_prod + pending_cons;
++	return MAX_PENDING_REQS -
++		netbk->pending_prod + netbk->pending_cons;
 +}
 +
-+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
-+static u16 dealloc_ring[MAX_PENDING_REQS];
-+static pending_ring_idx_t dealloc_prod, dealloc_cons;
-+
-+/* Doubly-linked list of in-use pending entries. */
-+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-+static LIST_HEAD(pending_inuse_head);
-+
-+static struct sk_buff_head tx_queue;
-+
-+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-+static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-+static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-+
-+static LIST_HEAD(net_schedule_list);
-+static DEFINE_SPINLOCK(net_schedule_list_lock);
-+
-+#define MAX_MFN_ALLOC 64
-+static unsigned long mfn_list[MAX_MFN_ALLOC];
-+static unsigned int alloc_index = 0;
-+
 +/* Setting this allows the safe use of this driver without netloop. */
 +static int MODPARM_copy_skb = 1;
 +module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
@@ -18102,18 +18907,31 @@
 +
 +int netbk_copy_skb_mode;
 +
-+static inline unsigned long alloc_mfn(void)
++static int MODPARM_netback_kthread;
++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
++MODULE_PARM_DESC(netback_kthread, "Use a kernel thread instead of tasklets");
++
++/*
++ * Netback bottom half handler.
++ * dir indicates the data direction.
++ * rx: 1, tx: 0.
++ */
++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
 +{
-+	BUG_ON(alloc_index == 0);
-+	return mfn_list[--alloc_index];
++	if (MODPARM_netback_kthread)
++		wake_up(&netbk->kthread.netbk_action_wq);
++	else if (dir)
++		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
++	else
++		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
 +}
 +
-+static inline void maybe_schedule_tx_action(void)
++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
 +{
 +	smp_mb();
-+	if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
-+	    !list_empty(&net_schedule_list))
-+		tasklet_schedule(&net_tx_tasklet);
++	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
++	    !list_empty(&netbk->net_schedule_list))
++		xen_netbk_bh_handler(netbk, 0);
 +}
 +
 +static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
@@ -18178,7 +18996,11 @@
 +		len -= copy;
 +	}
 +
++#ifdef NET_SKBUFF_DATA_USES_OFFSET
++	offset = 0;
++#else
 +	offset = nskb->data - skb->data;
++#endif
 +
 +	nskb->transport_header = skb->transport_header + offset;
 +	nskb->network_header = skb->network_header + offset;
@@ -18194,7 +19016,7 @@
 +
 +static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
 +{
-+	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
++	if (netif->can_sg || netif->gso || netif->gso_prefix)
 +		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
 +	return 1; /* all in one */
 +}
@@ -18215,12 +19037,60 @@
 +		netif_wake_queue(netif->dev);
 +}
 +
++/* Figure out how many ring slots we're going to need to send @skb to
++   the guest. */
++static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++{
++	unsigned count;
++	unsigned copy_off;
++	unsigned i;
++
++	copy_off = 0;
++	count = 1;
++
++	BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
++
++	copy_off = skb_headlen(skb);
++
++	if (skb_shinfo(skb)->gso_size)
++		count++;
++
++	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++		unsigned long size = skb_shinfo(skb)->frags[i].size;
++		unsigned long bytes;
++		while (size > 0) {
++			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
++
++			/* These checks are the same as in netbk_gop_frag_copy */
++			if (copy_off == MAX_BUFFER_OFFSET
++			    || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
++				count++;
++				copy_off = 0;
++			}
++
++			bytes = size;
++			if (copy_off + bytes > MAX_BUFFER_OFFSET)
++				bytes = MAX_BUFFER_OFFSET - copy_off;
++
++			copy_off += bytes;
++			size -= bytes;
++		}
++	}
++	return count;
++}
++
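
A worked example of the slot accounting in count_skb_slots() above, assuming MAX_BUFFER_OFFSET == PAGE_SIZE == 4096:

	/* skb with 100 bytes linear, gso_size set, frags of 4096 and
	 * 200 bytes:
	 *
	 *   start           count = 1, copy_off = 100
	 *   gso_size != 0   count = 2
	 *   frag 0 (4096)   would overflow, fits a fresh buffer, and
	 *                   copy_off != 0: count = 3, then the frag
	 *                   exactly fills it: copy_off = 4096
	 *   frag 1 (200)    copy_off == MAX_BUFFER_OFFSET, so another
	 *                   new buffer: count = 4, copy_off = 200
	 *
	 * netif_be_start_xmit() therefore reserves 4 ring slots. */
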
 +int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 +{
 +	struct xen_netif *netif = netdev_priv(dev);
++	struct xen_netbk *netbk;
 +
 +	BUG_ON(skb->dev != dev);
 +
++	if (netif->group == -1)
++		goto drop;
++
++	netbk = &xen_netbk[netif->group];
++
 +	/* Drop the packet if the target domain has no receive buffers. */
 +	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
 +		goto drop;
@@ -18240,8 +19110,9 @@
 +		skb = nskb;
 +	}
 +
-+	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
-+				   !!skb_shinfo(skb)->gso_size;
++	/* Reserve ring slots for the worst-case number of
++	 * fragments. */
++	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
 +	netif_get(netif);
 +
 +	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
@@ -18262,9 +19133,9 @@
 +			mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
 +		}
 +	}
++	skb_queue_tail(&netbk->rx_queue, skb);
 +
-+	skb_queue_tail(&rx_queue, skb);
-+	tasklet_schedule(&net_rx_tasklet);
++	xen_netbk_bh_handler(netbk, 1);
 +
 +	return 0;
 +
@@ -18275,112 +19146,187 @@
 +}
 +
 +struct netrx_pending_operations {
-+	unsigned trans_prod, trans_cons;
-+	unsigned mmu_prod, mmu_mcl;
-+	unsigned mcl_prod, mcl_cons;
 +	unsigned copy_prod, copy_cons;
 +	unsigned meta_prod, meta_cons;
-+	struct mmu_update *mmu;
-+	struct gnttab_transfer *trans;
 +	struct gnttab_copy *copy;
-+	struct multicall_entry *mcl;
 +	struct netbk_rx_meta *meta;
++	int copy_off;
++	grant_ref_t copy_gref;
 +};
 +
 +/* Set up the grant operations for this fragment.  If it's a flipping
 +   interface, we also set up the unmap request from here. */
-+static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
-+			  int i, struct netrx_pending_operations *npo,
-+			  struct page *page, unsigned long size,
-+			  unsigned long offset)
++
++static void netbk_gop_frag_copy(struct xen_netif *netif,
++				struct netrx_pending_operations *npo,
++				struct page *page, unsigned long size,
++				unsigned long offset, int head)
 +{
 +	struct gnttab_copy *copy_gop;
-+	struct xen_netif_rx_request *req;
-+	unsigned long old_mfn;
++	struct netbk_rx_meta *meta;
++	int group = netif_page_group(page);
 +	int idx = netif_page_index(page);
++	unsigned long bytes;
 +
-+	old_mfn = virt_to_mfn(page_address(page));
++	/* Data must not cross a page boundary. */
++	BUG_ON(size + offset > PAGE_SIZE);
 +
-+	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
++	meta = npo->meta + npo->meta_prod - 1;
++
++	while (size > 0) {
++		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
++
++		/*
++		 * Move to a new receive buffer if:
++		 *
++		 * simple case: we have completely filled the current buffer.
++		 *
++		 * complex case: the current frag would overflow
++		 * the current buffer but only if:
++		 *     (i)   this frag would fit completely in the next buffer
++		 * and (ii)  there is already some data in the current buffer
++		 * and (iii) this is not the head buffer.
++		 *
++		 * Where:
++		 * - (i) stops us splitting a frag into two copies
++		 *   unless the frag is too large for a single buffer.
++		 * - (ii) stops us from leaving a buffer pointlessly empty.
++		 * - (iii) stops us leaving the first buffer
++		 *   empty. Strictly speaking this is already covered
++		 *   by (ii) but is explicitly checked because
++		 *   netfront relies on the first buffer being
++		 *   non-empty and can crash otherwise.
++		 *
++		 * This means we will effectively linearise small
++		 * frags but do not needlessly split large buffers
++		 * into multiple copies, tending to give large frags
++		 * their own buffers as before.
++		 */
++		if (npo->copy_off == MAX_BUFFER_OFFSET
++		    || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
++			struct xen_netif_rx_request *req;
++
++			BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
++			/* Overflowed this request, go to the next one */
++			req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++			meta = npo->meta + npo->meta_prod++;
++			meta->gso_size = 0;
++			meta->size = 0;
++			meta->id = req->id;
++			npo->copy_off = 0;
++			npo->copy_gref = req->gref;
++		}
++
++		bytes = size;
++		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
++			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
++
++		copy_gop = npo->copy + npo->copy_prod++;
++		copy_gop->flags = GNTCOPY_dest_gref;
++		if (PageForeign(page)) {
++			struct xen_netbk *netbk = &xen_netbk[group];
++			struct pending_tx_info *src_pend;
++
++			src_pend = &netbk->pending_tx_info[idx];
++
++			copy_gop->source.domid = src_pend->netif->domid;
++			copy_gop->source.u.ref = src_pend->req.gref;
++			copy_gop->flags |= GNTCOPY_source_gref;
++		} else {
++			copy_gop->source.domid = DOMID_SELF;
++			copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
++		}
++		copy_gop->source.offset = offset;
++		copy_gop->dest.domid = netif->domid;
 +
-+	copy_gop = npo->copy + npo->copy_prod++;
-+	copy_gop->flags = GNTCOPY_dest_gref;
-+	if (idx > -1) {
-+		struct pending_tx_info *src_pend = &pending_tx_info[idx];
-+		copy_gop->source.domid = src_pend->netif->domid;
-+		copy_gop->source.u.ref = src_pend->req.gref;
-+		copy_gop->flags |= GNTCOPY_source_gref;
-+	} else {
-+		copy_gop->source.domid = DOMID_SELF;
-+		copy_gop->source.u.gmfn = old_mfn;
++		copy_gop->dest.offset = npo->copy_off;
++		copy_gop->dest.u.ref = npo->copy_gref;
++		copy_gop->len = bytes;
++
++		npo->copy_off += bytes;
++		meta->size += bytes;
++
++		offset += bytes;
++		size -= bytes;
++		head = 0; /* Must be something in this buffer now */
 +	}
-+	copy_gop->source.offset = offset;
-+	copy_gop->dest.domid = netif->domid;
-+	copy_gop->dest.offset = 0;
-+	copy_gop->dest.u.ref = req->gref;
-+	copy_gop->len = size;
-+
-+	return req->id;
 +}
 +
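The buffer-advance test in the loop above is the same one used, minus the
head term, in count_skb_slots(). Factored out it reads as the sketch
below (not part of the patch; the name and the MAX_BUFFER_OFFSET value
are illustrative, as before):

    /* Sketch: should the copier move to a fresh receive buffer? */
    static int start_new_rx_buffer(int offset, unsigned long size, int head)
    {
    	/* Simple case: the current buffer is full. */
    	if (offset == MAX_BUFFER_OFFSET)
    		return 1;

    	/* Complex case: the frag would overflow this buffer, but only
    	 * if (i) it fits completely in the next one, (ii) the current
    	 * buffer already holds some data, and (iii) this is not the
    	 * head buffer. */
    	if (offset + size > MAX_BUFFER_OFFSET &&
    	    size <= MAX_BUFFER_OFFSET && offset && !head)
    		return 1;

    	return 0;
    }
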
-+static void netbk_gop_skb(struct sk_buff *skb,
-+			  struct netrx_pending_operations *npo)
++/* Prepare an SKB to be transmitted to the frontend.  This is
++   responsible for allocating grant operations, meta structures, etc.
++   It returns the number of meta structures consumed.  The number of
++   ring slots used is always equal to the number of meta slots used
++   plus the number of GSO descriptors used.  Currently, we use either
++   zero GSO descriptors (for non-GSO packets) or one descriptor (for
++   frontend-side LRO). */
++static int netbk_gop_skb(struct sk_buff *skb,
++			 struct netrx_pending_operations *npo)
 +{
 +	struct xen_netif *netif = netdev_priv(skb->dev);
 +	int nr_frags = skb_shinfo(skb)->nr_frags;
 +	int i;
-+	int extra;
-+	struct netbk_rx_meta *head_meta, *meta;
++	struct xen_netif_rx_request *req;
++	struct netbk_rx_meta *meta;
++	int old_meta_prod;
 +
-+	head_meta = npo->meta + npo->meta_prod++;
-+	head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
-+	head_meta->frag.size = skb_shinfo(skb)->gso_size;
-+	extra = !!head_meta->frag.size + 1;
++	old_meta_prod = npo->meta_prod;
 +
-+	for (i = 0; i < nr_frags; i++) {
++	/* Set up a GSO prefix descriptor, if necessary */
++	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
++		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
 +		meta = npo->meta + npo->meta_prod++;
-+		meta->frag = skb_shinfo(skb)->frags[i];
-+		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
-+					  meta->frag.page,
-+					  meta->frag.size,
-+					  meta->frag.page_offset);
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++		meta->size = 0;
++		meta->id = req->id;
 +	}
 +
-+	/*
-+	 * This must occur at the end to ensure that we don't trash skb_shinfo
-+	 * until we're done. We know that the head doesn't cross a page
-+	 * boundary because such packets get copied in netif_be_start_xmit.
-+	 */
-+	head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
-+				       virt_to_page(skb->data),
-+				       skb_headlen(skb),
-+				       offset_in_page(skb->data));
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++	meta = npo->meta + npo->meta_prod++;
 +
-+	netif->rx.req_cons += nr_frags + extra;
-+}
++	if (!netif->gso_prefix)
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++	else
++		meta->gso_size = 0;
 +
-+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
-+{
-+	int i;
++	meta->size = 0;
++	meta->id = req->id;
++	npo->copy_off = 0;
++	npo->copy_gref = req->gref;
++
++	netbk_gop_frag_copy(netif,
++			    npo, virt_to_page(skb->data),
++			    skb_headlen(skb),
++			    offset_in_page(skb->data), 1);
++
++	/* Leave a gap for the GSO descriptor. */
++	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
++		netif->rx.req_cons++;
++
++	for (i = 0; i < nr_frags; i++) {
++		netbk_gop_frag_copy(netif, npo,
++				    skb_shinfo(skb)->frags[i].page,
++				    skb_shinfo(skb)->frags[i].size,
++				    skb_shinfo(skb)->frags[i].page_offset,
++				    0);
++	}
 +
-+	for (i = 0; i < nr_frags; i++)
-+		put_page(meta[i].frag.page);
++	return npo->meta_prod - old_meta_prod;
 +}
 +
 +/* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
 +   used to set up the operations on the top of
 +   netrx_pending_operations, which have since been done.  Check that
 +   they didn't give any errors and advance over them. */
-+static int netbk_check_gop(int nr_frags, domid_t domid,
++static int netbk_check_gop(int nr_meta_slots, domid_t domid,
 +			   struct netrx_pending_operations *npo)
 +{
 +	struct gnttab_copy     *copy_op;
 +	int status = NETIF_RSP_OKAY;
 +	int i;
 +
-+	for (i = 0; i <= nr_frags; i++) {
-+			copy_op = npo->copy + npo->copy_cons++;
-+			if (copy_op->status != GNTST_okay) {
++	for (i = 0; i < nr_meta_slots; i++) {
++		copy_op = npo->copy + npo->copy_cons++;
++		if (copy_op->status != GNTST_okay) {
 +				DPRINTK("Bad status %d from copy to DOM%d.\n",
 +					copy_op->status, domid);
 +				status = NETIF_RSP_ERROR;
@@ -18391,28 +19337,36 @@
 +}
 +
 +static void netbk_add_frag_responses(struct xen_netif *netif, int status,
-+				     struct netbk_rx_meta *meta, int nr_frags)
++				     struct netbk_rx_meta *meta,
++				     int nr_meta_slots)
 +{
 +	int i;
 +	unsigned long offset;
 +
-+	for (i = 0; i < nr_frags; i++) {
-+		int id = meta[i].id;
-+		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
-+		
++	for (i = 0; i < nr_meta_slots; i++) {
++		int flags;
++		if (i == nr_meta_slots - 1)
++			flags = 0;
++		else
++			flags = NETRXF_more_data;
++
 +		offset = 0;
-+		make_rx_response(netif, id, status, offset,
-+				 meta[i].frag.size, flags);
++		make_rx_response(netif, meta[i].id, status, offset,
++				 meta[i].size, flags);
 +	}
 +}
 +
-+static void net_rx_action(unsigned long unused)
++struct skb_cb_overlay {
++	int meta_slots_used;
++};
++
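skb->cb is a scratch area (48 bytes in kernels of this vintage) that the
layer currently owning the skb may overlay with its own struct, which is
what skb_cb_overlay does here. A minimal sketch of the idiom, with a
compile-time size check the patch itself omits (netbk_cb is a
hypothetical helper name):

    #include <linux/kernel.h>
    #include <linux/skbuff.h>

    struct skb_cb_overlay {
    	int meta_slots_used;
    };

    static inline struct skb_cb_overlay *netbk_cb(struct sk_buff *skb)
    {
    	/* Overlaying a struct larger than cb[] would corrupt the skb. */
    	BUILD_BUG_ON(sizeof(struct skb_cb_overlay) > sizeof(skb->cb));
    	return (struct skb_cb_overlay *)skb->cb;
    }
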
++static void net_rx_action(unsigned long data)
 +{
 +	struct xen_netif *netif = NULL;
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
 +	s8 status;
-+	u16 id, irq, flags;
++	u16 irq, flags;
 +	struct xen_netif_rx_response *resp;
-+	struct multicall_entry *mcl;
 +	struct sk_buff_head rxq;
 +	struct sk_buff *skb;
 +	int notify_nr = 0;
@@ -18420,35 +19374,23 @@
 +	int nr_frags;
 +	int count;
 +	unsigned long offset;
-+
-+	/*
-+	 * Putting hundreds of bytes on the stack is considered rude.
-+	 * Static works because a tasklet can only be on one CPU at any time.
-+	 */
-+	static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
-+	static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
-+	static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
-+	static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
-+	static unsigned char rx_notify[NR_IRQS];
-+	static u16 notify_list[NET_RX_RING_SIZE];
-+	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++	struct skb_cb_overlay *sco;
 +
 +	struct netrx_pending_operations npo = {
-+		mmu: rx_mmu,
-+		trans: grant_trans_op,
-+		copy: grant_copy_op,
-+		mcl: rx_mcl,
-+		meta: meta};
++		.copy  = netbk->grant_copy_op,
++		.meta  = netbk->meta,
++	};
 +
 +	skb_queue_head_init(&rxq);
 +
 +	count = 0;
 +
-+	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
++	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
++		netif = netdev_priv(skb->dev);
 +		nr_frags = skb_shinfo(skb)->nr_frags;
-+		*(int *)skb->cb = nr_frags;
 +
-+		netbk_gop_skb(skb, &npo);
++		sco = (struct skb_cb_overlay *)skb->cb;
++		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
 +
 +		count += nr_frags + 1;
 +
@@ -18459,65 +19401,46 @@
 +			break;
 +	}
 +
-+	BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
-+
-+	npo.mmu_mcl = npo.mcl_prod;
-+	if (npo.mcl_prod) {
-+		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-+		BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
-+		mcl = npo.mcl + npo.mcl_prod++;
-+
-+		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
-+		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-+
-+		mcl->op = __HYPERVISOR_mmu_update;
-+		mcl->args[0] = (unsigned long)rx_mmu;
-+		mcl->args[1] = npo.mmu_prod;
-+		mcl->args[2] = 0;
-+		mcl->args[3] = DOMID_SELF;
-+	}
-+
-+	if (npo.trans_prod) {
-+		BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
-+		mcl = npo.mcl + npo.mcl_prod++;
-+		mcl->op = __HYPERVISOR_grant_table_op;
-+		mcl->args[0] = GNTTABOP_transfer;
-+		mcl->args[1] = (unsigned long)grant_trans_op;
-+		mcl->args[2] = npo.trans_prod;
-+	}
-+
-+	if (npo.copy_prod) {
-+		BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
-+		mcl = npo.mcl + npo.mcl_prod++;
-+		mcl->op = __HYPERVISOR_grant_table_op;
-+		mcl->args[0] = GNTTABOP_copy;
-+		mcl->args[1] = (unsigned long)grant_copy_op;
-+		mcl->args[2] = npo.copy_prod;
-+	}
++	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
 +
-+	/* Nothing to do? */
-+	if (!npo.mcl_prod)
++	if (!npo.copy_prod)
 +		return;
 +
-+	BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
-+
-+	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
++	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
++					npo.copy_prod);
 +	BUG_ON(ret != 0);
-+	/* The mmu_machphys_update() must not fail. */
-+	BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
 +
 +	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-+		nr_frags = *(int *)skb->cb;
++		sco = (struct skb_cb_overlay *)skb->cb;
 +
 +		netif = netdev_priv(skb->dev);
 +
++		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
++			resp = RING_GET_RESPONSE(&netif->rx,
++						netif->rx.rsp_prod_pvt++);
++
++			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
++
++			resp->offset = netbk->meta[npo.meta_cons].gso_size;
++			resp->id = netbk->meta[npo.meta_cons].id;
++			resp->status = sco->meta_slots_used;
++
++			npo.meta_cons++;
++			sco->meta_slots_used--;
++		}
++
++
 +		netif->stats.tx_bytes += skb->len;
 +		netif->stats.tx_packets++;
 +
-+		status = netbk_check_gop(nr_frags, netif->domid, &npo);
++		status = netbk_check_gop(sco->meta_slots_used,
++					 netif->domid, &npo);
 +
-+		id = meta[npo.meta_cons].id;
-+		flags = nr_frags ? NETRXF_more_data : 0;
++		if (sco->meta_slots_used == 1)
++			flags = 0;
++		else
++			flags = NETRXF_more_data;
 +
 +		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
 +			flags |= NETRXF_csum_blank | NETRXF_data_validated;
@@ -18526,10 +19449,12 @@
 +			flags |= NETRXF_data_validated;
 +
 +		offset = 0;
-+		resp = make_rx_response(netif, id, status, offset,
-+					skb_headlen(skb), flags);
++		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
++					status, offset,
++					netbk->meta[npo.meta_cons].size,
++					flags);
 +
-+		if (meta[npo.meta_cons].frag.size) {
++		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
 +			struct xen_netif_extra_info *gso =
 +				(struct xen_netif_extra_info *)
 +				RING_GET_RESPONSE(&netif->rx,
@@ -18537,7 +19462,7 @@
 +
 +			resp->flags |= NETRXF_extra_info;
 +
-+			gso->u.gso.size = meta[npo.meta_cons].frag.size;
++			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
 +			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 +			gso->u.gso.pad = 0;
 +			gso->u.gso.features = 0;
@@ -18546,16 +19471,18 @@
 +			gso->flags = 0;
 +		}
 +
-+		netbk_add_frag_responses(netif, status,
-+					 meta + npo.meta_cons + 1,
-+					 nr_frags);
++		if (sco->meta_slots_used > 1) {
++			netbk_add_frag_responses(netif, status,
++						 netbk->meta + npo.meta_cons + 1,
++						 sco->meta_slots_used - 1);
++		}
 +
 +		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
 +		irq = netif->irq;
-+		if (ret && !rx_notify[irq] &&
++		if (ret && !netbk->rx_notify[irq] &&
 +				(netif->smart_poll != 1)) {
-+			rx_notify[irq] = 1;
-+			notify_list[notify_nr++] = irq;
++			netbk->rx_notify[irq] = 1;
++			netbk->notify_list[notify_nr++] = irq;
 +		}
 +
 +		if (netif_queue_stopped(netif->dev) &&
@@ -18567,37 +19494,39 @@
 +		 * netfront_smartpoll_active indicates whether
 +		 * netfront timer is active.
 +		 */
-+		if ((netif->smart_poll == 1)) {
-+			if (!(netif->rx.sring->netfront_smartpoll_active)) {
-+				notify_remote_via_irq(irq);
-+				netif->rx.sring->netfront_smartpoll_active = 1;
-+			}
++		if ((netif->smart_poll == 1) &&
++		    !(netif->rx.sring->private.netif.smartpoll_active)) {
++			notify_remote_via_irq(irq);
++			netif->rx.sring->private.netif.smartpoll_active = 1;
 +		}
 +
 +		netif_put(netif);
++		npo.meta_cons += sco->meta_slots_used;
 +		dev_kfree_skb(skb);
-+		npo.meta_cons += nr_frags + 1;
 +	}
 +
 +	while (notify_nr != 0) {
-+		irq = notify_list[--notify_nr];
-+		rx_notify[irq] = 0;
++		irq = netbk->notify_list[--notify_nr];
++		netbk->rx_notify[irq] = 0;
 +		notify_remote_via_irq(irq);
 +	}
 +
 +	/* More work to do? */
-+	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
-+		tasklet_schedule(&net_rx_tasklet);
++	if (!skb_queue_empty(&netbk->rx_queue) &&
++			!timer_pending(&netbk->net_timer))
++		xen_netbk_bh_handler(netbk, 1);
 +}
 +
-+static void net_alarm(unsigned long unused)
++static void net_alarm(unsigned long data)
 +{
-+	tasklet_schedule(&net_rx_tasklet);
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	xen_netbk_bh_handler(netbk, 1);
 +}
 +
-+static void netbk_tx_pending_timeout(unsigned long unused)
++static void netbk_tx_pending_timeout(unsigned long data)
 +{
-+	tasklet_schedule(&net_tx_tasklet);
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	xen_netbk_bh_handler(netbk, 0);
 +}
 +
 +struct net_device_stats *netif_be_get_stats(struct net_device *dev)
@@ -18613,37 +19542,40 @@
 +
 +static void remove_from_net_schedule_list(struct xen_netif *netif)
 +{
-+	spin_lock_irq(&net_schedule_list_lock);
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	spin_lock_irq(&netbk->net_schedule_list_lock);
 +	if (likely(__on_net_schedule_list(netif))) {
 +		list_del_init(&netif->list);
 +		netif_put(netif);
 +	}
-+	spin_unlock_irq(&net_schedule_list_lock);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
 +}
 +
 +static void add_to_net_schedule_list_tail(struct xen_netif *netif)
 +{
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
 +	if (__on_net_schedule_list(netif))
 +		return;
 +
-+	spin_lock_irq(&net_schedule_list_lock);
++	spin_lock_irq(&netbk->net_schedule_list_lock);
 +	if (!__on_net_schedule_list(netif) &&
 +	    likely(netif_schedulable(netif))) {
-+		list_add_tail(&netif->list, &net_schedule_list);
++		list_add_tail(&netif->list, &netbk->net_schedule_list);
 +		netif_get(netif);
 +	}
-+	spin_unlock_irq(&net_schedule_list_lock);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
 +}
 +
 +void netif_schedule_work(struct xen_netif *netif)
 +{
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
 +	int more_to_do;
 +
 +	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
 +
 +	if (more_to_do) {
 +		add_to_net_schedule_list_tail(netif);
-+		maybe_schedule_tx_action();
++		maybe_schedule_tx_action(netbk);
 +	}
 +}
 +
@@ -18680,13 +19612,15 @@
 +	netif_schedule_work(netif);
 +}
 +
-+static inline int copy_pending_req(pending_ring_idx_t pending_idx)
++static inline int copy_pending_req(struct xen_netbk *netbk,
++				   pending_ring_idx_t pending_idx)
 +{
-+	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
-+				      &mmap_pages[pending_idx]);
++	return gnttab_copy_grant_page(
++			netbk->grant_tx_handle[pending_idx],
++			&netbk->mmap_pages[pending_idx]);
 +}
 +
-+inline static void net_tx_action_dealloc(void)
++static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
 +{
 +	struct netbk_tx_pending_inuse *inuse, *n;
 +	struct gnttab_unmap_grant_ref *gop;
@@ -18696,49 +19630,56 @@
 +	int ret;
 +	LIST_HEAD(list);
 +
-+	dc = dealloc_cons;
-+	gop = tx_unmap_ops;
++	dc = netbk->dealloc_cons;
++	gop = netbk->tx_unmap_ops;
 +
 +	/*
 +	 * Free up any grants we have finished using
 +	 */
 +	do {
-+		dp = dealloc_prod;
++		dp = netbk->dealloc_prod;
 +
 +		/* Ensure we see all indices enqueued by netif_idx_release(). */
 +		smp_rmb();
 +
 +		while (dc != dp) {
 +			unsigned long pfn;
++			struct netbk_tx_pending_inuse *pending_inuse =
++					netbk->pending_inuse;
 +
-+			pending_idx = dealloc_ring[pending_index(dc++)];
++			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
 +			list_move_tail(&pending_inuse[pending_idx].list, &list);
 +
-+			pfn = idx_to_pfn(pending_idx);
++			pfn = idx_to_pfn(netbk, pending_idx);
 +			/* Already unmapped? */
 +			if (!phys_to_machine_mapping_valid(pfn))
 +				continue;
 +
-+			gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
-+					    GNTMAP_host_map,
-+					    grant_tx_handle[pending_idx]);
++			gnttab_set_unmap_op(gop,
++					idx_to_kaddr(netbk, pending_idx),
++					GNTMAP_host_map,
++					netbk->grant_tx_handle[pending_idx]);
 +			gop++;
 +		}
 +
 +		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
-+		    list_empty(&pending_inuse_head))
++		    list_empty(&netbk->pending_inuse_head))
 +			break;
 +
 +		/* Copy any entries that have been pending for too long. */
-+		list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
++		list_for_each_entry_safe(inuse, n,
++				&netbk->pending_inuse_head, list) {
++			struct pending_tx_info *pending_tx_info;
++			pending_tx_info = netbk->pending_tx_info;
++
 +			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
 +				break;
 +
-+			pending_idx = inuse - pending_inuse;
++			pending_idx = inuse - netbk->pending_inuse;
 +
 +			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
 +
-+			switch (copy_pending_req(pending_idx)) {
++			switch (copy_pending_req(netbk, pending_idx)) {
 +			case 0:
 +				list_move_tail(&inuse->list, &list);
 +				continue;
@@ -18751,16 +19692,21 @@
 +
 +			break;
 +		}
-+	} while (dp != dealloc_prod);
++	} while (dp != netbk->dealloc_prod);
 +
-+	dealloc_cons = dc;
++	netbk->dealloc_cons = dc;
 +
 +	ret = HYPERVISOR_grant_table_op(
-+		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
++		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++		gop - netbk->tx_unmap_ops);
 +	BUG_ON(ret);
 +
 +	list_for_each_entry_safe(inuse, n, &list, list) {
-+		pending_idx = inuse - pending_inuse;
++		struct pending_tx_info *pending_tx_info;
++		pending_ring_idx_t index;
++
++		pending_tx_info = netbk->pending_tx_info;
++		pending_idx = inuse - netbk->pending_inuse;
 +
 +		netif = pending_tx_info[pending_idx].netif;
 +
@@ -18768,9 +19714,10 @@
 +				 NETIF_RSP_OKAY);
 +
 +		/* Ready for next use. */
-+		gnttab_reset_grant_page(mmap_pages[pending_idx]);
++		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
 +
-+		pending_ring[pending_index(pending_prod++)] = pending_idx;
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
 +
 +		netif_put(netif);
 +
@@ -18778,7 +19725,8 @@
 +	}
 +}
 +
-+static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
++static void netbk_tx_err(struct xen_netif *netif,
++		struct xen_netif_tx_request *txp, RING_IDX end)
 +{
 +	RING_IDX cons = netif->tx.req_cons;
 +
@@ -18834,7 +19782,8 @@
 +	return frags;
 +}
 +
-+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
++						  struct xen_netif *netif,
 +						  struct sk_buff *skb,
 +						  struct xen_netif_tx_request *txp,
 +						  struct gnttab_map_grant_ref *mop)
@@ -18848,9 +19797,14 @@
 +	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
 +
 +	for (i = start; i < shinfo->nr_frags; i++, txp++) {
-+		pending_idx = pending_ring[pending_index(pending_cons++)];
++		pending_ring_idx_t index;
++		struct pending_tx_info *pending_tx_info =
++			netbk->pending_tx_info;
++
++		index = pending_index(netbk->pending_cons++);
++		pending_idx = netbk->pending_ring[index];
 +
-+		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
++		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
 +				  GNTMAP_host_map | GNTMAP_readonly,
 +				  txp->gref, netif->domid);
 +
@@ -18863,11 +19817,13 @@
 +	return mop;
 +}
 +
-+static int netbk_tx_check_mop(struct sk_buff *skb,
-+			       struct gnttab_map_grant_ref **mopp)
++static int netbk_tx_check_mop(struct xen_netbk *netbk,
++			      struct sk_buff *skb,
++			      struct gnttab_map_grant_ref **mopp)
 +{
 +	struct gnttab_map_grant_ref *mop = *mopp;
 +	int pending_idx = *((u16 *)skb->data);
++	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
 +	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
 +	struct xen_netif_tx_request *txp;
 +	struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -18877,15 +19833,17 @@
 +	/* Check status of header. */
 +	err = mop->status;
 +	if (unlikely(err)) {
++		pending_ring_idx_t index;
++		index = pending_index(netbk->pending_prod++);
 +		txp = &pending_tx_info[pending_idx].req;
 +		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		pending_ring[pending_index(pending_prod++)] = pending_idx;
++		netbk->pending_ring[index] = pending_idx;
 +		netif_put(netif);
 +	} else {
 +		set_phys_to_machine(
-+			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
++			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
 +			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-+		grant_tx_handle[pending_idx] = mop->handle;
++		netbk->grant_tx_handle[pending_idx] = mop->handle;
 +	}
 +
 +	/* Skip first skb fragment if it is on same page as header fragment. */
@@ -18893,26 +19851,30 @@
 +
 +	for (i = start; i < nr_frags; i++) {
 +		int j, newerr;
++		pending_ring_idx_t index;
 +
 +		pending_idx = (unsigned long)shinfo->frags[i].page;
 +
 +		/* Check error status: if okay then remember grant handle. */
 +		newerr = (++mop)->status;
 +		if (likely(!newerr)) {
++			unsigned long addr;
++			addr = idx_to_kaddr(netbk, pending_idx);
 +			set_phys_to_machine(
-+				__pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
++				__pa(addr)>>PAGE_SHIFT,
 +				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
-+			grant_tx_handle[pending_idx] = mop->handle;
++			netbk->grant_tx_handle[pending_idx] = mop->handle;
 +			/* Had a previous error? Invalidate this fragment. */
 +			if (unlikely(err))
-+				netif_idx_release(pending_idx);
++				netif_idx_release(netbk, pending_idx);
 +			continue;
 +		}
 +
 +		/* Error on this fragment: respond to client with an error. */
-+		txp = &pending_tx_info[pending_idx].req;
++		txp = &netbk->pending_tx_info[pending_idx].req;
 +		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		pending_ring[pending_index(pending_prod++)] = pending_idx;
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
 +		netif_put(netif);
 +
 +		/* Not the first error? Preceding frags already invalidated. */
@@ -18921,10 +19883,10 @@
 +
 +		/* First error: invalidate header and preceding fragments. */
 +		pending_idx = *((u16 *)skb->data);
-+		netif_idx_release(pending_idx);
++		netif_idx_release(netbk, pending_idx);
 +		for (j = start; j < i; j++) {
 +			pending_idx = (unsigned long)shinfo->frags[j].page;
-+			netif_idx_release(pending_idx);
++			netif_idx_release(netbk, pending_idx);
 +		}
 +
 +		/* Remember the error: invalidate all subsequent fragments. */
@@ -18935,7 +19897,7 @@
 +	return err;
 +}
 +
-+static void netbk_fill_frags(struct sk_buff *skb)
++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
 +{
 +	struct skb_shared_info *shinfo = skb_shinfo(skb);
 +	int nr_frags = shinfo->nr_frags;
@@ -18948,12 +19910,12 @@
 +
 +		pending_idx = (unsigned long)frag->page;
 +
-+		pending_inuse[pending_idx].alloc_time = jiffies;
-+		list_add_tail(&pending_inuse[pending_idx].list,
-+			      &pending_inuse_head);
++		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
++		list_add_tail(&netbk->pending_inuse[pending_idx].list,
++			      &netbk->pending_inuse_head);
 +
-+		txp = &pending_tx_info[pending_idx].req;
-+		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
++		txp = &netbk->pending_tx_info[pending_idx].req;
++		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
 +		frag->size = txp->size;
 +		frag->page_offset = txp->offset;
 +
@@ -19085,15 +20047,15 @@
 +	return false;
 +}
 +
-+static unsigned net_tx_build_mops(void)
++static unsigned net_tx_build_mops(struct xen_netbk *netbk)
 +{
 +	struct gnttab_map_grant_ref *mop;
 +	struct sk_buff *skb;
 +	int ret;
 +
-+	mop = tx_map_ops;
-+	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+		!list_empty(&net_schedule_list)) {
++	mop = netbk->tx_map_ops;
++	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++		!list_empty(&netbk->net_schedule_list)) {
 +		struct xen_netif *netif;
 +		struct xen_netif_tx_request txreq;
 +		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
@@ -19102,9 +20064,11 @@
 +		RING_IDX idx;
 +		int work_to_do;
 +		unsigned int data_len;
++		pending_ring_idx_t index;
 +	
 +		/* Get a netif from the list with work to do. */
-+		netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
++		netif = list_first_entry(&netbk->net_schedule_list,
++				struct xen_netif, list);
 +		netif_get(netif);
 +		remove_from_net_schedule_list(netif);
 +
@@ -19163,7 +20127,8 @@
 +			continue;
 +		}
 +
-+		pending_idx = pending_ring[pending_index(pending_cons)];
++		index = pending_index(netbk->pending_cons);
++		pending_idx = netbk->pending_ring[index];
 +
 +		data_len = (txreq.size > PKT_PROT_LEN &&
 +			    ret < MAX_SKB_FRAGS) ?
@@ -19191,14 +20156,14 @@
 +			}
 +		}
 +
-+		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
++		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
 +				  GNTMAP_host_map | GNTMAP_readonly,
 +				  txreq.gref, netif->domid);
 +		mop++;
 +
-+		memcpy(&pending_tx_info[pending_idx].req,
++		memcpy(&netbk->pending_tx_info[pending_idx].req,
 +		       &txreq, sizeof(txreq));
-+		pending_tx_info[pending_idx].netif = netif;
++		netbk->pending_tx_info[pending_idx].netif = netif;
 +		*((u16 *)skb->data) = pending_idx;
 +
 +		__skb_put(skb, data_len);
@@ -19213,40 +20178,40 @@
 +			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
 +		}
 +
-+		__skb_queue_tail(&tx_queue, skb);
++		__skb_queue_tail(&netbk->tx_queue, skb);
 +
-+		pending_cons++;
++		netbk->pending_cons++;
 +
-+		mop = netbk_get_requests(netif, skb, txfrags, mop);
++		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
 +
 +		netif->tx.req_cons = idx;
 +		netif_schedule_work(netif);
 +
-+		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
++		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
 +			break;
 +	}
 +
-+	return mop - tx_map_ops;
++	return mop - netbk->tx_map_ops;
 +}
 +
-+static void net_tx_submit(void)
++static void net_tx_submit(struct xen_netbk *netbk)
 +{
 +	struct gnttab_map_grant_ref *mop;
 +	struct sk_buff *skb;
 +
-+	mop = tx_map_ops;
-+	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
++	mop = netbk->tx_map_ops;
++	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
 +		struct xen_netif_tx_request *txp;
 +		struct xen_netif *netif;
 +		u16 pending_idx;
 +		unsigned data_len;
 +
 +		pending_idx = *((u16 *)skb->data);
-+		netif       = pending_tx_info[pending_idx].netif;
-+		txp         = &pending_tx_info[pending_idx].req;
++		netif = netbk->pending_tx_info[pending_idx].netif;
++		txp = &netbk->pending_tx_info[pending_idx].req;
 +
 +		/* Check the remap error code. */
-+		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
++		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
 +			DPRINTK("netback grant failed.\n");
 +			skb_shinfo(skb)->nr_frags = 0;
 +			kfree_skb(skb);
@@ -19255,7 +20220,7 @@
 +
 +		data_len = skb->len;
 +		memcpy(skb->data,
-+		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
++		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
 +		       data_len);
 +		if (data_len < txp->size) {
 +			/* Append the packet payload as a fragment. */
@@ -19263,7 +20228,7 @@
 +			txp->size -= data_len;
 +		} else {
 +			/* Schedule a response immediately. */
-+			netif_idx_release(pending_idx);
++			netif_idx_release(netbk, pending_idx);
 +		}
 +
 +		if (txp->flags & NETTXF_csum_blank)
@@ -19271,7 +20236,7 @@
 +		else if (txp->flags & NETTXF_data_validated)
 +			skb->ip_summed = CHECKSUM_UNNECESSARY;
 +
-+		netbk_fill_frags(skb);
++		netbk_fill_frags(netbk, skb);
 +
 +		/*
 +		 * If the initial fragment was < PKT_PROT_LEN then
@@ -19304,70 +20269,83 @@
 +			continue;
 +		}
 +
-+		netif_rx(skb);
++		netif_rx_ni(skb);
 +		netif->dev->last_rx = jiffies;
 +	}
-+
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&pending_inuse_head)) {
-+		struct netbk_tx_pending_inuse *oldest;
-+
-+		oldest = list_entry(pending_inuse_head.next,
-+				    struct netbk_tx_pending_inuse, list);
-+		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
-+	}
 +}
 +
 +/* Called after netfront has transmitted */
-+static void net_tx_action(unsigned long unused)
++static void net_tx_action(unsigned long data)
 +{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
 +	unsigned nr_mops;
 +	int ret;
 +
-+	if (dealloc_cons != dealloc_prod)
-+		net_tx_action_dealloc();
++	net_tx_action_dealloc(netbk);
 +
-+	nr_mops = net_tx_build_mops();
++	nr_mops = net_tx_build_mops(netbk);
 +
 +	if (nr_mops == 0)
-+		return;
++		goto out;
 +
 +	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-+					tx_map_ops, nr_mops);
++					netbk->tx_map_ops, nr_mops);
 +	BUG_ON(ret);
 +
-+	net_tx_submit();
++	net_tx_submit(netbk);
++out:
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head)) {
++		struct netbk_tx_pending_inuse *oldest;
++
++		oldest = list_entry(netbk->pending_inuse_head.next,
++				    struct netbk_tx_pending_inuse, list);
++		mod_timer(&netbk->netbk_tx_pending_timer,
++				oldest->alloc_time + HZ);
++	}
 +}
 +
-+static void netif_idx_release(u16 pending_idx)
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
 +{
 +	static DEFINE_SPINLOCK(_lock);
 +	unsigned long flags;
++	pending_ring_idx_t index;
 +
 +	spin_lock_irqsave(&_lock, flags);
-+	dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
++	index = pending_index(netbk->dealloc_prod);
++	netbk->dealloc_ring[index] = pending_idx;
 +	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
 +	smp_wmb();
-+	dealloc_prod++;
++	netbk->dealloc_prod++;
 +	spin_unlock_irqrestore(&_lock, flags);
 +
-+	tasklet_schedule(&net_tx_tasklet);
++	xen_netbk_bh_handler(netbk, 0);
 +}
 +
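The smp_wmb() above pairs with the smp_rmb() in net_tx_action_dealloc():
the producer stores the ring entry before advancing dealloc_prod, and the
consumer reads dealloc_prod before reading entries. A user-space sketch
of the same publish/consume idiom using C11 atomics (the driver
additionally serialises concurrent producers with the spinlock; all names
here are illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    #define RING_SIZE 256

    static uint16_t ring[RING_SIZE];
    static atomic_uint prod;	/* advanced only by the producer */
    static unsigned int cons;	/* private to the consumer */

    /* Producer: store the entry, /then/ publish it.  The release store
     * plays the role of smp_wmb() plus the dealloc_prod++ above. */
    static void publish(uint16_t idx)
    {
    	unsigned int p = atomic_load_explicit(&prod, memory_order_relaxed);

    	ring[p % RING_SIZE] = idx;
    	atomic_store_explicit(&prod, p + 1, memory_order_release);
    }

    /* Consumer: snapshot the producer index (the smp_rmb() side), then
     * drain entries up to it. */
    static void drain(void (*handle)(uint16_t))
    {
    	unsigned int p = atomic_load_explicit(&prod, memory_order_acquire);

    	while (cons != p)
    		handle(ring[cons++ % RING_SIZE]);
    }
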
 +static void netif_page_release(struct page *page, unsigned int order)
 +{
++	int group = netif_page_group(page);
 +	int idx = netif_page_index(page);
++	struct xen_netbk *netbk = &xen_netbk[group];
 +	BUG_ON(order);
-+	BUG_ON(idx < 0);
-+	netif_idx_release(idx);
++	BUG_ON(group < 0 || group >= xen_netbk_group_nr);
++	BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
++	BUG_ON(netbk->mmap_pages[idx] != page);
++	netif_idx_release(netbk, idx);
 +}
 +
 +irqreturn_t netif_be_int(int irq, void *dev_id)
 +{
 +	struct xen_netif *netif = dev_id;
++	struct xen_netbk *netbk;
++
++	if (netif->group == -1)
++		return IRQ_NONE;
++
++	netbk = &xen_netbk[netif->group];
 +
 +	add_to_net_schedule_list_tail(netif);
-+	maybe_schedule_tx_action();
++	maybe_schedule_tx_action(netbk);
 +
 +	if (netif_schedulable(netif) && !netbk_queue_full(netif))
 +		netif_wake_queue(netif->dev);
@@ -19398,9 +20376,9 @@
 +	 * is active.
 +	 */
 +	if ((netif->smart_poll == 1)) {
-+		if (!(netif->rx.sring->netfront_smartpoll_active)) {
++		if (!(netif->rx.sring->private.netif.smartpoll_active)) {
 +			notify_remote_via_irq(netif->irq);
-+			netif->rx.sring->netfront_smartpoll_active = 1;
++			netif->rx.sring->private.netif.smartpoll_active = 1;
 +		}
 +	} else if (notify)
 +		notify_remote_via_irq(netif->irq);
@@ -19435,75 +20413,180 @@
 +	struct list_head *ent;
 +	struct xen_netif *netif;
 +	int i = 0;
++	int group = 0;
 +
 +	printk(KERN_ALERT "netif_schedule_list:\n");
-+	spin_lock_irq(&net_schedule_list_lock);
 +
-+	list_for_each (ent, &net_schedule_list) {
-+		netif = list_entry(ent, struct xen_netif, list);
-+		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
-+		       "rx_resp_prod=%08x\n",
-+		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
-+		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
-+		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
-+		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
-+		       "rx_resp_prod=%08x\n",
-+		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
-+		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
-+		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
-+		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
-+		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
-+		i++;
++	for (group = 0; group < xen_netbk_group_nr; group++) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		spin_lock_irq(&netbk->net_schedule_list_lock);
++		printk(KERN_ALERT "xen_netback group number: %d\n", group);
++		list_for_each(ent, &netbk->net_schedule_list) {
++			netif = list_entry(ent, struct xen_netif, list);
++			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++				"rx_resp_prod=%08x\n",
++				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++			printk(KERN_ALERT
++				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
++				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++			printk(KERN_ALERT
++				"   shared(rx_req_prod=%08x "
++				"rx_resp_prod=%08x\n",
++				netif->rx.sring->req_prod,
++				netif->rx.sring->rsp_prod);
++			printk(KERN_ALERT
++				"   rx_event=%08x, tx_req_prod=%08x\n",
++				netif->rx.sring->rsp_event,
++				netif->tx.sring->req_prod);
++			printk(KERN_ALERT
++				"   tx_resp_prod=%08x, tx_event=%08x)\n",
++				netif->tx.sring->rsp_prod,
++				netif->tx.sring->rsp_event);
++			i++;
++		}
++		spin_unlock_irq(&netbk->net_schedule_list_lock);
 +	}
 +
-+	spin_unlock_irq(&net_schedule_list_lock);
 +	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
 +
 +	return IRQ_HANDLED;
 +}
 +#endif
 +
++static inline int rx_work_todo(struct xen_netbk *netbk)
++{
++	return !skb_queue_empty(&netbk->rx_queue);
++}
++
++static inline int tx_work_todo(struct xen_netbk *netbk)
++{
++	if (netbk->dealloc_cons != netbk->dealloc_prod)
++		return 1;
++
++	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++			!list_empty(&netbk->net_schedule_list))
++		return 1;
++
++	return 0;
++}
++
++static int netbk_action_thread(void *data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	while (!kthread_should_stop()) {
++		wait_event_interruptible(netbk->kthread.netbk_action_wq,
++				rx_work_todo(netbk)
++				|| tx_work_todo(netbk)
++				|| kthread_should_stop());
++		cond_resched();
++
++		if (kthread_should_stop())
++			break;
++
++		if (rx_work_todo(netbk))
++			net_rx_action((unsigned long)netbk);
++
++		if (tx_work_todo(netbk))
++			net_tx_action((unsigned long)netbk);
++	}
++
++	return 0;
++}
++
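xen_netbk_bh_handler(), called throughout this hunk, is defined outside
the quoted context. Judging from its call sites and the setup done in
netback_init() below, it plausibly dispatches to either the per-group
kthread or the per-group tasklets; a sketch of that assumed shape, not
taken from the patch:

    static void xen_netbk_bh_handler(struct xen_netbk *netbk, int rx)
    {
    	if (MODPARM_netback_kthread)
    		/* Kick the per-group kernel thread... */
    		wake_up(&netbk->kthread.netbk_action_wq);
    	else if (rx)
    		/* ...or run the rx/tx work in softirq context. */
    		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
    	else
    		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
    }
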
 +static int __init netback_init(void)
 +{
 +	int i;
 +	struct page *page;
 +	int rc = 0;
++	int group;
 +
-+	if (!xen_domain())
++	if (!xen_pv_domain())
 +		return -ENODEV;
 +
++	xen_netbk_group_nr = num_online_cpus();
++	xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
++	if (!xen_netbk) {
++		printk(KERN_ALERT "%s: out of memory\n", __func__);
++		return -ENOMEM;
++	}
++	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
++
 +	/* We can increase reservation by this much in net_rx_action(). */
 +//	balloon_update_driver_allowance(NET_RX_RING_SIZE);
 +
-+	skb_queue_head_init(&rx_queue);
-+	skb_queue_head_init(&tx_queue);
++	for (group = 0; group < xen_netbk_group_nr; group++) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		skb_queue_head_init(&netbk->rx_queue);
++		skb_queue_head_init(&netbk->tx_queue);
++
++		init_timer(&netbk->net_timer);
++		netbk->net_timer.data = (unsigned long)netbk;
++		netbk->net_timer.function = net_alarm;
++
++		init_timer(&netbk->netbk_tx_pending_timer);
++		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
++		netbk->netbk_tx_pending_timer.function =
++			netbk_tx_pending_timeout;
++
++		netbk->mmap_pages =
++			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++		if (!netbk->mmap_pages) {
++			printk(KERN_ALERT "%s: out of memory\n", __func__);
++			del_timer(&netbk->netbk_tx_pending_timer);
++			del_timer(&netbk->net_timer);
++			rc = -ENOMEM;
++			goto failed_init;
++		}
++
++		for (i = 0; i < MAX_PENDING_REQS; i++) {
++			page = netbk->mmap_pages[i];
++			SetPageForeign(page, netif_page_release);
++			netif_set_page_ext(page, group, i);
++			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
++		}
++
++		netbk->pending_cons = 0;
++		netbk->pending_prod = MAX_PENDING_REQS;
++		for (i = 0; i < MAX_PENDING_REQS; i++)
++			netbk->pending_ring[i] = i;
++
++		if (MODPARM_netback_kthread) {
++			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
++			netbk->kthread.task =
++				kthread_create(netbk_action_thread,
++					       (void *)netbk,
++					       "netback/%u", group);
++
++			if (!IS_ERR(netbk->kthread.task)) {
++				kthread_bind(netbk->kthread.task, group);
++				wake_up_process(netbk->kthread.task);
++			} else {
++				printk(KERN_ALERT
++					"kthread_create() failed at netback\n");
++				free_empty_pages_and_pagevec(netbk->mmap_pages,
++						MAX_PENDING_REQS);
++				del_timer(&netbk->netbk_tx_pending_timer);
++				del_timer(&netbk->net_timer);
++				rc = PTR_ERR(netbk->kthread.task);
++				goto failed_init;
++			}
++		} else {
++			tasklet_init(&netbk->tasklet.net_tx_tasklet,
++				     net_tx_action,
++				     (unsigned long)netbk);
++			tasklet_init(&netbk->tasklet.net_rx_tasklet,
++				     net_rx_action,
++				     (unsigned long)netbk);
++		}
++
++		INIT_LIST_HEAD(&netbk->pending_inuse_head);
++		INIT_LIST_HEAD(&netbk->net_schedule_list);
 +
-+	init_timer(&net_timer);
-+	net_timer.data = 0;
-+	net_timer.function = net_alarm;
-+
-+	init_timer(&netbk_tx_pending_timer);
-+	netbk_tx_pending_timer.data = 0;
-+	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
-+
-+	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-+	if (mmap_pages == NULL) {
-+		printk("%s: out of memory\n", __FUNCTION__);
-+		return -ENOMEM;
-+	}
++		spin_lock_init(&netbk->net_schedule_list_lock);
 +
-+	for (i = 0; i < MAX_PENDING_REQS; i++) {
-+		page = mmap_pages[i];
-+		SetPageForeign(page, netif_page_release);
-+		netif_set_page_index(page, i);
-+		INIT_LIST_HEAD(&pending_inuse[i].list);
++		atomic_set(&netbk->netfront_count, 0);
 +	}
 +
-+	pending_cons = 0;
-+	pending_prod = MAX_PENDING_REQS;
-+	for (i = 0; i < MAX_PENDING_REQS; i++)
-+		pending_ring[i] = i;
-+
 +	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
 +	if (MODPARM_copy_skb) {
 +		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
@@ -19523,7 +20606,7 @@
 +	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
 +				      0,
 +				      netif_be_dbg,
-+				      SA_SHIRQ,
++				      IRQF_SHARED,
 +				      "net-be-dbg",
 +				      &netif_be_dbg);
 +#endif
@@ -19531,9 +20614,16 @@
 +	return 0;
 +
 +failed_init:
-+	free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
-+	del_timer(&netbk_tx_pending_timer);
-+	del_timer(&net_timer);
++	for (i = 0; i < group; i++) {
++		struct xen_netbk *netbk = &xen_netbk[i];
++		free_empty_pages_and_pagevec(netbk->mmap_pages,
++				MAX_PENDING_REQS);
++		del_timer(&netbk->netbk_tx_pending_timer);
++		del_timer(&netbk->net_timer);
++		if (MODPARM_netback_kthread)
++			kthread_stop(netbk->kthread.task);
++	}
++	vfree(xen_netbk);
 +	return rc;
 +
 +}
@@ -19543,10 +20633,10 @@
 +MODULE_LICENSE("Dual BSD/GPL");
 diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
 new file mode 100644
-index 0000000..70636d0
+index 0000000..99831c7
 --- /dev/null
 +++ b/drivers/xen/netback/xenbus.c
-@@ -0,0 +1,523 @@
+@@ -0,0 +1,524 @@
 +/*  Xenbus code for netif backend
 +    Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
 +    Copyright (C) 2005 XenSource Ltd
@@ -19711,12 +20801,17 @@
 + */
 +static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
 +{
-+	struct backend_info *be = dev_get_drvdata(&xdev->dev);
-+	struct xen_netif *netif = be->netif;
++	struct backend_info *be;
++	struct xen_netif *netif;
 +	char *val;
 +
 +	DPRINTK("netback_uevent");
 +
++	be = dev_get_drvdata(&xdev->dev);
++	if (!be)
++		return 0;
++	netif = be->netif;
++
 +	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
 +	if (IS_ERR(val)) {
 +		int err = PTR_ERR(val);
@@ -19956,6 +21051,7 @@
 +
 +static int connect_rings(struct backend_info *be)
 +{
++	struct xen_netif *netif = be->netif;
 +	struct xenbus_device *dev = be->dev;
 +	unsigned long tx_ring_ref, rx_ring_ref;
 +	unsigned int evtchn, rx_copy;
@@ -19989,52 +21085,47 @@
 +	if (!rx_copy)
 +		return -EOPNOTSUPP;
 +
-+	if (be->netif->dev->tx_queue_len != 0) {
++	if (netif->dev->tx_queue_len != 0) {
 +		if (xenbus_scanf(XBT_NIL, dev->otherend,
 +				 "feature-rx-notify", "%d", &val) < 0)
 +			val = 0;
 +		if (val)
-+			be->netif->can_queue = 1;
++			netif->can_queue = 1;
 +		else
 +			/* Must be non-zero for pfifo_fast to work. */
-+			be->netif->dev->tx_queue_len = 1;
++			netif->dev->tx_queue_len = 1;
 +	}
 +
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
++			 "%d", &val) < 0)
 +		val = 0;
-+	if (!val) {
-+		be->netif->features &= ~NETIF_F_SG;
-+		be->netif->dev->features &= ~NETIF_F_SG;
-+		if (be->netif->dev->mtu > ETH_DATA_LEN)
-+			be->netif->dev->mtu = ETH_DATA_LEN;
-+	}
++	netif->can_sg = !!val;
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
++			 "%d", &val) < 0)
++		val = 0;
++	netif->gso = !!val;
 +
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
-+			 &val) < 0)
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
++			 "%d", &val) < 0)
 +		val = 0;
-+	if (val) {
-+		be->netif->features |= NETIF_F_TSO;
-+		be->netif->dev->features |= NETIF_F_TSO;
-+	}
++	netif->gso_prefix = !!val;
 +
 +	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
 +			 "%d", &val) < 0)
 +		val = 0;
-+	if (val) {
-+		be->netif->features &= ~NETIF_F_IP_CSUM;
-+		be->netif->dev->features &= ~NETIF_F_IP_CSUM;
-+	}
++	netif->csum = !val;
 +
 +	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-smart-poll",
 +			 "%d", &val) < 0)
 +		val = 0;
-+	if (val)
-+		be->netif->smart_poll = 1;
-+	else
-+		be->netif->smart_poll = 0;
++	netif->smart_poll = !!val;
++
++	/* Set dev->features */
++	netif_set_features(netif);
 +
 +	/* Map the shared frame, irq etc. */
-+	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
++	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
 +	if (err) {
 +		xenbus_dev_fatal(dev, err,
 +				 "mapping shared-frames %lu/%lu port %u",
@@ -24612,10 +25703,10 @@
 +}
 diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
 new file mode 100644
-index 0000000..d448bf5
+index 0000000..f0d5426
 --- /dev/null
 +++ b/drivers/xen/pciback/xenbus.c
-@@ -0,0 +1,722 @@
+@@ -0,0 +1,730 @@
 +/*
 + * PCI Backend Xenbus Setup - handles setup with frontend and xend
 + *
@@ -24672,23 +25763,31 @@
 +		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
 +		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
 +	}
++	spin_unlock(&pdev->dev_lock);
 +
 +	/* If the driver domain started an op, make sure we complete it
 +	 * before releasing the shared memory */
++
++	/* Note: the workqueue does not use spinlocks at all. */
 +	flush_workqueue(pciback_wq);
 +
++	spin_lock(&pdev->dev_lock);
 +	if (pdev->sh_info != NULL) {
 +		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
 +		pdev->sh_info = NULL;
 +	}
-+
 +	spin_unlock(&pdev->dev_lock);
++
 +}
 +
 +static void free_pdev(struct pciback_device *pdev)
 +{
-+	if (pdev->be_watching)
++	spin_lock(&pdev->dev_lock);
++	if (pdev->be_watching) {
 +		unregister_xenbus_watch(&pdev->be_watch);
++		pdev->be_watching = 0;
++	}
++	spin_unlock(&pdev->dev_lock);
 +
 +	pciback_disconnect(pdev);
 +
@@ -24716,7 +25815,10 @@
 +				"Error mapping other domain page in ours.");
 +		goto out;
 +	}
++
++	spin_lock(&pdev->dev_lock);
 +	pdev->sh_info = vaddr;
++	spin_unlock(&pdev->dev_lock);
 +
 +	err = bind_interdomain_evtchn_to_irqhandler(
 +		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
@@ -24726,7 +25828,10 @@
 +				 "Error binding event channel to IRQ");
 +		goto out;
 +	}
++
++	spin_lock(&pdev->dev_lock);
 +	pdev->evtchn_irq = err;
++	spin_unlock(&pdev->dev_lock);
 +	err = 0;
 +
 +	dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -24740,7 +25845,6 @@
 +	int gnt_ref, remote_evtchn;
 +	char *magic = NULL;
 +
-+	spin_lock(&pdev->dev_lock);
 +
 +	/* Make sure we only do this setup once */
 +	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
@@ -24786,7 +25890,6 @@
 +
 +	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
 +out:
-+	spin_unlock(&pdev->dev_lock);
 +
 +	kfree(magic);
 +
@@ -24958,7 +26061,6 @@
 +	char state_str[64];
 +	char dev_str[64];
 +
-+	spin_lock(&pdev->dev_lock);
 +
 +	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
 +
@@ -25099,8 +26201,6 @@
 +	}
 +
 +out:
-+	spin_unlock(&pdev->dev_lock);
-+
 +	return 0;
 +}
 +
@@ -25157,8 +26257,6 @@
 +	char dev_str[64];
 +	char state_str[64];
 +
-+	spin_lock(&pdev->dev_lock);
-+
 +	/* It's possible we could get the call to setup twice, so make sure
 +	 * we're not already connected.
 +	 */
@@ -25239,8 +26337,6 @@
 +				 "Error switching to initialised state!");
 +
 +out:
-+	spin_unlock(&pdev->dev_lock);
-+
 +	if (!err)
 +		/* see if pcifront is already configured (if not, we'll wait) */
 +		pciback_attach(pdev);
@@ -25287,7 +26383,10 @@
 +				pciback_be_watch);
 +	if (err)
 +		goto out;
++
++	spin_lock(&pdev->dev_lock);
 +	pdev->be_watching = 1;
++	spin_unlock(&pdev->dev_lock);
 +
 +	/* We need to force a call to our callback here in case
 +	 * xend already configured us!
@@ -25326,8 +26425,8 @@
 +{
 +	pciback_wq = create_workqueue("pciback_workqueue");
 +	if (!pciback_wq) {
-+		printk(KERN_ERR "pciback_xenbus_register: create"
-+			"pciback_workqueue failed\n");
++		printk(KERN_ERR "%s: create "
++			"pciback_workqueue failed\n", __FUNCTION__);
 +		return -EFAULT;
 +	}
 +	return xenbus_register_backend(&xenbus_pciback_driver);
@@ -25766,10 +26865,10 @@
 +subsys_initcall(xen_pcpu_init);
 diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
 new file mode 100644
-index 0000000..a33074e
+index 0000000..c01b5dd
 --- /dev/null
 +++ b/drivers/xen/platform-pci.c
-@@ -0,0 +1,259 @@
+@@ -0,0 +1,207 @@
 +/******************************************************************************
 + * platform-pci.c
 + *
@@ -25793,15 +26892,14 @@
 + *
 + */
 +
-+#include <asm/io.h>
 +
 +#include <linux/interrupt.h>
++#include <linux/io.h>
 +#include <linux/module.h>
 +#include <linux/pci.h>
 +
-+#include <xen/grant_table.h>
 +#include <xen/platform_pci.h>
-+#include <xen/interface/platform_pci.h>
++#include <xen/grant_table.h>
 +#include <xen/xenbus.h>
 +#include <xen/events.h>
 +#include <xen/hvm.h>
@@ -25817,7 +26915,6 @@
 +static unsigned long platform_mmio_alloc;
 +static unsigned long platform_mmiolen;
 +static uint64_t callback_via;
-+struct pci_dev *xen_platform_pdev;
 +
 +unsigned long alloc_xen_mmio(unsigned long len)
 +{
@@ -25851,36 +26948,28 @@
 +
 +static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
 +{
-+	xen_hvm_evtchn_do_upcall(get_irq_regs());
++	xen_hvm_evtchn_do_upcall();
 +	return IRQ_HANDLED;
 +}
 +
 +static int xen_allocate_irq(struct pci_dev *pdev)
 +{
-+	__set_irq_handler(pdev->irq, handle_edge_irq, 0, NULL);
 +	return request_irq(pdev->irq, do_hvm_evtchn_intr,
 +			IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
 +			"xen-platform-pci", pdev);
 +}
 +
-+void platform_pci_disable_irq(void)
-+{
-+	printk(KERN_DEBUG "platform_pci_disable_irq\n");
-+	disable_irq(xen_platform_pdev->irq);
-+}
-+
-+void platform_pci_enable_irq(void)
++static int platform_pci_resume(struct pci_dev *pdev)
 +{
-+	printk(KERN_DEBUG "platform_pci_enable_irq\n");
-+	enable_irq(xen_platform_pdev->irq);
-+}
-+
-+void platform_pci_resume(void)
-+{
-+	if (xen_set_callback_via(callback_via)) {
-+		printk("platform_pci_resume failure!\n");
-+		return;
++	int err;
++	if (xen_have_vector_callback)
++		return 0;
++	err = xen_set_callback_via(callback_via);
++	if (err) {
++		dev_err(&pdev->dev, "platform_pci_resume failure!\n");
++		return err;
 +	}
++	return 0;
 +}
 +
 +static int __devinit platform_pci_init(struct pci_dev *pdev,
@@ -25889,7 +26978,7 @@
 +	int i, ret;
 +	long ioaddr, iolen;
 +	long mmio_addr, mmio_len;
-+	xen_platform_pdev = pdev;
++	unsigned int max_nr_gframes;
 +
 +	i = pci_enable_device(pdev);
 +	if (i)
@@ -25904,19 +26993,21 @@
 +	if (mmio_addr == 0 || ioaddr == 0) {
 +		dev_err(&pdev->dev, "no resources found\n");
 +		ret = -ENOENT;
++		goto pci_out;
 +	}
 +
 +	if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) {
 +		dev_err(&pdev->dev, "MEM I/O resource 0x%lx @ 0x%lx busy\n",
 +		       mmio_addr, mmio_len);
 +		ret = -EBUSY;
++		goto pci_out;
 +	}
 +
 +	if (request_region(ioaddr, iolen, DRV_NAME) == NULL) {
 +		dev_err(&pdev->dev, "I/O resource 0x%lx @ 0x%lx busy\n",
 +		       iolen, ioaddr);
 +		ret = -EBUSY;
-+		goto out;
++		goto mem_out;
 +	}
 +
 +	platform_mmio = mmio_addr;
@@ -25925,107 +27016,63 @@
 +	if (!xen_have_vector_callback) {
 +		ret = xen_allocate_irq(pdev);
 +		if (ret) {
-+			printk(KERN_WARNING "request_irq failed err=%d\n", ret);
++			dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
 +			goto out;
 +		}
 +		callback_via = get_callback_via(pdev);
 +		ret = xen_set_callback_via(callback_via);
 +		if (ret) {
-+			printk(KERN_WARNING
-+					"Unable to set the evtchn callback err=%d\n", ret);
++			dev_warn(&pdev->dev, "Unable to set the evtchn callback "
++					 "err=%d\n", ret);
 +			goto out;
 +		}
 +	}
++
++	max_nr_gframes = gnttab_max_grant_frames();
++	xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
 +	ret = gnttab_init();
 +	if (ret)
 +		goto out;
-+	ret = xenbus_probe_init();
-+	if (ret)
-+		goto out;
++	xenbus_probe(NULL);
 +	ret = xen_setup_shutdown_event();
 +	if (ret)
 +		goto out;
-+
++	return 0;
 +
 +out:
-+	if (ret) {
-+		release_mem_region(mmio_addr, mmio_len);
-+		release_region(ioaddr, iolen);
-+		pci_disable_device(pdev);
-+	}
-+
++	release_region(ioaddr, iolen);
++mem_out:
++	release_mem_region(mmio_addr, mmio_len);
++pci_out:
++	pci_disable_device(pdev);
 +	return ret;
 +}
 +
-+#define XEN_PLATFORM_VENDOR_ID 0x5853
-+#define XEN_PLATFORM_DEVICE_ID 0x0001
 +static struct pci_device_id platform_pci_tbl[] __devinitdata = {
-+	{XEN_PLATFORM_VENDOR_ID, XEN_PLATFORM_DEVICE_ID,
-+	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++	{PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM,
++		PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 +	{0,}
 +};
 +
 +MODULE_DEVICE_TABLE(pci, platform_pci_tbl);
 +
 +static struct pci_driver platform_driver = {
-+	name:     DRV_NAME,
-+	probe :    platform_pci_init,
-+	id_table : platform_pci_tbl,
++	.name =           DRV_NAME,
++	.probe =          platform_pci_init,
++	.id_table =       platform_pci_tbl,
++#ifdef CONFIG_PM
++	.resume_early =   platform_pci_resume,
++#endif
 +};
 +
-+static int check_platform_magic(void)
-+{
-+	short magic;
-+	char protocol, *err;
-+
-+	magic = inw(XEN_IOPORT_MAGIC);
-+
-+	if (magic != XEN_IOPORT_MAGIC_VAL) {
-+		err = "unrecognised magic value";
-+		goto no_dev;
-+	}
-+
-+	protocol = inb(XEN_IOPORT_PROTOVER);
-+
-+	printk(KERN_DEBUG DRV_NAME "I/O protocol version %d\n", protocol);
-+
-+	switch (protocol) {
-+	case 1:
-+		outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
-+		outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
-+		if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
-+			printk(KERN_ERR DRV_NAME "blacklisted by host\n");
-+			return -ENODEV;
-+		}
-+		break;
-+	default:
-+		err = "unknown I/O protocol version";
-+		goto no_dev;
-+	}
-+
-+	return 0;
-+
-+ no_dev:
-+	printk(KERN_WARNING DRV_NAME  "failed backend handshake: %s\n", err);
-+	return -ENODEV;
-+}
-+
 +static int __init platform_pci_module_init(void)
 +{
-+	int rc;
-+
-+	rc = check_platform_magic();
-+	if (rc < 0)
-+		return rc;
-+
-+	rc = pci_register_driver(&platform_driver);
-+	if (rc) {
-+		printk(KERN_INFO DRV_NAME
-+		       ": No platform pci device model found\n");
-+		return rc;
-+	}
++	/* If no unplug has been done and IGNORE hasn't been specified,
++	 * just return now. */
++	if (!xen_platform_pci_unplug)
++		return -ENODEV;
 +
-+	return 0;
++	return pci_register_driver(&platform_driver);
 +}
 +
 +module_init(platform_pci_module_init);
@@ -26410,10 +27457,10 @@
   * @dev: xenbus device
   * @ring_mfn: mfn of ring to grant
 diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
-index 649fcdf..57fb749 100644
+index 649fcdf..3a83ba2 100644
 --- a/drivers/xen/xenbus/xenbus_probe.c
 +++ b/drivers/xen/xenbus/xenbus_probe.c
-@@ -49,31 +49,28 @@
+@@ -49,31 +49,29 @@
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include <asm/xen/hypervisor.h>
@@ -26423,6 +27470,7 @@
  #include <xen/events.h>
  #include <xen/page.h>
  
++#include <xen/platform_pci.h>
 +#include <xen/hvm.h>
 +
  #include "xenbus_comms.h"
@@ -26452,7 +27500,7 @@
  /* If something in array of ids matches this device, return it. */
  static const struct xenbus_device_id *
  match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
-@@ -94,34 +91,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
+@@ -94,34 +92,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
  
  	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
  }
@@ -26488,7 +27536,7 @@
  
  
  static void free_otherend_details(struct xenbus_device *dev)
-@@ -141,7 +111,28 @@ static void free_otherend_watch(struct xenbus_device *dev)
+@@ -141,7 +112,28 @@ static void free_otherend_watch(struct xenbus_device *dev)
  }
  
  
@@ -26518,7 +27566,7 @@
  				 char *id_node, char *path_node)
  {
  	int err = xenbus_gather(XBT_NIL, xendev->nodename,
-@@ -166,39 +157,11 @@ int read_otherend_details(struct xenbus_device *xendev,
+@@ -166,39 +158,11 @@ int read_otherend_details(struct xenbus_device *xendev,
  
  	return 0;
  }
@@ -26562,7 +27610,7 @@
  {
  	struct xenbus_device *dev =
  		container_of(watch, struct xenbus_device, otherend_watch);
-@@ -226,11 +189,7 @@ static void otherend_changed(struct xenbus_watch *watch,
+@@ -226,11 +190,7 @@ static void otherend_changed(struct xenbus_watch *watch,
  	 * work that can fail e.g., when the rootfs is gone.
  	 */
  	if (system_state > SYSTEM_RUNNING) {
@@ -26575,7 +27623,7 @@
  			xenbus_frontend_closed(dev);
  		return;
  	}
-@@ -238,25 +197,7 @@ static void otherend_changed(struct xenbus_watch *watch,
+@@ -238,25 +198,7 @@ static void otherend_changed(struct xenbus_watch *watch,
  	if (drv->otherend_changed)
  		drv->otherend_changed(dev, state);
  }
@@ -26602,7 +27650,7 @@
  
  int xenbus_dev_probe(struct device *_dev)
  {
-@@ -300,8 +241,9 @@ int xenbus_dev_probe(struct device *_dev)
+@@ -300,8 +242,9 @@ int xenbus_dev_probe(struct device *_dev)
  fail:
  	xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
  	xenbus_switch_state(dev, XenbusStateClosed);
@@ -26613,7 +27661,7 @@
  
  int xenbus_dev_remove(struct device *_dev)
  {
-@@ -319,8 +261,9 @@ int xenbus_dev_remove(struct device *_dev)
+@@ -319,8 +262,9 @@ int xenbus_dev_remove(struct device *_dev)
  	xenbus_switch_state(dev, XenbusStateClosed);
  	return 0;
  }
@@ -26624,7 +27672,7 @@
  {
  	struct xenbus_device *dev = to_xenbus_device(_dev);
  	unsigned long timeout = 5*HZ;
-@@ -341,6 +284,7 @@ static void xenbus_dev_shutdown(struct device *_dev)
+@@ -341,6 +285,7 @@ static void xenbus_dev_shutdown(struct device *_dev)
   out:
  	put_device(&dev->dev);
  }
@@ -26632,7 +27680,7 @@
  
  int xenbus_register_driver_common(struct xenbus_driver *drv,
  				  struct xen_bus_type *bus,
-@@ -354,25 +298,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv,
+@@ -354,25 +299,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv,
  
  	return driver_register(&drv->driver);
  }
@@ -26659,7 +27707,7 @@
  
  void xenbus_unregister_driver(struct xenbus_driver *drv)
  {
-@@ -543,24 +469,7 @@ fail:
+@@ -543,24 +470,7 @@ fail:
  	kfree(xendev);
  	return err;
  }
@@ -26685,7 +27733,7 @@
  
  static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
  {
-@@ -574,10 +483,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
+@@ -574,10 +484,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
  		return PTR_ERR(dir);
  
  	for (i = 0; i < dir_n; i++) {
@@ -26698,7 +27746,7 @@
  	kfree(dir);
  	return err;
  }
-@@ -597,9 +507,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus)
+@@ -597,9 +508,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus)
  		if (err)
  			break;
  	}
@@ -26710,7 +27758,7 @@
  
  static unsigned int char_count(const char *str, char c)
  {
-@@ -662,32 +574,17 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+@@ -662,32 +575,17 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
  }
  EXPORT_SYMBOL_GPL(xenbus_dev_changed);
  
@@ -26746,7 +27794,7 @@
  	if (drv->suspend)
  		err = drv->suspend(xdev, state);
  	if (err)
-@@ -695,21 +592,19 @@ static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+@@ -695,21 +593,19 @@ static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
  		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
  	return 0;
  }
@@ -26772,7 +27820,7 @@
  	err = talk_to_otherend(xdev);
  	if (err) {
  		printk(KERN_WARNING
-@@ -740,6 +635,7 @@ static int xenbus_dev_resume(struct device *dev)
+@@ -740,6 +636,7 @@ static int xenbus_dev_resume(struct device *dev)
  
  	return 0;
  }
@@ -26780,7 +27828,19 @@
  
  /* A flag to determine if xenstored is 'ready' (i.e. has started) */
  int xenstored_ready = 0;
-@@ -768,52 +664,78 @@ void xenbus_probe(struct work_struct *unused)
+@@ -749,10 +646,7 @@ int register_xenstore_notifier(struct notifier_block *nb)
+ {
+ 	int ret = 0;
+ 
+-	if (xenstored_ready > 0)
+-		ret = nb->notifier_call(nb, 0, NULL);
+-	else
+-		blocking_notifier_chain_register(&xenstore_chain, nb);
++	blocking_notifier_chain_register(&xenstore_chain, nb);
+ 
+ 	return ret;
+ }
+@@ -768,57 +662,93 @@ void xenbus_probe(struct work_struct *unused)
  {
  	BUG_ON((xenstored_ready <= 0));
  
@@ -26792,39 +27852,43 @@
  	/* Notify others that xenstore is up */
  	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
  }
++EXPORT_SYMBOL_GPL(xenbus_probe);
++
++static int __init xenbus_probe_initcall(void)
++{
++	if (!xen_domain())
++		return -ENODEV;
++
++	if (xen_initial_domain() || xen_hvm_domain())
++		return 0;
++
++	xenbus_probe(NULL);
++	return 0;
++}
++
++device_initcall(xenbus_probe_initcall);
  
 -static int __init xenbus_probe_init(void)
-+static int __init __xenbus_probe_init(void)
++static int __init xenbus_init(void)
  {
--	int err = 0;
-+	/* Delay initialization in the PV on HVM case */
-+	if (xen_hvm_domain())
-+		return 0;
+ 	int err = 0;
++	unsigned long page = 0;
  
--	DPRINTK("");
-+	if (!xen_pv_domain())
-+		return -ENODEV;
+ 	DPRINTK("");
  
--	err = -ENODEV;
--	if (!xen_domain())
+ 	err = -ENODEV;
+ 	if (!xen_domain())
 -		goto out_error;
-+	return xenbus_probe_init();
-+}
- 
+-
 -	/* Register ourselves with the kernel bus subsystem */
 -	err = bus_register(&xenbus_frontend.bus);
--	if (err)
--		goto out_error;
-+int xenbus_probe_init(void)
-+{
-+	int err = 0;
-+	unsigned long page = 0;
-+
-+	DPRINTK("");
- 
+-	if (err)
+-		goto out_error;
+-
 -	err = xenbus_backend_bus_register();
 -	if (err)
 -		goto out_unreg_front;
++		return err;
  
  	/*
  	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
@@ -26861,8 +27925,15 @@
 -		xen_store_evtchn = xen_start_info->store_evtchn;
 -		xen_store_mfn = xen_start_info->store_mfn;
 +		if (xen_hvm_domain()) {
-+			xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
-+			xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
++			uint64_t v = 0;
++			err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
++			if (err)
++				goto out_error;
++			xen_store_evtchn = (int)v;
++			err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
++			if (err)
++				goto out_error;
++			xen_store_mfn = (unsigned long)v;
 +			xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
 +		} else {
 +			xen_store_evtchn = xen_start_info->store_evtchn;
@@ -26881,8 +27952,13 @@
 +		goto out_error;
  	}
  
- 	if (!xen_initial_domain())
-@@ -829,128 +751,13 @@ static int __init xenbus_probe_init(void)
+-	if (!xen_initial_domain())
+-		xenbus_probe(NULL);
+-
+ #ifdef CONFIG_XEN_COMPAT_XENFS
+ 	/*
+ 	 * Create xenfs mountpoint in /proc for compatibility with
+@@ -829,128 +759,13 @@ static int __init xenbus_probe_init(void)
  
  	return 0;
  
@@ -26900,7 +27976,7 @@
  }
  
 -postcore_initcall(xenbus_probe_init);
-+postcore_initcall(__xenbus_probe_init);
++postcore_initcall(xenbus_init);
  
  MODULE_LICENSE("GPL");
 -
@@ -27366,10 +28442,10 @@
 +subsys_initcall(xenbus_probe_backend_init);
 diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
 new file mode 100644
-index 0000000..54e4d70
+index 0000000..5413248
 --- /dev/null
 +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
-@@ -0,0 +1,314 @@
+@@ -0,0 +1,292 @@
 +#define DPRINTK(fmt, args...)				\
 +	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
 +		 __func__, __LINE__, ##args)
@@ -27392,6 +28468,8 @@
 +#include <xen/xenbus.h>
 +#include <xen/events.h>
 +#include <xen/page.h>
++#include <xen/xen.h>
++#include <xen/platform_pci.h>
 +
 +#include "xenbus_comms.h"
 +#include "xenbus_probe.h"
@@ -27624,33 +28702,6 @@
 +	return NOTIFY_DONE;
 +}
 +
-+static int dev_suspend(struct device *dev, void *data)
-+{
-+	return xenbus_dev_suspend(dev, PMSG_SUSPEND);
-+}
-+
-+static int dev_resume(struct device *dev, void *data)
-+{
-+	return xenbus_dev_resume(dev);
-+}
-+
-+void xenbus_suspend(void)
-+{
-+	DPRINTK("");
-+
-+	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, dev_suspend);
-+	xs_suspend();
-+}
-+EXPORT_SYMBOL_GPL(xenbus_suspend);
-+
-+void xenbus_resume(void)
-+{
-+	DPRINTK("");
-+
-+	xs_resume();
-+	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, dev_resume);
-+}
-+EXPORT_SYMBOL_GPL(xenbus_resume);
 +
 +static int __init xenbus_probe_frontend_init(void)
 +{
@@ -27675,6 +28726,9 @@
 +#ifndef MODULE
 +static int __init boot_wait_for_devices(void)
 +{
++	if (xen_hvm_domain() && !xen_platform_pci_unplug)
++		return -ENODEV;
++
 +	ready_to_wait_for_devices = 1;
 +	wait_for_devices(NULL);
 +	return 0;
@@ -27685,7 +28739,7 @@
 +
 +MODULE_LICENSE("GPL");
 diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
-index eab33f1..6f91e8c 100644
+index 7b547f5..5534690 100644
 --- a/drivers/xen/xenbus/xenbus_xs.c
 +++ b/drivers/xen/xenbus/xenbus_xs.c
 @@ -76,6 +76,14 @@ struct xs_handle {
@@ -28380,6 +29434,20 @@
  }
  
  static void __exit xenfs_exit(void)
+diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c
+index 6c4269b..64b3be4 100644
+--- a/drivers/xen/xenfs/xenbus.c
++++ b/drivers/xen/xenfs/xenbus.c
+@@ -123,6 +123,9 @@ static ssize_t xenbus_file_read(struct file *filp,
+ 	mutex_lock(&u->reply_mutex);
+ 	while (list_empty(&u->read_buffers)) {
+ 		mutex_unlock(&u->reply_mutex);
++		if (filp->f_flags & O_NONBLOCK)
++			return -EAGAIN;
++
+ 		ret = wait_event_interruptible(u->read_waitq,
+ 					       !list_empty(&u->read_buffers));
+ 		if (ret)
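
With this change in place, a userspace client can open the xenbus node
non-blocking and drive it from a poll() loop instead of sleeping in read().
A minimal hedged sketch (the /proc/xen/xenbus path is the xenfs compat
mountpoint mentioned elsewhere in this patch; buffer size is illustrative):

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* open the xenbus device without blocking semantics */
	int fd = open("/proc/xen/xenbus", O_RDWR | O_NONBLOCK);
	char buf[1024];
	ssize_t n;

	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf));
	if (n < 0 && errno == EAGAIN) {
		/* no reply queued yet: with this patch we get -EAGAIN
		 * instead of blocking; wait in poll()/select() and retry */
	}
	close(fd);
	return 0;
}
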
 diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
 index 51f08b2..b68aa62 100644
 --- a/drivers/xen/xenfs/xenfs.h
@@ -28497,16 +29565,28 @@
 +
  #endif /*__ACPI_DRIVERS_H__*/
 diff --git a/include/acpi/processor.h b/include/acpi/processor.h
-index 740ac3a..3d1205f 100644
+index 740ac3a..7ee588d 100644
 --- a/include/acpi/processor.h
 +++ b/include/acpi/processor.h
-@@ -238,6 +238,13 @@ struct acpi_processor_errata {
+@@ -238,6 +238,25 @@ struct acpi_processor_errata {
  	} piix4;
  };
  
 +extern int acpi_processor_errata(struct acpi_processor *pr);
++#ifdef CONFIG_ACPI_PROCFS
 +extern int acpi_processor_add_fs(struct acpi_device *device);
 +extern int acpi_processor_remove_fs(struct acpi_device *device);
++#else
++static inline int acpi_processor_add_fs(struct acpi_device *device)
++{
++	return 0;
++}
++
++static inline int acpi_processor_remove_fs(struct acpi_device *device)
++{
++	return 0;
++}
++#endif
 +extern int acpi_processor_set_pdc(struct acpi_processor *pr);
 +extern int acpi_processor_remove(struct acpi_device *device, int type);
 +extern void acpi_processor_notify(struct acpi_device *device, u32 event);
@@ -28514,7 +29594,7 @@
  extern int acpi_processor_preregister_performance(struct
  						  acpi_processor_performance
  						  *performance);
-@@ -295,6 +302,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
+@@ -295,6 +314,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
  void acpi_processor_ppc_init(void);
  void acpi_processor_ppc_exit(void);
  int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
@@ -28523,7 +29603,7 @@
  #else
  static inline void acpi_processor_ppc_init(void)
  {
-@@ -331,6 +340,7 @@ int acpi_processor_power_init(struct acpi_processor *pr,
+@@ -331,6 +352,7 @@ int acpi_processor_power_init(struct acpi_processor *pr,
  int acpi_processor_cst_has_changed(struct acpi_processor *pr);
  int acpi_processor_power_exit(struct acpi_processor *pr,
  			      struct acpi_device *device);
@@ -28606,6 +29686,67 @@
  #define FBINFO_PARTIAL_PAN_OK	0x0040 /* otw use pan only for double-buffering */
  #define FBINFO_READS_FAST	0x0080 /* soft-copy faster than rendering */
  
+diff --git a/include/linux/if_link.h b/include/linux/if_link.h
+index 176c518..d681cc9 100644
+--- a/include/linux/if_link.h
++++ b/include/linux/if_link.h
+@@ -81,6 +81,8 @@ enum
+ #define IFLA_LINKINFO IFLA_LINKINFO
+ 	IFLA_NET_NS_PID,
+ 	IFLA_IFALIAS,
++	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
++	IFLA_VFINFO_LIST,
+ 	__IFLA_MAX
+ };
+ 
+@@ -190,4 +192,47 @@ struct ifla_vlan_qos_mapping
+ 	__u32 to;
+ };
+ 
++/* SR-IOV virtual function management section */
++
++enum {
++	IFLA_VF_INFO_UNSPEC,
++	IFLA_VF_INFO,
++	__IFLA_VF_INFO_MAX,
++};
++
++#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
++
++enum {
++	IFLA_VF_UNSPEC,
++	IFLA_VF_MAC,		/* Hardware queue specific attributes */
++	IFLA_VF_VLAN,
++	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
++	__IFLA_VF_MAX,
++};
++
++#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
++
++struct ifla_vf_mac {
++	__u32 vf;
++	__u8 mac[32]; /* MAX_ADDR_LEN */
++};
++
++struct ifla_vf_vlan {
++	__u32 vf;
++	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
++	__u32 qos;
++};
++
++struct ifla_vf_tx_rate {
++	__u32 vf;
++	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
++};
++
++struct ifla_vf_info {
++	__u32 vf;
++	__u8 mac[32];
++	__u32 vlan;
++	__u32 qos;
++	__u32 tx_rate;
++};
+ #endif /* _LINUX_IF_LINK_H */
 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
 index 7ca72b7..1c30adf 100644
 --- a/include/linux/interrupt.h
@@ -28619,7 +29760,7 @@
  /*
   * Bits used by threaded handlers:
 diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 24c3956..3d74515 100644
+index 24c3956..e8cf80f 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -105,6 +105,12 @@ extern unsigned int kobjsize(const void *objp);
@@ -28635,7 +29776,7 @@
  
  #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
  #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
-@@ -195,6 +201,15 @@ struct vm_operations_struct {
+@@ -195,6 +201,11 @@ struct vm_operations_struct {
  	 */
  	int (*access)(struct vm_area_struct *vma, unsigned long addr,
  		      void *buf, int len, int write);
@@ -28644,13 +29785,51 @@
 +	 * original value of @ptep. */
 +	pte_t (*zap_pte)(struct vm_area_struct *vma, 
 +			 unsigned long addr, pte_t *ptep, int is_fullmm);
-+
-+	/* called before close() to indicate no more pages should be mapped */
-+	void (*unmap)(struct vm_area_struct *area);
-+
  #ifdef CONFIG_NUMA
  	/*
  	 * set_policy() op must add a reference to any non-NULL @new mempolicy
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 812a5f3..0b7d4ec 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -28,6 +28,7 @@
+ #include <linux/if.h>
+ #include <linux/if_ether.h>
+ #include <linux/if_packet.h>
++#include <linux/if_link.h>
+ 
+ #ifdef __KERNEL__
+ #include <linux/timer.h>
+@@ -577,6 +578,13 @@ struct netdev_queue {
+  *	this function is called when a VLAN id is unregistered.
+  *
+  * void (*ndo_poll_controller)(struct net_device *dev);
++ *
++ *	SR-IOV management functions.
++ * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
++ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
++ * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
++ * int (*ndo_get_vf_config)(struct net_device *dev,
++ *			    int vf, struct ifla_vf_info *ivf);
+  */
+ #define HAVE_NET_DEVICE_OPS
+ struct net_device_ops {
+@@ -626,6 +634,15 @@ struct net_device_ops {
+ #define HAVE_NETDEV_POLL
+ 	void                    (*ndo_poll_controller)(struct net_device *dev);
+ #endif
++	int			(*ndo_set_vf_mac)(struct net_device *dev,
++						  int queue, u8 *mac);
++	int			(*ndo_set_vf_vlan)(struct net_device *dev,
++						   int queue, u16 vlan, u8 qos);
++	int			(*ndo_set_vf_tx_rate)(struct net_device *dev,
++						      int vf, int rate);
++	int			(*ndo_get_vf_config)(struct net_device *dev,
++						     int vf,
++						     struct ifla_vf_info *ivf);
+ #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+ 	int			(*ndo_fcoe_enable)(struct net_device *dev);
+ 	int			(*ndo_fcoe_disable)(struct net_device *dev);
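
To make the contract of these hooks concrete, here is a hedged sketch of how
an SR-IOV PF driver might wire up two of them; the foo_* driver, its private
struct, its per-VF bookkeeping and its hardware helper are all invented for
the example:

static int foo_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
	struct foo_priv *priv = netdev_priv(dev);	/* hypothetical */

	if (vf < 0 || vf >= priv->num_vfs || !is_valid_ether_addr(mac))
		return -EINVAL;
	memcpy(priv->vf[vf].mac, mac, ETH_ALEN);
	return foo_program_vf_mac(priv, vf);		/* hypothetical */
}

static int foo_get_vf_config(struct net_device *dev, int vf,
			     struct ifla_vf_info *ivi)
{
	struct foo_priv *priv = netdev_priv(dev);

	if (vf < 0 || vf >= priv->num_vfs)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(ivi->mac, priv->vf[vf].mac, ETH_ALEN);
	ivi->vlan = priv->vf[vf].vlan;
	ivi->qos = priv->vf[vf].qos;
	ivi->tx_rate = priv->vf[vf].tx_rate;
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	/* ... standard hooks ... */
	.ndo_set_vf_mac    = foo_set_vf_mac,
	.ndo_get_vf_config = foo_get_vf_config,
};
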
 diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
 index 6b202b1..b03950e 100644
 --- a/include/linux/page-flags.h
@@ -28689,6 +29868,60 @@
  #ifdef CONFIG_MEMORY_FAILURE
  PAGEFLAG(HWPoison, hwpoison)
  TESTSETFLAG(HWPoison, hwpoison)
+diff --git a/include/linux/pci.h b/include/linux/pci.h
+index e07d194..ca28e46 100644
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -609,6 +609,9 @@ extern void pci_remove_bus_device(struct pci_dev *dev);
+ extern void pci_stop_bus_device(struct pci_dev *dev);
+ void pci_setup_cardbus(struct pci_bus *bus);
+ extern void pci_sort_breadthfirst(void);
++#define dev_is_pci(d) ((d)->bus == &pci_bus_type)
++#define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false))
++#define dev_num_vf(d) ((dev_is_pci(d) ? pci_num_vf(to_pci_dev(d)) : 0))
+ 
+ /* Generic PCI functions exported to card drivers */
+ 
+@@ -1124,6 +1127,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
+ 						unsigned int devfn)
+ { return NULL; }
+ 
++#define dev_is_pci(d) (false)
++#define dev_is_pf(d) (false)
++#define dev_num_vf(d) (0)
+ #endif /* CONFIG_PCI */
+ 
+ /* Include architecture-dependent settings and functions */
+@@ -1279,6 +1285,7 @@ void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
+ extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+ extern void pci_disable_sriov(struct pci_dev *dev);
+ extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
++extern int pci_num_vf(struct pci_dev *dev);
+ #else
+ static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+ {
+@@ -1291,6 +1298,10 @@ static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+ {
+ 	return IRQ_NONE;
+ }
++static inline int pci_num_vf(struct pci_dev *dev)
++{
++	return 0;
++}
+ #endif
+ 
+ #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
+diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
+index 67325bf..c398cc3 100644
+--- a/include/linux/pci_ids.h
++++ b/include/linux/pci_ids.h
+@@ -2712,3 +2712,6 @@
+ #define PCI_DEVICE_ID_RME_DIGI32	0x9896
+ #define PCI_DEVICE_ID_RME_DIGI32_PRO	0x9897
+ #define PCI_DEVICE_ID_RME_DIGI32_8	0x9898
++
++#define PCI_VENDOR_ID_XEN		0x5853
++#define PCI_DEVICE_ID_XEN_PLATFORM	0x0001
 diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
 index 73b1f1c..113585a 100644
 --- a/include/linux/swiotlb.h
@@ -29103,7 +30336,7 @@
 +
 +#endif /* __XEN_BLKIF_H__ */
 diff --git a/include/xen/events.h b/include/xen/events.h
-index e68d59a..699108a 100644
+index e68d59a..7e17e2a 100644
 --- a/include/xen/events.h
 +++ b/include/xen/events.h
 @@ -12,6 +12,8 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -29128,7 +30361,7 @@
  
  /*
   * Common unbind function for all event sources. Takes IRQ to unbind from.
-@@ -53,7 +61,39 @@ bool xen_test_irq_pending(int irq);
+@@ -53,7 +61,42 @@ bool xen_test_irq_pending(int irq);
     irq will be disabled so it won't deliver an interrupt. */
  void xen_poll_irq(int irq);
  
@@ -29163,9 +30396,12 @@
 +
 +/* Determine whether to ignore this IRQ if passed to a guest. */
 +int xen_ignore_irq(int irq);
-+
++/* Xen HVM evtchn vector callback */
++extern void xen_hvm_callback_vector(void);
++extern int xen_have_vector_callback;
++int xen_set_callback_via(uint64_t via);
 +void xen_evtchn_do_upcall(struct pt_regs *regs);
-+void xen_hvm_evtchn_do_upcall(struct pt_regs *regs);
++void xen_hvm_evtchn_do_upcall(void);
 +
  #endif	/* _XEN_EVENTS_H */
 diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
@@ -29294,7 +30530,7 @@
 +
 +#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
 diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
-index a40f1cd..7f8c7c8 100644
+index a40f1cd..871b553 100644
 --- a/include/xen/grant_table.h
 +++ b/include/xen/grant_table.h
 @@ -37,10 +37,16 @@
@@ -29319,9 +30555,9 @@
  	u16 count;
  };
  
-+int gnttab_init(void);
 +void gnttab_reset_grant_page(struct page *page);
 +
++int gnttab_init(void);
  int gnttab_suspend(void);
  int gnttab_resume(void);
  
@@ -29334,7 +30570,7 @@
  /*
   * operations on reserved batches of grant references
   */
-@@ -106,6 +117,37 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+@@ -106,12 +117,46 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
  void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
  				       unsigned long pfn);
  
@@ -29372,12 +30608,21 @@
  int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
  			   unsigned long max_nr_gframes,
  			   struct grant_entry **__shared);
+ void arch_gnttab_unmap_shared(struct grant_entry *shared,
+ 			      unsigned long nr_gframes);
+ 
++extern unsigned long xen_hvm_resume_frames;
++unsigned int gnttab_max_grant_frames(void);
++
+ #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
+ 
+ #endif /* __ASM_GNTTAB_H__ */
 diff --git a/include/xen/hvm.h b/include/xen/hvm.h
 new file mode 100644
-index 0000000..a80c7b9
+index 0000000..b193fa2
 --- /dev/null
 +++ b/include/xen/hvm.h
-@@ -0,0 +1,32 @@
+@@ -0,0 +1,30 @@
 +/* Simple wrappers around HVM functions */
 +#ifndef XEN_HVM_H__
 +#define XEN_HVM_H__
@@ -29385,42 +30630,43 @@
 +#include <xen/interface/hvm/params.h>
 +#include <asm/xen/hypercall.h>
 +
-+static inline unsigned long hvm_get_parameter(int idx)
++static inline int hvm_get_parameter(int idx, uint64_t *value)
 +{
-+       struct xen_hvm_param xhv;
-+       int r;
++	struct xen_hvm_param xhv;
++	int r;
 +
-+       xhv.domid = DOMID_SELF;
-+       xhv.index = idx;
-+       r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
-+       if (r < 0) {
-+               printk(KERN_ERR "cannot get hvm parameter %d: %d.\n",
-+                      idx, r);
-+               return 0;
-+       }
-+       return xhv.value;
++	xhv.domid = DOMID_SELF;
++	xhv.index = idx;
++	r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
++	if (r < 0) {
++		printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n",
++			idx, r);
++		return r;
++	}
++	*value = xhv.value;
++	return r;
 +}
 +
-+int xen_set_callback_via(uint64_t via);
-+extern int xen_have_vector_callback;
-+
 +#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
 +#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
 +#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
-+                               HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
++		HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
 +
 +#endif /* XEN_HVM_H__ */
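
For reference, this wrapper is consumed by the xenbus_init() hunk above to
locate the xenstore ring when running PV-on-HVM; a sketch of that pattern
(the globals are xenbus-internal, and the parameters are filled in by the
toolstack):

static int example_locate_xenstore(void)
{
	uint64_t v = 0;
	int err;

	err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
	if (err)
		return err;
	xen_store_evtchn = (int)v;

	err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
	if (err)
		return err;
	xen_store_mfn = (unsigned long)v;

	/* the xenstore ring page is provided by the toolstack */
	xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
	return xen_store_interface ? 0 : -ENOMEM;
}
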
 diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
-index f51b641..8ab08b9 100644
+index f51b641..70d2563 100644
 --- a/include/xen/interface/features.h
 +++ b/include/xen/interface/features.h
-@@ -41,6 +41,9 @@
+@@ -41,6 +41,12 @@
  /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
  #define XENFEAT_mmu_pt_update_preserve_ad  5
  
 +/* x86: Does this Xen host support the HVM callback vector type? */
 +#define XENFEAT_hvm_callback_vector        8
 +
++/* x86: pvclock algorithm is safe to use on HVM */
++#define XENFEAT_hvm_safe_pvclock           9
++
  #define XENFEAT_NR_SUBMAPS 1
  
  #endif /* __XEN_PUBLIC_FEATURES_H__ */
@@ -29467,10 +30713,10 @@
   /* Map the grant entry for access by I/O devices. */
 diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
 new file mode 100644
-index 0000000..7c74ba4
+index 0000000..a4827f4
 --- /dev/null
 +++ b/include/xen/interface/hvm/hvm_op.h
-@@ -0,0 +1,72 @@
+@@ -0,0 +1,46 @@
 +/*
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to
@@ -29494,7 +30740,8 @@
 +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
 +#define __XEN_PUBLIC_HVM_HVM_OP_H__
 +
-+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
++/* Get/set subcommands: the second argument of the hypercall is a
++ * pointer to a xen_hvm_param struct. */
 +#define HVMOP_set_param           0
 +#define HVMOP_get_param           1
 +struct xen_hvm_param {
@@ -29504,51 +30751,24 @@
 +};
 +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
 +
-+/* Set the logical level of one of a domain's PCI INTx wires. */
-+#define HVMOP_set_pci_intx_level  2
-+struct xen_hvm_set_pci_intx_level {
-+    /* Domain to be updated. */
-+    domid_t  domid;
-+    /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
-+    uint8_t  domain, bus, device, intx;
-+    /* Assertion level (0 = unasserted, 1 = asserted). */
-+    uint8_t  level;
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_intx_level);
-+
-+/* Set the logical level of one of a domain's ISA IRQ wires. */
-+#define HVMOP_set_isa_irq_level   3
-+struct xen_hvm_set_isa_irq_level {
-+    /* Domain to be updated. */
-+    domid_t  domid;
-+    /* ISA device identification, by ISA IRQ (0-15). */
-+    uint8_t  isa_irq;
-+    /* Assertion level (0 = unasserted, 1 = asserted). */
-+    uint8_t  level;
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_isa_irq_level);
-+
-+#define HVMOP_set_pci_link_route  4
-+struct xen_hvm_set_pci_link_route {
-+    /* Domain to be updated. */
++/* Hint from PV drivers for pagetable destruction. */
++#define HVMOP_pagetable_dying       9
++struct xen_hvm_pagetable_dying {
++    /* Domain with a pagetable about to be destroyed. */
 +    domid_t  domid;
-+    /* PCI link identifier (0-3). */
-+    uint8_t  link;
-+    /* ISA IRQ (1-15), or 0 (disable link). */
-+    uint8_t  isa_irq;
++    /* guest physical address of the toplevel pagetable dying */
++    aligned_u64 gpa;
 +};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route);
-+
-+/* Flushes all VCPU TLBs: @arg must be NULL. */
-+#define HVMOP_flush_tlbs          5
-+
++typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
++DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
++
 +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
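
A hedged sketch of issuing this hint, following the HYPERVISOR_hvm_op()
calling pattern used by hvm_get_parameter() elsewhere in this patch (the
function name is invented):

static void example_pagetable_dying(unsigned long gpa)
{
	struct xen_hvm_pagetable_dying a;

	a.domid = DOMID_SELF;
	a.gpa = gpa;	/* guest-physical address of the dying toplevel */
	if (HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a) < 0)
		printk(KERN_WARNING "HVMOP_pagetable_dying failed\n");
}
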
 diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
 new file mode 100644
-index 0000000..aa9efd8
+index 0000000..1888d8c
 --- /dev/null
 +++ b/include/xen/interface/hvm/params.h
-@@ -0,0 +1,112 @@
+@@ -0,0 +1,95 @@
 +/*
 + * Permission is hereby granted, free of charge, to any person obtaining a copy
 + * of this software and associated documentation files (the "Software"), to
@@ -29589,10 +30809,6 @@
 + */
 +#define HVM_PARAM_CALLBACK_IRQ 0
 +
-+/*
-+ * These are not used by Xen. They are here for convenience of HVM-guest
-+ * xenbus implementations.
-+ */
 +#define HVM_PARAM_STORE_PFN    1
 +#define HVM_PARAM_STORE_EVTCHN 2
 +
@@ -29602,19 +30818,6 @@
 +
 +#define HVM_PARAM_BUFIOREQ_PFN 6
 +
-+#ifdef __ia64__
-+
-+#define HVM_PARAM_NVRAM_FD     7
-+#define HVM_PARAM_VHPT_SIZE    8
-+#define HVM_PARAM_BUFPIOREQ_PFN        9
-+
-+#elif defined(__i386__) || defined(__x86_64__)
-+
-+/* Expose Viridian interfaces to this HVM guest? */
-+#define HVM_PARAM_VIRIDIAN     9
-+
-+#endif
-+
 +/*
 + * Set mode for virtual timers (currently x86 only):
 + *  delay_for_missed_ticks (default):
@@ -29661,6 +30864,21 @@
 +#define HVM_NR_PARAMS          17
 +
 +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
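
Tying the pieces together, a hedged sketch of programming the vector
callback: xen_set_callback_via(), xen_hvm_callback_vector and
xen_have_vector_callback are declared in the events.h hunk above, while the
vector number and the function itself are illustrative (xen_feature() needs
<xen/features.h>):

static void __init example_init_callback_vector(void)
{
	if (!xen_feature(XENFEAT_hvm_callback_vector))
		return;

	/* 0xf3 is an illustrative free IDT vector */
	if (xen_set_callback_via(HVM_CALLBACK_VECTOR(0xf3))) {
		printk(KERN_ERR "xen: vector callback setup failed\n");
		return;
	}
	alloc_intr_gate(0xf3, xen_hvm_callback_vector);
	xen_have_vector_callback = 1;
}
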
+diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
+index 518481c..8309344 100644
+--- a/include/xen/interface/io/netif.h
++++ b/include/xen/interface/io/netif.h
+@@ -131,6 +131,10 @@ struct xen_netif_rx_request {
+ #define _NETRXF_extra_info     (3)
+ #define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
+ 
++/* GSO Prefix descriptor. */
++#define _NETRXF_gso_prefix     (4)
++#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
++
+ struct xen_netif_rx_response {
+     uint16_t id;
+     uint16_t offset;       /* Offset in page of start of received packet  */
 diff --git a/include/xen/interface/io/pciif.h b/include/xen/interface/io/pciif.h
 new file mode 100644
 index 0000000..c4177f3
@@ -29792,16 +31010,24 @@
 + * End:
 + */
 diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
-index e8cbf43..865dcf0 100644
+index e8cbf43..7b301fa 100644
 --- a/include/xen/interface/io/ring.h
 +++ b/include/xen/interface/io/ring.h
-@@ -73,7 +73,8 @@ union __name##_sring_entry {						\
+@@ -73,7 +73,16 @@ union __name##_sring_entry {						\
  struct __name##_sring {							\
      RING_IDX req_prod, req_event;					\
      RING_IDX rsp_prod, rsp_event;					\
 -    uint8_t  pad[48];							\
-+    uint8_t  netfront_smartpoll_active;					\
-+    uint8_t  pad[47];							\
++    union {								\
++        struct {							\
++            uint8_t smartpoll_active;					\
++        } netif;							\
++        struct {							\
++            uint8_t msg;						\
++        } tapif_user;							\
++        uint8_t pvt_pad[4];						\
++    } private;								\
++    uint8_t pad[44];							\
      union __name##_sring_entry ring[1]; /* variable-length */		\
  };									\
  									\
@@ -30420,57 +31646,6 @@
 +DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t);
 +
 +#endif /* __XEN_PUBLIC_PLATFORM_H__ */
-diff --git a/include/xen/interface/platform_pci.h b/include/xen/interface/platform_pci.h
-new file mode 100644
-index 0000000..bc230cd
---- /dev/null
-+++ b/include/xen/interface/platform_pci.h
-@@ -0,0 +1,45 @@
-+/******************************************************************************
-+ * platform_pci.h
-+ *
-+ * Interface for granting foreign access to page frames, and receiving
-+ * page-ownership transfers.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#ifndef __XEN_PUBLIC_PLATFORM_PCI_H__
-+#define __XEN_PUBLIC_PLATFORM_PCI_H__
-+
-+#define XEN_IOPORT_BASE 0x10
-+
-+#define XEN_IOPORT_PLATFLAGS	(XEN_IOPORT_BASE + 0) /* 1 byte access (R/W) */
-+#define XEN_IOPORT_MAGIC	(XEN_IOPORT_BASE + 0) /* 2 byte access (R) */
-+#define XEN_IOPORT_UNPLUG	(XEN_IOPORT_BASE + 0) /* 2 byte access (W) */
-+#define XEN_IOPORT_DRVVER	(XEN_IOPORT_BASE + 0) /* 4 byte access (W) */
-+
-+#define XEN_IOPORT_SYSLOG	(XEN_IOPORT_BASE + 2) /* 1 byte access (W) */
-+#define XEN_IOPORT_PROTOVER	(XEN_IOPORT_BASE + 2) /* 1 byte access (R) */
-+#define XEN_IOPORT_PRODNUM	(XEN_IOPORT_BASE + 2) /* 2 byte access (W) */
-+
-+#define UNPLUG_ALL_IDE_DISKS 1
-+#define UNPLUG_ALL_NICS 2
-+#define UNPLUG_AUX_IDE_DISKS 4
-+#define UNPLUG_ALL 7
-+
-+#endif /* __XEN_PUBLIC_PLATFORM_PCI_H__ */
 diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
 new file mode 100644
 index 0000000..f31fdab
@@ -31024,55 +32199,57 @@
 +#endif
 diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
 new file mode 100644
-index 0000000..ced434d
+index 0000000..ce9d671
 --- /dev/null
 +++ b/include/xen/platform_pci.h
-@@ -0,0 +1,47 @@
-+/******************************************************************************
-+ * platform-pci.h
-+ *
-+ * Xen platform PCI device driver
-+ * Copyright (c) 2004, Intel Corporation. <xiaofeng.ling at intel.com>
-+ * Copyright (c) 2007, XenSource Inc.
-+ * Copyright (c) 2010, Citrix
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms and conditions of the GNU General Public License,
-+ * version 2, as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-+ * more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-+ * Place - Suite 330, Boston, MA 02111-1307 USA.
-+ */
-+
+@@ -0,0 +1,49 @@
 +#ifndef _XEN_PLATFORM_PCI_H
 +#define _XEN_PLATFORM_PCI_H
 +
-+#include <linux/version.h>
-+
 +#define XEN_IOPORT_MAGIC_VAL 0x49d2
-+#define XEN_IOPORT_LINUX_PRODNUM 0xffff
-+#define XEN_IOPORT_LINUX_DRVVER  ((LINUX_VERSION_CODE << 8) + 0x0)
++#define XEN_IOPORT_LINUX_PRODNUM 0x0003
++#define XEN_IOPORT_LINUX_DRVVER  0x0001
++
++#define XEN_IOPORT_BASE 0x10
++
++#define XEN_IOPORT_PLATFLAGS	(XEN_IOPORT_BASE + 0) /* 1 byte access (R/W) */
++#define XEN_IOPORT_MAGIC	(XEN_IOPORT_BASE + 0) /* 2 byte access (R) */
++#define XEN_IOPORT_UNPLUG	(XEN_IOPORT_BASE + 0) /* 2 byte access (W) */
++#define XEN_IOPORT_DRVVER	(XEN_IOPORT_BASE + 0) /* 4 byte access (W) */
++
++#define XEN_IOPORT_SYSLOG	(XEN_IOPORT_BASE + 2) /* 1 byte access (W) */
++#define XEN_IOPORT_PROTOVER	(XEN_IOPORT_BASE + 2) /* 1 byte access (R) */
++#define XEN_IOPORT_PRODNUM	(XEN_IOPORT_BASE + 2) /* 2 byte access (W) */
 +
-+#ifdef CONFIG_XEN_PLATFORM_PCI
-+unsigned long alloc_xen_mmio(unsigned long len);
-+void platform_pci_resume(void);
-+void platform_pci_disable_irq(void);
-+void platform_pci_enable_irq(void);
++#define XEN_UNPLUG_ALL_IDE_DISKS 1
++#define XEN_UNPLUG_ALL_NICS 2
++#define XEN_UNPLUG_AUX_IDE_DISKS 4
++#define XEN_UNPLUG_ALL 7
++#define XEN_UNPLUG_IGNORE 8
++
++static inline int xen_must_unplug_nics(void) {
++#if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \
++		defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \
++		(defined(CONFIG_XEN_PLATFORM_PCI) || \
++		 defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
++        return 1;
 +#else
-+static inline unsigned long alloc_xen_mmio(unsigned long len)
-+{
-+	return ~0UL;
++        return 0;
++#endif
 +}
-+static inline void platform_pci_resume(void) {}
-+static inline void platform_pci_disable_irq(void) {}
-+static inline void platform_pci_enable_irq(void) {}
++
++static inline int xen_must_unplug_disks(void) {
++#if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \
++		defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \
++		(defined(CONFIG_XEN_PLATFORM_PCI) || \
++		 defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
++        return 1;
++#else
++        return 0;
 +#endif
++}
++
++extern int xen_platform_pci_unplug;
 +
 +#endif /* _XEN_PLATFORM_PCI_H */
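
A hedged sketch of the expected writer of xen_platform_pci_unplug (the
actual unplug logic lives elsewhere in the pvops tree): early HVM boot
combines the helpers above into a mask and writes it to the unplug I/O port
defined here, which the initcalls shown earlier then consult:

static void __init example_unplug_emulated_devices(void)
{
	int mask = 0;

	if (xen_must_unplug_nics())
		mask |= XEN_UNPLUG_ALL_NICS;
	if (xen_must_unplug_disks())
		mask |= XEN_UNPLUG_ALL_IDE_DISKS;
	if (mask) {
		/* ask qemu to unplug its emulated counterparts */
		outw(mask, XEN_IOPORT_UNPLUG);
		xen_platform_pci_unplug = mask;
	}
}
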
 diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
@@ -31162,15 +32339,21 @@
 +
 +#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
 diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
-index 883a21b..323121a 100644
+index 883a21b..7058f8a 100644
 --- a/include/xen/xen-ops.h
 +++ b/include/xen/xen-ops.h
-@@ -14,4 +14,17 @@ void xen_mm_unpin_all(void);
+@@ -7,6 +7,7 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
+ 
+ void xen_pre_suspend(void);
+ void xen_post_suspend(int suspend_cancelled);
++void xen_hvm_post_suspend(int suspend_cancelled);
+ 
+ void xen_mm_pin_all(void);
+ void xen_mm_unpin_all(void);
+@@ -14,4 +15,16 @@ void xen_mm_unpin_all(void);
  void xen_timer_resume(void);
  void xen_arch_resume(void);
  
-+int xen_setup_shutdown_event(void);
-+
 +int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 +			       unsigned long addr,
 +			       unsigned long mfn, int nr,
@@ -31181,14 +32364,15 @@
 +				unsigned int address_bits);
 +
 +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
++int xen_setup_shutdown_event(void);
 +
  #endif /* INCLUDE_XEN_OPS_H */
 diff --git a/include/xen/xen.h b/include/xen/xen.h
 new file mode 100644
-index 0000000..a164024
+index 0000000..77604ed
 --- /dev/null
 +++ b/include/xen/xen.h
-@@ -0,0 +1,32 @@
+@@ -0,0 +1,34 @@
 +#ifndef _XEN_XEN_H
 +#define _XEN_XEN_H
 +
@@ -31200,8 +32384,10 @@
 +
 +#ifdef CONFIG_XEN
 +extern enum xen_domain_type xen_domain_type;
++extern void xen_hvm_guest_init(void);
 +#else
 +#define xen_domain_type		XEN_NATIVE
++#define xen_hvm_guest_init() do { } while (0)
 +#endif
 +
 +#define xen_domain()		(xen_domain_type != XEN_NATIVE)
@@ -31222,7 +32408,7 @@
 +
 +#endif	/* _XEN_XEN_H */
 diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
-index b9763ba..a7d13ff 100644
+index b9763ba..542ca7c 100644
 --- a/include/xen/xenbus.h
 +++ b/include/xen/xenbus.h
 @@ -93,7 +93,7 @@ struct xenbus_driver {
@@ -31234,14 +32420,6 @@
  	struct device_driver driver;
  	int (*read_otherend_details)(struct xenbus_device *dev);
  	int (*is_ready)(struct xenbus_device *dev);
-@@ -173,6 +173,7 @@ void unregister_xenbus_watch(struct xenbus_watch *watch);
- void xs_suspend(void);
- void xs_resume(void);
- void xs_suspend_cancel(void);
-+int xenbus_probe_init(void);
- 
- /* Used by xenbus_dev to borrow kernel's store connection. */
- void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
 diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
 index 986519e..cae345b 100644
 --- a/kernel/irq/manage.c
@@ -33063,7 +34241,7 @@
  {
  	int aligned;
 diff --git a/mm/memory.c b/mm/memory.c
-index 4e59455..b2de7c9 100644
+index 4e59455..17148f0 100644
 --- a/mm/memory.c
 +++ b/mm/memory.c
 @@ -553,6 +553,13 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -33137,57 +34315,35 @@
  
  	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
  
-+#if CONFIG_XEN
++#ifdef CONFIG_XEN
 +	vma->vm_mm->context.has_foreign_mappings = 1;
 +#endif
 +
  	err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
  	if (err) {
  		/*
-diff --git a/mm/mmap.c b/mm/mmap.c
-index ae19746..9c39fc2 100644
---- a/mm/mmap.c
-+++ b/mm/mmap.c
-@@ -1785,6 +1785,12 @@ static void unmap_region(struct mm_struct *mm,
- 	tlb_finish_mmu(tlb, start, end);
- }
+@@ -1886,11 +1925,10 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ {
+ 	pgd_t *pgd;
+ 	unsigned long next;
+-	unsigned long start = addr, end = addr + size;
++	unsigned long end = addr + size;
+ 	int err;
  
-+static inline void unmap_vma(struct vm_area_struct *vma)
-+{
-+	if (unlikely(vma->vm_ops && vma->vm_ops->unmap))
-+		vma->vm_ops->unmap(vma);
-+}
-+
- /*
-  * Create a list of vma's touched by the unmap, removing them from the mm's
-  * vma list as we go..
-@@ -1800,6 +1806,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
- 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+ 	BUG_ON(addr >= end);
+-	mmu_notifier_invalidate_range_start(mm, start, end);
+ 	pgd = pgd_offset(mm, addr);
  	do {
- 		rb_erase(&vma->vm_rb, &mm->mm_rb);
-+		unmap_vma(vma);
- 		mm->map_count--;
- 		tail_vma = vma;
- 		vma = vma->vm_next;
-@@ -2076,7 +2083,7 @@ EXPORT_SYMBOL(do_brk);
- void exit_mmap(struct mm_struct *mm)
- {
- 	struct mmu_gather *tlb;
--	struct vm_area_struct *vma;
-+	struct vm_area_struct *vma, *vma_tmp;
- 	unsigned long nr_accounted = 0;
- 	unsigned long end;
- 
-@@ -2098,6 +2105,9 @@ void exit_mmap(struct mm_struct *mm)
- 	if (!vma)	/* Can happen if dup_mmap() received an OOM */
- 		return;
- 
-+	for (vma_tmp = mm->mmap; vma_tmp; vma_tmp = vma_tmp->vm_next)
-+		unmap_vma(vma_tmp);
+ 		next = pgd_addr_end(addr, end);
+@@ -1898,7 +1936,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ 		if (err)
+ 			break;
+ 	} while (pgd++, addr = next, addr != end);
+-	mmu_notifier_invalidate_range_end(mm, start, end);
 +
- 	lru_add_drain();
- 	flush_cache_mm(mm);
- 	tlb = tlb_gather_mmu(mm, 1);
+ 	return err;
+ }
+ EXPORT_SYMBOL_GPL(apply_to_page_range);
 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
 index 36992b6..bc1b6e9 100644
 --- a/mm/page_alloc.c
@@ -33253,3 +34409,202 @@
  
  	if (nr || force_flush)
  		flush_tlb_kernel_range(*start, *end);
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index d4fd895..4ab8c97 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -35,6 +35,7 @@
+ #include <linux/security.h>
+ #include <linux/mutex.h>
+ #include <linux/if_addr.h>
++#include <linux/pci.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -582,6 +583,22 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+ 	a->tx_compressed = b->tx_compressed;
+ };
+ 
++/* All VF info */
++static inline int rtnl_vfinfo_size(const struct net_device *dev)
++{
++	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
++
++		int num_vfs = dev_num_vf(dev->dev.parent);
++		size_t size = nlmsg_total_size(sizeof(struct nlattr));
++		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
++		size += num_vfs * (sizeof(struct ifla_vf_mac) +
++				  sizeof(struct ifla_vf_vlan) +
++				  sizeof(struct ifla_vf_tx_rate));
++		return size;
++	} else
++		return 0;
++}
++
+ static inline size_t if_nlmsg_size(const struct net_device *dev)
+ {
+ 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+@@ -599,6 +616,8 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
+ 	       + nla_total_size(4) /* IFLA_MASTER */
+ 	       + nla_total_size(1) /* IFLA_OPERSTATE */
+ 	       + nla_total_size(1) /* IFLA_LINKMODE */
++	       + nla_total_size(4) /* IFLA_NUM_VF */
++	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+ 	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+ }
+ 
+@@ -667,6 +686,40 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+ 	stats = dev_get_stats(dev);
+ 	copy_rtnl_link_stats(nla_data(attr), stats);
+ 
++	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
++		int i;
++
++		struct nlattr *vfinfo, *vf;
++		int num_vfs = dev_num_vf(dev->dev.parent);
++
++		NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
++		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
++		if (!vfinfo)
++			goto nla_put_failure;
++		for (i = 0; i < num_vfs; i++) {
++			struct ifla_vf_info ivi;
++			struct ifla_vf_mac vf_mac;
++			struct ifla_vf_vlan vf_vlan;
++			struct ifla_vf_tx_rate vf_tx_rate;
++			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
++				break;
++			vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
++			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
++			vf_vlan.vlan = ivi.vlan;
++			vf_vlan.qos = ivi.qos;
++			vf_tx_rate.rate = ivi.tx_rate;
++			vf = nla_nest_start(skb, IFLA_VF_INFO);
++			if (!vf) {
++				nla_nest_cancel(skb, vfinfo);
++				goto nla_put_failure;
++			}
++			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
++			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
++			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
++			nla_nest_end(skb, vf);
++		}
++		nla_nest_end(skb, vfinfo);
++	}
+ 	if (dev->rtnl_link_ops) {
+ 		if (rtnl_link_fill(skb, dev) < 0)
+ 			goto nla_put_failure;
+@@ -716,6 +769,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+ 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
+ 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
+ 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
++	[IFLA_VFINFO_LIST]	= { .type = NLA_NESTED },
+ };
+ 
+ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
+@@ -723,6 +777,33 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
+ 	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
+ };
+ 
++static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
++	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
++};
++
++static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
++	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
++				    .len = sizeof(struct ifla_vf_mac) },
++	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
++				    .len = sizeof(struct ifla_vf_vlan) },
++	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
++				    .len = sizeof(struct ifla_vf_tx_rate) },
++};
++
++struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
++{
++	struct net *net;
++	/* Examine the link attributes and figure out which
++	 * network namespace we are talking about.
++	 */
++	if (tb[IFLA_NET_NS_PID])
++		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
++	else
++		net = get_net(src_net);
++	return net;
++}
++EXPORT_SYMBOL(rtnl_link_get_net);
++
+ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+ {
+ 	if (dev) {
+@@ -738,6 +819,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+ 	return 0;
+ }
+ 
++static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
++{
++	int rem, err = -EINVAL;
++	struct nlattr *vf;
++	const struct net_device_ops *ops = dev->netdev_ops;
++
++	nla_for_each_nested(vf, attr, rem) {
++		switch (nla_type(vf)) {
++		case IFLA_VF_MAC: {
++			struct ifla_vf_mac *ivm;
++			ivm = nla_data(vf);
++			err = -EOPNOTSUPP;
++			if (ops->ndo_set_vf_mac)
++				err = ops->ndo_set_vf_mac(dev, ivm->vf,
++							  ivm->mac);
++			break;
++		}
++		case IFLA_VF_VLAN: {
++			struct ifla_vf_vlan *ivv;
++			ivv = nla_data(vf);
++			err = -EOPNOTSUPP;
++			if (ops->ndo_set_vf_vlan)
++				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
++							   ivv->vlan,
++							   ivv->qos);
++			break;
++		}
++		case IFLA_VF_TX_RATE: {
++			struct ifla_vf_tx_rate *ivt;
++			ivt = nla_data(vf);
++			err = -EOPNOTSUPP;
++			if (ops->ndo_set_vf_tx_rate)
++				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
++							      ivt->rate);
++			break;
++		}
++		default:
++			err = -EINVAL;
++			break;
++		}
++		if (err)
++			break;
++	}
++	return err;
++}
++
+ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+ 		      struct nlattr **tb, char *ifname, int modified)
+ {
+@@ -875,6 +1002,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+ 		write_unlock_bh(&dev_base_lock);
+ 	}
+ 
++	if (tb[IFLA_VFINFO_LIST]) {
++		struct nlattr *attr;
++		int rem;
++		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
++			if (nla_type(attr) != IFLA_VF_INFO)
++				goto errout;
++			err = do_setvfinfo(dev, attr);
++			if (err < 0)
++				goto errout;
++			modified = 1;
++		}
++	}
+ 	err = 0;
+ 
+ errout:
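
With these rtnetlink hooks in place, per-VF parameters become configurable
from userspace; with a new enough iproute2 this should be reachable as, for
example, `ip link set dev eth0 vf 0 mac 02:00:00:00:00:01' (interface name
and address illustrative), and `ip link show' reports the per-VF state
carried in IFLA_VFINFO_LIST.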


