[kernel] r16044 - in dists/sid/linux-2.6/debian: . config/featureset-xen patches/features/all/xen
Bastian Blank
waldi at alioth.debian.org
Mon Aug 2 14:46:35 UTC 2010
Author: waldi
Date: Mon Aug 2 14:46:31 2010
New Revision: 16044
Log:
* debian/changelog: Update.
* debian/patches/features/all/xen/pvops.patch: Update to upstream commit 78b55f90e723 of jeremy/xen.git.
* debian/patches/features/all/xen/pvops-update.patch: Refresh so that it still applies on top of the updated pvops patch.
* debian/config/featureset-xen/defines: Ignore all ABI changes.
Modified:
dists/sid/linux-2.6/debian/changelog
dists/sid/linux-2.6/debian/config/featureset-xen/defines
dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch
dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog Mon Aug 2 12:16:57 2010 (r16043)
+++ dists/sid/linux-2.6/debian/changelog Mon Aug 2 14:46:31 2010 (r16044)
@@ -25,6 +25,10 @@
[ dann frazier ]
* [ia64] Fix crash when gcore reads gate area (Closes: #588574)
+ [ Bastian Blank ]
+ * Update Xen patch.
+ - Ignore ABI changes.
+
-- Ben Hutchings <ben at decadent.org.uk> Sat, 24 Jul 2010 00:41:51 +0100
linux-2.6 (2.6.32-18) unstable; urgency=low
Modified: dists/sid/linux-2.6/debian/config/featureset-xen/defines
==============================================================================
--- dists/sid/linux-2.6/debian/config/featureset-xen/defines Mon Aug 2 12:16:57 2010 (r16043)
+++ dists/sid/linux-2.6/debian/config/featureset-xen/defines Mon Aug 2 14:46:31 2010 (r16044)
@@ -1,3 +1,6 @@
+[abi]
+ignore-changes: *
+
[description]
parts: xen
part-long-xen: This kernel also runs on a Xen hypervisor.
Modified: dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch Mon Aug 2 12:16:57 2010 (r16043)
+++ dists/sid/linux-2.6/debian/patches/features/all/xen/pvops-update.patch Mon Aug 2 14:46:31 2010 (r16044)
@@ -1,16 +1,27 @@
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index 70636d0..88262bb 100644
+index 99831c7..1930f64 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
-@@ -163,7 +163,6 @@ fail:
+@@ -162,17 +162,11 @@ fail:
+ */
static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
{
- struct backend_info *be = dev_get_drvdata(&xdev->dev);
-- struct xen_netif *netif = be->netif;
+- struct backend_info *be;
+- struct xen_netif *netif;
++ struct backend_info *be = dev_get_drvdata(&xdev->dev);
char *val;
DPRINTK("netback_uevent");
-@@ -182,7 +181,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+
+- be = dev_get_drvdata(&xdev->dev);
+- if (!be)
+- return 0;
+- netif = be->netif;
+-
+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ if (IS_ERR(val)) {
+ int err = PTR_ERR(val);
+@@ -187,7 +181,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
kfree(val);
}
Modified: dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Mon Aug 2 12:16:57 2010 (r16043)
+++ dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Mon Aug 2 14:46:31 2010 (r16044)
@@ -1,6 +1,35 @@
-Patch based on commit f6fe6583b77a49b569eef1b66c3d761eec2e561b of
+Patch based on commit 78b55f90e72348e231092dbe3e50ac7414b9e1af of
git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git.
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index 5f6aa11..3e30e60 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -113,6 +113,7 @@ parameter is applicable:
+ More X86-64 boot options can be found in
+ Documentation/x86/x86_64/boot-options.txt .
+ X86 Either 32bit or 64bit x86 (same as X86-32+X86-64)
++ XEN Xen support is enabled
+
+ In addition, the following text indicates that the option:
+
+@@ -2760,6 +2761,16 @@ and is between 256 and 4096 characters. It is defined in the file
+ xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
+ xd_geo= See header of drivers/block/xd.c.
+
++ xen_emul_unplug= [HW,X86,XEN]
++ Unplug Xen emulated devices
++ Format: [unplug0,][unplug1]
++ ide-disks -- unplug primary master IDE devices
++ aux-ide-disks -- unplug non-primary-master IDE devices
++ nics -- unplug network devices
++ all -- unplug all emulated devices (NICs and IDE disks)
++ ignore -- continue loading the Xen platform PCI driver even
++ if the version check failed
++
+ xirc2ps_cs= [NET,PCMCIA]
+ Format:
+ <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 29a6ff8..81f9b94 100644
--- a/Documentation/x86/x86_64/boot-options.txt
@@ -178,6 +207,332 @@
static inline void detect_calgary(void) { return; }
#endif
+diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
+index ee1931b..5af5051 100644
+--- a/arch/x86/include/asm/cmpxchg_32.h
++++ b/arch/x86/include/asm/cmpxchg_32.h
+@@ -34,12 +34,12 @@ static inline void __set_64bit(unsigned long long *ptr,
+ unsigned int low, unsigned int high)
+ {
+ asm volatile("\n1:\t"
+- "movl (%0), %%eax\n\t"
+- "movl 4(%0), %%edx\n\t"
+- LOCK_PREFIX "cmpxchg8b (%0)\n\t"
++ "movl (%1), %%eax\n\t"
++ "movl 4(%1), %%edx\n\t"
++ LOCK_PREFIX "cmpxchg8b %0\n\t"
+ "jnz 1b"
+- : /* no outputs */
+- : "D"(ptr),
++ : "=m"(*ptr)
++ : "D" (ptr),
+ "b"(low),
+ "c"(high)
+ : "ax", "dx", "memory");
+@@ -82,20 +82,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+ switch (size) {
+ case 1:
+ asm volatile("xchgb %b0,%1"
+- : "=q" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=q" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 2:
+ asm volatile("xchgw %w0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 4:
+ asm volatile("xchgl %0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ }
+@@ -139,21 +139,21 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile(LOCK_PREFIX "cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgl %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -172,21 +172,21 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("lock; cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("lock; cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("lock; cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgl %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -200,21 +200,21 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgl %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -226,11 +226,10 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr,
+ unsigned long long new)
+ {
+ unsigned long long prev;
+- asm volatile(LOCK_PREFIX "cmpxchg8b %3"
+- : "=A"(prev)
++ asm volatile(LOCK_PREFIX "cmpxchg8b %1"
++ : "=A"(prev), "+m" (*__xg(ptr))
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+- "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+@@ -241,11 +240,10 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
+ unsigned long long new)
+ {
+ unsigned long long prev;
+- asm volatile("cmpxchg8b %3"
+- : "=A"(prev)
++ asm volatile("cmpxchg8b %1"
++ : "=A"(prev), "+m"(*__xg(ptr))
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+- "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
+index 52de72e..1871cb0 100644
+--- a/arch/x86/include/asm/cmpxchg_64.h
++++ b/arch/x86/include/asm/cmpxchg_64.h
+@@ -26,26 +26,26 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+ switch (size) {
+ case 1:
+ asm volatile("xchgb %b0,%1"
+- : "=q" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=q" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 2:
+ asm volatile("xchgw %w0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 4:
+ asm volatile("xchgl %k0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 8:
+ asm volatile("xchgq %0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ }
+@@ -66,27 +66,27 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgl %k2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+- asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgq %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -105,21 +105,27 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("lock; cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("lock; cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("lock; cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgl %k2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
++ : "memory");
++ return prev;
++ case 8:
++ asm volatile("lock; cmpxchgq %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -133,27 +139,27 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("cmpxchgl %k1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgl %k2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+- asm volatile("cmpxchgq %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgq %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6a25d5d..ac91eed 100644
--- a/arch/x86/include/asm/dma-mapping.h
@@ -258,10 +613,10 @@
#define hpet_readl(a) 0
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
-index 439a9ac..4cfd4de 100644
+index 439a9ac..bf88684 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
-@@ -36,16 +36,24 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+@@ -36,16 +36,28 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
free_pgd_range(tlb, addr, end, floor, ceiling);
}
@@ -274,7 +629,11 @@
pte_t *ptep, pte_t pte)
{
- set_pte_at(mm, addr, ptep, pte);
-+ set_pmd((pmd_t *)ptep, __pmd(pte_val(pte)));
++#if PAGETABLE_LEVELS >= 3
++ set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte)));
++#else
++ set_pgd((pgd_t *)ptep, native_make_pgd(native_pte_val(pte)));
++#endif
}
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
@@ -288,7 +647,7 @@
}
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-@@ -66,19 +74,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
+@@ -66,19 +78,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
@@ -386,10 +745,23 @@
extern int force_iommu, no_iommu;
extern int iommu_detected;
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
-index 6e90a04..451a45b 100644
+index 6e90a04..ba4dc7b 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
-@@ -157,6 +157,14 @@ static inline int invalid_vm86_irq(int irq)
+@@ -120,6 +120,12 @@
+ */
+ #define MCE_SELF_VECTOR 0xeb
+
++#ifdef CONFIG_XEN
++/* Xen vector callback to receive events in a HVM domain */
++#define XEN_HVM_EVTCHN_CALLBACK 0xe9
++#endif
++
++
+ /*
+ * First APIC vector available to drivers: (vectors 0x30-0xee) we
+ * start at 0x31(0x41) to spread out vectors evenly between priority
+@@ -157,6 +163,14 @@ static inline int invalid_vm86_irq(int irq)
#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
@@ -404,7 +776,7 @@
#ifdef CONFIG_X86_IO_APIC
# ifdef CONFIG_SPARSE_IRQ
# define NR_IRQS \
-@@ -165,13 +173,13 @@ static inline int invalid_vm86_irq(int irq)
+@@ -165,13 +179,13 @@ static inline int invalid_vm86_irq(int irq)
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# else
# if NR_CPUS < MAX_IO_APICS
@@ -454,7 +826,7 @@
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
-index efb3899..63a55bc 100644
+index efb3899..e571db4 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -330,11 +330,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
@@ -476,6 +848,67 @@
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
+@@ -770,15 +777,28 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+ #define PV_RESTORE_REGS "popl %edx; popl %ecx;"
+
+ /* save and restore all caller-save registers, except return value */
+-#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
+-#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
++#define __PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
++#define __PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
++
++#ifdef CONFIG_FRAME_POINTER
++#define PV_SAVE_ALL_CALLER_REGS \
++ "push %ebp;" \
++ "mov %esp, %ebp;" \
++ __PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS \
++ __PV_RESTORE_ALL_CALLER_REGS \
++ "leave;"
++#else
++#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS
++#endif
+
+ #define PV_FLAGS_ARG "0"
+ #define PV_EXTRA_CLOBBERS
+ #define PV_VEXTRA_CLOBBERS
+ #else
+ /* save and restore all caller-save registers, except return value */
+-#define PV_SAVE_ALL_CALLER_REGS \
++#define __PV_SAVE_ALL_CALLER_REGS \
+ "push %rcx;" \
+ "push %rdx;" \
+ "push %rsi;" \
+@@ -787,7 +807,7 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+ "push %r9;" \
+ "push %r10;" \
+ "push %r11;"
+-#define PV_RESTORE_ALL_CALLER_REGS \
++#define __PV_RESTORE_ALL_CALLER_REGS \
+ "pop %r11;" \
+ "pop %r10;" \
+ "pop %r9;" \
+@@ -797,6 +817,19 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+ "pop %rdx;" \
+ "pop %rcx;"
+
++#ifdef CONFIG_FRAME_POINTER
++#define PV_SAVE_ALL_CALLER_REGS \
++ "push %rbp;" \
++ "mov %rsp, %rbp;" \
++ __PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS \
++ __PV_RESTORE_ALL_CALLER_REGS \
++ "leaveq;"
++#else
++#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS
++#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS
++#endif
++
+ /* We save some registers, but all of them, that's too much. We clobber all
+ * caller saved registers but the argument parameter */
+ #define PV_SAVE_REGS "pushq %%rdi;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9357473..3202dcc 100644
--- a/arch/x86/include/asm/paravirt_types.h
@@ -822,10 +1255,10 @@
static inline void
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
-index d5b7e90..8d5e15a 100644
+index d5b7e90..396ff4c 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
-@@ -37,31 +37,10 @@
+@@ -37,31 +37,4 @@
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
@@ -835,14 +1268,12 @@
- XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
-};
-
- #ifdef CONFIG_XEN
+-#ifdef CONFIG_XEN
-extern enum xen_domain_type xen_domain_type;
-+extern void xen_guest_init(void);
- #else
+-#else
-#define xen_domain_type XEN_NATIVE
-+#define xen_guest_init() do { } while (0)
- #endif
-
+-#endif
+-
-#define xen_domain() (xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain() (xen_domain() && \
- xen_domain_type == XEN_PV_DOMAIN)
@@ -1260,7 +1691,7 @@
#include "sleep.h"
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
-index c0ebc63..c8b5021 100644
+index f0fa7a1..0c1876b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
@@ -1462,7 +1893,7 @@
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
force_iommu ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
-index dc4f486..dfb14f9 100644
+index dc4f486..7c954ff 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -63,7 +63,12 @@
@@ -1514,7 +1945,7 @@
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
-+ if (xen_domain())
++ if (xen_pv_domain())
+ return xen_pci_setup_msi_irqs(dev, nvec, type);
+
node = dev_to_node(&dev->dev);
@@ -1887,8 +2318,35 @@
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct die_args *args)
+diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
+index c097e7d..21feb03 100644
+--- a/arch/x86/kernel/entry_32.S
++++ b/arch/x86/kernel/entry_32.S
+@@ -1088,6 +1088,8 @@ ENTRY(xen_failsafe_callback)
+ .previous
+ ENDPROC(xen_failsafe_callback)
+
++BUILD_INTERRUPT(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK)
++
+ #endif /* CONFIG_XEN */
+
+ #ifdef CONFIG_FUNCTION_TRACER
+diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
+index b5c061f..1bf0911 100644
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -1364,6 +1364,9 @@ ENTRY(xen_failsafe_callback)
+ CFI_ENDPROC
+ END(xen_failsafe_callback)
+
++apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
++ xen_hvm_callback_vector smp_xen_hvm_callback_vector
++
+ #endif /* CONFIG_XEN */
+
+ /*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
-index 74f5a3f..b69c4e8 100644
+index 74f5a3f..9712ffc 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -98,7 +98,7 @@ static int __init hpet_setup(char *str)
@@ -1900,6 +2358,33 @@
{
boot_hpet_disable = 1;
return 1;
+@@ -949,16 +949,18 @@ fs_initcall(hpet_late_init);
+
+ void hpet_disable(void)
+ {
+- if (is_hpet_capable()) {
+- unsigned long cfg = hpet_readl(HPET_CFG);
++ unsigned int cfg;
+
+- if (hpet_legacy_int_enabled) {
+- cfg &= ~HPET_CFG_LEGACY;
+- hpet_legacy_int_enabled = 0;
+- }
+- cfg &= ~HPET_CFG_ENABLE;
+- hpet_writel(cfg, HPET_CFG);
++ if (!is_hpet_capable() || !hpet_address || !hpet_virt_address)
++ return;
++
++ cfg = hpet_readl(HPET_CFG);
++ if (hpet_legacy_int_enabled) {
++ cfg &= ~HPET_CFG_LEGACY;
++ hpet_legacy_int_enabled = 0;
+ }
++ cfg &= ~HPET_CFG_ENABLE;
++ hpet_writel(cfg, HPET_CFG);
+ }
+
+ #ifdef CONFIG_HPET_EMULATE_RTC
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 99c4d30..919c1a8 100644
--- a/arch/x86/kernel/ioport.c
@@ -2717,7 +3202,7 @@
}
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index d0ba107..0b4f9d1 100644
+index 5fd5b07..11d8667 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -73,16 +73,12 @@ void exit_thread(void)
@@ -2787,10 +3272,18 @@
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
-index 8425f7e..abd6489 100644
+index d7a0888..594e324 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
-@@ -89,6 +89,7 @@
+@@ -70,6 +70,7 @@
+ #include <linux/tboot.h>
+
+ #include <video/edid.h>
++#include <xen/xen.h>
+
+ #include <asm/mtrr.h>
+ #include <asm/apic.h>
+@@ -89,6 +90,7 @@
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/bugs.h>
@@ -2798,15 +3291,7 @@
#include <asm/system.h>
#include <asm/vsyscall.h>
-@@ -102,6 +103,7 @@
-
- #include <asm/paravirt.h>
- #include <asm/hypervisor.h>
-+#include <asm/xen/hypervisor.h>
-
- #include <asm/percpu.h>
- #include <asm/topology.h>
-@@ -955,6 +957,9 @@ void __init setup_arch(char **cmdline_p)
+@@ -966,6 +968,9 @@ void __init setup_arch(char **cmdline_p)
initmem_init(0, max_pfn);
@@ -2816,11 +3301,11 @@
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
-@@ -1023,6 +1028,7 @@ void __init setup_arch(char **cmdline_p)
+@@ -1034,6 +1039,7 @@ void __init setup_arch(char **cmdline_p)
probe_nr_irqs_gsi();
kvm_guest_init();
-+ xen_guest_init();
++ xen_hvm_guest_init();
e820_reserve_resources();
e820_mark_nosave_regions(max_low_pfn);
@@ -3282,10 +3767,21 @@
+}
+
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
-index b83e119..7675f9b 100644
+index b83e119..3db328f 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
-@@ -36,3 +36,40 @@ config XEN_DEBUG_FS
+@@ -29,6 +29,10 @@ config XEN_SAVE_RESTORE
+ depends on XEN && PM
+ default y
+
++config XEN_SCHED_CLOCK
++ bool
++ default n
++
+ config XEN_DEBUG_FS
+ bool "Enable Xen debug and tuning parameters in debugfs"
+ depends on XEN && DEBUG_FS
+@@ -36,3 +40,40 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
@@ -3327,10 +3823,16 @@
+ Enable support for passing PCI devices through to
+ unprivileged domains. (COMPLETELY UNTESTED)
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
-index 3bb4fc2..08ac224 100644
+index 3bb4fc2..13ca65c 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
-@@ -17,4 +17,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
+@@ -12,9 +12,12 @@ CFLAGS_mmu.o := $(nostackp)
+
+ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
+ time.o xen-asm.o xen-asm_$(BITS).o \
+- grant-table.o suspend.o
++ grant-table.o suspend.o platform-pci-unplug.o
+
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
@@ -3380,10 +3882,18 @@
+#endif
+}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 3578688..7638cd6 100644
+index 3578688..b20e9c5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
-@@ -28,12 +28,19 @@
+@@ -11,6 +11,7 @@
+ * Jeremy Fitzhardinge <jeremy at xensource.com>, XenSource Inc, 2007
+ */
+
++#include <linux/cpu.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+ #include <linux/smp.h>
+@@ -28,12 +29,15 @@
#include <linux/highmem.h>
#include <linux/console.h>
@@ -3393,17 +3903,21 @@
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/memory.h>
-+#include <xen/interface/hvm/hvm_op.h>
-+#include <xen/interface/hvm/params.h>
-+#include <xen/interface/platform_pci.h>
#include <xen/features.h>
#include <xen/page.h>
+#include <xen/hvm.h>
-+#include <xen/events.h>
#include <xen/hvc-console.h>
#include <asm/paravirt.h>
-@@ -66,6 +73,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+@@ -53,6 +57,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/reboot.h>
+ #include <asm/stackprotector.h>
++#include <asm/hypervisor.h>
+
+ #include "xen-ops.h"
+ #include "mmu.h"
+@@ -66,6 +71,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
@@ -3415,17 +3929,48 @@
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
-@@ -73,6 +85,9 @@ struct shared_info xen_dummy_shared_info;
+@@ -73,6 +83,9 @@ struct shared_info xen_dummy_shared_info;
void *xen_initial_gdt;
-+int xen_have_vector_callback;
-+int unplug;
++__read_mostly int xen_have_vector_callback;
++EXPORT_SYMBOL_GPL(xen_have_vector_callback);
+
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
-@@ -167,13 +182,16 @@ static void __init xen_banner(void)
+@@ -101,13 +114,17 @@ static void xen_vcpu_setup(int cpu)
+ struct vcpu_info *vcpup;
+
+ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+
+- if (!have_vcpu_info_placement)
+- return; /* already tested, not available */
++ if (cpu < MAX_VIRT_CPUS)
++ per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+
+- vcpup = &per_cpu(xen_vcpu_info, cpu);
++ if (!have_vcpu_info_placement) {
++ if (cpu >= MAX_VIRT_CPUS && setup_max_cpus > MAX_VIRT_CPUS)
++ setup_max_cpus = MAX_VIRT_CPUS;
++ return;
++ }
+
++ vcpup = &per_cpu(xen_vcpu_info, cpu);
+ info.mfn = arbitrary_virt_to_mfn(vcpup);
+ info.offset = offset_in_page(vcpup);
+
+@@ -122,6 +139,8 @@ static void xen_vcpu_setup(int cpu)
+ if (err) {
+ printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
+ have_vcpu_info_placement = 0;
++ if (setup_max_cpus > MAX_VIRT_CPUS)
++ setup_max_cpus = MAX_VIRT_CPUS;
+ } else {
+ /* This cpu is using the registered vcpu info, even if
+ later ones fail to. */
+@@ -167,13 +186,16 @@ static void __init xen_banner(void)
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
pv_info.name);
@@ -3444,7 +3989,7 @@
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
-@@ -187,7 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+@@ -187,7 +209,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
* unsupported kernel subsystems as possible.
*/
switch (*ax) {
@@ -3453,7 +3998,7 @@
maskecx = cpuid_leaf1_ecx_mask;
maskedx = cpuid_leaf1_edx_mask;
break;
-@@ -196,6 +214,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+@@ -196,6 +218,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
/* Suppress extended topology stuff */
maskebx = 0;
break;
@@ -3464,7 +4009,7 @@
}
asm(XEN_EMULATE_PREFIX "cpuid"
-@@ -215,13 +237,15 @@ static __init void xen_init_cpuid_mask(void)
+@@ -215,13 +241,15 @@ static __init void xen_init_cpuid_mask(void)
unsigned int ax, bx, cx, dx;
cpuid_leaf1_edx_mask =
@@ -3484,7 +4029,7 @@
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1;
-@@ -406,7 +430,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+@@ -406,7 +434,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
pte = pfn_pte(pfn, PAGE_KERNEL_RO);
@@ -3493,7 +4038,7 @@
BUG();
frames[f] = mfn;
-@@ -517,13 +541,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
+@@ -517,13 +545,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
return 0;
#ifdef CONFIG_X86_MCE
} else if (addr == (unsigned long)machine_check) {
@@ -3513,7 +4058,7 @@
#endif /* CONFIG_X86_64 */
info->address = addr;
-@@ -679,6 +703,18 @@ static void xen_set_iopl_mask(unsigned mask)
+@@ -679,6 +707,18 @@ static void xen_set_iopl_mask(unsigned mask)
HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
}
@@ -3532,7 +4077,7 @@
static void xen_io_delay(void)
{
}
-@@ -716,7 +752,7 @@ static u32 xen_safe_apic_wait_icr_idle(void)
+@@ -716,7 +756,7 @@ static u32 xen_safe_apic_wait_icr_idle(void)
return 0;
}
@@ -3541,7 +4086,7 @@
{
apic->read = xen_apic_read;
apic->write = xen_apic_write;
-@@ -728,7 +764,6 @@ static void set_xen_basic_apic_ops(void)
+@@ -728,7 +768,6 @@ static void set_xen_basic_apic_ops(void)
#endif
@@ -3549,7 +4094,7 @@
static void xen_clts(void)
{
struct multicall_space mcs;
-@@ -811,6 +846,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+@@ -811,6 +850,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
Xen console noise. */
break;
@@ -3561,6 +4106,17 @@
default:
ret = native_write_msr_safe(msr, low, high);
}
+@@ -923,10 +967,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {
+ .patch = xen_patch,
+ };
+
+-static const struct pv_time_ops xen_time_ops __initdata = {
+- .sched_clock = xen_sched_clock,
+-};
+-
+ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+ .cpuid = xen_cpuid,
+
@@ -978,6 +1018,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.load_sp0 = xen_load_sp0,
@@ -3593,7 +4149,7 @@
.shutdown = xen_machine_halt,
.crash_shutdown = xen_crash_shutdown,
.emergency_restart = xen_emergency_restart,
-@@ -1061,6 +1110,8 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1061,10 +1110,11 @@ asmlinkage void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
@@ -3602,20 +4158,26 @@
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
-@@ -1086,6 +1137,12 @@ asmlinkage void __init xen_start_kernel(void)
+- pv_time_ops = xen_time_ops;
+ pv_cpu_ops = xen_cpu_ops;
+ pv_apic_ops = xen_apic_ops;
+
+@@ -1072,13 +1122,7 @@ asmlinkage void __init xen_start_kernel(void)
+ x86_init.oem.arch_setup = xen_arch_setup;
+ x86_init.oem.banner = xen_banner;
+
+- x86_init.timers.timer_init = xen_time_init;
+- x86_init.timers.setup_percpu_clockev = x86_init_noop;
+- x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+-
+- x86_platform.calibrate_tsc = xen_tsc_khz;
+- x86_platform.get_wallclock = xen_get_wallclock;
+- x86_platform.set_wallclock = xen_set_wallclock;
++ xen_init_time_ops();
- xen_init_mmu_ops();
-
-+ /*
-+ * Prevent page tables from being allocated in highmem, even
-+ * if CONFIG_HIGHPTE is enabled.
-+ */
-+ __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
-+
- /* Prevent unwanted bits from being set in PTEs. */
- __supported_pte_mask &= ~_PAGE_GLOBAL;
- if (!xen_initial_domain())
-@@ -1116,6 +1173,10 @@ asmlinkage void __init xen_start_kernel(void)
+ /*
+ * Set up some pagetable state before starting to set any ptes.
+@@ -1116,6 +1160,10 @@ asmlinkage void __init xen_start_kernel(void)
*/
xen_setup_stackprotector();
@@ -3626,7 +4188,7 @@
xen_init_irq_ops();
xen_init_cpuid_mask();
-@@ -1144,6 +1205,8 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1144,6 +1192,8 @@ asmlinkage void __init xen_start_kernel(void)
pgd = (pgd_t *)xen_start_info->pt_base;
@@ -3635,7 +4197,7 @@
/* Don't do the full vcpu_info placement stuff until we have a
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-@@ -1153,6 +1216,7 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1153,6 +1203,7 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
@@ -3643,7 +4205,7 @@
init_mm.pgd = pgd;
-@@ -1162,6 +1226,14 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1162,6 +1213,14 @@ asmlinkage void __init xen_start_kernel(void)
if (xen_feature(XENFEAT_supervisor_mode_kernel))
pv_info.kernel_rpl = 0;
@@ -3658,7 +4220,7 @@
/* set the limit of our address space */
xen_reserve_top();
-@@ -1184,6 +1256,16 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1184,6 +1243,16 @@ asmlinkage void __init xen_start_kernel(void)
add_preferred_console("xenboot", 0, NULL);
add_preferred_console("tty", 0, NULL);
add_preferred_console("hvc", 0, NULL);
@@ -3675,7 +4237,7 @@
}
xen_raw_console_write("about to get started...\n");
-@@ -1197,3 +1279,141 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1197,3 +1266,124 @@ asmlinkage void __init xen_start_kernel(void)
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
#endif
}
@@ -3687,9 +4249,9 @@
+
+ for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+ cpuid(base, &eax, &ebx, &ecx, &edx);
-+ *(uint32_t*)(signature + 0) = ebx;
-+ *(uint32_t*)(signature + 4) = ecx;
-+ *(uint32_t*)(signature + 8) = edx;
++ *(uint32_t *)(signature + 0) = ebx;
++ *(uint32_t *)(signature + 4) = ecx;
++ *(uint32_t *)(signature + 8) = edx;
+ signature[12] = 0;
+
+ if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
@@ -3729,8 +4291,9 @@
+ return 0;
+}
+
-+static void init_shared_info(void)
++void xen_hvm_init_shared_info(void)
+{
++ int cpu;
+ struct xen_add_to_physmap xatp;
+ static struct shared_info *shared_info_page = 0;
+
@@ -3745,31 +4308,41 @@
+
+ HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+
-+ /* Don't do the full vcpu_info placement stuff until we have a
-+ possible map and a non-dummy shared_info. */
-+ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
++ /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
++ * page, we use it in the event channel upcall and in some pvclock
++ * related functions. We don't need the vcpu_info placement
++ * optimizations because we don't use any pv_mmu or pv_irq op on
++ * HVM.
++ * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
++ * online but xen_hvm_init_shared_info is run at resume time too and
++ * in that case multiple vcpus might be online. */
++ for_each_online_cpu(cpu) {
++ per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
++ }
+}
+
-+int xen_set_callback_via(uint64_t via)
++static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
++ unsigned long action, void *hcpu)
+{
-+ struct xen_hvm_param a;
-+
-+ a.domid = DOMID_SELF;
-+ a.index = HVM_PARAM_CALLBACK_IRQ;
-+ a.value = via;
-+ return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
++ int cpu = (long)hcpu;
++ switch (action) {
++ case CPU_UP_PREPARE:
++ per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
++ break;
++ default:
++ break;
++ }
++ return NOTIFY_OK;
+}
+
-+void do_hvm_pv_evtchn_intr(void)
-+{
-+ xen_hvm_evtchn_do_upcall(get_irq_regs());
-+}
++static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
++ .notifier_call = xen_hvm_cpu_notify,
++};
+
-+void xen_guest_init(void)
++void __init xen_hvm_guest_init(void)
+{
+ int r;
+ int major, minor;
-+ uint64_t callback_via;
+
+ if (xen_pv_domain())
+ return;
@@ -3778,47 +4351,19 @@
+ if (r < 0)
+ return;
+
-+ init_shared_info();
++ xen_hvm_init_shared_info();
+
-+ if (xen_feature(XENFEAT_hvm_callback_vector)) {
-+ callback_via = HVM_CALLBACK_VECTOR(GENERIC_INTERRUPT_VECTOR);
-+ xen_set_callback_via(callback_via);
-+ generic_interrupt_extension = do_hvm_pv_evtchn_intr;
++ if (xen_feature(XENFEAT_hvm_callback_vector))
+ xen_have_vector_callback = 1;
-+ }
-+ if (unplug) {
-+ /* unplug emulated devices */
-+ outw(UNPLUG_ALL, XEN_IOPORT_UNPLUG);
-+ }
++ register_cpu_notifier(&xen_hvm_cpu_notifier);
++ xen_unplug_emulated_devices();
+ have_vcpu_info_placement = 0;
+ x86_init.irqs.intr_init = xen_init_IRQ;
++ xen_hvm_init_time_ops();
++ xen_hvm_init_mmu_ops();
+}
-+
-+static int __init parse_unplug(char *arg)
-+{
-+ char *p, *q;
-+
-+ for (p = arg; p; p = q) {
-+ q = strchr(arg, ',');
-+ if (q)
-+ *q++ = '\0';
-+ if (!strcmp(p, "all"))
-+ unplug |= UNPLUG_ALL;
-+ else if (!strcmp(p, "ide-disks"))
-+ unplug |= UNPLUG_ALL_IDE_DISKS;
-+ else if (!strcmp(p, "aux-ide-disks"))
-+ unplug |= UNPLUG_AUX_IDE_DISKS;
-+ else if (!strcmp(p, "nics"))
-+ unplug |= UNPLUG_ALL_NICS;
-+ else
-+ printk(KERN_WARNING "unrecognised option '%s' "
-+ "in module parameter 'dev_unplug'\n", p);
-+ }
-+ return 0;
-+}
-+early_param("xen_unplug", parse_unplug);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 350a3de..8c6a858 100644
+index 350a3de..74e284f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,6 +42,7 @@
@@ -3829,7 +4374,7 @@
#include <linux/module.h>
#include <asm/pgtable.h>
-@@ -50,7 +51,10 @@
+@@ -50,14 +51,19 @@
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/paravirt.h>
@@ -3840,15 +4385,16 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
-@@ -58,6 +62,7 @@
+
#include <xen/page.h>
#include <xen/interface/xen.h>
++#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/version.h>
+#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
#include "multicalls.h"
-@@ -66,6 +71,13 @@
+@@ -66,6 +72,13 @@
#define MMU_UPDATE_HISTO 30
@@ -3862,7 +4408,7 @@
#ifdef CONFIG_XEN_DEBUG_FS
static struct {
-@@ -184,6 +196,26 @@ static inline unsigned p2m_index(unsigned long pfn)
+@@ -184,6 +197,26 @@ static inline unsigned p2m_index(unsigned long pfn)
return pfn % P2M_ENTRIES_PER_PAGE;
}
@@ -3889,7 +4435,7 @@
/* Build the parallel p2m_top_mfn structures */
void xen_build_mfn_list_list(void)
{
-@@ -315,6 +347,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
+@@ -315,6 +348,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
return PFN_DOWN(maddr.maddr);
}
@@ -3897,7 +4443,7 @@
xmaddr_t arbitrary_virt_to_machine(void *vaddr)
{
-@@ -376,6 +409,34 @@ static bool xen_page_pinned(void *ptr)
+@@ -376,6 +410,34 @@ static bool xen_page_pinned(void *ptr)
return PagePinned(page);
}
@@ -3932,7 +4478,7 @@
static void xen_extend_mmu_update(const struct mmu_update *update)
{
struct multicall_space mcs;
-@@ -452,6 +513,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
+@@ -452,6 +514,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
@@ -3944,7 +4490,7 @@
ADD_STATS(set_pte_at, 1);
// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
ADD_STATS(set_pte_at_current, mm == current->mm);
-@@ -522,9 +588,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
+@@ -522,9 +589,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
return val;
}
@@ -3980,7 +4526,7 @@
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
-@@ -534,9 +625,62 @@ pgdval_t xen_pgd_val(pgd_t pgd)
+@@ -534,9 +626,62 @@ pgdval_t xen_pgd_val(pgd_t pgd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
@@ -4044,7 +4590,7 @@
return native_make_pte(pte);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-@@ -592,6 +736,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
+@@ -592,6 +737,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
void xen_set_pte(pte_t *ptep, pte_t pte)
{
@@ -4056,7 +4602,7 @@
ADD_STATS(pte_update, 1);
// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-@@ -608,6 +757,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
+@@ -608,6 +758,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
#ifdef CONFIG_X86_PAE
void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
@@ -4068,7 +4614,7 @@
set_64bit((u64 *)ptep, native_pte_val(pte));
}
-@@ -934,8 +1088,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
+@@ -934,8 +1089,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
@@ -4077,7 +4623,7 @@
xen_mc_batch();
if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
-@@ -1219,7 +1371,7 @@ void xen_exit_mmap(struct mm_struct *mm)
+@@ -1219,7 +1372,7 @@ void xen_exit_mmap(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
/* pgd may not be pinned in the error exit path of execve */
@@ -4086,7 +4632,7 @@
xen_pgd_unpin(mm);
spin_unlock(&mm->page_table_lock);
-@@ -1288,12 +1440,19 @@ static void xen_flush_tlb_single(unsigned long addr)
+@@ -1288,12 +1441,19 @@ static void xen_flush_tlb_single(unsigned long addr)
preempt_enable();
}
@@ -4107,7 +4653,7 @@
} *args;
struct multicall_space mcs;
-@@ -1417,6 +1576,13 @@ static int xen_pgd_alloc(struct mm_struct *mm)
+@@ -1417,6 +1577,13 @@ static int xen_pgd_alloc(struct mm_struct *mm)
return ret;
}
@@ -4121,7 +4667,7 @@
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
-@@ -1448,10 +1614,17 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
+@@ -1448,10 +1615,17 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
#ifdef CONFIG_X86_32
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
@@ -4141,7 +4687,7 @@
return pte;
}
-@@ -1517,7 +1690,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
+@@ -1517,7 +1691,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
if (PagePinned(virt_to_page(mm->pgd))) {
SetPagePinned(page);
@@ -4149,7 +4695,7 @@
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-@@ -1620,6 +1792,7 @@ static void *m2v(phys_addr_t maddr)
+@@ -1620,6 +1793,7 @@ static void *m2v(phys_addr_t maddr)
return __ka(m2p(maddr));
}
@@ -4157,7 +4703,7 @@
static void set_page_prot(void *addr, pgprot_t prot)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-@@ -1675,6 +1848,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+@@ -1675,6 +1849,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
set_page_prot(pmd, PAGE_KERNEL_RO);
}
@@ -4178,7 +4724,7 @@
#ifdef CONFIG_X86_64
static void convert_pfn_mfn(void *v)
{
-@@ -1766,6 +1953,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1766,6 +1954,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
{
pmd_t *kernel_pmd;
@@ -4186,7 +4732,7 @@
max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
xen_start_info->nr_pt_frames * PAGE_SIZE +
-@@ -1777,6 +1965,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1777,6 +1966,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
xen_map_identity_early(level2_kernel_pgt, max_pfn);
memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
@@ -4207,7 +4753,7 @@
set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
-@@ -1799,6 +2001,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1799,6 +2002,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
}
#endif /* CONFIG_X86_64 */
@@ -4216,7 +4762,7 @@
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
pte_t pte;
-@@ -1828,9 +2032,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1828,9 +2033,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
pte = pfn_pte(phys, prot);
break;
@@ -4244,7 +4790,7 @@
}
__native_set_fixmap(idx, pte);
-@@ -1845,6 +2066,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1845,6 +2067,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
}
@@ -4274,7 +4820,7 @@
static __init void xen_post_allocator_init(void)
{
pv_mmu_ops.set_pte = xen_set_pte;
-@@ -1960,7 +2204,270 @@ void __init xen_init_mmu_ops(void)
+@@ -1960,6 +2205,301 @@ void __init xen_init_mmu_ops(void)
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
@@ -4540,11 +5086,52 @@
+ flush_tlb_all();
+
+ return err;
- }
++}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
++
++static void xen_hvm_exit_mmap(struct mm_struct *mm)
++{
++ struct xen_hvm_pagetable_dying a;
++ int rc;
++
++ a.domid = DOMID_SELF;
++ a.gpa = __pa(mm->pgd);
++ rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
++ WARN_ON_ONCE(rc < 0);
++}
++
++static int is_pagetable_dying_supported(void)
++{
++ struct xen_hvm_pagetable_dying a;
++ int rc = 0;
++
++ a.domid = DOMID_SELF;
++ a.gpa = 0x00;
++ rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
++ if (rc < 0) {
++ printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
++ return 0;
++ }
++ return 1;
++}
++
++void __init xen_hvm_init_mmu_ops(void)
++{
++ if (is_pagetable_dying_supported())
++ pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+ }
#ifdef CONFIG_XEN_DEBUG_FS
-
+diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
+index 5fe6bc7..fa938c4 100644
+--- a/arch/x86/xen/mmu.h
++++ b/arch/x86/xen/mmu.h
+@@ -60,4 +60,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ unsigned long xen_read_cr2_direct(void);
+
+ extern void xen_init_mmu_ops(void);
++extern void xen_hvm_init_mmu_ops(void);
+ #endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
new file mode 100644
index 0000000..4d55524
@@ -4605,7 +5192,7 @@
+}
diff --git a/arch/x86/xen/pci.c b/arch/x86/xen/pci.c
new file mode 100644
-index 0000000..3def132
+index 0000000..8ca31f1
--- /dev/null
+++ b/arch/x86/xen/pci.c
@@ -0,0 +1,296 @@
@@ -4634,7 +5221,7 @@
+ int shareable = 0;
+ char *name;
+
-+ if (!xen_domain())
++ if (!xen_pv_domain())
+ return -1;
+
+ if (triggering == ACPI_EDGE_SENSITIVE) {
@@ -4672,7 +5259,7 @@
+ int rc, irq;
+ struct physdev_setup_gsi setup_gsi;
+
-+ if (!xen_domain())
++ if (!xen_pv_domain())
+ return -1;
+
+ printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
@@ -4905,6 +5492,147 @@
+ return 0;
+}
+EXPORT_SYMBOL(xen_unregister_device_domain_owner);
+diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
+new file mode 100644
+index 0000000..2f7f3fb
+--- /dev/null
++++ b/arch/x86/xen/platform-pci-unplug.c
+@@ -0,0 +1,135 @@
++/******************************************************************************
++ * platform-pci-unplug.c
++ *
++ * Xen platform PCI device driver
++ * Copyright (c) 2010, Citrix
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++ * Place - Suite 330, Boston, MA 02111-1307 USA.
++ *
++ */
++
++#include <linux/init.h>
++#include <linux/io.h>
++#include <linux/module.h>
++
++#include <xen/platform_pci.h>
++
++#define XEN_PLATFORM_ERR_MAGIC -1
++#define XEN_PLATFORM_ERR_PROTOCOL -2
++#define XEN_PLATFORM_ERR_BLACKLIST -3
++
++/* store the value of xen_emul_unplug after the unplug is done */
++int xen_platform_pci_unplug;
++EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
++static int xen_emul_unplug;
++
++static int __init check_platform_magic(void)
++{
++ short magic;
++ char protocol;
++
++ magic = inw(XEN_IOPORT_MAGIC);
++ if (magic != XEN_IOPORT_MAGIC_VAL) {
++ printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
++ return XEN_PLATFORM_ERR_MAGIC;
++ }
++
++ protocol = inb(XEN_IOPORT_PROTOVER);
++
++ printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
++ protocol);
++
++ switch (protocol) {
++ case 1:
++ outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
++ outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
++ if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
++ printk(KERN_ERR "Xen Platform: blacklisted by host\n");
++ return XEN_PLATFORM_ERR_BLACKLIST;
++ }
++ break;
++ default:
++ printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version");
++ return XEN_PLATFORM_ERR_PROTOCOL;
++ }
++
++ return 0;
++}
++
++void __init xen_unplug_emulated_devices(void)
++{
++ int r;
++
++ /* check the version of the xen platform PCI device */
++ r = check_platform_magic();
++ /* If the version matches enable the Xen platform PCI driver.
++ * Also enable the Xen platform PCI driver if the version is really old
++ * and the user told us to ignore it. */
++ if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
++ (xen_emul_unplug & XEN_UNPLUG_IGNORE)))
++ return;
++ /* Set the default value of xen_emul_unplug depending on whether or
++ * not the Xen PV frontends and the Xen platform PCI driver have
++ * been compiled for this kernel (modules or built-in are both OK). */
++ if (!xen_emul_unplug) {
++ if (xen_must_unplug_nics()) {
++ printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
++ "been compiled for this kernel: unplug emulated NICs.\n");
++ xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
++ }
++ if (xen_must_unplug_disks()) {
++ printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
++ "been compiled for this kernel: unplug emulated disks.\n"
++ "You might have to change the root device\n"
++ "from /dev/hd[a-d] to /dev/xvd[a-d]\n"
++ "in your root= kernel command line option\n");
++ xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
++ }
++ }
++ /* Now unplug the emulated devices */
++ if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE))
++ outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
++ xen_platform_pci_unplug = xen_emul_unplug;
++}
++
++static int __init parse_xen_emul_unplug(char *arg)
++{
++ char *p, *q;
++ int l;
++
++ for (p = arg; p; p = q) {
++ q = strchr(p, ',');
++ if (q) {
++ l = q - p;
++ q++;
++ } else {
++ l = strlen(p);
++ }
++ if (!strncmp(p, "all", l))
++ xen_emul_unplug |= XEN_UNPLUG_ALL;
++ else if (!strncmp(p, "ide-disks", l))
++ xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
++ else if (!strncmp(p, "aux-ide-disks", l))
++ xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
++ else if (!strncmp(p, "nics", l))
++ xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
++ else if (!strncmp(p, "ignore", l))
++ xen_emul_unplug |= XEN_UNPLUG_IGNORE;
++ else
++ printk(KERN_WARNING "unrecognised option '%s' "
++ "in parameter 'xen_emul_unplug'\n", p);
++ }
++ return 0;
++}
++early_param("xen_emul_unplug", parse_xen_emul_unplug);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index ad0047f..f008629 100644
--- a/arch/x86/xen/setup.c
@@ -5123,13 +5851,84 @@
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* make sure interrupts start blocked */
+diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
+index a9c6611..1d789d5 100644
+--- a/arch/x86/xen/suspend.c
++++ b/arch/x86/xen/suspend.c
+@@ -26,6 +26,18 @@ void xen_pre_suspend(void)
+ BUG();
+ }
+
++void xen_hvm_post_suspend(int suspend_cancelled)
++{
++ int cpu;
++ xen_hvm_init_shared_info();
++ xen_callback_vector();
++ if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
++ for_each_online_cpu(cpu) {
++ xen_setup_runstate_info(cpu);
++ }
++ }
++}
++
+ void xen_post_suspend(int suspend_cancelled)
+ {
+ xen_build_mfn_list_list();
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
-index 9d1f853..af5463a 100644
+index 9d1f853..ca8efdb 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
-@@ -239,8 +239,22 @@ unsigned long xen_get_wallclock(void)
+@@ -19,6 +19,7 @@
+ #include <asm/xen/hypercall.h>
+
+ #include <xen/events.h>
++#include <xen/features.h>
+ #include <xen/interface/xen.h>
+ #include <xen/interface/vcpu.h>
+
+@@ -154,12 +155,13 @@ static void do_stolen_accounting(void)
+ account_idle_ticks(ticks);
+ }
+
++#ifdef CONFIG_XEN_SCHED_CLOCK
+ /*
+ * Xen sched_clock implementation. Returns the number of unstolen
+ * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
+ * states.
+ */
+-unsigned long long xen_sched_clock(void)
++static unsigned long long xen_sched_clock(void)
+ {
+ struct vcpu_runstate_info state;
+ cycle_t now;
+@@ -191,10 +193,10 @@ unsigned long long xen_sched_clock(void)
+
+ return ret;
+ }
+-
++#endif
+
+ /* Get the TSC speed from Xen */
+-unsigned long xen_tsc_khz(void)
++static unsigned long xen_tsc_khz(void)
+ {
+ struct pvclock_vcpu_time_info *info =
+ &HYPERVISOR_shared_info->vcpu_info[0].time;
+@@ -229,7 +231,7 @@ static void xen_read_wallclock(struct timespec *ts)
+ put_cpu_var(xen_vcpu);
+ }
+
+-unsigned long xen_get_wallclock(void)
++static unsigned long xen_get_wallclock(void)
+ {
+ struct timespec ts;
- int xen_set_wallclock(unsigned long now)
+@@ -237,10 +239,24 @@ unsigned long xen_get_wallclock(void)
+ return ts.tv_sec;
+ }
+
+-int xen_set_wallclock(unsigned long now)
++static int xen_set_wallclock(unsigned long now)
{
+ struct xen_platform_op op;
+ int rc;
@@ -5151,8 +5950,80 @@
}
static struct clocksource xen_clocksource __read_mostly = {
-diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
-new file mode 100644
+@@ -442,6 +458,8 @@ void xen_setup_timer(int cpu)
+
+ evt->cpumask = cpumask_of(cpu);
+ evt->irq = irq;
++
++ xen_setup_runstate_info(cpu);
+ }
+
+ void xen_teardown_timer(int cpu)
+@@ -472,7 +490,7 @@ void xen_timer_resume(void)
+ }
+ }
+
+-__init void xen_time_init(void)
++static __init void xen_time_init(void)
+ {
+ int cpu = smp_processor_id();
+
+@@ -496,3 +514,53 @@ __init void xen_time_init(void)
+ xen_setup_timer(cpu);
+ xen_setup_cpu_clockevents();
+ }
++
++static const struct pv_time_ops xen_time_ops __initdata = {
++#ifdef CONFIG_XEN_SCHED_CLOCK
++ .sched_clock = xen_sched_clock,
++#else
++ .sched_clock = xen_clocksource_read,
++#endif
++};
++
++__init void xen_init_time_ops(void)
++{
++ pv_time_ops = xen_time_ops;
++
++ x86_init.timers.timer_init = xen_time_init;
++ x86_init.timers.setup_percpu_clockev = x86_init_noop;
++ x86_cpuinit.setup_percpu_clockev = x86_init_noop;
++
++ x86_platform.calibrate_tsc = xen_tsc_khz;
++ x86_platform.get_wallclock = xen_get_wallclock;
++ x86_platform.set_wallclock = xen_set_wallclock;
++}
++
++static void xen_hvm_setup_cpu_clockevents(void)
++{
++ int cpu = smp_processor_id();
++ xen_setup_runstate_info(cpu);
++ xen_setup_timer(cpu);
++ xen_setup_cpu_clockevents();
++}
++
++__init void xen_hvm_init_time_ops(void)
++{
++ /* vector callback is needed otherwise we cannot receive interrupts
++ * on cpu > 0 */
++ if (!xen_have_vector_callback && num_present_cpus() > 1)
++ return;
++ if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
++ printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
++ "disable pv timer\n");
++ return;
++ }
++
++ pv_time_ops = xen_time_ops;
++ x86_init.timers.setup_percpu_clockev = xen_time_init;
++ x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
++
++ x86_platform.calibrate_tsc = xen_tsc_khz;
++ x86_platform.get_wallclock = xen_get_wallclock;
++ x86_platform.set_wallclock = xen_set_wallclock;
++}
+diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
+new file mode 100644
index 0000000..1cd7f4d
--- /dev/null
+++ b/arch/x86/xen/vga.c
@@ -5225,7 +6096,7 @@
+ }
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
-index f9153a3..1c1eff4 100644
+index f9153a3..03e97f5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,6 +30,9 @@ void xen_setup_machphys_mapping(void);
@@ -5238,7 +6109,32 @@
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
-@@ -82,6 +85,23 @@ static inline void xen_uninit_lock_cpu(int cpu)
+@@ -38,6 +41,10 @@ void xen_enable_sysenter(void);
+ void xen_enable_syscall(void);
+ void xen_vcpu_restore(void);
+
++void xen_callback_vector(void);
++void xen_hvm_init_shared_info(void);
++void __init xen_unplug_emulated_devices(void);
++
+ void __init xen_build_dynamic_phys_to_machine(void);
+
+ void xen_init_irq_ops(void);
+@@ -46,11 +53,8 @@ void xen_setup_runstate_info(int cpu);
+ void xen_teardown_timer(int cpu);
+ cycle_t xen_clocksource_read(void);
+ void xen_setup_cpu_clockevents(void);
+-unsigned long xen_tsc_khz(void);
+-void __init xen_time_init(void);
+-unsigned long xen_get_wallclock(void);
+-int xen_set_wallclock(unsigned long time);
+-unsigned long long xen_sched_clock(void);
++void __init xen_init_time_ops(void);
++void __init xen_hvm_init_time_ops(void);
+
+ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
+
+@@ -82,6 +86,23 @@ static inline void xen_uninit_lock_cpu(int cpu)
}
#endif
@@ -5373,7 +6269,7 @@
status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
if (ACPI_FAILURE(status)) {
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
-index ec742a4..4ccecf6 100644
+index ec742a4..492a899 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -58,6 +58,7 @@
@@ -5432,7 +6328,23 @@
{
if (acpi_device_dir(device)) {
-@@ -711,7 +710,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
+@@ -408,15 +407,6 @@ static int acpi_processor_remove_fs(struct acpi_device *device)
+
+ return 0;
+ }
+-#else
+-static inline int acpi_processor_add_fs(struct acpi_device *device)
+-{
+- return 0;
+-}
+-static inline int acpi_processor_remove_fs(struct acpi_device *device)
+-{
+- return 0;
+-}
+ #endif
+
+ /* Use the acpiid in MADT to map cpus in case of SMP */
+@@ -711,7 +701,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
static DEFINE_PER_CPU(void *, processor_device_array);
@@ -5441,7 +6353,7 @@
{
struct acpi_processor *pr = acpi_driver_data(device);
int saved;
-@@ -879,7 +878,7 @@ err_free_cpumask:
+@@ -879,7 +869,7 @@ err_free_cpumask:
return result;
}
@@ -5450,7 +6362,7 @@
{
struct acpi_processor *pr = NULL;
-@@ -1154,7 +1153,11 @@ static int __init acpi_processor_init(void)
+@@ -1154,7 +1144,11 @@ static int __init acpi_processor_init(void)
if (result < 0)
goto out_proc;
@@ -5463,7 +6375,7 @@
if (result < 0)
goto out_cpuidle;
-@@ -1190,7 +1193,10 @@ static void __exit acpi_processor_exit(void)
+@@ -1190,7 +1184,10 @@ static void __exit acpi_processor_exit(void)
acpi_processor_uninstall_hotplug_notify();
@@ -6179,7 +7091,7 @@
+ acpi_bus_unregister_driver(&xen_acpi_processor_driver);
+}
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
-index 7c85265..882ed92 100644
+index 9ed9292..3770a02 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -19,6 +19,8 @@
@@ -6235,10 +7147,10 @@
This driver implements the front-end of the Xen virtual
block device driver. It communicates with a back-end driver
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
-index b8578bb..75f730b 100644
+index b8578bb..89adac5 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
-@@ -42,6 +42,7 @@
+@@ -42,10 +42,12 @@
#include <linux/module.h>
#include <linux/scatterlist.h>
@@ -6246,7 +7158,12 @@
#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
-@@ -76,6 +77,7 @@ static const struct block_device_operations xlvbd_block_fops;
+ #include <xen/page.h>
++#include <xen/platform_pci.h>
+
+ #include <xen/interface/grant_table.h>
+ #include <xen/interface/io/blkif.h>
+@@ -76,6 +78,7 @@ static const struct block_device_operations xlvbd_block_fops;
*/
struct blkfront_info
{
@@ -6254,7 +7171,7 @@
struct xenbus_device *xbdev;
struct gendisk *gd;
int vdevice;
-@@ -92,16 +94,14 @@ struct blkfront_info
+@@ -92,16 +95,14 @@ struct blkfront_info
unsigned long shadow_free;
int feature_barrier;
int is_ready;
@@ -6275,7 +7192,7 @@
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF 0
-@@ -136,6 +136,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
+@@ -136,6 +137,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
info->shadow_free = id;
}
@@ -6331,7 +7248,7 @@
static void blkif_restart_queue_callback(void *arg)
{
struct blkfront_info *info = (struct blkfront_info *)arg;
-@@ -416,9 +465,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+@@ -416,9 +466,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if ((minor % nr_parts) == 0)
nr_minors = nr_parts;
@@ -6347,7 +7264,7 @@
offset = minor / nr_parts;
-@@ -449,7 +503,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+@@ -449,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (xlvbd_init_blk_queue(gd, sector_size)) {
del_gendisk(gd);
@@ -6356,7 +7273,7 @@
}
info->rq = gd->queue;
-@@ -469,10 +523,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+@@ -469,10 +524,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
return 0;
@@ -6402,7 +7319,7 @@
static void kick_pending_request_queues(struct blkfront_info *info)
{
if (!RING_FULL(&info->ring)) {
-@@ -650,7 +739,7 @@ fail:
+@@ -650,7 +740,7 @@ fail:
/* Common code used when first setting up, and when resuming. */
@@ -6411,7 +7328,7 @@
struct blkfront_info *info)
{
const char *message = NULL;
-@@ -710,7 +799,6 @@ again:
+@@ -710,7 +800,6 @@ again:
return err;
}
@@ -6419,7 +7336,29 @@
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffer for communication with the backend, and
-@@ -742,6 +830,7 @@ static int blkfront_probe(struct xenbus_device *dev,
+@@ -736,12 +825,29 @@ static int blkfront_probe(struct xenbus_device *dev,
+ }
+ }
+
++ /* no unplug has been done: do not hook devices != xen vbds */
++ if (xen_hvm_domain() && (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE)) {
++ int major;
++
++ if (!VDEV_IS_EXTENDED(vdevice))
++ major = BLKIF_MAJOR(vdevice);
++ else
++ major = XENVBD_MAJOR;
++
++ if (major != XENVBD_MAJOR) {
++ printk(KERN_INFO
++ "%s: HVM does not support vbd %d as xen block device\n",
++ __FUNCTION__, vdevice);
++ return -ENODEV;
++ }
++ }
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
return -ENOMEM;
}
@@ -6427,7 +7366,7 @@
info->xbdev = dev;
info->vdevice = vdevice;
info->connected = BLKIF_STATE_DISCONNECTED;
-@@ -755,7 +844,7 @@ static int blkfront_probe(struct xenbus_device *dev,
+@@ -755,7 +861,7 @@ static int blkfront_probe(struct xenbus_device *dev,
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
@@ -6436,7 +7375,7 @@
if (err) {
kfree(info);
dev_set_drvdata(&dev->dev, NULL);
-@@ -850,13 +939,50 @@ static int blkfront_resume(struct xenbus_device *dev)
+@@ -850,13 +956,50 @@ static int blkfront_resume(struct xenbus_device *dev)
blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
@@ -6488,7 +7427,7 @@
/*
* Invoked when the backend is finally 'ready' (and has told produced
-@@ -869,10 +995,29 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -869,10 +1012,29 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned int binfo;
int err;
@@ -6520,7 +7459,7 @@
dev_dbg(&info->xbdev->dev, "%s:%s.\n",
__func__, info->xbdev->otherend);
-@@ -915,57 +1060,21 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -915,57 +1077,21 @@ static void blkfront_connect(struct blkfront_info *info)
}
/**
@@ -6582,7 +7521,7 @@
case XenbusStateUnknown:
case XenbusStateClosed:
break;
-@@ -975,35 +1084,56 @@ static void backend_changed(struct xenbus_device *dev,
+@@ -975,35 +1101,56 @@ static void backend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
@@ -6659,7 +7598,7 @@
return 0;
}
-@@ -1012,30 +1142,68 @@ static int blkfront_is_ready(struct xenbus_device *dev)
+@@ -1012,30 +1159,68 @@ static int blkfront_is_ready(struct xenbus_device *dev)
{
struct blkfront_info *info = dev_get_drvdata(&dev->dev);
@@ -6742,7 +7681,7 @@
return 0;
}
-@@ -1061,7 +1229,7 @@ static struct xenbus_driver blkfront = {
+@@ -1061,7 +1246,7 @@ static struct xenbus_driver blkfront = {
.probe = blkfront_probe,
.remove = blkfront_remove,
.resume = blkfront_resume,
@@ -7210,7 +8149,7 @@
help
The network device frontend driver allows the kernel to
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
-index baa051d..ee7465a 100644
+index baa051d..328fe40 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -42,6 +42,7 @@
@@ -7221,7 +8160,139 @@
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
-@@ -1393,7 +1394,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
+@@ -58,6 +59,19 @@ struct netfront_cb {
+ unsigned offset;
+ };
+
++#define MICRO_SECOND 1000000UL
++#define NANO_SECOND 1000000000UL
++#define DEFAULT_SMART_POLL_FREQ 1000UL
++
++struct netfront_smart_poll {
++ struct hrtimer timer;
++ struct net_device *netdev;
++ unsigned int smart_poll_freq;
++ unsigned int feature_smart_poll;
++ unsigned int active;
++ unsigned long counter;
++};
++
+ #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
+
+ #define RX_COPY_THRESHOLD 256
+@@ -104,7 +118,7 @@ struct netfront_info {
+
+ /* Receive-ring batched refills. */
+ #define RX_MIN_TARGET 8
+-#define RX_DFL_MIN_TARGET 64
++#define RX_DFL_MIN_TARGET 80
+ #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+ unsigned rx_min_target, rx_max_target, rx_target;
+ struct sk_buff_head rx_batch;
+@@ -118,6 +132,8 @@ struct netfront_info {
+ unsigned long rx_pfn_array[NET_RX_RING_SIZE];
+ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++
++ struct netfront_smart_poll smart_poll;
+ };
+
+ struct netfront_rx_info {
+@@ -337,15 +353,17 @@ static int xennet_open(struct net_device *dev)
+ return 0;
+ }
+
+-static void xennet_tx_buf_gc(struct net_device *dev)
++static int xennet_tx_buf_gc(struct net_device *dev)
+ {
+ RING_IDX cons, prod;
++ RING_IDX cons_begin, cons_end;
+ unsigned short id;
+ struct netfront_info *np = netdev_priv(dev);
+ struct sk_buff *skb;
+
+ BUG_ON(!netif_carrier_ok(dev));
+
++ cons_begin = np->tx.rsp_cons;
+ do {
+ prod = np->tx.sring->rsp_prod;
+ rmb(); /* Ensure we see responses up to 'rp'. */
+@@ -390,7 +408,11 @@ static void xennet_tx_buf_gc(struct net_device *dev)
+ mb(); /* update shared area */
+ } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
+
++ cons_end = np->tx.rsp_cons;
++
+ xennet_maybe_wake_tx(dev);
++
++ return (cons_begin == cons_end);
+ }
+
+ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+@@ -1305,6 +1327,50 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+ return 0;
+ }
+
++static enum hrtimer_restart smart_poll_function(struct hrtimer *timer)
++{
++ struct netfront_smart_poll *psmart_poll;
++ struct net_device *dev;
++ struct netfront_info *np;
++ unsigned long flags;
++ unsigned int tx_active = 0, rx_active = 0;
++
++ psmart_poll = container_of(timer, struct netfront_smart_poll, timer);
++ dev = psmart_poll->netdev;
++ np = netdev_priv(dev);
++
++ spin_lock_irqsave(&np->tx_lock, flags);
++ np->smart_poll.counter++;
++
++ if (likely(netif_carrier_ok(dev))) {
++ tx_active = !(xennet_tx_buf_gc(dev));
++ /* Under tx_lock: protects access to rx shared-ring indexes. */
++ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
++ rx_active = 1;
++ napi_schedule(&np->napi);
++ }
++ }
++
++ np->smart_poll.active |= (tx_active || rx_active);
++ if (np->smart_poll.counter %
++ (np->smart_poll.smart_poll_freq / 10) == 0) {
++ if (!np->smart_poll.active) {
++ np->rx.sring->private.netif.smartpoll_active = 0;
++ goto end;
++ }
++ np->smart_poll.active = 0;
++ }
++
++ if (np->rx.sring->private.netif.smartpoll_active)
++ hrtimer_start(timer,
++ ktime_set(0, NANO_SECOND/psmart_poll->smart_poll_freq),
++ HRTIMER_MODE_REL);
++
++end:
++ spin_unlock_irqrestore(&np->tx_lock, flags);
++ return HRTIMER_NORESTART;
++}
++
+ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
+ {
+ struct net_device *dev = dev_id;
+@@ -1320,6 +1386,11 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
+ napi_schedule(&np->napi);
+ }
+
++ if (np->smart_poll.feature_smart_poll)
++ hrtimer_start(&np->smart_poll.timer,
++ ktime_set(0, NANO_SECOND/np->smart_poll.smart_poll_freq),
++ HRTIMER_MODE_REL);
++
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ return IRQ_HANDLED;
+@@ -1393,7 +1464,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
}
/* Common code used when first setting up, and when resuming. */
@@ -7230,16 +8301,45 @@
struct netfront_info *info)
{
const char *message;
-@@ -1543,7 +1544,7 @@ static int xennet_connect(struct net_device *dev)
+@@ -1456,6 +1527,12 @@ again:
+ goto abort_transaction;
+ }
+
++ err = xenbus_printf(xbt, dev->nodename, "feature-smart-poll", "%d", 1);
++ if (err) {
++ message = "writing feature-smart-poll";
++ goto abort_transaction;
++ }
++
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+@@ -1543,7 +1620,23 @@ static int xennet_connect(struct net_device *dev)
return -ENODEV;
}
- err = talk_to_backend(np->xbdev, np);
++ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++ "feature-smart-poll", "%u",
++ &np->smart_poll.feature_smart_poll);
++ if (err != 1)
++ np->smart_poll.feature_smart_poll = 0;
++
++ if (np->smart_poll.feature_smart_poll) {
++ hrtimer_init(&np->smart_poll.timer, CLOCK_MONOTONIC,
++ HRTIMER_MODE_REL);
++ np->smart_poll.timer.function = smart_poll_function;
++ np->smart_poll.netdev = dev;
++ np->smart_poll.smart_poll_freq = DEFAULT_SMART_POLL_FREQ;
++ np->smart_poll.active = 0;
++ np->smart_poll.counter = 0;
++ }
++
+ err = talk_to_netback(np->xbdev, np);
if (err)
return err;
-@@ -1597,7 +1598,7 @@ static int xennet_connect(struct net_device *dev)
+@@ -1597,7 +1690,7 @@ static int xennet_connect(struct net_device *dev)
/**
* Callback received when the backend's state changes.
*/
@@ -7248,7 +8348,7 @@
enum xenbus_state backend_state)
{
struct netfront_info *np = dev_get_drvdata(&dev->dev);
-@@ -1608,6 +1609,8 @@ static void backend_changed(struct xenbus_device *dev,
+@@ -1608,6 +1701,8 @@ static void backend_changed(struct xenbus_device *dev,
switch (backend_state) {
case XenbusStateInitialising:
case XenbusStateInitialised:
@@ -7257,7 +8357,38 @@
case XenbusStateConnected:
case XenbusStateUnknown:
case XenbusStateClosed:
-@@ -1798,7 +1801,7 @@ static struct xenbus_driver netfront_driver = {
+@@ -1627,12 +1722,30 @@ static void backend_changed(struct xenbus_device *dev,
+ }
+ }
+
++static int xennet_get_coalesce(struct net_device *netdev,
++ struct ethtool_coalesce *ec)
++{
++ struct netfront_info *np = netdev_priv(netdev);
++ ec->rx_coalesce_usecs = MICRO_SECOND / np->smart_poll.smart_poll_freq;
++ return 0;
++}
++
++static int xennet_set_coalesce(struct net_device *netdev,
++ struct ethtool_coalesce *ec)
++{
++ struct netfront_info *np = netdev_priv(netdev);
++ np->smart_poll.smart_poll_freq = MICRO_SECOND / ec->rx_coalesce_usecs;
++ return 0;
++}
++
+ static const struct ethtool_ops xennet_ethtool_ops =
+ {
+ .set_tx_csum = ethtool_op_set_tx_csum,
+ .set_sg = xennet_set_sg,
+ .set_tso = xennet_set_tso,
+ .get_link = ethtool_op_get_link,
++ .get_coalesce = xennet_get_coalesce,
++ .set_coalesce = xennet_set_coalesce,
+ };
+
+ #ifdef CONFIG_SYSFS
+@@ -1798,7 +1911,7 @@ static struct xenbus_driver netfront_driver = {
.probe = netfront_probe,
.remove = __devexit_p(xennet_remove),
.resume = netfront_resume,
@@ -7365,6 +8496,32 @@
dma_ops = &intel_dma_ops;
init_iommu_sysfs();
+diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
+index e03fe98..f9db891 100644
+--- a/drivers/pci/iov.c
++++ b/drivers/pci/iov.c
+@@ -706,6 +706,21 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+ }
+ EXPORT_SYMBOL_GPL(pci_sriov_migration);
+
++/**
+ * pci_num_vf - return number of VFs associated with a PF
++ * @dev: the PCI device
++ *
++ * Returns number of VFs, or 0 if SR-IOV is not enabled.
++ */
++int pci_num_vf(struct pci_dev *dev)
++{
++ if (!dev || !dev->is_physfn)
++ return 0;
++ else
++ return dev->sriov->nr_virtfn;
++}
++EXPORT_SYMBOL_GPL(pci_num_vf);
++
+ static int ats_alloc_one(struct pci_dev *dev, int ps)
+ {
+ int pos;
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index f9cf317..a77a46f 100644
--- a/drivers/pci/msi.c
@@ -7679,10 +8836,10 @@
+
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
new file mode 100644
-index 0000000..360eccf
+index 0000000..76d0bdd
--- /dev/null
+++ b/drivers/pci/xen-pcifront.c
-@@ -0,0 +1,1156 @@
+@@ -0,0 +1,1157 @@
+/*
+ * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn)
+ *
@@ -7727,7 +8884,6 @@
+struct pcifront_device {
+ struct xenbus_device *xdev;
+ struct list_head root_buses;
-+ spinlock_t dev_lock;
+
+ int evtchn;
+ int gnt_ref;
@@ -8084,7 +9240,7 @@
+ r = &dev->resource[i];
+
+ if (!r->parent && r->start && r->flags) {
-+ dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n",
++ dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
+ pci_name(dev), i);
+ if (pci_claim_resource(dev, i)) {
+ dev_err(&pdev->xdev->dev, "Could not claim "
@@ -8098,6 +9254,36 @@
+ return 0;
+}
+
++int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
++ unsigned int domain, unsigned int bus,
++ struct pci_bus *b)
++{
++ struct pci_dev *d;
++ unsigned int devfn;
++ int err;
++
++ /* Scan the bus for functions and add them.
++ * We omit handling of PCI bridge attachment because pciback prevents
++ * bridges from being exported.
++ */
++ for (devfn = 0; devfn < 0x100; devfn++) {
++ d = pci_get_slot(b, devfn);
++ if (d) {
++ /* Device is already known. */
++ pci_dev_put(d);
++ continue;
++ }
++
++ d = pci_scan_single_device(b, devfn);
++ if (d)
++ dev_info(&pdev->xdev->dev, "New device on "
++ "%04x:%02x:%02x.%02x found.\n", domain, bus,
++ PCI_SLOT(devfn), PCI_FUNC(devfn));
++ }
++
++ return 0;
++}
++
+int __devinit pcifront_scan_root(struct pcifront_device *pdev,
+ unsigned int domain, unsigned int bus)
+{
@@ -8142,12 +9328,17 @@
+
+ list_add(&bus_entry->list, &pdev->root_buses);
+
++ /* pci_scan_bus_parented skips devices which do not have
++ * devfn==0. The pcifront_scan_bus enumerates all devfns. */
++ err = pcifront_scan_bus(pdev, domain, bus, b);
++
+ /* Claim resources before going "live" with our devices */
+ pci_walk_bus(b, pcifront_claim_resource, pdev);
+
++ /* Create SysFS and notify udev of the devices. Aka: "going live" */
+ pci_bus_add_devices(b);
+
-+ return 0;
++ return err;
+
+err_out:
+ kfree(bus_entry);
@@ -8159,10 +9350,8 @@
+int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
+ unsigned int domain, unsigned int bus)
+{
-+ struct pci_bus *b;
-+ struct pci_dev *d;
-+ unsigned int devfn;
+ int err;
++ struct pci_bus *b;
+
+#ifndef CONFIG_PCI_DOMAINS
+ if (domain != 0) {
@@ -8182,33 +9371,15 @@
+ /* If the bus is unknown, create it. */
+ return pcifront_scan_root(pdev, domain, bus);
+
-+ /* Rescan the bus for newly attached functions and add.
-+ * We omit handling of PCI bridge attachment because pciback prevents
-+ * bridges from being exported.
-+ */
-+ for (devfn = 0; devfn < 0x100; devfn++) {
-+ d = pci_get_slot(b, devfn);
-+ if (d) {
-+ /* Device is already known. */
-+ pci_dev_put(d);
-+ continue;
-+ }
++ err = pcifront_scan_bus(pdev, domain, bus, b);
+
-+ d = pci_scan_single_device(b, devfn);
-+ if (d) {
-+ dev_info(&pdev->xdev->dev, "New device on "
-+ "%04x:%02x:%02x.%02x found.\n", domain, bus,
-+ PCI_SLOT(devfn), PCI_FUNC(devfn));
-+ err = pci_bus_add_device(d);
-+ if (err) {
-+ dev_err(&pdev->xdev->dev, "Failed to add "
-+ " device to bus.\n");
-+ return err;
-+ }
-+ }
-+ }
++ /* Claim resources before going "live" with our devices */
++ pci_walk_bus(b, pcifront_claim_resource, pdev);
+
-+ return 0;
++ /* Create SysFS and notify udev of the devices. Aka: "going live" */
++ pci_bus_add_devices(b);
++
++ return err;
+}
+
+static void free_root_bus_devs(struct pci_bus *bus)
@@ -8397,7 +9568,6 @@
+
+ INIT_LIST_HEAD(&pdev->root_buses);
+
-+ spin_lock_init(&pdev->dev_lock);
+ spin_lock_init(&pdev->sh_info_lock);
+
+ pdev->evtchn = INVALID_EVTCHN;
@@ -8508,7 +9678,6 @@
+ char str[64];
+ unsigned int domain, bus;
+
-+ spin_lock(&pdev->dev_lock);
+
+ /* Only connect once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
@@ -8564,11 +9733,8 @@
+ }
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-+ if (err)
-+ goto out;
+
+out:
-+ spin_unlock(&pdev->dev_lock);
+ return err;
+}
+
@@ -8577,7 +9743,6 @@
+ int err = 0;
+ enum xenbus_state prev_state;
+
-+ spin_lock(&pdev->dev_lock);
+
+ prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
+
@@ -8592,7 +9757,6 @@
+ err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
+
+out:
-+ spin_unlock(&pdev->dev_lock);
+
+ return err;
+}
@@ -8604,8 +9768,6 @@
+ unsigned int domain, bus;
+ char str[64];
+
-+ spin_lock(&pdev->dev_lock);
-+
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateReconfiguring)
+ goto out;
@@ -8654,7 +9816,6 @@
+ xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+
+out:
-+ spin_unlock(&pdev->dev_lock);
+ return err;
+}
+
@@ -8667,8 +9828,6 @@
+ struct pci_dev *pci_dev;
+ char str[64];
+
-+ spin_lock(&pdev->dev_lock);
-+
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateConnected)
+ goto out;
@@ -8739,7 +9898,6 @@
+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
+
+out:
-+ spin_unlock(&pdev->dev_lock);
+ return err;
+}
+
@@ -8948,7 +10106,7 @@
/* Nothing to do if running in dom0. */
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index cab100a..c63eeae 100644
+index cab100a..a3e1923 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -28,6 +28,110 @@ config XEN_DEV_EVTCHN
@@ -9062,7 +10220,7 @@
config XENFS
tristate "Xen filesystem"
depends on XEN
-@@ -60,4 +164,36 @@ config XEN_SYS_HYPERVISOR
+@@ -60,4 +164,37 @@ config XEN_SYS_HYPERVISOR
Create entries under /sys/hypervisor describing the Xen
hypervisor environment. When running native or in another
virtual environment, /sys/hypervisor will still be present,
@@ -9070,15 +10228,6 @@
\ No newline at end of file
+ but will have no xen contents.
+
-+config XEN_PLATFORM_PCI
-+ tristate "xen platform pci device driver"
-+ depends on XEN
-+ help
-+ Driver for the Xen PCI Platform device: it is responsible for
-+ initializing xenbus and grant_table when running in a Xen HVM
-+ domain. As a consequence this driver is required to run any Xen PV
-+ frontend on Xen HVM.
-+
+config XEN_MCE
+ def_bool y
+ depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
@@ -9101,8 +10250,18 @@
+ tristate
+ depends on XEN_DOM0 && ACPI_PROCESSOR && CPU_FREQ
+ default y
++
++config XEN_PLATFORM_PCI
++ tristate "xen platform pci device driver"
++ depends on XEN
++ default m
++ help
++ Driver for the Xen PCI Platform device: it is responsible for
++ initializing xenbus and grant_table when running in a Xen HVM
++ domain. As a consequence this driver is required to run any Xen PV
++ frontend on Xen HVM.
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index 7c28434..5771359 100644
+index 7c28434..ef1ea63 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,12 +1,27 @@
@@ -9132,12 +10291,12 @@
+obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
+obj-$(CONFIG_XENFS) += xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-+obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
+obj-$(CONFIG_XEN_MCE) += mce.o
+
+obj-$(CONFIG_XEN_S3) += acpi.o
+obj-$(CONFIG_ACPI_PROCESSOR_XEN) += acpi_processor.o
+obj-$(CONFIG_ACPI_HOTPLUG_MEMORY) += xen_acpi_memhotplug.o
++obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
+
+xen-evtchn-y := evtchn.o
+xen-gntdev-y := gntdev.o
@@ -9594,7 +10753,7 @@
+subsys_initcall(xen_acpi_processor_extcntl_init);
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
-index 4204336..d7c0eae 100644
+index 4204336..a5ac75b 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -43,6 +43,7 @@
@@ -9764,8 +10923,9 @@
static int decrease_reservation(unsigned long nr_pages)
{
- unsigned long pfn, i, flags;
+- struct page *page;
+ unsigned long pfn, lpfn, mfn, i, j, flags;
- struct page *page;
++ struct page *page = NULL;
int need_sleep = 0;
- int ret;
+ int discontig, discontig_free;
@@ -9785,7 +10945,7 @@
nr_pages = i;
need_sleep = 1;
break;
-@@ -282,37 +321,50 @@ static int decrease_reservation(unsigned long nr_pages)
+@@ -282,37 +321,52 @@ static int decrease_reservation(unsigned long nr_pages)
frame_list[i] = pfn_to_mfn(pfn);
scrub_page(page);
@@ -9819,6 +10979,8 @@
+ discontig_free = 1;
+
+ set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
++ page = pfn_to_page(lpfn);
++
+ if (!PageHighMem(page)) {
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(lpfn << PAGE_SHIFT),
@@ -9850,7 +11012,7 @@
return need_sleep;
}
-@@ -379,7 +431,7 @@ static void watch_target(struct xenbus_watch *watch,
+@@ -379,7 +433,7 @@ static void watch_target(struct xenbus_watch *watch,
/* The given memory/target value is in KiB, so it needs converting to
* pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
*/
@@ -9859,22 +11021,22 @@
}
static int balloon_init_watcher(struct notifier_block *notifier,
-@@ -405,9 +457,12 @@ static int __init balloon_init(void)
+@@ -405,9 +459,12 @@ static int __init balloon_init(void)
if (!xen_pv_domain())
return -ENODEV;
- pr_info("xen_balloon: Initialising balloon driver.\n");
+ pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+ balloon_order);
-+
-+ balloon_npages = 1 << balloon_order;
- balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
++ balloon_npages = 1 << balloon_order;
++
+ balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
-@@ -420,7 +475,7 @@ static int __init balloon_init(void)
+@@ -420,7 +477,7 @@ static int __init balloon_init(void)
register_balloon(&balloon_sysdev);
/* Initialise the balloon with excess memory space. */
@@ -9883,7 +11045,7 @@
page = pfn_to_page(pfn);
if (!PageReserved(page))
balloon_append(page);
-@@ -444,6 +499,121 @@ static void balloon_exit(void)
+@@ -444,6 +501,121 @@ static void balloon_exit(void)
module_exit(balloon_exit);
@@ -10005,7 +11167,7 @@
#define BALLOON_SHOW(name, format, args...) \
static ssize_t show_##name(struct sys_device *dev, \
struct sysdev_attribute *attr, \
-@@ -477,7 +647,7 @@ static ssize_t store_target_kb(struct sys_device *dev,
+@@ -477,7 +649,7 @@ static ssize_t store_target_kb(struct sys_device *dev,
target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
@@ -10014,7 +11176,7 @@
return count;
}
-@@ -491,7 +661,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
+@@ -491,7 +663,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
{
return sprintf(buf, "%llu\n",
(unsigned long long)balloon_stats.target_pages
@@ -10023,7 +11185,7 @@
}
static ssize_t store_target(struct sys_device *dev,
-@@ -507,7 +677,7 @@ static ssize_t store_target(struct sys_device *dev,
+@@ -507,7 +679,7 @@ static ssize_t store_target(struct sys_device *dev,
target_bytes = memparse(buf, &endchar);
@@ -11410,10 +12572,10 @@
+}
diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c
new file mode 100644
-index 0000000..c31e5c4
+index 0000000..a0534fc
--- /dev/null
+++ b/drivers/xen/blkback/xenbus.c
-@@ -0,0 +1,546 @@
+@@ -0,0 +1,553 @@
+/* Xenbus code for blkif backend
+ Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
+ Copyright (C) 2005 XenSource Ltd
@@ -11507,6 +12669,13 @@
+ return;
+ }
+
++ err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
++ if (err) {
++ xenbus_dev_error(blkif->be->dev, err, "block flush");
++ return;
++ }
++ invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
++
+ blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
+ if (IS_ERR(blkif->xenblkd)) {
+ err = PTR_ERR(blkif->xenblkd);
@@ -11962,19 +13131,19 @@
+}
diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile
new file mode 100644
-index 0000000..99ff53c
+index 0000000..822b4e4
--- /dev/null
+++ b/drivers/xen/blktap/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o
+
-+blktap-objs := control.o ring.o wait_queue.o device.o request.o sysfs.o
++blktap-objs := control.o ring.o device.o request.o sysfs.o
diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
new file mode 100644
-index 0000000..db4cf02
+index 0000000..33603cd
--- /dev/null
+++ b/drivers/xen/blktap/blktap.h
-@@ -0,0 +1,253 @@
+@@ -0,0 +1,231 @@
+#ifndef _BLKTAP_H_
+#define _BLKTAP_H_
+
@@ -11986,8 +13155,6 @@
+#include <xen/blkif.h>
+#include <xen/grant_table.h>
+
-+//#define ENABLE_PASSTHROUGH
-+
+extern int blktap_debug_level;
+
+#define BTPRINTK(level, tag, force, _f, _a...) \
@@ -12008,26 +13175,17 @@
+#define BLKTAP_RING_FD 2
+#define BLKTAP_RING_VMA 3
+#define BLKTAP_DEVICE 4
-+#define BLKTAP_PAUSE_REQUESTED 6
-+#define BLKTAP_PAUSED 7
+#define BLKTAP_SHUTDOWN_REQUESTED 8
+#define BLKTAP_PASSTHROUGH 9
-+#define BLKTAP_DEFERRED 10
+
+/* blktap IOCTLs: */
+#define BLKTAP2_IOCTL_KICK_FE 1
+#define BLKTAP2_IOCTL_ALLOC_TAP 200
+#define BLKTAP2_IOCTL_FREE_TAP 201
+#define BLKTAP2_IOCTL_CREATE_DEVICE 202
-+#define BLKTAP2_IOCTL_SET_PARAMS 203
-+#define BLKTAP2_IOCTL_PAUSE 204
-+#define BLKTAP2_IOCTL_REOPEN 205
-+#define BLKTAP2_IOCTL_RESUME 206
+
+#define BLKTAP2_MAX_MESSAGE_LEN 256
+
-+#define BLKTAP2_RING_MESSAGE_PAUSE 1
-+#define BLKTAP2_RING_MESSAGE_RESUME 2
+#define BLKTAP2_RING_MESSAGE_CLOSE 3
+
+#define BLKTAP_REQUEST_FREE 0
@@ -12098,8 +13256,6 @@
+ unsigned long ring_vstart;
+ unsigned long user_vstart;
+
-+ int response;
-+
+ wait_queue_head_t poll_wait;
+
+ dev_t devno;
@@ -12145,8 +13301,6 @@
+
+ struct blktap_params params;
+
-+ struct rw_semaphore tap_sem;
-+
+ struct blktap_ring ring;
+ struct blktap_device device;
+
@@ -12155,7 +13309,6 @@
+ struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+
+ wait_queue_head_t wq;
-+ struct list_head deferred_queue;
+
+ struct blktap_statistics stats;
+};
@@ -12184,8 +13337,6 @@
+int blktap_ring_free(void);
+int blktap_ring_create(struct blktap *);
+int blktap_ring_destroy(struct blktap *);
-+int blktap_ring_pause(struct blktap *);
-+int blktap_ring_resume(struct blktap *);
+void blktap_ring_kick_user(struct blktap *);
+
+int blktap_sysfs_init(void);
@@ -12197,8 +13348,7 @@
+void blktap_device_free(void);
+int blktap_device_create(struct blktap *);
+int blktap_device_destroy(struct blktap *);
-+int blktap_device_pause(struct blktap *);
-+int blktap_device_resume(struct blktap *);
++int blktap_device_run_queue(struct blktap *);
+void blktap_device_restart(struct blktap *);
+void blktap_device_finish_request(struct blktap *,
+ struct blkif_response *,
@@ -12209,9 +13359,6 @@
+ unsigned, unsigned);
+#endif
+
-+void blktap_defer(struct blktap *);
-+void blktap_run_deferred(void);
-+
+int blktap_request_pool_init(void);
+void blktap_request_pool_free(void);
+int blktap_request_pool_grow(void);
@@ -12230,10 +13377,10 @@
+#endif
diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
new file mode 100644
-index 0000000..a4852f7
+index 0000000..6a3f3e1
--- /dev/null
+++ b/drivers/xen/blktap/control.c
-@@ -0,0 +1,284 @@
+@@ -0,0 +1,266 @@
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/miscdevice.h>
@@ -12256,7 +13403,6 @@
+
+ memset(tap, 0, sizeof(*tap));
+ set_bit(BLKTAP_CONTROL, &tap->dev_inuse);
-+ init_rwsem(&tap->tap_sem);
+ init_waitqueue_head(&tap->wq);
+ atomic_set(&tap->refcnt, 0);
+ sg_init_table(tap->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -12400,46 +13546,29 @@
+blktap_control_destroy_device(struct blktap *tap)
+{
+ int err;
-+ unsigned long inuse;
+
+ if (!tap)
+ return 0;
+
+ set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
+
-+ for (;;) {
-+ inuse = tap->dev_inuse;
-+ err = blktap_device_destroy(tap);
-+ if (err)
-+ goto wait;
++ err = blktap_device_destroy(tap);
++ if (err)
++ return err;
+
-+ inuse = tap->dev_inuse;
-+ err = blktap_ring_destroy(tap);
-+ if (err)
-+ goto wait;
++ err = blktap_sysfs_destroy(tap);
++ if (err)
++ return err;
+
-+ inuse = tap->dev_inuse;
-+ err = blktap_sysfs_destroy(tap);
-+ if (err)
-+ goto wait;
++ err = blktap_ring_destroy(tap);
++ if (err)
++ return err;
+
-+ break;
++ clear_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
++ clear_bit(BLKTAP_CONTROL, &tap->dev_inuse);
++ wake_up(&tap->wq);
+
-+ wait:
-+ BTDBG("inuse: 0x%lx, dev_inuse: 0x%lx\n",
-+ inuse, tap->dev_inuse);
-+ if (wait_event_interruptible(tap->wq, tap->dev_inuse != inuse))
-+ break;
-+ }
-+
-+ clear_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
-+
-+ if (tap->dev_inuse == (1UL << BLKTAP_CONTROL)) {
-+ err = 0;
-+ clear_bit(BLKTAP_CONTROL, &tap->dev_inuse);
-+ }
-+
-+ return err;
++ return 0;
+}
+
+static int __init
@@ -12485,7 +13614,7 @@
+{
+ int err;
+
-+ if (!xen_domain())
++ if (!xen_pv_domain())
+ return -ENODEV;
+
+ err = blktap_request_pool_init();
@@ -12520,10 +13649,10 @@
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
new file mode 100644
-index 0000000..a50b622
+index 0000000..3feaa03
--- /dev/null
+++ b/drivers/xen/blktap/device.c
-@@ -0,0 +1,1138 @@
+@@ -0,0 +1,931 @@
+#include <linux/version.h> /* XXX Remove uses of VERSION instead. */
+#include <linux/fs.h>
+#include <linux/blkdev.h>
@@ -12591,7 +13720,7 @@
+
+ dev->users--;
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
-+ blktap_device_destroy(tap);
++ blktap_control_destroy_device(tap);
+
+ return 0;
+}
@@ -12623,26 +13752,6 @@
+ command, (long)argument, inode->i_rdev);
+
+ switch (command) {
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
-+ case HDIO_GETGEO: {
-+ struct hd_geometry geo;
-+ int ret;
-+
-+ if (!argument)
-+ return -EINVAL;
-+
-+ geo.start = get_start_sect(bd);
-+ ret = blktap_device_getgeo(bd, &geo);
-+ if (ret)
-+ return ret;
-+
-+ if (copy_to_user((struct hd_geometry __user *)argument, &geo,
-+ sizeof(geo)))
-+ return -EFAULT;
-+
-+ return 0;
-+ }
-+#endif
+ case CDROMMULTISESSION:
+ BTDBG("FIXME: support multisession CDs later\n");
+ for (i = 0; i < sizeof(struct cdrom_multisession); i++)
@@ -12675,9 +13784,7 @@
+ .open = blktap_device_open,
+ .release = blktap_device_release,
+ .ioctl = blktap_device_ioctl,
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+ .getgeo = blktap_device_getgeo
-+#endif
+};
+
+static int
@@ -12738,9 +13845,6 @@
+ BUG_ON(ret);
+}
+
-+/*
-+ * tap->tap_sem held on entry
-+ */
+static void
+blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
+{
@@ -12828,9 +13932,6 @@
+ request->nr_pages << PAGE_SHIFT, NULL);
+}
+
-+/*
-+ * tap->tap_sem held on entry
-+ */
+static void
+blktap_unmap(struct blktap *tap, struct blktap_request *request)
+{
@@ -12838,7 +13939,6 @@
+ unsigned long kvaddr;
+
+ usr_idx = request->usr_idx;
-+ down_write(&tap->ring.vma->vm_mm->mmap_sem);
+
+ for (i = 0; i < request->nr_pages; i++) {
+ kvaddr = request_to_kaddr(request, i);
@@ -12856,13 +13956,17 @@
+ }
+ }
+
-+ blktap_device_fast_flush(tap, request);
-+ up_write(&tap->ring.vma->vm_mm->mmap_sem);
++ if (blktap_active(tap)) {
++ down_write(&tap->ring.vma->vm_mm->mmap_sem);
++ blktap_device_fast_flush(tap, request);
++ up_write(&tap->ring.vma->vm_mm->mmap_sem);
++ }
+}
+
+/*
+ * called if the tapdisk process dies unexpectedly.
+ * fail and release any pending requests and disable queue.
++ * may be called from non-tapdisk context.
+ */
+void
+blktap_device_fail_pending_requests(struct blktap *tap)
@@ -12875,8 +13979,6 @@
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
+ return;
+
-+ down_write(&tap->tap_sem);
-+
+ dev = &tap->device;
+ for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
+ request = tap->pending_requests[usr_idx];
@@ -12894,8 +13996,6 @@
+ blktap_request_free(tap, request);
+ }
+
-+ up_write(&tap->tap_sem);
-+
+ spin_lock_irq(&dev->lock);
+
+ /* fail any future requests */
@@ -12905,9 +14005,6 @@
+ spin_unlock_irq(&dev->lock);
+}
+
-+/*
-+ * tap->tap_sem held on entry
-+ */
+void
+blktap_device_finish_request(struct blktap *tap,
+ struct blkif_response *res,
@@ -13116,9 +14213,6 @@
+ err = -1;
+ memset(&table, 0, sizeof(table));
+
-+ if (!blktap_active(tap))
-+ goto out;
-+
+ ring = &tap->ring;
+ usr_idx = request->usr_idx;
+ blkif_req.id = usr_idx;
@@ -13207,142 +14301,43 @@
+ return err;
+}
+
-+#ifdef ENABLE_PASSTHROUGH
-+#define rq_for_each_bio_safe(_bio, _tmp, _req) \
-+ if ((_req)->bio) \
-+ for (_bio = (_req)->bio; \
-+ _bio && ((_tmp = _bio->bi_next) || 1); \
-+ _bio = _tmp)
-+
-+static void
-+blktap_device_forward_request(struct blktap *tap, struct request *req)
-+{
-+ struct bio *bio, *tmp;
-+ struct blktap_device *dev;
-+
-+ dev = &tap->device;
-+
-+ rq_for_each_bio_safe(bio, tmp, req) {
-+ bio->bi_bdev = dev->bdev;
-+ submit_bio(bio->bi_rw, bio);
-+ }
-+}
-+
-+static void
-+blktap_device_close_bdev(struct blktap *tap)
-+{
-+ struct blktap_device *dev;
-+
-+ dev = &tap->device;
-+
-+ if (dev->bdev)
-+ blkdev_put(dev->bdev);
-+
-+ dev->bdev = NULL;
-+ clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
-+}
-+
-+static int
-+blktap_device_open_bdev(struct blktap *tap, u32 pdev)
-+{
-+ struct block_device *bdev;
-+ struct blktap_device *dev;
-+
-+ dev = &tap->device;
-+
-+ bdev = open_by_devnum(pdev, FMODE_WRITE);
-+ if (IS_ERR(bdev)) {
-+ BTERR("opening device %x:%x failed: %ld\n",
-+ MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
-+ return PTR_ERR(bdev);
-+ }
-+
-+ if (!bdev->bd_disk) {
-+ BTERR("device %x:%x doesn't exist\n",
-+ MAJOR(pdev), MINOR(pdev));
-+ blkdev_put(dev->bdev);
-+ return -ENOENT;
-+ }
-+
-+ dev->bdev = bdev;
-+ set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
-+
-+ /* TODO: readjust queue parameters */
-+
-+ BTINFO("set device %d to passthrough on %x:%x\n",
-+ tap->minor, MAJOR(pdev), MINOR(pdev));
-+
-+ return 0;
-+}
-+
-+int
-+blktap_device_enable_passthrough(struct blktap *tap,
-+ unsigned major, unsigned minor)
-+{
-+ u32 pdev;
-+ struct blktap_device *dev;
-+
-+ dev = &tap->device;
-+ pdev = MKDEV(major, minor);
-+
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return -EINVAL;
-+
-+ if (dev->bdev) {
-+ if (pdev)
-+ return -EINVAL;
-+ blktap_device_close_bdev(tap);
-+ return 0;
-+ }
-+
-+ return blktap_device_open_bdev(tap, pdev);
-+}
-+#endif
-+
+/*
-+ * dev->lock held on entry
++ * called from tapdisk context
+ */
-+static void
++int
+blktap_device_run_queue(struct blktap *tap)
+{
-+ int queued, err;
++ int err, rv;
+ struct request_queue *rq;
+ struct request *req;
+ struct blktap_ring *ring;
+ struct blktap_device *dev;
+ struct blktap_request *request;
+
-+ queued = 0;
+ ring = &tap->ring;
+ dev = &tap->device;
+ rq = dev->gd->queue;
+
+ BTDBG("running queue for %d\n", tap->minor);
++ spin_lock_irq(&dev->lock);
+
+ while ((req = blk_peek_request(rq)) != NULL) {
+ if (!blk_fs_request(req)) {
++ blk_start_request(req);
+ __blk_end_request_cur(req, 0);
+ continue;
+ }
+
+ if (blk_barrier_rq(req)) {
++ blk_start_request(req);
+ __blk_end_request_cur(req, 0);
+ continue;
+ }
+
-+#ifdef ENABLE_PASSTHROUGH
-+ if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
-+ blkdev_dequeue_request(req);
-+ blktap_device_forward_request(tap, req);
-+ continue;
-+ }
-+#endif
-+
+ if (RING_FULL(&ring->ring)) {
+ wait:
+ /* Avoid pointless unplugs. */
+ blk_stop_queue(rq);
-+ blktap_defer(tap);
+ break;
+ }
+
@@ -13362,27 +14357,26 @@
+ blk_start_request(req);
+
+ spin_unlock_irq(&dev->lock);
-+ down_read(&tap->tap_sem);
+
+ err = blktap_device_process_request(tap, request, req);
-+ if (!err)
-+ queued++;
-+ else {
++ if (err) {
+ blktap_device_end_dequeued_request(dev, req, -EIO);
+ blktap_request_free(tap, request);
+ }
+
-+ up_read(&tap->tap_sem);
+ spin_lock_irq(&dev->lock);
+ }
+
-+ if (queued)
-+ blktap_ring_kick_user(tap);
++ spin_unlock_irq(&dev->lock);
++
++ rv = ring->ring.req_prod_pvt -
++ ring->ring.sring->req_prod;
++
++ RING_PUSH_REQUESTS(&ring->ring);
++
++ return rv;
+}
+
-+/*
-+ * dev->lock held on entry
-+ */
+static void
+blktap_device_do_request(struct request_queue *rq)
+{
@@ -13398,17 +14392,11 @@
+ if (!blktap_active(tap))
+ goto fail;
+
-+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
-+ test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
-+ blktap_defer(tap);
-+ return;
-+ }
-+
-+ blktap_device_run_queue(tap);
++ blktap_ring_kick_user(tap);
+ return;
+
+fail:
-+ while ((req = blk_peek_request(rq))) {
++ while ((req = blk_fetch_request(rq))) {
+ BTERR("device closed: failing secs %llu - %llu\n",
+ (unsigned long long)blk_rq_pos(req),
+ (unsigned long long)blk_rq_pos(req) + blk_rq_sectors(req));
@@ -13422,18 +14410,6 @@
+ struct blktap_device *dev;
+
+ dev = &tap->device;
-+
-+ if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
-+ blktap_defer(tap);
-+ return;
-+ }
-+
-+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
-+ test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
-+ blktap_defer(tap);
-+ return;
-+ }
-+
+ spin_lock_irq(&dev->lock);
+
+ /* Re-enable calldowns. */
@@ -13485,52 +14461,6 @@
+}
+
+int
-+blktap_device_resume(struct blktap *tap)
-+{
-+ int err;
-+
-+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
-+ return -ENODEV;
-+
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return 0;
-+
-+ err = blktap_ring_resume(tap);
-+ if (err)
-+ return err;
-+
-+ /* device size may have changed */
-+ blktap_device_configure(tap);
-+
-+ BTDBG("restarting device\n");
-+ blktap_device_restart(tap);
-+
-+ return 0;
-+}
-+
-+int
-+blktap_device_pause(struct blktap *tap)
-+{
-+ unsigned long flags;
-+ struct blktap_device *dev = &tap->device;
-+
-+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
-+ return -ENODEV;
-+
-+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return 0;
-+
-+ spin_lock_irqsave(&dev->lock, flags);
-+
-+ blk_stop_queue(dev->gd->queue);
-+ set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
-+
-+ spin_unlock_irqrestore(&dev->lock, flags);
-+
-+ return blktap_ring_pause(tap);
-+}
-+
-+int
+blktap_device_destroy(struct blktap *tap)
+{
+ struct blktap_device *dev = &tap->device;
@@ -13541,8 +14471,11 @@
+
+ BTINFO("destroy device %d users %d\n", tap->minor, dev->users);
+
-+ if (dev->users)
++ if (dev->users) {
++ blktap_device_fail_pending_requests(tap);
++ blktap_device_restart(tap);
+ return -EBUSY;
++ }
+
+ spin_lock_irq(&dev->lock);
+ /* No more blktap_device_do_request(). */
@@ -13551,17 +14484,10 @@
+ dev->gd = NULL;
+ spin_unlock_irq(&dev->lock);
+
-+#ifdef ENABLE_PASSTHROUGH
-+ if (dev->bdev)
-+ blktap_device_close_bdev(tap);
-+#endif
-+
+ del_gendisk(gd);
+ blk_cleanup_queue(gd->queue);
+ put_disk(gd);
+
-+ wake_up(&tap->wq);
-+
+ return 0;
+}
+
@@ -13609,11 +14535,7 @@
+ if (!rq)
+ goto error;
+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+ elevator_init(rq, "noop");
-+#else
-+ elevator_init(rq, &elevator_noop);
-+#endif
+
+ gd->queue = rq;
+ rq->queuedata = dev;
@@ -13664,10 +14586,10 @@
+}
diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
new file mode 100644
-index 0000000..770736a
+index 0000000..4efd013
--- /dev/null
+++ b/drivers/xen/blktap/request.c
-@@ -0,0 +1,297 @@
+@@ -0,0 +1,295 @@
+#include <linux/spinlock.h>
+#include <xen/balloon.h>
+#include <linux/sched.h>
@@ -13908,12 +14830,10 @@
+ list_add(&request->free_list, &pool.free_list);
+ atomic_dec(&handle->bucket->reqs_in_use);
+ free = atomic_dec_and_test(&pool.reqs_in_use);
++ tap->pending_cnt--;
+
+ spin_unlock_irqrestore(&pool.lock, flags);
+
-+ if (--tap->pending_cnt == 0)
-+ wake_up_interruptible(&tap->wq);
-+
+ if (free)
+ wake_up(&pool.wait_queue);
+}
@@ -13967,10 +14887,10 @@
+}
diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
new file mode 100644
-index 0000000..74a7aa7
+index 0000000..d7d0c79
--- /dev/null
+++ b/drivers/xen/blktap/ring.c
-@@ -0,0 +1,615 @@
+@@ -0,0 +1,477 @@
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
@@ -14003,7 +14923,7 @@
+ */
+#define RING_PAGES 1
+
-+static int
++static void
+blktap_read_ring(struct blktap *tap)
+{
+ /* This is called to read responses from the ring. */
@@ -14013,13 +14933,9 @@
+ struct blktap_ring *ring;
+ struct blktap_request *request;
+
-+ down_read(&tap->tap_sem);
-+
+ ring = &tap->ring;
-+ if (!ring->vma) {
-+ up_read(&tap->tap_sem);
-+ return 0;
-+ }
++ if (!ring->vma)
++ return;
+
+ /* for each outstanding message on the ring */
+ rp = ring->ring.sring->rsp_prod;
@@ -14027,7 +14943,6 @@
+
+ for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
+ memcpy(&res, RING_GET_RESPONSE(&ring->ring, rc), sizeof(res));
-+ mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
+ ++ring->ring.rsp_cons;
+
+ usr_idx = (int)res.id;
@@ -14043,11 +14958,9 @@
+ blktap_device_finish_request(tap, &res, request);
+ }
+
-+ up_read(&tap->tap_sem);
-+
-+ blktap_run_deferred();
+
-+ return 0;
++ blktap_device_restart(tap);
++ return;
+}
+
+static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -14136,51 +15049,22 @@
+}
+
+static void
-+blktap_ring_vm_unmap(struct vm_area_struct *vma)
-+{
-+ struct blktap *tap = vma_to_blktap(vma);
-+
-+ down_write(&tap->tap_sem);
-+ clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
-+ clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
-+ clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
-+ up_write(&tap->tap_sem);
-+}
-+
-+static void
+blktap_ring_vm_close(struct vm_area_struct *vma)
+{
+ struct blktap *tap = vma_to_blktap(vma);
+ struct blktap_ring *ring = &tap->ring;
+
-+ blktap_ring_vm_unmap(vma); /* fail future requests */
-+ blktap_device_fail_pending_requests(tap); /* fail pending requests */
-+ blktap_device_restart(tap); /* fail deferred requests */
-+
-+ down_write(&tap->tap_sem);
-+
-+ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
-+
-+ kfree(ring->foreign_map.map);
-+ ring->foreign_map.map = NULL;
-+
-+ /* Free the ring page. */
-+ ClearPageReserved(virt_to_page(ring->ring.sring));
-+ free_page((unsigned long)ring->ring.sring);
-+
+ BTINFO("unmapping ring %d\n", tap->minor);
-+ ring->ring.sring = NULL;
++ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
++ clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
+ ring->vma = NULL;
+
-+ up_write(&tap->tap_sem);
-+
-+ wake_up(&tap->wq);
++ blktap_control_destroy_device(tap);
+}
+
+static struct vm_operations_struct blktap_ring_vm_operations = {
+ .close = blktap_ring_vm_close,
-+ .unmap = blktap_ring_vm_unmap,
-+ .fault = blktap_ring_fault,
++ .fault = blktap_ring_fault,
+ .zap_pte = blktap_ring_clear_pte,
+};
+
@@ -14203,6 +15087,9 @@
+ if (!test_bit(BLKTAP_CONTROL, &tap->dev_inuse))
+ return -ENODEV;
+
++ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
++ return -EBUSY;
++
+ /* Only one process can access ring at a time */
+ if (test_and_set_bit(BLKTAP_RING_FD, &tap->dev_inuse))
+ return -EBUSY;
@@ -14221,7 +15108,9 @@
+ BTINFO("freeing device %d\n", tap->minor);
+ clear_bit(BLKTAP_RING_FD, &tap->dev_inuse);
+ filp->private_data = NULL;
-+ wake_up(&tap->wq);
++
++ blktap_control_destroy_device(tap);
++
+ return 0;
+}
+
@@ -14328,6 +15217,8 @@
+ free_page((unsigned long)sring);
+ kfree(map);
+
++ clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
++
+ return -ENOMEM;
+}
+
@@ -14336,10 +15227,8 @@
+{
+ struct blktap_ring *ring = &tap->ring;
+
-+ down_read(&tap->tap_sem);
+ if (ring->ring.sring)
-+ ring->ring.sring->pad[0] = msg;
-+ up_read(&tap->tap_sem);
++ ring->ring.sring->private.tapif_user.msg = msg;
+}
+
+static int
@@ -14354,32 +15243,15 @@
+ switch(cmd) {
+ case BLKTAP2_IOCTL_KICK_FE:
+ /* There are fe messages to process. */
-+ return blktap_read_ring(tap);
++ blktap_read_ring(tap);
++ return 0;
+
+ case BLKTAP2_IOCTL_CREATE_DEVICE:
+ if (!arg)
+ return -EINVAL;
+
-+ if (copy_from_user(¶ms, (struct blktap_params __user *)arg,
-+ sizeof(params))) {
-+ BTERR("failed to get params\n");
-+ return -EFAULT;
-+ }
-+
-+ if (blktap_validate_params(tap, ¶ms)) {
-+ BTERR("invalid params\n");
-+ return -EINVAL;
-+ }
-+
-+ tap->params = params;
-+ return blktap_device_create(tap);
-+
-+ case BLKTAP2_IOCTL_SET_PARAMS:
-+ if (!arg)
-+ return -EINVAL;
-+
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return -EINVAL;
++ if (!blktap_active(tap))
++ return -ENODEV;
+
+ if (copy_from_user(¶ms, (struct blktap_params __user *)arg,
+ sizeof(params))) {
@@ -14393,50 +15265,7 @@
+ }
+
+ tap->params = params;
-+ return 0;
-+
-+ case BLKTAP2_IOCTL_PAUSE:
-+ if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
-+ return -EINVAL;
-+
-+ set_bit(BLKTAP_PAUSED, &tap->dev_inuse);
-+ clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
-+
-+ blktap_ring_set_message(tap, 0);
-+ wake_up_interruptible(&tap->wq);
-+
-+ return 0;
-+
-+
-+ case BLKTAP2_IOCTL_REOPEN:
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return -EINVAL;
-+
-+ if (!arg)
-+ return -EINVAL;
-+
-+ if (copy_to_user((char __user *)arg,
-+ tap->params.name,
-+ strlen(tap->params.name) + 1))
-+ return -EFAULT;
-+
-+ blktap_ring_set_message(tap, 0);
-+ wake_up_interruptible(&tap->wq);
-+
-+ return 0;
-+
-+ case BLKTAP2_IOCTL_RESUME:
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return -EINVAL;
-+
-+ tap->ring.response = (int)arg;
-+ if (!tap->ring.response)
-+ clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
-+
-+ blktap_ring_set_message(tap, 0);
-+ wake_up_interruptible(&tap->wq);
-+
-+ return 0;
++ return blktap_device_create(tap);
+ }
+
+ return -ENOIOCTLCMD;
@@ -14446,13 +15275,26 @@
+{
+ struct blktap *tap = filp->private_data;
+ struct blktap_ring *ring = &tap->ring;
++ int work = 0;
++
++ down_read(¤t->mm->mmap_sem);
++
++ if (!blktap_active(tap)) {
++ up_read(¤t->mm->mmap_sem);
++ force_sig(SIGSEGV, current);
++ return 0;
++ }
+
+ poll_wait(filp, &ring->poll_wait, wait);
-+ if (ring->ring.sring->pad[0] != 0 ||
-+ ring->ring.req_prod_pvt != ring->ring.sring->req_prod) {
-+ RING_PUSH_REQUESTS(&ring->ring);
++
++ if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
++ work = blktap_device_run_queue(tap);
++
++ up_read(¤t->mm->mmap_sem);
++
++ if (work ||
++ ring->ring.sring->private.tapif_user.msg)
+ return POLLIN | POLLRDNORM;
-+ }
+
+ return 0;
+}
@@ -14473,66 +15315,6 @@
+}
+
+int
-+blktap_ring_resume(struct blktap *tap)
-+{
-+ int err;
-+ struct blktap_ring *ring = &tap->ring;
-+
-+ if (!blktap_active(tap))
-+ return -ENODEV;
-+
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return -EINVAL;
-+
-+ /* set shared flag for resume */
-+ ring->response = 0;
-+
-+ blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_RESUME);
-+ blktap_ring_kick_user(tap);
-+
-+ wait_event_interruptible(tap->wq, ring->response ||
-+ !test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
-+
-+ err = ring->response;
-+ ring->response = 0;
-+
-+ BTDBG("err: %d\n", err);
-+
-+ if (err)
-+ return err;
-+
-+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return -EAGAIN;
-+
-+ return 0;
-+}
-+
-+int
-+blktap_ring_pause(struct blktap *tap)
-+{
-+ if (!blktap_active(tap))
-+ return -ENODEV;
-+
-+ if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
-+ return -EINVAL;
-+
-+ BTDBG("draining queue\n");
-+ wait_event_interruptible(tap->wq, !tap->pending_cnt);
-+ if (tap->pending_cnt)
-+ return -EAGAIN;
-+
-+ blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_PAUSE);
-+ blktap_ring_kick_user(tap);
-+
-+ BTDBG("waiting for tapdisk response\n");
-+ wait_event_interruptible(tap->wq, test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
-+ return -EAGAIN;
-+
-+ return 0;
-+}
-+
-+int
+blktap_ring_destroy(struct blktap *tap)
+{
+ if (!test_bit(BLKTAP_RING_FD, &tap->dev_inuse) &&
@@ -14588,10 +15370,10 @@
+}
diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c
new file mode 100644
-index 0000000..23a3a51
+index 0000000..e342d15
--- /dev/null
+++ b/drivers/xen/blktap/sysfs.c
-@@ -0,0 +1,451 @@
+@@ -0,0 +1,313 @@
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/module.h>
@@ -14632,12 +15414,6 @@
+}
+
+#define CLASS_DEVICE_ATTR(a,b,c,d) DEVICE_ATTR(a,b,c,d)
-+
-+static ssize_t blktap_sysfs_pause_device(struct device *, struct device_attribute *, const char *, size_t);
-+CLASS_DEVICE_ATTR(pause, S_IWUSR, NULL, blktap_sysfs_pause_device);
-+static ssize_t blktap_sysfs_resume_device(struct device *, struct device_attribute *, const char *, size_t);
-+CLASS_DEVICE_ATTR(resume, S_IWUSR, NULL, blktap_sysfs_resume_device);
-+
+static ssize_t
+blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const char *buf, size_t size)
+{
@@ -14651,12 +15427,6 @@
+ err = -ENODEV;
+ goto out;
+ }
-+
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+ err = -EPERM;
-+ goto out;
-+ }
-+
+ if (size > BLKTAP2_MAX_MESSAGE_LEN) {
+ err = -ENAMETOOLONG;
+ goto out;
@@ -14702,8 +15472,8 @@
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
-+ int err;
+ struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
++ struct blktap_ring *ring = &tap->ring;
+
+ if (!tap->ring.dev)
+ return size;
@@ -14711,132 +15481,17 @@
+ if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
+ return -EBUSY;
+
-+ err = blktap_control_destroy_device(tap);
++ BTDBG("sending tapdisk close message\n");
++ ring->ring.sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE;
++ blktap_ring_kick_user(tap);
++ wait_event_interruptible(tap->wq,
++ !test_bit(BLKTAP_CONTROL, &tap->dev_inuse));
+
-+ return (err ? : size);
++ return 0;
+}
+CLASS_DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
+
+static ssize_t
-+blktap_sysfs_pause_device(struct device *dev,
-+ struct device_attribute *attr,
-+ const char *buf, size_t size)
-+{
-+ int err;
-+ struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
-+
-+ blktap_sysfs_enter(tap);
-+
-+ BTDBG("pausing %u:%u: dev_inuse: %lu\n",
-+ MAJOR(tap->ring.devno), MINOR(tap->ring.devno), tap->dev_inuse);
-+
-+ if (!tap->ring.dev ||
-+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
-+ err = -ENODEV;
-+ goto out;
-+ }
-+
-+ if (test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
-+ err = -EBUSY;
-+ goto out;
-+ }
-+
-+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+ err = 0;
-+ goto out;
-+ }
-+
-+ err = blktap_device_pause(tap);
-+ if (!err) {
-+ device_remove_file(dev, &dev_attr_pause);
-+ err = device_create_file(dev, &dev_attr_resume);
-+ }
-+
-+out:
-+ blktap_sysfs_exit(tap);
-+
-+ return (err ? err : size);
-+}
-+
-+static ssize_t
-+blktap_sysfs_resume_device(struct device *dev,
-+ struct device_attribute *attr,
-+ const char *buf, size_t size)
-+{
-+ int err;
-+ struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
-+
-+ blktap_sysfs_enter(tap);
-+
-+ if (!tap->ring.dev ||
-+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
-+ err = -ENODEV;
-+ goto out;
-+ }
-+
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+ err = -EINVAL;
-+ goto out;
-+ }
-+
-+ err = blktap_device_resume(tap);
-+ if (!err) {
-+ device_remove_file(dev, &dev_attr_resume);
-+ err = device_create_file(dev, &dev_attr_pause);
-+ }
-+
-+out:
-+ blktap_sysfs_exit(tap);
-+
-+ BTDBG("returning %zd\n", (err ? err : size));
-+ return (err ? err : size);
-+}
-+
-+#ifdef ENABLE_PASSTHROUGH
-+static ssize_t
-+blktap_sysfs_enable_passthrough(struct device *dev,
-+ const char *buf, size_t size)
-+{
-+ int err;
-+ unsigned major, minor;
-+ struct blktap *tap = (struct blktap *)dev_get_drvdata(dev);
-+
-+ BTINFO("passthrough request enabled\n");
-+
-+ blktap_sysfs_enter(tap);
-+
-+ if (!tap->ring.dev ||
-+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
-+ err = -ENODEV;
-+ goto out;
-+ }
-+
-+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
-+ err = -EINVAL;
-+ goto out;
-+ }
-+
-+ if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
-+ err = -EINVAL;
-+ goto out;
-+ }
-+
-+ err = sscanf(buf, "%x:%x", &major, &minor);
-+ if (err != 2) {
-+ err = -EINVAL;
-+ goto out;
-+ }
-+
-+ err = blktap_device_enable_passthrough(tap, major, minor);
-+
-+out:
-+ blktap_sysfs_exit(tap);
-+ BTDBG("returning %d\n", (err ? err : size));
-+ return (err ? err : size);
-+}
-+#endif
-+
-+static ssize_t
+blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ char *tmp;
@@ -14859,8 +15514,6 @@
+ "device users: %d\n", tap->params.capacity,
+ tap->params.sector_size, tap->device.users);
+
-+ down_read(&tap->tap_sem);
-+
+ tmp += sprintf(tmp, "pending requests: %d\n", tap->pending_cnt);
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ struct blktap_request *req = tap->pending_requests[i];
@@ -14876,7 +15529,6 @@
+ req->time.tv_usec);
+ }
+
-+ up_read(&tap->tap_sem);
+ ret = (tmp - buf) + 1;
+
+out:
@@ -14913,26 +15565,18 @@
+ printk(KERN_CRIT "%s: adding attributes for dev %p\n", __func__, dev);
+ err = device_create_file(dev, &dev_attr_name);
+ if (err)
-+ goto out;
++ goto fail;
+ err = device_create_file(dev, &dev_attr_remove);
+ if (err)
-+ goto out_unregister_name;
-+ err = device_create_file(dev, &dev_attr_pause);
-+ if (err)
-+ goto out_unregister_remove;
++ goto fail;
+ err = device_create_file(dev, &dev_attr_debug);
+ if (err)
-+ goto out_unregister_pause;
++ goto fail;
+
+ return 0;
+
-+out_unregister_pause:
-+ device_remove_file(dev, &dev_attr_pause);
-+out_unregister_remove:
-+ device_remove_file(dev, &dev_attr_remove);
-+out_unregister_name:
-+ device_remove_file(dev, &dev_attr_name);
-+out:
++fail:
++ device_unregister(dev);
+ return err;
+}
+
@@ -15043,52 +15687,6 @@
+ class_destroy(cls);
+ return err;
+}
-diff --git a/drivers/xen/blktap/wait_queue.c b/drivers/xen/blktap/wait_queue.c
-new file mode 100644
-index 0000000..f8995aa
---- /dev/null
-+++ b/drivers/xen/blktap/wait_queue.c
-@@ -0,0 +1,40 @@
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+
-+#include "blktap.h"
-+
-+static LIST_HEAD(deferred_work_queue);
-+static DEFINE_SPINLOCK(deferred_work_lock);
-+
-+void
-+blktap_run_deferred(void)
-+{
-+ LIST_HEAD(queue);
-+ struct blktap *tap;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&deferred_work_lock, flags);
-+ list_splice_init(&deferred_work_queue, &queue);
-+ list_for_each_entry(tap, &queue, deferred_queue)
-+ clear_bit(BLKTAP_DEFERRED, &tap->dev_inuse);
-+ spin_unlock_irqrestore(&deferred_work_lock, flags);
-+
-+ while (!list_empty(&queue)) {
-+ tap = list_entry(queue.next, struct blktap, deferred_queue);
-+ list_del_init(&tap->deferred_queue);
-+ blktap_device_restart(tap);
-+ }
-+}
-+
-+void
-+blktap_defer(struct blktap *tap)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&deferred_work_lock, flags);
-+ if (!test_bit(BLKTAP_DEFERRED, &tap->dev_inuse)) {
-+ set_bit(BLKTAP_DEFERRED, &tap->dev_inuse);
-+ list_add_tail(&tap->deferred_queue, &deferred_work_queue);
-+ }
-+ spin_unlock_irqrestore(&deferred_work_lock, flags);
-+}
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index bdfd584..6625ffe 100644
--- a/drivers/xen/cpu_hotplug.c
@@ -15101,7 +15699,7 @@
#include <asm/xen/hypervisor.h>
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index ce602dd..9c8ad5c 100644
+index ce602dd..b4a00bf 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -16,7 +16,7 @@
@@ -15113,7 +15711,7 @@
*
* Jeremy Fitzhardinge <jeremy at xensource.com>, XenSource Inc, 2007
*/
-@@ -27,19 +27,28 @@
+@@ -27,18 +27,31 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/bootmem.h>
@@ -15122,6 +15720,7 @@
+#include <linux/pci.h>
+#include <linux/msi.h>
++#include <asm/desc.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
#include <asm/idle.h>
@@ -15131,18 +15730,20 @@
#include <asm/xen/hypervisor.h>
+#include <asm/xen/pci.h>
++#include <xen/xen.h>
+#include <xen/hvm.h>
#include <xen/xen-ops.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
-
-+#include "../pci/msi.h"
++#include <xen/interface/hvm/hvm_op.h>
++#include <xen/interface/hvm/params.h>
+
++#include "../pci/msi.h"
+
/*
* This lock protects updates to the following mapping and reference-count
- * arrays. The lock does not need to be acquired to read the mapping tables.
-@@ -67,7 +76,7 @@ enum xen_irq_type {
+@@ -67,7 +80,7 @@ enum xen_irq_type {
* event channel - irq->event channel mapping
* cpu - cpu this event channel is bound to
* index - type-specific information:
@@ -15151,7 +15752,7 @@
* VIRQ - virq number
* IPI - IPI vector
* EVTCHN -
-@@ -83,20 +92,27 @@ struct irq_info
+@@ -83,20 +96,27 @@ struct irq_info
enum ipi_vector ipi;
struct {
unsigned short gsi;
@@ -15185,7 +15786,7 @@
static inline unsigned long *cpu_evtchn_mask(int cpu)
{
return cpu_evtchn_mask_p[cpu].bits;
-@@ -106,6 +122,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
+@@ -106,6 +126,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
#define VALID_EVTCHN(chn) ((chn) != 0)
static struct irq_chip xen_dynamic_chip;
@@ -15193,7 +15794,7 @@
/* Constructor for packed IRQ information. */
static struct irq_info mk_unbound_info(void)
-@@ -135,7 +152,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn,
+@@ -135,7 +156,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn,
unsigned short gsi, unsigned short vector)
{
return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
@@ -15203,7 +15804,7 @@
}
/*
-@@ -218,6 +236,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
+@@ -218,6 +240,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
return ret;
}
@@ -15219,7 +15820,7 @@
static inline unsigned long active_evtchns(unsigned int cpu,
struct shared_info *sh,
unsigned int idx)
-@@ -329,17 +356,42 @@ static void unmask_evtchn(int port)
+@@ -329,27 +360,372 @@ static void unmask_evtchn(int port)
put_cpu();
}
@@ -15239,6 +15840,7 @@
int irq;
struct irq_desc *desc;
+ int start = get_nr_hw_irqs();
++ void *chip_data;
- for (irq = 0; irq < nr_irqs; irq++)
+ if (start == nr_irqs)
@@ -15265,8 +15867,12 @@
desc = irq_to_desc_alloc_node(irq, 0);
if (WARN_ON(desc == NULL))
-@@ -348,8 +400,324 @@ static int find_unbound_irq(void)
+ return -1;
+
++ /* save and restore chip_data */
++ chip_data = desc->chip_data;
dynamic_irq_init(irq);
++ desc->chip_data = chip_data;
return irq;
+
@@ -15278,8 +15884,8 @@
+{
+ /* identity map all the hardware irqs */
+ return irq < get_nr_hw_irqs();
-+}
-+
+ }
+
+static void pirq_unmask_notify(int irq)
+{
+ struct irq_info *info = info_for_irq(irq);
@@ -15377,8 +15983,8 @@
+static void enable_pirq(unsigned int irq)
+{
+ startup_pirq(irq);
- }
-
++}
++
+static void disable_pirq(unsigned int irq)
+{
+}
@@ -15590,7 +16196,7 @@
int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
-@@ -409,8 +777,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+@@ -409,8 +785,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
@@ -15599,8 +16205,7 @@
+{
+ struct evtchn_bind_interdomain bind_interdomain;
+ int err;
-
--static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
++
+ bind_interdomain.remote_dom = remote_domain;
+ bind_interdomain.remote_port = remote_port;
+
@@ -15610,12 +16215,13 @@
+ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+}
+
-+
+
+-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
int evtchn, irq;
-@@ -504,6 +887,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
+@@ -504,6 +895,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
@@ -15645,7 +16251,7 @@
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
-@@ -535,6 +941,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+@@ -535,6 +949,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
if (irq < 0)
return irq;
@@ -15653,12 +16259,12 @@
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
-@@ -616,17 +1023,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+@@ -616,17 +1031,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
-void xen_evtchn_do_upcall(struct pt_regs *regs)
-+void __xen_evtchn_do_upcall(struct pt_regs *regs)
++static void __xen_evtchn_do_upcall(struct pt_regs *regs)
{
int cpu = get_cpu();
- struct pt_regs *old_regs = set_irq_regs(regs);
@@ -15672,7 +16278,7 @@
do {
unsigned long pending_words;
-@@ -649,9 +1052,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
+@@ -649,9 +1060,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
int bit_idx = __ffs(pending_bits);
int port = (word_idx * BITS_PER_LONG) + bit_idx;
int irq = evtchn_to_irq[port];
@@ -15688,8 +16294,12 @@
}
}
-@@ -662,10 +1069,26 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
- } while(count != 1);
+@@ -659,14 +1074,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
+
+ count = __get_cpu_var(xed_nesting_count);
+ __get_cpu_var(xed_nesting_count) = 0;
+- } while(count != 1);
++ } while (count != 1 || vcpu_info->evtchn_upcall_pending);
out:
+
@@ -15710,22 +16320,28 @@
+}
- put_cpu();
-+void xen_hvm_evtchn_do_upcall(struct pt_regs *regs)
++void xen_hvm_evtchn_do_upcall(void)
+{
++ struct pt_regs *regs = get_irq_regs();
+ __xen_evtchn_do_upcall(regs);
}
++EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
/* Rebind a new event channel to an existing irq. */
-@@ -703,7 +1126,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+ void rebind_evtchn_irq(int evtchn, int irq)
+@@ -703,7 +1136,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
- if (!VALID_EVTCHN(evtchn))
-+ if (!VALID_EVTCHN(evtchn) || xen_hvm_domain())
++ /* events delivered via platform PCI interrupts are always
++ * routed to vcpu 0 */
++ if (!VALID_EVTCHN(evtchn) ||
++ (xen_hvm_domain() && !xen_have_vector_callback))
return -1;
/* Send future instances of this interrupt to other vcpu. */
-@@ -855,7 +1278,7 @@ void xen_clear_irq_pending(int irq)
+@@ -855,7 +1291,7 @@ void xen_clear_irq_pending(int irq)
if (VALID_EVTCHN(evtchn))
clear_evtchn(evtchn);
}
@@ -15734,7 +16350,7 @@
void xen_set_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
-@@ -875,9 +1298,9 @@ bool xen_test_irq_pending(int irq)
+@@ -875,9 +1311,9 @@ bool xen_test_irq_pending(int irq)
return ret;
}
@@ -15746,7 +16362,7 @@
{
evtchn_port_t evtchn = evtchn_from_irq(irq);
-@@ -885,13 +1308,33 @@ void xen_poll_irq(int irq)
+@@ -885,13 +1321,33 @@ void xen_poll_irq(int irq)
struct sched_poll poll;
poll.nr_ports = 1;
@@ -15781,7 +16397,7 @@
void xen_irq_resume(void)
{
-@@ -928,13 +1371,38 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
+@@ -928,13 +1384,85 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
.retrigger = retrigger_dynirq,
};
@@ -15805,6 +16421,53 @@
+ .retrigger = retrigger_dynirq,
+};
+
++int xen_set_callback_via(uint64_t via)
++{
++ struct xen_hvm_param a;
++ a.domid = DOMID_SELF;
++ a.index = HVM_PARAM_CALLBACK_IRQ;
++ a.value = via;
++ return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
++}
++EXPORT_SYMBOL_GPL(xen_set_callback_via);
++
++void smp_xen_hvm_callback_vector(struct pt_regs *regs)
++{
++ struct pt_regs *old_regs = set_irq_regs(regs);
++
++ exit_idle();
++
++ irq_enter();
++
++ __xen_evtchn_do_upcall(regs);
++
++ irq_exit();
++
++ set_irq_regs(old_regs);
++}
++
++/* Vector callbacks are better than PCI interrupts to receive event
++ * channel notifications because we can receive vector callbacks on any
++ * vcpu and we don't need PCI support or APIC interactions. */
++void xen_callback_vector(void)
++{
++ int rc;
++ uint64_t callback_via;
++ if (xen_have_vector_callback) {
++ callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
++ rc = xen_set_callback_via(callback_via);
++ if (rc) {
++ printk(KERN_ERR "Request for Xen HVM callback vector"
++ " failed.\n");
++ xen_have_vector_callback = 0;
++ return;
++ }
++ printk(KERN_INFO "Xen HVM callback vector for event delivery is "
++ "enabled\n");
++ alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
++ }
++}
++
void __init xen_init_IRQ(void)
{
int i;
@@ -15821,20 +16484,21 @@
init_evtchn_cpu_bindings();
-@@ -942,5 +1410,10 @@ void __init xen_init_IRQ(void)
+@@ -942,5 +1470,11 @@ void __init xen_init_IRQ(void)
for (i = 0; i < NR_EVENT_CHANNELS; i++)
mask_evtchn(i);
- irq_ctx_init(smp_processor_id());
-+ if (xen_hvm_domain())
++ if (xen_hvm_domain()) {
++ xen_callback_vector();
+ native_init_IRQ();
-+ else
++ } else {
+ irq_ctx_init(smp_processor_id());
-+
-+ xen_setup_pirqs();
++ xen_setup_pirqs();
++ }
}
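
For reference, the HVM callback-vector plumbing added to events.c above boils
down to one HVMOP_set_param hypercall. A minimal sketch using only the
constants and the hypercall wrapper that appear in the hunk; the function name
is illustrative and not part of the patch:

#include <linux/types.h>
#include <xen/interface/xen.h>
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/hvm/params.h>
#include <asm/xen/hypercall.h>

/* Ask Xen to deliver event-channel upcalls through the callback
 * described by @via; for the new path above that is an encoding of
 * the XEN_HVM_EVTCHN_CALLBACK vector. */
static int example_set_callback(uint64_t via)
{
        struct xen_hvm_param a = {
                .domid = DOMID_SELF,
                .index = HVM_PARAM_CALLBACK_IRQ,
                .value = via,
        };

        /* Returns 0 on success; on failure xen_callback_vector() above
         * clears xen_have_vector_callback and the guest keeps using the
         * platform PCI interrupt. */
        return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
}

Because the vector callback can be taken on any vcpu, rebind_irq_to_cpu()
above only refuses to migrate an event channel when the HVM guest lacks the
vector callback and is therefore stuck with the platform PCI interrupt, which
is always routed to vcpu 0.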
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
-index 79bedba..6a1c4a5 100644
+index 79bedba..b82666a 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -48,6 +48,8 @@
@@ -15958,7 +16622,7 @@
}
static long evtchn_ioctl(struct file *file,
-@@ -332,7 +371,7 @@ static long evtchn_ioctl(struct file *file,
+@@ -332,15 +371,17 @@ static long evtchn_ioctl(struct file *file,
spin_lock_irq(&port_user_lock);
rc = -ENOTCONN;
@@ -15967,7 +16631,18 @@
spin_unlock_irq(&port_user_lock);
break;
}
-@@ -354,7 +393,7 @@ static long evtchn_ioctl(struct file *file,
+
+- evtchn_unbind_from_user(u, unbind.port);
++ disable_irq(irq_from_evtchn(unbind.port));
+
+ spin_unlock_irq(&port_user_lock);
+
++ evtchn_unbind_from_user(u, unbind.port);
++
+ rc = 0;
+ break;
+ }
+@@ -354,7 +395,7 @@ static long evtchn_ioctl(struct file *file,
if (notify.port >= NR_EVENT_CHANNELS) {
rc = -EINVAL;
@@ -15976,7 +16651,7 @@
rc = -ENOTCONN;
} else {
notify_remote_via_evtchn(notify.port);
-@@ -443,10 +482,10 @@ static int evtchn_release(struct inode *inode, struct file *filp)
+@@ -443,14 +484,21 @@ static int evtchn_release(struct inode *inode, struct file *filp)
free_page((unsigned long)u->ring);
for (i = 0; i < NR_EVENT_CHANNELS; i++) {
@@ -15985,11 +16660,31 @@
continue;
- evtchn_unbind_from_user(port_user[i], i);
-+ evtchn_unbind_from_user(get_port_user(i), i);
++ disable_irq(irq_from_evtchn(i));
}
spin_unlock_irq(&port_user_lock);
-@@ -480,8 +519,11 @@ static int __init evtchn_init(void)
+
++ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
++ if (get_port_user(i) != u)
++ continue;
++
++ evtchn_unbind_from_user(get_port_user(i), i);
++ }
++
+ kfree(u->name);
+ kfree(u);
+
+@@ -470,7 +518,7 @@ static const struct file_operations evtchn_fops = {
+
+ static struct miscdevice evtchn_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+- .name = "evtchn",
++ .name = "xen/evtchn",
+ .fops = &evtchn_fops,
+ };
+ static int __init evtchn_init(void)
+@@ -480,8 +528,11 @@ static int __init evtchn_init(void)
if (!xen_domain())
return -ENODEV;
@@ -16002,7 +16697,7 @@
/* Create '/dev/misc/evtchn'. */
err = misc_register(&evtchn_miscdev);
-@@ -497,6 +539,9 @@ static int __init evtchn_init(void)
+@@ -497,6 +548,9 @@ static int __init evtchn_init(void)
static void __exit evtchn_cleanup(void)
{
@@ -16027,10 +16722,10 @@
int i, j;
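
Note that evtchn_miscdev is renamed from "evtchn" to "xen/evtchn" above, and
gntdev below gets the matching "xen/gntdev" name. With devtmpfs or default
udev rules the node is therefore expected to move from /dev/evtchn to
/dev/xen/evtchn; a trivial userspace check (illustrative only, not part of the
patch):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* The old path was /dev/evtchn; tools need the new location. */
        int fd = open("/dev/xen/evtchn", O_RDWR);

        if (fd < 0) {
                perror("open /dev/xen/evtchn");
                return 1;
        }
        close(fd);
        return 0;
}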
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
new file mode 100644
-index 0000000..ddc59cc
+index 0000000..a33e443
--- /dev/null
+++ b/drivers/xen/gntdev.c
-@@ -0,0 +1,626 @@
+@@ -0,0 +1,645 @@
+/******************************************************************************
+ * gntdev.c
+ *
@@ -16061,7 +16756,7 @@
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
-+#include <linux/rwsem.h>
++#include <linux/spinlock.h>
+
+#include <xen/xen.h>
+#include <xen/grant_table.h>
@@ -16084,7 +16779,7 @@
+ struct list_head maps;
+ uint32_t used;
+ uint32_t limit;
-+ struct rw_semaphore sem;
++ spinlock_t lock;
+ struct mm_struct *mm;
+ struct mmu_notifier mn;
+};
@@ -16117,9 +16812,9 @@
+ map->index == text_index && text ? text : "");
+}
+
-+static struct grant_map *gntdev_add_map(struct gntdev_priv *priv, int count)
++static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
+{
-+ struct grant_map *map, *add;
++ struct grant_map *add;
+
+ add = kzalloc(sizeof(struct grant_map), GFP_KERNEL);
+ if (NULL == add)
@@ -16140,6 +16835,20 @@
+ if (add->count + priv->used > priv->limit)
+ goto err;
+
++ return add;
++
++err:
++ kfree(add->grants);
++ kfree(add->map_ops);
++ kfree(add->unmap_ops);
++ kfree(add);
++ return NULL;
++}
++
++static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
++{
++ struct grant_map *map;
++
+ list_for_each_entry(map, &priv->maps, next) {
+ if (add->index + add->count < map->index) {
+ list_add_tail(&add->next, &map->next);
@@ -16153,14 +16862,6 @@
+ priv->used += add->count;
+ if (debug)
+ gntdev_print_maps(priv, "[new]", add->index);
-+ return add;
-+
-+err:
-+ kfree(add->grants);
-+ kfree(add->map_ops);
-+ kfree(add->unmap_ops);
-+ kfree(add);
-+ return NULL;
+}
+
+static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, int index,
@@ -16207,11 +16908,17 @@
+
+ map->priv->used -= map->count;
+ list_del(&map->next);
++ return 0;
++}
++
++static void gntdev_free_map(struct grant_map *map)
++{
++ if (!map)
++ return;
+ kfree(map->grants);
+ kfree(map->map_ops);
+ kfree(map->unmap_ops);
+ kfree(map);
-+ return 0;
+}
+
+/* ------------------------------------------------------------------ */
@@ -16310,7 +17017,7 @@
+ unsigned long mstart, mend;
+ int err;
+
-+ down_read(&priv->sem);
++ spin_lock(&priv->lock);
+ list_for_each_entry(map, &priv->maps, next) {
+ if (!map->vma)
+ continue;
@@ -16332,7 +17039,7 @@
+ (mend - mstart) >> PAGE_SHIFT);
+ WARN_ON(err);
+ }
-+ up_read(&priv->sem);
++ spin_unlock(&priv->lock);
+}
+
+static void mn_invl_page(struct mmu_notifier *mn,
@@ -16349,7 +17056,7 @@
+ struct grant_map *map;
+ int err;
+
-+ down_read(&priv->sem);
++ spin_lock(&priv->lock);
+ list_for_each_entry(map, &priv->maps, next) {
+ if (!map->vma)
+ continue;
@@ -16360,7 +17067,7 @@
+ err = unmap_grant_pages(map, 0, map->count);
+ WARN_ON(err);
+ }
-+ up_read(&priv->sem);
++ spin_unlock(&priv->lock);
+}
+
+struct mmu_notifier_ops gntdev_mmu_ops = {
@@ -16380,7 +17087,7 @@
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&priv->maps);
-+ init_rwsem(&priv->sem);
++ spin_lock_init(&priv->lock);
+ priv->limit = limit;
+
+ priv->mm = get_task_mm(current);
@@ -16408,13 +17115,16 @@
+ if (debug)
+ printk("%s: priv %p\n", __FUNCTION__, priv);
+
-+ down_write(&priv->sem);
++ spin_lock(&priv->lock);
+ while (!list_empty(&priv->maps)) {
+ map = list_entry(priv->maps.next, struct grant_map, next);
+ err = gntdev_del_map(map);
-+ WARN_ON(err);
++ if (WARN_ON(err))
++ gntdev_free_map(map);
++
+ }
-+ up_write(&priv->sem);
++ spin_unlock(&priv->lock);
++
+ mmu_notifier_unregister(&priv->mn, priv->mm);
+ kfree(priv);
+ return 0;
@@ -16437,27 +17147,29 @@
+ if (unlikely(op.count > priv->limit))
+ return -EINVAL;
+
-+ down_write(&priv->sem);
+ err = -ENOMEM;
-+ map = gntdev_add_map(priv, op.count);
++ map = gntdev_alloc_map(priv, op.count);
+ if (!map)
-+ goto err_unlock;
-+
-+ err = -ENOMEM;
++ return err;
+ if (copy_from_user(map->grants, &u->refs,
-+ sizeof(map->grants[0]) * op.count) != 0)
-+ goto err_free;
++ sizeof(map->grants[0]) * op.count) != 0) {
++ gntdev_free_map(map);
++ return err;
++ }
++
++ spin_lock(&priv->lock);
++ gntdev_add_map(priv, map);
+ op.index = map->index << PAGE_SHIFT;
-+ if (copy_to_user(u, &op, sizeof(op)) != 0)
-+ goto err_free;
-+ up_write(&priv->sem);
-+ return 0;
++ spin_unlock(&priv->lock);
+
-+err_free:
-+ gntdev_del_map(map);
-+err_unlock:
-+ up_write(&priv->sem);
-+ return err;
++ if (copy_to_user(u, &op, sizeof(op)) != 0) {
++ spin_lock(&priv->lock);
++ gntdev_del_map(map);
++ spin_unlock(&priv->lock);
++ gntdev_free_map(map);
++ return err;
++ }
++ return 0;
+}
+
+static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
@@ -16473,11 +17185,13 @@
+ printk("%s: priv %p, del %d+%d\n", __FUNCTION__, priv,
+ (int)op.index, (int)op.count);
+
-+ down_write(&priv->sem);
++ spin_lock(&priv->lock);
+ map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
+ if (map)
+ err = gntdev_del_map(map);
-+ up_write(&priv->sem);
++ spin_unlock(&priv->lock);
++ if (!err)
++ gntdev_free_map(map);
+ return err;
+}
+
@@ -16493,16 +17207,16 @@
+ printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, priv,
+ (unsigned long)op.vaddr);
+
-+ down_read(&priv->sem);
++ spin_lock(&priv->lock);
+ map = gntdev_find_map_vaddr(priv, op.vaddr);
+ if (map == NULL ||
+ map->vma->vm_start != op.vaddr) {
-+ up_read(&priv->sem);
++ spin_unlock(&priv->lock);
+ return -EINVAL;
+ }
+ op.offset = map->index << PAGE_SHIFT;
+ op.count = map->count;
-+ up_read(&priv->sem);
++ spin_unlock(&priv->lock);
+
+ if (copy_to_user(u, &op, sizeof(op)) != 0)
+ return -EFAULT;
@@ -16521,9 +17235,9 @@
+ if (op.count > limit)
+ return -EINVAL;
+
-+ down_write(&priv->sem);
++ spin_lock(&priv->lock);
+ priv->limit = op.count;
-+ up_write(&priv->sem);
++ spin_unlock(&priv->lock);
+ return 0;
+}
+
@@ -16571,7 +17285,7 @@
+ printk("%s: map %d+%d at %lx (pgoff %lx)\n", __FUNCTION__,
+ index, count, vma->vm_start, vma->vm_pgoff);
+
-+ down_read(&priv->sem);
++ spin_lock(&priv->lock);
+ map = gntdev_find_map_index(priv, index, count);
+ if (!map)
+ goto unlock_out;
@@ -16613,7 +17327,7 @@
+ map->is_mapped = 1;
+
+unlock_out:
-+ up_read(&priv->sem);
++ spin_unlock(&priv->lock);
+ return err;
+}
+
@@ -16627,7 +17341,7 @@
+
+static struct miscdevice gntdev_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
-+ .name = "gntdev",
++ .name = "xen/gntdev",
+ .fops = &gntdev_fops,
+};
+
@@ -16658,10 +17372,10 @@
+
+/* ------------------------------------------------------------------ */
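
With the gntdev rw_semaphore replaced by a plain spinlock above, nothing that
may sleep can run while priv->lock is held. That is presumably why map
allocation (gntdev_alloc_map), freeing (gntdev_free_map) and the
copy_{from,to}_user() calls now sit outside the lock, while only the list
add/del remains inside. A minimal sketch of that split, with illustrative
example_* names rather than the real gntdev types:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_map {
        struct list_head next;
        int count;
};

struct example_priv {
        struct list_head maps;
        spinlock_t lock;
};

/* May sleep (GFP_KERNEL), so it is called before the lock is taken. */
static struct example_map *example_alloc_map(int count)
{
        struct example_map *map = kzalloc(sizeof(*map), GFP_KERNEL);

        if (map)
                map->count = count;
        return map;
}

/* Only the list manipulation sits inside the non-sleeping section. */
static void example_add_map(struct example_priv *priv, struct example_map *map)
{
        spin_lock(&priv->lock);
        list_add_tail(&map->next, &priv->maps);
        spin_unlock(&priv->lock);
}

/* Unlink under the lock, free afterwards, mirroring the new
 * gntdev_del_map()/gntdev_free_map() pairing above. */
static void example_remove_map(struct example_priv *priv, struct example_map *map)
{
        spin_lock(&priv->lock);
        list_del(&map->next);
        spin_unlock(&priv->lock);
        kfree(map);
}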
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
-index 7d8f531..8df6ae0 100644
+index 7d8f531..5a8ad45 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
-@@ -36,10 +36,14 @@
+@@ -36,10 +36,13 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@@ -16671,20 +17385,37 @@
#include <xen/interface/xen.h>
#include <xen/page.h>
#include <xen/grant_table.h>
-+#include <xen/platform_pci.h>
+#include <xen/interface/memory.h>
#include <asm/xen/hypercall.h>
#include <asm/pgtable.h>
-@@ -57,6 +61,7 @@ static unsigned int boot_max_nr_grant_frames;
+@@ -57,6 +60,8 @@ static unsigned int boot_max_nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
-+static unsigned long hvm_pv_resume_frames;
++unsigned long xen_hvm_resume_frames;
++EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
static struct grant_entry *shared;
-@@ -447,6 +452,30 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+@@ -431,7 +436,7 @@ static unsigned int __max_nr_grant_frames(void)
+ return query.max_nr_frames;
+ }
+
+-static inline unsigned int max_nr_grant_frames(void)
++unsigned int gnttab_max_grant_frames(void)
+ {
+ unsigned int xen_max = __max_nr_grant_frames();
+
+@@ -439,6 +444,7 @@ static inline unsigned int max_nr_grant_frames(void)
+ return boot_max_nr_grant_frames;
+ return xen_max;
+ }
++EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
+
+ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+ {
+@@ -447,6 +453,30 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
unsigned int nr_gframes = end_idx + 1;
int rc;
@@ -16700,7 +17431,7 @@
+ xatp.domid = DOMID_SELF;
+ xatp.idx = i;
+ xatp.space = XENMAPSPACE_grant_table;
-+ xatp.gpfn = (hvm_pv_resume_frames >> PAGE_SHIFT) + i;
++ xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
+ if (rc != 0) {
+ printk(KERN_WARNING
@@ -16715,7 +17446,16 @@
frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
if (!frames)
return -ENOMEM;
-@@ -472,11 +501,135 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+@@ -463,7 +493,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+
+ BUG_ON(rc || setup.status);
+
+- rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
++ rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
+ &shared);
+ BUG_ON(rc);
+
+@@ -472,11 +502,134 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
return 0;
}
@@ -16829,7 +17569,7 @@
- if (max_nr_grant_frames() < nr_grant_frames)
+ unsigned int max_nr_gframes;
+
-+ max_nr_gframes = max_nr_grant_frames();
++ max_nr_gframes = gnttab_max_grant_frames();
+ if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
- return gnttab_map(0, nr_grant_frames - 1);
@@ -16837,12 +17577,11 @@
+ if (xen_pv_domain())
+ return gnttab_map(0, nr_grant_frames - 1);
+
-+ if (!hvm_pv_resume_frames) {
-+ hvm_pv_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
-+ shared = ioremap(hvm_pv_resume_frames, PAGE_SIZE * max_nr_gframes);
++ if (!shared) {
++ shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
+ if (shared == NULL) {
+ printk(KERN_WARNING
-+ "Fail to ioremap gnttab share frames\n");
++ "Failed to ioremap gnttab share frames!");
+ return -ENOMEM;
+ }
+ }
@@ -16853,6 +17592,15 @@
}
int gnttab_suspend(void)
+@@ -493,7 +646,7 @@ static int gnttab_expand(unsigned int req_entries)
+ cur = nr_grant_frames;
+ extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
+ GREFS_PER_GRANT_FRAME);
+- if (cur + extra > max_nr_grant_frames())
++ if (cur + extra > gnttab_max_grant_frames())
+ return -ENOSPC;
+
+ rc = gnttab_map(cur, cur + extra - 1);
@@ -503,15 +656,12 @@ static int gnttab_expand(unsigned int req_entries)
return rc;
}
@@ -16870,11 +17618,12 @@
nr_grant_frames = 1;
boot_max_nr_grant_frames = __max_nr_grant_frames();
-@@ -555,4 +705,16 @@ static int __devinit gnttab_init(void)
+@@ -554,5 +704,18 @@ static int __devinit gnttab_init(void)
+ kfree(gnttab_list);
return -ENOMEM;
}
-
--core_initcall(gnttab_init);
++EXPORT_SYMBOL_GPL(gnttab_init);
++
+static int __devinit __gnttab_init(void)
+{
+ /* Delay grant-table initialization in the PV on HVM case */
@@ -16886,33 +17635,22 @@
+
+ return gnttab_init();
+}
-+
+
+-core_initcall(gnttab_init);
+core_initcall(__gnttab_init);
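
The symbols exported from grant-table.c above (xen_hvm_resume_frames,
gnttab_max_grant_frames() and gnttab_init()) are only useful together with the
PV-on-HVM platform PCI driver, which is not part of this hunk. A hedged sketch
of the expected call order on that side; alloc_xen_mmio() and the function
name are assumptions, not code from this patch:

#include <linux/mm.h>           /* PAGE_SIZE */

/* Exported by the hunk above; redeclared here so the sketch stands
 * alone (the patch presumably exposes them through headers). */
extern unsigned long xen_hvm_resume_frames;
extern unsigned int gnttab_max_grant_frames(void);
extern int gnttab_init(void);

/* Assumed helper on the platform-PCI side: reserves part of the Xen
 * platform device's MMIO BAR. */
extern unsigned long alloc_xen_mmio(unsigned long len);

static int example_platform_setup_grant_table(void)
{
        /* Publish the MMIO area that gnttab_map() hands to
         * XENMEM_add_to_physmap for each grant frame (see above). */
        xen_hvm_resume_frames =
                alloc_xen_mmio(PAGE_SIZE * gnttab_max_grant_frames());

        /* Run the initialization that __gnttab_init() deferred for HVM. */
        return gnttab_init();
}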
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
-index 5d42d55..3924018 100644
+index 5d42d55..0b50906 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
-@@ -7,15 +7,19 @@
- #include <linux/sysrq.h>
+@@ -8,6 +8,7 @@
#include <linux/stop_machine.h>
#include <linux/freezer.h>
-+#include <linux/pci.h>
-+#include <linux/cpumask.h>
++#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
- #include <xen/hvc-console.h>
- #include <xen/xen-ops.h>
-+#include <xen/platform_pci.h>
-
- #include <asm/xen/hypercall.h>
- #include <asm/xen/page.h>
-+#include <asm/xen/hypervisor.h>
-
- enum shutdown_state {
- SHUTDOWN_INVALID = -1,
-@@ -32,10 +36,30 @@ enum shutdown_state {
+@@ -32,10 +33,30 @@ enum shutdown_state {
static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
#ifdef CONFIG_PM_SLEEP
@@ -16926,12 +17664,12 @@
+
+ *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+
-+ xen_guest_init();
++ xen_hvm_post_suspend(*cancelled);
+ gnttab_resume();
+
+ if (!*cancelled) {
+ xen_irq_resume();
-+ platform_pci_resume();
++ xen_timer_resume();
+ }
+
+ return 0;
@@ -16944,82 +17682,19 @@
BUG_ON(!irqs_disabled());
-@@ -72,6 +96,62 @@ static int xen_suspend(void *data)
- return 0;
- }
+@@ -111,7 +132,10 @@ static void do_suspend(void)
+ goto out_resume;
+ }
-+static void do_hvm_suspend(void)
-+{
-+ int err;
-+ int cancelled = 1;
-+
-+ shutting_down = SHUTDOWN_SUSPEND;
-+
-+ err = stop_machine_create();
-+ if (err) {
-+ printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
-+ goto out;
-+ }
-+
-+#ifdef CONFIG_PREEMPT
-+ /* If the kernel is preemptible, we need to freeze all the processes
-+ to prevent them from being in the middle of a pagetable update
-+ during suspend. */
-+ err = freeze_processes();
-+ if (err) {
-+ printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-+ goto out_destroy_sm;
-+ }
-+#endif
-+
-+ printk(KERN_DEBUG "suspending xenstore... ");
-+ xenbus_suspend();
-+ printk(KERN_DEBUG "xenstore suspended\n");
-+ platform_pci_disable_irq();
-+
-+ err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
-+ if (err) {
-+ printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
-+ cancelled = 1;
-+ }
-+
-+ platform_pci_enable_irq();
-+
-+ if (!cancelled) {
-+ xen_arch_resume();
-+ xenbus_resume();
-+ } else
-+ xs_suspend_cancel();
-+
-+ /* Make sure timer events get retriggered on all CPUs */
-+ clock_was_set();
-+
-+out_destroy_sm:
-+ stop_machine_destroy();
-+
-+out:
-+#ifdef CONFIG_PREEMPT
-+ thaw_processes();
-+#endif
-+ shutting_down = SHUTDOWN_INVALID;
-+}
-+
- static void do_suspend(void)
- {
- int err;
-@@ -184,7 +264,10 @@ static void shutdown_handler(struct xenbus_watch *watch,
- ctrl_alt_del();
- #ifdef CONFIG_PM_SLEEP
- } else if (strcmp(str, "suspend") == 0) {
-- do_suspend();
-+ if (xen_hvm_domain())
-+ do_hvm_suspend();
-+ else
-+ do_suspend();
- #endif
- } else {
- printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
-@@ -260,7 +343,19 @@ static int shutdown_event(struct notifier_block *notifier,
+- err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
++ if (xen_hvm_domain())
++ err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
++ else
++ err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+
+ dpm_resume_noirq(PMSG_RESUME);
+
+@@ -260,7 +284,19 @@ static int shutdown_event(struct notifier_block *notifier,
return NOTIFY_DONE;
}
@@ -17040,9 +17715,11 @@
{
static struct notifier_block xenstore_notifier = {
.notifier_call = shutdown_event
-@@ -270,4 +365,4 @@ static int __init setup_shutdown_event(void)
+@@ -269,5 +305,6 @@ static int __init setup_shutdown_event(void)
+
return 0;
}
++EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
-subsys_initcall(setup_shutdown_event);
+subsys_initcall(__setup_shutdown_event);
@@ -17279,10 +17956,10 @@
+xen-netback-y := netback.o xenbus.o interface.o
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
new file mode 100644
-index 0000000..51f97c0
+index 0000000..b40ad72
--- /dev/null
+++ b/drivers/xen/netback/common.h
-@@ -0,0 +1,227 @@
+@@ -0,0 +1,329 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/common.h
+ *
@@ -17343,6 +18020,7 @@
+struct xen_netif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
++ int group;
+ unsigned int handle;
+
+ u8 fe_dev_addr[6];
@@ -17360,15 +18038,22 @@
+ struct vm_struct *tx_comms_area;
+ struct vm_struct *rx_comms_area;
+
-+ /* Set of features that can be turned on in dev->features. */
-+ int features;
++ /* Flags that must not be set in dev->features */
++ int features_disabled;
+
-+ int smart_poll;
++ /* Frontend feature information. */
++ u8 can_sg:1;
++ u8 gso:1;
++ u8 gso_prefix:1;
++ u8 csum:1;
++ u8 smart_poll:1;
+
+ /* Internal feature information. */
-+ u8 can_queue:1; /* can queue packets for receiver? */
++ u8 can_queue:1; /* can queue packets for receiver? */
+
-+ /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
++ /* Allow netif_be_start_xmit() to peek ahead in the rx request
++ * ring. This is a prediction of what rx_req_cons will be once
++ * all queued skbs are put on the ring. */
+ RING_IDX rx_req_cons_peek;
+
+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
@@ -17470,6 +18155,7 @@
+
+void netif_disconnect(struct xen_netif *netif);
+
++void netif_set_features(struct xen_netif *netif);
+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
@@ -17506,16 +18192,109 @@
+static inline int netbk_can_sg(struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
-+ return netif->features & NETIF_F_SG;
++ return netif->can_sg;
+}
+
++struct pending_tx_info {
++ struct xen_netif_tx_request req;
++ struct xen_netif *netif;
++};
++typedef unsigned int pending_ring_idx_t;
++
++struct netbk_rx_meta {
++ int id;
++ int size;
++ int gso_size;
++};
++
++struct netbk_tx_pending_inuse {
++ struct list_head list;
++ unsigned long alloc_time;
++};
++
++#define MAX_PENDING_REQS 256
++
++#define MAX_BUFFER_OFFSET PAGE_SIZE
++
++/* extra field used in struct page */
++union page_ext {
++ struct {
++#if BITS_PER_LONG < 64
++#define IDX_WIDTH 8
++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
++ unsigned int group:GROUP_WIDTH;
++ unsigned int idx:IDX_WIDTH;
++#else
++ unsigned int group, idx;
++#endif
++ } e;
++ void *mapping;
++};
++
++struct xen_netbk {
++ union {
++ struct {
++ struct tasklet_struct net_tx_tasklet;
++ struct tasklet_struct net_rx_tasklet;
++ } tasklet;
++
++ struct {
++ wait_queue_head_t netbk_action_wq;
++ struct task_struct *task;
++ } kthread;
++ };
++
++ struct sk_buff_head rx_queue;
++ struct sk_buff_head tx_queue;
++
++ struct timer_list net_timer;
++ struct timer_list netbk_tx_pending_timer;
++
++ struct page **mmap_pages;
++
++ pending_ring_idx_t pending_prod;
++ pending_ring_idx_t pending_cons;
++ pending_ring_idx_t dealloc_prod;
++ pending_ring_idx_t dealloc_cons;
++
++ struct list_head pending_inuse_head;
++ struct list_head net_schedule_list;
++
++ /* Protect the net_schedule_list in netif. */
++ spinlock_t net_schedule_list_lock;
++
++ atomic_t netfront_count;
++
++ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++
++ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++ u16 pending_ring[MAX_PENDING_REQS];
++ u16 dealloc_ring[MAX_PENDING_REQS];
++
++ /*
++ * Each head or fragment can be up to 4096 bytes. Given
++ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
++ * head/fragment uses 2 copy operation.
++ */
++ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
++ unsigned char rx_notify[NR_IRQS];
++ u16 notify_list[NET_RX_RING_SIZE];
++ struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++};
++
++extern struct xen_netbk *xen_netbk;
++extern int xen_netbk_group_nr;
++
+#endif /* __NETIF__BACKEND__COMMON_H__ */
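
The union page_ext added above packs a backend-group number and a
pending-request index into the single pointer-sized page->mapping slot;
netback.c below stores the group as group + 1 so an unset mapping (NULL) can
never be mistaken for group 0. A small self-contained round-trip demonstration
(plain userspace C, values purely illustrative):

#include <stdio.h>

union page_ext_demo {
        struct {
#if __SIZEOF_LONG__ == 4
                unsigned int group:24;  /* GROUP_WIDTH on 32-bit */
                unsigned int idx:8;     /* IDX_WIDTH */
#else
                unsigned int group, idx;
#endif
        } e;
        void *mapping;          /* what actually lives in page->mapping */
};

int main(void)
{
        /* Store "group 2, pending index 57"; the group is kept as +1. */
        union page_ext_demo set = { .e = { .group = 2 + 1, .idx = 57 } };
        void *cookie = set.mapping;

        /* Later, recover both values from the bare pointer field. */
        union page_ext_demo get = { .mapping = cookie };

        printf("group=%u idx=%u\n",
               (unsigned int)(get.e.group - 1), (unsigned int)get.e.idx);
        return 0;               /* prints: group=2 idx=57 */
}

netif_set_page_ext(), netif_page_group() and netif_page_index() in netback.c
below perform exactly this encoding and decoding against struct page.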
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
new file mode 100644
-index 0000000..086d939
+index 0000000..2e8508a
--- /dev/null
+++ b/drivers/xen/netback/interface.c
-@@ -0,0 +1,410 @@
+@@ -0,0 +1,475 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/interface.c
+ *
@@ -17572,8 +18351,33 @@
+static unsigned long netbk_queue_length = 32;
+module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+
++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
++ struct xen_netif *netif)
++{
++ int i;
++ int min_netfront_count;
++ int min_group = 0;
++ min_netfront_count = atomic_read(&netbk[0].netfront_count);
++ for (i = 0; i < group_nr; i++) {
++ int netfront_count = atomic_read(&netbk[i].netfront_count);
++ if (netfront_count < min_netfront_count) {
++ min_group = i;
++ min_netfront_count = netfront_count;
++ }
++ }
++
++ netif->group = min_group;
++ atomic_inc(&netbk[netif->group].netfront_count);
++}
++
++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++{
++ atomic_dec(&netbk[netif->group].netfront_count);
++}
++
+static void __netif_up(struct xen_netif *netif)
+{
++ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
+ enable_irq(netif->irq);
+ netif_schedule_work(netif);
+}
@@ -17582,6 +18386,7 @@
+{
+ disable_irq(netif->irq);
+ netif_deschedule_work(netif);
++ netbk_remove_netif(xen_netbk, netif);
+}
+
+static int net_open(struct net_device *dev)
@@ -17613,31 +18418,69 @@
+ return 0;
+}
+
-+static int netbk_set_sg(struct net_device *dev, u32 data)
++void netif_set_features(struct xen_netif *netif)
+{
-+ if (data) {
-+ struct xen_netif *netif = netdev_priv(dev);
++ struct net_device *dev = netif->dev;
++ int features = dev->features;
+
-+ if (!(netif->features & NETIF_F_SG))
++ if (netif->can_sg)
++ features |= NETIF_F_SG;
++ if (netif->gso || netif->gso_prefix)
++ features |= NETIF_F_TSO;
++ if (netif->csum)
++ features |= NETIF_F_IP_CSUM;
++
++ features &= ~(netif->features_disabled);
++
++ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
++ dev->mtu = ETH_DATA_LEN;
++
++ dev->features = features;
++}
++
++static int netbk_set_tx_csum(struct net_device *dev, u32 data)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (data) {
++ if (!netif->csum)
+ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_IP_CSUM;
++ } else {
++ netif->features_disabled |= NETIF_F_IP_CSUM;
+ }
+
-+ if (dev->mtu > ETH_DATA_LEN)
-+ dev->mtu = ETH_DATA_LEN;
++ netif_set_features(netif);
++ return 0;
++}
++
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (data) {
++ if (!netif->can_sg)
++ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_SG;
++ } else {
++ netif->features_disabled |= NETIF_F_SG;
++ }
+
-+ return ethtool_op_set_sg(dev, data);
++ netif_set_features(netif);
++ return 0;
+}
+
+static int netbk_set_tso(struct net_device *dev, u32 data)
+{
++ struct xen_netif *netif = netdev_priv(dev);
+ if (data) {
-+ struct xen_netif *netif = netdev_priv(dev);
-+
-+ if (!(netif->features & NETIF_F_TSO))
++ if (!netif->gso && !netif->gso_prefix)
+ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_TSO;
++ } else {
++ netif->features_disabled |= NETIF_F_TSO;
+ }
+
-+ return ethtool_op_set_tso(dev, data);
++ netif_set_features(netif);
++ return 0;
+}
+
+static void netbk_get_drvinfo(struct net_device *dev,
@@ -17692,7 +18535,7 @@
+ .get_drvinfo = netbk_get_drvinfo,
+
+ .get_tx_csum = ethtool_op_get_tx_csum,
-+ .set_tx_csum = ethtool_op_set_tx_csum,
++ .set_tx_csum = netbk_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = netbk_set_sg,
+ .get_tso = ethtool_op_get_tso,
@@ -17732,8 +18575,10 @@
+ netif = netdev_priv(dev);
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
++ netif->group = -1;
+ netif->handle = handle;
-+ netif->features = NETIF_F_SG;
++ netif->can_sg = 1;
++ netif->csum = 1;
+ atomic_set(&netif->refcnt, 1);
+ init_waitqueue_head(&netif->waiting_to_free);
+ netif->dev = dev;
@@ -17750,8 +18595,7 @@
+ init_timer(&netif->tx_queue_timeout);
+
+ dev->netdev_ops = &netback_ops;
-+ dev->features = NETIF_F_IP_CSUM|NETIF_F_SG;
-+
++ netif_set_features(netif);
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+ dev->tx_queue_len = netbk_queue_length;
@@ -17928,10 +18772,10 @@
+}
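
netbk_add_netif() above assigns each new vif to the xen_netbk group that
currently serves the fewest frontends, spreading work across the per-group
tasklets or kthreads declared in common.h. A tiny self-contained illustration
of the same selection; the counts are made up:

#include <stdio.h>

/* Same minimum scan as netbk_add_netif(), on plain ints instead of the
 * atomic_t netfront_count fields. */
static int example_pick_group(const int *netfront_count, int group_nr)
{
        int min_group = 0;
        int i;

        for (i = 1; i < group_nr; i++)
                if (netfront_count[i] < netfront_count[min_group])
                        min_group = i;
        return min_group;
}

int main(void)
{
        int counts[] = { 3, 1, 2, 1 };  /* illustrative per-group loads */

        /* Picks group 1: the first group with the lowest count, matching
         * the strict '<' comparison in the hunk above. */
        printf("new vif -> group %d\n",
               example_pick_group(counts, (int)(sizeof(counts) / sizeof(counts[0]))));
        return 0;
}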
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
new file mode 100644
-index 0000000..5dc4f98
+index 0000000..4121062
--- /dev/null
+++ b/drivers/xen/netback/netback.c
-@@ -0,0 +1,1609 @@
+@@ -0,0 +1,1855 @@
+/******************************************************************************
+ * drivers/xen/netback/netback.c
+ *
@@ -17972,6 +18816,7 @@
+
+#include <linux/tcp.h>
+#include <linux/udp.h>
++#include <linux/kthread.h>
+
+#include <xen/balloon.h>
+#include <xen/events.h>
@@ -17982,18 +18827,10 @@
+
+/*define NETBE_DEBUG_INTERRUPT*/
+
-+struct netbk_rx_meta {
-+ skb_frag_t frag;
-+ int id;
-+};
-+
-+struct netbk_tx_pending_inuse {
-+ struct list_head list;
-+ unsigned long alloc_time;
-+};
++struct xen_netbk *xen_netbk;
++int xen_netbk_group_nr;
+
-+
-+static void netif_idx_release(u16 pending_idx);
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+static void make_tx_response(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp,
+ s8 st);
@@ -18004,47 +18841,44 @@
+ u16 size,
+ u16 flags);
+
-+static void net_tx_action(unsigned long unused);
-+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
-+
-+static void net_rx_action(unsigned long unused);
-+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
-+
-+static struct timer_list net_timer;
-+static struct timer_list netbk_tx_pending_timer;
-+
-+#define MAX_PENDING_REQS 256
++static void net_tx_action(unsigned long data);
+
-+static struct sk_buff_head rx_queue;
++static void net_rx_action(unsigned long data);
+
-+static struct page **mmap_pages;
-+static inline unsigned long idx_to_pfn(unsigned int idx)
++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
++ unsigned int idx)
+{
-+ return page_to_pfn(mmap_pages[idx]);
++ return page_to_pfn(netbk->mmap_pages[idx]);
+}
+
-+static inline unsigned long idx_to_kaddr(unsigned int idx)
++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
++ unsigned int idx)
+{
-+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
++ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
+}
+
+/* extra field used in struct page */
-+static inline void netif_set_page_index(struct page *pg, unsigned int index)
++static inline void netif_set_page_ext(struct page *pg, unsigned int group,
++ unsigned int idx)
+{
-+ *(unsigned long *)&pg->mapping = index + 1;
++ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
++
++ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
++ pg->mapping = ext.mapping;
+}
+
-+static inline int netif_page_index(struct page *pg)
++static inline unsigned int netif_page_group(const struct page *pg)
+{
-+ unsigned long idx = (unsigned long)pg->mapping - 1;
++ union page_ext ext = { .mapping = pg->mapping };
+
-+ if (!PageForeign(pg))
-+ return -1;
++ return ext.e.group - 1;
++}
+
-+ if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
-+ return -1;
++static inline unsigned int netif_page_index(const struct page *pg)
++{
++ union page_ext ext = { .mapping = pg->mapping };
+
-+ return idx;
++ return ext.e.idx;
+}
+
+/*
@@ -18055,46 +18889,17 @@
+ */
+#define PKT_PROT_LEN 72
+
-+static struct pending_tx_info {
-+ struct xen_netif_tx_request req;
-+ struct xen_netif *netif;
-+} pending_tx_info[MAX_PENDING_REQS];
-+static u16 pending_ring[MAX_PENDING_REQS];
-+typedef unsigned int pending_ring_idx_t;
-+
+static inline pending_ring_idx_t pending_index(unsigned i)
+{
+ return i & (MAX_PENDING_REQS-1);
+}
+
-+static pending_ring_idx_t pending_prod, pending_cons;
-+
-+static inline pending_ring_idx_t nr_pending_reqs(void)
++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+{
-+ return MAX_PENDING_REQS - pending_prod + pending_cons;
++ return MAX_PENDING_REQS -
++ netbk->pending_prod + netbk->pending_cons;
+}
+
-+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
-+static u16 dealloc_ring[MAX_PENDING_REQS];
-+static pending_ring_idx_t dealloc_prod, dealloc_cons;
-+
-+/* Doubly-linked list of in-use pending entries. */
-+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-+static LIST_HEAD(pending_inuse_head);
-+
-+static struct sk_buff_head tx_queue;
-+
-+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-+static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-+static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-+
-+static LIST_HEAD(net_schedule_list);
-+static DEFINE_SPINLOCK(net_schedule_list_lock);
-+
-+#define MAX_MFN_ALLOC 64
-+static unsigned long mfn_list[MAX_MFN_ALLOC];
-+static unsigned int alloc_index = 0;
-+
+/* Setting this allows the safe use of this driver without netloop. */
+static int MODPARM_copy_skb = 1;
+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
@@ -18102,18 +18907,31 @@
+
+int netbk_copy_skb_mode;
+
-+static inline unsigned long alloc_mfn(void)
++static int MODPARM_netback_kthread;
++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
++MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
++
++/*
++ * Netback bottom half handler.
++ * dir indicates the data direction.
++ * rx: 1, tx: 0.
++ */
++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
+{
-+ BUG_ON(alloc_index == 0);
-+ return mfn_list[--alloc_index];
++ if (MODPARM_netback_kthread)
++ wake_up(&netbk->kthread.netbk_action_wq);
++ else if (dir)
++ tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
++ else
++ tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
+}
+
-+static inline void maybe_schedule_tx_action(void)
++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+{
+ smp_mb();
-+ if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
-+ !list_empty(&net_schedule_list))
-+ tasklet_schedule(&net_tx_tasklet);
++ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
++ !list_empty(&netbk->net_schedule_list))
++ xen_netbk_bh_handler(netbk, 0);
+}
+
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
@@ -18178,7 +18996,11 @@
+ len -= copy;
+ }
+
++#ifdef NET_SKBUFF_DATA_USES_OFFSET
++ offset = 0;
++#else
+ offset = nskb->data - skb->data;
++#endif
+
+ nskb->transport_header = skb->transport_header + offset;
+ nskb->network_header = skb->network_header + offset;
@@ -18194,7 +19016,7 @@
+
+static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+{
-+ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
++ if (netif->can_sg || netif->gso || netif->gso_prefix)
+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ return 1; /* all in one */
+}
@@ -18215,12 +19037,60 @@
+ netif_wake_queue(netif->dev);
+}
+
++/* Figure out how many ring slots we're going to need to send @skb to
++ the guest. */
++static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++{
++ unsigned count;
++ unsigned copy_off;
++ unsigned i;
++
++ copy_off = 0;
++ count = 1;
++
++ BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
++
++ copy_off = skb_headlen(skb);
++
++ if (skb_shinfo(skb)->gso_size)
++ count++;
++
++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++ unsigned long size = skb_shinfo(skb)->frags[i].size;
++ unsigned long bytes;
++ while (size > 0) {
++ BUG_ON(copy_off > MAX_BUFFER_OFFSET);
++
++ /* These checks are the same as in netbk_gop_frag_copy */
++ if (copy_off == MAX_BUFFER_OFFSET
++ || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
++ count++;
++ copy_off = 0;
++ }
++
++ bytes = size;
++ if (copy_off + bytes > MAX_BUFFER_OFFSET)
++ bytes = MAX_BUFFER_OFFSET - copy_off;
++
++ copy_off += bytes;
++ size -= bytes;
++ }
++ }
++ return count;
++}
++
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
++ struct xen_netbk *netbk;
+
+ BUG_ON(skb->dev != dev);
+
++ if (netif->group == -1)
++ goto drop;
++
++ netbk = &xen_netbk[netif->group];
++
+ /* Drop the packet if the target domain has no receive buffers. */
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ goto drop;
@@ -18240,8 +19110,9 @@
+ skb = nskb;
+ }
+
-+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
-+ !!skb_shinfo(skb)->gso_size;
++ /* Reserve ring slots for the worst-case number of
++ * fragments. */
++ netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+ netif_get(netif);
+
+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
@@ -18262,9 +19133,9 @@
+ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ }
+ }
++ skb_queue_tail(&netbk->rx_queue, skb);
+
-+ skb_queue_tail(&rx_queue, skb);
-+ tasklet_schedule(&net_rx_tasklet);
++ xen_netbk_bh_handler(netbk, 1);
+
+ return 0;
+
@@ -18275,112 +19146,187 @@
+}
+
+struct netrx_pending_operations {
-+ unsigned trans_prod, trans_cons;
-+ unsigned mmu_prod, mmu_mcl;
-+ unsigned mcl_prod, mcl_cons;
+ unsigned copy_prod, copy_cons;
+ unsigned meta_prod, meta_cons;
-+ struct mmu_update *mmu;
-+ struct gnttab_transfer *trans;
+ struct gnttab_copy *copy;
-+ struct multicall_entry *mcl;
+ struct netbk_rx_meta *meta;
++ int copy_off;
++ grant_ref_t copy_gref;
+};
+
+/* Set up the grant operations for this fragment. If it's a flipping
+ interface, we also set up the unmap request from here. */
-+static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
-+ int i, struct netrx_pending_operations *npo,
-+ struct page *page, unsigned long size,
-+ unsigned long offset)
++
++static void netbk_gop_frag_copy(struct xen_netif *netif,
++ struct netrx_pending_operations *npo,
++ struct page *page, unsigned long size,
++ unsigned long offset, int head)
+{
+ struct gnttab_copy *copy_gop;
-+ struct xen_netif_rx_request *req;
-+ unsigned long old_mfn;
++ struct netbk_rx_meta *meta;
++ int group = netif_page_group(page);
+ int idx = netif_page_index(page);
++ unsigned long bytes;
+
-+ old_mfn = virt_to_mfn(page_address(page));
++ /* Data must not cross a page boundary. */
++ BUG_ON(size + offset > PAGE_SIZE);
+
-+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
++ meta = npo->meta + npo->meta_prod - 1;
++
++ while (size > 0) {
++ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
++
++ /*
++ * Move to a new receive buffer if:
++ *
++ * simple case: we have completely filled the current buffer.
++ *
++ * complex case: the current frag would overflow
++ * the current buffer but only if:
++ * (i) this frag would fit completely in the next buffer
++ * and (ii) there is already some data in the current buffer
++ * and (iii) this is not the head buffer.
++ *
++ * Where:
++ * - (i) stops us splitting a frag into two copies
++ * unless the frag is too large for a single buffer.
++ * - (ii) stops us from leaving a buffer pointlessly empty.
++ * - (iii) stops us leaving the first buffer
++ * empty. Strictly speaking this is already covered
++ * by (ii) but is explicitly checked because
++ * netfront relies on the first buffer being
++ * non-empty and can crash otherwise.
++ *
++ * This means we will effectively linearise small
++ * frags but do not needlessly split large buffers
++ * into multiple copies; large frags tend to get their
++ * own buffers as before.
++ */
++ if (npo->copy_off == MAX_BUFFER_OFFSET
++ || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
++ struct xen_netif_rx_request *req;
++
++ BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
++ /* Overflowed this request, go to the next one */
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++ meta = npo->meta + npo->meta_prod++;
++ meta->gso_size = 0;
++ meta->size = 0;
++ meta->id = req->id;
++ npo->copy_off = 0;
++ npo->copy_gref = req->gref;
++ }
++
++ bytes = size;
++ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
++ bytes = MAX_BUFFER_OFFSET - npo->copy_off;
++
++ copy_gop = npo->copy + npo->copy_prod++;
++ copy_gop->flags = GNTCOPY_dest_gref;
++ if (PageForeign(page)) {
++ struct xen_netbk *netbk = &xen_netbk[group];
++ struct pending_tx_info *src_pend;
++
++ src_pend = &netbk->pending_tx_info[idx];
++
++ copy_gop->source.domid = src_pend->netif->domid;
++ copy_gop->source.u.ref = src_pend->req.gref;
++ copy_gop->flags |= GNTCOPY_source_gref;
++ } else {
++ copy_gop->source.domid = DOMID_SELF;
++ copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
++ }
++ copy_gop->source.offset = offset;
++ copy_gop->dest.domid = netif->domid;
+
-+ copy_gop = npo->copy + npo->copy_prod++;
-+ copy_gop->flags = GNTCOPY_dest_gref;
-+ if (idx > -1) {
-+ struct pending_tx_info *src_pend = &pending_tx_info[idx];
-+ copy_gop->source.domid = src_pend->netif->domid;
-+ copy_gop->source.u.ref = src_pend->req.gref;
-+ copy_gop->flags |= GNTCOPY_source_gref;
-+ } else {
-+ copy_gop->source.domid = DOMID_SELF;
-+ copy_gop->source.u.gmfn = old_mfn;
++ copy_gop->dest.offset = npo->copy_off;
++ copy_gop->dest.u.ref = npo->copy_gref;
++ copy_gop->len = bytes;
++
++ npo->copy_off += bytes;
++ meta->size += bytes;
++
++ offset += bytes;
++ size -= bytes;
++ head = 0; /* Must be something in this buffer now */
+ }
-+ copy_gop->source.offset = offset;
-+ copy_gop->dest.domid = netif->domid;
-+ copy_gop->dest.offset = 0;
-+ copy_gop->dest.u.ref = req->gref;
-+ copy_gop->len = size;
-+
-+ return req->id;
+}
+
-+static void netbk_gop_skb(struct sk_buff *skb,
-+ struct netrx_pending_operations *npo)
++/* Prepare an SKB to be transmitted to the frontend. This is
++ responsible for allocating grant operations, meta structures, etc.
++ It returns the number of meta structures consumed. The number of
++ ring slots used is always equal to the number of meta slots used
++ plus the number of GSO descriptors used. Currently, we use either
++ zero GSO descriptors (for non-GSO packets) or one descriptor (for
++ frontend-side LRO). */
++static int netbk_gop_skb(struct sk_buff *skb,
++ struct netrx_pending_operations *npo)
+{
+ struct xen_netif *netif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
-+ int extra;
-+ struct netbk_rx_meta *head_meta, *meta;
++ struct xen_netif_rx_request *req;
++ struct netbk_rx_meta *meta;
++ int old_meta_prod;
+
-+ head_meta = npo->meta + npo->meta_prod++;
-+ head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
-+ head_meta->frag.size = skb_shinfo(skb)->gso_size;
-+ extra = !!head_meta->frag.size + 1;
++ old_meta_prod = npo->meta_prod;
+
-+ for (i = 0; i < nr_frags; i++) {
++ /* Set up a GSO prefix descriptor, if necessary */
++ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
-+ meta->frag = skb_shinfo(skb)->frags[i];
-+ meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
-+ meta->frag.page,
-+ meta->frag.size,
-+ meta->frag.page_offset);
++ meta->gso_size = skb_shinfo(skb)->gso_size;
++ meta->size = 0;
++ meta->id = req->id;
+ }
+
-+ /*
-+ * This must occur at the end to ensure that we don't trash skb_shinfo
-+ * until we're done. We know that the head doesn't cross a page
-+ * boundary because such packets get copied in netif_be_start_xmit.
-+ */
-+ head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
-+ virt_to_page(skb->data),
-+ skb_headlen(skb),
-+ offset_in_page(skb->data));
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++ meta = npo->meta + npo->meta_prod++;
+
-+ netif->rx.req_cons += nr_frags + extra;
-+}
++ if (!netif->gso_prefix)
++ meta->gso_size = skb_shinfo(skb)->gso_size;
++ else
++ meta->gso_size = 0;
+
-+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
-+{
-+ int i;
++ meta->size = 0;
++ meta->id = req->id;
++ npo->copy_off = 0;
++ npo->copy_gref = req->gref;
++
++ netbk_gop_frag_copy(netif,
++ npo, virt_to_page(skb->data),
++ skb_headlen(skb),
++ offset_in_page(skb->data), 1);
++
++ /* Leave a gap for the GSO descriptor. */
++ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
++ netif->rx.req_cons++;
++
++ for (i = 0; i < nr_frags; i++) {
++ netbk_gop_frag_copy(netif, npo,
++ skb_shinfo(skb)->frags[i].page,
++ skb_shinfo(skb)->frags[i].size,
++ skb_shinfo(skb)->frags[i].page_offset,
++ 0);
++ }
+
-+ for (i = 0; i < nr_frags; i++)
-+ put_page(meta[i].frag.page);
++ return npo->meta_prod - old_meta_prod;
+}
+
+/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ used to set up the operations on the top of
+ netrx_pending_operations, which have since been done. Check that
+ they didn't give any errors and advance over them. */
-+static int netbk_check_gop(int nr_frags, domid_t domid,
++static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ struct netrx_pending_operations *npo)
+{
+ struct gnttab_copy *copy_op;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
-+ for (i = 0; i <= nr_frags; i++) {
-+ copy_op = npo->copy + npo->copy_cons++;
-+ if (copy_op->status != GNTST_okay) {
++ for (i = 0; i < nr_meta_slots; i++) {
++ copy_op = npo->copy + npo->copy_cons++;
++ if (copy_op->status != GNTST_okay) {
+ DPRINTK("Bad status %d from copy to DOM%d.\n",
+ copy_op->status, domid);
+ status = NETIF_RSP_ERROR;
@@ -18391,28 +19337,36 @@
+}
+
+static void netbk_add_frag_responses(struct xen_netif *netif, int status,
-+ struct netbk_rx_meta *meta, int nr_frags)
++ struct netbk_rx_meta *meta,
++ int nr_meta_slots)
+{
+ int i;
+ unsigned long offset;
+
-+ for (i = 0; i < nr_frags; i++) {
-+ int id = meta[i].id;
-+ int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
-+
++ for (i = 0; i < nr_meta_slots; i++) {
++ int flags;
++ if (i == nr_meta_slots - 1)
++ flags = 0;
++ else
++ flags = NETRXF_more_data;
++
+ offset = 0;
-+ make_rx_response(netif, id, status, offset,
-+ meta[i].frag.size, flags);
++ make_rx_response(netif, meta[i].id, status, offset,
++ meta[i].size, flags);
+ }
+}
+
-+static void net_rx_action(unsigned long unused)
++struct skb_cb_overlay {
++ int meta_slots_used;
++};
++
++static void net_rx_action(unsigned long data)
+{
+ struct xen_netif *netif = NULL;
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ s8 status;
-+ u16 id, irq, flags;
++ u16 irq, flags;
+ struct xen_netif_rx_response *resp;
-+ struct multicall_entry *mcl;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ int notify_nr = 0;
@@ -18420,35 +19374,23 @@
+ int nr_frags;
+ int count;
+ unsigned long offset;
-+
-+ /*
-+ * Putting hundreds of bytes on the stack is considered rude.
-+ * Static works because a tasklet can only be on one CPU at any time.
-+ */
-+ static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
-+ static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
-+ static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
-+ static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
-+ static unsigned char rx_notify[NR_IRQS];
-+ static u16 notify_list[NET_RX_RING_SIZE];
-+ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++ struct skb_cb_overlay *sco;
+
+ struct netrx_pending_operations npo = {
-+ mmu: rx_mmu,
-+ trans: grant_trans_op,
-+ copy: grant_copy_op,
-+ mcl: rx_mcl,
-+ meta: meta};
++ .copy = netbk->grant_copy_op,
++ .meta = netbk->meta,
++ };
+
+ skb_queue_head_init(&rxq);
+
+ count = 0;
+
-+ while ((skb = skb_dequeue(&rx_queue)) != NULL) {
++ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
++ netif = netdev_priv(skb->dev);
+ nr_frags = skb_shinfo(skb)->nr_frags;
-+ *(int *)skb->cb = nr_frags;
+
-+ netbk_gop_skb(skb, &npo);
++ sco = (struct skb_cb_overlay *)skb->cb;
++ sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+
+ count += nr_frags + 1;
+
@@ -18459,65 +19401,46 @@
+ break;
+ }
+
-+ BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
-+
-+ npo.mmu_mcl = npo.mcl_prod;
-+ if (npo.mcl_prod) {
-+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-+ BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
-+ mcl = npo.mcl + npo.mcl_prod++;
-+
-+ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
-+ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-+
-+ mcl->op = __HYPERVISOR_mmu_update;
-+ mcl->args[0] = (unsigned long)rx_mmu;
-+ mcl->args[1] = npo.mmu_prod;
-+ mcl->args[2] = 0;
-+ mcl->args[3] = DOMID_SELF;
-+ }
-+
-+ if (npo.trans_prod) {
-+ BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
-+ mcl = npo.mcl + npo.mcl_prod++;
-+ mcl->op = __HYPERVISOR_grant_table_op;
-+ mcl->args[0] = GNTTABOP_transfer;
-+ mcl->args[1] = (unsigned long)grant_trans_op;
-+ mcl->args[2] = npo.trans_prod;
-+ }
-+
-+ if (npo.copy_prod) {
-+ BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
-+ mcl = npo.mcl + npo.mcl_prod++;
-+ mcl->op = __HYPERVISOR_grant_table_op;
-+ mcl->args[0] = GNTTABOP_copy;
-+ mcl->args[1] = (unsigned long)grant_copy_op;
-+ mcl->args[2] = npo.copy_prod;
-+ }
++ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+
-+ /* Nothing to do? */
-+ if (!npo.mcl_prod)
++ if (!npo.copy_prod)
+ return;
+
-+ BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
-+
-+ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
++ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
++ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
++ npo.copy_prod);
+ BUG_ON(ret != 0);
-+ /* The mmu_machphys_update() must not fail. */
-+ BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
-+ nr_frags = *(int *)skb->cb;
++ sco = (struct skb_cb_overlay *)skb->cb;
+
+ netif = netdev_priv(skb->dev);
+
++ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
++ resp = RING_GET_RESPONSE(&netif->rx,
++ netif->rx.rsp_prod_pvt++);
++
++ resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
++
++ resp->offset = netbk->meta[npo.meta_cons].gso_size;
++ resp->id = netbk->meta[npo.meta_cons].id;
++ resp->status = sco->meta_slots_used;
++
++ npo.meta_cons++;
++ sco->meta_slots_used--;
++ }
++
++
+ netif->stats.tx_bytes += skb->len;
+ netif->stats.tx_packets++;
+
-+ status = netbk_check_gop(nr_frags, netif->domid, &npo);
++ status = netbk_check_gop(sco->meta_slots_used,
++ netif->domid, &npo);
+
-+ id = meta[npo.meta_cons].id;
-+ flags = nr_frags ? NETRXF_more_data : 0;
++ if (sco->meta_slots_used == 1)
++ flags = 0;
++ else
++ flags = NETRXF_more_data;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
@@ -18526,10 +19449,12 @@
+ flags |= NETRXF_data_validated;
+
+ offset = 0;
-+ resp = make_rx_response(netif, id, status, offset,
-+ skb_headlen(skb), flags);
++ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
++ status, offset,
++ netbk->meta[npo.meta_cons].size,
++ flags);
+
-+ if (meta[npo.meta_cons].frag.size) {
++ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
+ struct xen_netif_extra_info *gso =
+ (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
@@ -18537,7 +19462,7 @@
+
+ resp->flags |= NETRXF_extra_info;
+
-+ gso->u.gso.size = meta[npo.meta_cons].frag.size;
++ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
@@ -18546,16 +19471,18 @@
+ gso->flags = 0;
+ }
+
-+ netbk_add_frag_responses(netif, status,
-+ meta + npo.meta_cons + 1,
-+ nr_frags);
++ if (sco->meta_slots_used > 1) {
++ netbk_add_frag_responses(netif, status,
++ netbk->meta + npo.meta_cons + 1,
++ sco->meta_slots_used - 1);
++ }
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
-+ if (ret && !rx_notify[irq] &&
++ if (ret && !netbk->rx_notify[irq] &&
+ (netif->smart_poll != 1)) {
-+ rx_notify[irq] = 1;
-+ notify_list[notify_nr++] = irq;
++ netbk->rx_notify[irq] = 1;
++ netbk->notify_list[notify_nr++] = irq;
+ }
+
+ if (netif_queue_stopped(netif->dev) &&
@@ -18567,37 +19494,39 @@
+ * netfront_smartpoll_active indicates whether
+ * netfront timer is active.
+ */
-+ if ((netif->smart_poll == 1)) {
-+ if (!(netif->rx.sring->netfront_smartpoll_active)) {
-+ notify_remote_via_irq(irq);
-+ netif->rx.sring->netfront_smartpoll_active = 1;
-+ }
++ if ((netif->smart_poll == 1) &&
++ !(netif->rx.sring->private.netif.smartpoll_active)) {
++ notify_remote_via_irq(irq);
++ netif->rx.sring->private.netif.smartpoll_active = 1;
+ }
+
+ netif_put(netif);
++ npo.meta_cons += sco->meta_slots_used;
+ dev_kfree_skb(skb);
-+ npo.meta_cons += nr_frags + 1;
+ }
+
+ while (notify_nr != 0) {
-+ irq = notify_list[--notify_nr];
-+ rx_notify[irq] = 0;
++ irq = netbk->notify_list[--notify_nr];
++ netbk->rx_notify[irq] = 0;
+ notify_remote_via_irq(irq);
+ }
+
+ /* More work to do? */
-+ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
-+ tasklet_schedule(&net_rx_tasklet);
++ if (!skb_queue_empty(&netbk->rx_queue) &&
++ !timer_pending(&netbk->net_timer))
++ xen_netbk_bh_handler(netbk, 1);
+}
+
-+static void net_alarm(unsigned long unused)
++static void net_alarm(unsigned long data)
+{
-+ tasklet_schedule(&net_rx_tasklet);
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
++ xen_netbk_bh_handler(netbk, 1);
+}
+
-+static void netbk_tx_pending_timeout(unsigned long unused)
++static void netbk_tx_pending_timeout(unsigned long data)
+{
-+ tasklet_schedule(&net_tx_tasklet);
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
++ xen_netbk_bh_handler(netbk, 0);
+}
+
+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
@@ -18613,37 +19542,40 @@
+
+static void remove_from_net_schedule_list(struct xen_netif *netif)
+{
-+ spin_lock_irq(&net_schedule_list_lock);
++ struct xen_netbk *netbk = &xen_netbk[netif->group];
++ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+ list_del_init(&netif->list);
+ netif_put(netif);
+ }
-+ spin_unlock_irq(&net_schedule_list_lock);
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+}
+
+static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+{
++ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ if (__on_net_schedule_list(netif))
+ return;
+
-+ spin_lock_irq(&net_schedule_list_lock);
++ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (!__on_net_schedule_list(netif) &&
+ likely(netif_schedulable(netif))) {
-+ list_add_tail(&netif->list, &net_schedule_list);
++ list_add_tail(&netif->list, &netbk->net_schedule_list);
+ netif_get(netif);
+ }
-+ spin_unlock_irq(&net_schedule_list_lock);
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+}
+
+void netif_schedule_work(struct xen_netif *netif)
+{
++ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ int more_to_do;
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+
+ if (more_to_do) {
+ add_to_net_schedule_list_tail(netif);
-+ maybe_schedule_tx_action();
++ maybe_schedule_tx_action(netbk);
+ }
+}
+
@@ -18680,13 +19612,15 @@
+ netif_schedule_work(netif);
+}
+
-+static inline int copy_pending_req(pending_ring_idx_t pending_idx)
++static inline int copy_pending_req(struct xen_netbk *netbk,
++ pending_ring_idx_t pending_idx)
+{
-+ return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
-+ &mmap_pages[pending_idx]);
++ return gnttab_copy_grant_page(
++ netbk->grant_tx_handle[pending_idx],
++ &netbk->mmap_pages[pending_idx]);
+}
+
-+inline static void net_tx_action_dealloc(void)
++static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+{
+ struct netbk_tx_pending_inuse *inuse, *n;
+ struct gnttab_unmap_grant_ref *gop;
@@ -18696,49 +19630,56 @@
+ int ret;
+ LIST_HEAD(list);
+
-+ dc = dealloc_cons;
-+ gop = tx_unmap_ops;
++ dc = netbk->dealloc_cons;
++ gop = netbk->tx_unmap_ops;
+
+ /*
+ * Free up any grants we have finished using
+ */
+ do {
-+ dp = dealloc_prod;
++ dp = netbk->dealloc_prod;
+
+ /* Ensure we see all indices enqueued by netif_idx_release(). */
+ smp_rmb();
+
+ while (dc != dp) {
+ unsigned long pfn;
++ struct netbk_tx_pending_inuse *pending_inuse =
++ netbk->pending_inuse;
+
-+ pending_idx = dealloc_ring[pending_index(dc++)];
++ pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+ list_move_tail(&pending_inuse[pending_idx].list, &list);
+
-+ pfn = idx_to_pfn(pending_idx);
++ pfn = idx_to_pfn(netbk, pending_idx);
+ /* Already unmapped? */
+ if (!phys_to_machine_mapping_valid(pfn))
+ continue;
+
-+ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
-+ GNTMAP_host_map,
-+ grant_tx_handle[pending_idx]);
++ gnttab_set_unmap_op(gop,
++ idx_to_kaddr(netbk, pending_idx),
++ GNTMAP_host_map,
++ netbk->grant_tx_handle[pending_idx]);
+ gop++;
+ }
+
+ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
-+ list_empty(&pending_inuse_head))
++ list_empty(&netbk->pending_inuse_head))
+ break;
+
+ /* Copy any entries that have been pending for too long. */
-+ list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
++ list_for_each_entry_safe(inuse, n,
++ &netbk->pending_inuse_head, list) {
++ struct pending_tx_info *pending_tx_info;
++ pending_tx_info = netbk->pending_tx_info;
++
+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+ break;
+
-+ pending_idx = inuse - pending_inuse;
++ pending_idx = inuse - netbk->pending_inuse;
+
+ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+
-+ switch (copy_pending_req(pending_idx)) {
++ switch (copy_pending_req(netbk, pending_idx)) {
+ case 0:
+ list_move_tail(&inuse->list, &list);
+ continue;
@@ -18751,16 +19692,21 @@
+
+ break;
+ }
-+ } while (dp != dealloc_prod);
++ } while (dp != netbk->dealloc_prod);
+
-+ dealloc_cons = dc;
++ netbk->dealloc_cons = dc;
+
+ ret = HYPERVISOR_grant_table_op(
-+ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
++ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++ gop - netbk->tx_unmap_ops);
+ BUG_ON(ret);
+
+ list_for_each_entry_safe(inuse, n, &list, list) {
-+ pending_idx = inuse - pending_inuse;
++ struct pending_tx_info *pending_tx_info;
++ pending_ring_idx_t index;
++
++ pending_tx_info = netbk->pending_tx_info;
++ pending_idx = inuse - netbk->pending_inuse;
+
+ netif = pending_tx_info[pending_idx].netif;
+
@@ -18768,9 +19714,10 @@
+ NETIF_RSP_OKAY);
+
+ /* Ready for next use. */
-+ gnttab_reset_grant_page(mmap_pages[pending_idx]);
++ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+
-+ pending_ring[pending_index(pending_prod++)] = pending_idx;
++ index = pending_index(netbk->pending_prod++);
++ netbk->pending_ring[index] = pending_idx;
+
+ netif_put(netif);
+
@@ -18778,7 +19725,8 @@
+ }
+}
+
-+static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
++static void netbk_tx_err(struct xen_netif *netif,
++ struct xen_netif_tx_request *txp, RING_IDX end)
+{
+ RING_IDX cons = netif->tx.req_cons;
+
@@ -18834,7 +19782,8 @@
+ return frags;
+}
+
-+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
++ struct xen_netif *netif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *mop)
@@ -18848,9 +19797,14 @@
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
-+ pending_idx = pending_ring[pending_index(pending_cons++)];
++ pending_ring_idx_t index;
++ struct pending_tx_info *pending_tx_info =
++ netbk->pending_tx_info;
++
++ index = pending_index(netbk->pending_cons++);
++ pending_idx = netbk->pending_ring[index];
+
-+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
++ gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, netif->domid);
+
@@ -18863,11 +19817,13 @@
+ return mop;
+}
+
-+static int netbk_tx_check_mop(struct sk_buff *skb,
-+ struct gnttab_map_grant_ref **mopp)
++static int netbk_tx_check_mop(struct xen_netbk *netbk,
++ struct sk_buff *skb,
++ struct gnttab_map_grant_ref **mopp)
+{
+ struct gnttab_map_grant_ref *mop = *mopp;
+ int pending_idx = *((u16 *)skb->data);
++ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+ struct xen_netif_tx_request *txp;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -18877,15 +19833,17 @@
+ /* Check status of header. */
+ err = mop->status;
+ if (unlikely(err)) {
++ pending_ring_idx_t index;
++ index = pending_index(netbk->pending_prod++);
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+ pending_ring[pending_index(pending_prod++)] = pending_idx;
++ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
+ } else {
+ set_phys_to_machine(
-+ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
++ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-+ grant_tx_handle[pending_idx] = mop->handle;
++ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ }
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
@@ -18893,26 +19851,30 @@
+
+ for (i = start; i < nr_frags; i++) {
+ int j, newerr;
++ pending_ring_idx_t index;
+
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+
+ /* Check error status: if okay then remember grant handle. */
+ newerr = (++mop)->status;
+ if (likely(!newerr)) {
++ unsigned long addr;
++ addr = idx_to_kaddr(netbk, pending_idx);
+ set_phys_to_machine(
-+ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
++ __pa(addr)>>PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
-+ grant_tx_handle[pending_idx] = mop->handle;
++ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
-+ netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ continue;
+ }
+
+ /* Error on this fragment: respond to client with an error. */
-+ txp = &pending_tx_info[pending_idx].req;
++ txp = &netbk->pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+ pending_ring[pending_index(pending_prod++)] = pending_idx;
++ index = pending_index(netbk->pending_prod++);
++ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
+
+ /* Not the first error? Preceding frags already invalidated. */
@@ -18921,10 +19883,10 @@
+
+ /* First error: invalidate header and preceding fragments. */
+ pending_idx = *((u16 *)skb->data);
-+ netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ for (j = start; j < i; j++) {
+ pending_idx = (unsigned long)shinfo->frags[i].page;
-+ netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ }
+
+ /* Remember the error: invalidate all subsequent fragments. */
@@ -18935,7 +19897,7 @@
+ return err;
+}
+
-+static void netbk_fill_frags(struct sk_buff *skb)
++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
@@ -18948,12 +19910,12 @@
+
+ pending_idx = (unsigned long)frag->page;
+
-+ pending_inuse[pending_idx].alloc_time = jiffies;
-+ list_add_tail(&pending_inuse[pending_idx].list,
-+ &pending_inuse_head);
++ netbk->pending_inuse[pending_idx].alloc_time = jiffies;
++ list_add_tail(&netbk->pending_inuse[pending_idx].list,
++ &netbk->pending_inuse_head);
+
-+ txp = &pending_tx_info[pending_idx].req;
-+ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
++ txp = &netbk->pending_tx_info[pending_idx].req;
++ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+ frag->size = txp->size;
+ frag->page_offset = txp->offset;
+
@@ -19085,15 +20047,15 @@
+ return false;
+}
+
-+static unsigned net_tx_build_mops(void)
++static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+{
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+ int ret;
+
-+ mop = tx_map_ops;
-+ while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+ !list_empty(&net_schedule_list)) {
++ mop = netbk->tx_map_ops;
++ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++ !list_empty(&netbk->net_schedule_list)) {
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
@@ -19102,9 +20064,11 @@
+ RING_IDX idx;
+ int work_to_do;
+ unsigned int data_len;
++ pending_ring_idx_t index;
+
+ /* Get a netif from the list with work to do. */
-+ netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
++ netif = list_first_entry(&netbk->net_schedule_list,
++ struct xen_netif, list);
+ netif_get(netif);
+ remove_from_net_schedule_list(netif);
+
@@ -19163,7 +20127,8 @@
+ continue;
+ }
+
-+ pending_idx = pending_ring[pending_index(pending_cons)];
++ index = pending_index(netbk->pending_cons);
++ pending_idx = netbk->pending_ring[index];
+
+ data_len = (txreq.size > PKT_PROT_LEN &&
+ ret < MAX_SKB_FRAGS) ?
@@ -19191,14 +20156,14 @@
+ }
+ }
+
-+ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
++ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txreq.gref, netif->domid);
+ mop++;
+
-+ memcpy(&pending_tx_info[pending_idx].req,
++ memcpy(&netbk->pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
-+ pending_tx_info[pending_idx].netif = netif;
++ netbk->pending_tx_info[pending_idx].netif = netif;
+ *((u16 *)skb->data) = pending_idx;
+
+ __skb_put(skb, data_len);
@@ -19213,40 +20178,40 @@
+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ }
+
-+ __skb_queue_tail(&tx_queue, skb);
++ __skb_queue_tail(&netbk->tx_queue, skb);
+
-+ pending_cons++;
++ netbk->pending_cons++;
+
-+ mop = netbk_get_requests(netif, skb, txfrags, mop);
++ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
+
+ netif->tx.req_cons = idx;
+ netif_schedule_work(netif);
+
-+ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
++ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+ break;
+ }
+
-+ return mop - tx_map_ops;
++ return mop - netbk->tx_map_ops;
+}
+
-+static void net_tx_submit(void)
++static void net_tx_submit(struct xen_netbk *netbk)
+{
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+
-+ mop = tx_map_ops;
-+ while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
++ mop = netbk->tx_map_ops;
++ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+ struct xen_netif_tx_request *txp;
+ struct xen_netif *netif;
+ u16 pending_idx;
+ unsigned data_len;
+
+ pending_idx = *((u16 *)skb->data);
-+ netif = pending_tx_info[pending_idx].netif;
-+ txp = &pending_tx_info[pending_idx].req;
++ netif = netbk->pending_tx_info[pending_idx].netif;
++ txp = &netbk->pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
-+ if (unlikely(netbk_tx_check_mop(skb, &mop))) {
++ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+ DPRINTK("netback grant failed.\n");
+ skb_shinfo(skb)->nr_frags = 0;
+ kfree_skb(skb);
@@ -19255,7 +20220,7 @@
+
+ data_len = skb->len;
+ memcpy(skb->data,
-+ (void *)(idx_to_kaddr(pending_idx)|txp->offset),
++ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+ data_len);
+ if (data_len < txp->size) {
+ /* Append the packet payload as a fragment. */
@@ -19263,7 +20228,7 @@
+ txp->size -= data_len;
+ } else {
+ /* Schedule a response immediately. */
-+ netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ }
+
+ if (txp->flags & NETTXF_csum_blank)
@@ -19271,7 +20236,7 @@
+ else if (txp->flags & NETTXF_data_validated)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
-+ netbk_fill_frags(skb);
++ netbk_fill_frags(netbk, skb);
+
+ /*
+ * If the initial fragment was < PKT_PROT_LEN then
@@ -19304,70 +20269,83 @@
+ continue;
+ }
+
-+ netif_rx(skb);
++ netif_rx_ni(skb);
+ netif->dev->last_rx = jiffies;
+ }
-+
-+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+ !list_empty(&pending_inuse_head)) {
-+ struct netbk_tx_pending_inuse *oldest;
-+
-+ oldest = list_entry(pending_inuse_head.next,
-+ struct netbk_tx_pending_inuse, list);
-+ mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
-+ }
+}
+
+/* Called after netfront has transmitted */
-+static void net_tx_action(unsigned long unused)
++static void net_tx_action(unsigned long data)
+{
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ unsigned nr_mops;
+ int ret;
+
-+ if (dealloc_cons != dealloc_prod)
-+ net_tx_action_dealloc();
++ net_tx_action_dealloc(netbk);
+
-+ nr_mops = net_tx_build_mops();
++ nr_mops = net_tx_build_mops(netbk);
+
+ if (nr_mops == 0)
-+ return;
++ goto out;
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-+ tx_map_ops, nr_mops);
++ netbk->tx_map_ops, nr_mops);
+ BUG_ON(ret);
+
-+ net_tx_submit();
++ net_tx_submit(netbk);
++out:
++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++ !list_empty(&netbk->pending_inuse_head)) {
++ struct netbk_tx_pending_inuse *oldest;
++
++ oldest = list_entry(netbk->pending_inuse_head.next,
++ struct netbk_tx_pending_inuse, list);
++ mod_timer(&netbk->netbk_tx_pending_timer,
++ oldest->alloc_time + HZ);
++ }
+}
+
-+static void netif_idx_release(u16 pending_idx)
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+{
+ static DEFINE_SPINLOCK(_lock);
+ unsigned long flags;
++ pending_ring_idx_t index;
+
+ spin_lock_irqsave(&_lock, flags);
-+ dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
++ index = pending_index(netbk->dealloc_prod);
++ netbk->dealloc_ring[index] = pending_idx;
+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ smp_wmb();
-+ dealloc_prod++;
++ netbk->dealloc_prod++;
+ spin_unlock_irqrestore(&_lock, flags);
+
-+ tasklet_schedule(&net_tx_tasklet);
++ xen_netbk_bh_handler(netbk, 0);
+}
+
+static void netif_page_release(struct page *page, unsigned int order)
+{
++ int group = netif_page_group(page);
+ int idx = netif_page_index(page);
++ struct xen_netbk *netbk = &xen_netbk[group];
+ BUG_ON(order);
-+ BUG_ON(idx < 0);
-+ netif_idx_release(idx);
++ BUG_ON(group < 0 || group >= xen_netbk_group_nr);
++ BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
++ BUG_ON(netbk->mmap_pages[idx] != page);
++ netif_idx_release(netbk, idx);
+}
+
+irqreturn_t netif_be_int(int irq, void *dev_id)
+{
+ struct xen_netif *netif = dev_id;
++ struct xen_netbk *netbk;
++
++ if (netif->group == -1)
++ return IRQ_NONE;
++
++ netbk = &xen_netbk[netif->group];
+
+ add_to_net_schedule_list_tail(netif);
-+ maybe_schedule_tx_action();
++ maybe_schedule_tx_action(netbk);
+
+ if (netif_schedulable(netif) && !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
@@ -19398,9 +20376,9 @@
+ * is active.
+ */
+ if ((netif->smart_poll == 1)) {
-+ if (!(netif->rx.sring->netfront_smartpoll_active)) {
++ if (!(netif->rx.sring->private.netif.smartpoll_active)) {
+ notify_remote_via_irq(netif->irq);
-+ netif->rx.sring->netfront_smartpoll_active = 1;
++ netif->rx.sring->private.netif.smartpoll_active = 1;
+ }
+ } else if (notify)
+ notify_remote_via_irq(netif->irq);
@@ -19435,75 +20413,180 @@
+ struct list_head *ent;
+ struct xen_netif *netif;
+ int i = 0;
++ int group = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
-+ spin_lock_irq(&net_schedule_list_lock);
+
-+ list_for_each (ent, &net_schedule_list) {
-+ netif = list_entry(ent, struct xen_netif, list);
-+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
-+ "rx_resp_prod=%08x\n",
-+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
-+ printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
-+ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
-+ printk(KERN_ALERT " shared(rx_req_prod=%08x "
-+ "rx_resp_prod=%08x\n",
-+ netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
-+ printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
-+ netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
-+ printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
-+ netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
-+ i++;
++ for (group = 0; group < xen_netbk_group_nr; group++) {
++ struct xen_netbk *netbk = &xen_netbk[group];
++ spin_lock_irq(&netbk->net_schedule_list_lock);
++ printk(KERN_ALERT "xen_netback group number: %d\n", group);
++ list_for_each(ent, &netbk->net_schedule_list) {
++ netif = list_entry(ent, struct xen_netif, list);
++ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++ "rx_resp_prod=%08x\n",
++ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++ printk(KERN_ALERT
++ " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
++ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++ printk(KERN_ALERT
++ " shared(rx_req_prod=%08x "
++ "rx_resp_prod=%08x\n",
++ netif->rx.sring->req_prod,
++ netif->rx.sring->rsp_prod);
++ printk(KERN_ALERT
++ " rx_event=%08x, tx_req_prod=%08x\n",
++ netif->rx.sring->rsp_event,
++ netif->tx.sring->req_prod);
++ printk(KERN_ALERT
++ " tx_resp_prod=%08x, tx_event=%08x)\n",
++ netif->tx.sring->rsp_prod,
++ netif->tx.sring->rsp_event);
++ i++;
++ }
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+
-+ spin_unlock_irq(&net_schedule_list_lock);
+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+ return IRQ_HANDLED;
+}
+#endif
+
++static inline int rx_work_todo(struct xen_netbk *netbk)
++{
++ return !skb_queue_empty(&netbk->rx_queue);
++}
++
++static inline int tx_work_todo(struct xen_netbk *netbk)
++{
++ if (netbk->dealloc_cons != netbk->dealloc_prod)
++ return 1;
++
++ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++ !list_empty(&netbk->net_schedule_list))
++ return 1;
++
++ return 0;
++}
++
++static int netbk_action_thread(void *data)
++{
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
++ while (!kthread_should_stop()) {
++ wait_event_interruptible(netbk->kthread.netbk_action_wq,
++ rx_work_todo(netbk)
++ || tx_work_todo(netbk)
++ || kthread_should_stop());
++ cond_resched();
++
++ if (kthread_should_stop())
++ break;
++
++ if (rx_work_todo(netbk))
++ net_rx_action((unsigned long)netbk);
++
++ if (tx_work_todo(netbk))
++ net_tx_action((unsigned long)netbk);
++ }
++
++ return 0;
++}
++
+static int __init netback_init(void)
+{
+ int i;
+ struct page *page;
+ int rc = 0;
++ int group;
+
-+ if (!xen_domain())
++ if (!xen_pv_domain())
+ return -ENODEV;
+
++ xen_netbk_group_nr = num_online_cpus();
++ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
++ if (!xen_netbk) {
++ printk(KERN_ALERT "%s: out of memory\n", __func__);
++ return -ENOMEM;
++ }
++ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
++
+ /* We can increase reservation by this much in net_rx_action(). */
+// balloon_update_driver_allowance(NET_RX_RING_SIZE);
+
-+ skb_queue_head_init(&rx_queue);
-+ skb_queue_head_init(&tx_queue);
++ for (group = 0; group < xen_netbk_group_nr; group++) {
++ struct xen_netbk *netbk = &xen_netbk[group];
++ skb_queue_head_init(&netbk->rx_queue);
++ skb_queue_head_init(&netbk->tx_queue);
++
++ init_timer(&netbk->net_timer);
++ netbk->net_timer.data = (unsigned long)netbk;
++ netbk->net_timer.function = net_alarm;
++
++ init_timer(&netbk->netbk_tx_pending_timer);
++ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
++ netbk->netbk_tx_pending_timer.function =
++ netbk_tx_pending_timeout;
++
++ netbk->mmap_pages =
++ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++ if (!netbk->mmap_pages) {
++ printk(KERN_ALERT "%s: out of memory\n", __func__);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ rc = -ENOMEM;
++ goto failed_init;
++ }
++
++ for (i = 0; i < MAX_PENDING_REQS; i++) {
++ page = netbk->mmap_pages[i];
++ SetPageForeign(page, netif_page_release);
++ netif_set_page_ext(page, group, i);
++ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
++ }
++
++ netbk->pending_cons = 0;
++ netbk->pending_prod = MAX_PENDING_REQS;
++ for (i = 0; i < MAX_PENDING_REQS; i++)
++ netbk->pending_ring[i] = i;
++
++ if (MODPARM_netback_kthread) {
++ init_waitqueue_head(&netbk->kthread.netbk_action_wq);
++ netbk->kthread.task =
++ kthread_create(netbk_action_thread,
++ (void *)netbk,
++ "netback/%u", group);
++
++ if (!IS_ERR(netbk->kthread.task)) {
++ kthread_bind(netbk->kthread.task, group);
++ wake_up_process(netbk->kthread.task);
++ } else {
++ printk(KERN_ALERT
++ "kthread_run() fails at netback\n");
++ free_empty_pages_and_pagevec(netbk->mmap_pages,
++ MAX_PENDING_REQS);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ rc = PTR_ERR(netbk->kthread.task);
++ goto failed_init;
++ }
++ } else {
++ tasklet_init(&netbk->tasklet.net_tx_tasklet,
++ net_tx_action,
++ (unsigned long)netbk);
++ tasklet_init(&netbk->tasklet.net_rx_tasklet,
++ net_rx_action,
++ (unsigned long)netbk);
++ }
++
++ INIT_LIST_HEAD(&netbk->pending_inuse_head);
++ INIT_LIST_HEAD(&netbk->net_schedule_list);
+
-+ init_timer(&net_timer);
-+ net_timer.data = 0;
-+ net_timer.function = net_alarm;
-+
-+ init_timer(&netbk_tx_pending_timer);
-+ netbk_tx_pending_timer.data = 0;
-+ netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
-+
-+ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-+ if (mmap_pages == NULL) {
-+ printk("%s: out of memory\n", __FUNCTION__);
-+ return -ENOMEM;
-+ }
++ spin_lock_init(&netbk->net_schedule_list_lock);
+
-+ for (i = 0; i < MAX_PENDING_REQS; i++) {
-+ page = mmap_pages[i];
-+ SetPageForeign(page, netif_page_release);
-+ netif_set_page_index(page, i);
-+ INIT_LIST_HEAD(&pending_inuse[i].list);
++ atomic_set(&netbk->netfront_count, 0);
+ }
+
-+ pending_cons = 0;
-+ pending_prod = MAX_PENDING_REQS;
-+ for (i = 0; i < MAX_PENDING_REQS; i++)
-+ pending_ring[i] = i;
-+
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ if (MODPARM_copy_skb) {
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
@@ -19523,7 +20606,7 @@
+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+ 0,
+ netif_be_dbg,
-+ SA_SHIRQ,
++ IRQF_SHARED,
+ "net-be-dbg",
+ &netif_be_dbg);
+#endif
@@ -19531,9 +20614,16 @@
+ return 0;
+
+failed_init:
-+ free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
-+ del_timer(&netbk_tx_pending_timer);
-+ del_timer(&net_timer);
++ for (i = 0; i < group; i++) {
++ struct xen_netbk *netbk = &xen_netbk[i];
++ free_empty_pages_and_pagevec(netbk->mmap_pages,
++ MAX_PENDING_REQS);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ if (MODPARM_netback_kthread)
++ kthread_stop(netbk->kthread.task);
++ }
++ vfree(xen_netbk);
+ return rc;
+
+}
@@ -19543,10 +20633,10 @@
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
new file mode 100644
-index 0000000..70636d0
+index 0000000..99831c7
--- /dev/null
+++ b/drivers/xen/netback/xenbus.c
-@@ -0,0 +1,523 @@
+@@ -0,0 +1,524 @@
+/* Xenbus code for netif backend
+ Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
+ Copyright (C) 2005 XenSource Ltd
@@ -19711,12 +20801,17 @@
+ */
+static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+{
-+ struct backend_info *be = dev_get_drvdata(&xdev->dev);
-+ struct xen_netif *netif = be->netif;
++ struct backend_info *be;
++ struct xen_netif *netif;
+ char *val;
+
+ DPRINTK("netback_uevent");
+
++ be = dev_get_drvdata(&xdev->dev);
++ if (!be)
++ return 0;
++ netif = be->netif;
++
+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ if (IS_ERR(val)) {
+ int err = PTR_ERR(val);
@@ -19956,6 +21051,7 @@
+
+static int connect_rings(struct backend_info *be)
+{
++ struct xen_netif *netif = be->netif;
+ struct xenbus_device *dev = be->dev;
+ unsigned long tx_ring_ref, rx_ring_ref;
+ unsigned int evtchn, rx_copy;
@@ -19989,52 +21085,47 @@
+ if (!rx_copy)
+ return -EOPNOTSUPP;
+
-+ if (be->netif->dev->tx_queue_len != 0) {
++ if (netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0)
+ val = 0;
+ if (val)
-+ be->netif->can_queue = 1;
++ netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
-+ be->netif->dev->tx_queue_len = 1;
++ netif->dev->tx_queue_len = 1;
+ }
+
-+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
++ "%d", &val) < 0)
+ val = 0;
-+ if (!val) {
-+ be->netif->features &= ~NETIF_F_SG;
-+ be->netif->dev->features &= ~NETIF_F_SG;
-+ if (be->netif->dev->mtu > ETH_DATA_LEN)
-+ be->netif->dev->mtu = ETH_DATA_LEN;
-+ }
++ netif->can_sg = !!val;
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
++ "%d", &val) < 0)
++ val = 0;
++ netif->gso = !!val;
+
-+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
-+ &val) < 0)
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
++ "%d", &val) < 0)
+ val = 0;
-+ if (val) {
-+ be->netif->features |= NETIF_F_TSO;
-+ be->netif->dev->features |= NETIF_F_TSO;
-+ }
++ netif->gso_prefix = !!val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+ "%d", &val) < 0)
+ val = 0;
-+ if (val) {
-+ be->netif->features &= ~NETIF_F_IP_CSUM;
-+ be->netif->dev->features &= ~NETIF_F_IP_CSUM;
-+ }
++ netif->csum = !val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-smart-poll",
+ "%d", &val) < 0)
+ val = 0;
-+ if (val)
-+ be->netif->smart_poll = 1;
-+ else
-+ be->netif->smart_poll = 0;
++ netif->smart_poll = !!val;
++
++ /* Set dev->features */
++ netif_set_features(netif);
+
+ /* Map the shared frame, irq etc. */
-+ err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
++ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "mapping shared-frames %lu/%lu port %u",
@@ -24612,10 +25703,10 @@
+}
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
new file mode 100644
-index 0000000..d448bf5
+index 0000000..f0d5426
--- /dev/null
+++ b/drivers/xen/pciback/xenbus.c
-@@ -0,0 +1,722 @@
+@@ -0,0 +1,730 @@
+/*
+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
+ *
@@ -24672,23 +25763,31 @@
+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ }
++ spin_unlock(&pdev->dev_lock);
+
+ /* If the driver domain started an op, make sure we complete it
+ * before releasing the shared memory */
++
++ /* Note, the workqueue does not use spinlocks at all.*/
+ flush_workqueue(pciback_wq);
+
++ spin_lock(&pdev->dev_lock);
+ if (pdev->sh_info != NULL) {
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ pdev->sh_info = NULL;
+ }
-+
+ spin_unlock(&pdev->dev_lock);
++
+}
+
+static void free_pdev(struct pciback_device *pdev)
+{
-+ if (pdev->be_watching)
++ spin_lock(&pdev->dev_lock);
++ if (pdev->be_watching) {
+ unregister_xenbus_watch(&pdev->be_watch);
++ pdev->be_watching = 0;
++ }
++ spin_unlock(&pdev->dev_lock);
+
+ pciback_disconnect(pdev);
+
@@ -24716,7 +25815,10 @@
+ "Error mapping other domain page in ours.");
+ goto out;
+ }
++
++ spin_lock(&pdev->dev_lock);
+ pdev->sh_info = vaddr;
++ spin_unlock(&pdev->dev_lock);
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
@@ -24726,7 +25828,10 @@
+ "Error binding event channel to IRQ");
+ goto out;
+ }
++
++ spin_lock(&pdev->dev_lock);
+ pdev->evtchn_irq = err;
++ spin_unlock(&pdev->dev_lock);
+ err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -24740,7 +25845,6 @@
+ int gnt_ref, remote_evtchn;
+ char *magic = NULL;
+
-+ spin_lock(&pdev->dev_lock);
+
+ /* Make sure we only do this setup once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
@@ -24786,7 +25890,6 @@
+
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+out:
-+ spin_unlock(&pdev->dev_lock);
+
+ kfree(magic);
+
@@ -24958,7 +26061,6 @@
+ char state_str[64];
+ char dev_str[64];
+
-+ spin_lock(&pdev->dev_lock);
+
+ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+
@@ -25099,8 +26201,6 @@
+ }
+
+out:
-+ spin_unlock(&pdev->dev_lock);
-+
+ return 0;
+}
+
@@ -25157,8 +26257,6 @@
+ char dev_str[64];
+ char state_str[64];
+
-+ spin_lock(&pdev->dev_lock);
-+
+ /* It's possible we could get the call to setup twice, so make sure
+ * we're not already connected.
+ */
@@ -25239,8 +26337,6 @@
+ "Error switching to initialised state!");
+
+out:
-+ spin_unlock(&pdev->dev_lock);
-+
+ if (!err)
+ /* see if pcifront is already configured (if not, we'll wait) */
+ pciback_attach(pdev);
@@ -25287,7 +26383,10 @@
+ pciback_be_watch);
+ if (err)
+ goto out;
++
++ spin_lock(&pdev->dev_lock);
+ pdev->be_watching = 1;
++ spin_unlock(&pdev->dev_lock);
+
+ /* We need to force a call to our callback here in case
+ * xend already configured us!
@@ -25326,8 +26425,8 @@
+{
+ pciback_wq = create_workqueue("pciback_workqueue");
+ if (!pciback_wq) {
-+ printk(KERN_ERR "pciback_xenbus_register: create"
-+ "pciback_workqueue failed\n");
++ printk(KERN_ERR "%s: create"
++ "pciback_workqueue failed\n",__FUNCTION__);
+ return -EFAULT;
+ }
+ return xenbus_register_backend(&xenbus_pciback_driver);
@@ -25766,10 +26865,10 @@
+subsys_initcall(xen_pcpu_init);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
new file mode 100644
-index 0000000..a33074e
+index 0000000..c01b5dd
--- /dev/null
+++ b/drivers/xen/platform-pci.c
-@@ -0,0 +1,259 @@
+@@ -0,0 +1,207 @@
+/******************************************************************************
+ * platform-pci.c
+ *
@@ -25793,15 +26892,14 @@
+ *
+ */
+
-+#include <asm/io.h>
+
+#include <linux/interrupt.h>
++#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
-+#include <xen/grant_table.h>
+#include <xen/platform_pci.h>
-+#include <xen/interface/platform_pci.h>
++#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/hvm.h>
@@ -25817,7 +26915,6 @@
+static unsigned long platform_mmio_alloc;
+static unsigned long platform_mmiolen;
+static uint64_t callback_via;
-+struct pci_dev *xen_platform_pdev;
+
+unsigned long alloc_xen_mmio(unsigned long len)
+{
@@ -25851,36 +26948,28 @@
+
+static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
+{
-+ xen_hvm_evtchn_do_upcall(get_irq_regs());
++ xen_hvm_evtchn_do_upcall();
+ return IRQ_HANDLED;
+}
+
+static int xen_allocate_irq(struct pci_dev *pdev)
+{
-+ __set_irq_handler(pdev->irq, handle_edge_irq, 0, NULL);
+ return request_irq(pdev->irq, do_hvm_evtchn_intr,
+ IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
+ "xen-platform-pci", pdev);
+}
+
-+void platform_pci_disable_irq(void)
-+{
-+ printk(KERN_DEBUG "platform_pci_disable_irq\n");
-+ disable_irq(xen_platform_pdev->irq);
-+}
-+
-+void platform_pci_enable_irq(void)
++static int platform_pci_resume(struct pci_dev *pdev)
+{
-+ printk(KERN_DEBUG "platform_pci_enable_irq\n");
-+ enable_irq(xen_platform_pdev->irq);
-+}
-+
-+void platform_pci_resume(void)
-+{
-+ if (xen_set_callback_via(callback_via)) {
-+ printk("platform_pci_resume failure!\n");
-+ return;
++ int err;
++ if (xen_have_vector_callback)
++ return 0;
++ err = xen_set_callback_via(callback_via);
++ if (err) {
++ dev_err(&pdev->dev, "platform_pci_resume failure!\n");
++ return err;
+ }
++ return 0;
+}
+
+static int __devinit platform_pci_init(struct pci_dev *pdev,
@@ -25889,7 +26978,7 @@
+ int i, ret;
+ long ioaddr, iolen;
+ long mmio_addr, mmio_len;
-+ xen_platform_pdev = pdev;
++ unsigned int max_nr_gframes;
+
+ i = pci_enable_device(pdev);
+ if (i)
@@ -25904,19 +26993,21 @@
+ if (mmio_addr == 0 || ioaddr == 0) {
+ dev_err(&pdev->dev, "no resources found\n");
+ ret = -ENOENT;
++ goto pci_out;
+ }
+
+ if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) {
+ dev_err(&pdev->dev, "MEM I/O resource 0x%lx @ 0x%lx busy\n",
+ mmio_addr, mmio_len);
+ ret = -EBUSY;
++ goto pci_out;
+ }
+
+ if (request_region(ioaddr, iolen, DRV_NAME) == NULL) {
+ dev_err(&pdev->dev, "I/O resource 0x%lx @ 0x%lx busy\n",
+ iolen, ioaddr);
+ ret = -EBUSY;
-+ goto out;
++ goto mem_out;
+ }
+
+ platform_mmio = mmio_addr;
@@ -25925,107 +27016,63 @@
+ if (!xen_have_vector_callback) {
+ ret = xen_allocate_irq(pdev);
+ if (ret) {
-+ printk(KERN_WARNING "request_irq failed err=%d\n", ret);
++ dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
+ goto out;
+ }
+ callback_via = get_callback_via(pdev);
+ ret = xen_set_callback_via(callback_via);
+ if (ret) {
-+ printk(KERN_WARNING
-+ "Unable to set the evtchn callback err=%d\n", ret);
++ dev_warn(&pdev->dev, "Unable to set the evtchn callback "
++ "err=%d\n", ret);
+ goto out;
+ }
+ }
++
++ max_nr_gframes = gnttab_max_grant_frames();
++ xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+ ret = gnttab_init();
+ if (ret)
+ goto out;
-+ ret = xenbus_probe_init();
-+ if (ret)
-+ goto out;
++ xenbus_probe(NULL);
+ ret = xen_setup_shutdown_event();
+ if (ret)
+ goto out;
-+
++ return 0;
+
+out:
-+ if (ret) {
-+ release_mem_region(mmio_addr, mmio_len);
-+ release_region(ioaddr, iolen);
-+ pci_disable_device(pdev);
-+ }
-+
++ release_region(ioaddr, iolen);
++mem_out:
++ release_mem_region(mmio_addr, mmio_len);
++pci_out:
++ pci_disable_device(pdev);
+ return ret;
+}
+
-+#define XEN_PLATFORM_VENDOR_ID 0x5853
-+#define XEN_PLATFORM_DEVICE_ID 0x0001
+static struct pci_device_id platform_pci_tbl[] __devinitdata = {
-+ {XEN_PLATFORM_VENDOR_ID, XEN_PLATFORM_DEVICE_ID,
-+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ {PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0,}
+};
+
+MODULE_DEVICE_TABLE(pci, platform_pci_tbl);
+
+static struct pci_driver platform_driver = {
-+ name: DRV_NAME,
-+ probe : platform_pci_init,
-+ id_table : platform_pci_tbl,
++ .name = DRV_NAME,
++ .probe = platform_pci_init,
++ .id_table = platform_pci_tbl,
++#ifdef CONFIG_PM
++ .resume_early = platform_pci_resume,
++#endif
+};
+
-+static int check_platform_magic(void)
-+{
-+ short magic;
-+ char protocol, *err;
-+
-+ magic = inw(XEN_IOPORT_MAGIC);
-+
-+ if (magic != XEN_IOPORT_MAGIC_VAL) {
-+ err = "unrecognised magic value";
-+ goto no_dev;
-+ }
-+
-+ protocol = inb(XEN_IOPORT_PROTOVER);
-+
-+ printk(KERN_DEBUG DRV_NAME "I/O protocol version %d\n", protocol);
-+
-+ switch (protocol) {
-+ case 1:
-+ outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
-+ outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
-+ if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
-+ printk(KERN_ERR DRV_NAME "blacklisted by host\n");
-+ return -ENODEV;
-+ }
-+ break;
-+ default:
-+ err = "unknown I/O protocol version";
-+ goto no_dev;
-+ }
-+
-+ return 0;
-+
-+ no_dev:
-+ printk(KERN_WARNING DRV_NAME "failed backend handshake: %s\n", err);
-+ return -ENODEV;
-+}
-+
+static int __init platform_pci_module_init(void)
+{
-+ int rc;
-+
-+ rc = check_platform_magic();
-+ if (rc < 0)
-+ return rc;
-+
-+ rc = pci_register_driver(&platform_driver);
-+ if (rc) {
-+ printk(KERN_INFO DRV_NAME
-+ ": No platform pci device model found\n");
-+ return rc;
-+ }
++ /* no unplug has been done, IGNORE hasn't been specified: just
++ * return now */
++ if (!xen_platform_pci_unplug)
++ return -ENODEV;
+
-+ return 0;
++ return pci_register_driver(&platform_driver);
+}
+
+module_init(platform_pci_module_init);
@@ -26410,10 +27457,10 @@
* @dev: xenbus device
* @ring_mfn: mfn of ring to grant
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
-index 649fcdf..57fb749 100644
+index 649fcdf..3a83ba2 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
-@@ -49,31 +49,28 @@
+@@ -49,31 +49,29 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
@@ -26423,6 +27470,7 @@
#include <xen/events.h>
#include <xen/page.h>
++#include <xen/platform_pci.h>
+#include <xen/hvm.h>
+
#include "xenbus_comms.h"
@@ -26452,7 +27500,7 @@
/* If something in array of ids matches this device, return it. */
static const struct xenbus_device_id *
match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
-@@ -94,34 +91,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
+@@ -94,34 +92,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
@@ -26488,7 +27536,7 @@
static void free_otherend_details(struct xenbus_device *dev)
-@@ -141,7 +111,28 @@ static void free_otherend_watch(struct xenbus_device *dev)
+@@ -141,7 +112,28 @@ static void free_otherend_watch(struct xenbus_device *dev)
}
@@ -26518,7 +27566,7 @@
char *id_node, char *path_node)
{
int err = xenbus_gather(XBT_NIL, xendev->nodename,
-@@ -166,39 +157,11 @@ int read_otherend_details(struct xenbus_device *xendev,
+@@ -166,39 +158,11 @@ int read_otherend_details(struct xenbus_device *xendev,
return 0;
}
@@ -26562,7 +27610,7 @@
{
struct xenbus_device *dev =
container_of(watch, struct xenbus_device, otherend_watch);
-@@ -226,11 +189,7 @@ static void otherend_changed(struct xenbus_watch *watch,
+@@ -226,11 +190,7 @@ static void otherend_changed(struct xenbus_watch *watch,
* work that can fail e.g., when the rootfs is gone.
*/
if (system_state > SYSTEM_RUNNING) {
@@ -26575,7 +27623,7 @@
xenbus_frontend_closed(dev);
return;
}
-@@ -238,25 +197,7 @@ static void otherend_changed(struct xenbus_watch *watch,
+@@ -238,25 +198,7 @@ static void otherend_changed(struct xenbus_watch *watch,
if (drv->otherend_changed)
drv->otherend_changed(dev, state);
}
@@ -26602,7 +27650,7 @@
int xenbus_dev_probe(struct device *_dev)
{
-@@ -300,8 +241,9 @@ int xenbus_dev_probe(struct device *_dev)
+@@ -300,8 +242,9 @@ int xenbus_dev_probe(struct device *_dev)
fail:
xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
xenbus_switch_state(dev, XenbusStateClosed);
@@ -26613,7 +27661,7 @@
int xenbus_dev_remove(struct device *_dev)
{
-@@ -319,8 +261,9 @@ int xenbus_dev_remove(struct device *_dev)
+@@ -319,8 +262,9 @@ int xenbus_dev_remove(struct device *_dev)
xenbus_switch_state(dev, XenbusStateClosed);
return 0;
}
@@ -26624,7 +27672,7 @@
{
struct xenbus_device *dev = to_xenbus_device(_dev);
unsigned long timeout = 5*HZ;
-@@ -341,6 +284,7 @@ static void xenbus_dev_shutdown(struct device *_dev)
+@@ -341,6 +285,7 @@ static void xenbus_dev_shutdown(struct device *_dev)
out:
put_device(&dev->dev);
}
@@ -26632,7 +27680,7 @@
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
-@@ -354,25 +298,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv,
+@@ -354,25 +299,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv,
return driver_register(&drv->driver);
}
@@ -26659,7 +27707,7 @@
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
-@@ -543,24 +469,7 @@ fail:
+@@ -543,24 +470,7 @@ fail:
kfree(xendev);
return err;
}
@@ -26685,7 +27733,7 @@
static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
{
-@@ -574,10 +483,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
+@@ -574,10 +484,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
return PTR_ERR(dir);
for (i = 0; i < dir_n; i++) {
@@ -26698,7 +27746,7 @@
kfree(dir);
return err;
}
-@@ -597,9 +507,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus)
+@@ -597,9 +508,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus)
if (err)
break;
}
@@ -26710,7 +27758,7 @@
static unsigned int char_count(const char *str, char c)
{
-@@ -662,32 +574,17 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+@@ -662,32 +575,17 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
}
EXPORT_SYMBOL_GPL(xenbus_dev_changed);
@@ -26746,7 +27794,7 @@
if (drv->suspend)
err = drv->suspend(xdev, state);
if (err)
-@@ -695,21 +592,19 @@ static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+@@ -695,21 +593,19 @@ static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
"xenbus: suspend %s failed: %i\n", dev_name(dev), err);
return 0;
}
@@ -26772,7 +27820,7 @@
err = talk_to_otherend(xdev);
if (err) {
printk(KERN_WARNING
-@@ -740,6 +635,7 @@ static int xenbus_dev_resume(struct device *dev)
+@@ -740,6 +636,7 @@ static int xenbus_dev_resume(struct device *dev)
return 0;
}
@@ -26780,7 +27828,19 @@
/* A flag to determine if xenstored is 'ready' (i.e. has started) */
int xenstored_ready = 0;
-@@ -768,52 +664,78 @@ void xenbus_probe(struct work_struct *unused)
+@@ -749,10 +646,7 @@ int register_xenstore_notifier(struct notifier_block *nb)
+ {
+ int ret = 0;
+
+- if (xenstored_ready > 0)
+- ret = nb->notifier_call(nb, 0, NULL);
+- else
+- blocking_notifier_chain_register(&xenstore_chain, nb);
++ blocking_notifier_chain_register(&xenstore_chain, nb);
+
+ return ret;
+ }
+@@ -768,57 +662,93 @@ void xenbus_probe(struct work_struct *unused)
{
BUG_ON((xenstored_ready <= 0));
@@ -26792,39 +27852,43 @@
/* Notify others that xenstore is up */
blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
}
++EXPORT_SYMBOL_GPL(xenbus_probe);
++
++static int __init xenbus_probe_initcall(void)
++{
++ if (!xen_domain())
++ return -ENODEV;
++
++ if (xen_initial_domain() || xen_hvm_domain())
++ return 0;
++
++ xenbus_probe(NULL);
++ return 0;
++}
++
++device_initcall(xenbus_probe_initcall);
-static int __init xenbus_probe_init(void)
-+static int __init __xenbus_probe_init(void)
++static int __init xenbus_init(void)
{
-- int err = 0;
-+ /* Delay initialization in the PV on HVM case */
-+ if (xen_hvm_domain())
-+ return 0;
+ int err = 0;
++ unsigned long page = 0;
-- DPRINTK("");
-+ if (!xen_pv_domain())
-+ return -ENODEV;
+ DPRINTK("");
-- err = -ENODEV;
-- if (!xen_domain())
+ err = -ENODEV;
+ if (!xen_domain())
- goto out_error;
-+ return xenbus_probe_init();
-+}
-
+-
- /* Register ourselves with the kernel bus subsystem */
- err = bus_register(&xenbus_frontend.bus);
-- if (err)
-- goto out_error;
-+int xenbus_probe_init(void)
-+{
-+ int err = 0;
-+ unsigned long page = 0;
-+
-+ DPRINTK("");
-
+- if (err)
+- goto out_error;
+-
- err = xenbus_backend_bus_register();
- if (err)
- goto out_unreg_front;
++ return err;
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
@@ -26861,8 +27925,15 @@
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
+ if (xen_hvm_domain()) {
-+ xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
-+ xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
++ uint64_t v = 0;
++ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
++ if (err)
++ goto out_error;
++ xen_store_evtchn = (int)v;
++ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
++ if (err)
++ goto out_error;
++ xen_store_mfn = (unsigned long)v;
+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+ } else {
+ xen_store_evtchn = xen_start_info->store_evtchn;
@@ -26881,8 +27952,13 @@
+ goto out_error;
}
- if (!xen_initial_domain())
-@@ -829,128 +751,13 @@ static int __init xenbus_probe_init(void)
+- if (!xen_initial_domain())
+- xenbus_probe(NULL);
+-
+ #ifdef CONFIG_XEN_COMPAT_XENFS
+ /*
+ * Create xenfs mountpoint in /proc for compatibility with
+@@ -829,128 +759,13 @@ static int __init xenbus_probe_init(void)
return 0;
@@ -26900,7 +27976,7 @@
}
-postcore_initcall(xenbus_probe_init);
-+postcore_initcall(__xenbus_probe_init);
++postcore_initcall(xenbus_init);
MODULE_LICENSE("GPL");
-
@@ -27366,10 +28442,10 @@
+subsys_initcall(xenbus_probe_backend_init);
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
new file mode 100644
-index 0000000..54e4d70
+index 0000000..5413248
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
-@@ -0,0 +1,314 @@
+@@ -0,0 +1,292 @@
+#define DPRINTK(fmt, args...) \
+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
+ __func__, __LINE__, ##args)
@@ -27392,6 +28468,8 @@
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/page.h>
++#include <xen/xen.h>
++#include <xen/platform_pci.h>
+
+#include "xenbus_comms.h"
+#include "xenbus_probe.h"
@@ -27624,33 +28702,6 @@
+ return NOTIFY_DONE;
+}
+
-+static int dev_suspend(struct device *dev, void *data)
-+{
-+ return xenbus_dev_suspend(dev, PMSG_SUSPEND);
-+}
-+
-+static int dev_resume(struct device *dev, void *data)
-+{
-+ return xenbus_dev_resume(dev);
-+}
-+
-+void xenbus_suspend(void)
-+{
-+ DPRINTK("");
-+
-+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, dev_suspend);
-+ xs_suspend();
-+}
-+EXPORT_SYMBOL_GPL(xenbus_suspend);
-+
-+void xenbus_resume(void)
-+{
-+ DPRINTK("");
-+
-+ xs_resume();
-+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, dev_resume);
-+}
-+EXPORT_SYMBOL_GPL(xenbus_resume);
+
+static int __init xenbus_probe_frontend_init(void)
+{
@@ -27675,6 +28726,9 @@
+#ifndef MODULE
+static int __init boot_wait_for_devices(void)
+{
++ if (xen_hvm_domain() && !xen_platform_pci_unplug)
++ return -ENODEV;
++
+ ready_to_wait_for_devices = 1;
+ wait_for_devices(NULL);
+ return 0;
@@ -27685,7 +28739,7 @@
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
-index eab33f1..6f91e8c 100644
+index 7b547f5..5534690 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -76,6 +76,14 @@ struct xs_handle {
@@ -28380,6 +29434,20 @@
}
static void __exit xenfs_exit(void)
+diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c
+index 6c4269b..64b3be4 100644
+--- a/drivers/xen/xenfs/xenbus.c
++++ b/drivers/xen/xenfs/xenbus.c
+@@ -123,6 +123,9 @@ static ssize_t xenbus_file_read(struct file *filp,
+ mutex_lock(&u->reply_mutex);
+ while (list_empty(&u->read_buffers)) {
+ mutex_unlock(&u->reply_mutex);
++ if (filp->f_flags & O_NONBLOCK)
++ return -EAGAIN;
++
+ ret = wait_event_interruptible(u->read_waitq,
+ !list_empty(&u->read_buffers));
+ if (ret)
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index 51f08b2..b68aa62 100644
--- a/drivers/xen/xenfs/xenfs.h
@@ -28497,16 +29565,28 @@
+
#endif /*__ACPI_DRIVERS_H__*/
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
-index 740ac3a..3d1205f 100644
+index 740ac3a..7ee588d 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
-@@ -238,6 +238,13 @@ struct acpi_processor_errata {
+@@ -238,6 +238,25 @@ struct acpi_processor_errata {
} piix4;
};
+extern int acpi_processor_errata(struct acpi_processor *pr);
++#ifdef CONFIG_ACPI_PROCFS
+extern int acpi_processor_add_fs(struct acpi_device *device);
+extern int acpi_processor_remove_fs(struct acpi_device *device);
++#else
++static inline int acpi_processor_add_fs(struct acpi_device *device)
++{
++ return 0;
++}
++
++static inline int acpi_processor_remove_fs(struct acpi_device *device)
++{
++ return 0;
++}
++#endif
+extern int acpi_processor_set_pdc(struct acpi_processor *pr);
+extern int acpi_processor_remove(struct acpi_device *device, int type);
+extern void acpi_processor_notify(struct acpi_device *device, u32 event);
@@ -28514,7 +29594,7 @@
extern int acpi_processor_preregister_performance(struct
acpi_processor_performance
*performance);
-@@ -295,6 +302,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
+@@ -295,6 +314,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
void acpi_processor_ppc_init(void);
void acpi_processor_ppc_exit(void);
int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
@@ -28523,7 +29603,7 @@
#else
static inline void acpi_processor_ppc_init(void)
{
-@@ -331,6 +340,7 @@ int acpi_processor_power_init(struct acpi_processor *pr,
+@@ -331,6 +352,7 @@ int acpi_processor_power_init(struct acpi_processor *pr,
int acpi_processor_cst_has_changed(struct acpi_processor *pr);
int acpi_processor_power_exit(struct acpi_processor *pr,
struct acpi_device *device);
@@ -28606,6 +29686,67 @@
#define FBINFO_PARTIAL_PAN_OK 0x0040 /* otw use pan only for double-buffering */
#define FBINFO_READS_FAST 0x0080 /* soft-copy faster than rendering */
+diff --git a/include/linux/if_link.h b/include/linux/if_link.h
+index 176c518..d681cc9 100644
+--- a/include/linux/if_link.h
++++ b/include/linux/if_link.h
+@@ -81,6 +81,8 @@ enum
+ #define IFLA_LINKINFO IFLA_LINKINFO
+ IFLA_NET_NS_PID,
+ IFLA_IFALIAS,
++ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
++ IFLA_VFINFO_LIST,
+ __IFLA_MAX
+ };
+
+@@ -190,4 +192,47 @@ struct ifla_vlan_qos_mapping
+ __u32 to;
+ };
+
++/* SR-IOV virtual function management section */
++
++enum {
++ IFLA_VF_INFO_UNSPEC,
++ IFLA_VF_INFO,
++ __IFLA_VF_INFO_MAX,
++};
++
++#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
++
++enum {
++ IFLA_VF_UNSPEC,
++ IFLA_VF_MAC, /* Hardware queue specific attributes */
++ IFLA_VF_VLAN,
++ IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */
++ __IFLA_VF_MAX,
++};
++
++#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
++
++struct ifla_vf_mac {
++ __u32 vf;
++ __u8 mac[32]; /* MAX_ADDR_LEN */
++};
++
++struct ifla_vf_vlan {
++ __u32 vf;
++ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
++ __u32 qos;
++};
++
++struct ifla_vf_tx_rate {
++ __u32 vf;
++ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
++};
++
++struct ifla_vf_info {
++ __u32 vf;
++ __u8 mac[32];
++ __u32 vlan;
++ __u32 qos;
++ __u32 tx_rate;
++};
+ #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 7ca72b7..1c30adf 100644
--- a/include/linux/interrupt.h
@@ -28619,7 +29760,7 @@
/*
* Bits used by threaded handlers:
diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 24c3956..3d74515 100644
+index 24c3956..e8cf80f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -105,6 +105,12 @@ extern unsigned int kobjsize(const void *objp);
@@ -28635,7 +29776,7 @@
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
-@@ -195,6 +201,15 @@ struct vm_operations_struct {
+@@ -195,6 +201,11 @@ struct vm_operations_struct {
*/
int (*access)(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
@@ -28644,13 +29785,51 @@
+ * original value of @ptep. */
+ pte_t (*zap_pte)(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, int is_fullmm);
-+
-+ /* called before close() to indicate no more pages should be mapped */
-+ void (*unmap)(struct vm_area_struct *area);
-+
#ifdef CONFIG_NUMA
/*
* set_policy() op must add a reference to any non-NULL @new mempolicy
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 812a5f3..0b7d4ec 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -28,6 +28,7 @@
+ #include <linux/if.h>
+ #include <linux/if_ether.h>
+ #include <linux/if_packet.h>
++#include <linux/if_link.h>
+
+ #ifdef __KERNEL__
+ #include <linux/timer.h>
+@@ -577,6 +578,13 @@ struct netdev_queue {
+ * this function is called when a VLAN id is unregistered.
+ *
+ * void (*ndo_poll_controller)(struct net_device *dev);
++ *
++ * SR-IOV management functions.
++ * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
++ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
++ * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
++ * int (*ndo_get_vf_config)(struct net_device *dev,
++ * int vf, struct ifla_vf_info *ivf);
+ */
+ #define HAVE_NET_DEVICE_OPS
+ struct net_device_ops {
+@@ -626,6 +634,15 @@ struct net_device_ops {
+ #define HAVE_NETDEV_POLL
+ void (*ndo_poll_controller)(struct net_device *dev);
+ #endif
++ int (*ndo_set_vf_mac)(struct net_device *dev,
++ int queue, u8 *mac);
++ int (*ndo_set_vf_vlan)(struct net_device *dev,
++ int queue, u16 vlan, u8 qos);
++ int (*ndo_set_vf_tx_rate)(struct net_device *dev,
++ int vf, int rate);
++ int (*ndo_get_vf_config)(struct net_device *dev,
++ int vf,
++ struct ifla_vf_info *ivf);
+ #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+ int (*ndo_fcoe_enable)(struct net_device *dev);
+ int (*ndo_fcoe_disable)(struct net_device *dev);
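[Editor's aside, not part of the patch: the hunk above adds the SR-IOV ndo_* hooks to struct net_device_ops. A minimal sketch of how a physical-function driver might implement and register two of them is shown below. The driver name, its private structure and the per-VF bookkeeping are hypothetical; only the ndo_* signatures and struct ifla_vf_info come from this patch.]

    /* Hypothetical PF driver glue for the new SR-IOV ndo_* hooks. */
    #include <linux/netdevice.h>
    #include <linux/etherdevice.h>
    #include <linux/if_link.h>
    #include <linux/string.h>

    #define EXAMPLE_MAX_VFS 8          /* assumed hardware limit */

    struct example_priv {
            u8  vf_mac[EXAMPLE_MAX_VFS][ETH_ALEN];
            u16 vf_vlan[EXAMPLE_MAX_VFS];
            u8  vf_qos[EXAMPLE_MAX_VFS];
            int vf_rate[EXAMPLE_MAX_VFS];
            int num_vfs;
    };

    static int example_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
    {
            struct example_priv *priv = netdev_priv(dev);

            if (vf >= priv->num_vfs || !is_valid_ether_addr(mac))
                    return -EINVAL;
            memcpy(priv->vf_mac[vf], mac, ETH_ALEN);
            /* ... program the per-VF MAC filter into hardware here ... */
            return 0;
    }

    static int example_get_vf_config(struct net_device *dev, int vf,
                                     struct ifla_vf_info *ivi)
    {
            struct example_priv *priv = netdev_priv(dev);

            if (vf >= priv->num_vfs)
                    return -EINVAL;
            ivi->vf = vf;
            memcpy(ivi->mac, priv->vf_mac[vf], ETH_ALEN);
            ivi->vlan    = priv->vf_vlan[vf];
            ivi->qos     = priv->vf_qos[vf];
            ivi->tx_rate = priv->vf_rate[vf];
            return 0;
    }

    static const struct net_device_ops example_netdev_ops = {
            /* ... the usual ndo_open/ndo_stop/ndo_start_xmit ... */
            .ndo_set_vf_mac    = example_set_vf_mac,
            .ndo_get_vf_config = example_get_vf_config,
    };

[rtnetlink (see the net/core/rtnetlink.c hunks near the end of this patch) calls these hooks when it handles IFLA_VFINFO_LIST and when it dumps per-VF state.]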
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6b202b1..b03950e 100644
--- a/include/linux/page-flags.h
@@ -28689,6 +29868,60 @@
#ifdef CONFIG_MEMORY_FAILURE
PAGEFLAG(HWPoison, hwpoison)
TESTSETFLAG(HWPoison, hwpoison)
+diff --git a/include/linux/pci.h b/include/linux/pci.h
+index e07d194..ca28e46 100644
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -609,6 +609,9 @@ extern void pci_remove_bus_device(struct pci_dev *dev);
+ extern void pci_stop_bus_device(struct pci_dev *dev);
+ void pci_setup_cardbus(struct pci_bus *bus);
+ extern void pci_sort_breadthfirst(void);
++#define dev_is_pci(d) ((d)->bus == &pci_bus_type)
++#define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false))
++#define dev_num_vf(d) ((dev_is_pci(d) ? pci_num_vf(to_pci_dev(d)) : 0))
+
+ /* Generic PCI functions exported to card drivers */
+
+@@ -1124,6 +1127,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
+ unsigned int devfn)
+ { return NULL; }
+
++#define dev_is_pci(d) (false)
++#define dev_is_pf(d) (false)
++#define dev_num_vf(d) (0)
+ #endif /* CONFIG_PCI */
+
+ /* Include architecture-dependent settings and functions */
+@@ -1279,6 +1285,7 @@ void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
+ extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+ extern void pci_disable_sriov(struct pci_dev *dev);
+ extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
++extern int pci_num_vf(struct pci_dev *dev);
+ #else
+ static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+ {
+@@ -1291,6 +1298,10 @@ static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+ {
+ return IRQ_NONE;
+ }
++static inline int pci_num_vf(struct pci_dev *dev)
++{
++ return 0;
++}
+ #endif
+
+ #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
+diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
+index 67325bf..c398cc3 100644
+--- a/include/linux/pci_ids.h
++++ b/include/linux/pci_ids.h
+@@ -2712,3 +2712,6 @@
+ #define PCI_DEVICE_ID_RME_DIGI32 0x9896
+ #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897
+ #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898
++
++#define PCI_VENDOR_ID_XEN 0x5853
++#define PCI_DEVICE_ID_XEN_PLATFORM 0x0001
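[Editor's aside: the two IDs just added are what a driver for the Xen platform PCI device matches on. A minimal ID-table sketch follows; the driver and probe names are made up for illustration, while PCI_DEVICE(), the pci_driver fields and the ID macros are standard.]

    #include <linux/module.h>
    #include <linux/pci.h>

    static int __devinit example_platform_probe(struct pci_dev *pdev,
                                                const struct pci_device_id *ent)
    {
            /* hypothetical: enable the device, map BARs, set up the
             * event-channel callback, etc. */
            return pci_enable_device(pdev);
    }

    static const struct pci_device_id example_platform_pci_tbl[] = {
            { PCI_DEVICE(PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM) },
            { 0, }
    };
    MODULE_DEVICE_TABLE(pci, example_platform_pci_tbl);

    static struct pci_driver example_platform_driver = {
            .name     = "example-xen-platform",   /* name assumed */
            .id_table = example_platform_pci_tbl,
            .probe    = example_platform_probe,
    };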
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 73b1f1c..113585a 100644
--- a/include/linux/swiotlb.h
@@ -29103,7 +30336,7 @@
+
+#endif /* __XEN_BLKIF_H__ */
diff --git a/include/xen/events.h b/include/xen/events.h
-index e68d59a..699108a 100644
+index e68d59a..7e17e2a 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -12,6 +12,8 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -29128,7 +30361,7 @@
/*
* Common unbind function for all event sources. Takes IRQ to unbind from.
-@@ -53,7 +61,39 @@ bool xen_test_irq_pending(int irq);
+@@ -53,7 +61,42 @@ bool xen_test_irq_pending(int irq);
irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq);
@@ -29163,9 +30396,12 @@
+
+/* Determine whether to ignore this IRQ if passed to a guest. */
+int xen_ignore_irq(int irq);
-+
++/* Xen HVM evtchn vector callback */
++extern void xen_hvm_callback_vector(void);
++extern int xen_have_vector_callback;
++int xen_set_callback_via(uint64_t via);
+void xen_evtchn_do_upcall(struct pt_regs *regs);
-+void xen_hvm_evtchn_do_upcall(struct pt_regs *regs);
++void xen_hvm_evtchn_do_upcall(void);
+
#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
@@ -29294,7 +30530,7 @@
+
+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
-index a40f1cd..7f8c7c8 100644
+index a40f1cd..871b553 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -37,10 +37,16 @@
@@ -29319,9 +30555,9 @@
u16 count;
};
-+int gnttab_init(void);
+void gnttab_reset_grant_page(struct page *page);
+
++int gnttab_init(void);
int gnttab_suspend(void);
int gnttab_resume(void);
@@ -29334,7 +30570,7 @@
/*
* operations on reserved batches of grant references
*/
-@@ -106,6 +117,37 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+@@ -106,12 +117,46 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
unsigned long pfn);
@@ -29372,12 +30608,21 @@
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
struct grant_entry **__shared);
+ void arch_gnttab_unmap_shared(struct grant_entry *shared,
+ unsigned long nr_gframes);
+
++extern unsigned long xen_hvm_resume_frames;
++unsigned int gnttab_max_grant_frames(void);
++
+ #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
+
+ #endif /* __ASM_GNTTAB_H__ */
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
new file mode 100644
-index 0000000..a80c7b9
+index 0000000..b193fa2
--- /dev/null
+++ b/include/xen/hvm.h
-@@ -0,0 +1,32 @@
+@@ -0,0 +1,30 @@
+/* Simple wrappers around HVM functions */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
@@ -29385,42 +30630,43 @@
+#include <xen/interface/hvm/params.h>
+#include <asm/xen/hypercall.h>
+
-+static inline unsigned long hvm_get_parameter(int idx)
++static inline int hvm_get_parameter(int idx, uint64_t *value)
+{
-+ struct xen_hvm_param xhv;
-+ int r;
++ struct xen_hvm_param xhv;
++ int r;
+
-+ xhv.domid = DOMID_SELF;
-+ xhv.index = idx;
-+ r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
-+ if (r < 0) {
-+ printk(KERN_ERR "cannot get hvm parameter %d: %d.\n",
-+ idx, r);
-+ return 0;
-+ }
-+ return xhv.value;
++ xhv.domid = DOMID_SELF;
++ xhv.index = idx;
++ r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
++ if (r < 0) {
++ printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n",
++ idx, r);
++ return r;
++ }
++ *value = xhv.value;
++ return r;
+}
+
-+int xen_set_callback_via(uint64_t via);
-+extern int xen_have_vector_callback;
-+
+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
-+ HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
++ HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
+
+#endif /* XEN_HVM_H__ */
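[Editor's aside: a brief usage sketch of the hvm_get_parameter() wrapper defined above, e.g. to look up the xenstore event channel and page frame for a PV-on-HVM guest. The function name is hypothetical and error handling is trimmed; HVM_PARAM_STORE_EVTCHN and HVM_PARAM_STORE_PFN come from the params.h header added further down in this patch.]

    #include <linux/types.h>
    #include <xen/hvm.h>
    #include <xen/interface/hvm/params.h>

    static int example_hvm_get_xenstore_info(int *evtchn, unsigned long *pfn)
    {
            uint64_t v;
            int err;

            err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
            if (err)
                    return err;
            *evtchn = (int)v;

            err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
            if (err)
                    return err;
            *pfn = (unsigned long)v;
            return 0;
    }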
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
-index f51b641..8ab08b9 100644
+index f51b641..70d2563 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
-@@ -41,6 +41,9 @@
+@@ -41,6 +41,12 @@
/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
#define XENFEAT_mmu_pt_update_preserve_ad 5
+/* x86: Does this Xen host support the HVM callback vector type? */
+#define XENFEAT_hvm_callback_vector 8
+
++/* x86: pvclock algorithm is safe to use on HVM */
++#define XENFEAT_hvm_safe_pvclock 9
++
#define XENFEAT_NR_SUBMAPS 1
#endif /* __XEN_PUBLIC_FEATURES_H__ */
@@ -29467,10 +30713,10 @@
/* Map the grant entry for access by I/O devices. */
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
new file mode 100644
-index 0000000..7c74ba4
+index 0000000..a4827f4
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_op.h
-@@ -0,0 +1,72 @@
+@@ -0,0 +1,46 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
@@ -29494,7 +30740,8 @@
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
-+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
++/* Get/set subcommands: the second argument of the hypercall is a
++ * pointer to a xen_hvm_param struct. */
+#define HVMOP_set_param 0
+#define HVMOP_get_param 1
+struct xen_hvm_param {
@@ -29504,51 +30751,24 @@
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
+
-+/* Set the logical level of one of a domain's PCI INTx wires. */
-+#define HVMOP_set_pci_intx_level 2
-+struct xen_hvm_set_pci_intx_level {
-+ /* Domain to be updated. */
-+ domid_t domid;
-+ /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
-+ uint8_t domain, bus, device, intx;
-+ /* Assertion level (0 = unasserted, 1 = asserted). */
-+ uint8_t level;
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_intx_level);
-+
-+/* Set the logical level of one of a domain's ISA IRQ wires. */
-+#define HVMOP_set_isa_irq_level 3
-+struct xen_hvm_set_isa_irq_level {
-+ /* Domain to be updated. */
-+ domid_t domid;
-+ /* ISA device identification, by ISA IRQ (0-15). */
-+ uint8_t isa_irq;
-+ /* Assertion level (0 = unasserted, 1 = asserted). */
-+ uint8_t level;
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_isa_irq_level);
-+
-+#define HVMOP_set_pci_link_route 4
-+struct xen_hvm_set_pci_link_route {
-+ /* Domain to be updated. */
++/* Hint from PV drivers for pagetable destruction. */
++#define HVMOP_pagetable_dying 9
++struct xen_hvm_pagetable_dying {
++ /* Domain with a pagetable about to be destroyed. */
+ domid_t domid;
-+ /* PCI link identifier (0-3). */
-+ uint8_t link;
-+ /* ISA IRQ (1-15), or 0 (disable link). */
-+ uint8_t isa_irq;
++ /* guest physical address of the toplevel pagetable dying */
++ aligned_u64 gpa;
+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route);
-+
-+/* Flushes all VCPU TLBs: @arg must be NULL. */
-+#define HVMOP_flush_tlbs 5
-+
++typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
++DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
++
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
new file mode 100644
-index 0000000..aa9efd8
+index 0000000..1888d8c
--- /dev/null
+++ b/include/xen/interface/hvm/params.h
-@@ -0,0 +1,112 @@
+@@ -0,0 +1,95 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
@@ -29589,10 +30809,6 @@
+ */
+#define HVM_PARAM_CALLBACK_IRQ 0
+
-+/*
-+ * These are not used by Xen. They are here for convenience of HVM-guest
-+ * xenbus implementations.
-+ */
+#define HVM_PARAM_STORE_PFN 1
+#define HVM_PARAM_STORE_EVTCHN 2
+
@@ -29602,19 +30818,6 @@
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
-+#ifdef __ia64__
-+
-+#define HVM_PARAM_NVRAM_FD 7
-+#define HVM_PARAM_VHPT_SIZE 8
-+#define HVM_PARAM_BUFPIOREQ_PFN 9
-+
-+#elif defined(__i386__) || defined(__x86_64__)
-+
-+/* Expose Viridian interfaces to this HVM guest? */
-+#define HVM_PARAM_VIRIDIAN 9
-+
-+#endif
-+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ * delay_for_missed_ticks (default):
@@ -29661,6 +30864,21 @@
+#define HVM_NR_PARAMS 17
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
+diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
+index 518481c..8309344 100644
+--- a/include/xen/interface/io/netif.h
++++ b/include/xen/interface/io/netif.h
+@@ -131,6 +131,10 @@ struct xen_netif_rx_request {
+ #define _NETRXF_extra_info (3)
+ #define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+
++/* GSO Prefix descriptor. */
++#define _NETRXF_gso_prefix (4)
++#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix)
++
+ struct xen_netif_rx_response {
+ uint16_t id;
+ uint16_t offset; /* Offset in page of start of received packet */
diff --git a/include/xen/interface/io/pciif.h b/include/xen/interface/io/pciif.h
new file mode 100644
index 0000000..c4177f3
@@ -29792,16 +31010,24 @@
+ * End:
+ */
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
-index e8cbf43..865dcf0 100644
+index e8cbf43..7b301fa 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
-@@ -73,7 +73,8 @@ union __name##_sring_entry { \
+@@ -73,7 +73,16 @@ union __name##_sring_entry { \
struct __name##_sring { \
RING_IDX req_prod, req_event; \
RING_IDX rsp_prod, rsp_event; \
- uint8_t pad[48]; \
-+ uint8_t netfront_smartpoll_active; \
-+ uint8_t pad[47]; \
++ union { \
++ struct { \
++ uint8_t smartpoll_active; \
++ } netif; \
++ struct { \
++ uint8_t msg; \
++ } tapif_user; \
++ uint8_t pvt_pad[4]; \
++ } private; \
++ uint8_t pad[44]; \
union __name##_sring_entry ring[1]; /* variable-length */ \
}; \
\
@@ -30420,57 +31646,6 @@
+DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t);
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
-diff --git a/include/xen/interface/platform_pci.h b/include/xen/interface/platform_pci.h
-new file mode 100644
-index 0000000..bc230cd
---- /dev/null
-+++ b/include/xen/interface/platform_pci.h
-@@ -0,0 +1,45 @@
-+/******************************************************************************
-+ * platform_pci.h
-+ *
-+ * Interface for granting foreign access to page frames, and receiving
-+ * page-ownership transfers.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#ifndef __XEN_PUBLIC_PLATFORM_PCI_H__
-+#define __XEN_PUBLIC_PLATFORM_PCI_H__
-+
-+#define XEN_IOPORT_BASE 0x10
-+
-+#define XEN_IOPORT_PLATFLAGS (XEN_IOPORT_BASE + 0) /* 1 byte access (R/W) */
-+#define XEN_IOPORT_MAGIC (XEN_IOPORT_BASE + 0) /* 2 byte access (R) */
-+#define XEN_IOPORT_UNPLUG (XEN_IOPORT_BASE + 0) /* 2 byte access (W) */
-+#define XEN_IOPORT_DRVVER (XEN_IOPORT_BASE + 0) /* 4 byte access (W) */
-+
-+#define XEN_IOPORT_SYSLOG (XEN_IOPORT_BASE + 2) /* 1 byte access (W) */
-+#define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */
-+#define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */
-+
-+#define UNPLUG_ALL_IDE_DISKS 1
-+#define UNPLUG_ALL_NICS 2
-+#define UNPLUG_AUX_IDE_DISKS 4
-+#define UNPLUG_ALL 7
-+
-+#endif /* __XEN_PUBLIC_PLATFORM_PCI_H__ */
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..f31fdab
@@ -31024,55 +32199,57 @@
+#endif
diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
new file mode 100644
-index 0000000..ced434d
+index 0000000..ce9d671
--- /dev/null
+++ b/include/xen/platform_pci.h
-@@ -0,0 +1,47 @@
-+/******************************************************************************
-+ * platform-pci.h
-+ *
-+ * Xen platform PCI device driver
-+ * Copyright (c) 2004, Intel Corporation. <xiaofeng.ling at intel.com>
-+ * Copyright (c) 2007, XenSource Inc.
-+ * Copyright (c) 2010, Citrix
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms and conditions of the GNU General Public License,
-+ * version 2, as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-+ * more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-+ * Place - Suite 330, Boston, MA 02111-1307 USA.
-+ */
-+
+@@ -0,0 +1,49 @@
+#ifndef _XEN_PLATFORM_PCI_H
+#define _XEN_PLATFORM_PCI_H
+
-+#include <linux/version.h>
-+
+#define XEN_IOPORT_MAGIC_VAL 0x49d2
-+#define XEN_IOPORT_LINUX_PRODNUM 0xffff
-+#define XEN_IOPORT_LINUX_DRVVER ((LINUX_VERSION_CODE << 8) + 0x0)
++#define XEN_IOPORT_LINUX_PRODNUM 0x0003
++#define XEN_IOPORT_LINUX_DRVVER 0x0001
++
++#define XEN_IOPORT_BASE 0x10
++
++#define XEN_IOPORT_PLATFLAGS (XEN_IOPORT_BASE + 0) /* 1 byte access (R/W) */
++#define XEN_IOPORT_MAGIC (XEN_IOPORT_BASE + 0) /* 2 byte access (R) */
++#define XEN_IOPORT_UNPLUG (XEN_IOPORT_BASE + 0) /* 2 byte access (W) */
++#define XEN_IOPORT_DRVVER (XEN_IOPORT_BASE + 0) /* 4 byte access (W) */
++
++#define XEN_IOPORT_SYSLOG (XEN_IOPORT_BASE + 2) /* 1 byte access (W) */
++#define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */
++#define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */
+
-+#ifdef CONFIG_XEN_PLATFORM_PCI
-+unsigned long alloc_xen_mmio(unsigned long len);
-+void platform_pci_resume(void);
-+void platform_pci_disable_irq(void);
-+void platform_pci_enable_irq(void);
++#define XEN_UNPLUG_ALL_IDE_DISKS 1
++#define XEN_UNPLUG_ALL_NICS 2
++#define XEN_UNPLUG_AUX_IDE_DISKS 4
++#define XEN_UNPLUG_ALL 7
++#define XEN_UNPLUG_IGNORE 8
++
++static inline int xen_must_unplug_nics(void) {
++#if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \
++ defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \
++ (defined(CONFIG_XEN_PLATFORM_PCI) || \
++ defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
++ return 1;
+#else
-+static inline unsigned long alloc_xen_mmio(unsigned long len)
-+{
-+ return ~0UL;
++ return 0;
++#endif
+}
-+static inline void platform_pci_resume(void) {}
-+static inline void platform_pci_disable_irq(void) {}
-+static inline void platform_pci_enable_irq(void) {}
++
++static inline int xen_must_unplug_disks(void) {
++#if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \
++ defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \
++ (defined(CONFIG_XEN_PLATFORM_PCI) || \
++ defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
++ return 1;
++#else
++ return 0;
+#endif
++}
++
++extern int xen_platform_pci_unplug;
+
+#endif /* _XEN_PLATFORM_PCI_H */
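[Editor's aside: the magic/unplug I/O ports defined above implement the emulated-device unplug protocol between a PV-on-HVM guest and qemu. The sketch below is a rough simplification of what the platform-pci code in this patch does, with a hypothetical function name; the exact sequencing in the real code may differ.]

    #include <linux/errno.h>
    #include <linux/io.h>
    #include <xen/platform_pci.h>

    static int example_unplug_emulated_devices(void)
    {
            /* The device only honours unplug requests after the magic check. */
            if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL)
                    return -ENODEV;

            /* Identify ourselves as a Linux PV-on-HVM driver set. */
            outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
            outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);

            /* Ask qemu to unplug its emulated NICs and IDE disks, but only
             * if the corresponding PV frontends are built in. */
            if (xen_must_unplug_nics())
                    outw(XEN_UNPLUG_ALL_NICS, XEN_IOPORT_UNPLUG);
            if (xen_must_unplug_disks())
                    outw(XEN_UNPLUG_ALL_IDE_DISKS, XEN_IOPORT_UNPLUG);
            return 0;
    }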
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
@@ -31162,15 +32339,21 @@
+
+#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
-index 883a21b..323121a 100644
+index 883a21b..7058f8a 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
-@@ -14,4 +14,17 @@ void xen_mm_unpin_all(void);
+@@ -7,6 +7,7 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+ void xen_pre_suspend(void);
+ void xen_post_suspend(int suspend_cancelled);
++void xen_hvm_post_suspend(int suspend_cancelled);
+
+ void xen_mm_pin_all(void);
+ void xen_mm_unpin_all(void);
+@@ -14,4 +15,16 @@ void xen_mm_unpin_all(void);
void xen_timer_resume(void);
void xen_arch_resume(void);
-+int xen_setup_shutdown_event(void);
-+
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long mfn, int nr,
@@ -31181,14 +32364,15 @@
+ unsigned int address_bits);
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
++int xen_setup_shutdown_event(void);
+
#endif /* INCLUDE_XEN_OPS_H */
diff --git a/include/xen/xen.h b/include/xen/xen.h
new file mode 100644
-index 0000000..a164024
+index 0000000..77604ed
--- /dev/null
+++ b/include/xen/xen.h
-@@ -0,0 +1,32 @@
+@@ -0,0 +1,34 @@
+#ifndef _XEN_XEN_H
+#define _XEN_XEN_H
+
@@ -31200,8 +32384,10 @@
+
+#ifdef CONFIG_XEN
+extern enum xen_domain_type xen_domain_type;
++extern void xen_hvm_guest_init(void);
+#else
+#define xen_domain_type XEN_NATIVE
++#define xen_hvm_guest_init() do { } while (0)
+#endif
+
+#define xen_domain() (xen_domain_type != XEN_NATIVE)
@@ -31222,7 +32408,7 @@
+
+#endif /* _XEN_XEN_H */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
-index b9763ba..a7d13ff 100644
+index b9763ba..542ca7c 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -93,7 +93,7 @@ struct xenbus_driver {
@@ -31234,14 +32420,6 @@
struct device_driver driver;
int (*read_otherend_details)(struct xenbus_device *dev);
int (*is_ready)(struct xenbus_device *dev);
-@@ -173,6 +173,7 @@ void unregister_xenbus_watch(struct xenbus_watch *watch);
- void xs_suspend(void);
- void xs_resume(void);
- void xs_suspend_cancel(void);
-+int xenbus_probe_init(void);
-
- /* Used by xenbus_dev to borrow kernel's store connection. */
- void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 986519e..cae345b 100644
--- a/kernel/irq/manage.c
@@ -33063,7 +34241,7 @@
{
int aligned;
diff --git a/mm/memory.c b/mm/memory.c
-index 4e59455..b2de7c9 100644
+index 4e59455..17148f0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -553,6 +553,13 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -33137,57 +34315,35 @@
vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-+#if CONFIG_XEN
++#ifdef CONFIG_XEN
+ vma->vm_mm->context.has_foreign_mappings = 1;
+#endif
+
err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
if (err) {
/*
-diff --git a/mm/mmap.c b/mm/mmap.c
-index ae19746..9c39fc2 100644
---- a/mm/mmap.c
-+++ b/mm/mmap.c
-@@ -1785,6 +1785,12 @@ static void unmap_region(struct mm_struct *mm,
- tlb_finish_mmu(tlb, start, end);
- }
+@@ -1886,11 +1925,10 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ {
+ pgd_t *pgd;
+ unsigned long next;
+- unsigned long start = addr, end = addr + size;
++ unsigned long end = addr + size;
+ int err;
-+static inline void unmap_vma(struct vm_area_struct *vma)
-+{
-+ if (unlikely(vma->vm_ops && vma->vm_ops->unmap))
-+ vma->vm_ops->unmap(vma);
-+}
-+
- /*
- * Create a list of vma's touched by the unmap, removing them from the mm's
- * vma list as we go..
-@@ -1800,6 +1806,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
- insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+ BUG_ON(addr >= end);
+- mmu_notifier_invalidate_range_start(mm, start, end);
+ pgd = pgd_offset(mm, addr);
do {
- rb_erase(&vma->vm_rb, &mm->mm_rb);
-+ unmap_vma(vma);
- mm->map_count--;
- tail_vma = vma;
- vma = vma->vm_next;
-@@ -2076,7 +2083,7 @@ EXPORT_SYMBOL(do_brk);
- void exit_mmap(struct mm_struct *mm)
- {
- struct mmu_gather *tlb;
-- struct vm_area_struct *vma;
-+ struct vm_area_struct *vma, *vma_tmp;
- unsigned long nr_accounted = 0;
- unsigned long end;
-
-@@ -2098,6 +2105,9 @@ void exit_mmap(struct mm_struct *mm)
- if (!vma) /* Can happen if dup_mmap() received an OOM */
- return;
-
-+ for (vma_tmp = mm->mmap; vma_tmp; vma_tmp = vma_tmp->vm_next)
-+ unmap_vma(vma_tmp);
+ next = pgd_addr_end(addr, end);
+@@ -1898,7 +1936,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ if (err)
+ break;
+ } while (pgd++, addr = next, addr != end);
+- mmu_notifier_invalidate_range_end(mm, start, end);
+
- lru_add_drain();
- flush_cache_mm(mm);
- tlb = tlb_gather_mmu(mm, 1);
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(apply_to_page_range);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 36992b6..bc1b6e9 100644
--- a/mm/page_alloc.c
@@ -33253,3 +34409,202 @@
if (nr || force_flush)
flush_tlb_kernel_range(*start, *end);
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index d4fd895..4ab8c97 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -35,6 +35,7 @@
+ #include <linux/security.h>
+ #include <linux/mutex.h>
+ #include <linux/if_addr.h>
++#include <linux/pci.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -582,6 +583,22 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+ a->tx_compressed = b->tx_compressed;
+ };
+
++/* All VF info */
++static inline int rtnl_vfinfo_size(const struct net_device *dev)
++{
++ if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
++
++ int num_vfs = dev_num_vf(dev->dev.parent);
++ size_t size = nlmsg_total_size(sizeof(struct nlattr));
++ size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
++ size += num_vfs * (sizeof(struct ifla_vf_mac) +
++ sizeof(struct ifla_vf_vlan) +
++ sizeof(struct ifla_vf_tx_rate));
++ return size;
++ } else
++ return 0;
++}
++
+ static inline size_t if_nlmsg_size(const struct net_device *dev)
+ {
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+@@ -599,6 +616,8 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
+ + nla_total_size(4) /* IFLA_MASTER */
+ + nla_total_size(1) /* IFLA_OPERSTATE */
+ + nla_total_size(1) /* IFLA_LINKMODE */
++ + nla_total_size(4) /* IFLA_NUM_VF */
++ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+ }
+
+@@ -667,6 +686,40 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+ stats = dev_get_stats(dev);
+ copy_rtnl_link_stats(nla_data(attr), stats);
+
++ if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
++ int i;
++
++ struct nlattr *vfinfo, *vf;
++ int num_vfs = dev_num_vf(dev->dev.parent);
++
++ NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
++ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
++ if (!vfinfo)
++ goto nla_put_failure;
++ for (i = 0; i < num_vfs; i++) {
++ struct ifla_vf_info ivi;
++ struct ifla_vf_mac vf_mac;
++ struct ifla_vf_vlan vf_vlan;
++ struct ifla_vf_tx_rate vf_tx_rate;
++ if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
++ break;
++ vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
++ memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
++ vf_vlan.vlan = ivi.vlan;
++ vf_vlan.qos = ivi.qos;
++ vf_tx_rate.rate = ivi.tx_rate;
++ vf = nla_nest_start(skb, IFLA_VF_INFO);
++ if (!vf) {
++ nla_nest_cancel(skb, vfinfo);
++ goto nla_put_failure;
++ }
++ NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
++ NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
++ NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
++ nla_nest_end(skb, vf);
++ }
++ nla_nest_end(skb, vfinfo);
++ }
+ if (dev->rtnl_link_ops) {
+ if (rtnl_link_fill(skb, dev) < 0)
+ goto nla_put_failure;
+@@ -716,6 +769,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+ [IFLA_LINKINFO] = { .type = NLA_NESTED },
+ [IFLA_NET_NS_PID] = { .type = NLA_U32 },
+ [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
++ [IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
+ };
+
+ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
+@@ -723,6 +777,33 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
+ [IFLA_INFO_DATA] = { .type = NLA_NESTED },
+ };
+
++static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
++ [IFLA_VF_INFO] = { .type = NLA_NESTED },
++};
++
++static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
++ [IFLA_VF_MAC] = { .type = NLA_BINARY,
++ .len = sizeof(struct ifla_vf_mac) },
++ [IFLA_VF_VLAN] = { .type = NLA_BINARY,
++ .len = sizeof(struct ifla_vf_vlan) },
++ [IFLA_VF_TX_RATE] = { .type = NLA_BINARY,
++ .len = sizeof(struct ifla_vf_tx_rate) },
++};
++
++struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
++{
++ struct net *net;
++ /* Examine the link attributes and figure out which
++ * network namespace we are talking about.
++ */
++ if (tb[IFLA_NET_NS_PID])
++ net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
++ else
++ net = get_net(src_net);
++ return net;
++}
++EXPORT_SYMBOL(rtnl_link_get_net);
++
+ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+ {
+ if (dev) {
+@@ -738,6 +819,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+ return 0;
+ }
+
++static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
++{
++ int rem, err = -EINVAL;
++ struct nlattr *vf;
++ const struct net_device_ops *ops = dev->netdev_ops;
++
++ nla_for_each_nested(vf, attr, rem) {
++ switch (nla_type(vf)) {
++ case IFLA_VF_MAC: {
++ struct ifla_vf_mac *ivm;
++ ivm = nla_data(vf);
++ err = -EOPNOTSUPP;
++ if (ops->ndo_set_vf_mac)
++ err = ops->ndo_set_vf_mac(dev, ivm->vf,
++ ivm->mac);
++ break;
++ }
++ case IFLA_VF_VLAN: {
++ struct ifla_vf_vlan *ivv;
++ ivv = nla_data(vf);
++ err = -EOPNOTSUPP;
++ if (ops->ndo_set_vf_vlan)
++ err = ops->ndo_set_vf_vlan(dev, ivv->vf,
++ ivv->vlan,
++ ivv->qos);
++ break;
++ }
++ case IFLA_VF_TX_RATE: {
++ struct ifla_vf_tx_rate *ivt;
++ ivt = nla_data(vf);
++ err = -EOPNOTSUPP;
++ if (ops->ndo_set_vf_tx_rate)
++ err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
++ ivt->rate);
++ break;
++ }
++ default:
++ err = -EINVAL;
++ break;
++ }
++ if (err)
++ break;
++ }
++ return err;
++}
++
+ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+ struct nlattr **tb, char *ifname, int modified)
+ {
+@@ -875,6 +1002,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+ write_unlock_bh(&dev_base_lock);
+ }
+
++ if (tb[IFLA_VFINFO_LIST]) {
++ struct nlattr *attr;
++ int rem;
++ nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
++ if (nla_type(attr) != IFLA_VF_INFO)
++ goto errout;
++ err = do_setvfinfo(dev, attr);
++ if (err < 0)
++ goto errout;
++ modified = 1;
++ }
++ }
+ err = 0;
+
+ errout: