[Pkg-xen-changes] [xen] 01/36: Import xen_4.4.1.orig.tar.xz

Bastian Blank waldi at moszumanska.debian.org
Sun Sep 7 09:27:00 UTC 2014


This is an automated email from the git hooks/post-receive script.

waldi pushed a commit to branch develop
in repository xen.

commit 3387be132d526263f246c24d3bbc94767a4eba76
Author: Bastian Blank <waldi at debian.org>
Date:   Sun Sep 7 10:59:57 2014 +0200

    Import xen_4.4.1.orig.tar.xz
---
 Config.mk                                    |   8 +-
 MAINTAINERS                                  |  11 +-
 config.guess                                 | 258 ++++++++++---------
 config.sub                                   | 218 ++++++++++------
 docs/misc/xen-command-line.markdown          |   8 +
 m4/ax_compare_version.m4                     | 179 +++++++++++++
 stubdom/grub/mini-os.c                       |   3 +
 tools/blktap2/drivers/block-qcow.c           |   2 +
 tools/configure                              | 174 +++++++++++++
 tools/configure.ac                           |   7 +
 tools/include/Makefile                       |   9 +-
 tools/libxc/xc_dom.h                         |   8 +
 tools/libxc/xc_dom_arm.c                     |  86 ++++++-
 tools/libxc/xc_dom_armzimageloader.c         |  42 +--
 tools/libxc/xc_dom_core.c                    |  13 +-
 tools/libxc/xc_private.c                     |   6 +-
 tools/libxl/libxl.c                          |   3 +-
 tools/libxl/libxl_arch.h                     |  11 +-
 tools/libxl/libxl_arm.c                      |  72 +++++-
 tools/libxl/libxl_device.c                   |   3 +-
 tools/libxl/libxl_dom.c                      |   8 +-
 tools/libxl/libxl_fork.c                     |   2 +-
 tools/libxl/libxl_json.c                     |   1 -
 tools/libxl/libxl_numa.c                     |   5 +-
 tools/libxl/libxl_save_callout.c             |   6 +-
 tools/libxl/libxl_utils.c                    |   5 +-
 tools/libxl/libxl_x86.c                      |  13 +-
 tools/pygrub/examples/rhel-7-beta.grub2      | 118 +++++++++
 tools/pygrub/src/GrubConf.py                 |   4 +-
 tools/pygrub/src/pygrub                      |   4 +-
 tools/tests/x86_emulator/test_x86_emulator.c | 140 +++++-----
 xen/Makefile                                 |   2 +-
 xen/arch/arm/Rules.mk                        |   9 +
 xen/arch/arm/arm32/head.S                    |  13 +-
 xen/arch/arm/arm64/head.S                    |  13 +-
 xen/arch/arm/domain.c                        |  25 +-
 xen/arch/arm/domain_build.c                  |  23 +-
 xen/arch/arm/guestcopy.c                     |  26 +-
 xen/arch/arm/kernel.c                        |   2 +-
 xen/arch/arm/p2m.c                           |  56 ++++
 xen/arch/arm/psci.c                          |  12 +-
 xen/arch/arm/time.c                          |   1 -
 xen/arch/arm/traps.c                         | 366 ++++++++++++++++++++++++---
 xen/arch/arm/vgic.c                          |  11 +-
 xen/arch/x86/acpi/boot.c                     |   2 +
 xen/arch/x86/acpi/cpu_idle.c                 |   6 +
 xen/arch/x86/cpu/amd.c                       |   5 +-
 xen/arch/x86/cpu/common.c                    |   5 +-
 xen/arch/x86/cpu/intel.c                     |   7 +
 xen/arch/x86/cpu/mcheck/mce.c                |   6 +-
 xen/arch/x86/cpu/mcheck/mce_intel.c          |  19 +-
 xen/arch/x86/cpu/mcheck/mctelem.c            |  81 +++---
 xen/arch/x86/cpu/mcheck/vmce.c               |   4 +
 xen/arch/x86/cpu/mwait-idle.c                |   4 +-
 xen/arch/x86/domain.c                        |  38 ++-
 xen/arch/x86/domctl.c                        |  82 +++---
 xen/arch/x86/efi/runtime.c                   |   9 +
 xen/arch/x86/flushtlb.c                      |   3 +-
 xen/arch/x86/hvm/hvm.c                       |  60 ++++-
 xen/arch/x86/hvm/i8254.c                     |   3 +-
 xen/arch/x86/hvm/io.c                        |   2 +
 xen/arch/x86/hvm/irq.c                       |  10 +-
 xen/arch/x86/hvm/mtrr.c                      |  85 +++++--
 xen/arch/x86/hvm/rtc.c                       |  77 ++++--
 xen/arch/x86/hvm/svm/svm.c                   |  10 +-
 xen/arch/x86/hvm/vmx/vmcs.c                  |   8 +-
 xen/arch/x86/hvm/vmx/vmx.c                   |  22 +-
 xen/arch/x86/hvm/vmx/vpmu_core2.c            |   4 +
 xen/arch/x86/hvm/vpt.c                       |  40 ---
 xen/arch/x86/i387.c                          |  17 +-
 xen/arch/x86/irq.c                           |   4 +-
 xen/arch/x86/microcode_amd.c                 |   2 +-
 xen/arch/x86/mm.c                            |   4 +-
 xen/arch/x86/mm/hap/hap.c                    |   9 +-
 xen/arch/x86/mm/hap/nested_hap.c             |   2 +-
 xen/arch/x86/mm/mem_event.c                  |  32 +++
 xen/arch/x86/mm/mem_sharing.c                |  17 +-
 xen/arch/x86/mm/p2m-pod.c                    |   6 +-
 xen/arch/x86/mm/p2m.c                        |  47 +++-
 xen/arch/x86/mm/shadow/common.c              |   8 +-
 xen/arch/x86/msi.c                           |   2 +-
 xen/arch/x86/nmi.c                           |  14 +-
 xen/arch/x86/time.c                          |  16 +-
 xen/arch/x86/traps.c                         |  14 +-
 xen/arch/x86/x86_64/compat/traps.c           |  14 +-
 xen/arch/x86/x86_emulate/x86_emulate.c       |  33 ++-
 xen/common/compat/multicall.c                |   2 +-
 xen/common/cpupool.c                         |  18 +-
 xen/common/device_tree.c                     |   9 +-
 xen/common/domain.c                          |  67 +++--
 xen/common/domctl.c                          |  18 +-
 xen/common/event_channel.c                   |   4 +-
 xen/common/kexec.c                           |   4 +-
 xen/common/kimage.c                          |   1 +
 xen/common/lz4/decompress.c                  |   8 +
 xen/common/memory.c                          |   9 +-
 xen/common/multicall.c                       |   6 +-
 xen/common/page_alloc.c                      |   5 +
 xen/common/schedule.c                        |   3 +-
 xen/common/timer.c                           |   3 +-
 xen/common/tmem.c                            |   2 -
 xen/common/trace.c                           |   2 +-
 xen/common/xmalloc_tlsf.c                    |   5 +-
 xen/drivers/acpi/apei/Makefile               |   1 +
 xen/drivers/acpi/apei/erst.c                 |  12 +-
 xen/drivers/acpi/apei/hest.c                 | 200 +++++++++++++++
 xen/drivers/acpi/tables.c                    |   6 +
 xen/drivers/char/console.c                   |   4 +-
 xen/drivers/passthrough/amd/iommu_map.c      |   5 +-
 xen/drivers/passthrough/amd/pci_amd_iommu.c  |   2 +-
 xen/drivers/passthrough/ats.h                |   3 +-
 xen/drivers/passthrough/iommu.c              |  10 +-
 xen/drivers/passthrough/pci.c                | 228 +++++++++++++++++
 xen/drivers/passthrough/vtd/dmar.c           |  10 +
 xen/drivers/passthrough/vtd/extern.h         |   2 +-
 xen/drivers/passthrough/vtd/iommu.c          | 110 +++++---
 xen/drivers/passthrough/vtd/qinval.c         |   2 +-
 xen/drivers/passthrough/vtd/quirks.c         | 124 ++++++++-
 xen/drivers/passthrough/vtd/x86/ats.c        |  37 ++-
 xen/drivers/passthrough/x86/ats.c            |   3 +-
 xen/drivers/pci/pci.c                        |  29 ++-
 xen/include/acpi/actbl1.h                    |   8 +
 xen/include/acpi/apei.h                      |   3 +
 xen/include/asm-arm/arm32/page.h             |   7 +-
 xen/include/asm-arm/arm64/page.h             |   7 +-
 xen/include/asm-arm/arm64/vfp.h              |   5 +-
 xen/include/asm-arm/bitops.h                 |  37 +--
 xen/include/asm-arm/cpregs.h                 |  37 ++-
 xen/include/asm-arm/domain.h                 |   3 +
 xen/include/asm-arm/mm.h                     |   7 +-
 xen/include/asm-arm/p2m.h                    |   4 +
 xen/include/asm-arm/page.h                   |   4 +
 xen/include/asm-arm/processor.h              |  39 ++-
 xen/include/asm-arm/sysregs.h                |  44 ++++
 xen/include/asm-x86/cpufeature.h             |   1 +
 xen/include/asm-x86/hvm/hvm.h                |  31 ++-
 xen/include/asm-x86/hvm/vmx/vmcs.h           |   2 +
 xen/include/asm-x86/hvm/vpt.h                |   6 +-
 xen/include/asm-x86/i387.h                   |   1 +
 xen/include/asm-x86/mem_event.h              |   3 +
 xen/include/asm-x86/p2m.h                    |   4 +-
 xen/include/public/arch-arm.h                |   3 +-
 xen/include/public/xen.h                     |  10 +-
 xen/include/xen/acpi.h                       |   1 +
 xen/include/xen/hvm/iommu.h                  |   6 -
 xen/include/xen/pci.h                        |   3 +
 xen/include/xen/pci_ids.h                    |   9 +
 xen/include/xen/pci_regs.h                   |  11 +-
 xen/include/xen/prefetch.h                   |  13 +-
 xen/include/xen/sched.h                      |  28 +-
 xen/include/xen/trace.h                      |   2 +-
 151 files changed, 3172 insertions(+), 940 deletions(-)

diff --git a/Config.mk b/Config.mk
index eb2c192..c44853f 100644
--- a/Config.mk
+++ b/Config.mk
@@ -234,7 +234,7 @@ QEMU_UPSTREAM_URL ?= git://xenbits.xen.org/qemu-upstream-4.4-testing.git
 SEABIOS_UPSTREAM_URL ?= git://xenbits.xen.org/seabios.git
 endif
 OVMF_UPSTREAM_REVISION ?= 447d264115c476142f884af0be287622cd244423
-QEMU_UPSTREAM_REVISION ?= qemu-xen-4.4.0
+QEMU_UPSTREAM_REVISION ?= qemu-xen-4.4.1
 SEABIOS_UPSTREAM_TAG ?= rel-1.7.3.1
 # Fri Aug 2 14:12:09 2013 -0400
 # Fix bug in CBFS file walking with compressed files.
@@ -246,9 +246,9 @@ ETHERBOOT_NICS ?= rtl8139 8086100e
 # CONFIG_QEMU ?= `pwd`/$(XEN_ROOT)/../qemu-xen.git
 CONFIG_QEMU ?= $(QEMU_REMOTE)
 
-QEMU_TAG ?= xen-4.4.0
-# Wed Dec 18 15:25:14 2013 +0000
-# qemu-traditional: Fix build warnings on Wheezy
+QEMU_TAG ?= xen-4.4.1
+# Tue Apr 8 16:50:06 2014 +0000
+# qemu-xen-trad: free all the pirqs for msi/msix when driver unloads
 
 # Short answer -- do not enable this unless you know what you are
 # doing and are prepared for some pain.
diff --git a/MAINTAINERS b/MAINTAINERS
index 7757cdd..902c077 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -49,9 +49,14 @@ for inclusion in xen-unstable.
 Please see http://wiki.xen.org/wiki/Xen_Maintenance_Releases for more
 information.
 
-Remember to copy the appropriate stable branch maintainer who will be
-listed in this section of the MAINTAINERS file in the appropriate
-branch.
+Remember to copy the stable branch maintainer. The maintainer for this
+branch is:
+
+	Jan Beulich <jbeulich at suse.com>
+
+Tools backport requests should also be copied to:
+
+	Ian Jackson <Ian.Jackson at eu.citrix.com>
 
 	Unstable Subsystem Maintainers
 	==============================
diff --git a/config.guess b/config.guess
index c2246a4..d622a44 100755
--- a/config.guess
+++ b/config.guess
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
-#   Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2009-12-30'
+timestamp='2012-02-10'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@ timestamp='2009-12-30'
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
@@ -57,8 +55,8 @@ GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -145,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     *:NetBSD:*:*)
 	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
 	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
 	# switched to ELF, *-*-netbsd* would select the old
 	# object file format.  This provides both forward
@@ -181,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		fi
 		;;
 	    *)
-	        os=netbsd
+		os=netbsd
 		;;
 	esac
 	# The OS release
@@ -224,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
 		;;
 	*5.*)
-	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
 		;;
 	esac
 	# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -270,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
 	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-	exit ;;
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
     Alpha\ *:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# Should we change UNAME_MACHINE based on the output of uname instead
@@ -296,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	echo s390-ibm-zvmoe
 	exit ;;
     *:OS400:*:*)
-        echo powerpc-ibm-os400
+	echo powerpc-ibm-os400
 	exit ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
@@ -395,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     # MiNT.  But MiNT is downward compatible to TOS, so this should
     # be no problem.
     atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
 	echo m68k-atari-mint${UNAME_RELEASE}
-        exit ;;
+	exit ;;
     *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-        echo m68k-milan-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
     hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-        echo m68k-hades-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-        echo m68k-unknown-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
     m68k:machten:*:*)
 	echo m68k-apple-machten${UNAME_RELEASE}
 	exit ;;
@@ -481,8 +482,8 @@ EOF
 	echo m88k-motorola-sysv3
 	exit ;;
     AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
 	then
 	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -495,7 +496,7 @@ EOF
 	else
 	    echo i586-dg-dgux${UNAME_RELEASE}
 	fi
- 	exit ;;
+	exit ;;
     M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
 	echo m88k-dolphin-sysv3
 	exit ;;
@@ -552,7 +553,7 @@ EOF
 		echo rs6000-ibm-aix3.2
 	fi
 	exit ;;
-    *:AIX:*:[456])
+    *:AIX:*:[4567])
 	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
 	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
 		IBM_ARCH=rs6000
@@ -595,52 +596,52 @@ EOF
 	    9000/[678][0-9][0-9])
 		if [ -x /usr/bin/getconf ]; then
 		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-                    case "${sc_cpu_version}" in
-                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
-                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
-                      532)                      # CPU_PA_RISC2_0
-                        case "${sc_kernel_bits}" in
-                          32) HP_ARCH="hppa2.0n" ;;
-                          64) HP_ARCH="hppa2.0w" ;;
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
 			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
-                        esac ;;
-                    esac
+			esac ;;
+		    esac
 		fi
 		if [ "${HP_ARCH}" = "" ]; then
 		    eval $set_cc_for_build
-		    sed 's/^              //' << EOF >$dummy.c
+		    sed 's/^		//' << EOF >$dummy.c
 
-              #define _HPUX_SOURCE
-              #include <stdlib.h>
-              #include <unistd.h>
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
 
-              int main ()
-              {
-              #if defined(_SC_KERNEL_BITS)
-                  long bits = sysconf(_SC_KERNEL_BITS);
-              #endif
-                  long cpu  = sysconf (_SC_CPU_VERSION);
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
 
-                  switch (cpu)
-              	{
-              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-              	case CPU_PA_RISC2_0:
-              #if defined(_SC_KERNEL_BITS)
-              	    switch (bits)
-              		{
-              		case 64: puts ("hppa2.0w"); break;
-              		case 32: puts ("hppa2.0n"); break;
-              		default: puts ("hppa2.0"); break;
-              		} break;
-              #else  /* !defined(_SC_KERNEL_BITS) */
-              	    puts ("hppa2.0"); break;
-              #endif
-              	default: puts ("hppa1.0"); break;
-              	}
-                  exit (0);
-              }
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
 EOF
 		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -731,22 +732,22 @@ EOF
 	exit ;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
 	echo c1-convex-bsd
-        exit ;;
+	exit ;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
 	else echo c2-convex-bsd
 	fi
-        exit ;;
+	exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
 	echo c34-convex-bsd
-        exit ;;
+	exit ;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
 	echo c38-convex-bsd
-        exit ;;
+	exit ;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
 	echo c4-convex-bsd
-        exit ;;
+	exit ;;
     CRAY*Y-MP:*:*:*)
 	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
 	exit ;;
@@ -770,14 +771,14 @@ EOF
 	exit ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
 	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit ;;
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
     5000:UNIX_System_V:4.*:*)
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
-        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
 	exit ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
 	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@@ -789,13 +790,12 @@ EOF
 	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
 	exit ;;
     *:FreeBSD:*:*)
-	case ${UNAME_MACHINE} in
-	    pc98)
-		echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case ${UNAME_PROCESSOR} in
 	    amd64)
 		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	    *)
-		echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+		echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	esac
 	exit ;;
     i*:CYGWIN*:*)
@@ -804,15 +804,18 @@ EOF
     *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
 	exit ;;
+    i*:MSYS*:*)
+	echo ${UNAME_MACHINE}-pc-msys
+	exit ;;
     i*:windows32*:*)
-    	# uname -m includes "-pc" on this system.
-    	echo ${UNAME_MACHINE}-mingw32
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
 	exit ;;
     i*:PW*:*)
 	echo ${UNAME_MACHINE}-pc-pw32
 	exit ;;
     *:Interix*:*)
-    	case ${UNAME_MACHINE} in
+	case ${UNAME_MACHINE} in
 	    x86)
 		echo i586-pc-interix${UNAME_RELEASE}
 		exit ;;
@@ -858,6 +861,13 @@ EOF
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
 	exit ;;
+    aarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     alpha:Linux:*:*)
 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
 	  EV5)   UNAME_MACHINE=alphaev5 ;;
@@ -867,7 +877,7 @@ EOF
 	  EV6)   UNAME_MACHINE=alphaev6 ;;
 	  EV67)  UNAME_MACHINE=alphaev67 ;;
 	  EV68*) UNAME_MACHINE=alphaev68 ;;
-        esac
+	esac
 	objdump --private-headers /bin/sh | grep -q ld.so.1
 	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
 	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
@@ -879,20 +889,29 @@ EOF
 	then
 	    echo ${UNAME_MACHINE}-unknown-linux-gnu
 	else
-	    echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    else
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+	    fi
 	fi
 	exit ;;
     avr32*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     cris:Linux:*:*)
-	echo cris-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     crisv32:Linux:*:*)
-	echo crisv32-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     frv:Linux:*:*)
-    	echo frv-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    hexagon:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:Linux:*:*)
 	LIBC=gnu
@@ -934,7 +953,7 @@ EOF
 	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
 	;;
     or32:Linux:*:*)
-	echo or32-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     padre:Linux:*:*)
 	echo sparc-unknown-linux-gnu
@@ -960,7 +979,7 @@ EOF
 	echo ${UNAME_MACHINE}-ibm-linux
 	exit ;;
     sh64*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     sh*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -968,14 +987,17 @@ EOF
     sparc:Linux:*:* | sparc64:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
+    tile*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     vax:Linux:*:*)
 	echo ${UNAME_MACHINE}-dec-linux-gnu
 	exit ;;
     x86_64:Linux:*:*)
-	echo x86_64-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     xtensa*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -984,11 +1006,11 @@ EOF
 	echo i386-sequent-sysv4
 	exit ;;
     i*86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
 	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
+	# Use sysv4.2uw... so that sysv4* matches it.
 	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
 	exit ;;
     i*86:OS/2:*:*)
@@ -1020,7 +1042,7 @@ EOF
 	fi
 	exit ;;
     i*86:*:5:[678]*)
-    	# UnixWare 7.x, OpenUNIX and OpenServer 6.
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
 	case `/bin/uname -X | grep "^Machine"` in
 	    *486*)	     UNAME_MACHINE=i486 ;;
 	    *Pentium)	     UNAME_MACHINE=i586 ;;
@@ -1048,13 +1070,13 @@ EOF
 	exit ;;
     pc:*:*:*)
 	# Left here for compatibility:
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i586.
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
 	# Note: whatever this is, it MUST be the same as what config.sub
 	# prints for the "djgpp" host, or else GDB configury will decide that
 	# this is a cross-build.
 	echo i586-pc-msdosdjgpp
-        exit ;;
+	exit ;;
     Intel:Mach:3*:*)
 	echo i386-pc-mach3
 	exit ;;
@@ -1089,8 +1111,8 @@ EOF
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
 	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && { echo i486-ncr-sysv4; exit; } ;;
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
     NCR*:*:4.2:* | MPRAS*:*:4.2:*)
 	OS_REL='.3'
 	test -r /etc/.relid \
@@ -1133,10 +1155,10 @@ EOF
 		echo ns32k-sni-sysv
 	fi
 	exit ;;
-    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                      # says <Richard.M.Bartel at ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel at ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
     *:UNIX_System_V:4*:FTX*)
 	# From Gerald Hewes <hewes at openmarket.com>.
 	# How about differentiating between stratus architectures? -djm
@@ -1162,11 +1184,11 @@ EOF
 	exit ;;
     R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
 	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
+		echo mips-nec-sysv${UNAME_RELEASE}
 	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
+		echo mips-unknown-sysv${UNAME_RELEASE}
 	fi
-        exit ;;
+	exit ;;
     BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
 	echo powerpc-be-beos
 	exit ;;
@@ -1231,6 +1253,9 @@ EOF
     *:QNX:*:4*)
 	echo i386-pc-qnx
 	exit ;;
+    NEO-?:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk${UNAME_RELEASE}
+	exit ;;
     NSE-?:NONSTOP_KERNEL:*:*)
 	echo nse-tandem-nsk${UNAME_RELEASE}
 	exit ;;
@@ -1276,13 +1301,13 @@ EOF
 	echo pdp10-unknown-its
 	exit ;;
     SEI:*:*:SEIUX)
-        echo mips-sei-seiux${UNAME_RELEASE}
+	echo mips-sei-seiux${UNAME_RELEASE}
 	exit ;;
     *:DragonFly:*:*)
 	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
 	exit ;;
     *:*VMS:*:*)
-    	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
 	case "${UNAME_MACHINE}" in
 	    A*) echo alpha-dec-vms ; exit ;;
 	    I*) echo ia64-dec-vms ; exit ;;
@@ -1300,6 +1325,9 @@ EOF
     i*86:AROS:*:*)
 	echo ${UNAME_MACHINE}-pc-aros
 	exit ;;
+    x86_64:VMkernel:*:*)
+	echo ${UNAME_MACHINE}-unknown-esx
+	exit ;;
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1322,11 +1350,11 @@ main ()
 #include <sys/param.h>
   printf ("m68k-sony-newsos%s\n",
 #ifdef NEWSOS4
-          "4"
+	"4"
 #else
-	  ""
+	""
 #endif
-         ); exit (0);
+	); exit (0);
 #endif
 #endif
 
diff --git a/config.sub b/config.sub
index c2d1257..6205f84 100755
--- a/config.sub
+++ b/config.sub
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Configuration validation subroutine script.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
-#   Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2010-01-22'
+timestamp='2012-04-18'
 
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
@@ -21,9 +21,7 @@ timestamp='2010-01-22'
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
@@ -76,8 +74,8 @@ version="\
 GNU config.sub ($timestamp)
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -124,13 +122,18 @@ esac
 # Here we must recognize all the valid KERNEL-OS combinations.
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
-  nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
-  uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | \
   kopensolaris*-gnu* | \
   storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
     ;;
+  android-linux)
+    os=-linux-android
+    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
   *)
     basic_machine=`echo $1 | sed 's/-[^-]*$//'`
     if [ $basic_machine != $1 ]
@@ -157,8 +160,8 @@ case $os in
 		os=
 		basic_machine=$1
 		;;
-        -bluegene*)
-	        os=-cnk
+	-bluegene*)
+		os=-cnk
 		;;
 	-sim | -cisco | -oki | -wec | -winbond)
 		os=
@@ -174,10 +177,10 @@ case $os in
 		os=-chorusos
 		basic_machine=$1
 		;;
- 	-chorusrdb)
- 		os=-chorusrdb
+	-chorusrdb)
+		os=-chorusrdb
 		basic_machine=$1
- 		;;
+		;;
 	-hiux*)
 		os=-hiuxwe2
 		;;
@@ -222,6 +225,12 @@ case $os in
 	-isc*)
 		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
 		;;
+	-lynx*178)
+		os=-lynxos178
+		;;
+	-lynx*5)
+		os=-lynxos5
+		;;
 	-lynx*)
 		os=-lynxos
 		;;
@@ -246,17 +255,22 @@ case $basic_machine in
 	# Some are omitted here because they have special meanings below.
 	1750a | 580 \
 	| a29k \
+	| aarch64 | aarch64_be \
 	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
 	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
 	| am33_2.0 \
 	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+        | be32 | be64 \
 	| bfin \
 	| c4x | clipper \
 	| d10v | d30v | dlx | dsp16xx \
+	| epiphany \
 	| fido | fr30 | frv \
 	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
 	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
+	| le32 | le64 \
 	| lm32 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
 	| maxq | mb | microblaze | mcore | mep | metag \
@@ -282,29 +296,39 @@ case $basic_machine in
 	| moxie \
 	| mt \
 	| msp430 \
+	| nds32 | nds32le | nds32be \
 	| nios | nios2 \
 	| ns16k | ns32k \
+	| open8 \
 	| or32 \
 	| pdp10 | pdp11 | pj | pjl \
-	| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
 	| pyramid \
-	| rx \
+	| rl78 | rx \
 	| score \
 	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
 	| sh64 | sh64le \
 	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
 	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
-	| spu | strongarm \
-	| tahoe | thumb | tic4x | tic80 | tron \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
 	| ubicom32 \
-	| v850 | v850e \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
 	| we32k \
-	| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
+	| x86 | xc16x | xstormy16 | xtensa \
 	| z8k | z80)
 		basic_machine=$basic_machine-unknown
 		;;
-	m6811 | m68hc11 | m6812 | m68hc12 | picochip)
-		# Motorola 68HC11/12.
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
 		basic_machine=$basic_machine-unknown
 		os=-none
 		;;
@@ -314,6 +338,21 @@ case $basic_machine in
 		basic_machine=mt-unknown
 		;;
 
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
+
 	# We use `pc' rather than `unknown'
 	# because (1) that's what they normally are, and
 	# (2) the word "unknown" tends to confuse beginning users.
@@ -328,21 +367,25 @@ case $basic_machine in
 	# Recognize the basic CPU types with company name.
 	580-* \
 	| a29k-* \
+	| aarch64-* | aarch64_be-* \
 	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
 	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
 	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
 	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
 	| avr-* | avr32-* \
+	| be32-* | be64-* \
 	| bfin-* | bs2000-* \
-	| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
 	| clipper-* | craynv-* | cydra-* \
 	| d10v-* | d30v-* | dlx-* \
 	| elxsi-* \
 	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
 	| h8300-* | h8500-* \
 	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
 	| i*86-* | i860-* | i960-* | ia64-* \
 	| ip2k-* | iq2000-* \
+	| le32-* | le64-* \
 	| lm32-* \
 	| m32c-* | m32r-* | m32rle-* \
 	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
@@ -368,26 +411,29 @@ case $basic_machine in
 	| mmix-* \
 	| mt-* \
 	| msp430-* \
+	| nds32-* | nds32le-* | nds32be-* \
 	| nios-* | nios2-* \
 	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
 	| orion-* \
 	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
-	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
 	| pyramid-* \
-	| romp-* | rs6000-* | rx-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
 	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
 	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
 	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
 	| sparclite-* \
-	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
-	| tahoe-* | thumb-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+	| tahoe-* \
 	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
-	| tile-* | tilegx-* \
+	| tile*-* \
 	| tron-* \
 	| ubicom32-* \
-	| v850-* | v850e-* | vax-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
 	| we32k-* \
-	| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
 	| xstormy16-* | xtensa*-* \
 	| ymp-* \
 	| z8k-* | z80-*)
@@ -412,7 +458,7 @@ case $basic_machine in
 		basic_machine=a29k-amd
 		os=-udi
 		;;
-    	abacus)
+	abacus)
 		basic_machine=abacus-unknown
 		;;
 	adobe68k)
@@ -482,11 +528,20 @@ case $basic_machine in
 		basic_machine=powerpc-ibm
 		os=-cnk
 		;;
+	c54x-*)
+		basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	c90)
 		basic_machine=c90-cray
 		os=-unicos
 		;;
-        cegcc)
+	cegcc)
 		basic_machine=arm-unknown
 		os=-cegcc
 		;;
@@ -518,7 +573,7 @@ case $basic_machine in
 		basic_machine=craynv-cray
 		os=-unicosmp
 		;;
-	cr16)
+	cr16 | cr16-*)
 		basic_machine=cr16-unknown
 		os=-elf
 		;;
@@ -676,7 +731,6 @@ case $basic_machine in
 	i370-ibm* | ibm*)
 		basic_machine=i370-ibm
 		;;
-# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
 	i*86v32)
 		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
 		os=-sysv32
@@ -734,7 +788,7 @@ case $basic_machine in
 		basic_machine=ns32k-utek
 		os=-sysv
 		;;
-        microblaze)
+	microblaze)
 		basic_machine=microblaze-xilinx
 		;;
 	mingw32)
@@ -773,10 +827,18 @@ case $basic_machine in
 	ms1-*)
 		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
 		;;
+	msys)
+		basic_machine=i386-pc
+		os=-msys
+		;;
 	mvs)
 		basic_machine=i370-ibm
 		os=-mvs
 		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
 	ncr3000)
 		basic_machine=i486-ncr
 		os=-sysv4
@@ -841,6 +903,12 @@ case $basic_machine in
 	np1)
 		basic_machine=np1-gould
 		;;
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
+		;;
 	nsr-tandem)
 		basic_machine=nsr-tandem
 		;;
@@ -923,9 +991,10 @@ case $basic_machine in
 		;;
 	power)	basic_machine=power-ibm
 		;;
-	ppc)	basic_machine=powerpc-unknown
+	ppc | ppcbe)	basic_machine=powerpc-unknown
 		;;
-	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
 		;;
 	ppcle | powerpclittle | ppc-le | powerpc-little)
 		basic_machine=powerpcle-unknown
@@ -1019,6 +1088,9 @@ case $basic_machine in
 		basic_machine=i860-stratus
 		os=-sysv4
 		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	sun2)
 		basic_machine=m68000-sun
 		;;
@@ -1075,25 +1147,8 @@ case $basic_machine in
 		basic_machine=t90-cray
 		os=-unicos
 		;;
-	tic54x | c54x*)
-		basic_machine=tic54x-unknown
-		os=-coff
-		;;
-	tic55x | c55x*)
-		basic_machine=tic55x-unknown
-		os=-coff
-		;;
-	tic6x | c6x*)
-		basic_machine=tic6x-unknown
-		os=-coff
-		;;
-        # This must be matched before tile*.
-        tilegx*)
-		basic_machine=tilegx-unknown
-		os=-linux-gnu
-		;;
 	tile*)
-		basic_machine=tile-unknown
+		basic_machine=$basic_machine-unknown
 		os=-linux-gnu
 		;;
 	tx39)
@@ -1163,6 +1218,9 @@ case $basic_machine in
 	xps | xps100)
 		basic_machine=xps100-honeywell
 		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+		;;
 	ymp)
 		basic_machine=ymp-cray
 		os=-unicos
@@ -1260,11 +1318,11 @@ esac
 if [ x"$os" != x"" ]
 then
 case $os in
-        # First match some system type aliases
-        # that might get confused with valid system types.
+	# First match some system type aliases
+	# that might get confused with valid system types.
 	# -solaris* is a basic system type, with this one exception.
-        -auroraux)
-	        os=-auroraux
+	-auroraux)
+		os=-auroraux
 		;;
 	-solaris1 | -solaris1.*)
 		os=`echo $os | sed -e 's|solaris1|sunos4|'`
@@ -1300,8 +1358,9 @@ case $os in
 	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
 	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
 	      | -chorusos* | -chorusrdb* | -cegcc* \
-	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -mingw32* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-uclibc* \
 	      | -uxpv* | -beos* | -mpeix* | -udk* \
 	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
 	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
@@ -1348,7 +1407,7 @@ case $os in
 	-opened*)
 		os=-openedition
 		;;
-        -os400*)
+	-os400*)
 		os=-os400
 		;;
 	-wince*)
@@ -1397,7 +1456,7 @@ case $os in
 	-sinix*)
 		os=-sysv4
 		;;
-        -tpf*)
+	-tpf*)
 		os=-tpf
 		;;
 	-triton*)
@@ -1442,8 +1501,8 @@ case $os in
 	-dicos*)
 		os=-dicos
 		;;
-        -nacl*)
-	        ;;
+	-nacl*)
+		;;
 	-none)
 		;;
 	*)
@@ -1466,10 +1525,10 @@ else
 # system, and we'll never get to this point.
 
 case $basic_machine in
-        score-*)
+	score-*)
 		os=-elf
 		;;
-        spu-*)
+	spu-*)
 		os=-elf
 		;;
 	*-acorn)
@@ -1481,8 +1540,20 @@ case $basic_machine in
 	arm*-semi)
 		os=-aout
 		;;
-        c4x-* | tic4x-*)
-        	os=-coff
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	hexagon-*)
+		os=-elf
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
 		;;
 	# This must come before the *-dec entry.
 	pdp10-*)
@@ -1502,14 +1573,11 @@ case $basic_machine in
 		;;
 	m68000-sun)
 		os=-sunos3
-		# This also exists in the configure program, but was not the
-		# default.
-		# os=-sunos4
 		;;
 	m68*-cisco)
 		os=-aout
 		;;
-        mep-*)
+	mep-*)
 		os=-elf
 		;;
 	mips*-cisco)
@@ -1536,7 +1604,7 @@ case $basic_machine in
 	*-ibm)
 		os=-aix
 		;;
-    	*-knuth)
+	*-knuth)
 		os=-mmixware
 		;;
 	*-wec)
diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 15aa404..293d5ac 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -754,6 +754,14 @@ Defaults to booting secondary processors.
 
 Default: `on`
 
+### pci
+> `= {no-}serr | {no-}perr`
+
+Disable signaling of SERR (system errors) and/or PERR (parity errors)
+on all PCI devices.
+
+Default: Signaling left as set by firmware.
+
 ### pci-phantom
 > `=[<seg>:]<bus>:<device>,<stride>`
 
diff --git a/m4/ax_compare_version.m4 b/m4/ax_compare_version.m4
new file mode 100644
index 0000000..26f4dec
--- /dev/null
+++ b/m4/ax_compare_version.m4
@@ -0,0 +1,179 @@
+# Fetched from http://git.savannah.gnu.org/gitweb/?p=autoconf-archive.git;a=blob_plain;f=m4/ax_compare_version.m4
+# Commit ID: 27948f49ca30e4222bb7cdd55182bd7341ac50c5
+# ===========================================================================
+#    http://www.gnu.org/software/autoconf-archive/ax_compare_version.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+#
+# DESCRIPTION
+#
+#   This macro compares two version strings. Due to the various number of
+#   minor-version numbers that can exist, and the fact that string
+#   comparisons are not compatible with numeric comparisons, this is not
+#   necessarily trivial to do in a autoconf script. This macro makes doing
+#   these comparisons easy.
+#
+#   The six basic comparisons are available, as well as checking equality
+#   limited to a certain number of minor-version levels.
+#
+#   The operator OP determines what type of comparison to do, and can be one
+#   of:
+#
+#    eq  - equal (test A == B)
+#    ne  - not equal (test A != B)
+#    le  - less than or equal (test A <= B)
+#    ge  - greater than or equal (test A >= B)
+#    lt  - less than (test A < B)
+#    gt  - greater than (test A > B)
+#
+#   Additionally, the eq and ne operator can have a number after it to limit
+#   the test to that number of minor versions.
+#
+#    eq0 - equal up to the length of the shorter version
+#    ne0 - not equal up to the length of the shorter version
+#    eqN - equal up to N sub-version levels
+#    neN - not equal up to N sub-version levels
+#
+#   When the condition is true, shell commands ACTION-IF-TRUE are run,
+#   otherwise shell commands ACTION-IF-FALSE are run. The environment
+#   variable 'ax_compare_version' is always set to either 'true' or 'false'
+#   as well.
+#
+#   Examples:
+#
+#     AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8])
+#     AX_COMPARE_VERSION([3.15],[lt],[3.15.8])
+#
+#   would both be true.
+#
+#     AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8])
+#     AX_COMPARE_VERSION([3.15],[gt],[3.15.8])
+#
+#   would both be false.
+#
+#     AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8])
+#
+#   would be true because it is only comparing two minor versions.
+#
+#     AX_COMPARE_VERSION([3.15.7],[eq0],[3.15])
+#
+#   would be true because it is only comparing the lesser number of minor
+#   versions of the two values.
+#
+#   Note: The characters that separate the version numbers do not matter. An
+#   empty string is the same as version 0. OP is evaluated by autoconf, not
+#   configure, so must be a string, not a variable.
+#
+#   The author would like to acknowledge Guido Draheim whose advice about
+#   the m4_case and m4_ifvaln functions make this macro only include the
+#   portions necessary to perform the specific comparison specified by the
+#   OP argument in the final configure script.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Tim Toolan <toolan at ele.uri.edu>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 11
+
+dnl #########################################################################
+AC_DEFUN([AX_COMPARE_VERSION], [
+  AC_REQUIRE([AC_PROG_AWK])
+
+  # Used to indicate true or false condition
+  ax_compare_version=false
+
+  # Convert the two version strings to be compared into a format that
+  # allows a simple string comparison.  The end result is that a version
+  # string of the form 1.12.5-r617 will be converted to the form
+  # 0001001200050617.  In other words, each number is zero padded to four
+  # digits, and non digits are removed.
+  AS_VAR_PUSHDEF([A],[ax_compare_version_A])
+  A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/[[^0-9]]//g'`
+
+  AS_VAR_PUSHDEF([B],[ax_compare_version_B])
+  B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
+                     -e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
+                     -e 's/[[^0-9]]//g'`
+
+  dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary
+  dnl # then the first line is used to determine if the condition is true.
+  dnl # The sed right after the echo is to remove any indented white space.
+  m4_case(m4_tolower($2),
+  [lt],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+  ],
+  [gt],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"`
+  ],
+  [le],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+  ],
+  [ge],[
+    ax_compare_version=`echo "x$A
+x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"`
+  ],[
+    dnl Split the operator from the subversion count if present.
+    m4_bmatch(m4_substr($2,2),
+    [0],[
+      # A count of zero means use the length of the shorter version.
+      # Determine the number of characters in A and B.
+      ax_compare_version_len_A=`echo "$A" | $AWK '{print(length)}'`
+      ax_compare_version_len_B=`echo "$B" | $AWK '{print(length)}'`
+
+      # Set A to no more than B's length and B to no more than A's length.
+      A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"`
+      B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"`
+    ],
+    [[0-9]+],[
+      # A count greater than zero means use only that many subversions
+      A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+      B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
+    ],
+    [.+],[
+      AC_WARNING(
+        [illegal OP numeric parameter: $2])
+    ],[])
+
+    # Pad zeros at end of numbers to make same length.
+    ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`"
+    B="$B`echo $A | sed 's/./0/g'`"
+    A="$ax_compare_version_tmp_A"
+
+    # Check for equality or inequality as necessary.
+    m4_case(m4_tolower(m4_substr($2,0,2)),
+    [eq],[
+      test "x$A" = "x$B" && ax_compare_version=true
+    ],
+    [ne],[
+      test "x$A" != "x$B" && ax_compare_version=true
+    ],[
+      AC_WARNING([illegal OP parameter: $2])
+    ])
+  ])
+
+  AS_VAR_POPDEF([A])dnl
+  AS_VAR_POPDEF([B])dnl
+
+  dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE.
+  if test "$ax_compare_version" = "true" ; then
+    m4_ifvaln([$4],[$4],[:])dnl
+    m4_ifvaln([$5],[else $5])dnl
+  fi
+]) dnl AX_COMPARE_VERSION
diff --git a/stubdom/grub/mini-os.c b/stubdom/grub/mini-os.c
index 8cecf90..64ab962 100644
--- a/stubdom/grub/mini-os.c
+++ b/stubdom/grub/mini-os.c
@@ -126,6 +126,9 @@ biosdisk (int read, int drive, struct geometry *geometry,
     if (i >= blk_nb)
         return -1;
 
+    if (sector + nsec > geometry->total_sectors)
+      return -1;
+
     aiocb.aio_dev = blk_dev[i];
     aiocb.aio_buf = addr;
     aiocb.aio_nbytes = (size_t)nsec * blk_info[i].sector_size;
diff --git a/tools/blktap2/drivers/block-qcow.c b/tools/blktap2/drivers/block-qcow.c
index d5053d4..b45bcaa 100644
--- a/tools/blktap2/drivers/block-qcow.c
+++ b/tools/blktap2/drivers/block-qcow.c
@@ -427,6 +427,7 @@ static uint64_t get_cluster_offset(struct tdqcow_state *s,
 
 		if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
 			DPRINTF("ERROR allocating memory for L1 table\n");
+                        return 0;
 		}
 		memcpy(tmp_ptr, l1_ptr, 4096);
 
@@ -600,6 +601,7 @@ found:
 		
 		if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
 			DPRINTF("ERROR allocating memory for L1 table\n");
+                        return 0;
 		}
 		memcpy(tmp_ptr2, l2_ptr, 4096);
 		lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
diff --git a/tools/configure b/tools/configure
index 3f2eecf..b06fcf9 100755
--- a/tools/configure
+++ b/tools/configure
@@ -648,6 +648,7 @@ CPP
 pyconfig
 PYTHONPATH
 CHECKPOLICY
+AWK
 OCAMLFIND
 OCAMLBUILD
 OCAMLDOC
@@ -3450,6 +3451,92 @@ esac
 
 
 
+# Fetched from http://git.savannah.gnu.org/gitweb/?p=autoconf-archive.git;a=blob_plain;f=m4/ax_compare_version.m4
+# Commit ID: 27948f49ca30e4222bb7cdd55182bd7341ac50c5
+# ===========================================================================
+#    http://www.gnu.org/software/autoconf-archive/ax_compare_version.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+#
+# DESCRIPTION
+#
+#   This macro compares two version strings. Due to the various number of
+#   minor-version numbers that can exist, and the fact that string
+#   comparisons are not compatible with numeric comparisons, this is not
+#   necessarily trivial to do in a autoconf script. This macro makes doing
+#   these comparisons easy.
+#
+#   The six basic comparisons are available, as well as checking equality
+#   limited to a certain number of minor-version levels.
+#
+#   The operator OP determines what type of comparison to do, and can be one
+#   of:
+#
+#    eq  - equal (test A == B)
+#    ne  - not equal (test A != B)
+#    le  - less than or equal (test A <= B)
+#    ge  - greater than or equal (test A >= B)
+#    lt  - less than (test A < B)
+#    gt  - greater than (test A > B)
+#
+#   Additionally, the eq and ne operator can have a number after it to limit
+#   the test to that number of minor versions.
+#
+#    eq0 - equal up to the length of the shorter version
+#    ne0 - not equal up to the length of the shorter version
+#    eqN - equal up to N sub-version levels
+#    neN - not equal up to N sub-version levels
+#
+#   When the condition is true, shell commands ACTION-IF-TRUE are run,
+#   otherwise shell commands ACTION-IF-FALSE are run. The environment
+#   variable 'ax_compare_version' is always set to either 'true' or 'false'
+#   as well.
+#
+#   Examples:
+#
+#     AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8])
+#     AX_COMPARE_VERSION([3.15],[lt],[3.15.8])
+#
+#   would both be true.
+#
+#     AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8])
+#     AX_COMPARE_VERSION([3.15],[gt],[3.15.8])
+#
+#   would both be false.
+#
+#     AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8])
+#
+#   would be true because it is only comparing two minor versions.
+#
+#     AX_COMPARE_VERSION([3.15.7],[eq0],[3.15])
+#
+#   would be true because it is only comparing the lesser number of minor
+#   versions of the two values.
+#
+#   Note: The characters that separate the version numbers do not matter. An
+#   empty string is the same as version 0. OP is evaluated by autoconf, not
+#   configure, so must be a string, not a variable.
+#
+#   The author would like to acknowledge Guido Draheim whose advice about
+#   the m4_case and m4_ifvaln functions make this macro only include the
+#   portions necessary to perform the specific comparison specified by the
+#   OP argument in the final configure script.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Tim Toolan <toolan at ele.uri.edu>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 11
+
+
 
 # Enable/disable options
 
@@ -4709,6 +4796,48 @@ then
 fi
 
 fi
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AWK+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+$as_echo "$AWK" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break
+done
+
 if test "x$ocamltools" = "xy"; then :
 
       # checking for ocamlc
@@ -5813,6 +5942,50 @@ fi
 fi
         ocamltools="n"
 
+else
+
+
+
+
+  # Used to indicate true or false condition
+  ax_compare_version=false
+
+  # Convert the two version strings to be compared into a format that
+  # allows a simple string comparison.  The end result is that a version
+  # string of the form 1.12.5-r617 will be converted to the form
+  # 0001001200050617.  In other words, each number is zero padded to four
+  # digits, and non digits are removed.
+
+  ax_compare_version_A=`echo "$OCAMLVERSION" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
+                     -e 's/Z\([0-9]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([0-9][0-9]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([0-9][0-9][0-9]\)Z/Z0\1Z/g' \
+                     -e 's/[^0-9]//g'`
+
+
+  ax_compare_version_B=`echo "3.09.3" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
+                     -e 's/Z\([0-9]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([0-9][0-9]\)Z/Z0\1Z/g' \
+                     -e 's/Z\([0-9][0-9][0-9]\)Z/Z0\1Z/g' \
+                     -e 's/[^0-9]//g'`
+
+
+    ax_compare_version=`echo "x$ax_compare_version_A
+x$ax_compare_version_B" | sed 's/^ *//' | sort -r | sed "s/x${ax_compare_version_A}/false/;s/x${ax_compare_version_B}/true/;1q"`
+
+
+
+    if test "$ax_compare_version" = "true" ; then
+
+            if test "x$enable_ocamltools" = "xyes"; then :
+
+                as_fn_error $? "Your version of OCaml: $OCAMLVERSION is not supported" "$LINENO" 5
+fi
+            ocamltools="n"
+
+      fi
+
+
 fi
 
 fi
@@ -8606,6 +8779,7 @@ gives unlimited permission to copy, distribute and modify it."
 ac_pwd='$ac_pwd'
 srcdir='$srcdir'
 INSTALL='$INSTALL'
+AWK='$AWK'
 test -n "\$AWK" || AWK=awk
 _ACEOF
 
diff --git a/tools/configure.ac b/tools/configure.ac
index 5083c7b..884d63d 100644
--- a/tools/configure.ac
+++ b/tools/configure.ac
@@ -46,6 +46,7 @@ m4_include([../m4/pthread.m4])
 m4_include([../m4/ptyfuncs.m4])
 m4_include([../m4/extfs.m4])
 m4_include([../m4/fetcher.m4])
+m4_include([../m4/ax_compare_version.m4])
 
 # Enable/disable options
 AX_ARG_DEFAULT_DISABLE([githttp], [Download GIT repositories via HTTP])
@@ -161,6 +162,12 @@ AS_IF([test "x$ocamltools" = "xy"], [
         AS_IF([test "x$enable_ocamltools" = "xyes"], [
             AC_MSG_ERROR([Ocaml tools enabled, but unable to find Ocaml])])
         ocamltools="n"
+    ], [
+        AX_COMPARE_VERSION([$OCAMLVERSION], [lt], [3.09.3], [
+            AS_IF([test "x$enable_ocamltools" = "xyes"], [
+                AC_MSG_ERROR([Your version of OCaml: $OCAMLVERSION is not supported])])
+            ocamltools="n"
+        ])
     ])
 ])
 AS_IF([test "x$xsmpolicy" = "xy"], [
diff --git a/tools/include/Makefile b/tools/include/Makefile
index 9ed41f1..f7a6256 100644
--- a/tools/include/Makefile
+++ b/tools/include/Makefile
@@ -13,7 +13,7 @@ xen/.dir:
 	mkdir -p xen/libelf
 	ln -sf $(XEN_ROOT)/xen/include/public/COPYING xen
 	ln -sf $(wildcard $(XEN_ROOT)/xen/include/public/*.h) xen
-	ln -sf $(addprefix $(XEN_ROOT)/xen/include/public/,arch-x86 hvm io xsm) xen
+	ln -sf $(addprefix $(XEN_ROOT)/xen/include/public/,arch-x86 arch-arm hvm io xsm) xen
 	ln -sf ../xen-sys/$(XEN_OS) xen/sys
 	ln -sf $(addprefix $(XEN_ROOT)/xen/include/xen/,libelf.h elfstructs.h) xen/libelf/
 	ln -s ../xen-foreign xen/foreign
@@ -23,6 +23,8 @@ xen/.dir:
 install: all
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86/hvm
+	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/arch-arm
+	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/arch-arm/hvm
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/foreign
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/hvm
 	$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)/xen/io
@@ -32,6 +34,11 @@ install: all
 	$(INSTALL_DATA) xen/*.h $(DESTDIR)$(INCLUDEDIR)/xen
 	$(INSTALL_DATA) xen/arch-x86/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86
 	$(INSTALL_DATA) xen/arch-x86/hvm/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-x86/hvm
+
+# 	xen/arch-arm doesn't contain any headers for now. Uncomment the line
+# 	below as soon as a new header is added
+#	$(INSTALL_DATA) xen/arch-arm/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-arm
+	$(INSTALL_DATA) xen/arch-arm/hvm/*.h $(DESTDIR)$(INCLUDEDIR)/xen/arch-arm/hvm
 	$(INSTALL_DATA) xen/foreign/*.h $(DESTDIR)$(INCLUDEDIR)/xen/foreign
 	$(INSTALL_DATA) xen/hvm/*.h $(DESTDIR)$(INCLUDEDIR)/xen/hvm
 	$(INSTALL_DATA) xen/io/*.h $(DESTDIR)$(INCLUDEDIR)/xen/io
diff --git a/tools/libxc/xc_dom.h b/tools/libxc/xc_dom.h
index 7099cee..c9af0ce 100644
--- a/tools/libxc/xc_dom.h
+++ b/tools/libxc/xc_dom.h
@@ -68,6 +68,14 @@ struct xc_dom_image {
 
     /* memory layout */
     struct xc_dom_seg kernel_seg;
+    /* If ramdisk_seg.vstart is non zero then the ramdisk will be
+     * loaded at that address, otherwise it will be automatically placed.
+     *
+     * If automatic placement is used and the ramdisk is gzip
+     * compressed then it will be decompressed as it is loaded. If the
+     * ramdisk has been explicitly placed then it is loaded as is, since
+     * decompressing it risks undoing the manual placement.
+     */
     struct xc_dom_seg ramdisk_seg;
     struct xc_dom_seg p2m_seg;
     struct xc_dom_seg pgtables_seg;
diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
index a40e04d..d5831a2 100644
--- a/tools/libxc/xc_dom_arm.c
+++ b/tools/libxc/xc_dom_arm.c
@@ -249,6 +249,37 @@ int arch_setup_meminit(struct xc_dom_image *dom)
 {
     int rc;
     xen_pfn_t pfn, allocsz, i;
+    uint64_t modbase;
+
+    /* Convenient */
+    const uint64_t rambase = dom->rambase_pfn << XC_PAGE_SHIFT;
+    const uint64_t ramsize = dom->total_pages << XC_PAGE_SHIFT;
+    const uint64_t ramend = rambase + ramsize;
+    const uint64_t kernbase = dom->kernel_seg.vstart;
+    const uint64_t kernend = ROUNDUP(dom->kernel_seg.vend, 21/*2MB*/);
+    const uint64_t kernsize = kernend - kernbase;
+    const uint64_t dtb_size = dom->devicetree_blob ?
+        ROUNDUP(dom->devicetree_size, XC_PAGE_SHIFT) : 0;
+    const uint64_t ramdisk_size = dom->ramdisk_blob ?
+        ROUNDUP(dom->ramdisk_size, XC_PAGE_SHIFT) : 0;
+    const uint64_t modsize = dtb_size + ramdisk_size;
+    const uint64_t ram128mb = rambase + (128<<20);
+
+    if ( modsize + kernsize > ramsize )
+    {
+        DOMPRINTF("%s: Not enough memory for the kernel+dtb+initrd",
+                  __FUNCTION__);
+        return -1;
+    }
+
+    if ( ramsize > GUEST_RAM_SIZE - NR_MAGIC_PAGES*XC_PAGE_SIZE )
+    {
+        DOMPRINTF("%s: ram size is too large for guest address space: "
+                  "%"PRIx64" > %llx",
+                  __FUNCTION__, ramsize,
+                  GUEST_RAM_SIZE - NR_MAGIC_PAGES*XC_PAGE_SIZE);
+        return -1;
+    }
 
     rc = set_mode(dom->xch, dom->guest_domid, dom->guest_type);
     if ( rc )
@@ -278,23 +309,52 @@ int arch_setup_meminit(struct xc_dom_image *dom)
             0, 0, &dom->p2m_host[i]);
     }
 
-    if ( dom->devicetree_blob )
+    /*
+     * We try to place dtb+initrd at 128MB or, if we have less RAM,
+     * as high as possible. If there is no space then fall back to
+     * just before the kernel.
+     *
+     * If changing this then consider
+     * xen/arch/arm/kernel.c:place_modules as well.
+     */
+    if ( ramend >= ram128mb + modsize && kernend < ram128mb )
+        modbase = ram128mb;
+    else if ( ramend - modsize > kernend )
+        modbase = ramend - modsize;
+    else if (kernbase - rambase > modsize )
+        modbase = kernbase - modsize;
+    else
+        return -1;
+
+    DOMPRINTF("%s: placing boot modules at 0x%" PRIx64, __FUNCTION__, modbase);
+
+    /*
+     * Must map DTB *after* initrd, to satisfy order of calls to
+     * xc_dom_alloc_segment in xc_dom_build_image, which must map
+     * things at monotonically increasing addresses.
+     */
+    if ( ramdisk_size )
     {
-        const uint64_t rambase = dom->rambase_pfn << XC_PAGE_SHIFT;
-        const uint64_t ramend = rambase + ( dom->total_pages << XC_PAGE_SHIFT );
-        const uint64_t dtbsize = ROUNDUP(dom->devicetree_size, XC_PAGE_SHIFT);
-
-        /* Place at 128MB if there is sufficient RAM */
-        if ( ramend >= rambase + 128*1024*1024 + dtbsize )
-            dom->devicetree_seg.vstart = rambase + 128*1024*1024;
-        else /* otherwise at top of RAM */
-            dom->devicetree_seg.vstart = ramend - dtbsize;
-
-        dom->devicetree_seg.vend =
-            dom->devicetree_seg.vstart + dom->devicetree_size;
+        dom->ramdisk_seg.vstart = modbase;
+        dom->ramdisk_seg.vend = modbase + ramdisk_size;
+
+        DOMPRINTF("%s: ramdisk: 0x%" PRIx64 " -> 0x%" PRIx64 "",
+                  __FUNCTION__,
+                  dom->ramdisk_seg.vstart, dom->ramdisk_seg.vend);
+
+        modbase += ramdisk_size;
+    }
+
+    if ( dtb_size )
+    {
+        dom->devicetree_seg.vstart = modbase;
+        dom->devicetree_seg.vend = modbase + dtb_size;
+
         DOMPRINTF("%s: devicetree: 0x%" PRIx64 " -> 0x%" PRIx64 "",
                   __FUNCTION__,
                   dom->devicetree_seg.vstart, dom->devicetree_seg.vend);
+
+        modbase += dtb_size;
     }
 
     return 0;
diff --git a/tools/libxc/xc_dom_armzimageloader.c b/tools/libxc/xc_dom_armzimageloader.c
index e6516a1..2b28781 100644
--- a/tools/libxc/xc_dom_armzimageloader.c
+++ b/tools/libxc/xc_dom_armzimageloader.c
@@ -51,7 +51,6 @@ struct minimal_dtb_header {
 static int xc_dom_probe_zimage32_kernel(struct xc_dom_image *dom)
 {
     uint32_t *zimage;
-    uint32_t end;
 
     if ( dom->kernel_blob == NULL )
     {
@@ -73,22 +72,6 @@ static int xc_dom_probe_zimage32_kernel(struct xc_dom_image *dom)
         return -EINVAL;
     }
 
-    end = zimage[ZIMAGE32_END_OFFSET/4];
-
-    /*
-     * Check for an appended DTB.
-     */
-    if ( end + sizeof(struct minimal_dtb_header) < dom->kernel_size ) {
-        struct minimal_dtb_header *dtb_hdr;
-        dtb_hdr = (struct minimal_dtb_header *)(dom->kernel_blob + end);
-        if (ntohl/*be32_to_cpu*/(dtb_hdr->magic) == DTB_MAGIC) {
-            xc_dom_printf(dom->xch, "%s: found an appended DTB", __FUNCTION__);
-            end += ntohl/*be32_to_cpu*/(dtb_hdr->total_size);
-        }
-    }
-
-    dom->kernel_size = end;
-
     return 0;
 }
 
@@ -105,8 +88,20 @@ static int xc_dom_parse_zimage32_kernel(struct xc_dom_image *dom)
 
     /* Do not load kernel at the very first RAM address */
     v_start = rambase + 0x8000;
+
+    if ( dom->kernel_size > UINT64_MAX - v_start )
+    {
+        DOMPRINTF("%s: kernel is too large\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
     v_end = v_start + dom->kernel_size;
 
+    /*
+     * If start is invalid then the guest will start at some invalid
+     * address and crash, but this happens in guest context so doesn't
+     * concern us here.
+     */
     start = zimage[ZIMAGE32_START_OFFSET/4];
 
     if (start == 0)
@@ -187,7 +182,20 @@ static int xc_dom_parse_zimage64_kernel(struct xc_dom_image *dom)
 
     zimage = dom->kernel_blob;
 
+    if ( zimage->text_offset > UINT64_MAX - rambase )
+    {
+        DOMPRINTF("%s: kernel text offset is too large\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
     v_start = rambase + zimage->text_offset;
+
+    if ( dom->kernel_size > UINT64_MAX - v_start )
+    {
+        DOMPRINTF("%s: kernel is too large\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
     v_end = v_start + dom->kernel_size;
 
     dom->kernel_seg.vstart = v_start;
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index b9d1015..baa62a1 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -955,13 +955,20 @@ int xc_dom_build_image(struct xc_dom_image *dom)
         size_t unziplen, ramdisklen;
         void *ramdiskmap;
 
-        unziplen = xc_dom_check_gzip(dom->xch, dom->ramdisk_blob, dom->ramdisk_size);
-        if ( xc_dom_ramdisk_check_size(dom, unziplen) != 0 )
+        if ( !dom->ramdisk_seg.vstart )
+        {
+            unziplen = xc_dom_check_gzip(dom->xch,
+                                         dom->ramdisk_blob, dom->ramdisk_size);
+            if ( xc_dom_ramdisk_check_size(dom, unziplen) != 0 )
+                unziplen = 0;
+        }
+        else
             unziplen = 0;
 
         ramdisklen = unziplen ? unziplen : dom->ramdisk_size;
 
-        if ( xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", 0,
+        if ( xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk",
+                                  dom->ramdisk_seg.vstart,
                                   ramdisklen) != 0 )
             goto err;
         ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg);
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index 33ed15b..0478f52 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -201,14 +201,14 @@ static int xc_interface_close_common(xc_interface *xch)
     if (!xch)
 	return 0;
 
+    rc = xch->ops->close(xch, xch->ops_handle);
+    if (rc) PERROR("Could not close hypervisor interface");
+
     xc__hypercall_buffer_cache_release(xch);
 
     xtl_logger_destroy(xch->dombuild_logger_tofree);
     xtl_logger_destroy(xch->error_handler_tofree);
 
-    rc = xch->ops->close(xch, xch->ops_handle);
-    if (rc) PERROR("Could not close hypervisor interface");
-
     free(xch);
     return rc;
 }
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 730f6e1..2d29ad2 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -3881,7 +3881,8 @@ int libxl_device_events_handler(libxl_ctx *ctx,
 
 out:
     GC_FREE;
-    return rc ? : AO_INPROGRESS;
+    if (rc) return AO_ABORT(rc);
+    return AO_INPROGRESS;
 }
 
 /******************************************************************************/
diff --git a/tools/libxl/libxl_arch.h b/tools/libxl/libxl_arch.h
index aee0a91..d3bc136 100644
--- a/tools/libxl/libxl_arch.h
+++ b/tools/libxl/libxl_arch.h
@@ -19,7 +19,12 @@
 int libxl__arch_domain_create(libxl__gc *gc, libxl_domain_config *d_config,
                uint32_t domid);
 
-int libxl__arch_domain_configure(libxl__gc *gc,
-                                 libxl_domain_build_info *info,
-                                 struct xc_dom_image *dom);
+/* setup arch specific hardware description, i.e. DTB on ARM */
+int libxl__arch_domain_init_hw_description(libxl__gc *gc,
+                                           libxl_domain_build_info *info,
+                                           struct xc_dom_image *dom);
+/* finalize arch specific hardware description. */
+int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
+                                      libxl_domain_build_info *info,
+                                      struct xc_dom_image *dom);
 #endif
diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
index 0a1c8c5..4f0f0e2 100644
--- a/tools/libxl/libxl_arm.c
+++ b/tools/libxl/libxl_arm.c
@@ -2,6 +2,7 @@
 #include "libxl_arch.h"
 
 #include <xc_dom.h>
+#include <stdbool.h>
 #include <libfdt.h>
 #include <assert.h>
 
@@ -31,6 +32,9 @@ typedef be32 gic_interrupt[3];
 #define ROOT_ADDRESS_CELLS 2
 #define ROOT_SIZE_CELLS 2
 
+#define PROP_INITRD_START "linux,initrd-start"
+#define PROP_INITRD_END "linux,initrd-end"
+
 static void set_cell(be32 **cellp, int size, uint64_t val)
 {
     int cells = size;
@@ -155,7 +159,7 @@ static int make_root_properties(libxl__gc *gc,
     return 0;
 }
 
-static int make_chosen_node(libxl__gc *gc, void *fdt,
+static int make_chosen_node(libxl__gc *gc, void *fdt, bool ramdisk,
                             const libxl_domain_build_info *info)
 {
     int res;
@@ -164,8 +168,19 @@ static int make_chosen_node(libxl__gc *gc, void *fdt,
     res = fdt_begin_node(fdt, "chosen");
     if (res) return res;
 
-    res = fdt_property_string(fdt, "bootargs", info->u.pv.cmdline);
-    if (res) return res;
+    if (info->u.pv.cmdline) {
+        res = fdt_property_string(fdt, "bootargs", info->u.pv.cmdline);
+        if (res) return res;
+    }
+
+    if (ramdisk) {
+        uint64_t dummy = 0;
+        LOG(DEBUG, "/chosen adding placeholder linux,initrd properties");
+        res = fdt_property(fdt, PROP_INITRD_START, &dummy, sizeof(dummy));
+        if (res) return res;
+        res = fdt_property(fdt, PROP_INITRD_END, &dummy, sizeof(dummy));
+        if (res) return res;
+    }
 
     res = fdt_end_node(fdt);
     if (res) return res;
@@ -410,9 +425,9 @@ out:
 
 #define FDT_MAX_SIZE (1<<20)
 
-int libxl__arch_domain_configure(libxl__gc *gc,
-                                 libxl_domain_build_info *info,
-                                 struct xc_dom_image *dom)
+int libxl__arch_domain_init_hw_description(libxl__gc *gc,
+                                           libxl_domain_build_info *info,
+                                           struct xc_dom_image *dom)
 {
     void *fdt = NULL;
     int rc, res;
@@ -473,7 +488,7 @@ next_resize:
         FDT( fdt_begin_node(fdt, "") );
 
         FDT( make_root_properties(gc, vers, fdt) );
-        FDT( make_chosen_node(gc, fdt, info) );
+        FDT( make_chosen_node(gc, fdt, !!dom->ramdisk_blob, info) );
         FDT( make_cpus_node(gc, fdt, info->max_vcpus, ainfo) );
         FDT( make_psci_node(gc, fdt) );
 
@@ -503,10 +518,49 @@ next_resize:
         goto out;
     }
 
-    debug_dump_fdt(gc, fdt);
-
     rc = 0;
 
 out:
     return rc;
 }
+
+int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
+                                               libxl_domain_build_info *info,
+                                               struct xc_dom_image *dom)
+{
+    void *fdt = dom->devicetree_blob;
+
+    const struct xc_dom_seg *ramdisk = dom->ramdisk_blob ?
+        &dom->ramdisk_seg : NULL;
+
+    if (ramdisk) {
+        int chosen, res;
+        uint64_t val;
+
+        /* Neither the fdt_path_offset() nor either of the
+         * fdt_setprop_inplace() calls can fail. If they do then
+         * make_chosen_node() (see above) has got something very
+         * wrong.
+         */
+        chosen = fdt_path_offset(fdt, "/chosen");
+        assert(chosen > 0);
+
+        LOG(DEBUG, "/chosen updating initrd properties to cover "
+            "%"PRIx64"-%"PRIx64,
+            ramdisk->vstart, ramdisk->vend);
+
+        val = cpu_to_fdt64(ramdisk->vstart);
+        res = fdt_setprop_inplace(fdt, chosen, PROP_INITRD_START,
+                                  &val, sizeof(val));
+        assert(!res);
+
+        val = cpu_to_fdt64(ramdisk->vend);
+        res = fdt_setprop_inplace(fdt, chosen,PROP_INITRD_END,
+                                  &val, sizeof(val));
+        assert(!res);
+    }
+
+    debug_dump_fdt(gc, fdt);
+
+    return 0;
+}
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index ba7d100..29ed547 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -775,6 +775,8 @@ void libxl__initiate_device_remove(libxl__egc *egc,
     uint32_t my_domid, domid = aodev->dev->domid;
     int rc = 0;
 
+    libxl_dominfo_init(&info);
+
     rc = libxl__get_domid(gc, &my_domid);
     if (rc) {
         LOG(ERROR, "unable to get my domid");
@@ -782,7 +784,6 @@ void libxl__initiate_device_remove(libxl__egc *egc,
     }
 
     if (my_domid == LIBXL_TOOLSTACK_DOMID) {
-        libxl_dominfo_init(&info);
         rc = libxl_domain_info(CTX, &info, domid);
         if (rc) {
             LOG(ERROR, "unable to get info for domain %d", domid);
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 55f74b2..69e6088 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -407,8 +407,8 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
         LOGE(ERROR, "xc_dom_parse_image failed");
         goto out;
     }
-    if ( (ret = libxl__arch_domain_configure(gc, info, dom)) != 0 ) {
-        LOGE(ERROR, "libxl__arch_domain_configure failed");
+    if ( (ret = libxl__arch_domain_init_hw_description(gc, info, dom)) != 0 ) {
+        LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
         goto out;
     }
     if ( (ret = xc_dom_mem_init(dom, info->target_memkb / 1024)) != 0 ) {
@@ -419,6 +419,10 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
         LOGE(ERROR, "xc_dom_boot_mem_init failed");
         goto out;
     }
+    if ( (ret = libxl__arch_domain_finalise_hw_description(gc, info, dom)) != 0 ) {
+        LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
+        goto out;
+    }
     if ( (ret = xc_dom_build_image(dom)) != 0 ) {
         LOGE(ERROR, "xc_dom_build_image failed");
         goto out;
diff --git a/tools/libxl/libxl_fork.c b/tools/libxl/libxl_fork.c
index 8421296..fa15095 100644
--- a/tools/libxl/libxl_fork.c
+++ b/tools/libxl/libxl_fork.c
@@ -184,9 +184,9 @@ void libxl_postfork_child_noexec(libxl_ctx *ctx)
 int libxl__carefd_close(libxl__carefd *cf)
 {
     if (!cf) return 0;
+    atfork_lock();
     int r = cf->fd < 0 ? 0 : close(cf->fd);
     int esave = errno;
-    atfork_lock();
     LIBXL_LIST_REMOVE(cf, entry);
     atfork_unlock();
     free(cf);
diff --git a/tools/libxl/libxl_json.c b/tools/libxl/libxl_json.c
index d2f7de8..989ac3f 100644
--- a/tools/libxl/libxl_json.c
+++ b/tools/libxl/libxl_json.c
@@ -285,7 +285,6 @@ void libxl__json_object_free(libxl__gc *gc, libxl__json_object *obj)
     }
     case JSON_ARRAY: {
         libxl__json_object *node = NULL;
-        break;
 
         for (idx = 0; idx < obj->u.array->count; idx++) {
             if (flexarray_get(obj->u.array, idx, (void**)&node) != 0)
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
index 20c99ac..4fac664 100644
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -180,6 +180,7 @@ static int nodemap_to_nr_vcpus(libxl__gc *gc, int vcpus_on_node[],
 /* Number of vcpus able to run on the cpus of the various nodes
  * (reported by filling the array vcpus_on_node[]). */
 static int nr_vcpus_on_nodes(libxl__gc *gc, libxl_cputopology *tinfo,
+                             size_t tinfo_elements,
                              const libxl_bitmap *suitable_cpumap,
                              int vcpus_on_node[])
 {
@@ -222,6 +223,8 @@ static int nr_vcpus_on_nodes(libxl__gc *gc, libxl_cputopology *tinfo,
              */
             libxl_bitmap_set_none(&nodes_counted);
             libxl_for_each_set_bit(k, vinfo[j].cpumap) {
+                if (k >= tinfo_elements)
+                    break;
                 int node = tinfo[k].node;
 
                 if (libxl_bitmap_test(suitable_cpumap, k) &&
@@ -364,7 +367,7 @@ int libxl__get_numa_candidate(libxl__gc *gc,
      * all we have to do later is summing up the right elements of the
      * vcpus_on_node array.
      */
-    rc = nr_vcpus_on_nodes(gc, tinfo, suitable_cpumap, vcpus_on_node);
+    rc = nr_vcpus_on_nodes(gc, tinfo, nr_cpus, suitable_cpumap, vcpus_on_node);
     if (rc)
         goto out;
 
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index 6e45b2f..e3bda8f 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -185,7 +185,11 @@ static void run_helper(libxl__egc *egc, libxl__save_helper_state *shs,
     for (childfd=0; childfd<2; childfd++) {
         /* Setting up the pipe for the child's fd childfd */
         int fds[2];
-        if (libxl_pipe(CTX,fds)) { rc = ERROR_FAIL; goto out; }
+        if (libxl_pipe(CTX,fds)) {
+            rc = ERROR_FAIL;
+            libxl__carefd_unlock();
+            goto out;
+        }
         int childs_end = childfd==0 ? 0 /*read*/  : 1 /*write*/;
         int our_end    = childfd==0 ? 1 /*write*/ : 0 /*read*/;
         childs_pipes[childfd] = libxl__carefd_record(CTX, fds[childs_end]);
diff --git a/tools/libxl/libxl_utils.c b/tools/libxl/libxl_utils.c
index c9cef66..1f334f2 100644
--- a/tools/libxl/libxl_utils.c
+++ b/tools/libxl/libxl_utils.c
@@ -762,8 +762,11 @@ int libxl_cpumap_to_nodemap(libxl_ctx *ctx,
     }
 
     libxl_bitmap_set_none(nodemap);
-    libxl_for_each_set_bit(i, *cpumap)
+    libxl_for_each_set_bit(i, *cpumap) {
+        if (i >= nr_cpus)
+            break;
         libxl_bitmap_set(nodemap, tinfo[i].node);
+    }
  out:
     libxl_cputopology_list_free(tinfo, nr_cpus);
     return rc;
diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index b11d036..7589060 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -311,9 +311,16 @@ int libxl__arch_domain_create(libxl__gc *gc, libxl_domain_config *d_config,
     return ret;
 }
 
-int libxl__arch_domain_configure(libxl__gc *gc,
-                                 libxl_domain_build_info *info,
-                                 struct xc_dom_image *dom)
+int libxl__arch_domain_init_hw_description(libxl__gc *gc,
+                                           libxl_domain_build_info *info,
+                                           struct xc_dom_image *dom)
+{
+    return 0;
+}
+
+int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
+                                               libxl_domain_build_info *info,
+                                               struct xc_dom_image *dom)
 {
     return 0;
 }
diff --git a/tools/pygrub/examples/rhel-7-beta.grub2 b/tools/pygrub/examples/rhel-7-beta.grub2
new file mode 100644
index 0000000..88f0f99
--- /dev/null
+++ b/tools/pygrub/examples/rhel-7-beta.grub2
@@ -0,0 +1,118 @@
+#
+# DO NOT EDIT THIS FILE
+#
+# It is automatically generated by grub2-mkconfig using templates
+# from /etc/grub.d and settings from /etc/default/grub
+#
+
+### BEGIN /etc/grub.d/00_header ###
+set pager=1
+
+if [ -s $prefix/grubenv ]; then
+  load_env
+fi
+if [ "${next_entry}" ] ; then
+   set default="${next_entry}"
+   set next_entry=
+   save_env next_entry
+   set boot_once=true
+else
+   set default="${saved_entry}"
+fi
+
+if [ x"${feature_menuentry_id}" = xy ]; then
+  menuentry_id_option="--id"
+else
+  menuentry_id_option=""
+fi
+
+export menuentry_id_option
+
+if [ "${prev_saved_entry}" ]; then
+  set saved_entry="${prev_saved_entry}"
+  save_env saved_entry
+  set prev_saved_entry=
+  save_env prev_saved_entry
+  set boot_once=true
+fi
+
+function savedefault {
+  if [ -z "${boot_once}" ]; then
+    saved_entry="${chosen}"
+    save_env saved_entry
+  fi
+}
+
+function load_video {
+  if [ x$feature_all_video_module = xy ]; then
+    insmod all_video
+  else
+    insmod efi_gop
+    insmod efi_uga
+    insmod ieee1275_fb
+    insmod vbe
+    insmod vga
+    insmod video_bochs
+    insmod video_cirrus
+  fi
+}
+
+terminal_output console
+set timeout=5
+### END /etc/grub.d/00_header ###
+
+### BEGIN /etc/grub.d/10_linux ###
+menuentry 'Red Hat Enterprise Linux Everything, with Linux 3.10.0-54.0.1.el7.x86_64' --class red --class gnu-linux --class gnu --class os $menuentry_id_option 'gnulinux-3.10.0-54.0.1.el7.x86_64-advanced-d23b8b49-4cfe-4900-8ef1-ec80bc633163' {
+	load_video
+	set gfxpayload=keep
+	insmod gzio
+	insmod part_msdos
+	insmod xfs
+	set root='hd0,msdos1'
+	if [ x$feature_platform_search_hint = xy ]; then
+	  search --no-floppy --fs-uuid --set=root --hint='hd0,msdos1'  89ffef78-82b3-457c-bc57-42cccc373851
+	else
+	  search --no-floppy --fs-uuid --set=root 89ffef78-82b3-457c-bc57-42cccc373851
+	fi
+	linux16 /vmlinuz-3.10.0-54.0.1.el7.x86_64 root=/dev/mapper/rhel-root ro rd.lvm.lv=rhel/swap vconsole.keymap=uk crashkernel=auto rd.lvm.lv=rhel/root vconsole.font=latarcyrheb-sun16 LANG=en_GB.UTF-8
+	initrd16 /initramfs-3.10.0-54.0.1.el7.x86_64.img
+}
+menuentry 'Red Hat Enterprise Linux Everything, with Linux 0-rescue-af34f0b8cf364cdbbe6d093f8228a37f' --class red --class gnu-linux --class gnu --class os $menuentry_id_option 'gnulinux-0-rescue-af34f0b8cf364cdbbe6d093f8228a37f-advanced-d23b8b49-4cfe-4900-8ef1-ec80bc633163' {
+	load_video
+	insmod gzio
+	insmod part_msdos
+	insmod xfs
+	set root='hd0,msdos1'
+	if [ x$feature_platform_search_hint = xy ]; then
+	  search --no-floppy --fs-uuid --set=root --hint='hd0,msdos1'  89ffef78-82b3-457c-bc57-42cccc373851
+	else
+	  search --no-floppy --fs-uuid --set=root 89ffef78-82b3-457c-bc57-42cccc373851
+	fi
+	linux16 /vmlinuz-0-rescue-af34f0b8cf364cdbbe6d093f8228a37f root=/dev/mapper/rhel-root ro rd.lvm.lv=rhel/swap vconsole.keymap=uk crashkernel=auto rd.lvm.lv=rhel/root vconsole.font=latarcyrheb-sun16
+	initrd16 /initramfs-0-rescue-af34f0b8cf364cdbbe6d093f8228a37f.img
+}
+
+### END /etc/grub.d/10_linux ###
+
+### BEGIN /etc/grub.d/20_linux_xen ###
+### END /etc/grub.d/20_linux_xen ###
+
+### BEGIN /etc/grub.d/20_ppc_terminfo ###
+### END /etc/grub.d/20_ppc_terminfo ###
+
+### BEGIN /etc/grub.d/30_os-prober ###
+### END /etc/grub.d/30_os-prober ###
+
+### BEGIN /etc/grub.d/40_custom ###
+# This file provides an easy way to add custom menu entries.  Simply type the
+# menu entries you want to add after this comment.  Be careful not to change
+# the 'exec tail' line above.
+### END /etc/grub.d/40_custom ###
+
+### BEGIN /etc/grub.d/41_custom ###
+if [ -f  ${config_directory}/custom.cfg ]; then
+  source ${config_directory}/custom.cfg
+elif [ -z "${config_directory}" -a -f  $prefix/custom.cfg ]; then
+  source $prefix/custom.cfg;
+fi
+### END /etc/grub.d/41_custom ###
diff --git a/tools/pygrub/src/GrubConf.py b/tools/pygrub/src/GrubConf.py
index cb853c9..974cded 100644
--- a/tools/pygrub/src/GrubConf.py
+++ b/tools/pygrub/src/GrubConf.py
@@ -348,7 +348,9 @@ class Grub2Image(_GrubImage):
                 
     commands = {'set:root': 'root',
                 'linux': 'kernel',
+                'linux16': 'kernel',
                 'initrd': 'initrd',
+                'initrd16': 'initrd',
                 'echo': None,
                 'insmod': None,
                 'search': None}
@@ -394,7 +396,7 @@ class Grub2ConfigFile(_GrubConfigFile):
                 continue
 
             # new image
-            title_match = re.match('^menuentry ["\'](.*)["\'] (.*){', l)
+            title_match = re.match('^menuentry ["\'](.*?)["\'] (.*){', l)
             if title_match:
                 if img is not None:
                     raise RuntimeError, "syntax error: cannot nest menuentry (%d %s)" % (len(img),img)
diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
index ee4e741..45a7290 100644
--- a/tools/pygrub/src/pygrub
+++ b/tools/pygrub/src/pygrub
@@ -760,7 +760,7 @@ if __name__ == "__main__":
         usage()
         sys.exit(1)
     file = args[0]
-        
+    fs = None
     output = None
     entry = None
     interactive = True
@@ -882,7 +882,7 @@ if __name__ == "__main__":
         sys.exit(0)
 
     # Did looping through partitions find us a kernel?
-    if not fs:
+    if fs is None:
         raise RuntimeError, "Unable to find partition containing kernel"
 
     bootcfg["kernel"] = copy_from_image(fs, chosencfg["kernel"], "kernel",
diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c
index 7404ee3..3166674 100644
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -597,23 +597,32 @@ int main(int argc, char **argv)
     printf("skipped\n");
 #endif
 
+#define decl_insn(which) extern const unsigned char which[], which##_len[]
+#define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
+                              #which ": " insn "\n"                     \
+                              ".equ " #which "_len, .-" #which "\n"     \
+                              ".popsection"
+#define set_insn(which) (regs.eip = (unsigned long)memcpy(instr, which, \
+                                             (unsigned long)which##_len))
+#define check_eip(which) (regs.eip == (unsigned long)instr + \
+                                      (unsigned long)which##_len)
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
-        extern const unsigned char movq_to_mem[];
+        decl_insn(movq_to_mem);
 
         asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "movq_to_mem: movq %%mm3, (%0)\n"
-                       ".popsection" :: "c" (NULL) );
+                       put_insn(movq_to_mem, "movq %%mm3, (%0)")
+                       :: "c" (NULL) );
 
-        memcpy(instr, movq_to_mem, 15);
+        set_insn(movq_to_mem);
         memset(res, 0x33, 64);
         memset(res + 8, 0xff, 8);
-        regs.eip    = (unsigned long)&instr[0];
         regs.ecx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+             !check_eip(movq_to_mem) )
             goto fail;
         printf("okay\n");
     }
@@ -623,19 +632,17 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing movq (%edx),%mm5...");
     if ( stack_exec && cpu_has_mmx )
     {
-        extern const unsigned char movq_from_mem[];
+        decl_insn(movq_from_mem);
 
         asm volatile ( "pcmpgtb %%mm5, %%mm5\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "movq_from_mem: movq (%0), %%mm5\n"
-                       ".popsection" :: "d" (NULL) );
+                       put_insn(movq_from_mem, "movq (%0), %%mm5")
+                       :: "d" (NULL) );
 
-        memcpy(instr, movq_from_mem, 15);
-        regs.eip    = (unsigned long)&instr[0];
+        set_insn(movq_from_mem);
         regs.ecx    = 0;
         regs.edx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( rc != X86EMUL_OKAY )
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_from_mem) )
             goto fail;
         asm ( "pcmpeqb %%mm3, %%mm3\n\t"
               "pcmpeqb %%mm5, %%mm3\n\t"
@@ -650,20 +657,19 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
-        extern const unsigned char movdqu_to_mem[];
+        decl_insn(movdqu_to_mem);
 
         asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "movdqu_to_mem: movdqu %%xmm2, (%0)\n"
-                       ".popsection" :: "c" (NULL) );
+                       put_insn(movdqu_to_mem, "movdqu %%xmm2, (%0)")
+                       :: "c" (NULL) );
 
-        memcpy(instr, movdqu_to_mem, 15);
+        set_insn(movdqu_to_mem);
         memset(res, 0x55, 64);
         memset(res + 8, 0xff, 16);
-        regs.eip    = (unsigned long)&instr[0];
         regs.ecx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+             !check_eip(movdqu_to_mem) )
             goto fail;
         printf("okay\n");
     }
@@ -673,19 +679,17 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing movdqu (%edx),%xmm4...");
     if ( stack_exec && cpu_has_sse2 )
     {
-        extern const unsigned char movdqu_from_mem[];
+        decl_insn(movdqu_from_mem);
 
         asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "movdqu_from_mem: movdqu (%0), %%xmm4\n"
-                       ".popsection" :: "d" (NULL) );
+                       put_insn(movdqu_from_mem, "movdqu (%0), %%xmm4")
+                       :: "d" (NULL) );
 
-        memcpy(instr, movdqu_from_mem, 15);
-        regs.eip    = (unsigned long)&instr[0];
+        set_insn(movdqu_from_mem);
         regs.ecx    = 0;
         regs.edx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( rc != X86EMUL_OKAY )
+        if ( rc != X86EMUL_OKAY || !check_eip(movdqu_from_mem) )
             goto fail;
         asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
               "pcmpeqb %%xmm4, %%xmm2\n\t"
@@ -700,21 +704,20 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)...");
     if ( stack_exec && cpu_has_avx )
     {
-        extern const unsigned char vmovdqu_to_mem[];
+        decl_insn(vmovdqu_to_mem);
 
         asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n"
-                       ".popsection" :: "c" (NULL) );
+                       put_insn(vmovdqu_to_mem, "vmovdqu %%ymm2, (%0)")
+                       :: "c" (NULL) );
 
-        memcpy(instr, vmovdqu_to_mem, 15);
+        set_insn(vmovdqu_to_mem);
         memset(res, 0x55, 128);
         memset(res + 16, 0xff, 16);
         memset(res + 20, 0x00, 16);
-        regs.eip    = (unsigned long)&instr[0];
         regs.ecx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) )
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ||
+             !check_eip(vmovdqu_to_mem) )
             goto fail;
         printf("okay\n");
     }
@@ -724,7 +727,7 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing vmovdqu (%edx),%ymm4...");
     if ( stack_exec && cpu_has_avx )
     {
-        extern const unsigned char vmovdqu_from_mem[];
+        decl_insn(vmovdqu_from_mem);
 
 #if 0 /* Don't use AVX2 instructions for now */
         asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
@@ -732,17 +735,15 @@ int main(int argc, char **argv)
         asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
                        "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
 #endif
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n"
-                       ".popsection" :: "d" (NULL) );
+                       put_insn(vmovdqu_from_mem, "vmovdqu (%0), %%ymm4")
+                       :: "d" (NULL) );
 
-        memcpy(instr, vmovdqu_from_mem, 15);
+        set_insn(vmovdqu_from_mem);
         memset(res + 4, 0xff, 16);
-        regs.eip    = (unsigned long)&instr[0];
         regs.ecx    = 0;
         regs.edx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( rc != X86EMUL_OKAY )
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovdqu_from_mem) )
             goto fail;
 #if 0 /* Don't use AVX2 instructions for now */
         asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
@@ -769,20 +770,19 @@ int main(int argc, char **argv)
     memset(res + 10, 0x66, 8);
     if ( stack_exec && cpu_has_sse2 )
     {
-        extern const unsigned char movsd_to_mem[];
+        decl_insn(movsd_to_mem);
 
         asm volatile ( "movlpd %0, %%xmm5\n\t"
                        "movhpd %0, %%xmm5\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "movsd_to_mem: movsd %%xmm5, (%1)\n"
-                       ".popsection" :: "m" (res[10]), "c" (NULL) );
+                       put_insn(movsd_to_mem, "movsd %%xmm5, (%1)")
+                       :: "m" (res[10]), "c" (NULL) );
 
-        memcpy(instr, movsd_to_mem, 15);
-        regs.eip    = (unsigned long)&instr[0];
+        set_insn(movsd_to_mem);
         regs.ecx    = (unsigned long)(res + 2);
         regs.edx    = 0;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+             !check_eip(movsd_to_mem) )
             goto fail;
         printf("okay\n");
     }
@@ -795,19 +795,17 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing movaps (%edx),%xmm7...");
     if ( stack_exec && cpu_has_sse )
     {
-        extern const unsigned char movaps_from_mem[];
+        decl_insn(movaps_from_mem);
 
         asm volatile ( "xorps %%xmm7, %%xmm7\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "movaps_from_mem: movaps (%0), %%xmm7\n"
-                       ".popsection" :: "d" (NULL) );
+                       put_insn(movaps_from_mem, "movaps (%0), %%xmm7")
+                       :: "d" (NULL) );
 
-        memcpy(instr, movaps_from_mem, 15);
-        regs.eip    = (unsigned long)&instr[0];
+        set_insn(movaps_from_mem);
         regs.ecx    = 0;
         regs.edx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( rc != X86EMUL_OKAY )
+        if ( rc != X86EMUL_OKAY || !check_eip(movaps_from_mem) )
             goto fail;
         asm ( "cmpeqps %1, %%xmm7\n\t"
               "movmskps %%xmm7, %0" : "=r" (rc) : "m" (res[8]) );
@@ -823,19 +821,18 @@ int main(int argc, char **argv)
     memset(res + 10, 0x77, 8);
     if ( stack_exec && cpu_has_avx )
     {
-        extern const unsigned char vmovsd_to_mem[];
+        decl_insn(vmovsd_to_mem);
 
         asm volatile ( "vbroadcastsd %0, %%ymm5\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n"
-                       ".popsection" :: "m" (res[10]), "c" (NULL) );
+                       put_insn(vmovsd_to_mem, "vmovsd %%xmm5, (%1)")
+                       :: "m" (res[10]), "c" (NULL) );
 
-        memcpy(instr, vmovsd_to_mem, 15);
-        regs.eip    = (unsigned long)&instr[0];
+        set_insn(vmovsd_to_mem);
         regs.ecx    = (unsigned long)(res + 2);
         regs.edx    = 0;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+             !check_eip(vmovsd_to_mem) )
             goto fail;
         printf("okay\n");
     }
@@ -848,19 +845,17 @@ int main(int argc, char **argv)
     printf("%-40s", "Testing vmovaps (%edx),%ymm7...");
     if ( stack_exec && cpu_has_avx )
     {
-        extern const unsigned char vmovaps_from_mem[];
+        decl_insn(vmovaps_from_mem);
 
         asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n"
-                       ".pushsection .test, \"a\", @progbits\n"
-                       "vmovaps_from_mem: vmovaps (%0), %%ymm7\n"
-                       ".popsection" :: "d" (NULL) );
+                       put_insn(vmovaps_from_mem, "vmovaps (%0), %%ymm7")
+                       :: "d" (NULL) );
 
-        memcpy(instr, vmovaps_from_mem, 15);
-        regs.eip    = (unsigned long)&instr[0];
+        set_insn(vmovaps_from_mem);
         regs.ecx    = 0;
         regs.edx    = (unsigned long)res;
         rc = x86_emulate(&ctxt, &emulops);
-        if ( rc != X86EMUL_OKAY )
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovaps_from_mem) )
             goto fail;
         asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t"
               "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) );
@@ -871,6 +866,11 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+#undef decl_insn
+#undef put_insn
+#undef set_insn
+#undef check_eip
+
     for ( j = 1; j <= 2; j++ )
     {
 #if defined(__i386__)
diff --git a/xen/Makefile b/xen/Makefile
index 39839a3..134a8bd 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -2,7 +2,7 @@
 # All other places this is stored (eg. compile.h) should be autogenerated.
 export XEN_VERSION       = 4
 export XEN_SUBVERSION    = 4
-export XEN_EXTRAVERSION ?= .0$(XEN_VENDORVERSION)
+export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION)
 export XEN_FULLVERSION   = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
 -include xen-version
 
diff --git a/xen/arch/arm/Rules.mk b/xen/arch/arm/Rules.mk
index aaa203e..2db39ad 100644
--- a/xen/arch/arm/Rules.mk
+++ b/xen/arch/arm/Rules.mk
@@ -27,6 +27,7 @@ endif
 
 ifeq ($(TARGET_SUBARCH),arm64)
 CFLAGS += -mcpu=generic
+CFLAGS += -mgeneral-regs-only # No fp registers etc
 arm32 := n
 arm64 := y
 endif
@@ -99,4 +100,12 @@ CFLAGS-$(EARLY_PRINTK) += -DEARLY_PRINTK_INC=\"debug-$(EARLY_PRINTK_INC).inc\"
 CFLAGS-$(EARLY_PRINTK) += -DEARLY_PRINTK_BAUD=$(EARLY_PRINTK_BAUD)
 CFLAGS-$(EARLY_PRINTK) += -DEARLY_UART_BASE_ADDRESS=$(EARLY_UART_BASE_ADDRESS)
 CFLAGS-$(EARLY_PRINTK) += -DEARLY_UART_REG_SHIFT=$(EARLY_UART_REG_SHIFT)
+
+else # !debug
+
+ifneq ($(CONFIG_EARLY_PRINTK),)
+# Early printk is dependent on a debug build.
+$(error CONFIG_EARLY_PRINTK enabled for non-debug build)
+endif
+
 endif
diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S
index 96230ac..5bc23cc 100644
--- a/xen/arch/arm/arm32/head.S
+++ b/xen/arch/arm/arm32/head.S
@@ -372,9 +372,20 @@ paging:
         ldr   r4, =BOOT_FDT_VIRT_START
         mov   r4, r4, lsr #18        /* Slot for BOOT_FDT_VIRT_START */
         strd  r2, r3, [r1, r4]       /* Map it in the early fdt slot */
-        dsb
 1:
 
+        /*
+         * Flush the TLB in case the 1:1 mapping happens to clash with
+         * the virtual addresses used by the fixmap or DTB.
+         */
+        dsb                          /* Ensure any page table updates made above
+                                      * have occurred. */
+
+        isb
+        mcr   CP32(r0, TLBIALLH)     /* Flush hypervisor TLB */
+        dsb                          /* Ensure completion of TLB flush */
+        isb
+
         PRINT("- Ready -\r\n")
 
         /* The boot CPU should go straight into C now */
diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 31afdd0..be44f67 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -382,9 +382,20 @@ paging:
         ldr   x1, =BOOT_FDT_VIRT_START
         lsr   x1, x1, #18            /* x4 := Slot for BOOT_FDT_VIRT_START */
         str   x2, [x4, x1]           /* Map it in the early fdt slot */
-        dsb   sy
 1:
 
+        /*
+         * Flush the TLB in case the 1:1 mapping happens to clash with
+         * the virtual addresses used by the fixmap or DTB.
+         */
+        dsb   sy                     /* Ensure any page table updates made above
+                                      * have occurred. */
+
+        isb
+        tlbi  alle2
+        dsb   sy                     /* Ensure completion of TLB flush */
+        isb
+
         PRINT("- Ready -\r\n")
 
         /* The boot CPU should go straight into C now */
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 8f20fdf..67de12a 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -59,11 +59,12 @@ void idle_loop(void)
 
 static void ctxt_switch_from(struct vcpu *p)
 {
+    p2m_save_state(p);
+
     /* CP 15 */
     p->arch.csselr = READ_SYSREG(CSSELR_EL1);
 
     /* Control Registers */
-    p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
     p->arch.cpacr = READ_SYSREG(CPACR_EL1);
 
     p->arch.contextidr = READ_SYSREG(CONTEXTIDR_EL1);
@@ -72,6 +73,7 @@ static void ctxt_switch_from(struct vcpu *p)
     p->arch.tpidr_el1 = READ_SYSREG(TPIDR_EL1);
 
     /* Arch timer */
+    p->arch.cntkctl = READ_SYSREG32(CNTKCTL_EL1);
     virt_timer_save(p);
 
     if ( is_pv32_domain(p->domain) && cpu_has_thumbee )
@@ -134,14 +136,7 @@ static void ctxt_switch_from(struct vcpu *p)
 
 static void ctxt_switch_to(struct vcpu *n)
 {
-    register_t hcr;
-
-    hcr = READ_SYSREG(HCR_EL2);
-    WRITE_SYSREG(hcr & ~HCR_VM, HCR_EL2);
-    isb();
-
-    p2m_load_VTTBR(n->domain);
-    isb();
+    p2m_restore_state(n);
 
     WRITE_SYSREG32(n->domain->arch.vpidr, VPIDR_EL2);
     WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);
@@ -189,7 +184,6 @@ static void ctxt_switch_to(struct vcpu *n)
     isb();
 
     /* Control Registers */
-    WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
     WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);
 
     WRITE_SYSREG(n->arch.contextidr, CONTEXTIDR_EL1);
@@ -214,16 +208,9 @@ static void ctxt_switch_to(struct vcpu *n)
 
     isb();
 
-    if ( is_pv32_domain(n->domain) )
-        hcr &= ~HCR_RW;
-    else
-        hcr |= HCR_RW;
-
-    WRITE_SYSREG(hcr, HCR_EL2);
-    isb();
-
     /* This is could trigger an hardware interrupt from the virtual
      * timer. The interrupt needs to be injected into the guest. */
+    WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1);
     virt_timer_restore(n);
 }
 
@@ -407,7 +394,7 @@ struct domain *alloc_domain_struct(void)
         return NULL;
 
     clear_page(d);
-    d->arch.grant_table_gpfn = xmalloc_array(xen_pfn_t, max_nr_grant_frames);
+    d->arch.grant_table_gpfn = xzalloc_array(xen_pfn_t, max_nr_grant_frames);
     return d;
 }
 
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 5ca2f15..8cbd776 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -967,7 +967,7 @@ static void initrd_load(struct kernel_info *kinfo)
         s = offs & ~PAGE_MASK;
         l = min(PAGE_SIZE - s, len);
 
-        rc = gvirt_to_maddr(load_addr + offs, &ma);
+        rc = gvirt_to_maddr(load_addr + offs, &ma, GV2M_WRITE);
         if ( rc )
         {
             panic("Unable to translate guest address");
@@ -986,6 +986,7 @@ static void initrd_load(struct kernel_info *kinfo)
 int construct_dom0(struct domain *d)
 {
     struct kernel_info kinfo = {};
+    struct vcpu *saved_current;
     int rc, i, cpu;
 
     struct vcpu *v = d->vcpu[0];
@@ -1020,15 +1021,13 @@ int construct_dom0(struct domain *d)
     if ( rc < 0 )
         return rc;
 
-    /* The following loads use the domain's p2m */
-    p2m_load_VTTBR(d);
-#ifdef CONFIG_ARM_64
-    d->arch.type = kinfo.type;
-    if ( is_pv32_domain(d) )
-        WRITE_SYSREG(READ_SYSREG(HCR_EL2) & ~HCR_RW, HCR_EL2);
-    else
-        WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_RW, HCR_EL2);
-#endif
+    /*
+     * The following loads use the domain's p2m and require current to
+     * be a vcpu of the domain, temporarily switch
+     */
+    saved_current = current;
+    p2m_restore_state(v);
+    set_current(v);
 
     /*
      * kernel_load will determine the placement of the initrd & fdt in
@@ -1039,6 +1038,10 @@ int construct_dom0(struct domain *d)
     initrd_load(&kinfo);
     dtb_load(&kinfo);
 
+    /* Now that we are done restore the original p2m and current. */
+    set_current(saved_current);
+    p2m_restore_state(saved_current);
+
     discard_initial_modules();
 
     v->is_initialised = 1;
diff --git a/xen/arch/arm/guestcopy.c b/xen/arch/arm/guestcopy.c
index cea5f97..0173597 100644
--- a/xen/arch/arm/guestcopy.c
+++ b/xen/arch/arm/guestcopy.c
@@ -1,6 +1,8 @@
 #include <xen/config.h>
 #include <xen/lib.h>
 #include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/current.h>
 
 #include <asm/mm.h>
 #include <asm/guest_access.h>
@@ -13,20 +15,22 @@ static unsigned long raw_copy_to_guest_helper(void *to, const void *from,
 
     while ( len )
     {
-        paddr_t g;
         void *p;
         unsigned size = min(len, (unsigned)PAGE_SIZE - offset);
+        struct page_info *page;
 
-        if ( gvirt_to_maddr((vaddr_t) to, &g) )
+        page = get_page_from_gva(current->domain, (vaddr_t) to, GV2M_WRITE);
+        if ( page == NULL )
             return len;
 
-        p = map_domain_page(g>>PAGE_SHIFT);
+        p = __map_domain_page(page);
         p += offset;
         memcpy(p, from, size);
         if ( flush_dcache )
             clean_xen_dcache_va_range(p, size);
 
         unmap_domain_page(p - offset);
+        put_page(page);
         len -= size;
         from += size;
         to += size;
@@ -58,18 +62,20 @@ unsigned long raw_clear_guest(void *to, unsigned len)
 
     while ( len )
     {
-        paddr_t g;
         void *p;
         unsigned size = min(len, (unsigned)PAGE_SIZE - offset);
+        struct page_info *page;
 
-        if ( gvirt_to_maddr((vaddr_t) to, &g) )
+        page = get_page_from_gva(current->domain, (vaddr_t) to, GV2M_WRITE);
+        if ( page == NULL )
             return len;
 
-        p = map_domain_page(g>>PAGE_SHIFT);
+        p = __map_domain_page(page);
         p += offset;
         memset(p, 0x00, size);
 
         unmap_domain_page(p - offset);
+        put_page(page);
         len -= size;
         to += size;
         /*
@@ -88,19 +94,21 @@ unsigned long raw_copy_from_guest(void *to, const void __user *from, unsigned le
 
     while ( len )
     {
-        paddr_t g;
         void *p;
         unsigned size = min(len, (unsigned)(PAGE_SIZE - offset));
+        struct page_info *page;
 
-        if ( gvirt_to_maddr((vaddr_t) from & PAGE_MASK, &g) )
+        page = get_page_from_gva(current->domain, (vaddr_t) from, GV2M_READ);
+        if ( page == NULL )
             return len;
 
-        p = map_domain_page(g>>PAGE_SHIFT);
+        p = __map_domain_page(page);
         p += ((vaddr_t)from & (~PAGE_MASK));
 
         memcpy(to, p, size);
 
         unmap_domain_page(p);
+        put_page(page);
         len -= size;
         from += size;
         to += size;
diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
index 1e3107d..69c7d43 100644
--- a/xen/arch/arm/kernel.c
+++ b/xen/arch/arm/kernel.c
@@ -141,7 +141,7 @@ static void kernel_zimage_load(struct kernel_info *info)
         s = offs & ~PAGE_MASK;
         l = min(PAGE_SIZE - s, len);
 
-        rc = gvirt_to_maddr(load_addr + offs, &ma);
+        rc = gvirt_to_maddr(load_addr + offs, &ma, GV2M_WRITE);
         if ( rc )
         {
             panic("Unable to map translate guest address");
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index d00c882..bc81b67 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -44,6 +44,34 @@ void p2m_load_VTTBR(struct domain *d)
     isb(); /* Ensure update is visible */
 }
 
+void p2m_save_state(struct vcpu *p)
+{
+    p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
+}
+
+void p2m_restore_state(struct vcpu *n)
+{
+    register_t hcr;
+
+    hcr = READ_SYSREG(HCR_EL2);
+    WRITE_SYSREG(hcr & ~HCR_VM, HCR_EL2);
+    isb();
+
+    p2m_load_VTTBR(n->domain);
+    isb();
+
+    if ( is_pv32_domain(n->domain) )
+        hcr &= ~HCR_RW;
+    else
+        hcr |= HCR_RW;
+
+    WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
+    isb();
+
+    WRITE_SYSREG(hcr, HCR_EL2);
+    isb();
+}
+
 static int p2m_first_level_index(paddr_t addr)
 {
     /*
@@ -655,6 +683,34 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
     return p >> PAGE_SHIFT;
 }
 
+struct page_info *get_page_from_gva(struct domain *d, vaddr_t va,
+                                    unsigned long flags)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+    struct page_info *page = NULL;
+    paddr_t maddr;
+
+    ASSERT(d == current->domain);
+
+    spin_lock(&p2m->lock);
+
+    if ( gvirt_to_maddr(va, &maddr, flags) )
+        goto err;
+
+    if ( !mfn_valid(maddr >> PAGE_SHIFT) )
+        goto err;
+
+    page = mfn_to_page(maddr >> PAGE_SHIFT);
+    ASSERT(page);
+
+    if ( unlikely(!get_page(page, d)) )
+        page = NULL;
+
+err:
+    spin_unlock(&p2m->lock);
+    return page;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/psci.c b/xen/arch/arm/psci.c
index 25a8697..b6360d5 100644
--- a/xen/arch/arm/psci.c
+++ b/xen/arch/arm/psci.c
@@ -54,7 +54,8 @@ static uint32_t psci_cpu_on_nr;
 
 int call_psci_cpu_on(int cpu)
 {
-    return __invoke_psci_fn_smc(psci_cpu_on_nr, cpu, __pa(init_secondary), 0);
+    return __invoke_psci_fn_smc(psci_cpu_on_nr,
+                                cpu_logical_map(cpu), __pa(init_secondary), 0);
 }
 
 int __init psci_init(void)
@@ -96,3 +97,12 @@ int __init psci_init(void)
 
     return 0;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
index 81e3e28..5374bf7 100644
--- a/xen/arch/arm/time.c
+++ b/xen/arch/arm/time.c
@@ -234,7 +234,6 @@ void __cpuinit init_timer_interrupt(void)
 {
     /* Sensible defaults */
     WRITE_SYSREG64(0, CNTVOFF_EL2);     /* No VM-specific offset */
-    WRITE_SYSREG32(0, CNTKCTL_EL1);     /* No user-mode access */
 #if USE_HYP_TIMER
     /* Do not let the VMs program the physical timer, only read the physical counter */
     WRITE_SYSREG32(CNTHCTL_PA, CNTHCTL_EL2);
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 21c7b26..4c910c8 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -17,6 +17,7 @@
  */
 
 #include <xen/config.h>
+#include <xen/stdbool.h>
 #include <xen/init.h>
 #include <xen/string.h>
 #include <xen/version.h>
@@ -74,9 +75,22 @@ void __cpuinit init_traps(void)
     /* Setup Hyp vector base */
     WRITE_SYSREG((vaddr_t)hyp_traps_vector, VBAR_EL2);
 
+    /* Trap Debug and Performance Monitor accesses */
+    WRITE_SYSREG(HDCR_TDRA|HDCR_TDOSA|HDCR_TDA|HDCR_TPM|HDCR_TPMCR,
+                 MDCR_EL2);
+
+    /* Trap CP15 c15 used for implementation defined registers */
+    WRITE_SYSREG(HSTR_T(15), HSTR_EL2);
+
+    /* Trap all coprocessor registers (0-13) except cp10 and cp11 for VFP
+     * /!\ All coprocessors except cp10 and cp11 cannot be used in Xen
+     */
+    WRITE_SYSREG((HCPTR_CP_MASK & ~(HCPTR_CP(10) | HCPTR_CP(11))) | HCPTR_TTA,
+                 CPTR_EL2);
+
     /* Setup hypervisor traps */
-    WRITE_SYSREG(HCR_PTW|HCR_BSU_OUTER|HCR_AMO|HCR_IMO|HCR_VM|HCR_TWI|HCR_TSC|
-                 HCR_TAC, HCR_EL2);
+    WRITE_SYSREG(HCR_PTW|HCR_BSU_OUTER|HCR_AMO|HCR_IMO|HCR_FMO|HCR_VM|
+                 HCR_TWI|HCR_TSC|HCR_TAC|HCR_SWIO|HCR_TIDCP, HCR_EL2);
     isb();
 }
 
@@ -272,7 +286,7 @@ static void cpsr_switch_mode(struct cpu_user_regs *regs, int mode)
         regs->cpsr |= PSR_BIG_ENDIAN;
 }
 
-static vaddr_t exception_handler(vaddr_t offset)
+static vaddr_t exception_handler32(vaddr_t offset)
 {
     uint32_t sctlr = READ_SYSREG32(SCTLR_EL1);
 
@@ -304,7 +318,7 @@ static void inject_undef32_exception(struct cpu_user_regs *regs)
     regs->lr_und = regs->pc32 + return_offset;
 
     /* Branch to exception vector */
-    regs->pc32 = exception_handler(VECTOR32_UND);
+    regs->pc32 = exception_handler32(VECTOR32_UND);
 }
 
 /* Injects an Abort exception into the current vcpu, PC is the exact
@@ -330,7 +344,7 @@ static void inject_abt32_exception(struct cpu_user_regs *regs,
     regs->spsr_abt = spsr;
     regs->lr_abt = regs->pc32 + return_offset;
 
-    regs->pc32 = exception_handler(prefetch ? VECTOR32_PABT : VECTOR32_DABT);
+    regs->pc32 = exception_handler32(prefetch ? VECTOR32_PABT : VECTOR32_DABT);
 
     /* Inject a debug fault, best we can do right now */
     if ( READ_SYSREG(TCR_EL1) & TTBCR_EAE )
@@ -383,9 +397,28 @@ static void inject_pabt32_exception(struct cpu_user_regs *regs,
 }
 
 #ifdef CONFIG_ARM_64
+/*
+ * Take care to call this while regs contains the original faulting
+ * state and not the (partially constructed) exception state.
+ */
+static vaddr_t exception_handler64(struct cpu_user_regs *regs, vaddr_t offset)
+{
+    vaddr_t base = READ_SYSREG(VBAR_EL1);
+
+    if ( usr_mode(regs) )
+        base += VECTOR64_LOWER32_BASE;
+    else if ( psr_mode(regs->cpsr,PSR_MODE_EL0t) )
+        base += VECTOR64_LOWER64_BASE;
+    else /* Otherwise must be from kernel mode */
+        base += VECTOR64_CURRENT_SPx_BASE;
+
+    return base + offset;
+}
+
 /* Inject an undefined exception into a 64 bit guest */
 static void inject_undef64_exception(struct cpu_user_regs *regs, int instr_len)
 {
+    vaddr_t handler;
     union hsr esr = {
         .iss = 0,
         .len = instr_len,
@@ -394,12 +427,14 @@ static void inject_undef64_exception(struct cpu_user_regs *regs, int instr_len)
 
     BUG_ON( is_pv32_domain(current->domain) );
 
+    handler = exception_handler64(regs, VECTOR64_SYNC_OFFSET);
+
     regs->spsr_el1 = regs->cpsr;
     regs->elr_el1 = regs->pc;
 
     regs->cpsr = PSR_MODE_EL1h | PSR_ABT_MASK | PSR_FIQ_MASK | \
         PSR_IRQ_MASK | PSR_DBG_MASK;
-    regs->pc = READ_SYSREG(VBAR_EL1) + VECTOR64_CURRENT_SPx_SYNC;
+    regs->pc = handler;
 
     WRITE_SYSREG32(esr.bits, ESR_EL1);
 }
@@ -410,6 +445,7 @@ static void inject_abt64_exception(struct cpu_user_regs *regs,
                                    register_t addr,
                                    int instr_len)
 {
+    vaddr_t handler;
     union hsr esr = {
         .iss = 0,
         .len = instr_len,
@@ -431,12 +467,14 @@ static void inject_abt64_exception(struct cpu_user_regs *regs,
 
     BUG_ON( is_pv32_domain(current->domain) );
 
+    handler = exception_handler64(regs, VECTOR64_SYNC_OFFSET);
+
     regs->spsr_el1 = regs->cpsr;
     regs->elr_el1 = regs->pc;
 
     regs->cpsr = PSR_MODE_EL1h | PSR_ABT_MASK | PSR_FIQ_MASK | \
         PSR_IRQ_MASK | PSR_DBG_MASK;
-    regs->pc = READ_SYSREG(VBAR_EL1) + VECTOR64_CURRENT_SPx_SYNC;
+    regs->pc = handler;
 
     WRITE_SYSREG(addr, FAR_EL1);
     WRITE_SYSREG32(esr.bits, ESR_EL1);
@@ -458,6 +496,17 @@ static void inject_iabt64_exception(struct cpu_user_regs *regs,
 
 #endif
 
+static void inject_undef_exception(struct cpu_user_regs *regs,
+                                   int instr_len)
+{
+        if ( is_pv32_domain(current->domain) )
+            inject_undef32_exception(regs);
+#ifdef CONFIG_ARM_64
+        else
+            inject_undef64_exception(regs, instr_len);
+#endif
+}
+
 static void inject_iabt_exception(struct cpu_user_regs *regs,
                                   register_t addr,
                                   int instr_len)
@@ -683,7 +732,17 @@ static void _show_registers(struct cpu_user_regs *regs,
             show_registers_32(regs, ctxt, guest_mode, v);
 #ifdef CONFIG_ARM_64
         else if ( is_pv64_domain(v->domain) )
-            show_registers_64(regs, ctxt, guest_mode, v);
+        {
+            if ( psr_mode_is_32bit(regs->cpsr) )
+            {
+                BUG_ON(!usr_mode(regs));
+                show_registers_32(regs, ctxt, guest_mode, v);
+            }
+            else
+            {
+                show_registers_64(regs, ctxt, guest_mode, v);
+            }
+        }
 #endif
     }
     else
@@ -763,7 +822,7 @@ static void show_guest_stack(struct vcpu *v, struct cpu_user_regs *regs)
 {
     int i;
     vaddr_t sp;
-    paddr_t stack_phys;
+    struct page_info *page;
     void *mapped;
     unsigned long *stack, addr;
 
@@ -823,13 +882,20 @@ static void show_guest_stack(struct vcpu *v, struct cpu_user_regs *regs)
 
     printk("Guest stack trace from sp=%"PRIvaddr":\n  ", sp);
 
-    if ( gvirt_to_maddr(sp, &stack_phys) )
+    if ( sp & ( sizeof(long) - 1 ) )
+    {
+        printk("Stack is misaligned\n");
+        return;
+    }
+
+    page = get_page_from_gva(current->domain, sp, GV2M_READ);
+    if ( page == NULL )
     {
         printk("Failed to convert stack to physical address\n");
         return;
     }
 
-    mapped = map_domain_page(stack_phys >> PAGE_SHIFT);
+    mapped = __map_domain_page(page);
 
     stack = mapped + (sp & ~PAGE_MASK);
 
@@ -847,7 +913,7 @@ static void show_guest_stack(struct vcpu *v, struct cpu_user_regs *regs)
         printk("Stack empty.");
     printk("\n");
     unmap_domain_page(mapped);
-
+    put_page(page);
 }
 
 #define STACK_BEFORE_EXCEPTION(regs) ((register_t*)(regs)->sp)
@@ -978,7 +1044,7 @@ void do_unexpected_trap(const char *msg, struct cpu_user_regs *regs)
 {
     printk("CPU%d: Unexpected Trap: %s\n", smp_processor_id(), msg);
     show_execution_state(regs);
-    while(1);
+    panic("CPU%d: Unexpected Trap: %s\n", smp_processor_id(), msg);
 }
 
 typedef register_t (*arm_hypercall_fn_t)(
@@ -1011,6 +1077,7 @@ static arm_hypercall_t arm_hypercall_table[] = {
     HYPERCALL(sysctl, 2),
     HYPERCALL(hvm_op, 2),
     HYPERCALL(grant_table_op, 3),
+    HYPERCALL(multicall, 2),
     HYPERCALL_ARM(vcpu_op, 3),
 };
 
@@ -1158,6 +1225,24 @@ static void do_trap_hypercall(struct cpu_user_regs *regs, register_t *nr,
 #endif
 }
 
+static bool_t check_multicall_32bit_clean(struct multicall_entry *multi)
+{
+    int i;
+
+    for ( i = 0; i < arm_hypercall_table[multi->op].nr_args; i++ )
+    {
+        if ( unlikely(multi->args[i] & 0xffffffff00000000ULL) )
+        {
+            printk("%pv: multicall argument %d is not 32-bit clean %"PRIx64"\n",
+                   current, i, multi->args[i]);
+            domain_crash(current->domain);
+            return false;
+        }
+    }
+
+    return true;
+}
+
 void do_multicall_call(struct multicall_entry *multi)
 {
     arm_hypercall_fn_t call = NULL;
@@ -1175,9 +1260,13 @@ void do_multicall_call(struct multicall_entry *multi)
         return;
     }
 
+    if ( is_pv32_domain(current->domain) &&
+         !check_multicall_32bit_clean(multi) )
+        return;
+
     multi->result = call(multi->args[0], multi->args[1],
-                        multi->args[2], multi->args[3],
-                        multi->args[4]);
+                         multi->args[2], multi->args[3],
+                         multi->args[4]);
 }
 
 /*
@@ -1349,11 +1438,45 @@ static void do_cp15_32(struct cpu_user_regs *regs,
         if ( cp32.read )
            *r = v->arch.actlr;
         break;
+
+    /* We could trap ID_DFR0 and tell the guest we don't support
+     * performance monitoring, but Linux doesn't check the ID_DFR0.
+     * Therefore it will read PMCR.
+     *
+     * We tell the guest we have 0 counters. Unfortunately we must
+     * always support PMCCNTR (the cycle counter): we just RAZ/WI for all
+     * PM registers, which doesn't crash the kernel at least
+     */
+    case HSR_CPREG32(PMCR):
+    case HSR_CPREG32(PMCNTENSET):
+    case HSR_CPREG32(PMCNTENCLR):
+    case HSR_CPREG32(PMOVSR):
+    case HSR_CPREG32(PMSWINC):
+    case HSR_CPREG32(PMSELR):
+    case HSR_CPREG32(PMCEID0):
+    case HSR_CPREG32(PMCEID1):
+    case HSR_CPREG32(PMCCNTR):
+    case HSR_CPREG32(PMXEVCNTR):
+    case HSR_CPREG32(PMXEVCNR):
+    case HSR_CPREG32(PMUSERENR):
+    case HSR_CPREG32(PMINTENSET):
+    case HSR_CPREG32(PMINTENCLR):
+    case HSR_CPREG32(PMOVSSET):
+        if ( cp32.read )
+            *r = 0;
+        break;
+
     default:
-        printk("%s p15, %d, r%d, cr%d, cr%d, %d @ 0x%"PRIregister"\n",
-               cp32.read ? "mrc" : "mcr",
-               cp32.op1, cp32.reg, cp32.crn, cp32.crm, cp32.op2, regs->pc);
-        panic("unhandled 32-bit CP15 access %#x", hsr.bits & HSR_CP32_REGS_MASK);
+#ifndef NDEBUG
+        gdprintk(XENLOG_ERR,
+                 "%s p15, %d, r%d, cr%d, cr%d, %d @ 0x%"PRIregister"\n",
+                 cp32.read ? "mrc" : "mcr",
+                 cp32.op1, cp32.reg, cp32.crn, cp32.crm, cp32.op2, regs->pc);
+        gdprintk(XENLOG_ERR, "unhandled 32-bit CP15 access %#x\n",
+                 hsr.bits & HSR_CP32_REGS_MASK);
+#endif
+        inject_undef_exception(regs, hsr.len);
+        return;
     }
     advance_pc(regs, hsr);
 }
@@ -1361,8 +1484,6 @@ static void do_cp15_32(struct cpu_user_regs *regs,
 static void do_cp15_64(struct cpu_user_regs *regs,
                        union hsr hsr)
 {
-    struct hsr_cp64 cp64 = hsr.cp64;
-
     if ( !check_conditional_instr(regs, hsr) )
     {
         advance_pc(regs, hsr);
@@ -1380,22 +1501,161 @@ static void do_cp15_64(struct cpu_user_regs *regs,
         }
         break;
     default:
-        printk("%s p15, %d, r%d, r%d, cr%d @ 0x%"PRIregister"\n",
-               cp64.read ? "mrrc" : "mcrr",
-               cp64.op1, cp64.reg1, cp64.reg2, cp64.crm, regs->pc);
-        panic("unhandled 64-bit CP15 access %#x", hsr.bits & HSR_CP64_REGS_MASK);
+        {
+#ifndef NDEBUG
+            struct hsr_cp64 cp64 = hsr.cp64;
+
+            gdprintk(XENLOG_ERR,
+                     "%s p15, %d, r%d, r%d, cr%d @ 0x%"PRIregister"\n",
+                     cp64.read ? "mrrc" : "mcrr",
+                     cp64.op1, cp64.reg1, cp64.reg2, cp64.crm, regs->pc);
+            gdprintk(XENLOG_ERR, "unhandled 64-bit CP15 access %#x\n",
+                     hsr.bits & HSR_CP64_REGS_MASK);
+#endif
+            inject_undef_exception(regs, hsr.len);
+            return;
+        }
+    }
+    advance_pc(regs, hsr);
+}
+
+static void do_cp14_32(struct cpu_user_regs *regs, union hsr hsr)
+{
+    struct hsr_cp32 cp32 = hsr.cp32;
+    uint32_t *r = (uint32_t *)select_user_reg(regs, cp32.reg);
+    struct domain *d = current->domain;
+
+    if ( !check_conditional_instr(regs, hsr) )
+    {
+        advance_pc(regs, hsr);
+        return;
+    }
+
+    switch ( hsr.bits & HSR_CP32_REGS_MASK )
+    {
+    case HSR_CPREG32(DBGDIDR):
+
+        /* Read-only register */
+        if ( !cp32.read )
+            goto bad_cp;
+
+        /* Implement the minimum requirements:
+         *  - Number of watchpoints: 1
+         *  - Number of breakpoints: 2
+         *  - Version: ARMv7 v7.1
+         *  - Variant and Revision bits match MIDR
+         */
+        *r = (1 << 24) | (5 << 16);
+        *r |= ((d->arch.vpidr >> 20) & 0xf) | (d->arch.vpidr & 0xf);
+        break;
+
+    case HSR_CPREG32(DBGDSCRINT):
+    case HSR_CPREG32(DBGDSCREXT):
+        /* Implement debug status and control register as RAZ/WI.
+         * The OS won't use Hardware debug if MDBGen not set
+         */
+        if ( cp32.read )
+           *r = 0;
+        break;
+    case HSR_CPREG32(DBGVCR):
+    case HSR_CPREG32(DBGOSLAR):
+    case HSR_CPREG32(DBGBVR0):
+    case HSR_CPREG32(DBGBCR0):
+    case HSR_CPREG32(DBGWVR0):
+    case HSR_CPREG32(DBGWCR0):
+    case HSR_CPREG32(DBGBVR1):
+    case HSR_CPREG32(DBGBCR1):
+    case HSR_CPREG32(DBGOSDLR):
+        /* RAZ/WI */
+        if ( cp32.read )
+            *r = 0;
+        break;
+
+    default:
+bad_cp:
+#ifndef NDEBUG
+        gdprintk(XENLOG_ERR,
+                 "%s p14, %d, r%d, cr%d, cr%d, %d @ 0x%"PRIregister"\n",
+                  cp32.read ? "mrc" : "mcr",
+                  cp32.op1, cp32.reg, cp32.crn, cp32.crm, cp32.op2, regs->pc);
+        gdprintk(XENLOG_ERR, "unhandled 32-bit cp14 access %#x\n",
+                 hsr.bits & HSR_CP32_REGS_MASK);
+#endif
+        inject_undef_exception(regs, hsr.len);
+        return;
     }
+
     advance_pc(regs, hsr);
 }
 
+static void do_cp14_dbg(struct cpu_user_regs *regs, union hsr hsr)
+{
+    if ( !check_conditional_instr(regs, hsr) )
+    {
+        advance_pc(regs, hsr);
+        return;
+    }
+
+    inject_undef_exception(regs, hsr.len);
+}
+
+static void do_cp(struct cpu_user_regs *regs, union hsr hsr)
+{
+    if ( !check_conditional_instr(regs, hsr) )
+    {
+        advance_pc(regs, hsr);
+        return;
+    }
+
+    inject_undef_exception(regs, hsr.len);
+}
+
 #ifdef CONFIG_ARM_64
 static void do_sysreg(struct cpu_user_regs *regs,
                       union hsr hsr)
 {
-    struct hsr_sysreg sysreg = hsr.sysreg;
+    register_t *x = select_user_reg(regs, hsr.sysreg.reg);
 
     switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
     {
+    /* RAZ/WI registers: */
+    /*  - Debug */
+    case HSR_SYSREG_MDSCR_EL1:
+    /*  - Perf monitors */
+    case HSR_SYSREG_PMINTENSET_EL1:
+    case HSR_SYSREG_PMINTENCLR_EL1:
+    case HSR_SYSREG_PMCR_EL0:
+    case HSR_SYSREG_PMCNTENSET_EL0:
+    case HSR_SYSREG_PMCNTENCLR_EL0:
+    case HSR_SYSREG_PMOVSCLR_EL0:
+    case HSR_SYSREG_PMSWINC_EL0:
+    case HSR_SYSREG_PMSELR_EL0:
+    case HSR_SYSREG_PMCEID0_EL0:
+    case HSR_SYSREG_PMCEID1_EL0:
+    case HSR_SYSREG_PMCCNTR_EL0:
+    case HSR_SYSREG_PMXEVTYPER_EL0:
+    case HSR_SYSREG_PMXEVCNTR_EL0:
+    case HSR_SYSREG_PMUSERENR_EL0:
+    case HSR_SYSREG_PMOVSSET_EL0:
+    /* - Breakpoints */
+    HSR_SYSREG_DBG_CASES(DBGBVR):
+    HSR_SYSREG_DBG_CASES(DBGBCR):
+    /* - Watchpoints */
+    HSR_SYSREG_DBG_CASES(DBGWVR):
+    HSR_SYSREG_DBG_CASES(DBGWCR):
+    /* - Double Lock Register */
+    case HSR_SYSREG_OSDLR_EL1:
+        if ( hsr.sysreg.read )
+            *x = 0;
+        /* else: write ignored */
+        break;
+
+    /* Write only, Write ignore registers: */
+    case HSR_SYSREG_OSLAR_EL1:
+        if ( hsr.sysreg.read )
+            goto bad_sysreg;
+        /* else: write ignored */
+        break;
     case HSR_SYSREG_CNTP_CTL_EL0:
     case HSR_SYSREG_CNTP_TVAL_EL0:
         if ( !vtimer_emulate(regs, hsr) )
@@ -1406,15 +1666,25 @@ static void do_sysreg(struct cpu_user_regs *regs,
         }
         break;
     default:
-        printk("%s %d, %d, c%d, c%d, %d %s x%d @ 0x%"PRIregister"\n",
-               sysreg.read ? "mrs" : "msr",
-               sysreg.op0, sysreg.op1,
-               sysreg.crn, sysreg.crm,
-               sysreg.op2,
-               sysreg.read ? "=>" : "<=",
-               sysreg.reg, regs->pc);
-        panic("unhandled 64-bit sysreg access %#x",
-              hsr.bits & HSR_SYSREG_REGS_MASK);
+ bad_sysreg:
+        {
+            struct hsr_sysreg sysreg = hsr.sysreg;
+#ifndef NDEBUG
+
+            gdprintk(XENLOG_ERR,
+                     "%s %d, %d, c%d, c%d, %d %s x%d @ 0x%"PRIregister"\n",
+                     sysreg.read ? "mrs" : "msr",
+                     sysreg.op0, sysreg.op1,
+                     sysreg.crn, sysreg.crm,
+                     sysreg.op2,
+                     sysreg.read ? "=>" : "<=",
+                     sysreg.reg, regs->pc);
+            gdprintk(XENLOG_ERR, "unhandled 64-bit sysreg access %#x\n",
+                     hsr.bits & HSR_SYSREG_REGS_MASK);
+#endif
+            inject_undef_exception(regs, sysreg.len);
+            return;
+        }
     }
 
     regs->pc += 4;
@@ -1543,6 +1813,17 @@ asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
 {
     union hsr hsr = { .bits = READ_SYSREG32(ESR_EL2) };
 
+    /*
+     * We currently do not handle 32-bit userspace on 64-bit kernels
+     * correctly (see XSA-102). Until that is resolved we treat any
+     * trap from 32-bit userspace on a 64-bit kernel as undefined.
+     */
+    if ( is_pv64_domain(current->domain) && psr_mode_is_32bit(regs->cpsr) )
+    {
+        inject_undef_exception(regs, hsr.len);
+        return;
+    }
+
     switch (hsr.ec) {
     case HSR_EC_WFI_WFE:
         if ( !check_conditional_instr(regs, hsr) )
@@ -1572,6 +1853,21 @@ asmlinkage void do_trap_hypervisor(struct cpu_user_regs *regs)
             goto bad_trap;
         do_cp15_64(regs, hsr);
         break;
+    case HSR_EC_CP14_32:
+        if ( !is_pv32_domain(current->domain) )
+            goto bad_trap;
+        do_cp14_32(regs, hsr);
+        break;
+    case HSR_EC_CP14_DBG:
+        if ( !is_pv32_domain(current->domain) )
+            goto bad_trap;
+        do_cp14_dbg(regs, hsr);
+        break;
+    case HSR_EC_CP:
+        if ( !is_pv32_domain(current->domain) )
+            goto bad_trap;
+        do_cp(regs, hsr);
+        break;
     case HSR_EC_SMC32:
         inject_undef32_exception(regs);
         break;
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 553411d..850006c 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -89,8 +89,17 @@ int domain_vgic_init(struct domain *d)
 
     d->arch.vgic.shared_irqs =
         xzalloc_array(struct vgic_irq_rank, DOMAIN_NR_RANKS(d));
+    if ( d->arch.vgic.shared_irqs == NULL )
+        return -ENOMEM;
+
     d->arch.vgic.pending_irqs =
         xzalloc_array(struct pending_irq, d->arch.vgic.nr_lines);
+    if ( d->arch.vgic.pending_irqs == NULL )
+    {
+        xfree(d->arch.vgic.shared_irqs);
+        return -ENOMEM;
+    }
+
     for (i=0; i<d->arch.vgic.nr_lines; i++)
     {
         INIT_LIST_HEAD(&d->arch.vgic.pending_irqs[i].inflight);
@@ -583,8 +592,8 @@ static int vgic_distr_mmio_write(struct vcpu *v, mmio_info_t *info)
     case GICD_ICFGR + 2 ... GICD_ICFGRN: /* SPIs */
         if ( dabt.size != 2 ) goto bad_width;
         rank = vgic_irq_rank(v, 2, gicd_reg - GICD_ICFGR);
-        vgic_lock_rank(v, rank);
         if ( rank == NULL) goto write_ignore;
+        vgic_lock_rank(v, rank);
         rank->icfg[REG_RANK_INDEX(2, gicd_reg - GICD_ICFGR)] = *r;
         vgic_unlock_rank(v, rank);
         return 1;
diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index 6d7984f..05de536 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -748,6 +748,8 @@ int __init acpi_boot_init(void)
 
 	erst_init();
 
+	acpi_hest_init();
+
 	acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt);
 
 	return 0;
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index 597befa..6aaa7ab 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -139,6 +139,9 @@ static void do_get_hw_residencies(void *arg)
     case 0x3F:
     case 0x45:
     case 0x46:
+    /* future */
+    case 0x3D:
+    case 0x4E:
         GET_PC2_RES(hw_res->pc2);
         GET_CC7_RES(hw_res->cc7);
         /* fall through */
@@ -296,6 +299,9 @@ void mwait_idle_with_hints(unsigned int eax, unsigned int ecx)
     unsigned int cpu = smp_processor_id();
     s_time_t expires = per_cpu(timer_deadline, cpu);
 
+    if ( boot_cpu_has(X86_FEATURE_CLFLUSH_MONITOR) )
+        clflush((void *)&mwait_wakeup(cpu));
+
     __monitor((void *)&mwait_wakeup(cpu), 0, 0);
     smp_mb();
 
diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 44087fa..46c1e48 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -107,6 +107,10 @@ static void __devinit set_cpuidmask(const struct cpuinfo_x86 *c)
 	ASSERT((status == not_parsed) && (smp_processor_id() == 0));
 	status = no_mask;
 
+	/* Fam11 doesn't support masking at all. */
+	if (c->x86 == 0x11)
+		return;
+
 	if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx &
 	      opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx)) {
 		feat_ecx = opt_cpuid_mask_ecx;
@@ -176,7 +180,6 @@ static void __devinit set_cpuidmask(const struct cpuinfo_x86 *c)
 	       extfeat_ecx, extfeat_edx);
 
  setmask:
-	/* FIXME check if processor supports CPUID masking */
 	/* AMD processors prior to family 10h required a 32-bit password */
 	if (c->x86 >= 0x10) {
 		wrmsr(MSR_K8_FEATURE_MASK, feat_edx, feat_ecx);
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 32ca458..4221826 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -234,6 +234,9 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 			paddr_bits = cpuid_eax(0x80000008) & 0xff;
 	}
 
+	/* Might lift BIOS max_leaf=3 limit. */
+	early_intel_workaround(c);
+
 	/* Intel-defined flags: level 0x00000007 */
 	if ( c->cpuid_level >= 0x00000007 ) {
 		u32 dummy;
@@ -241,8 +244,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 		c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
 	}
 
-	early_intel_workaround(c);
-
 #ifdef CONFIG_X86_HT
 	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
 #endif
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index 27fe762..992650f 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -147,6 +147,9 @@ void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
 /*
  * P4 Xeon errata 037 workaround.
  * Hardware prefetcher may cause stale data to be loaded into the cache.
+ *
+ * Xeon 7400 erratum AAI65 (and further newer Xeons)
+ * MONITOR/MWAIT may have excessive false wakeups
  */
 static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 {
@@ -161,6 +164,10 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
 			wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
 		}
 	}
+
+	if (c->x86 == 6 && cpu_has_clflush &&
+	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
+		set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
 }
 
 
diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index b375ef7..af6f0be 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -729,8 +729,10 @@ void mcheck_init(struct cpuinfo_x86 *c, bool_t bsp)
 {
     enum mcheck_type inited = mcheck_none;
 
-    if (mce_disabled == 1) {
-        dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
+    if ( mce_disabled )
+    {
+        if ( bsp )
+            printk(XENLOG_INFO "MCE support disabled by bootparam\n");
         return;
     }
 
diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c
index b32fdb2..5b93870 100644
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -49,11 +49,15 @@ static int __read_mostly nr_intel_ext_msrs;
 #define INTEL_SRAR_INSTR_FETCH	0x150
 
 #ifdef CONFIG_X86_MCE_THERMAL
+#define MCE_RING                0x1
+static DEFINE_PER_CPU(int, last_state);
+
 static void intel_thermal_interrupt(struct cpu_user_regs *regs)
 {
     uint64_t msr_content;
     unsigned int cpu = smp_processor_id();
     static DEFINE_PER_CPU(s_time_t, next);
+    int *this_last_state;
 
     ack_APIC_irq();
 
@@ -62,13 +66,17 @@ static void intel_thermal_interrupt(struct cpu_user_regs *regs)
 
     per_cpu(next, cpu) = NOW() + MILLISECS(5000);
     rdmsrl(MSR_IA32_THERM_STATUS, msr_content);
-    if (msr_content & 0x1) {
-        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
-        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-                cpu);
+    this_last_state = &per_cpu(last_state, cpu);
+    if ( *this_last_state == (msr_content & MCE_RING) )
+        return;
+    *this_last_state = msr_content & MCE_RING;
+    if ( msr_content & MCE_RING )
+    {
+        printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu);
+        printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu);
         add_taint(TAINT_MACHINE_CHECK);
     } else {
-        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+        printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu);
     }
 }
 
@@ -802,6 +810,7 @@ static int cpu_mcabank_alloc(unsigned int cpu)
 
     per_cpu(no_cmci_banks, cpu) = cmci;
     per_cpu(mce_banks_owned, cpu) = owned;
+    per_cpu(last_state, cpu) = -1;
 
     return 0;
 out:
diff --git a/xen/arch/x86/cpu/mcheck/mctelem.c b/xen/arch/x86/cpu/mcheck/mctelem.c
index 895ce1a..ed8e8d2 100644
--- a/xen/arch/x86/cpu/mcheck/mctelem.c
+++ b/xen/arch/x86/cpu/mcheck/mctelem.c
@@ -37,24 +37,19 @@ struct mctelem_ent {
 	void *mcte_data;		/* corresponding data payload */
 };
 
-#define	MCTE_F_HOME_URGENT		0x0001U	/* free to urgent freelist */
-#define	MCTE_F_HOME_NONURGENT		0x0002U /* free to nonurgent freelist */
-#define	MCTE_F_CLASS_URGENT		0x0004U /* in use - urgent errors */
-#define	MCTE_F_CLASS_NONURGENT		0x0008U /* in use - nonurgent errors */
+#define	MCTE_F_CLASS_URGENT		0x0001U /* in use - urgent errors */
+#define	MCTE_F_CLASS_NONURGENT		0x0002U /* in use - nonurgent errors */
 #define	MCTE_F_STATE_FREE		0x0010U	/* on a freelist */
 #define	MCTE_F_STATE_UNCOMMITTED	0x0020U	/* reserved; on no list */
 #define	MCTE_F_STATE_COMMITTED		0x0040U	/* on a committed list */
 #define	MCTE_F_STATE_PROCESSING		0x0080U	/* on a processing list */
 
-#define	MCTE_F_MASK_HOME	(MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
 #define	MCTE_F_MASK_CLASS	(MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)
 #define	MCTE_F_MASK_STATE	(MCTE_F_STATE_FREE | \
 				MCTE_F_STATE_UNCOMMITTED | \
 				MCTE_F_STATE_COMMITTED | \
 				MCTE_F_STATE_PROCESSING)
 
-#define	MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)
-
 #define	MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
 #define	MCTE_SET_CLASS(tep, new) do { \
     (tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
@@ -69,6 +64,8 @@ struct mctelem_ent {
 #define	MC_URGENT_NENT		10
 #define	MC_NONURGENT_NENT	20
 
+#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT)
+
 #define	MC_NCLASSES		(MC_NONURGENT + 1)
 
 #define	COOKIE2MCTE(c)		((struct mctelem_ent *)(c))
@@ -77,11 +74,9 @@ struct mctelem_ent {
 static struct mc_telem_ctl {
 	/* Linked lists that thread the array members together.
 	 *
-	 * The free lists are singly-linked via mcte_next, and we allocate
-	 * from them by atomically unlinking an element from the head.
-	 * Consumed entries are returned to the head of the free list.
-	 * When an entry is reserved off the free list it is not linked
-	 * on any list until it is committed or dismissed.
+	 * The free list is a bit array where a set bit means free.
+	 * This works because the number of elements is quite small,
+	 * and it makes atomic allocation easy.
 	 *
 	 * The committed list grows at the head and we do not maintain a
 	 * tail pointer; insertions are performed atomically.  The head
@@ -101,7 +96,7 @@ static struct mc_telem_ctl {
 	 * we can lock it for updates.  The head of the processing list
 	 * always has the oldest telemetry, and we append (as above)
 	 * at the tail of the processing list. */
-	struct mctelem_ent *mctc_free[MC_NCLASSES];
+	DECLARE_BITMAP(mctc_free, MC_NENT);
 	struct mctelem_ent *mctc_committed[MC_NCLASSES];
 	struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
 	struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
@@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cpu)
  */
 static void mctelem_free(struct mctelem_ent *tep)
 {
-	mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
-	    MC_URGENT : MC_NONURGENT;
-
 	BUG_ON(tep->mcte_refcnt != 0);
 	BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
 
 	tep->mcte_prev = NULL;
-	mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
+	tep->mcte_next = NULL;
+
+	/* set free in array */
+	set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free);
 }
 
 /* Increment the reference count of an entry that is not linked on to
@@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz)
 	}
 
 	if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
-	    MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
-	    (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
-	    datasz)) == NULL) {
+	    MC_NENT)) == NULL ||
+	    (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) {
 		if (mctctl.mctc_elems)
 			xfree(mctctl.mctc_elems);
 		printk("Allocations for MCA telemetry failed\n");
 		return;
 	}
 
-	for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
-		struct mctelem_ent *tep, **tepp;
+	for (i = 0; i < MC_NENT; i++) {
+		struct mctelem_ent *tep;
 
 		tep = mctctl.mctc_elems + i;
 		tep->mcte_flags = MCTE_F_STATE_FREE;
 		tep->mcte_refcnt = 0;
 		tep->mcte_data = datarr + i * datasz;
 
-		if (i < MC_URGENT_NENT) {
-			tepp = &mctctl.mctc_free[MC_URGENT];
-			tep->mcte_flags |= MCTE_F_HOME_URGENT;
-		} else {
-			tepp = &mctctl.mctc_free[MC_NONURGENT];
-			tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
-		}
-
-		tep->mcte_next = *tepp;
+		__set_bit(i, mctctl.mctc_free);
+		tep->mcte_next = NULL;
 		tep->mcte_prev = NULL;
-		*tepp = tep;
 	}
 }
 
@@ -310,32 +296,25 @@ static int mctelem_drop_count;
 
 /* Reserve a telemetry entry, or return NULL if none available.
  * If we return an entry then the caller must subsequently call exactly one of
- * mctelem_unreserve or mctelem_commit for that entry.
+ * mctelem_dismiss or mctelem_commit for that entry.
  */
 mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
 {
-	struct mctelem_ent **freelp;
-	struct mctelem_ent *oldhead, *newhead;
-	mctelem_class_t target = (which == MC_URGENT) ?
-	    MC_URGENT : MC_NONURGENT;
+	unsigned bit;
+	unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT;
 
-	freelp = &mctctl.mctc_free[target];
 	for (;;) {
-		if ((oldhead = *freelp) == NULL) {
-			if (which == MC_URGENT && target == MC_URGENT) {
-				/* raid the non-urgent freelist */
-				target = MC_NONURGENT;
-				freelp = &mctctl.mctc_free[target];
-				continue;
-			} else {
-				mctelem_drop_count++;
-				return (NULL);
-			}
+		bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit);
+
+		if (bit >= MC_NENT) {
+			mctelem_drop_count++;
+			return (NULL);
 		}
 
-		newhead = oldhead->mcte_next;
-		if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
-			struct mctelem_ent *tep = oldhead;
+		/* try to allocate, atomically clear free bit */
+		if (test_and_clear_bit(bit, mctctl.mctc_free)) {
+			/* return element we got */
+			struct mctelem_ent *tep = mctctl.mctc_elems + bit;
 
 			mctelem_hold(tep);
 			MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
diff --git a/xen/arch/x86/cpu/mcheck/vmce.c b/xen/arch/x86/cpu/mcheck/vmce.c
index f6c35db..dcfe97e 100644
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -357,6 +357,10 @@ int inject_vmce(struct domain *d, int vcpu)
         if ( vcpu != VMCE_INJECT_BROADCAST && vcpu != v->vcpu_id )
             continue;
 
+        /* Don't inject to uninitialized VCPU. */
+        if ( !v->is_initialised )
+            continue;
+
         if ( (has_hvm_container_domain(d) ||
               guest_has_trap_callback(d, v->vcpu_id, TRAP_machine_check)) &&
              !test_and_set_bool(v->mce_pending) )
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 85179f2..65ea7b3 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -360,7 +360,7 @@ static void mwait_idle(void)
 		lapic_timer_off();
 
 	before = cpuidle_get_tick();
-	TRACE_4D(TRC_PM_IDLE_ENTRY, cx->idx, before, exp, pred);
+	TRACE_4D(TRC_PM_IDLE_ENTRY, cx->type, before, exp, pred);
 
 	if (cpu_is_haltable(cpu))
 		mwait_idle_with_hints(eax, MWAIT_ECX_INTERRUPT_BREAK);
@@ -369,7 +369,7 @@ static void mwait_idle(void)
 
 	cstate_restore_tsc();
 	trace_exit_reason(irq_traced);
-	TRACE_6D(TRC_PM_IDLE_EXIT, cx->idx, after,
+	TRACE_6D(TRC_PM_IDLE_EXIT, cx->type, after,
 		irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]);
 
 	update_idle_stats(power, cx, before, after);
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 6618ae6..195b07f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -180,6 +180,28 @@ void dump_pageframe_info(struct domain *d)
     spin_unlock(&d->page_alloc_lock);
 }
 
+/*
+ * The hole may be at or above the 44-bit boundary, so we need to determine
+ * the total bit count until reaching 32 significant (not squashed out) bits
+ * in PFN representations.
+ * Note that the way "bits" gets initialized/updated/bounds-checked guarantees
+ * that the function will never return zero, and hence will never be called
+ * more than once (which is important due to it being deliberately placed in
+ * .init.text).
+ */
+static unsigned int __init noinline _domain_struct_bits(void)
+{
+    unsigned int bits = 32 + PAGE_SHIFT;
+    unsigned int sig = hweight32(~pfn_hole_mask);
+    unsigned int mask = pfn_hole_mask >> 32;
+
+    for ( ; bits < BITS_PER_LONG && sig < 32; ++bits, mask >>= 1 )
+        if ( !(mask & 1) )
+            ++sig;
+
+    return bits;
+}
+
 struct domain *alloc_domain_struct(void)
 {
     struct domain *d;
@@ -187,7 +209,10 @@ struct domain *alloc_domain_struct(void)
      * We pack the PDX of the domain structure into a 32-bit field within
      * the page_info structure. Hence the MEMF_bits() restriction.
      */
-    unsigned int bits = 32 + PAGE_SHIFT + pfn_pdx_hole_shift;
+    static unsigned int __read_mostly bits;
+
+    if ( unlikely(!bits) )
+         bits = _domain_struct_bits();
 
     BUILD_BUG_ON(sizeof(*d) > PAGE_SIZE);
     d = alloc_xenheap_pages(0, MEMF_bits(bits));
@@ -422,10 +447,6 @@ int vcpu_initialise(struct vcpu *v)
 
         /* PV guests by default have a 100Hz ticker. */
         v->periodic_period = MILLISECS(10);
-
-        /* PV guests get an emulated PIT too for video BIOSes to use. */
-        if ( v->vcpu_id == 0 )
-            pit_init(v, cpu_khz);
     }
 
     v->arch.schedule_tail = continue_nonidle_domain;
@@ -528,7 +549,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
 
     if ( !is_idle_domain(d) )
     {
-        d->arch.cpuids = xzalloc_array(cpuid_input_t, MAX_CPUID_INPUT);
+        d->arch.cpuids = xmalloc_array(cpuid_input_t, MAX_CPUID_INPUT);
         rc = -ENOMEM;
         if ( d->arch.cpuids == NULL )
             goto fail;
@@ -579,6 +600,9 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
     tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
     spin_lock_init(&d->arch.vtsc_lock);
 
+    /* PV/PVH guests get an emulated PIT too for video BIOSes to use. */
+    pit_init(d, cpu_khz);
+
     return 0;
 
  fail:
@@ -1980,6 +2004,8 @@ int domain_relinquish_resources(struct domain *d)
         BUG();
     }
 
+    pit_deinit(d);
+
     if ( has_hvm_container_domain(d) )
         hvm_domain_relinquish_resources(d);
 
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 26635ff..a967b65 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -920,7 +920,7 @@ long arch_do_domctl(
     case XEN_DOMCTL_set_cpuid:
     {
         xen_domctl_cpuid_t *ctl = &domctl->u.cpuid;
-        cpuid_input_t *cpuid = NULL; 
+        cpuid_input_t *cpuid, *unused = NULL;
         int i;
 
         for ( i = 0; i < MAX_CPUID_INPUT; i++ )
@@ -928,7 +928,11 @@ long arch_do_domctl(
             cpuid = &d->arch.cpuids[i];
 
             if ( cpuid->input[0] == XEN_CPUID_INPUT_UNUSED )
-                break;
+            {
+                if ( !unused )
+                    unused = cpuid;
+                continue;
+            }
 
             if ( (cpuid->input[0] == ctl->input[0]) &&
                  ((cpuid->input[1] == XEN_CPUID_INPUT_UNUSED) ||
@@ -936,15 +940,12 @@ long arch_do_domctl(
                 break;
         }
         
-        if ( i == MAX_CPUID_INPUT )
-        {
-            ret = -ENOENT;
-        }
+        if ( i < MAX_CPUID_INPUT )
+            *cpuid = *ctl;
+        else if ( unused )
+            *unused = *ctl;
         else
-        {
-            memcpy(cpuid, ctl, sizeof(cpuid_input_t));
-            ret = 0;
-        }
+            ret = -ENOENT;
     }
     break;
 
@@ -1018,7 +1019,7 @@ long arch_do_domctl(
         struct vcpu *v;
 
         ret = -EBUSY;
-        if ( !d->is_paused_by_controller )
+        if ( !d->controller_pause_count )
             break;
         ret = -EINVAL;
         if ( domctl->u.gdbsx_pauseunp_vcpu.vcpu >= MAX_VIRT_CPUS ||
@@ -1034,7 +1035,7 @@ long arch_do_domctl(
         struct vcpu *v;
 
         ret = -EBUSY;
-        if ( !d->is_paused_by_controller )
+        if ( !d->controller_pause_count )
             break;
         ret = -EINVAL;
         if ( domctl->u.gdbsx_pauseunp_vcpu.vcpu >= MAX_VIRT_CPUS ||
@@ -1052,7 +1053,7 @@ long arch_do_domctl(
         struct vcpu *v;
 
         domctl->u.gdbsx_domstatus.vcpu_id = -1;
-        domctl->u.gdbsx_domstatus.paused = d->is_paused_by_controller;
+        domctl->u.gdbsx_domstatus.paused = d->controller_pause_count > 0;
         if ( domctl->u.gdbsx_domstatus.paused )
         {
             for_each_vcpu ( d, v )
@@ -1088,45 +1089,48 @@ long arch_do_domctl(
              ((v = d->vcpu[evc->vcpu]) == NULL) )
             goto vcpuextstate_out;
 
+        ret = -EINVAL;
+        if ( v == current ) /* no vcpu_pause() */
+            goto vcpuextstate_out;
+
         if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
         {
-            unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
+            unsigned int size;
 
-            if ( !evc->size && !evc->xfeature_mask )
+            ret = 0;
+            vcpu_pause(v);
+
+            size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
+            if ( (!evc->size && !evc->xfeature_mask) ||
+                 guest_handle_is_null(evc->buffer) )
             {
                 evc->xfeature_mask = xfeature_mask;
                 evc->size = size;
-                ret = 0;
+                vcpu_unpause(v);
                 goto vcpuextstate_out;
             }
+
             if ( evc->size != size || evc->xfeature_mask != xfeature_mask )
-            {
                 ret = -EINVAL;
-                goto vcpuextstate_out;
-            }
-            if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
-                                      offset, (void *)&v->arch.xcr0,
-                                      sizeof(v->arch.xcr0)) )
-            {
+
+            if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+                                              (void *)&v->arch.xcr0,
+                                              sizeof(v->arch.xcr0)) )
                 ret = -EFAULT;
-                goto vcpuextstate_out;
-            }
+
             offset += sizeof(v->arch.xcr0);
-            if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
-                                      offset, (void *)&v->arch.xcr0_accum,
-                                      sizeof(v->arch.xcr0_accum)) )
-            {
+            if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+                                              (void *)&v->arch.xcr0_accum,
+                                              sizeof(v->arch.xcr0_accum)) )
                 ret = -EFAULT;
-                goto vcpuextstate_out;
-            }
+
             offset += sizeof(v->arch.xcr0_accum);
-            if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
-                                      offset, (void *)v->arch.xsave_area,
-                                      size - 2 * sizeof(uint64_t)) )
-            {
+            if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+                                              (void *)v->arch.xsave_area,
+                                              size - 2 * sizeof(uint64_t)) )
                 ret = -EFAULT;
-                goto vcpuextstate_out;
-            }
+
+            vcpu_unpause(v);
         }
         else
         {
@@ -1175,12 +1179,14 @@ long arch_do_domctl(
 
             if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) )
             {
+                vcpu_pause(v);
                 v->arch.xcr0 = _xcr0;
                 v->arch.xcr0_accum = _xcr0_accum;
                 if ( _xcr0_accum & XSTATE_NONLAZY )
                     v->arch.nonlazy_xstate_used = 1;
                 memcpy(v->arch.xsave_area, _xsave_area,
                        evc->size - 2 * sizeof(uint64_t));
+                vcpu_unpause(v);
             }
             else
                 ret = -EINVAL;
@@ -1188,8 +1194,6 @@ long arch_do_domctl(
             xfree(receive_buf);
         }
 
-        ret = 0;
-
     vcpuextstate_out:
         if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
             copyback = 1;
diff --git a/xen/arch/x86/efi/runtime.c b/xen/arch/x86/efi/runtime.c
index d7c884b..166852d 100644
--- a/xen/arch/x86/efi/runtime.c
+++ b/xen/arch/x86/efi/runtime.c
@@ -10,6 +10,8 @@ DEFINE_XEN_GUEST_HANDLE(CHAR16);
 
 #ifndef COMPAT
 
+# include <asm/i387.h>
+# include <asm/xstate.h>
 # include <public/platform.h>
 
 const bool_t efi_enabled = 1;
@@ -45,8 +47,14 @@ const struct efi_pci_rom *__read_mostly efi_pci_roms;
 
 unsigned long efi_rs_enter(void)
 {
+    static const u16 fcw = FCW_DEFAULT;
+    static const u32 mxcsr = MXCSR_DEFAULT;
     unsigned long cr3 = read_cr3();
 
+    save_fpu_enable();
+    asm volatile ( "fldcw %0" :: "m" (fcw) );
+    asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) );
+
     spin_lock(&efi_rs_lock);
 
     /* prevent fixup_page_fault() from doing anything */
@@ -82,6 +90,7 @@ void efi_rs_leave(unsigned long cr3)
     }
     irq_exit();
     spin_unlock(&efi_rs_lock);
+    stts();
 }
 
 unsigned long efi_get_time(void)
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index e0388e7..1b75652 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -152,7 +152,8 @@ void flush_area_local(const void *va, unsigned int flags)
         if ( order < (BITS_PER_LONG - PAGE_SHIFT) )
             sz = 1UL << (order + PAGE_SHIFT);
 
-        if ( c->x86_clflush_size && c->x86_cache_size && sz &&
+        if ( !(flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL)) &&
+             c->x86_clflush_size && c->x86_cache_size && sz &&
              ((sz >> 10) < c->x86_cache_size) )
         {
             va = (const void *)((unsigned long)va & ~(sz - 1));
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 69f7e74..be1a2d3 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -478,7 +478,7 @@ static int hvm_set_ioreq_page(
 
     if ( (iorp->va != NULL) || d->is_dying )
     {
-        destroy_ring_for_helper(&iorp->va, iorp->page);
+        destroy_ring_for_helper(&va, page);
         spin_unlock(&iorp->lock);
         return -EINVAL;
     }
@@ -643,7 +643,6 @@ void hvm_domain_relinquish_resources(struct domain *d)
     rtc_deinit(d);
     if ( d->vcpu != NULL && d->vcpu[0] != NULL )
     {
-        pit_deinit(d);
         pmtimer_deinit(d);
         hpet_deinit(d);
     }
@@ -858,7 +857,7 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
         return -EINVAL;
     }
 
-    if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS(v) )
+    if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS(v, 1) )
     {
         printk(XENLOG_G_ERR "HVM%d restore: bad CR4 %#" PRIx64 "\n",
                d->domain_id, ctxt.cr4);
@@ -1217,7 +1216,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
     if ( v->vcpu_id == 0 )
     {
         /* NB. All these really belong in hvm_domain_initialise(). */
-        pit_init(v, cpu_khz);
         pmtimer_init(v);
         hpet_init(v);
  
@@ -1529,6 +1527,11 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
          (access_w && (p2mt == p2m_ram_ro)) )
     {
         put_gfn(p2m->domain, gfn);
+
+        rc = 0;
+        if ( unlikely(is_pvh_vcpu(v)) )
+            goto out;
+
         if ( !handle_mmio() )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         rc = 1;
@@ -1923,6 +1926,7 @@ int hvm_set_cr0(unsigned long value)
         hvm_funcs.handle_cd(v, value);
 
     hvm_update_cr(v, 0, value);
+    hvm_memory_event_cr0(value, old_value);
 
     if ( (value ^ old_value) & X86_CR0_PG ) {
         if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) )
@@ -1977,7 +1981,7 @@ int hvm_set_cr4(unsigned long value)
     struct vcpu *v = current;
     unsigned long old_cr;
 
-    if ( value & HVM_CR4_GUEST_RESERVED_BITS(v) )
+    if ( value & HVM_CR4_GUEST_RESERVED_BITS(v, 0) )
     {
         HVM_DBG_LOG(DBG_LEVEL_1,
                     "Guest attempts to set reserved bit in CR4: %lx",
@@ -2885,6 +2889,8 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
 
     switch ( input )
     {
+        unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
+
     case 0x1:
         /* Fix up VLAPIC details. */
         *ebx &= 0x00FFFFFFu;
@@ -2918,8 +2924,6 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
         *edx = v->vcpu_id * 2;
         break;
     case 0xd:
-    {
-        unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
         /* EBX value of main leaf 0 depends on enabled xsave features */
         if ( count == 0 && v->arch.xcr0 ) 
         {
@@ -2936,7 +2940,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
             }
         }
         break;
-    }
+
     case 0x80000001:
         /* We expose RDTSCP feature to guest only when
            tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */
@@ -2950,6 +2954,23 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
         if ( !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
             *edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
         break;
+
+    case 0x80000008:
+        count = cpuid_eax(0x80000008);
+        count = (count >> 16) & 0xff ?: count & 0xff;
+        if ( (*eax & 0xff) > count )
+            *eax = (*eax & ~0xff) | count;
+
+        hvm_cpuid(1, NULL, NULL, NULL, &_edx);
+        count = _edx & (cpufeat_mask(X86_FEATURE_PAE) |
+                        cpufeat_mask(X86_FEATURE_PSE36)) ? 36 : 32;
+        if ( (*eax & 0xff) < count )
+            *eax = (*eax & ~0xff) | count;
+
+        hvm_cpuid(0x80000001, NULL, NULL, NULL, &_edx);
+        *eax = (*eax & ~0xffff00) | (_edx & cpufeat_mask(X86_FEATURE_LM)
+                                     ? 0x3000 : 0x2000);
+        break;
     }
 }
 
@@ -3294,6 +3315,12 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     case PHYSDEVOP_irq_status_query:
     case PHYSDEVOP_get_free_pirq:
         return do_physdev_op(cmd, arg);
+
+    /* pvh fixme: coming soon */
+    case PHYSDEVOP_pirq_eoi_gmfn_v1:
+    case PHYSDEVOP_pirq_eoi_gmfn_v2:
+        return -ENOSYS;
+
     }
 }
 
@@ -4393,12 +4420,10 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
                 rc = -EINVAL;
                 goto param_fail4;
             } 
-            if ( p2m_is_grant(t) )
+            if ( !p2m_is_ram(t) &&
+                 (!p2m_is_hole(t) || a.hvmmem_type != HVMMEM_mmio_dm) )
             {
                 put_gfn(d, pfn);
-                gdprintk(XENLOG_WARNING,
-                         "type for pfn %#lx changed to grant while "
-                         "we were working?\n", pfn);
                 goto param_fail4;
             }
             else
@@ -4465,6 +4490,15 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
             goto param_fail5;
             
         rc = p2m_set_mem_access(d, a.first_pfn, a.nr, a.hvmmem_access);
+        if ( rc > 0 )
+        {
+            a.first_pfn += a.nr - rc;
+            a.nr = rc;
+            if ( __copy_to_guest(arg, &a, 1) )
+                rc = -EFAULT;
+            else
+                rc = -EAGAIN;
+        }
 
     param_fail5:
         rcu_unlock_domain(d);
@@ -4671,7 +4705,7 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
     if ( (p & HVMPME_MODE_MASK) == HVMPME_mode_sync ) 
     {
         req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;    
-        vcpu_pause_nosync(v);   
+        mem_event_vcpu_pause(v);
     }
 
     req.gfn = value;
diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c
index c0d6bc2..f7493b8 100644
--- a/xen/arch/x86/hvm/i8254.c
+++ b/xen/arch/x86/hvm/i8254.c
@@ -447,9 +447,8 @@ void pit_reset(struct domain *d)
     spin_unlock(&pit->lock);
 }
 
-void pit_init(struct vcpu *v, unsigned long cpu_khz)
+void pit_init(struct domain *d, unsigned long cpu_khz)
 {
-    struct domain *d = v->domain;
     PITState *pit = domain_vpit(d);
 
     spin_lock_init(&pit->lock);
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index bf6309d..247fb7e 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -178,6 +178,8 @@ int handle_mmio(void)
     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
     int rc;
 
+    ASSERT(!is_pvh_vcpu(curr));
+
     hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
 
     rc = hvm_emulate_one(&ctxt);
diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 677fbcd..5048c29 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -289,20 +289,18 @@ void hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data)
             struct pirq *info = pirq_info(d, pirq);
 
             /* if it is the first time, allocate the pirq */
-            if (info->arch.hvm.emuirq == IRQ_UNBOUND)
+            if ( !info || info->arch.hvm.emuirq == IRQ_UNBOUND )
             {
                 spin_lock(&d->event_lock);
                 map_domain_emuirq_pirq(d, pirq, IRQ_MSI_EMU);
                 spin_unlock(&d->event_lock);
+                info = pirq_info(d, pirq);
+                if ( !info )
+                    return;
             } else if (info->arch.hvm.emuirq != IRQ_MSI_EMU)
-            {
-                printk("%s: pirq %d does not correspond to an emulated MSI\n", __func__, pirq);
                 return;
-            }
             send_guest_pirq(d, info);
             return;
-        } else {
-            printk("%s: error getting pirq from MSI: pirq = %d\n", __func__, pirq);
         }
     }
 
diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
index 9937f5a..e13647c 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -145,7 +145,7 @@ bool_t is_var_mtrr_overlapped(struct mtrr_state *m)
 
 static int hvm_mtrr_pat_init(void)
 {
-    unsigned int i, j, phys_addr;
+    unsigned int i, j;
 
     memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
     for ( i = 0; i < MTRR_NUM_TYPES; i++ )
@@ -172,11 +172,7 @@ static int hvm_mtrr_pat_init(void)
         }
     }
 
-    phys_addr = 36;
-    if ( cpuid_eax(0x80000000) >= 0x80000008 )
-        phys_addr = (uint8_t)cpuid_eax(0x80000008);
-
-    size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+    size_or_mask = ~((1 << (paddr_bits - PAGE_SHIFT)) - 1);
 
     return 0;
 }
@@ -455,7 +451,7 @@ bool_t mtrr_fix_range_msr_set(struct mtrr_state *m, uint32_t row,
 bool_t mtrr_var_range_msr_set(
     struct domain *d, struct mtrr_state *m, uint32_t msr, uint64_t msr_content)
 {
-    uint32_t index, type, phys_addr, eax, ebx, ecx, edx;
+    uint32_t index, type, phys_addr, eax;
     uint64_t msr_mask;
     uint64_t *var_range_base = (uint64_t*)m->var_ranges;
 
@@ -468,16 +464,21 @@ bool_t mtrr_var_range_msr_set(
                     type == 4 || type == 5 || type == 6)) )
         return 0;
 
-    phys_addr = 36;
-    domain_cpuid(d, 0x80000000, 0, &eax, &ebx, &ecx, &edx);
-    if ( eax >= 0x80000008 )
+    if ( d == current->domain )
     {
-        domain_cpuid(d, 0x80000008, 0, &eax, &ebx, &ecx, &edx);
-        phys_addr = (uint8_t)eax;
+        phys_addr = 36;
+        hvm_cpuid(0x80000000, &eax, NULL, NULL, NULL);
+        if ( eax >= 0x80000008 )
+        {
+            hvm_cpuid(0x80000008, &eax, NULL, NULL, NULL);
+            phys_addr = (uint8_t)eax;
+        }
     }
+    else
+        phys_addr = paddr_bits;
     msr_mask = ~((((uint64_t)1) << phys_addr) - 1);
     msr_mask |= (index & 1) ? 0x7ffUL : 0xf00UL;
-    if ( unlikely(msr_content && (msr_content & msr_mask)) )
+    if ( unlikely(msr_content & msr_mask) )
     {
         HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                     msr_content);
@@ -689,13 +690,8 @@ uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
 
     *ipat = 0;
 
-    if ( (current->domain != d) &&
-         ((d->vcpu == NULL) || ((v = d->vcpu[0]) == NULL)) )
-        return MTRR_TYPE_WRBACK;
-
-    if ( !is_pvh_vcpu(v) &&
-         !v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] )
-        return MTRR_TYPE_WRBACK;
+    if ( v->domain != d )
+        v = d->vcpu ? d->vcpu[0] : NULL;
 
     if ( !mfn_valid(mfn_x(mfn)) )
         return MTRR_TYPE_UNCACHABLE;
@@ -703,14 +699,24 @@ uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
     if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
         return type;
 
-    if ( !iommu_enabled )
+    if ( !iommu_enabled ||
+         (rangeset_is_empty(d->iomem_caps) &&
+          rangeset_is_empty(d->arch.ioport_caps) &&
+          !has_arch_pdevs(d)) )
     {
+        ASSERT(!direct_mmio ||
+               mfn_x(mfn) == d->arch.hvm_domain.vmx.apic_access_mfn);
         *ipat = 1;
         return MTRR_TYPE_WRBACK;
     }
 
     if ( direct_mmio )
-        return MTRR_TYPE_UNCACHABLE;
+    {
+        if ( mfn_x(mfn) != d->arch.hvm_domain.vmx.apic_access_mfn )
+            return MTRR_TYPE_UNCACHABLE;
+        *ipat = 1;
+        return MTRR_TYPE_WRBACK;
+    }
 
     if ( iommu_snoop )
     {
@@ -718,10 +724,41 @@ uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
         return MTRR_TYPE_WRBACK;
     }
 
-    gmtrr_mtype = is_hvm_vcpu(v) ?
+    gmtrr_mtype = is_hvm_domain(d) && v &&
+                  d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
                   get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
                   MTRR_TYPE_WRBACK;
 
     hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT));
-    return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype);
+
+    /* If both types match we're fine. */
+    if ( likely(gmtrr_mtype == hmtrr_mtype) )
+        return hmtrr_mtype;
+
+    /* If either type is UC, we have to go with that one. */
+    if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
+         hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
+        return MTRR_TYPE_UNCACHABLE;
+
+    /* If either type is WB, we have to go with the other one. */
+    if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
+        return hmtrr_mtype;
+    if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
+        return gmtrr_mtype;
+
+    /*
+     * At this point we have disagreeing WC, WT, or WP types. The only
+     * combination that can be cleanly resolved is WT:WP. The ones involving
+     * WC need to be converted to UC, both due to the memory ordering
+     * differences and because WC disallows reads to be cached (WT and WP
+     * permit this), while WT and WP require writes to go straight to memory
+     * (WC can buffer them).
+     */
+    if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
+          hmtrr_mtype == MTRR_TYPE_WRPROT) ||
+         (gmtrr_mtype == MTRR_TYPE_WRPROT &&
+          hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
+        return MTRR_TYPE_WRPROT;
+
+    return MTRR_TYPE_UNCACHABLE;
 }
diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c
index cdedefe..639b4c5 100644
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -78,29 +78,44 @@ static void rtc_update_irq(RTCState *s)
     hvm_isa_irq_assert(vrtc_domain(s), RTC_IRQ);
 }
 
-bool_t rtc_periodic_interrupt(void *opaque)
+/* Called by the VPT code after it's injected a PF interrupt for us.
+ * Fix up the register state to reflect what happened. */
+static void rtc_pf_callback(struct vcpu *v, void *opaque)
 {
     RTCState *s = opaque;
-    bool_t ret;
 
     spin_lock(&s->lock);
-    ret = rtc_mode_is(s, no_ack) || !(s->hw.cmos_data[RTC_REG_C] & RTC_IRQF);
-    if ( rtc_mode_is(s, no_ack) || !(s->hw.cmos_data[RTC_REG_C] & RTC_PF) )
-    {
-        s->hw.cmos_data[RTC_REG_C] |= RTC_PF;
-        rtc_update_irq(s);
-    }
-    else if ( ++(s->pt_dead_ticks) >= 10 )
+
+    if ( !rtc_mode_is(s, no_ack)
+         && (s->hw.cmos_data[RTC_REG_C] & RTC_IRQF)
+         && ++(s->pt_dead_ticks) >= 10 )
     {
         /* VM is ignoring its RTC; no point in running the timer */
         destroy_periodic_time(&s->pt);
-        s->pt_code = 0;
+        s->period = 0;
     }
-    if ( !(s->hw.cmos_data[RTC_REG_C] & RTC_IRQF) )
-        ret = 0;
+
+    s->hw.cmos_data[RTC_REG_C] |= RTC_PF|RTC_IRQF;
+
     spin_unlock(&s->lock);
+}
 
-    return ret;
+/* Check whether the REG_C.PF bit should have been set by a tick since
+ * the last time we looked. This is used to track ticks when REG_B.PIE
+ * is clear; when PIE is set, PF ticks are handled by the VPT callbacks.  */
+static void check_for_pf_ticks(RTCState *s)
+{
+    s_time_t now;
+
+    if ( s->period == 0 || (s->hw.cmos_data[RTC_REG_B] & RTC_PIE) )
+        return;
+
+    now = NOW();
+    if ( (now - s->start_time) / s->period
+         != (s->check_ticks_since - s->start_time) / s->period )
+        s->hw.cmos_data[RTC_REG_C] |= RTC_PF;
+
+    s->check_ticks_since = now;
 }
 
 /* Enable/configure/disable the periodic timer based on the RTC_PIE and
@@ -125,24 +140,29 @@ static void rtc_timer_update(RTCState *s)
     case RTC_REF_CLCK_4MHZ:
         if ( period_code != 0 )
         {
-            if ( period_code != s->pt_code )
+            period = 1 << (period_code - 1); /* period in 32 Khz cycles */
+            period = DIV_ROUND(period * 1000000000ULL, 32768); /* in ns */
+            if ( period != s->period )
             {
-                s->pt_code = period_code;
-                period = 1 << (period_code - 1); /* period in 32 Khz cycles */
-                period = DIV_ROUND(period * 1000000000ULL, 32768); /* in ns */
+                s_time_t now = NOW();
+
+                s->period = period;
                 if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VPT_ALIGN] )
                     delta = 0;
                 else
-                    delta = period - ((NOW() - s->start_time) % period);
-                create_periodic_time(v, &s->pt, delta, period,
-                                     RTC_IRQ, NULL, s);
+                    delta = period - ((now - s->start_time) % period);
+                if ( s->hw.cmos_data[RTC_REG_B] & RTC_PIE )
+                    create_periodic_time(v, &s->pt, delta, period,
+                                         RTC_IRQ, rtc_pf_callback, s);
+                else
+                    s->check_ticks_since = now;
             }
             break;
         }
         /* fall through */
     default:
         destroy_periodic_time(&s->pt);
-        s->pt_code = 0;
+        s->period = 0;
         break;
     }
 }
@@ -484,14 +504,19 @@ static int rtc_ioport_write(void *opaque, uint32_t addr, uint32_t data)
             if ( orig & RTC_SET )
                 rtc_set_time(s);
         }
+        check_for_pf_ticks(s);
         s->hw.cmos_data[RTC_REG_B] = data;
         /*
          * If the interrupt is already set when the interrupt becomes
          * enabled, raise an interrupt immediately.
          */
         rtc_update_irq(s);
-        if ( (data & RTC_PIE) && !(orig & RTC_PIE) )
+        if ( (data ^ orig) & RTC_PIE )
+        {
+            destroy_periodic_time(&s->pt);
+            s->period = 0;
             rtc_timer_update(s);
+        }
         if ( (data ^ orig) & RTC_SET )
             check_update_timer(s);
         if ( (data ^ orig) & (RTC_24H | RTC_DM_BINARY | RTC_SET) )
@@ -645,14 +670,14 @@ static uint32_t rtc_ioport_read(RTCState *s, uint32_t addr)
             ret |= RTC_UIP;
         break;
     case RTC_REG_C:
+        check_for_pf_ticks(s);
         ret = s->hw.cmos_data[s->hw.cmos_index];
         s->hw.cmos_data[RTC_REG_C] = 0x00;
-        if ( (ret & RTC_IRQF) && !rtc_mode_is(s, no_ack) )
+        if ( ret & RTC_IRQF )
             hvm_isa_irq_deassert(d, RTC_IRQ);
-        rtc_update_irq(s);
         check_update_timer(s);
         alarm_timer_update(s);
-        rtc_timer_update(s);
+        s->pt_dead_ticks = 0;
         break;
     default:
         ret = s->hw.cmos_data[s->hw.cmos_index];
@@ -748,7 +773,7 @@ void rtc_reset(struct domain *d)
     RTCState *s = domain_vrtc(d);
 
     destroy_periodic_time(&s->pt);
-    s->pt_code = 0;
+    s->period = 0;
     s->pt.source = PTSRC_isa;
 }
 
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 406d394..3f274f0 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -728,7 +728,7 @@ static void svm_set_rdtsc_exiting(struct vcpu *v, bool_t enable)
     general1_intercepts &= ~GENERAL1_INTERCEPT_RDTSC;
     general2_intercepts &= ~GENERAL2_INTERCEPT_RDTSCP;
 
-    if ( enable && !cpu_has_tsc_ratio )
+    if ( enable )
     {
         general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
         general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
@@ -861,6 +861,14 @@ static void svm_ctxt_switch_from(struct vcpu *v)
 {
     int cpu = smp_processor_id();
 
+    /*
+     * Return early if trying to do a context switch without SVM enabled,
+     * this can happen when the hypervisor shuts down with HVM guests
+     * still running.
+     */
+    if ( unlikely((read_efer() & EFER_SVME) == 0) )
+        return;
+
     svm_fpu_leave(v);
 
     svm_save_dr(v);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 44f33cb..ee1f4d5 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -74,7 +74,7 @@ u64 vmx_ept_vpid_cap __read_mostly;
 static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, vmxon_region);
 static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
-static DEFINE_PER_CPU(bool_t, vmxon);
+DEFINE_PER_CPU(bool_t, vmxon);
 
 static u32 vmcs_revision_id __read_mostly;
 u64 __read_mostly vmx_basic_msr;
@@ -828,8 +828,12 @@ void virtual_vmcs_enter(void *vvmcs)
 
 void virtual_vmcs_exit(void *vvmcs)
 {
+    struct vmcs_struct *cur = this_cpu(current_vmcs);
+
     __vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
-    __vmptrld(virt_to_maddr(this_cpu(current_vmcs)));
+    if ( cur )
+        __vmptrld(virt_to_maddr(cur));
+
 }
 
 u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index f6409d6..2cd4c59 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -617,6 +617,14 @@ static void vmx_fpu_leave(struct vcpu *v)
 
 static void vmx_ctxt_switch_from(struct vcpu *v)
 {
+    /*
+     * Return early if trying to do a context switch without VMX enabled,
+     * this can happen when the hypervisor shuts down with HVM guests
+     * still running.
+     */
+    if ( unlikely(!this_cpu(vmxon)) )
+        return;
+
     vmx_fpu_leave(v);
     vmx_save_guest_msrs(v);
     vmx_restore_host_msrs();
@@ -984,6 +992,8 @@ static void vmx_handle_cd(struct vcpu *v, unsigned long value)
 
             vmx_get_guest_pat(v, pat);
             vmx_set_guest_pat(v, uc_pat);
+            vmx_enable_intercept_for_msr(v, MSR_IA32_CR_PAT,
+                                         MSR_TYPE_R | MSR_TYPE_W);
 
             wbinvd();               /* flush possibly polluted cache */
             hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
@@ -993,6 +1003,9 @@ static void vmx_handle_cd(struct vcpu *v, unsigned long value)
         {
             v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
             vmx_set_guest_pat(v, *pat);
+            if ( !iommu_enabled || iommu_snoop )
+                vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT,
+                                              MSR_TYPE_R | MSR_TYPE_W);
             hvm_asid_flush_vcpu(v); /* no need to flush cache */
         }
     }
@@ -1914,10 +1927,14 @@ static const struct lbr_info *last_branch_msr_get(void)
         case 58: case 62:
         /* Haswell */
         case 60: case 63: case 69: case 70:
+        /* future */
+        case 61: case 78:
             return nh_lbr;
             break;
         /* Atom */
-        case 28:
+        case 28: case 38: case 39: case 53: case 54:
+        /* Silvermont */
+        case 55: case 74: case 77: case 90: case 93:
             return at_lbr;
             break;
         }
@@ -2038,9 +2055,9 @@ static int vmx_alloc_vlapic_mapping(struct domain *d)
     if ( apic_va == NULL )
         return -ENOMEM;
     share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
+    d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
     set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
         _mfn(virt_to_mfn(apic_va)));
-    d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
 
     return 0;
 }
@@ -2541,6 +2558,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
     vcpu_nestedhvm(v).nv_vmswitch_in_progress = 0;
     if ( nestedhvm_vcpu_in_guestmode(v) )
     {
+        paging_update_nestedmode(v);
         if ( nvmx_n2_vmexit_handler(regs, exit_reason) )
             goto out;
     }
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index ee26362..f6e8ec0 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -916,6 +916,10 @@ int vmx_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
         case 0x3f:
         case 0x45:
         case 0x46:
+
+        /* future: */
+        case 0x3d:
+        case 0x4e:
             ret = core2_vpmu_initialise(v, vpmu_flags);
             if ( !ret )
                 vpmu->arch_vpmu_ops = &core2_vpmu_ops;
diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index 1961bda..f7af688 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -231,12 +231,9 @@ int pt_update_irq(struct vcpu *v)
     struct periodic_time *pt, *temp, *earliest_pt;
     uint64_t max_lag;
     int irq, is_lapic;
-    void *pt_priv;
 
- rescan:
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
- rescan_locked:
     earliest_pt = NULL;
     max_lag = -1ULL;
     list_for_each_entry_safe ( pt, temp, head, list )
@@ -270,48 +267,11 @@ int pt_update_irq(struct vcpu *v)
     earliest_pt->irq_issued = 1;
     irq = earliest_pt->irq;
     is_lapic = (earliest_pt->source == PTSRC_lapic);
-    pt_priv = earliest_pt->priv;
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
 
     if ( is_lapic )
         vlapic_set_irq(vcpu_vlapic(v), irq, 0);
-    else if ( irq == RTC_IRQ && pt_priv )
-    {
-        if ( !rtc_periodic_interrupt(pt_priv) )
-            irq = -1;
-
-        pt_lock(earliest_pt);
-
-        if ( irq < 0 && earliest_pt->pending_intr_nr )
-        {
-            /*
-             * RTC periodic timer runs without the corresponding interrupt
-             * being enabled - need to mimic enough of pt_intr_post() to keep
-             * things going.
-             */
-            earliest_pt->pending_intr_nr = 0;
-            earliest_pt->irq_issued = 0;
-            set_timer(&earliest_pt->timer, earliest_pt->scheduled);
-        }
-        else if ( irq >= 0 && pt_irq_masked(earliest_pt) )
-        {
-            if ( earliest_pt->on_list )
-            {
-                /* suspend timer emulation */
-                list_del(&earliest_pt->list);
-                earliest_pt->on_list = 0;
-            }
-            irq = -1;
-        }
-
-        /* Avoid dropping the lock if we can. */
-        if ( irq < 0 && v == earliest_pt->vcpu )
-            goto rescan_locked;
-        pt_unlock(earliest_pt);
-        if ( irq < 0 )
-            goto rescan;
-    }
     else
     {
         hvm_isa_irq_deassert(v->domain, irq);
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index bd72138..a372e0b 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -266,10 +266,10 @@ void vcpu_restore_fpu_lazy(struct vcpu *v)
  * On each context switch, save the necessary FPU info of VCPU being switch 
  * out. It dispatches saving operation based on CPU's capability.
  */
-void vcpu_save_fpu(struct vcpu *v)
+static bool_t _vcpu_save_fpu(struct vcpu *v)
 {
     if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used )
-        return;
+        return 0;
 
     ASSERT(!is_idle_vcpu(v));
 
@@ -284,9 +284,22 @@ void vcpu_save_fpu(struct vcpu *v)
         fpu_fsave(v);
 
     v->fpu_dirtied = 0;
+
+    return 1;
+}
+
+void vcpu_save_fpu(struct vcpu *v)
+{
+    _vcpu_save_fpu(v);
     stts();
 }
 
+void save_fpu_enable(void)
+{
+    if ( !_vcpu_save_fpu(current) )
+        clts();
+}
+
 /* Initialize FPU's context save area */
 int vcpu_init_fpu(struct vcpu *v)
 {
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index db70077..88444be 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1068,13 +1068,13 @@ bool_t cpu_has_pending_apic_eoi(void)
 
 static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
 {
-    if ( !is_hvm_domain(d) && d->arch.pv_domain.pirq_eoi_map )
+    if ( is_pv_domain(d) && d->arch.pv_domain.pirq_eoi_map )
         set_bit(irq, d->arch.pv_domain.pirq_eoi_map);
 }
 
 static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
 {
-    if ( !is_hvm_domain(d) && d->arch.pv_domain.pirq_eoi_map )
+    if ( is_pv_domain(d) && d->arch.pv_domain.pirq_eoi_map )
         clear_bit(irq, d->arch.pv_domain.pirq_eoi_map);
 }
 
diff --git a/xen/arch/x86/microcode_amd.c b/xen/arch/x86/microcode_amd.c
index 3014245..32d7fa1 100644
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -164,7 +164,7 @@ static int apply_microcode(int cpu)
     if ( rev != hdr->patch_id )
     {
         printk(KERN_ERR "microcode: CPU%d update from revision "
-               "%#x to %#x failed\n", cpu, hdr->patch_id, rev);
+               "%#x to %#x failed\n", cpu, rev, hdr->patch_id);
         return -EIO;
     }
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 172c68c..fdc5ed3 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2934,7 +2934,7 @@ long do_mmuext_op(
 
     for ( i = 0; i < count; i++ )
     {
-        if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+        if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
         {
             rc = -EAGAIN;
             break;
@@ -3481,7 +3481,7 @@ long do_mmu_update(
 
     for ( i = 0; i < count; i++ )
     {
-        if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+        if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
         {
             rc = -EAGAIN;
             break;
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 5f75636..71227ef 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -326,7 +326,7 @@ hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
     else
         pages -= d->arch.paging.hap.p2m_pages;
 
-    while ( d->arch.paging.hap.total_pages != pages )
+    for ( ; ; )
     {
         if ( d->arch.paging.hap.total_pages < pages )
         {
@@ -355,6 +355,8 @@ hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
             d->arch.paging.hap.total_pages--;
             free_domheap_page(pg);
         }
+        else
+            break;
 
         /* Check to see if we need to yield and try again */
         if ( preempted && hypercall_preempt_check() )
@@ -709,9 +711,8 @@ hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
     }
 
     safe_write_pte(p, new);
-    if ( (old_flags & _PAGE_PRESENT)
-         && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) )
-             flush_tlb_mask(d->domain_dirty_cpumask);
+    if ( old_flags & _PAGE_PRESENT )
+        flush_tlb_mask(d->domain_dirty_cpumask);
 
     paging_unlock(d);
 
diff --git a/xen/arch/x86/mm/hap/nested_hap.c b/xen/arch/x86/mm/hap/nested_hap.c
index 38e2327..f4c3f34 100644
--- a/xen/arch/x86/mm/hap/nested_hap.c
+++ b/xen/arch/x86/mm/hap/nested_hap.c
@@ -133,7 +133,7 @@ nestedhap_fix_p2m(struct vcpu *v, struct p2m_domain *p2m,
         gdprintk(XENLOG_ERR,
 		"failed to set entry for %#"PRIx64" -> %#"PRIx64"\n",
 		L2_gpa, L0_gpa);
-        BUG();
+        domain_crash(p2m->domain);
     }
 }
 
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index d00e404..909d7a4 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -655,6 +655,38 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
     return rc;
 }
 
+void mem_event_vcpu_pause(struct vcpu *v)
+{
+    ASSERT(v == current);
+
+    atomic_inc(&v->mem_event_pause_count);
+    vcpu_pause_nosync(v);
+}
+
+void mem_event_vcpu_unpause(struct vcpu *v)
+{
+    int old, new, prev = v->mem_event_pause_count.counter;
+
+    /* All unpause requests are the result of toolstack responses.  Prevent
+     * underflow of the vcpu pause count. */
+    do
+    {
+        old = prev;
+        new = old - 1;
+
+        if ( new < 0 )
+        {
+            printk(XENLOG_G_WARNING
+                   "d%d:v%d mem_event: Too many unpause attempts\n",
+                   v->domain->domain_id, v->vcpu_id);
+            return;
+        }
+
+        prev = cmpxchg(&v->mem_event_pause_count.counter, old, new);
+    } while ( prev != old );
+
+    vcpu_unpause(v);
+}
 
 /*
  * Local variables:
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 4a5d9e8..11df871 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -568,7 +568,7 @@ int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
     if ( v->domain == d )
     {
         req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
-        vcpu_pause_nosync(v);
+        mem_event_vcpu_pause(v);
     }
 
     req.p2mt = p2m_ram_shared;
@@ -596,11 +596,20 @@ int mem_sharing_sharing_resume(struct domain *d)
     /* Get all requests off the ring */
     while ( mem_event_get_response(d, &d->mem_event->share, &rsp) )
     {
+        struct vcpu *v;
+
         if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
             continue;
+
+        /* Validate the vcpu_id in the response. */
+        if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
+            continue;
+
+        v = d->vcpu[rsp.vcpu_id];
+
         /* Unpause domain/vcpu */
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+            mem_event_vcpu_unpause(v);
     }
 
     return 0;
@@ -1268,8 +1277,8 @@ int relinquish_shared_pages(struct domain *d)
         return 0;
 
     p2m_lock(p2m);
-    for (gfn = p2m->next_shared_gfn_to_relinquish; 
-         gfn < p2m->max_mapped_pfn; gfn++ )
+    for ( gfn = p2m->next_shared_gfn_to_relinquish;
+          gfn <= p2m->max_mapped_pfn; gfn++ )
     {
         p2m_access_t a;
         p2m_type_t t;
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index 81645c4..d14565d 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -242,7 +242,8 @@ p2m_pod_set_cache_target(struct p2m_domain *p2m, unsigned long pod_target, int p
 
         p2m_pod_cache_add(p2m, page, order);
 
-        if ( hypercall_preempt_check() && preemptible )
+        if ( preemptible && pod_target != p2m->pod.count &&
+             hypercall_preempt_check() )
         {
             ret = -EAGAIN;
             goto out;
@@ -286,7 +287,8 @@ p2m_pod_set_cache_target(struct p2m_domain *p2m, unsigned long pod_target, int p
 
             put_page(page+i);
 
-            if ( hypercall_preempt_check() && preemptible )
+            if ( preemptible && pod_target != p2m->pod.count &&
+                 hypercall_preempt_check() )
             {
                 ret = -EAGAIN;
                 goto out;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 8f380ed..246ddd7 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1094,7 +1094,7 @@ void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
     /* Pause domain if request came from guest and gfn has paging type */
     if ( p2m_is_paging(p2mt) && v->domain == d )
     {
-        vcpu_pause_nosync(v);
+        mem_event_vcpu_pause(v);
         req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
     }
     /* No need to inform pager if the gfn is not in the page-out path */
@@ -1228,8 +1228,17 @@ void p2m_mem_paging_resume(struct domain *d)
     /* Pull all responses off the ring */
     while( mem_event_get_response(d, &d->mem_event->paging, &rsp) )
     {
+        struct vcpu *v;
+
         if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
             continue;
+
+        /* Validate the vcpu_id in the response. */
+        if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
+            continue;
+
+        v = d->vcpu[rsp.vcpu_id];
+
         /* Fix p2m entry if the page was not dropped */
         if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
         {
@@ -1248,7 +1257,7 @@ void p2m_mem_paging_resume(struct domain *d)
         }
         /* Unpause domain */
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+            mem_event_vcpu_unpause(v);
     }
 }
 
@@ -1343,7 +1352,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
 
     /* Pause the current VCPU */
     if ( p2ma != p2m_access_n2rwx )
-        vcpu_pause_nosync(v);
+        mem_event_vcpu_pause(v);
 
     /* VCPU may be paused, return whether we promoted automatically */
     return (p2ma == p2m_access_n2rwx);
@@ -1356,25 +1365,33 @@ void p2m_mem_access_resume(struct domain *d)
     /* Pull all responses off the ring */
     while( mem_event_get_response(d, &d->mem_event->access, &rsp) )
     {
+        struct vcpu *v;
+
         if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
             continue;
+
+        /* Validate the vcpu_id in the response. */
+        if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
+            continue;
+
+        v = d->vcpu[rsp.vcpu_id];
+
         /* Unpause domain */
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+            mem_event_vcpu_unpause(v);
     }
 }
 
 /* Set access type for a region of pfns.
  * If start_pfn == -1ul, sets the default access type */
-int p2m_set_mem_access(struct domain *d, unsigned long start_pfn, 
-                       uint32_t nr, hvmmem_access_t access) 
+long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
+                        hvmmem_access_t access)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
-    unsigned long pfn;
     p2m_access_t a, _a;
     p2m_type_t t;
     mfn_t mfn;
-    int rc = 0;
+    long rc;
 
     /* N.B. _not_ static: initializer depends on p2m->default_access */
     p2m_access_t memaccess[] = {
@@ -1397,14 +1414,17 @@ int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
     a = memaccess[access];
 
     /* If request to set default access */
-    if ( start_pfn == ~0ull ) 
+    if ( pfn == ~0ul )
     {
         p2m->default_access = a;
         return 0;
     }
 
+    if ( !nr )
+        return 0;
+
     p2m_lock(p2m);
-    for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
+    for ( ; ; ++pfn )
     {
         mfn = p2m->get_entry(p2m, pfn, &t, &_a, 0, NULL);
         if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a) == 0 )
@@ -1412,6 +1432,13 @@ int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
             rc = -ENOMEM;
             break;
         }
+
+        /* Check for continuation if it's not the last iteration. */
+        if ( !--nr || hypercall_preempt_check() )
+        {
+            rc = nr;
+            break;
+        }
     }
     p2m_unlock(p2m);
     return rc;
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 11c6b62..be095f6 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1674,7 +1674,7 @@ static unsigned int sh_set_allocation(struct domain *d,
     SHADOW_PRINTK("current %i target %i\n", 
                    d->arch.paging.shadow.total_pages, pages);
 
-    while ( d->arch.paging.shadow.total_pages != pages ) 
+    for ( ; ; )
     {
         if ( d->arch.paging.shadow.total_pages < pages ) 
         {
@@ -1709,6 +1709,8 @@ static unsigned int sh_set_allocation(struct domain *d,
             d->arch.paging.shadow.total_pages--;
             free_domheap_page(sp);
         }
+        else
+            break;
 
         /* Check to see if we need to yield and try again */
         if ( preempted && hypercall_preempt_check() )
@@ -3487,9 +3489,7 @@ int shadow_track_dirty_vram(struct domain *d,
     struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    if (end_pfn < begin_pfn
-            || begin_pfn > p2m->max_mapped_pfn
-            || end_pfn >= p2m->max_mapped_pfn)
+    if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
         return -EINVAL;
 
     /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 1aaceeb..61d6dd0 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -636,7 +636,7 @@ static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf)
             return 0;
         base = pos + PCI_SRIOV_BAR;
         vf -= PCI_BDF(bus, slot, func) + offset;
-        if ( vf < 0 || (vf && vf % stride) )
+        if ( vf < 0 )
             return 0;
         if ( stride )
         {
diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
index c67a9c3..7aaab8a 100644
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -118,11 +118,12 @@ int __init check_nmi_watchdog (void)
 {
     static unsigned int __initdata prev_nmi_count[NR_CPUS];
     int cpu;
-    
+    bool_t ok = 1;
+
     if ( !nmi_watchdog )
         return 0;
 
-    printk("Testing NMI watchdog --- ");
+    printk("Testing NMI watchdog on all CPUs:");
 
     for_each_online_cpu ( cpu )
         prev_nmi_count[cpu] = nmi_count(cpu);
@@ -137,12 +138,13 @@ int __init check_nmi_watchdog (void)
     for_each_online_cpu ( cpu )
     {
         if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
-            printk("CPU#%d stuck. ", cpu);
-        else
-            printk("CPU#%d okay. ", cpu);
+        {
+            printk(" %d", cpu);
+            ok = 0;
+        }
     }
 
-    printk("\n");
+    printk(" %s\n", ok ? "ok" : "stuck");
 
     /*
      * Now that we know it works we can reduce NMI frequency to
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 6e31e1f..f80d661 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -37,6 +37,7 @@
 #include <asm/hpet.h>
 #include <io_ports.h>
 #include <asm/setup.h> /* for early_time_init */
+#include <asm/hvm/svm/svm.h> /* for cpu_has_tsc_ratio */
 #include <public/arch-x86/cpuid.h>
 
 /* opt_clocksource: Force clocksource to one of: pit, hpet, acpi. */
@@ -1632,6 +1633,7 @@ int dom0_pit_access(struct ioreq *ioreq)
             outb(ioreq->data, PIT_MODE);
             return 1;
         }
+        break;
 
     case 0x61:
         if ( ioreq->dir == IOREQ_READ )
@@ -1883,10 +1885,16 @@ void tsc_set_info(struct domain *d,
         d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
         d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
         set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
-        /* use native TSC if initial host has safe TSC, has not migrated
-         * yet and tsc_khz == cpu_khz */
-        if ( host_tsc_is_safe() && incarnation == 0 &&
-                d->arch.tsc_khz == cpu_khz )
+        /*
+         * Use native TSC if the host has safe TSC and:
+         *  HVM/PVH: host and guest frequencies are the same (either
+         *           "naturally" or via TSC scaling)
+         *  PV: guest has not migrated yet (and thus arch.tsc_khz == cpu_khz)
+         */
+        if ( host_tsc_is_safe() &&
+             ((has_hvm_container_domain(d) &&
+               (d->arch.tsc_khz == cpu_khz || cpu_has_tsc_ratio)) ||
+              incarnation == 0) )
             d->arch.vtsc = 0;
         else 
             d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 0bd43b9..4d27e12 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3595,13 +3595,6 @@ long do_set_trap_table(XEN_GUEST_HANDLE_PARAM(const_trap_info_t) traps)
 
     for ( ; ; )
     {
-        if ( hypercall_preempt_check() )
-        {
-            rc = hypercall_create_continuation(
-                __HYPERVISOR_set_trap_table, "h", traps);
-            break;
-        }
-
         if ( copy_from_guest(&cur, traps, 1) )
         {
             rc = -EFAULT;
@@ -3622,6 +3615,13 @@ long do_set_trap_table(XEN_GUEST_HANDLE_PARAM(const_trap_info_t) traps)
             init_int80_direct_trap(curr);
 
         guest_handle_add_offset(traps, 1);
+
+        if ( hypercall_preempt_check() )
+        {
+            rc = hypercall_create_continuation(
+                __HYPERVISOR_set_trap_table, "h", traps);
+            break;
+        }
     }
 
     return rc;
diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c
index 21a82b9..5f0ea0a 100644
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -329,13 +329,6 @@ int compat_set_trap_table(XEN_GUEST_HANDLE(trap_info_compat_t) traps)
 
     for ( ; ; )
     {
-        if ( hypercall_preempt_check() )
-        {
-            rc = hypercall_create_continuation(
-                __HYPERVISOR_set_trap_table, "h", traps);
-            break;
-        }
-
         if ( copy_from_guest(&cur, traps, 1) )
         {
             rc = -EFAULT;
@@ -353,6 +346,13 @@ int compat_set_trap_table(XEN_GUEST_HANDLE(trap_info_compat_t) traps)
             init_int80_direct_trap(current);
 
         guest_handle_add_offset(traps, 1);
+
+        if ( hypercall_preempt_check() )
+        {
+            rc = hypercall_create_continuation(
+                __HYPERVISOR_set_trap_table, "h", traps);
+            break;
+        }
     }
 
     return rc;
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index e833cdf..50d8965 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -720,29 +720,26 @@ do{ uint8_t stub[] = { _bytes, 0xc3 };                                  \
     put_fpu(&fic);                                                      \
 } while (0)
 
-static unsigned long __get_rep_prefix(
-    struct cpu_user_regs *int_regs,
-    struct cpu_user_regs *ext_regs,
+static unsigned long _get_rep_prefix(
+    const struct cpu_user_regs *int_regs,
     int ad_bytes)
 {
-    unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx :
-                         (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
-                         int_regs->ecx);
-
-    /* Skip the instruction if no repetitions are required. */
-    if ( ecx == 0 )
-        ext_regs->eip = int_regs->eip;
-
-    return ecx;
+    return (ad_bytes == 2) ? (uint16_t)int_regs->ecx :
+           (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
+           int_regs->ecx;
 }
 
 #define get_rep_prefix() ({                                             \
     unsigned long max_reps = 1;                                         \
     if ( rep_prefix() )                                                 \
-        max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes);      \
+        max_reps = _get_rep_prefix(&_regs, ad_bytes);                   \
     if ( max_reps == 0 )                                                \
-        goto done;                                                      \
-   max_reps;                                                            \
+    {                                                                   \
+        /* Skip the instruction if no repetitions are required. */      \
+        dst.type = OP_NONE;                                             \
+        goto writeback;                                                 \
+    }                                                                   \
+    max_reps;                                                           \
 })
 
 static void __put_rep_prefix(
@@ -3921,7 +3918,8 @@ x86_emulate(
         if ( !rc && (b & 1) && (ea.type == OP_MEM) )
             rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
                             ea.bytes, ctxt);
-        goto done;
+        dst.type = OP_NONE;
+        break;
     }
 
     case 0x20: /* mov cr,reg */
@@ -4188,7 +4186,8 @@ x86_emulate(
         if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) )
             rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
                             ea.bytes, ctxt);
-        goto done;
+        dst.type = OP_NONE;
+        break;
     }
 
     case 0x80 ... 0x8f: /* jcc (near) */ {
diff --git a/xen/common/compat/multicall.c b/xen/common/compat/multicall.c
index 95c047a..2af8aef 100644
--- a/xen/common/compat/multicall.c
+++ b/xen/common/compat/multicall.c
@@ -29,7 +29,7 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_compat_t);
 
 static void __trace_multicall_call(multicall_entry_t *call)
 {
-    unsigned long args[6];
+    xen_ulong_t args[6];
     int i;
 
     for ( i = 0; i < ARRAY_SIZE(args); i++ )
diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
index 933de78..e46e930 100644
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -471,12 +471,24 @@ static void cpupool_cpu_add(unsigned int cpu)
  */
 static int cpupool_cpu_remove(unsigned int cpu)
 {
-    int ret = 0;
+    int ret = -EBUSY;
+    struct cpupool **c;
 
     spin_lock(&cpupool_lock);
-    if ( !cpumask_test_cpu(cpu, cpupool0->cpu_valid))
-        ret = -EBUSY;
+    if ( cpumask_test_cpu(cpu, cpupool0->cpu_valid) )
+        ret = 0;
     else
+    {
+        for_each_cpupool(c)
+        {
+            if ( cpumask_test_cpu(cpu, (*c)->cpu_suspended ) )
+            {
+                ret = 0;
+                break;
+            }
+        }
+    }
+    if ( !ret )
         cpumask_set_cpu(cpu, &cpupool_locked_cpus);
     spin_unlock(&cpupool_lock);
 
diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
index 55716a8..2ebacb2 100644
--- a/xen/common/device_tree.c
+++ b/xen/common/device_tree.c
@@ -350,9 +350,11 @@ static void __init process_multiboot_node(const void *fdt, int node,
     struct dt_mb_module *mod;
     int len;
 
-    if ( fdt_node_check_compatible(fdt, node, "xen,linux-zimage") == 0 )
+    if ( fdt_node_check_compatible(fdt, node, "xen,linux-zimage") == 0 ||
+         fdt_node_check_compatible(fdt, node, "multiboot,kernel") == 0 )
         nr = MOD_KERNEL;
-    else if ( fdt_node_check_compatible(fdt, node, "xen,linux-initrd") == 0)
+    else if ( fdt_node_check_compatible(fdt, node, "xen,linux-initrd") == 0 ||
+              fdt_node_check_compatible(fdt, node, "multiboot,ramdisk") == 0 )
         nr = MOD_INITRD;
     else
         early_panic("%s not a known xen multiboot type\n", name);
@@ -443,7 +445,8 @@ static int __init early_scan_node(const void *fdt,
 {
     if ( device_tree_node_matches(fdt, node, "memory") )
         process_memory_node(fdt, node, name, address_cells, size_cells);
-    else if ( device_tree_node_compatible(fdt, node, "xen,multiboot-module" ) )
+    else if ( device_tree_node_compatible(fdt, node, "xen,multiboot-module" ) ||
+              device_tree_node_compatible(fdt, node, "multiboot,module" ))
         process_multiboot_node(fdt, node, name, address_cells, size_cells);
     else if ( depth == 1 && device_tree_node_matches(fdt, node, "chosen") )
         process_chosen_node(fdt, node, name, address_cells, size_cells);
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 2636fc9..1308193 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -264,7 +264,7 @@ struct domain *domain_create(
         if ( (err = xsm_domain_create(XSM_HOOK, d, ssidref)) != 0 )
             goto fail;
 
-        d->is_paused_by_controller = 1;
+        d->controller_pause_count = 1;
         atomic_inc(&d->pause_count);
 
         if ( domid )
@@ -539,6 +539,8 @@ int domain_kill(struct domain *d)
             BUG_ON(rc != -EAGAIN);
             break;
         }
+        if ( sched_move_domain(d, cpupool0) )
+            return -EAGAIN;
         for_each_vcpu ( d, v )
             unmap_vcpu_info(v);
         d->is_dying = DOMDYING_dead;
@@ -678,18 +680,13 @@ void vcpu_end_shutdown_deferral(struct vcpu *v)
 #ifdef HAS_GDBSX
 void domain_pause_for_debugger(void)
 {
-    struct domain *d = current->domain;
-    struct vcpu *v;
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
 
-    atomic_inc(&d->pause_count);
-    if ( test_and_set_bool(d->is_paused_by_controller) )
-        domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */
-
-    for_each_vcpu ( d, v )
-        vcpu_sleep_nosync(v);
+    domain_pause_by_systemcontroller_nosync(d);
 
     /* if gdbsx active, we just need to pause the domain */
-    if (current->arch.gdbsx_vcpu_event == 0)
+    if ( curr->arch.gdbsx_vcpu_event == 0 )
         send_global_virq(VIRQ_DEBUGGER);
 }
 #endif
@@ -721,8 +718,6 @@ static void complete_domain_destroy(struct rcu_head *head)
 
     sched_destroy_domain(d);
 
-    cpupool_rm_domain(d);
-
     /* Free page used by xen oprofile buffer. */
 #ifdef CONFIG_XENOPROF
     free_xenoprof_pages(d);
@@ -770,6 +765,8 @@ void domain_destroy(struct domain *d)
     if ( _atomic_read(old) != 0 )
         return;
 
+    cpupool_rm_domain(d);
+
     /* Delete from task list and task hashtable. */
     TRACE_1D(TRC_SCHED_DOM_REM, d->domain_id);
     spin_lock(&domlist_update_lock);
@@ -837,17 +834,49 @@ void domain_unpause(struct domain *d)
             vcpu_wake(v);
 }
 
-void domain_pause_by_systemcontroller(struct domain *d)
+int __domain_pause_by_systemcontroller(struct domain *d,
+                                       void (*pause_fn)(struct domain *d))
 {
-    domain_pause(d);
-    if ( test_and_set_bool(d->is_paused_by_controller) )
-        domain_unpause(d);
+    int old, new, prev = d->controller_pause_count;
+
+    do
+    {
+        old = prev;
+        new = old + 1;
+
+        /*
+         * Limit the toolstack pause count to an arbitrary 255 to prevent the
+         * toolstack overflowing d->pause_count with many repeated hypercalls.
+         */
+        if ( new > 255 )
+            return -EUSERS;
+
+        prev = cmpxchg(&d->controller_pause_count, old, new);
+    } while ( prev != old );
+
+    pause_fn(d);
+
+    return 0;
 }
 
-void domain_unpause_by_systemcontroller(struct domain *d)
+int domain_unpause_by_systemcontroller(struct domain *d)
 {
-    if ( test_and_clear_bool(d->is_paused_by_controller) )
-        domain_unpause(d);
+    int old, new, prev = d->controller_pause_count;
+
+    do
+    {
+        old = prev;
+        new = old - 1;
+
+        if ( new < 0 )
+            return -EINVAL;
+
+        prev = cmpxchg(&d->controller_pause_count, old, new);
+    } while ( prev != old );
+
+    domain_unpause(d);
+
+    return 0;
 }
 
 int vcpu_reset(struct vcpu *v)
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 7cf610a..060af1b 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -181,7 +181,7 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
     info->flags = (info->nr_online_vcpus ? flags : 0) |
         ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying    : 0) |
         (d->is_shut_down                ? XEN_DOMINF_shutdown : 0) |
-        (d->is_paused_by_controller     ? XEN_DOMINF_paused   : 0) |
+        (d->controller_pause_count > 0  ? XEN_DOMINF_paused   : 0) |
         (d->debugger_attached           ? XEN_DOMINF_debugged : 0) |
         d->shutdown_code << XEN_DOMINF_shutdownshift;
 
@@ -384,22 +384,14 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     break;
 
     case XEN_DOMCTL_pausedomain:
-    {
         ret = -EINVAL;
         if ( d != current->domain )
-        {
-            domain_pause_by_systemcontroller(d);
-            ret = 0;
-        }
-    }
-    break;
+            ret = domain_pause_by_systemcontroller(d);
+        break;
 
     case XEN_DOMCTL_unpausedomain:
-    {
-        domain_unpause_by_systemcontroller(d);
-        ret = 0;
-    }
-    break;
+        ret = domain_unpause_by_systemcontroller(d);
+        break;
 
     case XEN_DOMCTL_resumedomain:
     {
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index db952af..6fd4a2c 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -275,12 +275,12 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
         goto out;
 
     lchn->u.interdomain.remote_dom  = rd;
-    lchn->u.interdomain.remote_port = (u16)rport;
+    lchn->u.interdomain.remote_port = rport;
     lchn->state                     = ECS_INTERDOMAIN;
     evtchn_port_init(ld, lchn);
     
     rchn->u.interdomain.remote_dom  = ld;
-    rchn->u.interdomain.remote_port = (u16)lport;
+    rchn->u.interdomain.remote_port = lport;
     rchn->state                     = ECS_INTERDOMAIN;
 
     /*
diff --git a/xen/common/kexec.c b/xen/common/kexec.c
index 481b0c2..23d964e 100644
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -265,6 +265,8 @@ static int noinline one_cpu_only(void)
     }
 
     set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags);
+    printk("Executing kexec image on cpu%u\n", cpu);
+
     return 0;
 }
 
@@ -340,8 +342,6 @@ void kexec_crash(void)
     if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
         return;
 
-    printk("Executing crash image\n");
-
     kexecing = TRUE;
 
     if ( kexec_common_shutdown() != 0 )
diff --git a/xen/common/kimage.c b/xen/common/kimage.c
index 91943f1..9b79a5e 100644
--- a/xen/common/kimage.c
+++ b/xen/common/kimage.c
@@ -182,6 +182,7 @@ static int do_kimage_alloc(struct kexec_image **rimage, paddr_t entry,
         goto out;
 
     /* Add an empty indirection page. */
+    result = -ENOMEM;
     image->entry_page = kimage_alloc_control_page(image, 0);
     if ( !image->entry_page )
         goto out;
diff --git a/xen/common/lz4/decompress.c b/xen/common/lz4/decompress.c
index 40b3381..5cf8f37 100644
--- a/xen/common/lz4/decompress.c
+++ b/xen/common/lz4/decompress.c
@@ -84,6 +84,8 @@ static int INIT lz4_uncompress(const unsigned char *source, unsigned char *dest,
 			ip += length;
 			break; /* EOF */
 		}
+		if (unlikely((unsigned long)cpy < (unsigned long)op))
+			goto _output_error;
 		LZ4_WILDCOPY(ip, op, cpy);
 		ip -= (op - cpy);
 		op = cpy;
@@ -142,6 +144,8 @@ static int INIT lz4_uncompress(const unsigned char *source, unsigned char *dest,
 				goto _output_error;
 			continue;
 		}
+		if (unlikely((unsigned long)cpy < (unsigned long)op))
+			goto _output_error;
 		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
@@ -207,6 +211,8 @@ static int lz4_uncompress_unknownoutputsize(const unsigned char *source,
 			op += length;
 			break;/* Necessarily EOF, due to parsing restrictions */
 		}
+		if (unlikely((unsigned long)cpy < (unsigned long)op))
+			goto _output_error;
 		LZ4_WILDCOPY(ip, op, cpy);
 		ip -= (op - cpy);
 		op = cpy;
@@ -270,6 +276,8 @@ static int lz4_uncompress_unknownoutputsize(const unsigned char *source,
 				goto _output_error;
 			continue;
 		}
+		if (unlikely((unsigned long)cpy < (unsigned long)op))
+			goto _output_error;
 		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 5a0efd5..4d6ffee 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -63,7 +63,7 @@ static void increase_reservation(struct memop_args *a)
 
     for ( i = a->nr_done; i < a->nr_extents; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( i != a->nr_done && hypercall_preempt_check() )
         {
             a->preempted = 1;
             goto out;
@@ -109,7 +109,7 @@ static void populate_physmap(struct memop_args *a)
 
     for ( i = a->nr_done; i < a->nr_extents; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( i != a->nr_done && hypercall_preempt_check() )
         {
             a->preempted = 1;
             goto out;
@@ -268,7 +268,7 @@ static void decrease_reservation(struct memop_args *a)
 
     for ( i = a->nr_done; i < a->nr_extents; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( i != a->nr_done && hypercall_preempt_check() )
         {
             a->preempted = 1;
             goto out;
@@ -398,7 +398,8 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
           i < (exch.in.nr_extents >> in_chunk_order);
           i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( i != (exch.nr_exchanged >> in_chunk_order) &&
+             hypercall_preempt_check() )
         {
             exch.nr_exchanged = i << in_chunk_order;
             rcu_unlock_domain(d);
diff --git a/xen/common/multicall.c b/xen/common/multicall.c
index 2afba98..fa9d910 100644
--- a/xen/common/multicall.c
+++ b/xen/common/multicall.c
@@ -35,10 +35,10 @@ static void trace_multicall_call(multicall_entry_t *call)
 
 ret_t
 do_multicall(
-    XEN_GUEST_HANDLE_PARAM(multicall_entry_t) call_list, unsigned int nr_calls)
+    XEN_GUEST_HANDLE_PARAM(multicall_entry_t) call_list, uint32_t nr_calls)
 {
     struct mc_state *mcs = &current->mc_state;
-    unsigned int     i;
+    uint32_t         i;
     int              rc = 0;
 
     if ( unlikely(__test_and_set_bit(_MCSF_in_multicall, &mcs->flags)) )
@@ -52,7 +52,7 @@ do_multicall(
 
     for ( i = 0; !rc && i < nr_calls; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if ( i && hypercall_preempt_check() )
             goto preempted;
 
         if ( unlikely(__copy_from_guest(&mcs->call, call_list, 1)) )
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 601319c..5cbdeb7 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1409,7 +1409,10 @@ void free_xenheap_pages(void *v, unsigned int order)
     pg = virt_to_page(v);
 
     for ( i = 0; i < (1u << order); i++ )
+    {
+        scrub_one_page(&pg[i]);
         pg[i].count_info &= ~PGC_xen_heap;
+    }
 
     free_heap_pages(pg, order);
 }
@@ -1579,6 +1582,8 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
     else
     {
         /* Freeing anonymous domain-heap pages. */
+        for ( i = 0; i < (1 << order); i++ )
+            scrub_one_page(&pg[i]);
         free_heap_pages(pg, order);
         drop_dom_ref = 0;
     }
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 1192fe2..2f28487 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -297,7 +297,8 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
         spin_unlock_irq(lock);
 
         v->sched_priv = vcpu_priv[v->vcpu_id];
-        evtchn_move_pirqs(v);
+        if ( !d->is_dying )
+            evtchn_move_pirqs(v);
 
         new_p = cpumask_cycle(new_p, c->cpu_valid);
 
diff --git a/xen/common/timer.c b/xen/common/timer.c
index 1895a78..f36aebc 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -492,8 +492,9 @@ static void timer_softirq_action(void)
         deadline = heap[1]->expires;
     if ( (ts->list != NULL) && (ts->list->expires < deadline) )
         deadline = ts->list->expires;
+    now = NOW();
     this_cpu(timer_deadline) =
-        (deadline == STIME_MAX) ? 0 : deadline + timer_slop;
+        (deadline == STIME_MAX) ? 0 : MAX(deadline, now + timer_slop);
 
     if ( !reprogram_timer(this_cpu(timer_deadline)) )
         raise_softirq(TIMER_SOFTIRQ);
diff --git a/xen/common/tmem.c b/xen/common/tmem.c
index d9e912b..ea4d9cc 100644
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1845,8 +1845,6 @@ static int do_tmem_destroy_pool(uint32_t pool_id)
     struct client *client = current->domain->tmem_client;
     struct tmem_pool *pool;
 
-    if ( client->pools == NULL )
-        return 0;
     if ( pool_id >= MAX_POOLS_PER_DOMAIN )
         return 0;
     if ( (pool = client->pools[pool_id]) == NULL )
diff --git a/xen/common/trace.c b/xen/common/trace.c
index 41ddc33..0bc8ca2 100644
--- a/xen/common/trace.c
+++ b/xen/common/trace.c
@@ -817,7 +817,7 @@ unlock:
 }
 
 void __trace_hypercall(uint32_t event, unsigned long op,
-                       const unsigned long *args)
+                       const xen_ulong_t *args)
 {
     struct {
         uint32_t op;
diff --git a/xen/common/xmalloc_tlsf.c b/xen/common/xmalloc_tlsf.c
index d3bdfa7..a5769c9 100644
--- a/xen/common/xmalloc_tlsf.c
+++ b/xen/common/xmalloc_tlsf.c
@@ -527,11 +527,10 @@ static void xmalloc_pool_put(void *p)
 
 static void *xmalloc_whole_pages(unsigned long size, unsigned long align)
 {
-    unsigned int i, order = get_order_from_bytes(size);
+    unsigned int i, order;
     void *res, *p;
 
-    if ( align > size )
-        get_order_from_bytes(align);
+    order = get_order_from_bytes(max(align, size));
 
     res = alloc_xenheap_pages(order, 0);
     if ( res == NULL )
diff --git a/xen/drivers/acpi/apei/Makefile b/xen/drivers/acpi/apei/Makefile
index af6ecb0..6ad7c64 100644
--- a/xen/drivers/acpi/apei/Makefile
+++ b/xen/drivers/acpi/apei/Makefile
@@ -1,3 +1,4 @@
 obj-y += erst.o
+obj-y += hest.o
 obj-y += apei-base.o
 obj-y += apei-io.o
diff --git a/xen/drivers/acpi/apei/erst.c b/xen/drivers/acpi/apei/erst.c
index 98f73d9..2583afa 100644
--- a/xen/drivers/acpi/apei/erst.c
+++ b/xen/drivers/acpi/apei/erst.c
@@ -760,21 +760,27 @@ int __init erst_init(void)
 {
 	int rc = 0;
 	acpi_status status;
+	acpi_physical_address erst_addr;
+	acpi_native_uint erst_len;
 	struct apei_exec_context ctx;
 
 	if (acpi_disabled)
 		return -ENODEV;
 
-	status = acpi_get_table(ACPI_SIG_ERST, 0,
-				(struct acpi_table_header **)&erst_tab);
+	status = acpi_get_table_phys(ACPI_SIG_ERST, 0, &erst_addr, &erst_len);
 	if (status == AE_NOT_FOUND) {
 		printk(KERN_INFO "ERST table was not found\n");
 		return -ENODEV;
-	} else if (ACPI_FAILURE(status)) {
+	}
+	if (ACPI_FAILURE(status)) {
 		const char *msg = acpi_format_exception(status);
 		printk(KERN_WARNING "Failed to get ERST table: %s\n", msg);
 		return -EINVAL;
 	}
+	map_pages_to_xen((unsigned long)__va(erst_addr), PFN_DOWN(erst_addr),
+			 PFN_UP(erst_addr + erst_len) - PFN_DOWN(erst_addr),
+			 PAGE_HYPERVISOR);
+	erst_tab = __va(erst_addr);
 
 	rc = erst_check_table(erst_tab);
 	if (rc) {
diff --git a/xen/drivers/acpi/apei/hest.c b/xen/drivers/acpi/apei/hest.c
new file mode 100644
index 0000000..b8790a6
--- /dev/null
+++ b/xen/drivers/acpi/apei/hest.c
@@ -0,0 +1,200 @@
+/*
+ * APEI Hardware Error Source Table support
+ *
+ * HEST describes error sources in detail; communicates operational
+ * parameters (i.e. severity levels, masking bits, and threshold
+ * values) to Linux as necessary. It also allows the BIOS to report
+ * non-standard error sources to Linux (for example, chipset-specific
+ * error registers).
+ *
+ * For more information about HEST, please refer to ACPI Specification
+ * version 4.0, section 17.3.2.
+ *
+ * Copyright 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang at intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <xen/kernel.h>
+#include <xen/mm.h>
+#include <xen/pfn.h>
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#define HEST_PFX "HEST: "
+
+static bool_t hest_disable;
+boolean_param("hest_disable", hest_disable);
+
+/* HEST table parsing */
+
+static struct acpi_table_hest *__read_mostly hest_tab;
+
+static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+	[ACPI_HEST_TYPE_IA32_CHECK] = -1,	/* need further calculation */
+	[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
+	[ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
+	[ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
+	[ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
+	[ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
+	[ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+};
+
+static int hest_esrc_len(const struct acpi_hest_header *hest_hdr)
+{
+	u16 hest_type = hest_hdr->type;
+	int len;
+
+	if (hest_type >= ACPI_HEST_TYPE_RESERVED)
+		return 0;
+
+	len = hest_esrc_len_tab[hest_type];
+
+	if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
+		const struct acpi_hest_ia_corrected *cmc =
+			container_of(hest_hdr,
+				     const struct acpi_hest_ia_corrected,
+				     header);
+
+		len = sizeof(*cmc) + cmc->num_hardware_banks *
+		      sizeof(struct acpi_hest_ia_error_bank);
+	} else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
+		const struct acpi_hest_ia_machine_check *mc =
+			container_of(hest_hdr,
+				     const struct acpi_hest_ia_machine_check,
+				     header);
+
+		len = sizeof(*mc) + mc->num_hardware_banks *
+		      sizeof(struct acpi_hest_ia_error_bank);
+	}
+	BUG_ON(len == -1);
+
+	return len;
+};
+
+int apei_hest_parse(apei_hest_func_t func, void *data)
+{
+	struct acpi_hest_header *hest_hdr;
+	int i, rc, len;
+
+	if (hest_disable || !hest_tab)
+		return -EINVAL;
+
+	hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
+	for (i = 0; i < hest_tab->error_source_count; i++) {
+		len = hest_esrc_len(hest_hdr);
+		if (!len) {
+			printk(XENLOG_WARNING HEST_PFX
+			       "Unknown or unused hardware error source "
+			       "type: %d for hardware error source: %d\n",
+			       hest_hdr->type, hest_hdr->source_id);
+			return -EINVAL;
+		}
+		if ((void *)hest_hdr + len >
+		    (void *)hest_tab + hest_tab->header.length) {
+			printk(XENLOG_WARNING HEST_PFX
+			       "Table contents overflow for hardware error source: %d\n",
+			       hest_hdr->source_id);
+			return -EINVAL;
+		}
+
+		rc = func(hest_hdr, data);
+		if (rc)
+			return rc;
+
+		hest_hdr = (void *)hest_hdr + len;
+	}
+
+	return 0;
+}
+
+/*
+ * Check if firmware advertises firmware first mode. We need FF bit to be set
+ * along with a set of MC banks which work in FF mode.
+ */
+static int __init hest_parse_cmc(const struct acpi_hest_header *hest_hdr,
+				 void *data)
+{
+#ifdef CONFIG_X86_MCE
+	unsigned int i;
+	const struct acpi_hest_ia_corrected *cmc;
+	const struct acpi_hest_ia_error_bank *mc_bank;
+
+	if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
+		return 0;
+
+	cmc = container_of(hest_hdr, const struct acpi_hest_ia_corrected, header);
+	if (!cmc->enabled)
+		return 0;
+
+	/*
+	 * We expect HEST to provide a list of MC banks that report errors
+	 * in firmware first mode. Otherwise, return non-zero value to
+	 * indicate that we are done parsing HEST.
+	 */
+	if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
+		return 1;
+
+	printk(XENLOG_INFO HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
+
+	mc_bank = (const struct acpi_hest_ia_error_bank *)(cmc + 1);
+	for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
+		mce_disable_bank(mc_bank->bank_number);
+#else
+# define acpi_disable_cmcff 1
+#endif
+
+	return 1;
+}
+
+void __init acpi_hest_init(void)
+{
+	acpi_status status;
+	acpi_physical_address hest_addr;
+	acpi_native_uint hest_len;
+
+	if (acpi_disabled)
+		return;
+
+	if (hest_disable) {
+		printk(XENLOG_INFO HEST_PFX "Table parsing disabled.\n");
+		return;
+	}
+
+	status = acpi_get_table_phys(ACPI_SIG_HEST, 0, &hest_addr, &hest_len);
+	if (status == AE_NOT_FOUND)
+		goto err;
+	if (ACPI_FAILURE(status)) {
+		printk(XENLOG_ERR HEST_PFX "Failed to get table, %s\n",
+		       acpi_format_exception(status));
+		goto err;
+	}
+	map_pages_to_xen((unsigned long)__va(hest_addr), PFN_DOWN(hest_addr),
+			 PFN_UP(hest_addr + hest_len) - PFN_DOWN(hest_addr),
+			 PAGE_HYPERVISOR);
+	hest_tab = __va(hest_addr);
+
+	if (!acpi_disable_cmcff)
+		apei_hest_parse(hest_parse_cmc, NULL);
+
+	printk(XENLOG_INFO HEST_PFX "Table parsing has been initialized\n");
+	return;
+err:
+	hest_disable = 1;
+}
diff --git a/xen/drivers/acpi/tables.c b/xen/drivers/acpi/tables.c
index 08e8f3b..1beca79 100644
--- a/xen/drivers/acpi/tables.c
+++ b/xen/drivers/acpi/tables.c
@@ -233,6 +233,12 @@ acpi_table_parse_entries(char *id,
 
 	while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
 	       table_end) {
+		if (entry->length < sizeof(*entry)) {
+			printk(KERN_ERR PREFIX "[%4.4s:%#x] Invalid length\n",
+			       id, entry_id);
+			return -ENODATA;
+		}
+
 		if (entry->type == entry_id
 		    && (!max_entries || count++ < max_entries))
 			if (handler(entry, table_end))
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 532c426..89ffe64 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -375,12 +375,12 @@ static DECLARE_SOFTIRQ_TASKLET(notify_dom0_con_ring_tasklet,
 static long guest_console_write(XEN_GUEST_HANDLE_PARAM(char) buffer, int count)
 {
     char kbuf[128];
-    int kcount;
+    int kcount = 0;
     struct domain *cd = current->domain;
 
     while ( count > 0 )
     {
-        if ( hypercall_preempt_check() )
+        if ( kcount && hypercall_preempt_check() )
             return hypercall_create_continuation(
                 __HYPERVISOR_console_io, "iih",
                 CONSOLEIO_write, count, buffer);
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index 1294561..5e02ac8 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -691,8 +691,6 @@ int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
         if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
                                      gfn, mfn, merge_level) )
             break;
-        /* Deallocate lower level page table */
-        free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
 
         if ( iommu_merge_pages(d, pt_mfn[merge_level], gfn, 
                                flags, merge_level) )
@@ -703,6 +701,9 @@ int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
             domain_crash(d);
             return -EFAULT;
         }
+
+        /* Deallocate lower level page table */
+        free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
     }
 
 out:
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index c26aabc..f97fcf2 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -163,7 +163,7 @@ static void amd_iommu_setup_domain_device(
          !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
     {
         if ( devfn == pdev->devfn )
-            enable_ats_device(iommu->seg, bus, devfn);
+            enable_ats_device(iommu->seg, bus, devfn, iommu);
 
         amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
     }
diff --git a/xen/drivers/passthrough/ats.h b/xen/drivers/passthrough/ats.h
index cf649d4..000e76d 100644
--- a/xen/drivers/passthrough/ats.h
+++ b/xen/drivers/passthrough/ats.h
@@ -24,6 +24,7 @@ struct pci_ats_dev {
     u8 bus;
     u8 devfn;
     u16 ats_queue_depth;    /* ATS device invalidation queue depth */
+    const void *iommu;      /* No common IOMMU struct so use void pointer */
 };
 
 #define ATS_REG_CAP    4
@@ -34,7 +35,7 @@ struct pci_ats_dev {
 extern struct list_head ats_devices;
 extern bool_t ats_enabled;
 
-int enable_ats_device(int seg, int bus, int devfn);
+int enable_ats_device(int seg, int bus, int devfn, const void *iommu);
 void disable_ats_device(int seg, int bus, int devfn);
 struct pci_ats_dev *get_ats_device(int seg, int bus, int devfn);
 
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 19b0e23..c687c53 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -412,9 +412,8 @@ static int iommu_populate_page_table(struct domain *d)
 void iommu_domain_destroy(struct domain *d)
 {
     struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    struct list_head *ioport_list, *rmrr_list, *tmp;
+    struct list_head *ioport_list, *tmp;
     struct g2m_ioport *ioport;
-    struct mapped_rmrr *mrmrr;
 
     if ( !iommu_enabled || !hd->platform_ops )
         return;
@@ -428,13 +427,6 @@ void iommu_domain_destroy(struct domain *d)
         list_del(&ioport->list);
         xfree(ioport);
     }
-
-    list_for_each_safe ( rmrr_list, tmp, &hd->mapped_rmrrs )
-    {
-        mrmrr = list_entry(rmrr_list, struct mapped_rmrr, list);
-        list_del(&mrmrr->list);
-        xfree(mrmrr);
-    }
 }
 
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index c5c8344..e5d332d 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -27,6 +27,7 @@
 #include <xen/delay.h>
 #include <xen/keyhandler.h>
 #include <xen/radix-tree.h>
+#include <xen/softirq.h>
 #include <xen/tasklet.h>
 #include <xsm/xsm.h>
 #include <asm/msi.h>
@@ -153,6 +154,115 @@ static void __init parse_phantom_dev(char *str) {
 }
 custom_param("pci-phantom", parse_phantom_dev);
 
+static u16 __read_mostly command_mask;     /* PCI_COMMAND bits check_pdev() clears */
+static u16 __read_mostly bridge_ctl_mask;  /* PCI_BRIDGE_CONTROL bits check_pdev() clears */
+
+/*
+ * The 'pci' parameter controls certain PCI device aspects.
+ * Optional comma separated value may contain:
+ *
+ *   serr                       don't suppress system errors (default)
+ *   no-serr                    suppress system errors
+ *   perr                       don't suppress parity errors (default)
+ *   no-perr                    suppress parity errors
+ */
+static void __init parse_pci_param(char *s)
+{   /* Handler for the "pci" command line option; s is split in place at commas. */
+    char *ss;
+
+    do {
+        bool_t on = !!strncmp(s, "no-", 3);     /* a "no-" prefix means off */
+        u16 cmd_mask = 0, brctl_mask = 0;        /* stay 0 for unknown tokens */
+
+        if ( !on )
+            s += 3;
+
+        ss = strchr(s, ',');
+        if ( ss )
+            *ss = '\0';                          /* cut out the current token */
+
+        if ( !strcmp(s, "serr") )
+        {
+            cmd_mask = PCI_COMMAND_SERR;
+            brctl_mask = PCI_BRIDGE_CTL_SERR | PCI_BRIDGE_CTL_DTMR_SERR;
+        }
+        else if ( !strcmp(s, "perr") )
+        {
+            cmd_mask = PCI_COMMAND_PARITY;
+            brctl_mask = PCI_BRIDGE_CTL_PARITY;
+        }
+
+        if ( on )
+        {
+            command_mask &= ~cmd_mask;           /* the masks hold the bits to suppress */
+            bridge_ctl_mask &= ~brctl_mask;
+        }
+        else
+        {
+            command_mask |= cmd_mask;
+            bridge_ctl_mask |= brctl_mask;
+        }
+
+        s = ss ? ss + 1 : NULL;                  /* last token: no arithmetic on a NULL ss */
+    } while ( ss );
+}
+custom_param("pci", parse_pci_param);
+
+static void check_pdev(const struct pci_dev *pdev)
+{   /* Apply the "pci" option masks to pdev and write back its flagged status bits. */
+#define PCI_STATUS_CHECK \
+    (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | \
+     PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | \
+     PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY)
+    u16 seg = pdev->seg;
+    u8 bus = pdev->bus;
+    u8 dev = PCI_SLOT(pdev->devfn);
+    u8 func = PCI_FUNC(pdev->devfn);
+    u16 val;
+
+    if ( command_mask )     /* non-zero only after a "no-serr"/"no-perr" option */
+    {
+        val = pci_conf_read16(seg, bus, dev, func, PCI_COMMAND);
+        if ( val & command_mask )
+            pci_conf_write16(seg, bus, dev, func, PCI_COMMAND,
+                             val & ~command_mask);  /* clear the selected command bits */
+        val = pci_conf_read16(seg, bus, dev, func, PCI_STATUS);
+        if ( val & PCI_STATUS_CHECK )
+        {
+            printk(XENLOG_INFO "%04x:%02x:%02x.%u status %04x -> %04x\n",
+                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
+            pci_conf_write16(seg, bus, dev, func, PCI_STATUS,
+                             val & PCI_STATUS_CHECK);  /* presumably write-1-to-clear, as the log line suggests */
+        }
+    }
+
+    switch ( pci_conf_read8(seg, bus, dev, func, PCI_HEADER_TYPE) & 0x7f )  /* 0x7f presumably drops the multi-function flag */
+    {
+    case PCI_HEADER_TYPE_BRIDGE:
+        if ( !bridge_ctl_mask )
+            break;          /* no bridge control bits selected */
+        val = pci_conf_read16(seg, bus, dev, func, PCI_BRIDGE_CONTROL);
+        if ( val & bridge_ctl_mask )
+            pci_conf_write16(seg, bus, dev, func, PCI_BRIDGE_CONTROL,
+                             val & ~bridge_ctl_mask);  /* clear the selected bridge control bits */
+        val = pci_conf_read16(seg, bus, dev, func, PCI_SEC_STATUS);
+        if ( val & PCI_STATUS_CHECK )
+        {
+            printk(XENLOG_INFO
+                   "%04x:%02x:%02x.%u secondary status %04x -> %04x\n",
+                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
+            pci_conf_write16(seg, bus, dev, func, PCI_SEC_STATUS,
+                             val & PCI_STATUS_CHECK);  /* same treatment as PCI_STATUS above */
+        }
+        break;
+
+    case PCI_HEADER_TYPE_CARDBUS:
+        /* TODO */
+        break;
+    }
+#undef PCI_STATUS_CHECK
+}
+
 static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
 {
     struct pci_dev *pdev;
@@ -251,6 +361,8 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
             break;
     }
 
+    check_pdev(pdev);
+
     return pdev;
 }
 
@@ -565,6 +677,8 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *info)
                    seg, bus, slot, func, ctrl);
     }
 
+    check_pdev(pdev);
+
     ret = 0;
     if ( !pdev->domain )
     {
@@ -922,6 +1036,20 @@ static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
                 printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
                        pdev->domain->domain_id, pseg->nr, bus,
                        PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+            if ( iommu_verbose )
+            {
+                spin_unlock(&pcidevs_lock);
+                process_pending_softirqs();
+                spin_lock(&pcidevs_lock);
+            }
+        }
+
+        if ( !iommu_verbose )
+        {
+            spin_unlock(&pcidevs_lock);
+            process_pending_softirqs();
+            spin_lock(&pcidevs_lock);
         }
     }
 
@@ -938,6 +1066,106 @@ void __init setup_dom0_pci_devices(
     spin_unlock(&pcidevs_lock);
 }
 
+#ifdef CONFIG_ACPI
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+static int hest_match_pci(const struct acpi_hest_aer_common *p,
+                          const struct pci_dev *pdev)
+{   /* Non-zero iff HEST entry p names exactly pdev's segment/bus/slot/function. */
+    return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&    /* p->bus yields both segment and bus */
+           ACPI_HEST_BUS(p->bus)     == pdev->bus &&
+           p->device                 == PCI_SLOT(pdev->devfn) &&
+           p->function               == PCI_FUNC(pdev->devfn);
+}
+
+static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
+                              const struct pci_dev *pdev)
+{   /* Is pdev of the device kind that hest_hdr's AER source type stands for? */
+    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
+                                           PCI_SLOT(pdev->devfn),
+                                           PCI_FUNC(pdev->devfn),
+                                           PCI_CAP_ID_EXP);  /* NOTE(review): pos is used unchecked below */
+    u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus,
+                                        PCI_SLOT(pdev->devfn),
+                                        PCI_FUNC(pdev->devfn),
+                                        pos + PCI_EXP_FLAGS),
+                        PCI_EXP_FLAGS_TYPE);  /* compared against PCI_EXP_TYPE_* below */
+
+    switch ( hest_hdr->type )
+    {
+    case ACPI_HEST_TYPE_AER_ROOT_PORT:
+        return pcie == PCI_EXP_TYPE_ROOT_PORT;
+    case ACPI_HEST_TYPE_AER_ENDPOINT:
+        return pcie == PCI_EXP_TYPE_ENDPOINT;
+    case ACPI_HEST_TYPE_AER_BRIDGE:
+        return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                               PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) ==
+               PCI_CLASS_BRIDGE_PCI;
+    }
+
+    return 0;   /* no other source type matches */
+}
+
+struct aer_hest_parse_info {   /* state handed to aer_hest_parse() through its data argument */
+    const struct pci_dev *pdev;    /* device to look up; NULL means "any PCIe device" */
+    bool_t firmware_first;         /* result: FIRMWARE_FIRST flag of the matching source(s) */
+};
+
+static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
+{   /* 1 for the root port, endpoint and bridge AER source types, else 0. */
+    if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
+         hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
+         hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
+        return 1;
+    return 0;
+}
+
+static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
+{   /* apei_hest_parse() callback; data points to a struct aer_hest_parse_info. */
+    struct aer_hest_parse_info *info = data;
+    const struct acpi_hest_aer_common *p;
+    bool_t ff;
+
+    if ( !hest_source_is_pcie_aer(hest_hdr) )
+        return 0;   /* not an AER source: nothing to record */
+
+    p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);  /* common AER part follows the header */
+    ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
+
+    /*
+     * If no specific device is supplied, determine whether
+     * FIRMWARE_FIRST is set for *any* PCIe device.
+     */
+    if ( !info->pdev )
+    {
+        info->firmware_first |= ff;
+        return 0;   /* keep walking: any later source may set the flag */
+    }
+
+    /* Otherwise, check the specific device */
+    if ( p->flags & ACPI_HEST_GLOBAL ?   /* global entries match by type, others by address */
+         hest_match_type(hest_hdr, info->pdev) :
+         hest_match_pci(p, info->pdev) )
+    {
+        info->firmware_first = ff;
+        return 1;   /* entry for this device found: non-zero ends the walk */
+    }
+
+    return 0;
+}
+
+bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
+{   /* True iff pdev has a PCI_CAP_ID_EXP capability and its HEST entry sets FIRMWARE_FIRST. */
+    struct aer_hest_parse_info info = { .pdev = pdev };  /* firmware_first starts out 0 */
+
+    return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),  /* pdev must not be NULL here */
+                               PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
+           apei_hest_parse(aer_hest_parse, &info) >= 0 &&   /* negative: presumably a failed walk */
+           info.firmware_first;
+}
+#endif
+
 static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
 {
     struct pci_dev *pdev;
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index cb998e2..1152c3a 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -580,6 +580,16 @@ acpi_parse_one_rmrr(struct acpi_dmar_header *header)
     if ( (ret = acpi_dmar_check_length(header, sizeof(*rmrr))) != 0 )
         return ret;
 
+    list_for_each_entry(rmrru, &acpi_rmrr_units, list)
+       if ( base_addr <= rmrru->end_address && rmrru->base_address <= end_addr )
+       {
+           printk(XENLOG_ERR VTDPREFIX
+                  "Overlapping RMRRs [%"PRIx64",%"PRIx64"] and [%"PRIx64",%"PRIx64"]\n",
+                  rmrru->base_address, rmrru->end_address,
+                  base_addr, end_addr);
+           return -EEXIST;
+       }
+
     /* This check is here simply to detect when RMRR values are
      * not properly represented in the system memory map and
      * inform the user
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index 14cd90e..afe7faf 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -99,7 +99,7 @@ void platform_quirks_init(void);
 void vtd_ops_preamble_quirk(struct iommu* iommu);
 void vtd_ops_postamble_quirk(struct iommu* iommu);
 void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map);
-void pci_vtd_quirk(struct pci_dev *pdev);
+void pci_vtd_quirk(const struct pci_dev *);
 int platform_supports_intremap(void);
 int platform_supports_x2apic(void);
 
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 5f10034..e543c08 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -31,7 +31,6 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
-#include <xen/softirq.h>
 #include <asm/msi.h>
 #include <asm/irq.h>
 #include <asm/hvm/vmx/vmx.h>
@@ -43,6 +42,12 @@
 #include "vtd.h"
 #include "../ats.h"
 
+struct mapped_rmrr {           /* one entry of a domain's hd->mapped_rmrrs list */
+    struct list_head list;
+    u64 base, end;             /* copies of the RMRR's base_address / end_address */
+    unsigned int count;        /* starts at 1; ++ on repeat mapping, -- on device removal */
+};
+
 /* Possible unfiltered LAPIC/MSI messages from untrusted sources? */
 bool_t __read_mostly untrusted_msi;
 
@@ -620,7 +625,6 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr)
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
-    struct mapped_rmrr *mrmrr;
 
     spin_lock(&hd->mapping_lock);
     /* get last level pte */
@@ -649,21 +653,6 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr)
         __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1);
 
     unmap_vtd_domain_page(page);
-
-    /* if the cleared address is between mapped RMRR region,
-     * remove the mapped RMRR
-     */
-    spin_lock(&hd->mapping_lock);
-    list_for_each_entry ( mrmrr, &hd->mapped_rmrrs, list )
-    {
-        if ( addr >= mrmrr->base && addr <= mrmrr->end )
-        {
-            list_del(&mrmrr->list);
-            xfree(mrmrr);
-            break;
-        }
-    }
-    spin_unlock(&hd->mapping_lock);
 }
 
 static void iommu_free_pagetable(u64 pt_maddr, int level)
@@ -1453,7 +1442,7 @@ static int domain_context_mapping(
         ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
                                          pdev);
         if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
-            enable_ats_device(seg, bus, devfn);
+            enable_ats_device(seg, bus, devfn, drhd->iommu);
 
         break;
 
@@ -1494,8 +1483,8 @@ static int domain_context_mapping(
         break;
     }
 
-    if ( iommu_verbose )
-        process_pending_softirqs();
+    if ( !ret && devfn == pdev->devfn )
+        pci_vtd_quirk(pdev);
 
     return ret;
 }
@@ -1704,10 +1693,17 @@ static int reassign_device_ownership(
 void iommu_domain_teardown(struct domain *d)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct mapped_rmrr *mrmrr, *tmp;
 
     if ( list_empty(&acpi_drhd_units) )
         return;
 
+    list_for_each_entry_safe ( mrmrr, tmp, &hd->mapped_rmrrs, list )
+    {
+        list_del(&mrmrr->list);
+        xfree(mrmrr);
+    }
+
     if ( iommu_use_hap_pt(d) )
         return;
 
@@ -1852,14 +1848,17 @@ static int rmrr_identity_mapping(struct domain *d,
     ASSERT(rmrr->base_address < rmrr->end_address);
 
     /*
-     * No need to acquire hd->mapping_lock, as the only theoretical race is
-     * with the insertion below (impossible due to holding pcidevs_lock).
+     * No need to acquire hd->mapping_lock: Both insertion and removal
+     * get done while holding pcidevs_lock.
      */
     list_for_each_entry( mrmrr, &hd->mapped_rmrrs, list )
     {
         if ( mrmrr->base == rmrr->base_address &&
              mrmrr->end == rmrr->end_address )
+        {
+            ++mrmrr->count;
             return 0;
+        }
     }
 
     base = rmrr->base_address & PAGE_MASK_4K;
@@ -1880,9 +1879,8 @@ static int rmrr_identity_mapping(struct domain *d,
         return -ENOMEM;
     mrmrr->base = rmrr->base_address;
     mrmrr->end = rmrr->end_address;
-    spin_lock(&hd->mapping_lock);
+    mrmrr->count = 1;
     list_add_tail(&mrmrr->list, &hd->mapped_rmrrs);
-    spin_unlock(&hd->mapping_lock);
 
     return 0;
 }
@@ -1927,10 +1925,12 @@ static int intel_iommu_enable_device(struct pci_dev *pdev)
     struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
     int ret = drhd ? ats_device(pdev, drhd) : -ENODEV;
 
+    pci_vtd_quirk(pdev);
+
     if ( ret <= 0 )
         return ret;
 
-    ret = enable_ats_device(pdev->seg, pdev->bus, pdev->devfn);
+    ret = enable_ats_device(pdev->seg, pdev->bus, pdev->devfn, drhd->iommu);
 
     return ret >= 0 ? 0 : ret;
 }
@@ -1944,17 +1944,52 @@ static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
     if ( !pdev->domain )
         return -EINVAL;
 
-    /* If the device belongs to dom0, and it has RMRR, don't remove it
-     * from dom0, because BIOS may use RMRR at booting time.
-     */
-    if ( pdev->domain->domain_id == 0 )
+    for_each_rmrr_device ( rmrr, bdf, i )
     {
-        for_each_rmrr_device ( rmrr, bdf, i )
+        struct hvm_iommu *hd;
+        struct mapped_rmrr *mrmrr, *tmp;
+
+        if ( rmrr->segment != pdev->seg ||
+             PCI_BUS(bdf) != pdev->bus ||
+             PCI_DEVFN2(bdf) != devfn )
+            continue;
+
+        /*
+         * If the device belongs to dom0, and it has RMRR, don't remove
+         * it from dom0, because BIOS may use RMRR at booting time.
+         */
+        if ( is_hardware_domain(pdev->domain) )
+            return 0;
+
+        hd = domain_hvm_iommu(pdev->domain);
+
+        /*
+         * No need to acquire hd->mapping_lock: Both insertion and removal
+         * get done while holding pcidevs_lock.
+         */
+        ASSERT(spin_is_locked(&pcidevs_lock));
+        list_for_each_entry_safe ( mrmrr, tmp, &hd->mapped_rmrrs, list )
         {
-            if ( rmrr->segment == pdev->seg &&
-                 PCI_BUS(bdf) == pdev->bus &&
-                 PCI_DEVFN2(bdf) == devfn )
-                return 0;
+            unsigned long base_pfn, end_pfn;
+
+            if ( rmrr->base_address != mrmrr->base ||
+                 rmrr->end_address != mrmrr->end )
+                continue;
+
+            if ( --mrmrr->count )
+                break;
+
+            base_pfn = (mrmrr->base & PAGE_MASK_4K) >> PAGE_SHIFT_4K;
+            end_pfn = PAGE_ALIGN_4K(mrmrr->end) >> PAGE_SHIFT_4K;
+            while ( base_pfn < end_pfn )
+            {
+                if ( intel_iommu_unmap_page(pdev->domain, base_pfn) )
+                    return -ENXIO;
+                base_pfn++;
+            }
+
+            list_del(&mrmrr->list);
+            xfree(mrmrr);
         }
     }
 
@@ -1963,12 +1998,7 @@ static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
 
 static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
 {
-    int err;
-
-    err = domain_context_mapping(pdev->domain, devfn, pdev);
-    if ( !err && devfn == pdev->devfn )
-        pci_vtd_quirk(pdev);
-    return err;
+    return domain_context_mapping(pdev->domain, devfn, pdev);
 }
 
 void clear_fault_bits(struct iommu *iommu)
diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
index ef5de99..fe29e82 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -196,7 +196,7 @@ static int queue_invalidate_wait(struct iommu *iommu,
     u8 iflag, u8 sw, u8 fn)
 {
     s_time_t start_time;
-    u32 poll_slot = QINVAL_STAT_INIT;
+    volatile u32 poll_slot = QINVAL_STAT_INIT;
     int index = -1;
     int ret = -1;
     unsigned long flags;
diff --git a/xen/drivers/passthrough/vtd/quirks.c b/xen/drivers/passthrough/vtd/quirks.c
index 7f6c3a7..647723d 100644
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -27,6 +27,7 @@
 #include <xen/softirq.h>
 #include <xen/time.h>
 #include <xen/pci.h>
+#include <xen/pci_ids.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
 #include <asm/msi.h>
@@ -378,24 +379,127 @@ void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map)
     }
 }
 
-/*
- * Mask reporting Intel VT-d faults to IOH core logic:
- *   - Some platform escalates VT-d faults to platform errors 
- *   - This can cause system failure upon non-fatal VT-d faults
- *   - Potential security issue if malicious guest trigger VT-d faults
- */
-void __init pci_vtd_quirk(struct pci_dev *pdev)
+void pci_vtd_quirk(const struct pci_dev *pdev)
 {
     int seg = pdev->seg;
     int bus = pdev->bus;
     int dev = PCI_SLOT(pdev->devfn);
     int func = PCI_FUNC(pdev->devfn);
-    int id, val;
+    int pos;
+    bool_t ff;
+    u32 val, val2;
+    u64 bar;
+    paddr_t pa;
+    const char *action;
+
+    if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
+         PCI_VENDOR_ID_INTEL )
+        return;
 
-    id = pci_conf_read32(seg, bus, dev, func, 0);
-    if ( id == 0x342e8086 || id == 0x3c288086 )
+    switch ( pci_conf_read16(seg, bus, dev, func, PCI_DEVICE_ID) )
     {
+    /*
+     * Mask reporting Intel VT-d faults to IOH core logic:
+     *   - Some platforms escalate VT-d faults to platform errors.
+     *   - This can cause system failure upon non-fatal VT-d faults.
+     *   - Potential security issue if a malicious guest triggers VT-d faults.
+     */
+    case 0x0e28: /* Xeon-E5v2 (IvyBridge) */
+    case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */
+    case 0x3728: /* Xeon C5500/C3500 (JasperForest) */
+    case 0x3c28: /* Sandybridge */
         val = pci_conf_read32(seg, bus, dev, func, 0x1AC);
         pci_conf_write32(seg, bus, dev, func, 0x1AC, val | (1 << 31));
+        printk(XENLOG_INFO "Masked VT-d error signaling on %04x:%02x:%02x.%u\n",
+               seg, bus, dev, func);
+        break;
+
+    /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */
+    case 0x3400 ... 0x3407: /* host bridges */
+    case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */
+    /* JasperForest (Intel Xeon Processor C5500/C3500) */
+    case 0x3700 ... 0x370f: /* host bridges */
+    case 0x3720 ... 0x3724: /* root ports */
+    /* Sandybridge-EP (Romley) */
+    case 0x3c00: /* host bridge */
+    case 0x3c01 ... 0x3c0b: /* root ports */
+        pos = pci_find_ext_capability(seg, bus, pdev->devfn,
+                                      PCI_EXT_CAP_ID_ERR);
+        if ( !pos )
+        {
+            pos = pci_find_ext_capability(seg, bus, pdev->devfn,
+                                          PCI_EXT_CAP_ID_VNDR);
+            while ( pos )
+            {
+                val = pci_conf_read32(seg, bus, dev, func, pos + PCI_VNDR_HEADER);
+                if ( PCI_VNDR_HEADER_ID(val) == 4 && PCI_VNDR_HEADER_REV(val) == 1 )
+                {
+                    pos += PCI_VNDR_HEADER;
+                    break;
+                }
+                pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos,
+                                                   PCI_EXT_CAP_ID_VNDR);
+            }
+            ff = 0;
+        }
+        else
+            ff = pcie_aer_get_firmware_first(pdev);
+        if ( !pos )
+        {
+            printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n",
+                   seg, bus, dev, func);
+            break;
+        }
+
+        val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK);
+        val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
+        if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) )
+            action = "Found masked";
+        else if ( !ff )
+        {
+            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
+                             val | PCI_ERR_UNC_UNSUP);
+            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
+                             val2 | PCI_ERR_COR_ADV_NFAT);
+            action = "Masked";
+        }
+        else
+            action = "Must not mask";
+
+        /* XPUNCERRMSK Send Completion with Unsupported Request */
+        val = pci_conf_read32(seg, bus, dev, func, 0x20c);
+        pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4));
+
+        printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n",
+               action, seg, bus, dev, func);
+        break;
+
+    case 0x100: case 0x104: case 0x108: /* Sandybridge */
+    case 0x150: case 0x154: case 0x158: /* Ivybridge */
+    case 0xa04: /* Haswell ULT */
+    case 0xc00: case 0xc04: case 0xc08: /* Haswell */
+        bar = pci_conf_read32(seg, bus, dev, func, 0x6c);
+        bar = (bar << 32) | pci_conf_read32(seg, bus, dev, func, 0x68);
+        pa = bar & 0x7ffffff000UL; /* bits 12...38 */
+        if ( (bar & 1) && pa &&
+             page_is_ram_type(paddr_to_pfn(pa), RAM_TYPE_RESERVED) )
+        {
+            u32 __iomem *va = ioremap(pa, PAGE_SIZE);
+
+            if ( va )
+            {
+                __set_bit(0x1c8 * 8 + 20, va);
+                iounmap(va);
+                printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
+                       seg, bus, dev, func);
+            }
+            else
+                printk(XENLOG_ERR "Could not map %"PRIpaddr" for %04x:%02x:%02x.%u\n",
+                       pa, seg, bus, dev, func);
+        }
+        else
+            printk(XENLOG_WARNING "Bogus DMIBAR %#"PRIx64" on %04x:%02x:%02x.%u\n",
+                   bar, seg, bus, dev, func);
+        break;
     }
 }
diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c
index f3b8c2d..6b0632b 100644
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -110,17 +110,23 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
     u64 addr, unsigned int size_order, u64 type)
 {
     struct pci_ats_dev *pdev;
-    int sbit, ret = 0;
-    u16 sid;
+    int ret = 0;
 
     if ( !ecap_dev_iotlb(iommu->ecap) )
         return ret;
 
     list_for_each_entry( pdev, &ats_devices, list )
     {
-        sid = (pdev->bus << 8) | pdev->devfn;
+        u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
+        bool_t sbit;
+        int rc = 0;
 
-        switch ( type ) {
+        /* Only invalidate devices that belong to this IOMMU */
+        if ( pdev->iommu != iommu )
+            continue;
+
+        switch ( type )
+        {
         case DMA_TLB_DSI_FLUSH:
             if ( !device_in_domain(iommu, pdev, did) )
                 break;
@@ -129,32 +135,37 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
             /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
             sbit = 1;
             addr = (~0 << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
-            ret |= qinval_device_iotlb(iommu, pdev->ats_queue_depth,
-                                       sid, sbit, addr);
+            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
+                                     sid, sbit, addr);
             break;
         case DMA_TLB_PSI_FLUSH:
             if ( !device_in_domain(iommu, pdev, did) )
                 break;
 
-            addr &= ~0 << (PAGE_SHIFT + size_order);
-
             /* if size <= 4K, set sbit = 0, else set sbit = 1 */
             sbit = size_order ? 1 : 0;
 
             /* clear lower bits */
-            addr &= (~0 << (PAGE_SHIFT + size_order));
+            addr &= ~0 << PAGE_SHIFT_4K;
 
             /* if sbit == 1, zero out size_order bit and set lower bits to 1 */
             if ( sbit )
-                addr &= (~0  & ~(1 << (PAGE_SHIFT + size_order)));
+            {
+                addr &= ~((u64)PAGE_SIZE_4K << (size_order - 1));
+                addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
+            }
 
-            ret |= qinval_device_iotlb(iommu, pdev->ats_queue_depth,
-                                       sid, sbit, addr);
+            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
+                                     sid, sbit, addr);
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
-            break;
+            return -EOPNOTSUPP;
         }
+
+        if ( !ret )
+            ret = rc;
     }
+
     return ret;
 }
diff --git a/xen/drivers/passthrough/x86/ats.c b/xen/drivers/passthrough/x86/ats.c
index bb7ee9a..1e3e03a 100644
--- a/xen/drivers/passthrough/x86/ats.c
+++ b/xen/drivers/passthrough/x86/ats.c
@@ -23,7 +23,7 @@ LIST_HEAD(ats_devices);
 bool_t __read_mostly ats_enabled = 1;
 boolean_param("ats", ats_enabled);
 
-int enable_ats_device(int seg, int bus, int devfn)
+int enable_ats_device(int seg, int bus, int devfn, const void *iommu)
 {
     struct pci_ats_dev *pdev = NULL;
     u32 value;
@@ -66,6 +66,7 @@ int enable_ats_device(int seg, int bus, int devfn)
         pdev->seg = seg;
         pdev->bus = bus;
         pdev->devfn = devfn;
+        pdev->iommu = iommu;
         value = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
                                 PCI_FUNC(devfn), pos + ATS_REG_CAP);
         pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK ?:
diff --git a/xen/drivers/pci/pci.c b/xen/drivers/pci/pci.c
index 25dc5f1..ca07ed0 100644
--- a/xen/drivers/pci/pci.c
+++ b/xen/drivers/pci/pci.c
@@ -66,23 +66,33 @@ int pci_find_next_cap(u16 seg, u8 bus, unsigned int devfn, u8 pos, int cap)
 
 /**
  * pci_find_ext_capability - Find an extended capability
- * @dev: PCI device to query
+ * @seg/@bus/@devfn: PCI device to query
  * @cap: capability code
  *
  * Returns the address of the requested extended capability structure
  * within the device's PCI configuration space or 0 if the device does
- * not support it.  Possible values for @cap:
- *
- *  %PCI_EXT_CAP_ID_ERR         Advanced Error Reporting
- *  %PCI_EXT_CAP_ID_VC          Virtual Channel
- *  %PCI_EXT_CAP_ID_DSN         Device Serial Number
- *  %PCI_EXT_CAP_ID_PWR         Power Budgeting
+ * not support it.
  */
 int pci_find_ext_capability(int seg, int bus, int devfn, int cap)
 {
+    return pci_find_next_ext_capability(seg, bus, devfn, 0, cap);
+}
+
+/**
+ * pci_find_next_ext_capability - Find another extended capability
+ * @seg/@bus/@devfn: PCI device to query
+ * @start: starting position
+ * @cap: capability code
+ *
+ * Returns the address of the requested extended capability structure
+ * within the device's PCI configuration space or 0 if the device does
+ * not support it.
+ */
+int pci_find_next_ext_capability(int seg, int bus, int devfn, int start, int cap)
+{
     u32 header;
     int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */
-    int pos = 0x100;
+    int pos = max(start, 0x100);
 
     header = pci_conf_read32(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
 
@@ -92,9 +102,10 @@ int pci_find_ext_capability(int seg, int bus, int devfn, int cap)
      */
     if ( (header == 0) || (header == -1) )
         return 0;
+    ASSERT(start != pos || PCI_EXT_CAP_ID(header) == cap);
 
     while ( ttl-- > 0 ) {
-        if ( PCI_EXT_CAP_ID(header) == cap )
+        if ( PCI_EXT_CAP_ID(header) == cap && pos != start )
             return pos;
         pos = PCI_EXT_CAP_NEXT(header);
         if ( pos < 0x100 )
diff --git a/xen/include/acpi/actbl1.h b/xen/include/acpi/actbl1.h
index 492be4e..9311e3a 100644
--- a/xen/include/acpi/actbl1.h
+++ b/xen/include/acpi/actbl1.h
@@ -445,6 +445,14 @@ struct acpi_hest_aer_common {
 #define ACPI_HEST_FIRMWARE_FIRST        (1)
 #define ACPI_HEST_GLOBAL                (1<<1)
 
+/*
+ * Macros to access the bus/segment numbers in Bus field above:
+ *  Bus number is encoded in bits 7:0
+ *  Segment number is encoded in bits 23:8
+ */
+#define ACPI_HEST_BUS(bus)              ((bus) & 0xFF)
+#define ACPI_HEST_SEGMENT(bus)          (((bus) >> 8) & 0xFFFF)
+
 /* Hardware Error Notification */
 
 struct acpi_hest_notify {
diff --git a/xen/include/acpi/apei.h b/xen/include/acpi/apei.h
index 162f616..087bbef 100644
--- a/xen/include/acpi/apei.h
+++ b/xen/include/acpi/apei.h
@@ -12,6 +12,9 @@
 
 #define FIX_APEI_RANGE_MAX 64
 
+typedef int (*apei_hest_func_t)(const struct acpi_hest_header *, void *);
+int apei_hest_parse(apei_hest_func_t, void *);
+
 int erst_write(const struct cper_record_header *record);
 size_t erst_get_record_count(void);
 int erst_get_next_record_id(u64 *record_id);
diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
index b8221ca..80d5c36 100644
--- a/xen/include/asm-arm/arm32/page.h
+++ b/xen/include/asm-arm/arm32/page.h
@@ -90,11 +90,14 @@ static inline uint64_t __va_to_par(vaddr_t va)
 }
 
 /* Ask the MMU to translate a Guest VA for us */
-static inline uint64_t gva_to_ma_par(vaddr_t va)
+static inline uint64_t gva_to_ma_par(vaddr_t va, unsigned int flags)
 {
     uint64_t par, tmp;
     tmp = READ_CP64(PAR);
-    WRITE_CP32(va, ATS12NSOPR);
+    if ( (flags & GV2M_WRITE) == GV2M_WRITE )
+        WRITE_CP32(va, ATS12NSOPW);
+    else
+        WRITE_CP32(va, ATS12NSOPR);
     isb(); /* Ensure result is available. */
     par = READ_CP64(PAR);
     WRITE_CP64(tmp, PAR);
diff --git a/xen/include/asm-arm/arm64/page.h b/xen/include/asm-arm/arm64/page.h
index 3352821..3922d87 100644
--- a/xen/include/asm-arm/arm64/page.h
+++ b/xen/include/asm-arm/arm64/page.h
@@ -83,11 +83,14 @@ static inline uint64_t __va_to_par(vaddr_t va)
 }
 
 /* Ask the MMU to translate a Guest VA for us */
-static inline uint64_t gva_to_ma_par(vaddr_t va)
+static inline uint64_t gva_to_ma_par(vaddr_t va, unsigned int flags)
 {
     uint64_t par, tmp = READ_SYSREG64(PAR_EL1);
 
-    asm volatile ("at s12e1r, %0;" : : "r" (va));
+    if ( (flags & GV2M_WRITE) == GV2M_WRITE )
+        asm volatile ("at s12e1w, %0;" : : "r" (va));
+    else
+        asm volatile ("at s12e1r, %0;" : : "r" (va));
     isb();
     par = READ_SYSREG64(PAR_EL1);
     WRITE_SYSREG64(tmp, PAR_EL1);
diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h
index 373f156..6ab5d36 100644
--- a/xen/include/asm-arm/arm64/vfp.h
+++ b/xen/include/asm-arm/arm64/vfp.h
@@ -1,9 +1,12 @@
 #ifndef _ARM_ARM64_VFP_H
 #define _ARM_ARM64_VFP_H
 
+/* ARM64 VFP instruction requires fpregs address to be 128-bit aligned */
+#define __vfp_aligned __attribute__((aligned(16)))
+
 struct vfp_state
 {
-    uint64_t fpregs[64];
+    uint64_t fpregs[64] __vfp_aligned;
     uint32_t fpcr;
     uint32_t fpexc32_el2;
     uint32_t fpsr;
diff --git a/xen/include/asm-arm/bitops.h b/xen/include/asm-arm/bitops.h
index 0a7caee..25f96c8 100644
--- a/xen/include/asm-arm/bitops.h
+++ b/xen/include/asm-arm/bitops.h
@@ -18,13 +18,14 @@
 #define __set_bit(n,p)            set_bit(n,p)
 #define __clear_bit(n,p)          clear_bit(n,p)
 
+#define BITS_PER_WORD           32
 #define BIT(nr)                 (1UL << (nr))
-#define BIT_MASK(nr)            (1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)            ((nr) / BITS_PER_LONG)
+#define BIT_MASK(nr)            (1UL << ((nr) % BITS_PER_WORD))
+#define BIT_WORD(nr)            ((nr) / BITS_PER_WORD)
 #define BITS_PER_BYTE           8
 
-#define ADDR (*(volatile long *) addr)
-#define CONST_ADDR (*(const volatile long *) addr)
+#define ADDR (*(volatile int *) addr)
+#define CONST_ADDR (*(const volatile int *) addr)
 
 #if defined(CONFIG_ARM_32)
 # include <asm/arm32/bitops.h>
@@ -45,10 +46,10 @@
  */
 static inline int __test_and_set_bit(int nr, volatile void *addr)
 {
-        unsigned long mask = BIT_MASK(nr);
-        volatile unsigned long *p =
-                ((volatile unsigned long *)addr) + BIT_WORD(nr);
-        unsigned long old = *p;
+        unsigned int mask = BIT_MASK(nr);
+        volatile unsigned int *p =
+                ((volatile unsigned int *)addr) + BIT_WORD(nr);
+        unsigned int old = *p;
 
         *p = old | mask;
         return (old & mask) != 0;
@@ -65,10 +66,10 @@ static inline int __test_and_set_bit(int nr, volatile void *addr)
  */
 static inline int __test_and_clear_bit(int nr, volatile void *addr)
 {
-        unsigned long mask = BIT_MASK(nr);
-        volatile unsigned long *p =
-                ((volatile unsigned long *)addr) + BIT_WORD(nr);
-        unsigned long old = *p;
+        unsigned int mask = BIT_MASK(nr);
+        volatile unsigned int *p =
+                ((volatile unsigned int *)addr) + BIT_WORD(nr);
+        unsigned int old = *p;
 
         *p = old & ~mask;
         return (old & mask) != 0;
@@ -78,10 +79,10 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr)
 static inline int __test_and_change_bit(int nr,
                                             volatile void *addr)
 {
-        unsigned long mask = BIT_MASK(nr);
-        volatile unsigned long *p =
-                ((volatile unsigned long *)addr) + BIT_WORD(nr);
-        unsigned long old = *p;
+        unsigned int mask = BIT_MASK(nr);
+        volatile unsigned int *p =
+                ((volatile unsigned int *)addr) + BIT_WORD(nr);
+        unsigned int old = *p;
 
         *p = old ^ mask;
         return (old & mask) != 0;
@@ -94,8 +95,8 @@ static inline int __test_and_change_bit(int nr,
  */
 static inline int test_bit(int nr, const volatile void *addr)
 {
-        const volatile unsigned long *p = (const volatile unsigned long *)addr;
-        return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+        const volatile unsigned int *p = (const volatile unsigned int *)addr;
+        return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_WORD-1)));
 }
 
 static inline int constant_fls(int x)
diff --git a/xen/include/asm-arm/cpregs.h b/xen/include/asm-arm/cpregs.h
index 508467a..f1100c8 100644
--- a/xen/include/asm-arm/cpregs.h
+++ b/xen/include/asm-arm/cpregs.h
@@ -71,6 +71,20 @@
 
 /* Coprocessor 14 */
 
+/* CP14 0: Debug Register interface */
+#define DBGDIDR         p14,0,c0,c0,0   /* Debug ID Register */
+#define DBGDSCRINT      p14,0,c0,c1,0   /* Debug Status and Control Internal */
+#define DBGDSCREXT      p14,0,c0,c2,2   /* Debug Status and Control External */
+#define DBGVCR          p14,0,c0,c7,0   /* Vector Catch */
+#define DBGBVR0         p14,0,c0,c0,4   /* Breakpoint Value 0 */
+#define DBGBCR0         p14,0,c0,c0,5   /* Breakpoint Control 0 */
+#define DBGWVR0         p14,0,c0,c0,6   /* Watchpoint Value 0 */
+#define DBGWCR0         p14,0,c0,c0,7   /* Watchpoint Control 0 */
+#define DBGBVR1         p14,0,c0,c1,4   /* Breakpoint Value 1 */
+#define DBGBCR1         p14,0,c0,c1,5   /* Breakpoint Control 1 */
+#define DBGOSLAR        p14,0,c1,c0,4   /* OS Lock Access */
+#define DBGOSDLR        p14,0,c1,c3,4   /* OS Double Lock */
+
 /* CP14 CR0: */
 #define TEECR           p14,6,c0,c0,0   /* ThumbEE Configuration Register */
 
@@ -115,6 +129,9 @@
 #define NSACR           p15,0,c1,c1,2   /* Non-Secure Access Control Register */
 #define HSCTLR          p15,4,c1,c0,0   /* Hyp. System Control Register */
 #define HCR             p15,4,c1,c1,0   /* Hyp. Configuration Register */
+#define HDCR            p15,4,c1,c1,1   /* Hyp. Debug Configuration Register */
+#define HCPTR           p15,4,c1,c1,2   /* Hyp. Coprocessor Trap Register */
+#define HSTR            p15,4,c1,c1,3   /* Hyp. System Trap Register */
 
 /* CP15 CR2: Translation Table Base and Control Registers */
 #define TTBCR           p15,0,c2,c0,2   /* Translatation Table Base Control Register */
@@ -195,7 +212,22 @@
 #define TLBIMVAH        p15,4,c8,c7,1   /* Invalidate Unified Hyp. TLB by MVA */
 #define TLBIALLNSNH     p15,4,c8,c7,4   /* Invalidate Entire Non-Secure Non-Hyp. Unified TLB */
 
-/* CP15 CR9: */
+/* CP15 CR9: Performance monitors */
+#define PMCR            p15,0,c9,c12,0  /* Perf. Mon. Control Register */
+#define PMCNTENSET      p15,0,c9,c12,1  /* Perf. Mon. Count Enable Set register */
+#define PMCNTENCLR      p15,0,c9,c12,2  /* Perf. Mon. Count Enable Clear register */
+#define PMOVSR          p15,0,c9,c12,3  /* Perf. Mon. Overflow Flag Status Register */
+#define PMSWINC         p15,0,c9,c12,4  /* Perf. Mon. Software Increment register */
+#define PMSELR          p15,0,c9,c12,5  /* Perf. Mon. Event Counter Selection Register */
+#define PMCEID0         p15,0,c9,c12,6  /* Perf. Mon. Common Event Identification register 0 */
+#define PMCEID1         p15,0,c9,c12,7  /* Perf. Mon. Common Event Identification register 1 */
+#define PMCCNTR         p15,0,c9,c13,0  /* Perf. Mon. Cycle Count Register */
+#define PMXEVCNTR       p15,0,c9,c13,1  /* Perf. Mon. Event Type Select Register */
+#define PMXEVCNR        p15,0,c9,c13,2  /* Perf. Mon. Event Count Register */
+#define PMUSERENR       p15,0,c9,c14,0  /* Perf. Mon. User Enable Register */
+#define PMINTENSET      p15,0,c9,c14,1  /* Perf. Mon. Interrupt Enable Set Register */
+#define PMINTENCLR      p15,0,c9,c14,2  /* Perf. Mon. Interrupt Enable Clear Register */
+#define PMOVSSET        p15,0,c9,c14,3  /* Perf. Mon. Overflow Flag Status Set register */
 
 /* CP15 CR10: */
 #define MAIR0           p15,0,c10,c2,0  /* Memory Attribute Indirection Register 0 AKA PRRR */
@@ -260,6 +292,7 @@
 #define CNTV_CVAL_EL0           CNTV_CVAL
 #define CONTEXTIDR_EL1          CONTEXTIDR
 #define CPACR_EL1               CPACR
+#define CPTR_EL2                HCPTR
 #define CSSELR_EL1              CSSELR
 #define DACR32_EL2              DACR
 #define ESR_EL1                 DFSR
@@ -268,6 +301,7 @@
 #define FAR_EL2                 HIFAR
 #define HCR_EL2                 HCR
 #define HPFAR_EL2               HPFAR
+#define HSTR_EL2                HSTR
 #define ID_AFR0_EL1             ID_AFR0
 #define ID_DFR0_EL1             ID_DFR0
 #define ID_ISAR0_EL1            ID_ISAR0
@@ -283,6 +317,7 @@
 #define ID_PFR0_EL1             ID_PFR0
 #define ID_PFR1_EL1             ID_PFR1
 #define IFSR32_EL2              IFSR
+#define MDCR_EL2                HDCR
 #define MIDR_EL1                MIDR
 #define MPIDR_EL1               MPIDR
 #define PAR_EL1                 PAR
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index bc20a15..49f64b8 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -283,6 +283,9 @@ struct arch_vcpu
         spinlock_t lock;
     } vgic;
 
+    /* Timer registers  */
+    uint32_t cntkctl;
+
     struct vtimer phys_timer;
     struct vtimer virt_timer;
 }  __cacheline_aligned;
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index b8d4e7d..8bf179d 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -233,9 +233,9 @@ static inline void *maddr_to_virt(paddr_t ma)
 }
 #endif
 
-static inline int gvirt_to_maddr(vaddr_t va, paddr_t *pa)
+static inline int gvirt_to_maddr(vaddr_t va, paddr_t *pa, unsigned int flags)
 {
-    uint64_t par = gva_to_ma_par(va);
+    uint64_t par = gva_to_ma_par(va, flags);
     if ( par & PAR_F )
         return -EFAULT;
     *pa = (par & PADDR_MASK & PAGE_MASK) | ((unsigned long) va & ~PAGE_MASK);
@@ -273,6 +273,9 @@ struct domain *page_get_owner_and_reference(struct page_info *page);
 void put_page(struct page_info *page);
 int  get_page(struct page_info *page, struct domain *domain);
 
+struct page_info *get_page_from_gva(struct domain *d, vaddr_t va,
+                                    unsigned long flags);
+
 /*
  * The MPT (machine->physical mapping table) is an array of word-sized
  * values, indexed on machine frame number. It is expected that guest OSes
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index 3b39c45..e1013c8 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -75,6 +75,10 @@ int p2m_alloc_table(struct domain *d);
 /* */
 void p2m_load_VTTBR(struct domain *d);
 
+/* Context switch */
+void p2m_save_state(struct vcpu *p);
+void p2m_restore_state(struct vcpu *n);
+
 /* Look up the MFN corresponding to a domain's PFN. */
 paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
 
diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
index e00be9e..c118309 100644
--- a/xen/include/asm-arm/page.h
+++ b/xen/include/asm-arm/page.h
@@ -73,6 +73,10 @@
 #define MATTR_DEV     0x1
 #define MATTR_MEM     0xf
 
+/* Flags for get_page_from_gva, gvirt_to_maddr etc */
+#define GV2M_READ  (0u<<0)
+#define GV2M_WRITE (1u<<0)
+
 #ifndef __ASSEMBLY__
 
 #include <xen/types.h>
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 06e638f..1f158ce 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -84,13 +84,28 @@
 #define HCR_SWIO        (_AC(1,UL)<<1) /* Set/Way Invalidation Override */
 #define HCR_VM          (_AC(1,UL)<<0) /* Virtual MMU Enable */
 
+/* HCPTR Hyp. Coprocessor Trap Register */
+#define HCPTR_TTA       ((_AC(1,U)<<20))        /* Trap trace registers */
+#define HCPTR_CP(x)     ((_AC(1,U)<<(x)))       /* Trap Coprocessor x */
+#define HCPTR_CP_MASK   ((_AC(1,U)<<14)-1)
+
+/* HSTR Hyp. System Trap Register */
+#define HSTR_T(x)       ((_AC(1,U)<<(x)))       /* Trap Cp15 c<x> */
+
+/* HDCR Hyp. Debug Configuration Register */
+#define HDCR_TDRA       (_AC(1,U)<<11)          /* Trap Debug ROM access */
+#define HDCR_TDOSA      (_AC(1,U)<<10)          /* Trap Debug-OS-related register access */
+#define HDCR_TDA        (_AC(1,U)<<9)           /* Trap Debug Access */
+#define HDCR_TPM        (_AC(1,U)<<6)           /* Trap Performance Monitors accesses */
+#define HDCR_TPMCR      (_AC(1,U)<<5)           /* Trap PMCR accesses */
+
 #define HSR_EC_UNKNOWN              0x00
 #define HSR_EC_WFI_WFE              0x01
 #define HSR_EC_CP15_32              0x03
 #define HSR_EC_CP15_64              0x04
-#define HSR_EC_CP14_32              0x05
-#define HSR_EC_CP14_DBG             0x06
-#define HSR_EC_CP                   0x07
+#define HSR_EC_CP14_32              0x05        /* Trapped MCR or MRC access to CP14 */
+#define HSR_EC_CP14_DBG             0x06        /* Trapped LDC/STC access to CP14 (only for debug registers) */
+#define HSR_EC_CP                   0x07        /* HCPTR-trapped access to CP0-CP13 */
 #define HSR_EC_CP10                 0x08
 #define HSR_EC_JAZELLE              0x09
 #define HSR_EC_BXJ                  0x0a
@@ -417,14 +432,16 @@ union hsr {
 #define VECTOR32_PABT 12
 #define VECTOR32_DABT 16
 /* ... ARM64 */
-#define VECTOR64_CURRENT_SP0_SYNC  0x000
-#define VECTOR64_CURRENT_SP0_IRQ   0x080
-#define VECTOR64_CURRENT_SP0_FIQ   0x100
-#define VECTOR64_CURRENT_SP0_ERROR 0x180
-#define VECTOR64_CURRENT_SPx_SYNC  0x200
-#define VECTOR64_CURRENT_SPx_IRQ   0x280
-#define VECTOR64_CURRENT_SPx_FIQ   0x300
-#define VECTOR64_CURRENT_SPx_ERROR 0x380
+#define VECTOR64_CURRENT_SP0_BASE  0x000
+#define VECTOR64_CURRENT_SPx_BASE  0x200
+#define VECTOR64_LOWER64_BASE      0x400
+#define VECTOR64_LOWER32_BASE      0x600
+
+#define VECTOR64_SYNC_OFFSET       0x000
+#define VECTOR64_IRQ_OFFSET        0x080
+#define VECTOR64_FIQ_OFFSET        0x100
+#define VECTOR64_ERROR_OFFSET      0x180
+
 
 #if defined(CONFIG_ARM_32)
 # include <asm/arm32/processor.h>
diff --git a/xen/include/asm-arm/sysregs.h b/xen/include/asm-arm/sysregs.h
index 0cee0e9..b00871c 100644
--- a/xen/include/asm-arm/sysregs.h
+++ b/xen/include/asm-arm/sysregs.h
@@ -40,6 +40,32 @@
     ((__HSR_SYSREG_##crm) << HSR_SYSREG_CRM_SHIFT) | \
     ((__HSR_SYSREG_##op2) << HSR_SYSREG_OP2_SHIFT)
 
+#define HSR_SYSREG_MDSCR_EL1      HSR_SYSREG(2,0,c0,c2,2)
+#define HSR_SYSREG_OSLAR_EL1      HSR_SYSREG(2,0,c1,c0,4)
+#define HSR_SYSREG_OSDLR_EL1      HSR_SYSREG(2,0,c1,c3,4)
+
+#define HSR_SYSREG_DBGBVRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,4)
+#define HSR_SYSREG_DBGBCRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,5)
+#define HSR_SYSREG_DBGWVRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,6)
+#define HSR_SYSREG_DBGWCRn_EL1(n) HSR_SYSREG(2,0,c0,c##n,7)
+
+#define HSR_SYSREG_DBG_CASES(REG) case HSR_SYSREG_##REG##n_EL1(0):  \
+                                  case HSR_SYSREG_##REG##n_EL1(1):  \
+                                  case HSR_SYSREG_##REG##n_EL1(2):  \
+                                  case HSR_SYSREG_##REG##n_EL1(3):  \
+                                  case HSR_SYSREG_##REG##n_EL1(4):  \
+                                  case HSR_SYSREG_##REG##n_EL1(5):  \
+                                  case HSR_SYSREG_##REG##n_EL1(6):  \
+                                  case HSR_SYSREG_##REG##n_EL1(7):  \
+                                  case HSR_SYSREG_##REG##n_EL1(8):  \
+                                  case HSR_SYSREG_##REG##n_EL1(9):  \
+                                  case HSR_SYSREG_##REG##n_EL1(10): \
+                                  case HSR_SYSREG_##REG##n_EL1(11): \
+                                  case HSR_SYSREG_##REG##n_EL1(12): \
+                                  case HSR_SYSREG_##REG##n_EL1(13): \
+                                  case HSR_SYSREG_##REG##n_EL1(14): \
+                                  case HSR_SYSREG_##REG##n_EL1(15)
+
 #define HSR_SYSREG_SCTLR_EL1      HSR_SYSREG(3,0,c1, c0,0)
 #define HSR_SYSREG_TTBR0_EL1      HSR_SYSREG(3,0,c2, c0,0)
 #define HSR_SYSREG_TTBR1_EL1      HSR_SYSREG(3,0,c2, c0,1)
@@ -48,10 +74,28 @@
 #define HSR_SYSREG_AFSR1_EL1      HSR_SYSREG(3,0,c5, c1,1)
 #define HSR_SYSREG_ESR_EL1        HSR_SYSREG(3,0,c5, c2,0)
 #define HSR_SYSREG_FAR_EL1        HSR_SYSREG(3,0,c6, c0,0)
+#define HSR_SYSREG_PMINTENSET_EL1 HSR_SYSREG(3,0,c9,c14,1)
+#define HSR_SYSREG_PMINTENCLR_EL1 HSR_SYSREG(3,0,c9,c14,2)
 #define HSR_SYSREG_MAIR_EL1       HSR_SYSREG(3,0,c10,c2,0)
 #define HSR_SYSREG_AMAIR_EL1      HSR_SYSREG(3,0,c10,c3,0)
 #define HSR_SYSREG_CONTEXTIDR_EL1 HSR_SYSREG(3,0,c13,c0,1)
 
+#define HSR_SYSREG_PMCR_EL0       HSR_SYSREG(3,3,c9,c12,0)
+#define HSR_SYSREG_PMCNTENSET_EL0 HSR_SYSREG(3,3,c9,c12,1)
+#define HSR_SYSREG_PMCNTENCLR_EL0 HSR_SYSREG(3,3,c9,c12,2)
+#define HSR_SYSREG_PMOVSCLR_EL0   HSR_SYSREG(3,3,c9,c12,3)
+#define HSR_SYSREG_PMSWINC_EL0    HSR_SYSREG(3,3,c9,c12,4)
+#define HSR_SYSREG_PMSELR_EL0     HSR_SYSREG(3,3,c9,c12,5)
+#define HSR_SYSREG_PMCEID0_EL0    HSR_SYSREG(3,3,c9,c12,6)
+#define HSR_SYSREG_PMCEID1_EL0    HSR_SYSREG(3,3,c9,c12,7)
+
+#define HSR_SYSREG_PMCCNTR_EL0    HSR_SYSREG(3,3,c9,c13,0)
+#define HSR_SYSREG_PMXEVTYPER_EL0 HSR_SYSREG(3,3,c9,c13,1)
+#define HSR_SYSREG_PMXEVCNTR_EL0  HSR_SYSREG(3,3,c9,c13,2)
+
+#define HSR_SYSREG_PMUSERENR_EL0  HSR_SYSREG(3,3,c9,c14,0)
+#define HSR_SYSREG_PMOVSSET_EL0   HSR_SYSREG(3,3,c9,c14,3)
+
 #define HSR_SYSREG_CNTPCT_EL0     HSR_SYSREG(3,3,c14,c0,0)
 #define HSR_SYSREG_CNTP_CTL_EL0   HSR_SYSREG(3,3,c14,c2,1)
 #define HSR_SYSREG_CNTP_TVAL_EL0  HSR_SYSREG(3,3,c14,c2,0)
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 1cfaf94..526821f 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -71,6 +71,7 @@
 #define X86_FEATURE_TSC_RELIABLE (3*32+12) /* TSC is known to be reliable */
 #define X86_FEATURE_XTOPOLOGY    (3*32+13) /* cpu topology enum extensions */
 #define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
+#define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with monitor */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index ccca5df..974bcc0 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -347,6 +347,19 @@ static inline int hvm_event_pending(struct vcpu *v)
     return hvm_funcs.event_pending(v);
 }
 
+static inline bool_t hvm_vcpu_has_smep(void)
+{
+    unsigned int eax, ebx;
+
+    hvm_cpuid(0, &eax, NULL, NULL, NULL);
+
+    if ( eax < 7 )
+        return 0;
+
+    hvm_cpuid(7, NULL, &ebx, NULL, NULL);
+    return !!(ebx & cpufeat_mask(X86_FEATURE_SMEP));
+}
+
 /* These reserved bits in lower 32 remain 0 after any load of CR0 */
 #define HVM_CR0_GUEST_RESERVED_BITS             \
     (~((unsigned long)                          \
@@ -360,18 +373,24 @@ static inline int hvm_event_pending(struct vcpu *v)
     (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
 
 /* These bits in CR4 cannot be set by the guest. */
-#define HVM_CR4_GUEST_RESERVED_BITS(_v)                 \
+#define HVM_CR4_GUEST_RESERVED_BITS(v, restore) ({      \
+    const struct vcpu *_v = (v);                        \
+    bool_t _restore = !!(restore);                      \
+    ASSERT((_restore) || _v == current);                \
     (~((unsigned long)                                  \
        (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |       \
         X86_CR4_DE  | X86_CR4_PSE | X86_CR4_PAE |       \
         X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE |       \
         X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT |           \
-        (cpu_has_smep ? X86_CR4_SMEP : 0) |             \
+        (((_restore) ? cpu_has_smep :                   \
+                       hvm_vcpu_has_smep()) ?           \
+         X86_CR4_SMEP : 0) |                            \
         (cpu_has_fsgsbase ? X86_CR4_FSGSBASE : 0) |     \
-        ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\
-                      ? X86_CR4_VMXE : 0)  |             \
-        (cpu_has_pcid ? X86_CR4_PCIDE : 0) |             \
-        (cpu_has_xsave ? X86_CR4_OSXSAVE : 0))))
+        ((nestedhvm_enabled(_v->domain) && cpu_has_vmx) \
+                      ? X86_CR4_VMXE : 0)  |            \
+        (cpu_has_pcid ? X86_CR4_PCIDE : 0) |            \
+        (cpu_has_xsave ? X86_CR4_OSXSAVE : 0))));       \
+})
 
 /* These exceptions must always be intercepted. */
 #define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op))
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index ebaba5c..f0c5fa5 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -483,6 +483,8 @@ void virtual_vmcs_exit(void *vvmcs);
 u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding);
 void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val);
 
+DECLARE_PER_CPU(bool_t, vmxon);
+
 #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
 
 /*
diff --git a/xen/include/asm-x86/hvm/vpt.h b/xen/include/asm-x86/hvm/vpt.h
index 87c3a66..41159d8 100644
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -113,7 +113,8 @@ typedef struct RTCState {
     /* periodic timer */
     struct periodic_time pt;
     s_time_t start_time;
-    int pt_code;
+    s_time_t check_ticks_since;
+    int period;
     uint8_t pt_dead_ticks;
     uint32_t use_timer;
     spinlock_t lock;
@@ -175,7 +176,7 @@ void destroy_periodic_time(struct periodic_time *pt);
 int pv_pit_handler(int port, int data, int write);
 void pit_reset(struct domain *d);
 
-void pit_init(struct vcpu *v, unsigned long cpu_khz);
+void pit_init(struct domain *d, unsigned long cpu_khz);
 void pit_stop_channel0_irq(PITState * pit);
 void pit_deinit(struct domain *d);
 void rtc_init(struct domain *d);
@@ -183,7 +184,6 @@ void rtc_migrate_timers(struct vcpu *v);
 void rtc_deinit(struct domain *d);
 void rtc_reset(struct domain *d);
 void rtc_update_clock(struct domain *d);
-bool_t rtc_periodic_interrupt(void *);
 
 void pmtimer_init(struct vcpu *v);
 void pmtimer_deinit(struct domain *d);
diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h
index 1f5fe50..fd268a9 100644
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -38,6 +38,7 @@ struct ix87_state {
 void vcpu_restore_fpu_eager(struct vcpu *v);
 void vcpu_restore_fpu_lazy(struct vcpu *v);
 void vcpu_save_fpu(struct vcpu *v);
+void save_fpu_enable(void);
 
 int vcpu_init_fpu(struct vcpu *v);
 void vcpu_destroy_fpu(struct vcpu *v);
diff --git a/xen/include/asm-x86/mem_event.h b/xen/include/asm-x86/mem_event.h
index 045ef9b..ed4481a 100644
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -66,6 +66,9 @@ int do_mem_event_op(int op, uint32_t domain, void *arg);
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                      XEN_GUEST_HANDLE_PARAM(void) u_domctl);
 
+void mem_event_vcpu_pause(struct vcpu *v);
+void mem_event_vcpu_unpause(struct vcpu *v);
+
 #endif /* __MEM_EVENT_H__ */
 
 
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index f4e7253..a2cb1b7 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -576,8 +576,8 @@ void p2m_mem_access_resume(struct domain *d);
 
 /* Set access type for a region of pfns.
  * If start_pfn == -1ul, sets the default access type */
-int p2m_set_mem_access(struct domain *d, unsigned long start_pfn, 
-                       uint32_t nr, hvmmem_access_t access);
+long p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
+                        uint32_t nr, hvmmem_access_t access);
 
 /* Get access type for a pfn
  * If pfn == -1ul, gets the default access type */
diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
index 7496556..dd53c94 100644
--- a/xen/include/public/arch-arm.h
+++ b/xen/include/public/arch-arm.h
@@ -369,7 +369,8 @@ typedef uint64_t xen_callback_t;
 #define GUEST_GICC_BASE   0x2c002000ULL
 #define GUEST_GICC_SIZE   0x100ULL
 
-#define GUEST_RAM_BASE    0x80000000ULL
+#define GUEST_RAM_BASE    0x80000000ULL /* 768M @ 2GB */
+#define GUEST_RAM_SIZE    0x30000000ULL
 
 #define GUEST_GNTTAB_BASE 0xb0000000ULL
 #define GUEST_GNTTAB_SIZE 0x00020000ULL
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 8c5697e..a6a2092 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -541,13 +541,15 @@ DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
 /*
  * ` enum neg_errnoval
  * ` HYPERVISOR_multicall(multicall_entry_t call_list[],
- * `                      unsigned int nr_calls);
+ * `                      uint32_t nr_calls);
  *
- * NB. The fields are natural register size for this architecture.
+ * NB. The fields are logically the natural register size for this
+ * architecture. In cases where xen_ulong_t is larger than this then
+ * any unused bits in the upper portion must be zero.
  */
 struct multicall_entry {
-    unsigned long op, result;
-    unsigned long args[6];
+    xen_ulong_t op, result;
+    xen_ulong_t args[6];
 };
 typedef struct multicall_entry multicall_entry_t;
 DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
diff --git a/xen/include/xen/acpi.h b/xen/include/xen/acpi.h
index aedec65..3aeba4a 100644
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -61,6 +61,7 @@ int acpi_boot_init (void);
 int acpi_boot_table_init (void);
 int acpi_numa_init (void);
 int erst_init(void);
+void acpi_hest_init(void);
 
 int acpi_table_init (void);
 int acpi_table_parse(char *id, acpi_table_handler handler);
diff --git a/xen/include/xen/hvm/iommu.h b/xen/include/xen/hvm/iommu.h
index 26539e0..8c98274 100644
--- a/xen/include/xen/hvm/iommu.h
+++ b/xen/include/xen/hvm/iommu.h
@@ -29,12 +29,6 @@ struct g2m_ioport {
     unsigned int np;
 };
 
-struct mapped_rmrr {
-    struct list_head list;
-    u64 base;
-    u64 end;
-};
-
 struct hvm_iommu {
     u64 pgd_maddr;                 /* io page directory machine address */
     spinlock_t mapping_lock;       /* io page table lock */
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index cadb525..f3f080f 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -140,9 +140,12 @@ int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 int pci_find_cap_offset(u16 seg, u8 bus, u8 dev, u8 func, u8 cap);
 int pci_find_next_cap(u16 seg, u8 bus, unsigned int devfn, u8 pos, int cap);
 int pci_find_ext_capability(int seg, int bus, int devfn, int cap);
+int pci_find_next_ext_capability(int seg, int bus, int devfn, int pos, int cap);
 const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus,
                       unsigned int *dev, unsigned int *func);
 
+bool_t pcie_aer_get_firmware_first(const struct pci_dev *);
+
 struct pirq;
 int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
 void msixtbl_pt_unregister(struct domain *, struct pirq *);
diff --git a/xen/include/xen/pci_ids.h b/xen/include/xen/pci_ids.h
new file mode 100644
index 0000000..f5b1d94
--- /dev/null
+++ b/xen/include/xen/pci_ids.h
@@ -0,0 +1,9 @@
+#define PCI_VENDOR_ID_AMD                0x1022
+
+#define PCI_VENDOR_ID_NVIDIA             0x10de
+
+#define PCI_VENDOR_ID_OXSEMI             0x1415
+
+#define PCI_VENDOR_ID_BROADCOM           0x14e4
+
+#define PCI_VENDOR_ID_INTEL              0x8086
diff --git a/xen/include/xen/pci_regs.h b/xen/include/xen/pci_regs.h
index ad47054..ecd6124 100644
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -125,7 +125,7 @@
 #define  PCI_IO_RANGE_TYPE_16	0x00
 #define  PCI_IO_RANGE_TYPE_32	0x01
 #define  PCI_IO_RANGE_MASK	(~0x0fUL)
-#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register */
 #define PCI_MEMORY_BASE		0x20	/* Memory range behind */
 #define PCI_MEMORY_LIMIT	0x22
 #define  PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
@@ -152,6 +152,7 @@
 #define  PCI_BRIDGE_CTL_MASTER_ABORT	0x20  /* Report master aborts */
 #define  PCI_BRIDGE_CTL_BUS_RESET	0x40	/* Secondary bus reset */
 #define  PCI_BRIDGE_CTL_FAST_BACK	0x80	/* Fast Back2Back enabled on secondary interface */
+#define  PCI_BRIDGE_CTL_DTMR_SERR	0x800	/* SERR upon discard timer expiry */
 
 /* Header type 2 (CardBus bridges) */
 #define PCI_CB_CAPABILITY_LIST	0x14
@@ -431,6 +432,7 @@
 #define PCI_EXT_CAP_ID_VC	2
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
+#define PCI_EXT_CAP_ID_VNDR	11
 #define PCI_EXT_CAP_ID_ACS	13
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
@@ -459,6 +461,7 @@
 #define  PCI_ERR_COR_BAD_DLLP	0x00000080	/* Bad DLLP Status */
 #define  PCI_ERR_COR_REP_ROLL	0x00000100	/* REPLAY_NUM Rollover */
 #define  PCI_ERR_COR_REP_TIMER	0x00001000	/* Replay Timer Timeout */
+#define  PCI_ERR_COR_ADV_NFAT	0x00002000	/* Advisory Non-Fatal */
 #define PCI_ERR_COR_MASK	20	/* Correctable Error Mask */
 	/* Same bits as above */
 #define PCI_ERR_CAP		24	/* Advanced Error Capabilities */
@@ -510,6 +513,12 @@
 #define PCI_PWR_CAP		12	/* Capability */
 #define  PCI_PWR_CAP_BUDGET(x)	((x) & 1)	/* Included in system budget */
 
+/* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */
+#define PCI_VNDR_HEADER		4	/* Vendor-Specific Header */
+#define  PCI_VNDR_HEADER_ID(x)	((x) & 0xffff)
+#define  PCI_VNDR_HEADER_REV(x)	(((x) >> 16) & 0xf)
+#define  PCI_VNDR_HEADER_LEN(x)	(((x) >> 20) & 0xfff)
+
 /*
  * Hypertransport sub capability types
  *
diff --git a/xen/include/xen/prefetch.h b/xen/include/xen/prefetch.h
index 8d7d3ff..ba73998 100644
--- a/xen/include/xen/prefetch.h
+++ b/xen/include/xen/prefetch.h
@@ -28,24 +28,17 @@
 	prefetchw(x)	- prefetches the cacheline at "x" for write
 	spin_lock_prefetch(x) - prefectches the spinlock *x for taking
 	
-	there is also PREFETCH_STRIDE which is the architecure-prefered 
+	there is also PREFETCH_STRIDE which is the architecture-preferred
 	"lookahead" size for prefetching streamed operations.
 	
 */
 
-/*
- *	These cannot be do{}while(0) macros. See the mental gymnastics in
- *	the loop macro.
- */
- 
 #ifndef ARCH_HAS_PREFETCH
-#define ARCH_HAS_PREFETCH
-static inline void prefetch(const void *x) {;}
+#define prefetch(x) __builtin_prefetch(x)
 #endif
 
 #ifndef ARCH_HAS_PREFETCHW
-#define ARCH_HAS_PREFETCHW
-static inline void prefetchw(const void *x) {;}
+#define prefetchw(x) __builtin_prefetch(x,1)
 #endif
 
 #ifndef ARCH_HAS_SPINLOCK_PREFETCH
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index fb8bd36..4418883 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -86,13 +86,13 @@ struct evtchn
             domid_t remote_domid;
         } unbound;     /* state == ECS_UNBOUND */
         struct {
-            u16            remote_port;
+            evtchn_port_t  remote_port;
             struct domain *remote_dom;
         } interdomain; /* state == ECS_INTERDOMAIN */
         struct {
-            u16            irq;
-            u16            next_port;
-            u16            prev_port;
+            u32            irq;
+            evtchn_port_t  next_port;
+            evtchn_port_t  prev_port;
         } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
@@ -189,8 +189,11 @@ struct vcpu
     unsigned long    pause_flags;
     atomic_t         pause_count;
 
+    /* VCPU paused for mem_event replies. */
+    atomic_t         mem_event_pause_count;
+
     /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
-    u16              virq_to_evtchn[NR_VIRQS];
+    evtchn_port_t    virq_to_evtchn[NR_VIRQS];
     spinlock_t       virq_lock;
 
     /* Bitmask of CPUs on which this VCPU may run. */
@@ -338,7 +341,7 @@ struct domain
     /* Is this guest dying (i.e., a zombie)? */
     enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
     /* Domain is paused by controller software? */
-    bool_t           is_paused_by_controller;
+    int              controller_pause_count;
     /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
     bool_t           is_pinned;
 
@@ -742,8 +745,17 @@ void domain_pause(struct domain *d);
 void domain_pause_nosync(struct domain *d);
 void vcpu_unpause(struct vcpu *v);
 void domain_unpause(struct domain *d);
-void domain_pause_by_systemcontroller(struct domain *d);
-void domain_unpause_by_systemcontroller(struct domain *d);
+int domain_unpause_by_systemcontroller(struct domain *d);
+int __domain_pause_by_systemcontroller(struct domain *d,
+                                       void (*pause_fn)(struct domain *d));
+static inline int domain_pause_by_systemcontroller(struct domain *d)
+{
+    return __domain_pause_by_systemcontroller(d, domain_pause);
+}
+static inline int domain_pause_by_systemcontroller_nosync(struct domain *d)
+{
+    return __domain_pause_by_systemcontroller(d, domain_pause_nosync);
+}
 void cpu_init(void);
 
 struct scheduler;
diff --git a/xen/include/xen/trace.h b/xen/include/xen/trace.h
index 3b8a7b3..12966ea 100644
--- a/xen/include/xen/trace.h
+++ b/xen/include/xen/trace.h
@@ -45,7 +45,7 @@ static inline void trace_var(u32 event, int cycles, int extra,
 }
 
 void __trace_hypercall(uint32_t event, unsigned long op,
-                       const unsigned long *args);
+                       const xen_ulong_t *args);
 
 /* Convenience macros for calling the trace function. */
 #define TRACE_0D(_e)                            \

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-xen/xen.git



More information about the Pkg-xen-changes mailing list