[kernel] r21999 - in dists/sid/linux/debian: . patches patches/bugfix/x86

Ben Hutchings benh at moszumanska.debian.org
Fri Oct 31 02:41:18 UTC 2014


Author: benh
Date: Fri Oct 31 02:41:18 2014
New Revision: 21999

Log:
[x86] KVM: Various security fixes

Added:
   dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch
   dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch
   dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch
   dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch
   dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Improve-thread-safety-in-pit.patch
   dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch
   dists/sid/linux/debian/patches/bugfix/x86/kvm-vmx-handle-invvpid-vm-exit-gracefully.patch
Modified:
   dists/sid/linux/debian/changelog
   dists/sid/linux/debian/patches/series

Modified: dists/sid/linux/debian/changelog
==============================================================================
--- dists/sid/linux/debian/changelog	Fri Oct 31 02:28:46 2014	(r21998)
+++ dists/sid/linux/debian/changelog	Fri Oct 31 02:41:18 2014	(r21999)
@@ -132,6 +132,15 @@
   * drivers/net,ipv6: Fix virtio/IPv6 regression in 3.16:
     - drivers/net: Disable UFO through virtio
     - drivers/net,ipv6: Select IPv6 fragment idents for virtio UFO packets
+  * [x86] KVM: Check non-canonical addresses upon WRMSR (CVE-2014-3610)
+  * [x86] KVM: Prevent host from panicking on shared MSR writes.
+    (CVE-2014-3610)
+  * [x86] KVM: Improve thread safety in pit (CVE-2014-3611)
+  * [x86] kvm: vmx: handle invvpid vm exit gracefully (CVE-2014-3646)
+  * [x86] KVM: Fix wrong masking on relative jump/call
+  * [x86] KVM: Emulator fixes for eip canonical checks on near branches
+    (CVE-2014-3647)
+  * [x86] KVM: Handle errors when RIP is set during far jumps (CVE-2014-3647)
 
   [ Mauricio Faria de Oliveira ]
   * [ppc64el] Disable CONFIG_CMDLINE{,_BOOL} usage for setting consoles

Added: dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch	Fri Oct 31 02:41:18 2014	(r21999)
@@ -0,0 +1,135 @@
+From: Nadav Amit <namit at cs.technion.ac.il>
+Date: Tue, 16 Sep 2014 03:24:05 +0300
+Subject: KVM: x86: Check non-canonical addresses upon WRMSR
+Origin: https://git.kernel.org/linus/854e8bb1aa06c578c2c9145fa6bfe3680ef63b23
+
+Upon WRMSR, the CPU should inject #GP if a non-canonical value (address) is
+written to certain MSRs. The behavior is "almost" identical for AMD and Intel
+(ignoring MSRs that are not implemented in either architecture since they would
+anyhow #GP). However, IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+non-canonical address is written on Intel but not on AMD (which ignores the top
+32-bits).
+
+Accordingly, this patch injects a #GP on the MSRs which behave identically on
+Intel and AMD.  To eliminate the differences between the architecutres, the
+value which is written to IA32_SYSENTER_ESP and IA32_SYSENTER_EIP is turned to
+canonical value before writing instead of injecting a #GP.
+
+Some references from Intel and AMD manuals:
+
+According to Intel SDM description of WRMSR instruction #GP is expected on
+WRMSR "If the source register contains a non-canonical address and ECX
+specifies one of the following MSRs: IA32_DS_AREA, IA32_FS_BASE, IA32_GS_BASE,
+IA32_KERNEL_GS_BASE, IA32_LSTAR, IA32_SYSENTER_EIP, IA32_SYSENTER_ESP."
+
+According to AMD manual instruction manual:
+LSTAR/CSTAR (SYSCALL): "The WRMSR instruction loads the target RIP into the
+LSTAR and CSTAR registers.  If an RIP written by WRMSR is not in canonical
+form, a general-protection exception (#GP) occurs."
+IA32_GS_BASE and IA32_FS_BASE (WRFSBASE/WRGSBASE): "The address written to the
+base field must be in canonical form or a #GP fault will occur."
+IA32_KERNEL_GS_BASE (SWAPGS): "The address stored in the KernelGSbase MSR must
+be in canonical form."
+
+This patch fixes CVE-2014-3610.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Nadav Amit <namit at cs.technion.ac.il>
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/include/asm/kvm_host.h | 14 ++++++++++++++
+ arch/x86/kvm/svm.c              |  2 +-
+ arch/x86/kvm/vmx.c              |  2 +-
+ arch/x86/kvm/x86.c              | 27 ++++++++++++++++++++++++++-
+ 4 files changed, 42 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct
+ 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+ }
+ 
++static inline u64 get_canonical(u64 la)
++{
++	return ((int64_t)la << 16) >> 16;
++}
++
++static inline bool is_noncanonical_address(u64 la)
++{
++#ifdef CONFIG_X86_64
++	return get_canonical(la) != la;
++#else
++	return false;
++#endif
++}
++
+ #define TSS_IOPB_BASE_OFFSET 0x66
+ #define TSS_BASE_SIZE 0x68
+ #define TSS_IOPB_SIZE (65536 / 8)
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -3228,7 +3228,7 @@ static int wrmsr_interception(struct vcp
+ 	msr.host_initiated = false;
+ 
+ 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
+-	if (svm_set_msr(&svm->vcpu, &msr)) {
++	if (kvm_set_msr(&svm->vcpu, &msr)) {
+ 		trace_kvm_msr_write_ex(ecx, data);
+ 		kvm_inject_gp(&svm->vcpu, 0);
+ 	} else {
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -5246,7 +5246,7 @@ static int handle_wrmsr(struct kvm_vcpu
+ 	msr.data = data;
+ 	msr.index = ecx;
+ 	msr.host_initiated = false;
+-	if (vmx_set_msr(vcpu, &msr) != 0) {
++	if (kvm_set_msr(vcpu, &msr) != 0) {
+ 		trace_kvm_msr_write_ex(ecx, data);
+ 		kvm_inject_gp(vcpu, 0);
+ 		return 1;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -948,7 +948,6 @@ void kvm_enable_efer_bits(u64 mask)
+ }
+ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+ 
+-
+ /*
+  * Writes msr value into into the appropriate "register".
+  * Returns 0 on success, non-0 otherwise.
+@@ -956,8 +955,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+  */
+ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ {
++	switch (msr->index) {
++	case MSR_FS_BASE:
++	case MSR_GS_BASE:
++	case MSR_KERNEL_GS_BASE:
++	case MSR_CSTAR:
++	case MSR_LSTAR:
++		if (is_noncanonical_address(msr->data))
++			return 1;
++		break;
++	case MSR_IA32_SYSENTER_EIP:
++	case MSR_IA32_SYSENTER_ESP:
++		/*
++		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
++		 * non-canonical address is written on Intel but not on
++		 * AMD (which ignores the top 32-bits, because it does
++		 * not implement 64-bit SYSENTER).
++		 *
++		 * 64-bit code should hence be able to write a non-canonical
++		 * value on AMD.  Making the address canonical ensures that
++		 * vmentry does not fail on Intel after writing a non-canonical
++		 * value, and that something deterministic happens if the guest
++		 * invokes 64-bit SYSENTER.
++		 */
++		msr->data = get_canonical(msr->data);
++	}
+ 	return kvm_x86_ops->set_msr(vcpu, msr);
+ }
++EXPORT_SYMBOL_GPL(kvm_set_msr);
+ 
+ /*
+  * Adapt set_msr() to msr_io()'s calling convention

Added: dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch	Fri Oct 31 02:41:18 2014	(r21999)
@@ -0,0 +1,231 @@
+From: Nadav Amit <namit at cs.technion.ac.il>
+Date: Thu, 18 Sep 2014 22:39:38 +0300
+Subject: KVM: x86: Emulator fixes for eip canonical checks on near branches
+Origin: https://git.kernel.org/linus/234f3ce485d54017f15cf5e0699cff4100121601
+
+Before changing rip (during jmp, call, ret, etc.) the target should be asserted
+to be canonical one, as real CPUs do.  During sysret, both target rsp and rip
+should be canonical. If any of these values is noncanonical, a #GP exception
+should occur.  The exception to this rule are syscall and sysenter instructions
+in which the assigned rip is checked during the assignment to the relevant
+MSRs.
+
+This patch fixes the emulator to behave as real CPUs do for near branches.
+Far branches are handled by the next patch.
+
+This fixes CVE-2014-3647.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Nadav Amit <namit at cs.technion.ac.il>
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/kvm/emulate.c | 78 ++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 54 insertions(+), 24 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 0476989..a1b9139 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -564,7 +564,8 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
+ 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
+ }
+ 
+-static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
++static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
++			       int cs_l)
+ {
+ 	switch (ctxt->op_bytes) {
+ 	case 2:
+@@ -574,16 +575,25 @@ static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+ 		ctxt->_eip = (u32)dst;
+ 		break;
+ 	case 8:
++		if ((cs_l && is_noncanonical_address(dst)) ||
++		    (!cs_l && (dst & ~(u32)-1)))
++			return emulate_gp(ctxt, 0);
+ 		ctxt->_eip = dst;
+ 		break;
+ 	default:
+ 		WARN(1, "unsupported eip assignment size\n");
+ 	}
++	return X86EMUL_CONTINUE;
++}
++
++static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
++{
++	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+ }
+ 
+-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
++static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+ {
+-	assign_eip_near(ctxt, ctxt->_eip + rel);
++	return assign_eip_near(ctxt, ctxt->_eip + rel);
+ }
+ 
+ static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
+@@ -1998,13 +2008,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
+ 	case 2: /* call near abs */ {
+ 		long int old_eip;
+ 		old_eip = ctxt->_eip;
+-		ctxt->_eip = ctxt->src.val;
++		rc = assign_eip_near(ctxt, ctxt->src.val);
++		if (rc != X86EMUL_CONTINUE)
++			break;
+ 		ctxt->src.val = old_eip;
+ 		rc = em_push(ctxt);
+ 		break;
+ 	}
+ 	case 4: /* jmp abs */
+-		ctxt->_eip = ctxt->src.val;
++		rc = assign_eip_near(ctxt, ctxt->src.val);
+ 		break;
+ 	case 5: /* jmp far */
+ 		rc = em_jmp_far(ctxt);
+@@ -2039,10 +2051,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
+ 
+ static int em_ret(struct x86_emulate_ctxt *ctxt)
+ {
+-	ctxt->dst.type = OP_REG;
+-	ctxt->dst.addr.reg = &ctxt->_eip;
+-	ctxt->dst.bytes = ctxt->op_bytes;
+-	return em_pop(ctxt);
++	int rc;
++	unsigned long eip;
++
++	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
++	if (rc != X86EMUL_CONTINUE)
++		return rc;
++
++	return assign_eip_near(ctxt, eip);
+ }
+ 
+ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
+@@ -2323,7 +2339,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
+ {
+ 	const struct x86_emulate_ops *ops = ctxt->ops;
+ 	struct desc_struct cs, ss;
+-	u64 msr_data;
++	u64 msr_data, rcx, rdx;
+ 	int usermode;
+ 	u16 cs_sel = 0, ss_sel = 0;
+ 
+@@ -2339,6 +2355,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
+ 	else
+ 		usermode = X86EMUL_MODE_PROT32;
+ 
++	rcx = reg_read(ctxt, VCPU_REGS_RCX);
++	rdx = reg_read(ctxt, VCPU_REGS_RDX);
++
+ 	cs.dpl = 3;
+ 	ss.dpl = 3;
+ 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
+@@ -2356,6 +2375,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
+ 		ss_sel = cs_sel + 8;
+ 		cs.d = 0;
+ 		cs.l = 1;
++		if (is_noncanonical_address(rcx) ||
++		    is_noncanonical_address(rdx))
++			return emulate_gp(ctxt, 0);
+ 		break;
+ 	}
+ 	cs_sel |= SELECTOR_RPL_MASK;
+@@ -2364,8 +2386,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
+ 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
+ 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
+ 
+-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
+-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
++	ctxt->_eip = rdx;
++	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
+ 
+ 	return X86EMUL_CONTINUE;
+ }
+@@ -2905,10 +2927,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
+ 
+ static int em_call(struct x86_emulate_ctxt *ctxt)
+ {
++	int rc;
+ 	long rel = ctxt->src.val;
+ 
+ 	ctxt->src.val = (unsigned long)ctxt->_eip;
+-	jmp_rel(ctxt, rel);
++	rc = jmp_rel(ctxt, rel);
++	if (rc != X86EMUL_CONTINUE)
++		return rc;
+ 	return em_push(ctxt);
+ }
+ 
+@@ -2940,11 +2965,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
+ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
+ {
+ 	int rc;
++	unsigned long eip;
+ 
+-	ctxt->dst.type = OP_REG;
+-	ctxt->dst.addr.reg = &ctxt->_eip;
+-	ctxt->dst.bytes = ctxt->op_bytes;
+-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
++	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
++	if (rc != X86EMUL_CONTINUE)
++		return rc;
++	rc = assign_eip_near(ctxt, eip);
+ 	if (rc != X86EMUL_CONTINUE)
+ 		return rc;
+ 	rsp_increment(ctxt, ctxt->src.val);
+@@ -3271,20 +3297,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
+ 
+ static int em_loop(struct x86_emulate_ctxt *ctxt)
+ {
++	int rc = X86EMUL_CONTINUE;
++
+ 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+ 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
+ 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
+-		jmp_rel(ctxt, ctxt->src.val);
++		rc = jmp_rel(ctxt, ctxt->src.val);
+ 
+-	return X86EMUL_CONTINUE;
++	return rc;
+ }
+ 
+ static int em_jcxz(struct x86_emulate_ctxt *ctxt)
+ {
++	int rc = X86EMUL_CONTINUE;
++
+ 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
+-		jmp_rel(ctxt, ctxt->src.val);
++		rc = jmp_rel(ctxt, ctxt->src.val);
+ 
+-	return X86EMUL_CONTINUE;
++	return rc;
+ }
+ 
+ static int em_in(struct x86_emulate_ctxt *ctxt)
+@@ -4743,7 +4773,7 @@ special_insn:
+ 		break;
+ 	case 0x70 ... 0x7f: /* jcc (short) */
+ 		if (test_cc(ctxt->b, ctxt->eflags))
+-			jmp_rel(ctxt, ctxt->src.val);
++			rc = jmp_rel(ctxt, ctxt->src.val);
+ 		break;
+ 	case 0x8d: /* lea r16/r32, m */
+ 		ctxt->dst.val = ctxt->src.addr.mem.ea;
+@@ -4773,7 +4803,7 @@ special_insn:
+ 		break;
+ 	case 0xe9: /* jmp rel */
+ 	case 0xeb: /* jmp rel short */
+-		jmp_rel(ctxt, ctxt->src.val);
++		rc = jmp_rel(ctxt, ctxt->src.val);
+ 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
+ 		break;
+ 	case 0xf4:              /* hlt */
+@@ -4898,7 +4928,7 @@ twobyte_insn:
+ 		break;
+ 	case 0x80 ... 0x8f: /* jnz rel, etc*/
+ 		if (test_cc(ctxt->b, ctxt->eflags))
+-			jmp_rel(ctxt, ctxt->src.val);
++			rc = jmp_rel(ctxt, ctxt->src.val);
+ 		break;
+ 	case 0x90 ... 0x9f:     /* setcc r/m8 */
+ 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);

Added: dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch	Fri Oct 31 02:41:18 2014	(r21999)
@@ -0,0 +1,60 @@
+From: Nadav Amit <namit at cs.technion.ac.il>
+Date: Thu, 18 Sep 2014 22:39:37 +0300
+Subject: KVM: x86: Fix wrong masking on relative jump/call
+Origin: https://git.kernel.org/linus/05c83ec9b73c8124555b706f6af777b10adf0862
+
+Relative jumps and calls do the masking according to the operand size, and not
+according to the address size as the KVM emulator does today.
+
+This patch fixes KVM behavior.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Nadav Amit <namit at cs.technion.ac.il>
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++++-----
+ 1 file changed, 22 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -499,11 +499,6 @@ static void rsp_increment(struct x86_emu
+ 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
+ }
+ 
+-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+-{
+-	register_address_increment(ctxt, &ctxt->_eip, rel);
+-}
+-
+ static u32 desc_limit_scaled(struct desc_struct *desc)
+ {
+ 	u32 limit = get_desc_limit(desc);
+@@ -577,6 +572,28 @@ static int emulate_nm(struct x86_emulate
+ 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
+ }
+ 
++static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
++{
++	switch (ctxt->op_bytes) {
++	case 2:
++		ctxt->_eip = (u16)dst;
++		break;
++	case 4:
++		ctxt->_eip = (u32)dst;
++		break;
++	case 8:
++		ctxt->_eip = dst;
++		break;
++	default:
++		WARN(1, "unsupported eip assignment size\n");
++	}
++}
++
++static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
++{
++	assign_eip_near(ctxt, ctxt->_eip + rel);
++}
++
+ static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
+ {
+ 	u16 selector;

Added: dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch	Fri Oct 31 02:41:18 2014	(r21999)
@@ -0,0 +1,246 @@
+From: Nadav Amit <namit at cs.technion.ac.il>
+Date: Thu, 18 Sep 2014 22:39:39 +0300
+Subject: KVM: x86: Handle errors when RIP is set during far jumps
+Origin: https://git.kernel.org/linus/d1442d85cc30ea75f7d399474ca738e0bc96f715
+
+Far jmp/call/ret may fault while loading a new RIP.  Currently KVM does not
+handle this case, and may result in failed vm-entry once the assignment is
+done.  The tricky part of doing so is that loading the new CS affects the
+VMCS/VMCB state, so if we fail during loading the new RIP, we are left in
+unconsistent state.  Therefore, this patch saves on 64-bit the old CS
+descriptor and restores it if loading RIP failed.
+
+This fixes CVE-2014-3647.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Nadav Amit <namit at cs.technion.ac.il>
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+[bwh: Backported to 3.16: Adjust context]
+---
+ arch/x86/kvm/emulate.c | 118 ++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 88 insertions(+), 30 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -1429,7 +1429,9 @@ static int write_segment_descriptor(stru
+ 
+ /* Does not support long mode */
+ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
++				     u16 selector, int seg, u8 cpl,
++				     bool in_task_switch,
++				     struct desc_struct *desc)
+ {
+ 	struct desc_struct seg_desc, old_desc;
+ 	u8 dpl, rpl;
+@@ -1558,6 +1560,8 @@ static int __load_segment_descriptor(str
+ 	}
+ load:
+ 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
++	if (desc)
++		*desc = seg_desc;
+ 	return X86EMUL_CONTINUE;
+ exception:
+ 	emulate_exception(ctxt, err_vec, err_code, true);
+@@ -1568,7 +1572,7 @@ static int load_segment_descriptor(struc
+ 				   u16 selector, int seg)
+ {
+ 	u8 cpl = ctxt->ops->cpl(ctxt);
+-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
++	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
+ }
+ 
+ static void write_register_operand(struct operand *op)
+@@ -1965,17 +1969,31 @@ static int em_iret(struct x86_emulate_ct
+ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
+ {
+ 	int rc;
+-	unsigned short sel;
++	unsigned short sel, old_sel;
++	struct desc_struct old_desc, new_desc;
++	const struct x86_emulate_ops *ops = ctxt->ops;
++	u8 cpl = ctxt->ops->cpl(ctxt);
++
++	/* Assignment of RIP may only fail in 64-bit mode */
++	if (ctxt->mode == X86EMUL_MODE_PROT64)
++		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
++				 VCPU_SREG_CS);
+ 
+ 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
+ 
+-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
++	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
++				       &new_desc);
+ 	if (rc != X86EMUL_CONTINUE)
+ 		return rc;
+ 
+-	ctxt->_eip = 0;
+-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+-	return X86EMUL_CONTINUE;
++	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
++	if (rc != X86EMUL_CONTINUE) {
++		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
++		/* assigning eip failed; restore the old cs */
++		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
++		return rc;
++	}
++	return rc;
+ }
+ 
+ static int em_grp45(struct x86_emulate_ctxt *ctxt)
+@@ -2033,21 +2051,34 @@ static int em_ret(struct x86_emulate_ctx
+ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
+ {
+ 	int rc;
+-	unsigned long cs;
++	unsigned long eip, cs;
++	u16 old_cs;
+ 	int cpl = ctxt->ops->cpl(ctxt);
++	struct desc_struct old_desc, new_desc;
++	const struct x86_emulate_ops *ops = ctxt->ops;
+ 
+-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
++	if (ctxt->mode == X86EMUL_MODE_PROT64)
++		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
++				 VCPU_SREG_CS);
++
++	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+ 	if (rc != X86EMUL_CONTINUE)
+ 		return rc;
+-	if (ctxt->op_bytes == 4)
+-		ctxt->_eip = (u32)ctxt->_eip;
+ 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
+ 	if (rc != X86EMUL_CONTINUE)
+ 		return rc;
+ 	/* Outer-privilege level return is not implemented */
+ 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
+ 		return X86EMUL_UNHANDLEABLE;
+-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
++	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
++				       &new_desc);
++	if (rc != X86EMUL_CONTINUE)
++		return rc;
++	rc = assign_eip_far(ctxt, eip, new_desc.l);
++	if (rc != X86EMUL_CONTINUE) {
++		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
++		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
++	}
+ 	return rc;
+ }
+ 
+@@ -2465,19 +2496,24 @@ static int load_state_from_tss16(struct
+ 	 * Now load segment descriptors. If fault happens at this stage
+ 	 * it is handled in a context of new task
+ 	 */
+-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+ 
+@@ -2602,25 +2638,32 @@ static int load_state_from_tss32(struct
+ 	 * Now load segment descriptors. If fault happenes at this stage
+ 	 * it is handled in a context of new task
+ 	 */
+-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
++					cpl, true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
++	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
++					true, NULL);
+ 	if (ret != X86EMUL_CONTINUE)
+ 		return ret;
+ 
+@@ -2900,24 +2943,39 @@ static int em_call_far(struct x86_emulat
+ 	u16 sel, old_cs;
+ 	ulong old_eip;
+ 	int rc;
++	struct desc_struct old_desc, new_desc;
++	const struct x86_emulate_ops *ops = ctxt->ops;
++	int cpl = ctxt->ops->cpl(ctxt);
+ 
+-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
+ 	old_eip = ctxt->_eip;
++	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
+ 
+ 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
+-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
++	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
++				       &new_desc);
++	if (rc != X86EMUL_CONTINUE)
+ 		return X86EMUL_CONTINUE;
+ 
+-	ctxt->_eip = 0;
+-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
++	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
++	if (rc != X86EMUL_CONTINUE)
++		goto fail;
+ 
+ 	ctxt->src.val = old_cs;
+ 	rc = em_push(ctxt);
+ 	if (rc != X86EMUL_CONTINUE)
+-		return rc;
++		goto fail;
+ 
+ 	ctxt->src.val = old_eip;
+-	return em_push(ctxt);
++	rc = em_push(ctxt);
++	/* If we failed, we tainted the memory, but the very least we should
++	   restore cs */
++	if (rc != X86EMUL_CONTINUE)
++		goto fail;
++	return rc;
++fail:
++	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
++	return rc;
++
+ }
+ 
+ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)

Added: dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Improve-thread-safety-in-pit.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Improve-thread-safety-in-pit.patch	Fri Oct 31 02:41:18 2014	(r21999)
@@ -0,0 +1,34 @@
+From: Andy Honig <ahonig at google.com>
+Date: Wed, 27 Aug 2014 14:42:54 -0700
+Subject: KVM: x86: Improve thread safety in pit
+Origin: https://git.kernel.org/linus/2febc839133280d5a5e8e1179c94ea674489dae2
+
+There's a race condition in the PIT emulation code in KVM.  In
+__kvm_migrate_pit_timer the pit_timer object is accessed without
+synchronization.  If the race condition occurs at the wrong time this
+can crash the host kernel.
+
+This fixes CVE-2014-3611.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Andrew Honig <ahonig at google.com>
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/kvm/i8254.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
+index 518d864..298781d 100644
+--- a/arch/x86/kvm/i8254.c
++++ b/arch/x86/kvm/i8254.c
+@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
+ 		return;
+ 
+ 	timer = &pit->pit_state.timer;
++	mutex_lock(&pit->pit_state.lock);
+ 	if (hrtimer_cancel(timer))
+ 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
++	mutex_unlock(&pit->pit_state.lock);
+ }
+ 
+ static void destroy_pit_timer(struct kvm_pit *pit)

Added: dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux/debian/patches/bugfix/x86/KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch	Fri Oct 31 02:41:18 2014	(r21999)
@@ -0,0 +1,81 @@
+From: Andy Honig <ahonig at google.com>
+Date: Wed, 27 Aug 2014 11:16:44 -0700
+Subject: KVM: x86: Prevent host from panicking on shared MSR writes.
+Origin: https://git.kernel.org/linus/8b3c3104c3f4f706e99365c3e0d2aa61b95f969f
+
+The previous patch blocked invalid writes directly when the MSR
+is written.  As a precaution, prevent future similar mistakes by
+gracefulling handle GPs caused by writes to shared MSRs.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Andrew Honig <ahonig at google.com>
+[Remove parts obsoleted by Nadav's patch. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/include/asm/kvm_host.h |  2 +-
+ arch/x86/kvm/vmx.c              |  7 +++++--
+ arch/x86/kvm/x86.c              | 11 ++++++++---
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1061,7 +1061,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcp
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+ 
+ void kvm_define_shared_msr(unsigned index, u32 msr);
+-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
++int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+ 
+ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
+ 
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2615,12 +2615,15 @@ static int vmx_set_msr(struct kvm_vcpu *
+ 	default:
+ 		msr = find_msr_entry(vmx, msr_index);
+ 		if (msr) {
++			u64 old_msr_data = msr->data;
+ 			msr->data = data;
+ 			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
+ 				preempt_disable();
+-				kvm_set_shared_msr(msr->index, msr->data,
+-						   msr->mask);
++				ret = kvm_set_shared_msr(msr->index, msr->data,
++							 msr->mask);
+ 				preempt_enable();
++				if (ret)
++					msr->data = old_msr_data;
+ 			}
+ 			break;
+ 		}
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -227,20 +227,25 @@ static void kvm_shared_msr_cpu_online(vo
+ 		shared_msr_update(i, shared_msrs_global.msrs[i]);
+ }
+ 
+-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
++int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+ {
+ 	unsigned int cpu = smp_processor_id();
+ 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
++	int err;
+ 
+ 	if (((value ^ smsr->values[slot].curr) & mask) == 0)
+-		return;
++		return 0;
+ 	smsr->values[slot].curr = value;
+-	wrmsrl(shared_msrs_global.msrs[slot], value);
++	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
++	if (err)
++		return 1;
++
+ 	if (!smsr->registered) {
+ 		smsr->urn.on_user_return = kvm_on_user_return;
+ 		user_return_notifier_register(&smsr->urn);
+ 		smsr->registered = true;
+ 	}
++	return 0;
+ }
+ EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
+ 

Added: dists/sid/linux/debian/patches/bugfix/x86/kvm-vmx-handle-invvpid-vm-exit-gracefully.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux/debian/patches/bugfix/x86/kvm-vmx-handle-invvpid-vm-exit-gracefully.patch	Fri Oct 31 02:41:18 2014	(r21999)
@@ -0,0 +1,72 @@
+From: Petr Matousek <pmatouse at redhat.com>
+Date: Tue, 23 Sep 2014 20:22:30 +0200
+Subject: kvm: vmx: handle invvpid vm exit gracefully
+Origin: https://git.kernel.org/linus/a642fc305053cc1c6e47e4f4df327895747ab485
+
+On systems with invvpid instruction support (corresponding bit in
+IA32_VMX_EPT_VPID_CAP MSR is set) guest invocation of invvpid
+causes vm exit, which is currently not handled and results in
+propagation of unknown exit to userspace.
+
+Fix this by installing an invvpid vm exit handler.
+
+This is CVE-2014-3646.
+
+Cc: stable at vger.kernel.org
+Signed-off-by: Petr Matousek <pmatouse at redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
+---
+ arch/x86/include/uapi/asm/vmx.h | 2 ++
+ arch/x86/kvm/vmx.c              | 9 ++++++++-
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/uapi/asm/vmx.h
++++ b/arch/x86/include/uapi/asm/vmx.h
+@@ -67,6 +67,7 @@
+ #define EXIT_REASON_EPT_MISCONFIG       49
+ #define EXIT_REASON_INVEPT              50
+ #define EXIT_REASON_PREEMPTION_TIMER    52
++#define EXIT_REASON_INVVPID             53
+ #define EXIT_REASON_WBINVD              54
+ #define EXIT_REASON_XSETBV              55
+ #define EXIT_REASON_APIC_WRITE          56
+@@ -114,6 +115,7 @@
+ 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
+ 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
+ 	{ EXIT_REASON_INVD,                  "INVD" }, \
++	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
+ 	{ EXIT_REASON_INVPCID,               "INVPCID" }
+ 
+ #endif /* _UAPIVMX_H */
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -6618,6 +6618,12 @@ static int handle_invept(struct kvm_vcpu
+ 	return 1;
+ }
+ 
++static int handle_invvpid(struct kvm_vcpu *vcpu)
++{
++	kvm_queue_exception(vcpu, UD_VECTOR);
++	return 1;
++}
++
+ /*
+  * The exit handlers return 1 if the exit was handled fully and guest execution
+  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
+@@ -6663,6 +6669,7 @@ static int (*const kvm_vmx_exit_handlers
+ 	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
+ 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
+ 	[EXIT_REASON_INVEPT]                  = handle_invept,
++	[EXIT_REASON_INVVPID]                 = handle_invvpid,
+ };
+ 
+ static const int kvm_vmx_max_exit_handlers =
+@@ -6896,7 +6903,7 @@ static bool nested_vmx_exit_handled(stru
+ 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
+ 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
+ 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+-	case EXIT_REASON_INVEPT:
++	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
+ 		/*
+ 		 * VMX instructions trap unconditionally. This allows L1 to
+ 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!

Modified: dists/sid/linux/debian/patches/series
==============================================================================
--- dists/sid/linux/debian/patches/series	Fri Oct 31 02:28:46 2014	(r21998)
+++ dists/sid/linux/debian/patches/series	Fri Oct 31 02:41:18 2014	(r21999)
@@ -406,3 +406,10 @@
 features/all/wireless-rt2x00-add-new-rt2800usb-device.patch
 bugfix/all/drivers-net-Disable-UFO-through-virtio.patch
 bugfix/all/drivers-net-ipv6-Select-IPv6-fragment-idents-for-vir.patch
+bugfix/x86/KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch
+bugfix/x86/KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch
+bugfix/x86/KVM-x86-Improve-thread-safety-in-pit.patch
+bugfix/x86/KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch
+bugfix/x86/kvm-vmx-handle-invvpid-vm-exit-gracefully.patch
+bugfix/x86/KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch
+bugfix/x86/KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch



More information about the Kernel-svn-changes mailing list