[x86] Backport KVM nested VMX fixes from 3.3 to fix warnings and

crashes of L1 guests. svn path=/dists/trunk/linux-2.6/; revision=18603
2012-01-23 08:07:20 +00:00 · 2012-01-23 08:07:20 +00:00 · 943fad2464
parent d5302538cf
commit 943fad2464
4 changed files with 190 additions and 0 deletions
--- a/debian/changelog
+++ b/debian/changelog
@@ -14,6 +14,10 @@ linux-2.6 (3.2.1-2) UNRELEASED; urgency=low
  * Build linux-libc-dev without multiarch if dpkg does not support it,
    to support backports

+  [ Aurelien Jarno ]
+  * [x86] Backport KVM nested VMX fixes from 3.3 to fix warnings and
+    crashes of L1 guests.
+
 -- Ben Hutchings <ben@decadent.org.uk>  Thu, 19 Jan 2012 05:42:57 +0000

 linux-2.6 (3.2.1-1) unstable; urgency=low
--- a/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch
+++ b/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch
@@ -0,0 +1,111 @@
+From: Nadav Har'El <nyh@il.ibm.com>
+Date: Thu, 22 Sep 2011 13:52:56 +0300
+Subject: [PATCH 1/2] KVM: nVMX: Add KVM_REQ_IMMEDIATE_EXIT
+
+commit d6185f20a0efbf175e12831d0de330e4f21725aa upstream.
+
+This patch adds a new vcpu->requests bit, KVM_REQ_IMMEDIATE_EXIT.
+This bit requests that when next entering the guest, we should run it only
+for as little as possible, and exit again.
+
+We use this new option in nested VMX: When L1 launches L2, but L0 wishes L1
+to continue running so it can inject an event to it, we unfortunately cannot
+just pretend to have run L2 for a little while - We must really launch L2,
+otherwise certain one-off vmcs12 parameters (namely, L1 injection into L2)
+will be lost. So the existing code runs L2 in this case.
+But L2 could potentially run for a long time until it exits, and the
+injection into L1 will be delayed. The new KVM_REQ_IMMEDIATE_EXIT allows us
+to request that L2 will be entered, as necessary, but will exit as soon as
+possible after entry.
+
+Our implementation of this request uses smp_send_reschedule() to send a
+self-IPI, with interrupts disabled. The interrupts remain disabled until the
+guest is entered, and then, after the entry is complete (often including
+processing an injection and jumping to the relevant handler), the physical
+interrupt is noticed and causes an exit.
+
+On recent Intel processors, we could have achieved the same goal by using
+MTF instead of a self-IPI. Another technique worth considering in the future
+is to use VM_EXIT_ACK_INTR_ON_EXIT and a highest-priority vector IPI - to
+slightly improve performance by avoiding the useless interrupt handler
+which ends up being called when smp_send_reschedule() is used.
+
+Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+---
+ arch/x86/kvm/vmx.c       |   11 +++++++----
+ arch/x86/kvm/x86.c       |    7 ++++++-
+ include/linux/kvm_host.h |    1 +
+ 3 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 579a0b5..d75d914 100644
+--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
+@@ -3945,12 +3945,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
+ static void enable_irq_window(struct kvm_vcpu *vcpu)
+ {
+ 	u32 cpu_based_vm_exec_control;
+-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+-		/* We can get here when nested_run_pending caused
+-		 * vmx_interrupt_allowed() to return false. In this case, do
+-		 * nothing - the interrupt will be injected later.
++	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
++		/*
++		 * We get here if vmx_interrupt_allowed() said we can't
++		 * inject to L1 now because L2 must run. Ask L2 to exit
++		 * right after entry, so we can inject to L1 more promptly.
+ 		 */
++		kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
+ 		return;
++	}
+ 
+ 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 4c938da..e24edbc 100644
+--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
+@@ -5648,6 +5648,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+ 	int r;
+ 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+ 		vcpu->run->request_interrupt_window;
++	bool req_immediate_exit = 0;
+ 
+ 	if (vcpu->requests) {
+ 		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
+@@ -5687,7 +5688,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+ 			record_steal_time(vcpu);
+ 		if (kvm_check_request(KVM_REQ_NMI, vcpu))
+ 			process_nmi(vcpu);
+-
++		req_immediate_exit =
++			kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
+ 	}
+ 
+ 	r = kvm_mmu_reload(vcpu);
+@@ -5738,6 +5740,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+ 
+ 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ 
++	if (req_immediate_exit)
++		smp_send_reschedule(vcpu->cpu);
++
+ 	kvm_guest_enter();
+ 
+ 	if (unlikely(vcpu->arch.switch_db_regs)) {
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index d526231..9fedeb3 100644
+--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
+@@ -50,6 +50,7 @@
+ #define KVM_REQ_APF_HALT          12
+ #define KVM_REQ_STEAL_UPDATE      13
+ #define KVM_REQ_NMI               14
++#define KVM_REQ_IMMEDIATE_EXIT    15
+ 
+ #define KVM_USERSPACE_IRQ_SOURCE_ID	0
+ 
+-- 
+1.7.8.3
+
--- a/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch
+++ b/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch
@@ -0,0 +1,73 @@
+From: Nadav Har'El <nyh@il.ibm.com>
+Date: Thu, 22 Sep 2011 13:53:26 +0300
+Subject: [PATCH 2/2] KVM: nVMX: Fix warning-causing idt-vectoring-info
+ behavior
+
+commit 51cfe38ea50aa631f58ed8c340ed6f0143c325a8 upstream.
+
+When L0 wishes to inject an interrupt while L2 is running, it emulates an exit
+to L1 with EXIT_REASON_EXTERNAL_INTERRUPT. This was explained in the original
+nVMX patch 23, titled "Correct handling of interrupt injection".
+
+Unfortunately, it is possible (though rare) that at this point there is valid
+idt_vectoring_info in vmcs02. For example, L1 injected some interrupt to L2,
+and when L2 tried to run this interrupt's handler, it got a page fault - so
+it returns the original interrupt vector in idt_vectoring_info. The problem
+is that if this is the case, we cannot exit to L1 with EXTERNAL_INTERRUPT
+like we wished to, because the VMX spec guarantees that idt_vectoring_info
+and exit_reason_external_interrupt can never happen together. This is not
+just specified in the spec - a KVM L1 actually prints a kernel warning
+"unexpected, valid vectoring info" if we violate this guarantee, and some
+users noticed these warnings in L1's logs.
+
+In order to better emulate a processor, which would never return the external
+interrupt and the idt-vectoring-info together, we need to separate the two
+injection steps: First, complete L1's injection into L2 (i.e., enter L2,
+injecting to it the idt-vectoring-info); Second, after entry into L2 succeeds
+and it exits back to L0, exit to L1 with the EXIT_REASON_EXTERNAL_INTERRUPT.
+Most of this is already in the code - the only change we need is to remain
+in L2 (and not exit to L1) in this case.
+
+Note that the previous patch ensures (by using KVM_REQ_IMMEDIATE_EXIT) that
+although we do enter L2 first, it will exit immediately after processing its
+injection, allowing us to promptly inject to L1.
+
+Note how we test vmcs12->idt_vectoring_info_field; This isn't really the
+vmcs12 value (we haven't exited to L1 yet, so vmcs12 hasn't been updated),
+but rather the place we save, at the end of vmx_vcpu_run, the vmcs02 value
+of this field. This was explained in patch 25 ("Correct handling of idt
+vectoring info") of the original nVMX patch series.
+
+Thanks to Dave Allan and to Federico Simoncelli for reporting this bug,
+to Abel Gordon for helping me figure out the solution, and to Avi Kivity
+for helping to improve it.
+
+Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+---
+ arch/x86/kvm/vmx.c |    7 ++++---
+ 1 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index d75d914..6e28d58 100644
+--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
+@@ -4080,11 +4080,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
+ {
+ 	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
+-		struct vmcs12 *vmcs12;
+-		if (to_vmx(vcpu)->nested.nested_run_pending)
++		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
++		if (to_vmx(vcpu)->nested.nested_run_pending ||
++		    (vmcs12->idt_vectoring_info_field &
++		     VECTORING_INFO_VALID_MASK))
+ 			return 0;
+ 		nested_vmx_vmexit(vcpu);
+-		vmcs12 = get_vmcs12(vcpu);
+ 		vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
+ 		vmcs12->vm_exit_intr_info = 0;
+ 		/* fall through to normal code, but now in L1, not L2 */
+-- 
+1.7.8.3
+
--- a/debian/patches/series/2
+++ b/debian/patches/series/2
@@ -0,0 +1,2 @@
++ bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch
++ bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch