diff --git a/debian/changelog b/debian/changelog index 333b8f62f..a28a8ca58 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,6 +14,10 @@ linux-2.6 (3.2.1-2) UNRELEASED; urgency=low * Build linux-libc-dev without multiarch if dpkg does not support it, to support backports + [ Aurelien Jarno ] + * [x86] Backport KVM nested VMX fixes from 3.3 to fix warnings and + crashes of L1 guests. + -- Ben Hutchings Thu, 19 Jan 2012 05:42:57 +0000 linux-2.6 (3.2.1-1) unstable; urgency=low diff --git a/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch b/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch new file mode 100644 index 000000000..4ef3426a0 --- /dev/null +++ b/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch @@ -0,0 +1,111 @@ +From: Nadav Har'El +Date: Thu, 22 Sep 2011 13:52:56 +0300 +Subject: [PATCH 1/2] KVM: nVMX: Add KVM_REQ_IMMEDIATE_EXIT + +commit d6185f20a0efbf175e12831d0de330e4f21725aa upstream. + +This patch adds a new vcpu->requests bit, KVM_REQ_IMMEDIATE_EXIT. +This bit requests that when next entering the guest, we should run it only +for as little as possible, and exit again. + +We use this new option in nested VMX: When L1 launches L2, but L0 wishes L1 +to continue running so it can inject an event to it, we unfortunately cannot +just pretend to have run L2 for a little while - We must really launch L2, +otherwise certain one-off vmcs12 parameters (namely, L1 injection into L2) +will be lost. So the existing code runs L2 in this case. +But L2 could potentially run for a long time until it exits, and the +injection into L1 will be delayed. The new KVM_REQ_IMMEDIATE_EXIT allows us +to request that L2 will be entered, as necessary, but will exit as soon as +possible after entry. + +Our implementation of this request uses smp_send_reschedule() to send a +self-IPI, with interrupts disabled. The interrupts remain disabled until the +guest is entered, and then, after the entry is complete (often including +processing an injection and jumping to the relevant handler), the physical +interrupt is noticed and causes an exit. + +On recent Intel processors, we could have achieved the same goal by using +MTF instead of a self-IPI. Another technique worth considering in the future +is to use VM_EXIT_ACK_INTR_ON_EXIT and a highest-priority vector IPI - to +slightly improve performance by avoiding the useless interrupt handler +which ends up being called when smp_send_reschedule() is used. + +Signed-off-by: Nadav Har'El +Signed-off-by: Avi Kivity +--- + arch/x86/kvm/vmx.c | 11 +++++++---- + arch/x86/kvm/x86.c | 7 ++++++- + include/linux/kvm_host.h | 1 + + 3 files changed, 14 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 579a0b5..d75d914 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3945,12 +3945,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) + static void enable_irq_window(struct kvm_vcpu *vcpu) + { + u32 cpu_based_vm_exec_control; +- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) +- /* We can get here when nested_run_pending caused +- * vmx_interrupt_allowed() to return false. In this case, do +- * nothing - the interrupt will be injected later. ++ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { ++ /* ++ * We get here if vmx_interrupt_allowed() said we can't ++ * inject to L1 now because L2 must run. Ask L2 to exit ++ * right after entry, so we can inject to L1 more promptly. + */ ++ kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); + return; ++ } + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 4c938da..e24edbc 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5648,6 +5648,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) + int r; + bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && + vcpu->run->request_interrupt_window; ++ bool req_immediate_exit = 0; + + if (vcpu->requests) { + if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) +@@ -5687,7 +5688,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) + record_steal_time(vcpu); + if (kvm_check_request(KVM_REQ_NMI, vcpu)) + process_nmi(vcpu); +- ++ req_immediate_exit = ++ kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); + } + + r = kvm_mmu_reload(vcpu); +@@ -5738,6 +5740,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + ++ if (req_immediate_exit) ++ smp_send_reschedule(vcpu->cpu); ++ + kvm_guest_enter(); + + if (unlikely(vcpu->arch.switch_db_regs)) { +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index d526231..9fedeb3 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -50,6 +50,7 @@ + #define KVM_REQ_APF_HALT 12 + #define KVM_REQ_STEAL_UPDATE 13 + #define KVM_REQ_NMI 14 ++#define KVM_REQ_IMMEDIATE_EXIT 15 + + #define KVM_USERSPACE_IRQ_SOURCE_ID 0 + +-- +1.7.8.3 + diff --git a/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch b/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch new file mode 100644 index 000000000..f714c789e --- /dev/null +++ b/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch @@ -0,0 +1,73 @@ +From: Nadav Har'El +Date: Thu, 22 Sep 2011 13:53:26 +0300 +Subject: [PATCH 2/2] KVM: nVMX: Fix warning-causing idt-vectoring-info + behavior + +commit 51cfe38ea50aa631f58ed8c340ed6f0143c325a8 upstream. + +When L0 wishes to inject an interrupt while L2 is running, it emulates an exit +to L1 with EXIT_REASON_EXTERNAL_INTERRUPT. This was explained in the original +nVMX patch 23, titled "Correct handling of interrupt injection". + +Unfortunately, it is possible (though rare) that at this point there is valid +idt_vectoring_info in vmcs02. For example, L1 injected some interrupt to L2, +and when L2 tried to run this interrupt's handler, it got a page fault - so +it returns the original interrupt vector in idt_vectoring_info. The problem +is that if this is the case, we cannot exit to L1 with EXTERNAL_INTERRUPT +like we wished to, because the VMX spec guarantees that idt_vectoring_info +and exit_reason_external_interrupt can never happen together. This is not +just specified in the spec - a KVM L1 actually prints a kernel warning +"unexpected, valid vectoring info" if we violate this guarantee, and some +users noticed these warnings in L1's logs. + +In order to better emulate a processor, which would never return the external +interrupt and the idt-vectoring-info together, we need to separate the two +injection steps: First, complete L1's injection into L2 (i.e., enter L2, +injecting to it the idt-vectoring-info); Second, after entry into L2 succeeds +and it exits back to L0, exit to L1 with the EXIT_REASON_EXTERNAL_INTERRUPT. +Most of this is already in the code - the only change we need is to remain +in L2 (and not exit to L1) in this case. + +Note that the previous patch ensures (by using KVM_REQ_IMMEDIATE_EXIT) that +although we do enter L2 first, it will exit immediately after processing its +injection, allowing us to promptly inject to L1. + +Note how we test vmcs12->idt_vectoring_info_field; This isn't really the +vmcs12 value (we haven't exited to L1 yet, so vmcs12 hasn't been updated), +but rather the place we save, at the end of vmx_vcpu_run, the vmcs02 value +of this field. This was explained in patch 25 ("Correct handling of idt +vectoring info") of the original nVMX patch series. + +Thanks to Dave Allan and to Federico Simoncelli for reporting this bug, +to Abel Gordon for helping me figure out the solution, and to Avi Kivity +for helping to improve it. + +Signed-off-by: Nadav Har'El +Signed-off-by: Avi Kivity +--- + arch/x86/kvm/vmx.c | 7 ++++--- + 1 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index d75d914..6e28d58 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4080,11 +4080,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) + { + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { +- struct vmcs12 *vmcs12; +- if (to_vmx(vcpu)->nested.nested_run_pending) ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ if (to_vmx(vcpu)->nested.nested_run_pending || ++ (vmcs12->idt_vectoring_info_field & ++ VECTORING_INFO_VALID_MASK)) + return 0; + nested_vmx_vmexit(vcpu); +- vmcs12 = get_vmcs12(vcpu); + vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; + vmcs12->vm_exit_intr_info = 0; + /* fall through to normal code, but now in L1, not L2 */ +-- +1.7.8.3 + diff --git a/debian/patches/series/2 b/debian/patches/series/2 new file mode 100644 index 000000000..800ab07ab --- /dev/null +++ b/debian/patches/series/2 @@ -0,0 +1,2 @@ ++ bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch ++ bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch