diff --git a/debian/changelog b/debian/changelog index 380f1f3b3..d296aa710 100644 --- a/debian/changelog +++ b/debian/changelog @@ -22,10 +22,6 @@ linux-2.6 (3.1.5-1) UNRELEASED; urgency=low PCH_GBE, PCH_PHUB, SERIAL_PCH_UART, SPI_TOPCLIFF_PCH, USB_GADGET, USB_EG20T as modules - [ Aurelien Jarno ] - * [x86] Fix issues in KVM nVMX implementation by backporting changes - from 3.2. - -- Bastian Blank Thu, 01 Dec 2011 13:17:34 +0100 linux-2.6 (3.1.4-1) unstable; urgency=low diff --git a/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch b/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch deleted file mode 100644 index 4ca617264..000000000 --- a/debian/patches/bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch +++ /dev/null @@ -1,103 +0,0 @@ -commit a3c6d93b2c5b8724e46ee3335f65bfc75c675090 -Author: Nadav Har'El -Date: Thu Sep 22 13:52:56 2011 +0300 - - KVM: nVMX: Add KVM_REQ_IMMEDIATE_EXIT - - This patch adds a new vcpu->requests bit, KVM_REQ_IMMEDIATE_EXIT. - This bit requests that when next entering the guest, we should run it only - for as little as possible, and exit again. - - We use this new option in nested VMX: When L1 launches L2, but L0 wishes L1 - to continue running so it can inject an event to it, we unfortunately cannot - just pretend to have run L2 for a little while - We must really launch L2, - otherwise certain one-off vmcs12 parameters (namely, L1 injection into L2) - will be lost. So the existing code runs L2 in this case. - But L2 could potentially run for a long time until it exits, and the - injection into L1 will be delayed. The new KVM_REQ_IMMEDIATE_EXIT allows us - to request that L2 will be entered, as necessary, but will exit as soon as - possible after entry. - - Our implementation of this request uses smp_send_reschedule() to send a - self-IPI, with interrupts disabled. The interrupts remain disabled until the - guest is entered, and then, after the entry is complete (often including - processing an injection and jumping to the relevant handler), the physical - interrupt is noticed and causes an exit. - - On recent Intel processors, we could have achieved the same goal by using - MTF instead of a self-IPI. Another technique worth considering in the future - is to use VM_EXIT_ACK_INTR_ON_EXIT and a highest-priority vector IPI - to - slightly improve performance by avoiding the useless interrupt handler - which ends up being called when smp_send_reschedule() is used. - - Signed-off-by: Nadav Har'El - Signed-off-by: Avi Kivity - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index a0d6bd9..4693d54 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -3858,12 +3858,15 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) - static void enable_irq_window(struct kvm_vcpu *vcpu) - { - u32 cpu_based_vm_exec_control; -- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) -- /* We can get here when nested_run_pending caused -- * vmx_interrupt_allowed() to return false. In this case, do -- * nothing - the interrupt will be injected later. -+ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { -+ /* -+ * We get here if vmx_interrupt_allowed() said we can't -+ * inject to L1 now because L2 must run. Ask L2 to exit -+ * right after entry, so we can inject to L1 more promptly. - */ -+ kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); - return; -+ } - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 41234ff..83b839f 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -5559,6 +5559,7 @@ static int vcpu_enter_guest(struct kvm_v - bool nmi_pending; - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && - vcpu->run->request_interrupt_window; -+ bool req_immediate_exit = 0; - - if (vcpu->requests) { - if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) -@@ -5596,7 +5597,8 @@ static int vcpu_enter_guest(struct kvm_v - } - if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) - record_steal_time(vcpu); -- -+ req_immediate_exit = -+ kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); - } - - r = kvm_mmu_reload(vcpu); -@@ -5655,6 +5657,9 @@ static int vcpu_enter_guest(struct kvm_v - - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - -+ if (req_immediate_exit) -+ smp_send_reschedule(vcpu->cpu); -+ - kvm_guest_enter(); - - if (unlikely(vcpu->arch.switch_db_regs)) { -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index d526231..9fedeb3 100644 ---- a/include/linux/kvm_host.h -+++ b/include/linux/kvm_host.h -@@ -50,6 +50,7 @@ - #define KVM_REQ_EVENT 11 - #define KVM_REQ_APF_HALT 12 - #define KVM_REQ_STEAL_UPDATE 13 -+#define KVM_REQ_IMMEDIATE_EXIT 15 - - #define KVM_USERSPACE_IRQ_SOURCE_ID 0 - diff --git a/debian/patches/bugfix/x86/KVM-nVMX-Fix-nested-VMX-TSC-emulation.patch b/debian/patches/bugfix/x86/KVM-nVMX-Fix-nested-VMX-TSC-emulation.patch deleted file mode 100644 index 1113f3ae3..000000000 --- a/debian/patches/bugfix/x86/KVM-nVMX-Fix-nested-VMX-TSC-emulation.patch +++ /dev/null @@ -1,77 +0,0 @@ -commit 27fc51b21cea3386a6672699631975d1097f9d39 -Author: Nadav Har'El -Date: Tue Aug 2 15:54:52 2011 +0300 - - KVM: nVMX: Fix nested VMX TSC emulation - - This patch fixes two corner cases in nested (L2) handling of TSC-related - issues: - - 1. Somewhat suprisingly, according to the Intel spec, if L1 allows WRMSR to - the TSC MSR without an exit, then this should set L1's TSC value itself - not - offset by vmcs12.TSC_OFFSET (like was wrongly done in the previous code). - - 2. Allow L1 to disable the TSC_OFFSETING control, and then correctly ignore - the vmcs12.TSC_OFFSET. - - Signed-off-by: Nadav Har'El - Signed-off-by: Avi Kivity - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 97b6454..5e8d411 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -1777,15 +1777,23 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) - */ - static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) - { -- vmcs_write64(TSC_OFFSET, offset); -- if (is_guest_mode(vcpu)) -+ if (is_guest_mode(vcpu)) { - /* -- * We're here if L1 chose not to trap the TSC MSR. Since -- * prepare_vmcs12() does not copy tsc_offset, we need to also -- * set the vmcs12 field here. -+ * We're here if L1 chose not to trap WRMSR to TSC. According -+ * to the spec, this should set L1's TSC; The offset that L1 -+ * set for L2 remains unchanged, and still needs to be added -+ * to the newly set TSC to get L2's TSC. - */ -- get_vmcs12(vcpu)->tsc_offset = offset - -- to_vmx(vcpu)->nested.vmcs01_tsc_offset; -+ struct vmcs12 *vmcs12; -+ to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset; -+ /* recalculate vmcs02.TSC_OFFSET: */ -+ vmcs12 = get_vmcs12(vcpu); -+ vmcs_write64(TSC_OFFSET, offset + -+ (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? -+ vmcs12->tsc_offset : 0)); -+ } else { -+ vmcs_write64(TSC_OFFSET, offset); -+ } - } - - static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) -@@ -6485,8 +6493,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) - - set_cr4_guest_host_mask(vmx); - -- vmcs_write64(TSC_OFFSET, -- vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); -+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) -+ vmcs_write64(TSC_OFFSET, -+ vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); -+ else -+ vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); - - if (enable_vpid) { - /* -@@ -6893,7 +6904,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) - - load_vmcs12_host_state(vcpu, vmcs12); - -- /* Update TSC_OFFSET if vmx_adjust_tsc_offset() was used while L2 ran */ -+ /* Update TSC_OFFSET if TSC was changed while L2 ran */ - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); - - /* This is needed for same reason as it was needed in prepare_vmcs02 */ diff --git a/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-behavior.patch b/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-behavior.patch deleted file mode 100644 index d9125eca4..000000000 --- a/debian/patches/bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-behavior.patch +++ /dev/null @@ -1,66 +0,0 @@ -commit ea926dcf58278fa05f1873491ad298e67951c80f -Author: Nadav Har'El -Date: Thu Sep 22 13:53:26 2011 +0300 - - KVM: nVMX: Fix warning-causing idt-vectoring-info behavior - - When L0 wishes to inject an interrupt while L2 is running, it emulates an exit - to L1 with EXIT_REASON_EXTERNAL_INTERRUPT. This was explained in the original - nVMX patch 23, titled "Correct handling of interrupt injection". - - Unfortunately, it is possible (though rare) that at this point there is valid - idt_vectoring_info in vmcs02. For example, L1 injected some interrupt to L2, - and when L2 tried to run this interrupt's handler, it got a page fault - so - it returns the original interrupt vector in idt_vectoring_info. The problem - is that if this is the case, we cannot exit to L1 with EXTERNAL_INTERRUPT - like we wished to, because the VMX spec guarantees that idt_vectoring_info - and exit_reason_external_interrupt can never happen together. This is not - just specified in the spec - a KVM L1 actually prints a kernel warning - "unexpected, valid vectoring info" if we violate this guarantee, and some - users noticed these warnings in L1's logs. - - In order to better emulate a processor, which would never return the external - interrupt and the idt-vectoring-info together, we need to separate the two - injection steps: First, complete L1's injection into L2 (i.e., enter L2, - injecting to it the idt-vectoring-info); Second, after entry into L2 succeeds - and it exits back to L0, exit to L1 with the EXIT_REASON_EXTERNAL_INTERRUPT. - Most of this is already in the code - the only change we need is to remain - in L2 (and not exit to L1) in this case. - - Note that the previous patch ensures (by using KVM_REQ_IMMEDIATE_EXIT) that - although we do enter L2 first, it will exit immediately after processing its - injection, allowing us to promptly inject to L1. - - Note how we test vmcs12->idt_vectoring_info_field; This isn't really the - vmcs12 value (we haven't exited to L1 yet, so vmcs12 hasn't been updated), - but rather the place we save, at the end of vmx_vcpu_run, the vmcs02 value - of this field. This was explained in patch 25 ("Correct handling of idt - vectoring info") of the original nVMX patch series. - - Thanks to Dave Allan and to Federico Simoncelli for reporting this bug, - to Abel Gordon for helping me figure out the solution, and to Avi Kivity - for helping to improve it. - - Signed-off-by: Nadav Har'El - Signed-off-by: Avi Kivity - -diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c -index 4693d54..f3ec38f 100644 ---- a/arch/x86/kvm/vmx.c -+++ b/arch/x86/kvm/vmx.c -@@ -3993,11 +3993,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) - static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) - { - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { -- struct vmcs12 *vmcs12; -- if (to_vmx(vcpu)->nested.nested_run_pending) -+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); -+ if (to_vmx(vcpu)->nested.nested_run_pending || -+ (vmcs12->idt_vectoring_info_field & -+ VECTORING_INFO_VALID_MASK)) - return 0; - nested_vmx_vmexit(vcpu); -- vmcs12 = get_vmcs12(vcpu); - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmcs12->vm_exit_intr_info = 0; - /* fall through to normal code, but now in L1, not L2 */ diff --git a/debian/patches/series/base b/debian/patches/series/base index 3a454e50b..8d7628f31 100644 --- a/debian/patches/series/base +++ b/debian/patches/series/base @@ -86,8 +86,3 @@ + bugfix/all/0004-staging-brcm80211-restrict-register-access-method-fo.patch + bugfix/all/0005-staging-brcm80211-restrict-MIPS-dma-bug-workaround-t.patch + debian/inetpeer-hide-ABI-change-in-3.1.5.patch - -+ bugfix/x86/KVM-nVMX-Fix-nested-VMX-TSC-emulation.patch -+ bugfix/x86/KVM-nVMX-Add-KVM_REQ_IMMEDIATE_EXIT.patch -+ bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-behavior.patch -