2019-10-20 13:32:35 +00:00
|
|
|
From: Paolo Bonzini <pbonzini@redhat.com>
|
2019-10-24 21:48:50 +00:00
|
|
|
Date: Thu, 24 Oct 2019 18:34:28 +0200
|
2019-10-20 13:32:35 +00:00
|
|
|
Subject: kvm: mmu: ITLB_MULTIHIT mitigation
|
|
|
|
|
|
|
|
With some Intel processors, putting the same virtual address in the TLB
|
|
|
|
as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit
|
|
|
|
and cause the processor to issue a machine check. Unfortunately if EPT
|
|
|
|
page tables use huge pages, it is possible for a malicious guest to cause
|
|
|
|
this situation.
|
|
|
|
|
|
|
|
This patch adds a knob to mark huge pages as non-executable. When the
|
|
|
|
nx_huge_pages parameter is enabled (and we are using EPT), all huge pages
|
|
|
|
are marked as NX. If the guest attempts to execute in one of those pages,
|
|
|
|
the page is broken down into 4K pages, which are then marked executable.
|
|
|
|
|
|
|
|
This is not an issue for shadow paging (except nested EPT), because then
|
|
|
|
the host is in control of TLB flushes and the problematic situation cannot
|
|
|
|
happen. With nested EPT, again the nested guest can cause problems so we
|
|
|
|
treat shadow and direct EPT the same.
|
|
|
|
|
|
|
|
Signed-off-by: Junaid Shahid <junaids@google.com>
|
|
|
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
|
[bwh: Backported to 4.19:
|
|
|
|
- Use kvm_mmu_invalidate_zap_all_pages() instead of kvm_mmu_zap_all_fast()
|
|
|
|
- Adjust context]
|
|
|
|
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
|
|
|
---
|
|
|
|
.../admin-guide/kernel-parameters.txt | 11 ++
|
|
|
|
arch/x86/include/asm/kvm_host.h | 2 +
|
|
|
|
arch/x86/kernel/cpu/bugs.c | 13 +-
|
|
|
|
arch/x86/kvm/mmu.c | 135 +++++++++++++++++-
|
|
|
|
arch/x86/kvm/paging_tmpl.h | 29 +++-
|
2019-10-24 21:48:50 +00:00
|
|
|
arch/x86/kvm/x86.c | 9 ++
|
|
|
|
6 files changed, 186 insertions(+), 13 deletions(-)
|
2019-10-20 13:32:35 +00:00
|
|
|
|
|
|
|
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
|
|
|
index 16607b178b47..b2c1a5c63ab3 100644
|
|
|
|
--- a/Documentation/admin-guide/kernel-parameters.txt
|
|
|
|
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
|
|
|
@@ -1956,6 +1956,17 @@
|
|
|
|
KVM MMU at runtime.
|
|
|
|
Default is 0 (off)
|
|
|
|
|
|
|
|
+ kvm.nx_huge_pages=
|
|
|
|
+ [KVM] Controls the sw workaround for bug
|
|
|
|
+ X86_BUG_ITLB_MULTIHIT.
|
|
|
|
+ force : Always deploy workaround.
|
|
|
|
+ off : Never deploy workaround.
|
|
|
|
+ auto : Default. Deploy workaround based on presence of
|
|
|
|
+ X86_BUG_ITLB_MULTIHIT.
|
|
|
|
+
|
|
|
|
+ If the sw workaround is enabled for the host, guests
|
|
|
|
+ need not enable it for nested guests.
|
|
|
|
+
|
|
|
|
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
|
|
|
|
Default is 1 (enabled)
|
|
|
|
|
|
|
|
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
|
|
|
|
index 90dccb5c79d9..59b44445ed59 100644
|
|
|
|
--- a/arch/x86/include/asm/kvm_host.h
|
|
|
|
+++ b/arch/x86/include/asm/kvm_host.h
|
|
|
|
@@ -282,6 +282,7 @@ struct kvm_mmu_page {
|
|
|
|
struct list_head link;
|
|
|
|
struct hlist_node hash_link;
|
|
|
|
bool unsync;
|
|
|
|
+ bool lpage_disallowed; /* Can't be replaced by an equiv large page */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following two entries are used to key the shadow page in the
|
|
|
|
@@ -890,6 +891,7 @@ struct kvm_vm_stat {
|
|
|
|
ulong mmu_unsync;
|
|
|
|
ulong remote_tlb_flush;
|
|
|
|
ulong lpages;
|
|
|
|
+ ulong nx_lpage_splits;
|
|
|
|
ulong max_mmu_page_hash_collisions;
|
|
|
|
};
|
|
|
|
|
|
|
|
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
|
|
|
|
index 60e47e492c2f..1e764992fa64 100644
|
|
|
|
--- a/arch/x86/kernel/cpu/bugs.c
|
|
|
|
+++ b/arch/x86/kernel/cpu/bugs.c
|
|
|
|
@@ -1119,6 +1119,9 @@ void x86_spec_ctrl_setup_ap(void)
|
|
|
|
x86_amd_ssb_disable();
|
|
|
|
}
|
|
|
|
|
|
|
|
+bool itlb_multihit_kvm_mitigation;
|
|
|
|
+EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
|
|
|
|
+
|
|
|
|
#undef pr_fmt
|
|
|
|
#define pr_fmt(fmt) "L1TF: " fmt
|
|
|
|
|
|
|
|
@@ -1274,17 +1277,25 @@ static ssize_t l1tf_show_state(char *buf)
|
|
|
|
l1tf_vmx_states[l1tf_vmx_mitigation],
|
|
|
|
sched_smt_active() ? "vulnerable" : "disabled");
|
|
|
|
}
|
|
|
|
+
|
|
|
|
+static ssize_t itlb_multihit_show_state(char *buf)
|
|
|
|
+{
|
|
|
|
+ if (itlb_multihit_kvm_mitigation)
|
|
|
|
+ return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
|
|
|
|
+ else
|
|
|
|
+ return sprintf(buf, "KVM: Vulnerable\n");
|
|
|
|
+}
|
|
|
|
#else
|
|
|
|
static ssize_t l1tf_show_state(char *buf)
|
|
|
|
{
|
|
|
|
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
|
|
|
|
}
|
|
|
|
-#endif
|
|
|
|
|
|
|
|
static ssize_t itlb_multihit_show_state(char *buf)
|
|
|
|
{
|
|
|
|
return sprintf(buf, "Processor vulnerable\n");
|
|
|
|
}
|
|
|
|
+#endif
|
|
|
|
|
|
|
|
static ssize_t mds_show_state(char *buf)
|
|
|
|
{
|
|
|
|
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
|
|
|
|
index 7f9be921df7c..19c3dc9b05cb 100644
|
|
|
|
--- a/arch/x86/kvm/mmu.c
|
|
|
|
+++ b/arch/x86/kvm/mmu.c
|
|
|
|
@@ -49,6 +49,20 @@
|
|
|
|
#include <asm/kvm_page_track.h>
|
|
|
|
#include "trace.h"
|
|
|
|
|
|
|
|
+extern bool itlb_multihit_kvm_mitigation;
|
|
|
|
+
|
|
|
|
+static int __read_mostly nx_huge_pages = -1;
|
|
|
|
+
|
|
|
|
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
|
|
|
|
+
|
|
|
|
+static struct kernel_param_ops nx_huge_pages_ops = {
|
|
|
|
+ .set = set_nx_huge_pages,
|
|
|
|
+ .get = param_get_bool,
|
|
|
|
+};
|
|
|
|
+
|
|
|
|
+module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
|
|
|
|
+__MODULE_PARM_TYPE(nx_huge_pages, "bool");
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* When setting this variable to true it enables Two-Dimensional-Paging
|
|
|
|
* where the hardware walks 2 page tables:
|
|
|
|
@@ -284,6 +298,11 @@ static inline bool spte_ad_enabled(u64 spte)
|
|
|
|
return !(spte & shadow_acc_track_value);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static bool is_nx_huge_page_enabled(void)
|
|
|
|
+{
|
|
|
|
+ return READ_ONCE(nx_huge_pages);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static inline u64 spte_shadow_accessed_mask(u64 spte)
|
|
|
|
{
|
|
|
|
MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
|
|
|
|
@@ -1096,6 +1115,15 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|
|
|
kvm_mmu_gfn_disallow_lpage(slot, gfn);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|
|
|
+{
|
|
|
|
+ if (sp->lpage_disallowed)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ ++kvm->stat.nx_lpage_splits;
|
|
|
|
+ sp->lpage_disallowed = true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|
|
|
{
|
|
|
|
struct kvm_memslots *slots;
|
|
|
|
@@ -1113,6 +1141,12 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|
|
|
kvm_mmu_gfn_allow_lpage(slot, gfn);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
|
|
|
+{
|
|
|
|
+ --kvm->stat.nx_lpage_splits;
|
|
|
|
+ sp->lpage_disallowed = false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
|
|
|
|
struct kvm_memory_slot *slot)
|
|
|
|
{
|
|
|
|
@@ -2665,6 +2699,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
|
|
|
|
kvm_reload_remote_mmus(kvm);
|
|
|
|
}
|
|
|
|
|
|
|
|
+ if (sp->lpage_disallowed)
|
|
|
|
+ unaccount_huge_nx_page(kvm, sp);
|
|
|
|
+
|
|
|
|
sp->role.invalid = 1;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
@@ -2873,6 +2910,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
|
|
|
if (!speculative)
|
|
|
|
spte |= spte_shadow_accessed_mask(spte);
|
|
|
|
|
|
|
|
+ if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
|
|
|
|
+ is_nx_huge_page_enabled()) {
|
|
|
|
+ pte_access &= ~ACC_EXEC_MASK;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
if (pte_access & ACC_EXEC_MASK)
|
|
|
|
spte |= shadow_x_mask;
|
|
|
|
else
|
|
|
|
@@ -3091,9 +3133,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
|
|
|
|
__direct_pte_prefetch(vcpu, sp, sptep);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
|
|
|
|
+ gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
|
|
|
|
+{
|
|
|
|
+ int level = *levelp;
|
|
|
|
+ u64 spte = *it.sptep;
|
|
|
|
+
|
|
|
|
+ if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
|
|
|
|
+ is_nx_huge_page_enabled() &&
|
|
|
|
+ is_shadow_present_pte(spte) &&
|
|
|
|
+ !is_large_pte(spte)) {
|
|
|
|
+ /*
|
|
|
|
+ * A small SPTE exists for this pfn, but FNAME(fetch)
|
|
|
|
+ * and __direct_map would like to create a large PTE
|
|
|
|
+ * instead: just force them to go down another level,
|
|
|
|
+ * patching back for them into pfn the next 9 bits of
|
|
|
|
+ * the address.
|
|
|
|
+ */
|
|
|
|
+ u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
|
|
|
|
+ *pfnp |= gfn & page_mask;
|
|
|
|
+ (*levelp)--;
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
|
|
|
|
int map_writable, int level, kvm_pfn_t pfn,
|
|
|
|
- bool prefault)
|
|
|
|
+ bool prefault, bool lpage_disallowed)
|
|
|
|
{
|
|
|
|
struct kvm_shadow_walk_iterator it;
|
|
|
|
struct kvm_mmu_page *sp;
|
|
|
|
@@ -3106,6 +3171,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
|
|
|
|
|
|
|
|
trace_kvm_mmu_spte_requested(gpa, level, pfn);
|
|
|
|
for_each_shadow_entry(vcpu, gpa, it) {
|
|
|
|
+ /*
|
|
|
|
+ * We cannot overwrite existing page tables with an NX
|
|
|
|
+ * large page, as the leaf could be executable.
|
|
|
|
+ */
|
|
|
|
+ disallowed_hugepage_adjust(it, gfn, &pfn, &level);
|
|
|
|
+
|
|
|
|
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
|
|
|
if (it.level == level)
|
|
|
|
break;
|
|
|
|
@@ -3116,6 +3187,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
|
|
|
|
it.level - 1, true, ACC_ALL);
|
|
|
|
|
|
|
|
link_shadow_page(vcpu, it.sptep, sp);
|
|
|
|
+ if (lpage_disallowed)
|
|
|
|
+ account_huge_nx_page(vcpu->kvm, sp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@@ -3416,11 +3489,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
int level;
|
|
|
|
- bool force_pt_level = false;
|
|
|
|
+ bool force_pt_level;
|
|
|
|
kvm_pfn_t pfn;
|
|
|
|
unsigned long mmu_seq;
|
|
|
|
bool map_writable, write = error_code & PFERR_WRITE_MASK;
|
|
|
|
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
|
|
|
|
+ is_nx_huge_page_enabled();
|
|
|
|
|
|
|
|
+ force_pt_level = lpage_disallowed;
|
|
|
|
level = mapping_level(vcpu, gfn, &force_pt_level);
|
|
|
|
if (likely(!force_pt_level)) {
|
|
|
|
/*
|
|
|
|
@@ -3454,7 +3530,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
|
|
|
|
goto out_unlock;
|
|
|
|
if (likely(!force_pt_level))
|
|
|
|
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
|
|
|
|
- r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
|
|
|
|
+ r = __direct_map(vcpu, v, write, map_writable, level, pfn,
|
|
|
|
+ prefault, false);
|
|
|
|
out_unlock:
|
|
|
|
spin_unlock(&vcpu->kvm->mmu_lock);
|
|
|
|
kvm_release_pfn_clean(pfn);
|
|
|
|
@@ -4048,6 +4125,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
|
|
|
unsigned long mmu_seq;
|
|
|
|
int write = error_code & PFERR_WRITE_MASK;
|
|
|
|
bool map_writable;
|
|
|
|
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
|
|
|
|
+ is_nx_huge_page_enabled();
|
|
|
|
|
|
|
|
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
|
|
|
|
|
|
|
@@ -4058,8 +4137,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
|
|
|
if (r)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
- force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
|
|
|
|
- PT_DIRECTORY_LEVEL);
|
|
|
|
+ force_pt_level =
|
|
|
|
+ lpage_disallowed ||
|
|
|
|
+ !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
|
|
|
|
level = mapping_level(vcpu, gfn, &force_pt_level);
|
|
|
|
if (likely(!force_pt_level)) {
|
|
|
|
if (level > PT_DIRECTORY_LEVEL &&
|
|
|
|
@@ -4088,7 +4168,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
|
|
|
goto out_unlock;
|
|
|
|
if (likely(!force_pt_level))
|
|
|
|
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
|
|
|
|
- r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
|
|
|
|
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
|
|
|
|
+ prefault, lpage_disallowed);
|
|
|
|
out_unlock:
|
|
|
|
spin_unlock(&vcpu->kvm->mmu_lock);
|
|
|
|
kvm_release_pfn_clean(pfn);
|
|
|
|
@@ -5886,10 +5967,52 @@ static void mmu_destroy_caches(void)
|
|
|
|
kmem_cache_destroy(mmu_page_header_cache);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static void __set_nx_huge_pages(bool val)
|
|
|
|
+{
|
|
|
|
+ nx_huge_pages = itlb_multihit_kvm_mitigation = val;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
|
|
|
|
+{
|
|
|
|
+ bool old_val = nx_huge_pages;
|
|
|
|
+ bool new_val;
|
|
|
|
+
|
|
|
|
+ /* In "auto" mode deploy workaround only if CPU has the bug. */
|
|
|
|
+ if (sysfs_streq(val, "off"))
|
|
|
|
+ new_val = 0;
|
|
|
|
+ else if (sysfs_streq(val, "force"))
|
|
|
|
+ new_val = 1;
|
|
|
|
+ else if (sysfs_streq(val, "auto"))
|
|
|
|
+ new_val = boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT);
|
|
|
|
+ else if (strtobool(val, &new_val) < 0)
|
|
|
|
+ return -EINVAL;
|
|
|
|
+
|
|
|
|
+ __set_nx_huge_pages(new_val);
|
|
|
|
+
|
|
|
|
+ if (new_val != old_val) {
|
|
|
|
+ struct kvm *kvm;
|
|
|
|
+ int idx;
|
|
|
|
+
|
|
|
|
+ mutex_lock(&kvm_lock);
|
|
|
|
+
|
|
|
|
+ list_for_each_entry(kvm, &vm_list, vm_list) {
|
|
|
|
+ idx = srcu_read_lock(&kvm->srcu);
|
|
|
|
+ kvm_mmu_invalidate_zap_all_pages(kvm);
|
|
|
|
+ srcu_read_unlock(&kvm->srcu, idx);
|
|
|
|
+ }
|
|
|
|
+ mutex_unlock(&kvm_lock);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
int kvm_mmu_module_init(void)
|
|
|
|
{
|
|
|
|
int ret = -ENOMEM;
|
|
|
|
|
|
|
|
+ if (nx_huge_pages == -1)
|
|
|
|
+ __set_nx_huge_pages(boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT));
|
|
|
|
+
|
|
|
|
kvm_mmu_reset_all_pte_masks();
|
|
|
|
|
|
|
|
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
|
|
|
|
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
|
|
|
|
index 3b022b08b577..adf42dc8d38b 100644
|
|
|
|
--- a/arch/x86/kvm/paging_tmpl.h
|
|
|
|
+++ b/arch/x86/kvm/paging_tmpl.h
|
|
|
|
@@ -596,13 +596,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
|
|
|
|
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
|
|
|
struct guest_walker *gw,
|
|
|
|
int write_fault, int hlevel,
|
|
|
|
- kvm_pfn_t pfn, bool map_writable, bool prefault)
|
|
|
|
+ kvm_pfn_t pfn, bool map_writable, bool prefault,
|
|
|
|
+ bool lpage_disallowed)
|
|
|
|
{
|
|
|
|
struct kvm_mmu_page *sp = NULL;
|
|
|
|
struct kvm_shadow_walk_iterator it;
|
|
|
|
unsigned direct_access, access = gw->pt_access;
|
|
|
|
int top_level, ret;
|
|
|
|
- gfn_t base_gfn;
|
|
|
|
+ gfn_t gfn, base_gfn;
|
|
|
|
|
|
|
|
direct_access = gw->pte_access;
|
|
|
|
|
|
|
|
@@ -647,13 +648,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
|
|
|
link_shadow_page(vcpu, it.sptep, sp);
|
|
|
|
}
|
|
|
|
|
|
|
|
- base_gfn = gw->gfn;
|
|
|
|
+ /*
|
|
|
|
+ * FNAME(page_fault) might have clobbered the bottom bits of
|
|
|
|
+ * gw->gfn, restore them from the virtual address.
|
|
|
|
+ */
|
|
|
|
+ gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
|
|
|
|
+ base_gfn = gfn;
|
|
|
|
|
|
|
|
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
|
|
|
|
|
|
|
|
for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
|
|
|
|
clear_sp_write_flooding_count(it.sptep);
|
|
|
|
- base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * We cannot overwrite existing page tables with an NX
|
|
|
|
+ * large page, as the leaf could be executable.
|
|
|
|
+ */
|
|
|
|
+ disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
|
|
|
|
+
|
|
|
|
+ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
|
|
|
if (it.level == hlevel)
|
|
|
|
break;
|
|
|
|
|
|
|
|
@@ -665,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
|
|
|
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
|
|
|
|
it.level - 1, true, direct_access);
|
|
|
|
link_shadow_page(vcpu, it.sptep, sp);
|
|
|
|
+ if (lpage_disallowed)
|
|
|
|
+ account_huge_nx_page(vcpu->kvm, sp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@@ -741,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
|
|
|
int r;
|
|
|
|
kvm_pfn_t pfn;
|
|
|
|
int level = PT_PAGE_TABLE_LEVEL;
|
|
|
|
- bool force_pt_level = false;
|
|
|
|
unsigned long mmu_seq;
|
|
|
|
bool map_writable, is_self_change_mapping;
|
|
|
|
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
|
|
|
|
+ is_nx_huge_page_enabled();
|
|
|
|
+ bool force_pt_level = lpage_disallowed;
|
|
|
|
|
|
|
|
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
|
|
|
|
|
|
|
|
@@ -833,7 +850,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
|
|
|
if (!force_pt_level)
|
|
|
|
transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
|
|
|
|
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
|
|
|
|
- level, pfn, map_writable, prefault);
|
|
|
|
+ level, pfn, map_writable, prefault, lpage_disallowed);
|
|
|
|
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
|
2019-10-24 21:48:50 +00:00
|
|
|
index 2714c1a0e59f..ec80bb27504f 100644
|
2019-10-20 13:32:35 +00:00
|
|
|
--- a/arch/x86/kvm/x86.c
|
|
|
|
+++ b/arch/x86/kvm/x86.c
|
|
|
|
@@ -206,6 +206,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|
|
|
{ "mmu_unsync", VM_STAT(mmu_unsync) },
|
|
|
|
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
|
|
|
|
{ "largepages", VM_STAT(lpages, .mode = 0444) },
|
|
|
|
+ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
|
|
|
|
{ "max_mmu_page_hash_collisions",
|
|
|
|
VM_STAT(max_mmu_page_hash_collisions) },
|
|
|
|
{ NULL }
|
2019-10-24 21:48:50 +00:00
|
|
|
@@ -1130,6 +1131,14 @@ u64 kvm_get_arch_capabilities(void)
|
|
|
|
|
|
|
|
rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
|
|
|
|
+ * the nested hypervisor runs with NX huge pages. If it is not,
|
|
|
|
+ * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
|
|
|
|
+ * L1 guests, so it need not worry about its own (L2) guests.
|
|
|
|
+ */
|
|
|
|
+ data |= ARCH_CAP_PSCHANGE_MC_NO;
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* If we're doing cache flushes (either "always" or "cond")
|
|
|
|
* we will do one whenever the guest does a vmlaunch/vmresume.
|