From 9aee5ae400153e937d368c0bfe96e8c67697aa3b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 20 Oct 2019 14:35:22 +0100 Subject: [PATCH 01/13] debian/patches/series: Apply security fixes last (except ABI maintenance) The security fixes are where we have the greatest churn, so it's convenient if they can be pushed/popped without having to go through other patches. --- debian/patches/series | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/debian/patches/series b/debian/patches/series index b0b9b263d..891d589ca 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -158,16 +158,6 @@ features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-lo features/all/db-mok-keyring/0004-MODSIGN-check-the-attributes-of-db-and-mok.patch features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch -# Security fixes -debian/i386-686-pae-pci-set-pci-nobios-by-default.patch -debian/ntfs-mark-it-as-broken.patch -bugfix/all/netfilter-conntrack-use-consistent-ct-id-hash-calcul.patch -bugfix/all/ALSA-usb-audio-Fix-an-OOB-bug-in-parse_audio_mixer_unit.patch -bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.patch -bugfix/all/vhost-make-sure-log_num-in_num.patch -bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch -bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch - # Fix exported symbol versions bugfix/all/module-disable-matching-missing-version-crc.patch @@ -259,5 +249,15 @@ features/arm/ARM-dts-add-Raspberry-Pi-Compute-Module-3-and-IO-boa.patch features/arm64/arm64-dts-broadcom-Add-reference-to-Compute-Module-I.patch features/arm64/arm64-dts-broadcom-Use-the-.dtb-name-in-the-rule-rat.patch +# Security fixes +debian/i386-686-pae-pci-set-pci-nobios-by-default.patch +debian/ntfs-mark-it-as-broken.patch +bugfix/all/netfilter-conntrack-use-consistent-ct-id-hash-calcul.patch +bugfix/all/ALSA-usb-audio-Fix-an-OOB-bug-in-parse_audio_mixer_unit.patch +bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.patch +bugfix/all/vhost-make-sure-log_num-in_num.patch +bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch +bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch + # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From d9bd5941448fac3369ea59234201babc7c20a5dc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 20 Oct 2019 14:32:35 +0100 Subject: [PATCH 02/13] [x86] KVM: Add mitigation for Machine Check Error on Page Size Change (aka iTLB multi-hit, CVE-2018-12207) This is a backport of v6 of the "NX" patch set, and will probably require updates before release. 
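The first patch imported below (0001, "KVM: x86: adjust kvm_mmu_page member to save 8 bytes") gets its saving purely from structure padding: on x86-64 an 8-byte member forces 8-byte alignment, so a lone bool or 4-byte field sitting between aligned members leaves a hole. A stand-alone sketch of the effect, using simplified stand-in types rather than the real kvm_mmu_page members:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Ordering as before the patch: role's 4-byte slot and the lone
     * unsync byte each leave a hole before the next 8-byte member. */
    struct sp_before {
            uint64_t gfn;             /* offset 0 */
            uint32_t role;            /* offset 8; hole at 12..15 */
            uint64_t *spt;            /* offset 16 */
            uint64_t *gfns;           /* offset 24 */
            bool     unsync;          /* offset 32; hole at 33..35 */
            int      root_count;      /* offset 36 */
            unsigned unsync_children; /* offset 40; tail pad to 48 */
    };

    /* Ordering as after the patch: unsync packs into the slot shared
     * with role, so one 8-byte hole disappears. */
    struct sp_after {
            bool     unsync;          /* offset 0; hole at 1..3 */
            uint32_t role;            /* offset 4 */
            uint64_t gfn;             /* offset 8 */
            uint64_t *spt;            /* offset 16 */
            uint64_t *gfns;           /* offset 24 */
            int      root_count;      /* offset 32 */
            unsigned unsync_children; /* offset 36; size 40, no tail pad */
    };

    int main(void)
    {
            printf("before: %zu bytes, after: %zu bytes\n",
                   sizeof(struct sp_before), sizeof(struct sp_after));
            return 0; /* prints "before: 48 bytes, after: 40 bytes" on x86-64 */
    }

The 8-byte difference matches the slabinfo numbers quoted in that patch's message (objsize 168 before, 160 after).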
--- debian/changelog | 19 + ...-kvm_mmu_page-member-to-save-8-bytes.patch | 54 ++ ...0002-kvm-Convert-kvm_lock-to-a-mutex.patch | 275 +++++++++++ ...release-the-page-inside-mmu_set_spte.patch | 137 ++++++ ...ME-fetch-and-__direct_map-more-simil.patch | 173 +++++++ ...now-unneeded-hugepage-gfn-adjustment.patch | 74 +++ ...vm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch | 41 ++ ...epoints-around-__direct_map-and-FNAM.patch | 148 ++++++ ...do-not-allow-clearing-largepages-deb.patch | 101 ++++ ...Add-ITLB_MULTIHIT-bug-infrastructure.patch | 280 +++++++++++ ...010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch | 464 ++++++++++++++++++ ...unction-for-creating-VM-worker-threa.patch | 131 +++++ ...Recovery-of-shattered-NX-large-pages.patch | 368 ++++++++++++++ debian/patches/series | 12 + 14 files changed, 2277 insertions(+) create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch diff --git a/debian/changelog b/debian/changelog index 13e18b429..49e892720 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,22 @@ +linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium + + * [x86] KVM: Add mitigation for Machine Check Error on Page Size Change + (aka iTLB multi-hit, CVE-2018-12207): + - KVM: x86: adjust kvm_mmu_page member to save 8 bytes + - kvm: Convert kvm_lock to a mutex + - kvm: x86: Do not release the page inside mmu_set_spte() + - KVM: x86: make FNAME(fetch) and __direct_map more similar + - KVM: x86: remove now unneeded hugepage gfn adjustment + - KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON + - KVM: x86: add tracepoints around __direct_map and FNAME(fetch) + - kvm: x86, powerpc: do not allow clearing largepages debugfs entry + - x86: Add ITLB_MULTIHIT bug infrastructure + - kvm: mmu: ITLB_MULTIHIT mitigation + - kvm: Add helper function for creating VM worker threads + - kvm: x86: mmu: Recovery of shattered NX large pages + + -- Ben Hutchings Sun, 20 Oct 2019 14:21:28 +0100 + linux (4.19.67-2+deb10u1) buster-security; urgency=high [ Romain Perier ] diff --git a/debian/patches/bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch 
b/debian/patches/bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch
new file mode 100644
index 000000000..3a332cb2a
--- /dev/null
+++ b/debian/patches/bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch
@@ -0,0 +1,54 @@
+From: Wei Yang
+Date: Thu, 6 Sep 2018 05:58:16 +0800
+Subject: KVM: x86: adjust kvm_mmu_page member to save 8 bytes
+
+commit 3ff519f29d98ecdc1961d825d105d68711093b6b upstream.
+
+On a 64-bit machine, structs are naturally aligned to 8 bytes. Since
+the kvm_mmu_page members *unsync* and *role* are at most 4 bytes each,
+we can rearrange the sequence to compact the struct.
+
+As the comment shows, *role* and *gfn* are used to key the shadow page. In
+order to keep the comment valid, this patch moves *unsync* up and
+exchanges the positions of *role* and *gfn*.
+
+/proc/slabinfo shows that kvm_mmu_page is 8 bytes smaller, with one
+more object per slab, after applying this patch.
+
+  # name <active_objs> <num_objs> <objsize> <objperslab>
+  kvm_mmu_page_header 0 0 168 24
+
+  kvm_mmu_page_header 0 0 160 25
+
+Signed-off-by: Wei Yang
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Ben Hutchings
+---
+ arch/x86/include/asm/kvm_host.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 0d3f5cf3ff3e..90dccb5c79d9 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -281,18 +281,18 @@ struct kvm_rmap_head {
+ struct kvm_mmu_page {
+ 	struct list_head link;
+ 	struct hlist_node hash_link;
++	bool unsync;
+
+ 	/*
+ 	 * The following two entries are used to key the shadow page in the
+ 	 * hash table.
+ 	 */
+-	gfn_t gfn;
+ 	union kvm_mmu_page_role role;
++	gfn_t gfn;
+
+ 	u64 *spt;
+ 	/* hold the gfn of each spte inside spt */
+ 	gfn_t *gfns;
+-	bool unsync;
+ 	int root_count; /* Currently serving as active root */
+ 	unsigned int unsync_children;
+ 	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
diff --git a/debian/patches/bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch b/debian/patches/bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch
new file mode 100644
index 000000000..d1f52e63d
--- /dev/null
+++ b/debian/patches/bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch
@@ -0,0 +1,275 @@
+From: Junaid Shahid
+Date: Thu, 3 Jan 2019 17:14:28 -0800
+Subject: kvm: Convert kvm_lock to a mutex
+
+commit 0d9ce162cf46c99628cc5da9510b959c7976735b upstream.
+
+It doesn't seem as if there is any particular need for kvm_lock to be a
+spinlock, so convert the lock to a mutex so that sleepable functions (in
+particular cond_resched()) can be called while holding it.
+
+Signed-off-by: Junaid Shahid
+Signed-off-by: Paolo Bonzini
+[bwh: Backported to 4.19: adjust context]
+Signed-off-by: Ben Hutchings
+---
+ Documentation/virtual/kvm/locking.txt | 4 +---
+ arch/s390/kvm/kvm-s390.c | 4 ++--
+ arch/x86/kvm/mmu.c | 4 ++--
+ arch/x86/kvm/x86.c | 14 ++++++-------
+ include/linux/kvm_host.h | 2 +-
+ virt/kvm/kvm_main.c | 30 +++++++++++++--------------
+ 6 files changed, 28 insertions(+), 30 deletions(-)
+
+diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
+index 1bb8bcaf8497..635cd6eaf714 100644
+--- a/Documentation/virtual/kvm/locking.txt
++++ b/Documentation/virtual/kvm/locking.txt
+@@ -15,8 +15,6 @@ KVM Lock Overview
+
+ On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+
+-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
+- + Everything else is a leaf: no other lock is taken inside the critical + sections. + +@@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. + ------------ + + Name: kvm_lock +-Type: spinlock_t ++Type: mutex + Arch: any + Protects: - vm_list + +diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c +index fac1d4eaa426..3c317bc6b799 100644 +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -2110,13 +2110,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); + if (!kvm->arch.sca) + goto out_err; +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + sca_offset += 16; + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) + sca_offset = 0; + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + sprintf(debug_name, "kvm-%u", current->pid); + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 88940261fb53..c9d4e02bd73a 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -5819,7 +5819,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) + int nr_to_scan = sc->nr_to_scan; + unsigned long freed = 0; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) { + int idx; +@@ -5869,7 +5869,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) + break; + } + +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + return freed; + } + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 6ae8a013af31..0c085b895e6e 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -6502,7 +6502,7 @@ static void kvm_hyperv_tsc_notifier(void) + struct kvm_vcpu *vcpu; + int cpu; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_make_mclock_inprogress_request(kvm); + +@@ -6528,7 +6528,7 @@ static void kvm_hyperv_tsc_notifier(void) + + spin_unlock(&ka->pvclock_gtod_sync_lock); + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + } + #endif + +@@ -6586,17 +6586,17 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va + + smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->cpu != freq->cpu) + continue; + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); +- if (vcpu->cpu != smp_processor_id()) ++ if (vcpu->cpu != raw_smp_processor_id()) + send_ipi = 1; + } + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + if (freq->old < freq->new && send_ipi) { + /* +@@ -6722,12 +6722,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) + struct kvm_vcpu *vcpu; + int i; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); + atomic_set(&kvm_guest_has_master_clock, 0); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + } + + static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index d42a36e4e6c2..5246a480d15a 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -141,7 +141,7 @@ static inline bool is_error_page(struct page *page) + + extern struct kmem_cache *kvm_vcpu_cache; + +-extern 
spinlock_t kvm_lock; ++extern struct mutex kvm_lock; + extern struct list_head vm_list; + + struct kvm_io_range { +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 4a584a575221..6a8fe26198b9 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); + * kvm->lock --> kvm->slots_lock --> kvm->irq_lock + */ + +-DEFINE_SPINLOCK(kvm_lock); ++DEFINE_MUTEX(kvm_lock); + static DEFINE_RAW_SPINLOCK(kvm_count_lock); + LIST_HEAD(vm_list); + +@@ -684,9 +684,9 @@ static struct kvm *kvm_create_vm(unsigned long type) + if (r) + goto out_err; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_add(&kvm->vm_list, &vm_list); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + preempt_notifier_inc(); + +@@ -732,9 +732,9 @@ static void kvm_destroy_vm(struct kvm *kvm) + kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); + kvm_destroy_vm_debugfs(kvm); + kvm_arch_sync_events(kvm); +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_del(&kvm->vm_list); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + kvm_free_irq_routing(kvm); + for (i = 0; i < KVM_NR_BUSES; i++) { + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); +@@ -3828,13 +3828,13 @@ static int vm_stat_get(void *_offset, u64 *val) + u64 tmp_val; + + *val = 0; +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + *val += tmp_val; + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + return 0; + } + +@@ -3847,12 +3847,12 @@ static int vm_stat_clear(void *_offset, u64 val) + if (val) + return -EINVAL; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vm_stat_clear_per_vm((void *)&stat_tmp, 0); + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + return 0; + } +@@ -3867,13 +3867,13 @@ static int vcpu_stat_get(void *_offset, u64 *val) + u64 tmp_val; + + *val = 0; +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + *val += tmp_val; + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + return 0; + } + +@@ -3886,12 +3886,12 @@ static int vcpu_stat_clear(void *_offset, u64 val) + if (val) + return -EINVAL; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + return 0; + } +@@ -3912,7 +3912,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) + if (!kvm_dev.this_device || !kvm) + return; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + if (type == KVM_EVENT_CREATE_VM) { + kvm_createvm_count++; + kvm_active_vms++; +@@ -3921,7 +3921,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) + } + created = kvm_createvm_count; + active = kvm_active_vms; +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + env = kzalloc(sizeof(*env), GFP_KERNEL); + if (!env) diff --git a/debian/patches/bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch b/debian/patches/bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch new file mode 100644 index 000000000..51cb71d1f --- /dev/null +++ 
b/debian/patches/bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch @@ -0,0 +1,137 @@ +From: Junaid Shahid +Date: Thu, 3 Jan 2019 16:22:21 -0800 +Subject: kvm: x86: Do not release the page inside mmu_set_spte() + +commit 43fdcda96e2550c6d1c46fb8a78801aa2f7276ed upstream. + +Release the page at the call-site where it was originally acquired. +This makes the exit code cleaner for most call sites, since they +do not need to duplicate code between success and the failure +label. + +Signed-off-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +Signed-off-by: Ben Hutchings +--- + arch/x86/kvm/mmu.c | 18 +++++++----------- + arch/x86/kvm/paging_tmpl.h | 8 +++----- + 2 files changed, 10 insertions(+), 16 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index c9d4e02bd73a..7dc18fb42168 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3001,8 +3001,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, + } + } + +- kvm_release_pfn_clean(pfn); +- + return ret; + } + +@@ -3037,9 +3035,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, + if (ret <= 0) + return -1; + +- for (i = 0; i < ret; i++, gfn++, start++) ++ for (i = 0; i < ret; i++, gfn++, start++) { + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, + page_to_pfn(pages[i]), true, true); ++ put_page(pages[i]); ++ } + + return 0; + } +@@ -3445,6 +3445,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) + return r; + ++ r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; +@@ -3453,14 +3454,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- spin_unlock(&vcpu->kvm->mmu_lock); +- +- return r; + + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +- return RET_PF_RETRY; ++ return r; + } + + static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, +@@ -4082,6 +4080,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) + return r; + ++ r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; +@@ -4090,14 +4089,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- spin_unlock(&vcpu->kvm->mmu_lock); +- +- return r; + + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +- return RET_PF_RETRY; ++ return r; + } + + static void nonpaging_init_context(struct kvm_vcpu *vcpu, +diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h +index 14ffd973df54..569c55dae3fa 100644 +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + true, true); + ++ kvm_release_pfn_clean(pfn); + return true; + } + +@@ -673,7 +674,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + return ret; + + out_gpte_changed: +- 
kvm_release_pfn_clean(pfn); + return RET_PF_RETRY; + } + +@@ -821,6 +821,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, + walker.pte_access &= ~ACC_EXEC_MASK; + } + ++ r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; +@@ -834,14 +835,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, + level, pfn, map_writable, prefault); + ++vcpu->stat.pf_fixed; + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); +- spin_unlock(&vcpu->kvm->mmu_lock); +- +- return r; + + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +- return RET_PF_RETRY; ++ return r; + } + + static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) diff --git a/debian/patches/bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch b/debian/patches/bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch new file mode 100644 index 000000000..436fb76b1 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch @@ -0,0 +1,173 @@ +From: Paolo Bonzini +Date: Mon, 24 Jun 2019 13:06:21 +0200 +Subject: KVM: x86: make FNAME(fetch) and __direct_map more similar + +commit 3fcf2d1bdeb6a513523cb2c77012a6b047aa859c upstream. + +These two functions are basically doing the same thing through +kvm_mmu_get_page, link_shadow_page and mmu_set_spte; yet, for historical +reasons, their code looks very different. This patch tries to take the +best of each and make them very similar, so that it is easy to understand +changes that apply to both of them. + +Signed-off-by: Paolo Bonzini +[bwh: Backported to 4.19: adjust context] +Signed-off-by: Ben Hutchings +--- + arch/x86/kvm/mmu.c | 53 ++++++++++++++++++-------------------- + arch/x86/kvm/paging_tmpl.h | 30 ++++++++++----------- + 2 files changed, 39 insertions(+), 44 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 7dc18fb42168..42a7120323bb 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3087,40 +3087,39 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) + __direct_pte_prefetch(vcpu, sp, sptep); + } + +-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, +- int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) ++static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, ++ int map_writable, int level, kvm_pfn_t pfn, ++ bool prefault) + { +- struct kvm_shadow_walk_iterator iterator; ++ struct kvm_shadow_walk_iterator it; + struct kvm_mmu_page *sp; +- int emulate = 0; +- gfn_t pseudo_gfn; ++ int ret; ++ gfn_t gfn = gpa >> PAGE_SHIFT; ++ gfn_t base_gfn = gfn; + + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) +- return 0; ++ return RET_PF_RETRY; + +- for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { +- if (iterator.level == level) { +- emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, +- write, level, gfn, pfn, prefault, +- map_writable); +- direct_pte_prefetch(vcpu, iterator.sptep); +- ++vcpu->stat.pf_fixed; ++ for_each_shadow_entry(vcpu, gpa, it) { ++ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); ++ if (it.level == level) + break; +- } + +- drop_large_spte(vcpu, iterator.sptep); +- if (!is_shadow_present_pte(*iterator.sptep)) { +- u64 base_addr = iterator.addr; ++ drop_large_spte(vcpu, it.sptep); ++ if (!is_shadow_present_pte(*it.sptep)) { ++ sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, ++ 
it.level - 1, true, ACC_ALL); + +- base_addr &= PT64_LVL_ADDR_MASK(iterator.level); +- pseudo_gfn = base_addr >> PAGE_SHIFT; +- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, +- iterator.level - 1, 1, ACC_ALL); +- +- link_shadow_page(vcpu, iterator.sptep, sp); ++ link_shadow_page(vcpu, it.sptep, sp); + } + } +- return emulate; ++ ++ ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, ++ write, level, base_gfn, pfn, prefault, ++ map_writable); ++ direct_pte_prefetch(vcpu, it.sptep); ++ ++vcpu->stat.pf_fixed; ++ return ret; + } + + static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) +@@ -3453,8 +3452,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); +- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- ++ r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +@@ -4088,8 +4086,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); +- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- ++ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h +index 569c55dae3fa..eb95d3672acd 100644 +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -602,6 +602,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + struct kvm_shadow_walk_iterator it; + unsigned direct_access, access = gw->pt_access; + int top_level, ret; ++ gfn_t base_gfn; + + direct_access = gw->pte_access; + +@@ -646,31 +647,29 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + link_shadow_page(vcpu, it.sptep, sp); + } + +- for (; +- shadow_walk_okay(&it) && it.level > hlevel; +- shadow_walk_next(&it)) { +- gfn_t direct_gfn; ++ base_gfn = gw->gfn; + ++ for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { + clear_sp_write_flooding_count(it.sptep); ++ base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); ++ if (it.level == hlevel) ++ break; ++ + validate_direct_spte(vcpu, it.sptep, direct_access); + + drop_large_spte(vcpu, it.sptep); + +- if (is_shadow_present_pte(*it.sptep)) +- continue; +- +- direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); +- +- sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, +- true, direct_access); +- link_shadow_page(vcpu, it.sptep, sp); ++ if (!is_shadow_present_pte(*it.sptep)) { ++ sp = kvm_mmu_get_page(vcpu, base_gfn, addr, ++ it.level - 1, true, direct_access); ++ link_shadow_page(vcpu, it.sptep, sp); ++ } + } + +- clear_sp_write_flooding_count(it.sptep); + ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, +- it.level, gw->gfn, pfn, prefault, map_writable); ++ it.level, base_gfn, pfn, prefault, map_writable); + FNAME(pte_prefetch)(vcpu, gw, it.sptep); +- ++ ++vcpu->stat.pf_fixed; + return ret; + + out_gpte_changed: +@@ -833,7 +832,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, + transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, + level, pfn, map_writable, prefault); +- ++vcpu->stat.pf_fixed; + kvm_mmu_audit(vcpu, 
AUDIT_POST_PAGE_FAULT); + + out_unlock: diff --git a/debian/patches/bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch b/debian/patches/bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch new file mode 100644 index 000000000..37aebee2b --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch @@ -0,0 +1,74 @@ +From: Paolo Bonzini +Date: Sun, 23 Jun 2019 19:15:49 +0200 +Subject: KVM: x86: remove now unneeded hugepage gfn adjustment + +commit d679b32611c0102ce33b9e1a4e4b94854ed1812a upstream. + +After the previous patch, the low bits of the gfn are masked in +both FNAME(fetch) and __direct_map, so we do not need to clear them +in transparent_hugepage_adjust. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Ben Hutchings +--- + arch/x86/kvm/mmu.c | 9 +++------ + arch/x86/kvm/paging_tmpl.h | 2 +- + 2 files changed, 4 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 42a7120323bb..96803f996819 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3155,11 +3155,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) + } + + static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, +- gfn_t *gfnp, kvm_pfn_t *pfnp, ++ gfn_t gfn, kvm_pfn_t *pfnp, + int *levelp) + { + kvm_pfn_t pfn = *pfnp; +- gfn_t gfn = *gfnp; + int level = *levelp; + + /* +@@ -3186,8 +3185,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, + mask = KVM_PAGES_PER_HPAGE(level) - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { +- gfn &= ~mask; +- *gfnp = gfn; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); +@@ -3451,7 +3448,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (likely(!force_pt_level)) +- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); ++ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); +@@ -4085,7 +4082,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (likely(!force_pt_level)) +- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); ++ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); +diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h +index eb95d3672acd..4aab953f1d31 100644 +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -829,7 +829,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (!force_pt_level) +- transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); ++ transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, + level, pfn, map_writable, prefault); + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch b/debian/patches/bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch new file mode 100644 index 000000000..58cd52ba5 --- /dev/null +++ 
b/debian/patches/bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch @@ -0,0 +1,41 @@ +From: Paolo Bonzini +Date: Sun, 30 Jun 2019 08:36:21 -0400 +Subject: KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON + +commit e9f2a760b158551bfbef6db31d2cae45ab8072e5 upstream. + +Note that in such a case it is quite likely that KVM will BUG_ON +in __pte_list_remove when the VM is closed. However, there is no +immediate risk of memory corruption in the host so a WARN_ON is +enough and it lets you gather traces for debugging. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Ben Hutchings +--- + arch/x86/kvm/mmu.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 96803f996819..68fa10d890ee 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -1027,10 +1027,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) + + static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) + { +- if (sp->role.direct) +- BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); +- else ++ if (!sp->role.direct) { + sp->gfns[index] = gfn; ++ return; ++ } ++ ++ if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) ++ pr_err_ratelimited("gfn mismatch under direct page %llx " ++ "(expected %llx, got %llx)\n", ++ sp->gfn, ++ kvm_mmu_page_get_gfn(sp, index), gfn); + } + + /* diff --git a/debian/patches/bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch b/debian/patches/bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch new file mode 100644 index 000000000..ce11a4504 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch @@ -0,0 +1,148 @@ +From: Paolo Bonzini +Date: Mon, 1 Jul 2019 06:22:57 -0400 +Subject: KVM: x86: add tracepoints around __direct_map and FNAME(fetch) + +commit 335e192a3fa415e1202c8b9ecdaaecd643f823cc upstream. + +These are useful in debugging shadow paging. + +Signed-off-by: Paolo Bonzini +[bwh: Backported to 4.19: adjust context] +Signed-off-by: Ben Hutchings +--- + arch/x86/kvm/mmu.c | 13 ++++----- + arch/x86/kvm/mmutrace.h | 59 ++++++++++++++++++++++++++++++++++++++ + arch/x86/kvm/paging_tmpl.h | 2 ++ + 3 files changed, 67 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 68fa10d890ee..7f9be921df7c 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -140,9 +140,6 @@ module_param(dbg, bool, 0644); + + #include + +-#define CREATE_TRACE_POINTS +-#include "mmutrace.h" +- + #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) + +@@ -261,9 +258,13 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + + + static void mmu_spte_set(u64 *sptep, u64 spte); ++static bool is_executable_pte(u64 spte); + static union kvm_mmu_page_role + kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); + ++#define CREATE_TRACE_POINTS ++#include "mmutrace.h" ++ + void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) + { + BUG_ON((mmio_mask & mmio_value) != mmio_value); +@@ -2992,10 +2993,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, + ret = RET_PF_EMULATE; + + pgprintk("%s: setting spte %llx\n", __func__, *sptep); +- pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", +- is_large_pte(*sptep)? 
"2MB" : "4kB", +- *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn, +- *sptep, sptep); ++ trace_kvm_mmu_set_spte(level, gfn, sptep); + if (!was_rmapped && is_large_pte(*sptep)) + ++vcpu->kvm->stat.lpages; + +@@ -3106,6 +3104,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return RET_PF_RETRY; + ++ trace_kvm_mmu_spte_requested(gpa, level, pfn); + for_each_shadow_entry(vcpu, gpa, it) { + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) +diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h +index c73bf4e4988c..918b0d5bf272 100644 +--- a/arch/x86/kvm/mmutrace.h ++++ b/arch/x86/kvm/mmutrace.h +@@ -325,6 +325,65 @@ TRACE_EVENT( + __entry->kvm_gen == __entry->spte_gen + ) + ); ++ ++TRACE_EVENT( ++ kvm_mmu_set_spte, ++ TP_PROTO(int level, gfn_t gfn, u64 *sptep), ++ TP_ARGS(level, gfn, sptep), ++ ++ TP_STRUCT__entry( ++ __field(u64, gfn) ++ __field(u64, spte) ++ __field(u64, sptep) ++ __field(u8, level) ++ /* These depend on page entry type, so compute them now. */ ++ __field(bool, r) ++ __field(bool, x) ++ __field(u8, u) ++ ), ++ ++ TP_fast_assign( ++ __entry->gfn = gfn; ++ __entry->spte = *sptep; ++ __entry->sptep = virt_to_phys(sptep); ++ __entry->level = level; ++ __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK); ++ __entry->x = is_executable_pte(__entry->spte); ++ __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1; ++ ), ++ ++ TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx", ++ __entry->gfn, __entry->spte, ++ __entry->r ? "r" : "-", ++ __entry->spte & PT_WRITABLE_MASK ? "w" : "-", ++ __entry->x ? "x" : "-", ++ __entry->u == -1 ? "" : (__entry->u ? "u" : "-"), ++ __entry->level, __entry->sptep ++ ) ++); ++ ++TRACE_EVENT( ++ kvm_mmu_spte_requested, ++ TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn), ++ TP_ARGS(addr, level, pfn), ++ ++ TP_STRUCT__entry( ++ __field(u64, gfn) ++ __field(u64, pfn) ++ __field(u8, level) ++ ), ++ ++ TP_fast_assign( ++ __entry->gfn = addr >> PAGE_SHIFT; ++ __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); ++ __entry->level = level; ++ ), ++ ++ TP_printk("gfn %llx pfn %llx level %d", ++ __entry->gfn, __entry->pfn, __entry->level ++ ) ++); ++ + #endif /* _TRACE_KVMMMU_H */ + + #undef TRACE_INCLUDE_PATH +diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h +index 4aab953f1d31..3b022b08b577 100644 +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -649,6 +649,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + + base_gfn = gw->gfn; + ++ trace_kvm_mmu_spte_requested(addr, gw->level, pfn); ++ + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { + clear_sp_write_flooding_count(it.sptep); + base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch b/debian/patches/bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch new file mode 100644 index 000000000..186eef648 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch @@ -0,0 +1,101 @@ +From: Paolo Bonzini +Date: Mon, 30 Sep 2019 18:48:44 +0200 +Subject: kvm: x86, powerpc: do not allow clearing largepages debugfs entry + +commit 833b45de69a6016c4b0cebe6765d526a31a81580 upstream. 
+ +The largepages debugfs entry is incremented/decremented as shadow +pages are created or destroyed. Clearing it will result in an +underflow, which is harmless to KVM but ugly (and could be +misinterpreted by tools that use debugfs information), so make +this particular statistic read-only. + +Signed-off-by: Paolo Bonzini +[bwh: Backported to 4.19: drop powerpc changes and the Cc to kvm-ppc] +Signed-off-by: Ben Hutchings +--- + arch/x86/kvm/x86.c | 6 +++--- + include/linux/kvm_host.h | 2 ++ + virt/kvm/kvm_main.c | 10 +++++++--- + 3 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 0c085b895e6e..2714c1a0e59f 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); + static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); + #endif + +-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM +-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU ++#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ ++#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ + + #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) +@@ -205,7 +205,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { + { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, + { "mmu_unsync", VM_STAT(mmu_unsync) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, +- { "largepages", VM_STAT(lpages) }, ++ { "largepages", VM_STAT(lpages, .mode = 0444) }, + { "max_mmu_page_hash_collisions", + VM_STAT(max_mmu_page_hash_collisions) }, + { NULL } +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 5246a480d15a..553a3115a735 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1034,6 +1034,7 @@ enum kvm_stat_kind { + + struct kvm_stat_data { + int offset; ++ int mode; + struct kvm *kvm; + }; + +@@ -1041,6 +1042,7 @@ struct kvm_stats_debugfs_item { + const char *name; + int offset; + enum kvm_stat_kind kind; ++ int mode; + }; + extern struct kvm_stats_debugfs_item debugfs_entries[]; + extern struct dentry *kvm_debugfs_dir; +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 6a8fe26198b9..5482949b452c 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -616,8 +616,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) + + stat_data->kvm = kvm; + stat_data->offset = p->offset; ++ stat_data->mode = p->mode ? p->mode : 0644; + kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; +- debugfs_create_file(p->name, 0644, kvm->debugfs_dentry, ++ debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, + stat_data, stat_fops_per_vm[p->kind]); + } + return 0; +@@ -3714,7 +3715,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, + if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) + return -ENOENT; + +- if (simple_attr_open(inode, file, get, set, fmt)) { ++ if (simple_attr_open(inode, file, get, ++ stat_data->mode & S_IWUGO ? set : NULL, ++ fmt)) { + kvm_put_kvm(stat_data->kvm); + return -ENOMEM; + } +@@ -3962,7 +3965,8 @@ static void kvm_init_debug(void) + + kvm_debugfs_num_entries = 0; + for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { +- debugfs_create_file(p->name, 0644, kvm_debugfs_dir, ++ int mode = p->mode ? 
p->mode : 0644;
++		debugfs_create_file(p->name, mode, kvm_debugfs_dir,
+ 				(void *)(long)p->offset,
+ 				stat_fops[p->kind]);
+ 	}
diff --git a/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch b/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch
new file mode 100644
index 000000000..0ff74e465
--- /dev/null
+++ b/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch
@@ -0,0 +1,280 @@
+From: Pawan Gupta
+Date: Fri, 11 Oct 2019 12:40:12 +0200
+Subject: x86: Add ITLB_MULTIHIT bug infrastructure
+
+Some processors may incur a machine check error possibly
+resulting in an unrecoverable cpu hang when an instruction fetch
+encounters a TLB multi-hit in the instruction TLB. This can occur
+when the page size is changed along with either the physical
+address or cache type [1].
+
+This issue affects both bare-metal x86 page tables and EPT.
+
+This can be mitigated by either eliminating the use of large
+pages or by using careful TLB invalidations when changing the
+page size in the page tables.
+
+Just like Spectre, Meltdown, L1TF and MDS, a new bit has been
+allocated in MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will
+be set on CPUs which are mitigated against this issue.
+
+[1] For example, please refer to erratum SKL002 in "6th Generation
+Intel Processor Family Specification Update"
+https://www.intel.com/content/www/us/en/products/docs/processors/core/desktop-6th-gen-core-family-spec-update.html
+https://www.google.com/search?q=site:intel.com+SKL002
+
+There are a lot of other affected processors outside of Skylake, and
+the erratum referred to above does not fully disclose the issue or its
+impact, either on Skylake or across all the affected CPUs.
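A user-space sketch, separate from the patch itself, of the enumeration test that cpu_set_bug_bits() performs below: bit 6 of MSR_IA32_ARCH_CAPABILITIES is the PSCHANGE_MC_NO bit this patch defines, and the sample register value here is invented for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define MSR_IA32_ARCH_CAPABILITIES 0x10a    /* architectural MSR index */
    #define ARCH_CAP_PSCHANGE_MC_NO (1ULL << 6) /* bit added by this patch */

    int main(void)
    {
            /* The kernel reads this with rdmsrl(); from user space the
             * value could be pulled from /dev/cpu/0/msr at offset 0x10a.
             * A hard-coded sample value stands in here. */
            uint64_t ia32_cap = ARCH_CAP_PSCHANGE_MC_NO;

            if (ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)
                    printf("PSCHANGE_MC_NO set: not affected by iTLB multihit\n");
            else
                    printf("PSCHANGE_MC_NO clear: affected unless whitelisted\n");
            return 0;
    }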
+ +Signed-off-by: Vineela Tummalapalli +Co-developed-by: Pawan Gupta +Signed-off-by: Pawan Gupta +Signed-off-by: Paolo Bonzini +[bwh: Backported to 4.19: + - No support for X86_VENDOR_HYGON, ATOM_AIRMONT_NP + - Adjust context] +Signed-off-by: Ben Hutchings +--- + .../ABI/testing/sysfs-devices-system-cpu | 1 + + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/msr-index.h | 7 ++ + arch/x86/kernel/cpu/bugs.c | 13 ++++ + arch/x86/kernel/cpu/common.c | 67 ++++++++++--------- + drivers/base/cpu.c | 8 +++ + include/linux/cpu.h | 2 + + 7 files changed, 68 insertions(+), 31 deletions(-) + +diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu +index 8718d4ad227b..a0edcdc7c0b8 100644 +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass + /sys/devices/system/cpu/vulnerabilities/l1tf + /sys/devices/system/cpu/vulnerabilities/mds ++ /sys/devices/system/cpu/vulnerabilities/itlb_multihit + Date: January 2018 + Contact: Linux kernel mailing list + Description: Information about CPU vulnerabilities +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 759f0a176612..ccad4f183400 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -389,5 +389,6 @@ + #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ + #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ + #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ ++#define X86_BUG_ITLB_MULTIHIT X86_BUG(22) /* CPU may incur MCE during certain page attribute changes */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index a1d22e4428f6..f58e6921cbf7 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -84,6 +84,13 @@ + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ ++#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* ++ * The processor is not susceptible to a ++ * machine check error due to modifying the ++ * code page size along with either the ++ * physical address or cache type ++ * without TLB invalidation. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index ee7d17611ead..60e47e492c2f 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1281,6 +1281,11 @@ static ssize_t l1tf_show_state(char *buf) + } + #endif + ++static ssize_t itlb_multihit_show_state(char *buf) ++{ ++ return sprintf(buf, "Processor vulnerable\n"); ++} ++ + static ssize_t mds_show_state(char *buf) + { + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { +@@ -1366,6 +1371,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + case X86_BUG_MDS: + return mds_show_state(buf); + ++ case X86_BUG_ITLB_MULTIHIT: ++ return itlb_multihit_show_state(buf); ++ + default: + break; + } +@@ -1402,4 +1410,9 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu + { + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); + } ++ ++ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); ++} + #endif +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index b33fdfa0ff49..128808dccd2f 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -946,13 +946,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #endif + } + +-#define NO_SPECULATION BIT(0) +-#define NO_MELTDOWN BIT(1) +-#define NO_SSB BIT(2) +-#define NO_L1TF BIT(3) +-#define NO_MDS BIT(4) +-#define MSBDS_ONLY BIT(5) +-#define NO_SWAPGS BIT(6) ++#define NO_SPECULATION BIT(0) ++#define NO_MELTDOWN BIT(1) ++#define NO_SSB BIT(2) ++#define NO_L1TF BIT(3) ++#define NO_MDS BIT(4) ++#define MSBDS_ONLY BIT(5) ++#define NO_SWAPGS BIT(6) ++#define NO_ITLB_MULTIHIT BIT(7) + + #define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } +@@ -970,26 +971,26 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ +- VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), +- VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), +- VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), +- VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), +- VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), +- +- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | 
MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + +- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + +- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), +- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously +@@ -1000,13 +1001,13 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + */ + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + {} + }; + +@@ -1021,15 +1022,19 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + { + u64 ia32_cap = 0; + ++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); ++ ++ /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ ++ if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) ++ setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); ++ + if (cpu_matches(NO_SPECULATION)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + +- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) +- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); +- + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c +index 2fd6ca1021c2..c21e2aec5cbb 100644 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -552,12 +552,19 @@ ssize_t __weak cpu_show_mds(struct device *dev, + return sprintf(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_itlb_multihit(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); + static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); + static DEVICE_ATTR(l1tf, 0444, 
cpu_show_l1tf, NULL);
+ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
++static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ 	&dev_attr_meltdown.attr,
+@@ -566,6 +573,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ 	&dev_attr_spec_store_bypass.attr,
+ 	&dev_attr_l1tf.attr,
+ 	&dev_attr_mds.attr,
++	&dev_attr_itlb_multihit.attr,
+ 	NULL
+ };
+
+diff --git a/include/linux/cpu.h b/include/linux/cpu.h
+index 006f69f9277b..7bb824b0f30e 100644
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -59,6 +59,8 @@ extern ssize_t cpu_show_l1tf(struct device *dev,
+ 				struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_mds(struct device *dev,
+ 			    struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_itlb_multihit(struct device *dev,
++				      struct device_attribute *attr, char *buf);
+
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch b/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch
new file mode 100644
index 000000000..62959cf6c
--- /dev/null
+++ b/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch
@@ -0,0 +1,464 @@
+From: Paolo Bonzini
+Date: Fri, 11 Oct 2019 12:40:14 +0200
+Subject: kvm: mmu: ITLB_MULTIHIT mitigation
+
+With some Intel processors, putting the same virtual address in the TLB
+as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit
+and cause the processor to issue a machine check. Unfortunately, if EPT
+page tables use huge pages, it is possible for a malicious guest to
+cause this situation.
+
+This patch adds a knob to mark huge pages as non-executable. When the
+nx_huge_pages parameter is enabled (and we are using EPT), all huge pages
+are marked as NX. If the guest attempts to execute in one of those pages,
+the page is broken down into 4K pages, which are then marked executable.
+
+This is not an issue for shadow paging (except nested EPT), because then
+the host is in control of TLB flushes and the problematic situation cannot
+happen. With nested EPT, again the nested guest can cause problems so we
+treat shadow and direct EPT the same.
+
+Signed-off-by: Junaid Shahid
+Signed-off-by: Paolo Bonzini
+[bwh: Backported to 4.19:
+ - Use kvm_mmu_invalidate_zap_all_pages() instead of kvm_mmu_zap_all_fast()
+ - Adjust context]
+Signed-off-by: Ben Hutchings
+---
+ .../admin-guide/kernel-parameters.txt | 11 ++
+ arch/x86/include/asm/kvm_host.h | 2 +
+ arch/x86/kernel/cpu/bugs.c | 13 +-
+ arch/x86/kvm/mmu.c | 135 +++++++++++++++++-
+ arch/x86/kvm/paging_tmpl.h | 29 +++-
+ arch/x86/kvm/x86.c | 1 +
+ 6 files changed, 178 insertions(+), 13 deletions(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 16607b178b47..b2c1a5c63ab3 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -1956,6 +1956,17 @@
+ 			KVM MMU at runtime.
+ 			Default is 0 (off)
+
++	kvm.nx_huge_pages=
++			[KVM] Controls the sw workaround for bug
++			X86_BUG_ITLB_MULTIHIT.
++			force	: Always deploy workaround.
++			off	: Default. Never deploy workaround.
++			auto	: Deploy workaround based on presence of
++				  X86_BUG_ITLB_MULTIHIT.
++
++			If the sw workaround is enabled for the host, guests
++			need not enable it for nested guests.
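The force/off/auto modes documented above reduce to a tri-state resolved against the CPU bug bit. A stand-alone sketch with illustrative helper names (the patch's real parser is set_nx_huge_pages(), registered via module_param_cb() further down):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-in for boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT). */
    static bool cpu_is_affected = true;

    static bool resolve_nx_huge_pages(const char *val)
    {
            if (strcmp(val, "force") == 0)
                    return true;          /* always deploy the workaround */
            if (strcmp(val, "off") == 0)
                    return false;         /* never deploy it */
            return cpu_is_affected;       /* "auto": only on affected CPUs */
    }

    int main(void)
    {
            const char *modes[] = { "force", "off", "auto" };
            for (int i = 0; i < 3; i++)
                    printf("%-5s -> nx_huge_pages=%d\n",
                           modes[i], resolve_nx_huge_pages(modes[i]));
            return 0;
    }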
++ + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. + Default is 1 (enabled) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 90dccb5c79d9..59b44445ed59 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -282,6 +282,7 @@ struct kvm_mmu_page { + struct list_head link; + struct hlist_node hash_link; + bool unsync; ++ bool lpage_disallowed; /* Can't be replaced by an equiv large page */ + + /* + * The following two entries are used to key the shadow page in the +@@ -890,6 +891,7 @@ struct kvm_vm_stat { + ulong mmu_unsync; + ulong remote_tlb_flush; + ulong lpages; ++ ulong nx_lpage_splits; + ulong max_mmu_page_hash_collisions; + }; + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 60e47e492c2f..1e764992fa64 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1119,6 +1119,9 @@ void x86_spec_ctrl_setup_ap(void) + x86_amd_ssb_disable(); + } + ++bool itlb_multihit_kvm_mitigation; ++EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); ++ + #undef pr_fmt + #define pr_fmt(fmt) "L1TF: " fmt + +@@ -1274,17 +1277,25 @@ static ssize_t l1tf_show_state(char *buf) + l1tf_vmx_states[l1tf_vmx_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); + } ++ ++static ssize_t itlb_multihit_show_state(char *buf) ++{ ++ if (itlb_multihit_kvm_mitigation) ++ return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); ++ else ++ return sprintf(buf, "KVM: Vulnerable\n"); ++} + #else + static ssize_t l1tf_show_state(char *buf) + { + return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + } +-#endif + + static ssize_t itlb_multihit_show_state(char *buf) + { + return sprintf(buf, "Processor vulnerable\n"); + } ++#endif + + static ssize_t mds_show_state(char *buf) + { +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 7f9be921df7c..19c3dc9b05cb 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -49,6 +49,20 @@ + #include + #include "trace.h" + ++extern bool itlb_multihit_kvm_mitigation; ++ ++static int __read_mostly nx_huge_pages = -1; ++ ++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); ++ ++static struct kernel_param_ops nx_huge_pages_ops = { ++ .set = set_nx_huge_pages, ++ .get = param_get_bool, ++}; ++ ++module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); ++__MODULE_PARM_TYPE(nx_huge_pages, "bool"); ++ + /* + * When setting this variable to true it enables Two-Dimensional-Paging + * where the hardware walks 2 page tables: +@@ -284,6 +298,11 @@ static inline bool spte_ad_enabled(u64 spte) + return !(spte & shadow_acc_track_value); + } + ++static bool is_nx_huge_page_enabled(void) ++{ ++ return READ_ONCE(nx_huge_pages); ++} ++ + static inline u64 spte_shadow_accessed_mask(u64 spte) + { + MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); +@@ -1096,6 +1115,15 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) + kvm_mmu_gfn_disallow_lpage(slot, gfn); + } + ++static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) ++{ ++ if (sp->lpage_disallowed) ++ return; ++ ++ ++kvm->stat.nx_lpage_splits; ++ sp->lpage_disallowed = true; ++} ++ + static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) + { + struct kvm_memslots *slots; +@@ -1113,6 +1141,12 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) + kvm_mmu_gfn_allow_lpage(slot, gfn); + } + ++static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) 
++{ ++ --kvm->stat.nx_lpage_splits; ++ sp->lpage_disallowed = false; ++} ++ + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, + struct kvm_memory_slot *slot) + { +@@ -2665,6 +2699,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, + kvm_reload_remote_mmus(kvm); + } + ++ if (sp->lpage_disallowed) ++ unaccount_huge_nx_page(kvm, sp); ++ + sp->role.invalid = 1; + return ret; + } +@@ -2873,6 +2910,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, + if (!speculative) + spte |= spte_shadow_accessed_mask(spte); + ++ if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && ++ is_nx_huge_page_enabled()) { ++ pte_access &= ~ACC_EXEC_MASK; ++ } ++ + if (pte_access & ACC_EXEC_MASK) + spte |= shadow_x_mask; + else +@@ -3091,9 +3133,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) + __direct_pte_prefetch(vcpu, sp, sptep); + } + ++static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, ++ gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) ++{ ++ int level = *levelp; ++ u64 spte = *it.sptep; ++ ++ if (it.level == level && level > PT_PAGE_TABLE_LEVEL && ++ is_nx_huge_page_enabled() && ++ is_shadow_present_pte(spte) && ++ !is_large_pte(spte)) { ++ /* ++ * A small SPTE exists for this pfn, but FNAME(fetch) ++ * and __direct_map would like to create a large PTE ++ * instead: just force them to go down another level, ++ * patching back for them into pfn the next 9 bits of ++ * the address. ++ */ ++ u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); ++ *pfnp |= gfn & page_mask; ++ (*levelp)--; ++ } ++} ++ + static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + int map_writable, int level, kvm_pfn_t pfn, +- bool prefault) ++ bool prefault, bool lpage_disallowed) + { + struct kvm_shadow_walk_iterator it; + struct kvm_mmu_page *sp; +@@ -3106,6 +3171,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + + trace_kvm_mmu_spte_requested(gpa, level, pfn); + for_each_shadow_entry(vcpu, gpa, it) { ++ /* ++ * We cannot overwrite existing page tables with an NX ++ * large page, as the leaf could be executable. 
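++ * disallowed_hugepage_adjust() below drops to a smaller page
++ * size in that case.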
++ */ ++ disallowed_hugepage_adjust(it, gfn, &pfn, &level); ++ + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) + break; +@@ -3116,6 +3187,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + it.level - 1, true, ACC_ALL); + + link_shadow_page(vcpu, it.sptep, sp); ++ if (lpage_disallowed) ++ account_huge_nx_page(vcpu->kvm, sp); + } + } + +@@ -3416,11 +3489,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + { + int r; + int level; +- bool force_pt_level = false; ++ bool force_pt_level; + kvm_pfn_t pfn; + unsigned long mmu_seq; + bool map_writable, write = error_code & PFERR_WRITE_MASK; ++ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && ++ is_nx_huge_page_enabled(); + ++ force_pt_level = lpage_disallowed; + level = mapping_level(vcpu, gfn, &force_pt_level); + if (likely(!force_pt_level)) { + /* +@@ -3454,7 +3530,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); +- r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); ++ r = __direct_map(vcpu, v, write, map_writable, level, pfn, ++ prefault, false); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +@@ -4048,6 +4125,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + unsigned long mmu_seq; + int write = error_code & PFERR_WRITE_MASK; + bool map_writable; ++ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && ++ is_nx_huge_page_enabled(); + + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); + +@@ -4058,8 +4137,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + if (r) + return r; + +- force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, +- PT_DIRECTORY_LEVEL); ++ force_pt_level = ++ lpage_disallowed || ++ !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); + level = mapping_level(vcpu, gfn, &force_pt_level); + if (likely(!force_pt_level)) { + if (level > PT_DIRECTORY_LEVEL && +@@ -4088,7 +4168,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); +- r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); ++ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, ++ prefault, lpage_disallowed); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +@@ -5886,10 +5967,52 @@ static void mmu_destroy_caches(void) + kmem_cache_destroy(mmu_page_header_cache); + } + ++static void __set_nx_huge_pages(bool val) ++{ ++ nx_huge_pages = itlb_multihit_kvm_mitigation = val; ++} ++ ++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) ++{ ++ bool old_val = nx_huge_pages; ++ bool new_val; ++ ++ /* In "auto" mode deploy workaround only if CPU has the bug. 
*/ ++ if (sysfs_streq(val, "off")) ++ new_val = 0; ++ else if (sysfs_streq(val, "force")) ++ new_val = 1; ++ else if (sysfs_streq(val, "auto")) ++ new_val = boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT); ++ else if (strtobool(val, &new_val) < 0) ++ return -EINVAL; ++ ++ __set_nx_huge_pages(new_val); ++ ++ if (new_val != old_val) { ++ struct kvm *kvm; ++ int idx; ++ ++ mutex_lock(&kvm_lock); ++ ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ idx = srcu_read_lock(&kvm->srcu); ++ kvm_mmu_invalidate_zap_all_pages(kvm); ++ srcu_read_unlock(&kvm->srcu, idx); ++ } ++ mutex_unlock(&kvm_lock); ++ } ++ ++ return 0; ++} ++ + int kvm_mmu_module_init(void) + { + int ret = -ENOMEM; + ++ if (nx_huge_pages == -1) ++ __set_nx_huge_pages(boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT)); ++ + kvm_mmu_reset_all_pte_masks(); + + pte_list_desc_cache = kmem_cache_create("pte_list_desc", +diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h +index 3b022b08b577..adf42dc8d38b 100644 +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -596,13 +596,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, + static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + struct guest_walker *gw, + int write_fault, int hlevel, +- kvm_pfn_t pfn, bool map_writable, bool prefault) ++ kvm_pfn_t pfn, bool map_writable, bool prefault, ++ bool lpage_disallowed) + { + struct kvm_mmu_page *sp = NULL; + struct kvm_shadow_walk_iterator it; + unsigned direct_access, access = gw->pt_access; + int top_level, ret; +- gfn_t base_gfn; ++ gfn_t gfn, base_gfn; + + direct_access = gw->pte_access; + +@@ -647,13 +648,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + link_shadow_page(vcpu, it.sptep, sp); + } + +- base_gfn = gw->gfn; ++ /* ++ * FNAME(page_fault) might have clobbered the bottom bits of ++ * gw->gfn, restore them from the virtual address. ++ */ ++ gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); ++ base_gfn = gfn; + + trace_kvm_mmu_spte_requested(addr, gw->level, pfn); + + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { + clear_sp_write_flooding_count(it.sptep); +- base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); ++ ++ /* ++ * We cannot overwrite existing page tables with an NX ++ * large page, as the leaf could be executable. 
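++ * The adjustment mirrors the one in __direct_map().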
++ */ ++ disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); ++ ++ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == hlevel) + break; + +@@ -665,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + sp = kvm_mmu_get_page(vcpu, base_gfn, addr, + it.level - 1, true, direct_access); + link_shadow_page(vcpu, it.sptep, sp); ++ if (lpage_disallowed) ++ account_huge_nx_page(vcpu->kvm, sp); + } + } + +@@ -741,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, + int r; + kvm_pfn_t pfn; + int level = PT_PAGE_TABLE_LEVEL; +- bool force_pt_level = false; + unsigned long mmu_seq; + bool map_writable, is_self_change_mapping; ++ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && ++ is_nx_huge_page_enabled(); ++ bool force_pt_level = lpage_disallowed; + + pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); + +@@ -833,7 +850,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, + if (!force_pt_level) + transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, +- level, pfn, map_writable, prefault); ++ level, pfn, map_writable, prefault, lpage_disallowed); + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); + + out_unlock: +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 2714c1a0e59f..406a37aa61c7 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -206,6 +206,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { + { "mmu_unsync", VM_STAT(mmu_unsync) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, ++ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, + { "max_mmu_page_hash_collisions", + VM_STAT(max_mmu_page_hash_collisions) }, + { NULL } diff --git a/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch b/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch new file mode 100644 index 000000000..81acc63f2 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch @@ -0,0 +1,131 @@ +From: Junaid Shahid +Date: Fri, 11 Oct 2019 12:40:15 +0200 +Subject: kvm: Add helper function for creating VM worker threads + +This adds a function to create a kernel thread associated with a given +VM. In particular, it ensures that the worker thread inherits the +priority and cgroups of the calling thread. 
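+
+A minimal usage sketch (hypothetical caller and thread function; the
+in-tree user is the NX recovery worker added later in this series):
+
+	static int my_worker_fn(struct kvm *kvm, uintptr_t data)
+	{
+		/* Typically loops until kthread_should_stop(). */
+		return 0;
+	}
+
+	struct task_struct *thread;
+	int err = kvm_vm_create_worker_thread(kvm, my_worker_fn, 0,
+					      "my-worker", &thread);
+	if (!err)
+		kthread_unpark(thread);	/* the thread is created parked */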
+ +Signed-off-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +[bwh: Backported to 4.19: adjust context] +Signed-off-by: Ben Hutchings +--- + include/linux/kvm_host.h | 6 +++ + virt/kvm/kvm_main.c | 84 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 90 insertions(+) + +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 553a3115a735..96207939d862 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1305,4 +1305,10 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) + } + #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ + ++typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); ++ ++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, ++ uintptr_t data, const char *name, ++ struct task_struct **thread_ptr); ++ + #endif +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 5482949b452c..77da54d334b2 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -4142,3 +4143,86 @@ void kvm_exit(void) + kvm_vfio_ops_exit(); + } + EXPORT_SYMBOL_GPL(kvm_exit); ++ ++struct kvm_vm_worker_thread_context { ++ struct kvm *kvm; ++ struct task_struct *parent; ++ struct completion init_done; ++ kvm_vm_thread_fn_t thread_fn; ++ uintptr_t data; ++ int err; ++}; ++ ++static int kvm_vm_worker_thread(void *context) ++{ ++ /* ++ * The init_context is allocated on the stack of the parent thread, so ++ * we have to locally copy anything that is needed beyond initialization ++ */ ++ struct kvm_vm_worker_thread_context *init_context = context; ++ struct kvm *kvm = init_context->kvm; ++ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; ++ uintptr_t data = init_context->data; ++ int err; ++ ++ err = kthread_park(current); ++ /* kthread_park(current) is never supposed to return an error */ ++ WARN_ON(err != 0); ++ if (err) ++ goto init_complete; ++ ++ err = cgroup_attach_task_all(init_context->parent, current); ++ if (err) { ++ kvm_err("%s: cgroup_attach_task_all failed with err %d\n", ++ __func__, err); ++ goto init_complete; ++ } ++ ++ set_user_nice(current, task_nice(init_context->parent)); ++ ++init_complete: ++ init_context->err = err; ++ complete(&init_context->init_done); ++ init_context = NULL; ++ ++ if (err) ++ return err; ++ ++ /* Wait to be woken up by the spawner before proceeding. 
*/ ++ kthread_parkme(); ++ ++ if (!kthread_should_stop()) ++ err = thread_fn(kvm, data); ++ ++ return err; ++} ++ ++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, ++ uintptr_t data, const char *name, ++ struct task_struct **thread_ptr) ++{ ++ struct kvm_vm_worker_thread_context init_context = {}; ++ struct task_struct *thread; ++ ++ *thread_ptr = NULL; ++ init_context.kvm = kvm; ++ init_context.parent = current; ++ init_context.thread_fn = thread_fn; ++ init_context.data = data; ++ init_completion(&init_context.init_done); ++ ++ thread = kthread_run(kvm_vm_worker_thread, &init_context, ++ "%s-%d", name, task_pid_nr(current)); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ /* kthread_run is never supposed to return NULL */ ++ WARN_ON(thread == NULL); ++ ++ wait_for_completion(&init_context.init_done); ++ ++ if (!init_context.err) ++ *thread_ptr = thread; ++ ++ return init_context.err; ++} diff --git a/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch b/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch new file mode 100644 index 000000000..dd448bbf7 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch @@ -0,0 +1,368 @@ +From: Junaid Shahid +Date: Fri, 11 Oct 2019 12:40:16 +0200 +Subject: kvm: x86: mmu: Recovery of shattered NX large pages + +The page table pages corresponding to broken down large pages are +zapped in FIFO order, so that the large page can potentially +be recovered, if it is no longer being used for execution. This removes +the performance penalty for walking deeper EPT page tables. + +By default, one large page will last about one hour once the guest +reaches a steady state. + +Signed-off-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +[bwh: Backported to 4.19: adjust context] +Signed-off-by: Ben Hutchings +--- + .../admin-guide/kernel-parameters.txt | 6 + + arch/x86/include/asm/kvm_host.h | 5 + + arch/x86/kvm/mmu.c | 129 ++++++++++++++++++ + arch/x86/kvm/mmu.h | 4 + + arch/x86/kvm/x86.c | 11 ++ + virt/kvm/kvm_main.c | 30 +++- + 6 files changed, 184 insertions(+), 1 deletion(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index b2c1a5c63ab3..efdc471ed0b9 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -1967,6 +1967,12 @@ + If the sw workaround is enabled for the host, guests + need not enable it for nested guests. + ++ kvm.nx_huge_pages_recovery_ratio= ++ [KVM] Controls how many 4KiB pages are periodically zapped ++ back to huge pages. 0 disables the recovery, otherwise if ++ the value is N KVM will zap 1/Nth of the 4KiB pages every ++ minute. The default is 60. ++ + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. 
+ Default is 1 (enabled) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 59b44445ed59..efe3ba61fc23 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -281,6 +281,8 @@ struct kvm_rmap_head { + struct kvm_mmu_page { + struct list_head link; + struct hlist_node hash_link; ++ struct list_head lpage_disallowed_link; ++ + bool unsync; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ + +@@ -808,6 +810,7 @@ struct kvm_arch { + */ + struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; ++ struct list_head lpage_disallowed_mmu_pages; + struct kvm_page_track_notifier_node mmu_sp_tracker; + struct kvm_page_track_notifier_head track_notifier_head; + +@@ -878,6 +881,8 @@ struct kvm_arch { + bool x2apic_broadcast_quirk_disabled; + + bool guest_can_read_msr_platform_info; ++ ++ struct task_struct *nx_lpage_recovery_thread; + }; + + struct kvm_vm_stat { +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 19c3dc9b05cb..bafb9001ce94 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -52,16 +53,26 @@ + extern bool itlb_multihit_kvm_mitigation; + + static int __read_mostly nx_huge_pages = -1; ++static uint __read_mostly nx_huge_pages_recovery_ratio = 60; + + static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); ++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); + + static struct kernel_param_ops nx_huge_pages_ops = { + .set = set_nx_huge_pages, + .get = param_get_bool, + }; + ++static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { ++ .set = set_nx_huge_pages_recovery_ratio, ++ .get = param_get_uint, ++}; ++ + module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); + __MODULE_PARM_TYPE(nx_huge_pages, "bool"); ++module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, ++ &nx_huge_pages_recovery_ratio, 0644); ++__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); + + /* + * When setting this variable to true it enables Two-Dimensional-Paging +@@ -1121,6 +1132,8 @@ static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) + return; + + ++kvm->stat.nx_lpage_splits; ++ list_add_tail(&sp->lpage_disallowed_link, ++ &kvm->arch.lpage_disallowed_mmu_pages); + sp->lpage_disallowed = true; + } + +@@ -1145,6 +1158,7 @@ static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) + { + --kvm->stat.nx_lpage_splits; + sp->lpage_disallowed = false; ++ list_del(&sp->lpage_disallowed_link); + } + + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, +@@ -5999,6 +6013,8 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) + idx = srcu_read_lock(&kvm->srcu); + kvm_mmu_invalidate_zap_all_pages(kvm); + srcu_read_unlock(&kvm->srcu, idx); ++ ++ wake_up_process(kvm->arch.nx_lpage_recovery_thread); + } + mutex_unlock(&kvm_lock); + } +@@ -6079,3 +6095,116 @@ void kvm_mmu_module_exit(void) + unregister_shrinker(&mmu_shrinker); + mmu_audit_disable(); + } ++ ++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) ++{ ++ unsigned int old_val; ++ int err; ++ ++ old_val = nx_huge_pages_recovery_ratio; ++ err = param_set_uint(val, kp); ++ if (err) ++ return err; ++ ++ if (READ_ONCE(nx_huge_pages) && ++ !old_val && nx_huge_pages_recovery_ratio) { ++ struct kvm *kvm; ++ ++ 
mutex_lock(&kvm_lock); ++ ++ list_for_each_entry(kvm, &vm_list, vm_list) ++ wake_up_process(kvm->arch.nx_lpage_recovery_thread); ++ ++ mutex_unlock(&kvm_lock); ++ } ++ ++ return err; ++} ++ ++static void kvm_recover_nx_lpages(struct kvm *kvm) ++{ ++ int rcu_idx; ++ struct kvm_mmu_page *sp; ++ unsigned int ratio; ++ LIST_HEAD(invalid_list); ++ ulong to_zap; ++ ++ rcu_idx = srcu_read_lock(&kvm->srcu); ++ spin_lock(&kvm->mmu_lock); ++ ++ ratio = READ_ONCE(nx_huge_pages_recovery_ratio); ++ to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; ++ while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { ++ /* ++ * We use a separate list instead of just using active_mmu_pages ++ * because the number of lpage_disallowed pages is expected to ++ * be relatively small compared to the total. ++ */ ++ sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, ++ struct kvm_mmu_page, ++ lpage_disallowed_link); ++ WARN_ON_ONCE(!sp->lpage_disallowed); ++ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); ++ WARN_ON_ONCE(sp->lpage_disallowed); ++ ++ if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { ++ kvm_mmu_commit_zap_page(kvm, &invalid_list); ++ if (to_zap) ++ cond_resched_lock(&kvm->mmu_lock); ++ } ++ } ++ ++ spin_unlock(&kvm->mmu_lock); ++ srcu_read_unlock(&kvm->srcu, rcu_idx); ++} ++ ++static long get_nx_lpage_recovery_timeout(u64 start_time) ++{ ++ return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) ++ ? start_time + 60 * HZ - get_jiffies_64() ++ : MAX_SCHEDULE_TIMEOUT; ++} ++ ++static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) ++{ ++ u64 start_time; ++ long remaining_time; ++ ++ while (true) { ++ start_time = get_jiffies_64(); ++ remaining_time = get_nx_lpage_recovery_timeout(start_time); ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ while (!kthread_should_stop() && remaining_time > 0) { ++ schedule_timeout(remaining_time); ++ remaining_time = get_nx_lpage_recovery_timeout(start_time); ++ set_current_state(TASK_INTERRUPTIBLE); ++ } ++ ++ set_current_state(TASK_RUNNING); ++ ++ if (kthread_should_stop()) ++ return 0; ++ ++ kvm_recover_nx_lpages(kvm); ++ } ++} ++ ++int kvm_mmu_post_init_vm(struct kvm *kvm) ++{ ++ int err; ++ ++ err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, ++ "kvm-nx-lpage-recovery", ++ &kvm->arch.nx_lpage_recovery_thread); ++ if (!err) ++ kthread_unpark(kvm->arch.nx_lpage_recovery_thread); ++ ++ return err; ++} ++ ++void kvm_mmu_pre_destroy_vm(struct kvm *kvm) ++{ ++ if (kvm->arch.nx_lpage_recovery_thread) ++ kthread_stop(kvm->arch.nx_lpage_recovery_thread); ++} +diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h +index 65892288bf51..f7b2de7b6382 100644 +--- a/arch/x86/kvm/mmu.h ++++ b/arch/x86/kvm/mmu.h +@@ -216,4 +216,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); + bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn); + int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); ++ ++int kvm_mmu_post_init_vm(struct kvm *kvm); ++void kvm_mmu_pre_destroy_vm(struct kvm *kvm); ++ + #endif +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 406a37aa61c7..1ecadf51f154 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8950,6 +8950,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); + INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); ++ 
INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + atomic_set(&kvm->arch.noncoherent_dma_count, 0); + +@@ -8981,6 +8982,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + return 0; + } + ++int kvm_arch_post_init_vm(struct kvm *kvm) ++{ ++ return kvm_mmu_post_init_vm(kvm); ++} ++ + static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) + { + vcpu_load(vcpu); +@@ -9082,6 +9088,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) + } + EXPORT_SYMBOL_GPL(x86_set_memory_region); + ++void kvm_arch_pre_destroy_vm(struct kvm *kvm) ++{ ++ kvm_mmu_pre_destroy_vm(kvm); ++} ++ + void kvm_arch_destroy_vm(struct kvm *kvm) + { + if (current->mm == kvm->mm) { +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 77da54d334b2..7a0d86d52230 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -625,6 +625,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) + return 0; + } + ++/* ++ * Called after the VM is otherwise initialized, but just before adding it to ++ * the vm_list. ++ */ ++int __weak kvm_arch_post_init_vm(struct kvm *kvm) ++{ ++ return 0; ++} ++ ++/* ++ * Called just after removing the VM from the vm_list, but before doing any ++ * other destruction. ++ */ ++void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) ++{ ++} ++ + static struct kvm *kvm_create_vm(unsigned long type) + { + int r, i; +@@ -679,10 +696,14 @@ static struct kvm *kvm_create_vm(unsigned long type) + rcu_assign_pointer(kvm->buses[i], + kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); + if (!kvm->buses[i]) +- goto out_err; ++ goto out_err_no_mmu_notifier; + } + + r = kvm_init_mmu_notifier(kvm); ++ if (r) ++ goto out_err_no_mmu_notifier; ++ ++ r = kvm_arch_post_init_vm(kvm); + if (r) + goto out_err; + +@@ -695,6 +716,11 @@ static struct kvm *kvm_create_vm(unsigned long type) + return kvm; + + out_err: ++#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) ++ if (kvm->mmu_notifier.ops) ++ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); ++#endif ++out_err_no_mmu_notifier: + cleanup_srcu_struct(&kvm->irq_srcu); + out_err_no_irq_srcu: + cleanup_srcu_struct(&kvm->srcu); +@@ -737,6 +763,8 @@ static void kvm_destroy_vm(struct kvm *kvm) + mutex_lock(&kvm_lock); + list_del(&kvm->vm_list); + mutex_unlock(&kvm_lock); ++ kvm_arch_pre_destroy_vm(kvm); ++ + kvm_free_irq_routing(kvm); + for (i = 0; i < KVM_NR_BUSES; i++) { + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); diff --git a/debian/patches/series b/debian/patches/series index 891d589ca..6dc480d6d 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -258,6 +258,18 @@ bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.pa bugfix/all/vhost-make-sure-log_num-in_num.patch bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch +bugfix/x86//itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch +bugfix/x86//itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch +bugfix/x86//itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch +bugfix/x86//itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch +bugfix/x86//itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch +bugfix/x86//itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch +bugfix/x86//itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch 
+bugfix/x86//itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +bugfix/x86//itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch +bugfix/x86//itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +bugfix/x86//itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch +bugfix/x86//itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From 96c0e74c50744d44162d02fd8966f03302e95c3f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 20 Oct 2019 14:46:03 +0100 Subject: [PATCH 03/13] [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135) This is a backport of v6 of the TAA patch set, and will probably require updates before release. The subject lines for these patches didn't come through. --- debian/changelog | 3 + .../patches/bugfix/x86/taa/0013-TAAv6-1.patch | 67 +++ .../patches/bugfix/x86/taa/0014-TAAv6-2.patch | 45 ++ .../patches/bugfix/x86/taa/0015-TAAv6-3.patch | 245 +++++++++++ .../patches/bugfix/x86/taa/0016-TAAv6-4.patch | 336 ++++++++++++++ .../patches/bugfix/x86/taa/0017-TAAv6-5.patch | 119 +++++ .../patches/bugfix/x86/taa/0018-TAAv6-6.patch | 57 +++ .../patches/bugfix/x86/taa/0019-TAAv6-7.patch | 51 +++ .../patches/bugfix/x86/taa/0020-TAAv6-8.patch | 411 ++++++++++++++++++ .../patches/bugfix/x86/taa/0021-TAAv6-9.patch | 385 ++++++++++++++++ debian/patches/series | 9 + 11 files changed, 1728 insertions(+) create mode 100644 debian/patches/bugfix/x86/taa/0013-TAAv6-1.patch create mode 100644 debian/patches/bugfix/x86/taa/0014-TAAv6-2.patch create mode 100644 debian/patches/bugfix/x86/taa/0015-TAAv6-3.patch create mode 100644 debian/patches/bugfix/x86/taa/0016-TAAv6-4.patch create mode 100644 debian/patches/bugfix/x86/taa/0017-TAAv6-5.patch create mode 100644 debian/patches/bugfix/x86/taa/0018-TAAv6-6.patch create mode 100644 debian/patches/bugfix/x86/taa/0019-TAAv6-7.patch create mode 100644 debian/patches/bugfix/x86/taa/0020-TAAv6-8.patch create mode 100644 debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch diff --git a/debian/changelog b/debian/changelog index 49e892720..bbbbbfc5c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,6 +14,9 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - kvm: mmu: ITLB_MULTIHIT mitigation - kvm: Add helper function for creating VM worker threads - kvm: x86: mmu: Recovery of shattered NX large pages + * [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135). + TSX is now disabled by default; see + Documentation/admin-guide/hw-vuln/tsx_async_abort.rst -- Ben Hutchings Sun, 20 Oct 2019 14:21:28 +0100 diff --git a/debian/patches/bugfix/x86/taa/0013-TAAv6-1.patch b/debian/patches/bugfix/x86/taa/0013-TAAv6-1.patch new file mode 100644 index 000000000..c05a9f5f5 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0013-TAAv6-1.patch @@ -0,0 +1,67 @@ +From: speck for Pawan Gupta +Date: Wed, 9 Oct 2019 16:22:56 -0700 +Subject: TAAv6 1 + +Transactional Synchronization Extensions (TSX) may be used on certain +processors as part of a speculative side channel attack. A microcode +update for existing processors that are vulnerable to this attack will +add a new MSR, IA32_TSX_CTRL to allow the system administrator the +option to disable TSX as one of the possible mitigations. [Note that +future processors that are not vulnerable will also support the +IA32_TSX_CTRL MSR]. Add defines for the new IA32_TSX_CTRL MSR and its +bits. + +TSX has two sub-features: + +1. 
Restricted Transactional Memory (RTM) is an explicitly-used feature
+ where new instructions begin and end TSX transactions.
+2. Hardware Lock Elision (HLE) is implicitly used when certain kinds of
+ "old" style locks are used by software.
+
+Bit 7 of the IA32_ARCH_CAPABILITIES MSR indicates the presence of the
+IA32_TSX_CTRL MSR.
+
+There are two control bits in the IA32_TSX_CTRL MSR:
+
+ Bit 0: When set it disables the Restricted Transactional Memory (RTM)
+ sub-feature of TSX (will force all transactions to abort on the
+ XBEGIN instruction).
+
+ Bit 1: When set it disables the enumeration of the RTM and HLE feature
+ (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+ CPUID(EAX=7).EBX{bit11} read as 0).
+
+The other TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally
+disabled but still enumerated as present by CPUID(EAX=7).EBX{bit4}.
+
+Signed-off-by: Pawan Gupta
+Reviewed-by: Mark Gross
+Reviewed-by: Tony Luck
+Tested-by: Neelima Krishnan
+---
+ arch/x86/include/asm/msr-index.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index f58e6921cbf7..f45ca8aad98f 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -91,6 +91,7 @@
+ * physical address or cache type
+ * without TLB invalidation.
+ */
++#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+
+@@ -101,6 +102,10 @@
+ #define MSR_IA32_BBL_CR_CTL 0x00000119
+ #define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
++#define MSR_IA32_TSX_CTRL 0x00000122
++#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
++#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
++
+ #define MSR_IA32_SYSENTER_CS 0x00000174
+ #define MSR_IA32_SYSENTER_ESP 0x00000175
+ #define MSR_IA32_SYSENTER_EIP 0x00000176
diff --git a/debian/patches/bugfix/x86/taa/0014-TAAv6-2.patch b/debian/patches/bugfix/x86/taa/0014-TAAv6-2.patch
new file mode 100644
index 000000000..945d59b85
--- /dev/null
+++ b/debian/patches/bugfix/x86/taa/0014-TAAv6-2.patch
@@ -0,0 +1,45 @@
+From: speck for Pawan Gupta
+Date: Wed, 9 Oct 2019 16:23:56 -0700
+Subject: TAAv6 2
+
+Add a helper function to read the IA32_ARCH_CAPABILITIES MSR. If the
+CPU doesn't support this MSR, return 0.
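+
+A short sketch of the intended call pattern (hypothetical caller; the
+TSX control code added later in this series does the same check):
+
+	u64 ia32_cap = x86_read_arch_cap_msr();
+
+	if (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)
+		; /* MSR_IA32_TSX_CTRL can safely be read and written */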
+ +Signed-off-by: Pawan Gupta +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Tested-by: Neelima Krishnan +[bwh: Forward-ported on top of NX: Fix conflict (neighbouring changes) + in arch/x86/kernel/cpu/common.c] +Signed-off-by: Ben Hutchings +--- + arch/x86/kernel/cpu/common.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 128808dccd2f..cee109bd7f00 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1018,13 +1018,20 @@ static bool __init cpu_matches(unsigned long which) + return m && !!(m->driver_data & which); + } + +-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ++u64 x86_read_arch_cap_msr(void) + { + u64 ia32_cap = 0; + +- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + ++ return ia32_cap; ++} ++ ++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ++{ ++ u64 ia32_cap = x86_read_arch_cap_msr(); ++ + /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ + if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); diff --git a/debian/patches/bugfix/x86/taa/0015-TAAv6-3.patch b/debian/patches/bugfix/x86/taa/0015-TAAv6-3.patch new file mode 100644 index 000000000..2ec4fc340 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0015-TAAv6-3.patch @@ -0,0 +1,245 @@ +From: speck for Pawan Gupta +Date: Wed, 9 Oct 2019 16:24:56 -0700 +Subject: TAAv6 3 + +Add kernel cmdline parameter "tsx" to control the Transactional +Synchronization Extensions (TSX) feature. On CPUs that support TSX +control, use "tsx=on|off" to enable or disable TSX. Not specifying this +option is equivalent to "tsx=off". This is because on certain processors +TSX may be used as a part of a speculative side channel attack. + +Signed-off-by: Pawan Gupta +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Tested-by: Neelima Krishnan +[bwh: Backported to 4.19: adjust context] +Signed-off-by: Ben Hutchings +--- + .../admin-guide/kernel-parameters.txt | 11 ++ + arch/x86/kernel/cpu/Makefile | 2 +- + arch/x86/kernel/cpu/common.c | 2 + + arch/x86/kernel/cpu/cpu.h | 18 +++ + arch/x86/kernel/cpu/intel.c | 5 + + arch/x86/kernel/cpu/tsx.c | 115 ++++++++++++++++++ + 6 files changed, 152 insertions(+), 1 deletion(-) + create mode 100644 arch/x86/kernel/cpu/tsx.c + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index efdc471ed0b9..f03756d2addb 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4707,6 +4707,17 @@ + marks the TSC unconditionally unstable at bootup and + avoids any further wobbles once the TSC watchdog notices. + ++ tsx= [X86] Control Transactional Synchronization ++ Extensions (TSX) feature in Intel processors that ++ support TSX control. ++ ++ This parameter controls the TSX feature. The options are: ++ ++ on - Enable TSX on the system. ++ off - Disable TSX on the system. ++ ++ Not specifying this option is equivalent to tsx=off. 
++ + turbografx.map[2|3]= [HW,JOY] + TurboGraFX parallel port interface + Format: +diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile +index 347137e80bf5..320769b4807b 100644 +--- a/arch/x86/kernel/cpu/Makefile ++++ b/arch/x86/kernel/cpu/Makefile +@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o + obj-$(CONFIG_PROC_FS) += proc.o + obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o + +-obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o ++obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o tsx.o + obj-$(CONFIG_CPU_SUP_AMD) += amd.o + obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o + obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index cee109bd7f00..5f89d78fe132 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1487,6 +1487,8 @@ void __init identify_boot_cpu(void) + enable_sep_cpu(); + #endif + cpu_detect_tlb(&boot_cpu_data); ++ ++ tsx_init(); + } + + void identify_secondary_cpu(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h +index 7b229afa0a37..236582c90d3f 100644 +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -45,6 +45,22 @@ struct _tlb_table { + extern const struct cpu_dev *const __x86_cpu_dev_start[], + *const __x86_cpu_dev_end[]; + ++#ifdef CONFIG_CPU_SUP_INTEL ++enum tsx_ctrl_states { ++ TSX_CTRL_ENABLE, ++ TSX_CTRL_DISABLE, ++ TSX_CTRL_NOT_SUPPORTED, ++}; ++ ++extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; ++ ++extern void __init tsx_init(void); ++extern void tsx_enable(void); ++extern void tsx_disable(void); ++#else ++static inline void tsx_init(void) { } ++#endif /* CONFIG_CPU_SUP_INTEL */ ++ + extern void get_cpu_cap(struct cpuinfo_x86 *c); + extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); + extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +@@ -65,4 +81,6 @@ unsigned int aperfmperf_get_khz(int cpu); + + extern void x86_spec_ctrl_setup_ap(void); + ++extern u64 x86_read_arch_cap_msr(void); ++ + #endif /* ARCH_X86_CPU_H */ +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index fc3c07fe7df5..a5287b18a63f 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -766,6 +766,11 @@ static void init_intel(struct cpuinfo_x86 *c) + init_intel_energy_perf(c); + + init_intel_misc_features(c); ++ ++ if (tsx_ctrl_state == TSX_CTRL_ENABLE) ++ tsx_enable(); ++ if (tsx_ctrl_state == TSX_CTRL_DISABLE) ++ tsx_disable(); + } + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c +new file mode 100644 +index 000000000000..e39b33b7cef8 +--- /dev/null ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -0,0 +1,115 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Intel Transactional Synchronization Extensions (TSX) control. ++ * ++ * Copyright (C) 2019 Intel Corporation ++ * ++ * Author: ++ * Pawan Gupta ++ */ ++ ++#include ++#include ++ ++#include ++ ++#include "cpu.h" ++ ++enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; ++ ++void tsx_disable(void) ++{ ++ u64 tsx; ++ ++ rdmsrl(MSR_IA32_TSX_CTRL, tsx); ++ ++ /* Force all transactions to immediately abort */ ++ tsx |= TSX_CTRL_RTM_DISABLE; ++ /* ++ * Ensure TSX support is not enumerated in CPUID. ++ * This is visible to userspace and will ensure they ++ * do not waste resources trying TSX transactions that ++ * will always abort. 
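++ * (The aborts themselves are forced by TSX_CTRL_RTM_DISABLE
++ * above; clearing the CPUID bits only hides the enumeration.)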
++ */
++ tsx |= TSX_CTRL_CPUID_CLEAR;
++
++ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
++}
++
++void tsx_enable(void)
++{
++ u64 tsx;
++
++ rdmsrl(MSR_IA32_TSX_CTRL, tsx);
++
++ /* Enable the RTM feature in the cpu */
++ tsx &= ~TSX_CTRL_RTM_DISABLE;
++ /*
++ * Ensure TSX support is enumerated in CPUID.
++ * This is visible to userspace and will ensure they
++ * can enumerate and use the TSX feature.
++ */
++ tsx &= ~TSX_CTRL_CPUID_CLEAR;
++
++ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
++}
++
++static bool __init tsx_ctrl_is_supported(void)
++{
++ u64 ia32_cap = x86_read_arch_cap_msr();
++
++ /*
++ * TSX is controlled via MSR_IA32_TSX_CTRL. However,
++ * support for this MSR is enumerated by ARCH_CAP_TSX_MSR bit
++ * in MSR_IA32_ARCH_CAPABILITIES.
++ */
++ return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
++}
++
++void __init tsx_init(void)
++{
++ char arg[20];
++ int ret;
++
++ if (!tsx_ctrl_is_supported())
++ return;
++
++ ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
++ if (ret >= 0) {
++ if (!strcmp(arg, "on")) {
++ tsx_ctrl_state = TSX_CTRL_ENABLE;
++ } else if (!strcmp(arg, "off")) {
++ tsx_ctrl_state = TSX_CTRL_DISABLE;
++ } else {
++ tsx_ctrl_state = TSX_CTRL_DISABLE;
++ pr_info("tsx: invalid option, defaulting to off\n");
++ }
++ } else {
++ /* tsx= not provided, defaulting to off */
++ tsx_ctrl_state = TSX_CTRL_DISABLE;
++ }
++
++ if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
++ tsx_disable();
++ /*
++ * tsx_disable() will change the state of the
++ * RTM CPUID bit. Clear it here since it is now
++ * expected to be not set.
++ */
++ setup_clear_cpu_cap(X86_FEATURE_RTM);
++ } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
++ /*
++ * HW defaults TSX to be enabled at bootup.
++ * We may still need the TSX enable support
++ * during init for special cases like
++ * kexec after TSX is disabled.
++ */
++ tsx_enable();
++ /*
++ * tsx_enable() will change the state of the
++ * RTM CPUID bit. Force it here since it is now
++ * expected to be set.
++ */
++ setup_force_cpu_cap(X86_FEATURE_RTM);
++ }
++}
diff --git a/debian/patches/bugfix/x86/taa/0016-TAAv6-4.patch b/debian/patches/bugfix/x86/taa/0016-TAAv6-4.patch
new file mode 100644
index 000000000..fbd8a7e42
--- /dev/null
+++ b/debian/patches/bugfix/x86/taa/0016-TAAv6-4.patch
@@ -0,0 +1,336 @@
+From: speck for Pawan Gupta
+Date: Wed, 9 Oct 2019 16:25:56 -0700
+Subject: TAAv6 4
+
+TSX Async Abort (TAA) is a side channel vulnerability to the internal
+buffers in some Intel processors similar to Microarchitectural Data
+Sampling (MDS). In this case certain loads may speculatively pass
+invalid data to dependent operations when an asynchronous abort
+condition is pending in a TSX transaction. This includes loads with no
+fault or assist condition. Such loads may speculatively expose stale
+data from the uarch data structures as in MDS. The scope of exposure
+covers both same-thread and cross-thread leakage. This issue affects
+all current processors that support TSX, but do not have ARCH_CAP_TAA_NO
+(bit 8) set in MSR_IA32_ARCH_CAPABILITIES.
+
+On CPUs where the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0,
+CPUID.MD_CLEAR=1, and the MDS mitigation is already clearing the CPU
+buffers using VERW or L1D_FLUSH, no additional mitigation is needed for
+TAA.
+
+On affected CPUs with MDS_NO=1 this issue can be mitigated by disabling
+the Transactional Synchronization Extensions (TSX) feature. A new MSR
+IA32_TSX_CTRL in future and current processors after a microcode update
+can be used to control the TSX feature.
TSX_CTRL_RTM_DISABLE bit disables +the TSX sub-feature Restricted Transactional Memory (RTM). +TSX_CTRL_CPUID_CLEAR bit clears the RTM enumeration in CPUID. The other +TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally +disabled with updated microcode but still enumerated as present by +CPUID(EAX=7).EBX{bit4}. + +The second mitigation approach is similar to MDS which is clearing the +affected CPU buffers on return to user space and when entering a guest. +Relevant microcode update is required for the mitigation to work. More +details on this approach can be found here: +https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html + +TSX feature can be controlled by the "tsx" command line parameter. If +the TSX feature is forced to be enabled then "Clear CPU buffers" (MDS +mitigation) is deployed. The effective mitigation state can be read from +sysfs. + +Signed-off-by: Pawan Gupta +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Tested-by: Neelima Krishnan +[bwh: Forward-ported on top of NX: Renumber bug bit after + X86_BUG_ITLB_MULTIHIT] +[bwh: Backported to 4.19: Add #include "cpu.h" in bugs.c] +Signed-off-by: Ben Hutchings +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/msr-index.h | 4 + + arch/x86/include/asm/nospec-branch.h | 4 +- + arch/x86/include/asm/processor.h | 7 ++ + arch/x86/kernel/cpu/bugs.c | 129 ++++++++++++++++++++++++++- + arch/x86/kernel/cpu/common.c | 15 ++++ + 6 files changed, 156 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index ccad4f183400..5a2eecfed727 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -390,5 +390,6 @@ + #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ + #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(22) /* CPU may incur MCE during certain page attribute changes */ ++#define X86_BUG_TAA X86_BUG(23) /* CPU is affected by TSX Async Abort(TAA) */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index f45ca8aad98f..6d17eb64cc69 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -92,6 +92,10 @@ + * without TLB invalidation. + */ + #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ ++#define ARCH_CAP_TAA_NO BIT(8) /* ++ * Not susceptible to ++ * TSX Async Abort (TAA) vulnerabilities. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 28cb2b31527a..09c7466c4880 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + #include + + /** +- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability ++ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the +@@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers(void) + } + + /** +- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability ++ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index b54f25697beb..4a163f33a07d 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -1003,4 +1003,11 @@ enum mds_mitigations { + MDS_MITIGATION_VMWERV, + }; + ++enum taa_mitigations { ++ TAA_MITIGATION_OFF, ++ TAA_MITIGATION_UCODE_NEEDED, ++ TAA_MITIGATION_VERW, ++ TAA_MITIGATION_TSX_DISABLE, ++}; ++ + #endif /* _ASM_X86_PROCESSOR_H */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 1e764992fa64..841f106a277a 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -32,11 +32,14 @@ + #include + #include + ++#include "cpu.h" ++ + static void __init spectre_v1_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); + static void __init mds_select_mitigation(void); ++static void __init taa_select_mitigation(void); + + /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ + u64 x86_spec_ctrl_base; +@@ -103,6 +106,7 @@ void __init check_bugs(void) + ssb_select_mitigation(); + l1tf_select_mitigation(); + mds_select_mitigation(); ++ taa_select_mitigation(); + + arch_smt_update(); + +@@ -266,6 +270,110 @@ static int __init mds_cmdline(char *str) + } + early_param("mds", mds_cmdline); + ++#undef pr_fmt ++#define pr_fmt(fmt) "TAA: " fmt ++ ++/* Default mitigation for TAA-affected CPUs */ ++static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; ++static bool taa_nosmt __ro_after_init; ++ ++static const char * const taa_strings[] = { ++ [TAA_MITIGATION_OFF] = "Vulnerable", ++ [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", ++ [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", ++ [TAA_MITIGATION_TSX_DISABLE] = "Mitigation: TSX disabled", ++}; ++ ++static void __init taa_select_mitigation(void) ++{ ++ u64 ia32_cap = x86_read_arch_cap_msr(); ++ ++ if (!boot_cpu_has_bug(X86_BUG_TAA)) { ++ taa_mitigation = TAA_MITIGATION_OFF; ++ return; ++ } ++ ++ /* ++ * As X86_BUG_TAA=1, TSX feature is supported by the hardware. If ++ * TSX was disabled (X86_FEATURE_RTM=0) earlier during tsx_init(). ++ * Select TSX_DISABLE as mitigation. ++ * ++ * This check is ahead of mitigations=off and tsx_async_abort=off ++ * because when TSX is disabled mitigation is already in place. This ++ * ensures sysfs doesn't show "Vulnerable" when TSX is disabled. 
++ */ ++ if (!boot_cpu_has(X86_FEATURE_RTM)) { ++ taa_mitigation = TAA_MITIGATION_TSX_DISABLE; ++ pr_info("%s\n", taa_strings[taa_mitigation]); ++ return; ++ } ++ ++ /* All mitigations turned off from cmdline (mitigations=off) */ ++ if (cpu_mitigations_off()) { ++ taa_mitigation = TAA_MITIGATION_OFF; ++ return; ++ } ++ ++ /* TAA mitigation is turned off from cmdline (tsx_async_abort=off) */ ++ if (taa_mitigation == TAA_MITIGATION_OFF) { ++ pr_info("%s\n", taa_strings[taa_mitigation]); ++ return; ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) ++ taa_mitigation = TAA_MITIGATION_VERW; ++ else ++ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; ++ ++ /* ++ * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. ++ * A microcode update fixes this behavior to clear CPU buffers. ++ * Microcode update also adds support for MSR_IA32_TSX_CTRL which ++ * is enumerated by ARCH_CAP_TSX_CTRL_MSR bit. ++ * ++ * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode ++ * update is required. ++ */ ++ if ((ia32_cap & ARCH_CAP_MDS_NO) && ++ !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) ++ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; ++ ++ /* ++ * TSX is enabled, select alternate mitigation for TAA which is ++ * same as MDS. Enable MDS static branch to clear CPU buffers. ++ * ++ * For guests that can't determine whether the correct microcode is ++ * present on host, enable the mitigation for UCODE_NEEDED as well. ++ */ ++ static_branch_enable(&mds_user_clear); ++ ++ if (taa_nosmt || cpu_mitigations_auto_nosmt()) ++ cpu_smt_disable(false); ++ ++ pr_info("%s\n", taa_strings[taa_mitigation]); ++} ++ ++static int __init tsx_async_abort_cmdline(char *str) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_TAA)) ++ return 0; ++ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) { ++ taa_mitigation = TAA_MITIGATION_OFF; ++ } else if (!strcmp(str, "full")) { ++ taa_mitigation = TAA_MITIGATION_VERW; ++ } else if (!strcmp(str, "full,nosmt")) { ++ taa_mitigation = TAA_MITIGATION_VERW; ++ taa_nosmt = true; ++ } ++ ++ return 0; ++} ++early_param("tsx_async_abort", tsx_async_abort_cmdline); ++ + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V1 : " fmt + +@@ -751,7 +859,7 @@ static void update_indir_branch_cond(void) + #undef pr_fmt + #define pr_fmt(fmt) fmt + +-/* Update the static key controlling the MDS CPU buffer clear in idle */ ++/* Update the static key controlling the MDS and TAA CPU buffer clear in idle */ + static void update_mds_branch_idle(void) + { + /* +@@ -761,8 +869,11 @@ static void update_mds_branch_idle(void) + * The other variants cannot be mitigated when SMT is enabled, so + * clearing the buffers on idle just to prevent the Store Buffer + * repartitioning leak would be a window dressing exercise. ++ * ++ * Apply idle buffer clearing to TAA affected CPUs also. + */ +- if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) ++ if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY) && ++ !boot_cpu_has_bug(X86_BUG_TAA)) + return; + + if (sched_smt_active()) +@@ -772,6 +883,7 @@ static void update_mds_branch_idle(void) + } + + #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" ++#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" + + void arch_smt_update(void) + { +@@ -804,6 +916,19 @@ void arch_smt_update(void) + break; + } + ++ switch (taa_mitigation) { ++ case TAA_MITIGATION_VERW: ++ case TAA_MITIGATION_UCODE_NEEDED: ++ if (sched_smt_active()) ++ pr_warn_once(TAA_MSG_SMT); ++ /* TSX is enabled, apply MDS idle buffer clearing. */ ++ update_mds_branch_idle(); ++ break; ++ case TAA_MITIGATION_TSX_DISABLE: ++ case TAA_MITIGATION_OFF: ++ break; ++ } ++ + mutex_unlock(&spec_ctrl_mutex); + } + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 5f89d78fe132..394bcb0403c9 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1058,6 +1058,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + if (!cpu_matches(NO_SWAPGS)) + setup_force_cpu_bug(X86_BUG_SWAPGS); + ++ /* ++ * When processor is not mitigated for TAA (TAA_NO=0) set TAA bug when: ++ * - TSX is supported or ++ * - TSX_CTRL is supported ++ * ++ * TSX_CTRL check is needed for cases when TSX could be disabled before ++ * the kernel boot e.g. kexec ++ * TSX_CTRL check alone is not sufficient for cases when the microcode ++ * update is not present or running as guest that don't get TSX_CTRL. ++ */ ++ if (!(ia32_cap & ARCH_CAP_TAA_NO) && ++ (boot_cpu_has(X86_FEATURE_RTM) || ++ (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) ++ setup_force_cpu_bug(X86_BUG_TAA); ++ + if (cpu_matches(NO_MELTDOWN)) + return; + diff --git a/debian/patches/bugfix/x86/taa/0017-TAAv6-5.patch b/debian/patches/bugfix/x86/taa/0017-TAAv6-5.patch new file mode 100644 index 000000000..2aae2283a --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0017-TAAv6-5.patch @@ -0,0 +1,119 @@ +From: speck for Pawan Gupta +Date: Wed, 9 Oct 2019 16:26:56 -0700 +Subject: TAAv6 5 + +Add the sysfs reporting file for TSX Async Abort. It exposes the +vulnerability and the mitigation state similar to the existing files for +the other hardware vulnerabilities. + +sysfs file path is: +/sys/devices/system/cpu/vulnerabilities/tsx_async_abort + +Signed-off-by: Pawan Gupta +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Tested-by: Neelima Krishnan +[bwh: Forward-ported on top of NX: Fix conflicts (neighbouring + insertions) in arch/x86/kernel/cpu/bugs.c, drivers/base/cpu.c, + include/linux/cpu.h] +Signed-off-by: Ben Hutchings +--- + arch/x86/kernel/cpu/bugs.c | 23 +++++++++++++++++++++++ + drivers/base/cpu.c | 9 +++++++++ + include/linux/cpu.h | 3 +++ + 3 files changed, 35 insertions(+) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 841f106a277a..c435bc5dc19b 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1439,6 +1439,21 @@ static ssize_t mds_show_state(char *buf) + sched_smt_active() ? "vulnerable" : "disabled"); + } + ++static ssize_t tsx_async_abort_show_state(char *buf) ++{ ++ if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLE) || ++ (taa_mitigation == TAA_MITIGATION_OFF)) ++ return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); ++ ++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { ++ return sprintf(buf, "%s; SMT Host state unknown\n", ++ taa_strings[taa_mitigation]); ++ } ++ ++ return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], ++ sched_smt_active() ? 
"vulnerable" : "disabled"); ++} ++ + static char *stibp_state(void) + { + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) +@@ -1510,6 +1525,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + case X86_BUG_ITLB_MULTIHIT: + return itlb_multihit_show_state(buf); + ++ case X86_BUG_TAA: ++ return tsx_async_abort_show_state(buf); ++ + default: + break; + } +@@ -1551,4 +1569,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr + { + return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); + } ++ ++ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_TAA); ++} + #endif +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c +index c21e2aec5cbb..e9e7fde0fe00 100644 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -558,6 +558,13 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev, + return sprintf(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +@@ -565,6 +572,7 @@ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); + static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); + static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); + static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); ++static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@ -574,6 +582,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_l1tf.attr, + &dev_attr_mds.attr, + &dev_attr_itlb_multihit.attr, ++ &dev_attr_tsx_async_abort.attr, + NULL + }; + +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 7bb824b0f30e..9d8dba19844e 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -61,6 +61,9 @@ extern ssize_t cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_itlb_multihit(struct device *dev, + struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_tsx_async_abort(struct device *dev, ++ struct device_attribute *attr, ++ char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/debian/patches/bugfix/x86/taa/0018-TAAv6-6.patch b/debian/patches/bugfix/x86/taa/0018-TAAv6-6.patch new file mode 100644 index 000000000..f03d82de9 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0018-TAAv6-6.patch @@ -0,0 +1,57 @@ +From: speck for Pawan Gupta +Date: Wed, 9 Oct 2019 16:27:56 -0700 +Subject: TAAv6 6 + +Export IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX Async +Abort(TAA) affected hosts that have TSX enabled and updated microcode. +This is required so that the guests don't complain, + + "Vulnerable: Clear CPU buffers attempted, no microcode" + +when the host has the updated microcode to clear CPU buffers. + +Microcode update also adds support for MSR_IA32_TSX_CTRL which is +enumerated by the ARCH_CAP_TSX_CTRL bit in IA32_ARCH_CAPABILITIES MSR. +Guests can't do this check themselves when the ARCH_CAP_TSX_CTRL bit is +not exported to the guests. 
+ +In this case export MDS_NO=0 to the guests. When guests have +CPUID.MD_CLEAR=1 guests deploy MDS mitigation which also mitigates TAA. + +Signed-off-by: Pawan Gupta +Reviewed-by: Tony Luck +Tested-by: Neelima Krishnan +--- + arch/x86/kvm/x86.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 1ecadf51f154..5ccf79739b2b 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1143,6 +1143,25 @@ u64 kvm_get_arch_capabilities(void) + if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) + data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + ++ /* ++ * On TAA affected systems, export MDS_NO=0 when: ++ * - TSX is enabled on host, i.e. X86_FEATURE_RTM=1. ++ * - Updated microcode is present. This is detected by ++ * the presence of ARCH_CAP_TSX_CTRL_MSR. This ensures ++ * VERW clears CPU buffers. ++ * ++ * When MDS_NO=0 is exported, guests deploy clear CPU buffer ++ * mitigation and don't complain: ++ * ++ * "Vulnerable: Clear CPU buffers attempted, no microcode" ++ * ++ * If TSX is disabled on the system, guests are also mitigated against ++ * TAA and clear CPU buffer mitigation is not required for guests. ++ */ ++ if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && ++ (data & ARCH_CAP_TSX_CTRL_MSR)) ++ data &= ~ARCH_CAP_MDS_NO; ++ + return data; + } + EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); diff --git a/debian/patches/bugfix/x86/taa/0019-TAAv6-7.patch b/debian/patches/bugfix/x86/taa/0019-TAAv6-7.patch new file mode 100644 index 000000000..c431406a5 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0019-TAAv6-7.patch @@ -0,0 +1,51 @@ +From: speck for Pawan Gupta +Date: Wed, 9 Oct 2019 16:28:56 -0700 +Subject: TAAv6 7 + +Platforms which are not affected by X86_BUG_TAA may want the TSX feature +enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto +disable TSX when X86_BUG_TAA is present, otherwise enable TSX. + +More details on X86_BUG_TAA can be found here: +https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html + +Signed-off-by: Pawan Gupta +Reviewed-by: Tony Luck +Tested-by: Neelima Krishnan +--- + Documentation/admin-guide/kernel-parameters.txt | 5 +++++ + arch/x86/kernel/cpu/tsx.c | 5 +++++ + 2 files changed, 10 insertions(+) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index f03756d2addb..dffdd4d86f4b 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4715,6 +4715,11 @@ + + on - Enable TSX on the system. + off - Disable TSX on the system. ++ auto - Disable TSX if X86_BUG_TAA is present, ++ otherwise enable TSX on the system. ++ ++ More details on X86_BUG_TAA are here: ++ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + + Not specifying this option is equivalent to tsx=off. 
+ 
+diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
+index e39b33b7cef8..e93abe6f0bb9 100644
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -80,6 +80,11 @@ void __init tsx_init(void)
+ 		tsx_ctrl_state = TSX_CTRL_ENABLE;
+ 	} else if (!strcmp(arg, "off")) {
+ 		tsx_ctrl_state = TSX_CTRL_DISABLE;
++	} else if (!strcmp(arg, "auto")) {
++		if (boot_cpu_has_bug(X86_BUG_TAA))
++			tsx_ctrl_state = TSX_CTRL_DISABLE;
++		else
++			tsx_ctrl_state = TSX_CTRL_ENABLE;
+ 	} else {
+ 		tsx_ctrl_state = TSX_CTRL_DISABLE;
+ 		pr_info("tsx: invalid option, defaulting to off\n");
diff --git a/debian/patches/bugfix/x86/taa/0020-TAAv6-8.patch b/debian/patches/bugfix/x86/taa/0020-TAAv6-8.patch
new file mode 100644
index 000000000..24a24c010
--- /dev/null
+++ b/debian/patches/bugfix/x86/taa/0020-TAAv6-8.patch
@@ -0,0 +1,411 @@
+From: speck for Pawan Gupta
+Date: Wed, 9 Oct 2019 16:29:57 -0700
+Subject: TAAv6 8
+
+Add the documentation for TSX Async Abort. Include the description of
+the issue, how to check the mitigation state, how to control the
+mitigation, and guidance for system administrators.
+
+Signed-off-by: Pawan Gupta
+Co-developed-by: Antonio Gomez Iglesias
+Signed-off-by: Antonio Gomez Iglesias
+Reviewed-by: Mark Gross
+Reviewed-by: Tony Luck
+[bwh: Forward-ported on top of NX: Fix conflict (neighbouring
+ insertions) in Documentation/ABI/testing/sysfs-devices-system-cpu]
+[bwh: Backported to 4.19: adjust context]
+Signed-off-by: Ben Hutchings
+---
+ .../ABI/testing/sysfs-devices-system-cpu      |   1 +
+ Documentation/admin-guide/hw-vuln/index.rst   |   1 +
+ .../admin-guide/hw-vuln/tsx_async_abort.rst   | 240 ++++++++++++++++++
+ .../admin-guide/kernel-parameters.txt         |  36 +++
+ Documentation/x86/index.rst                   |   1 +
+ Documentation/x86/tsx_async_abort.rst         |  54 ++++
+ 6 files changed, 333 insertions(+)
+ create mode 100644 Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+ create mode 100644 Documentation/x86/tsx_async_abort.rst
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -479,6 +479,7 @@ What:		/sys/devices/system/cpu/vulnerabi
+ 		/sys/devices/system/cpu/vulnerabilities/l1tf
+ 		/sys/devices/system/cpu/vulnerabilities/mds
+ 		/sys/devices/system/cpu/vulnerabilities/itlb_multihit
++		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ Date:		January 2018
+ Contact:	Linux kernel mailing list
+ Description:	Information about CPU vulnerabilities
+--- a/Documentation/admin-guide/hw-vuln/index.rst
++++ b/Documentation/admin-guide/hw-vuln/index.rst
+@@ -12,3 +12,4 @@ are configurable at compile, boot or run
+    spectre
+    l1tf
+    mds
++   tsx_async_abort
+--- /dev/null
++++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+@@ -0,0 +1,240 @@
++TAA - TSX Asynchronous Abort
++======================================
++
++TAA is a hardware vulnerability that allows unprivileged speculative access to
++data which is available in various CPU internal buffers by using asynchronous
++aborts within an Intel TSX transactional region.
++
++Affected processors
++-------------------
++
++This vulnerability only affects Intel processors that support Intel
++Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
++is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit
++(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
++also mitigate against TAA.
++
++Whether a processor is affected or not can be read out from the TAA
++vulnerability file in sysfs.
See :ref:`tsx_async_abort_sys_info`.
++
++Related CVEs
++------------
++
++The following CVE entry is related to this TAA issue:
++
++   ==============  =====  ===================================================
++   CVE-2019-11135  TAA    TSX Asynchronous Abort (TAA) condition on some
++                          microprocessors utilizing speculative execution may
++                          allow an authenticated user to potentially enable
++                          information disclosure via a side channel with
++                          local access.
++   ==============  =====  ===================================================
++
++Problem
++-------
++
++When performing store, load, L1 refill operations, processors write data into
++temporary microarchitectural structures (buffers). The data in the buffer can
++be forwarded to load operations as an optimization.
++
++Intel TSX is an extension to the x86 instruction set architecture that adds
++hardware transactional memory support to improve performance of multi-threaded
++software. TSX lets the processor expose and exploit concurrency hidden in an
++application due to dynamically avoiding unnecessary synchronization.
++
++TSX supports atomic memory transactions that are either committed (success) or
++aborted. During an abort, operations that happened within the transactional region
++are rolled back. An asynchronous abort takes place, among other options, when a
++different thread accesses a cache line that is also used within the transactional
++region when that access might lead to a data race.
++
++Immediately after an uncompleted asynchronous abort, certain speculatively
++executed loads may read data from those internal buffers and pass it to dependent
++operations. This can then be used to infer the value via a cache side channel
++attack.
++
++Because the buffers are potentially shared between Hyper-Threads, cross
++Hyper-Thread attacks are possible.
++
++The victim of a malicious actor does not need to make use of TSX. Only the
++attacker needs to begin a TSX transaction and raise an asynchronous abort
++to try to leak some of the data stored in the buffers.
++
++Deeper technical information is available in the TAA specific x86 architecture
++section: :ref:`Documentation/x86/tsx_async_abort.rst `.
++
++
++Attack scenarios
++----------------
++
++Attacks against the TAA vulnerability can be implemented from unprivileged
++applications running on hosts or guests.
++
++As for MDS, the attacker has no control over the memory addresses that can be
++leaked. Only the victim is responsible for bringing data to the CPU. As a
++result, the malicious actor has to first sample as much data as possible and
++then postprocess it to try to infer any useful information from it.
++
++A potential attacker only has read access to the data. Also, there is no direct
++privilege escalation by using this technique.
++
++
++.. _tsx_async_abort_sys_info:
++
++TAA system information
++-----------------------
++
++The Linux kernel provides a sysfs interface to enumerate the current TAA status
++of mitigated systems. The relevant sysfs file is:
++
++/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
++
++The possible values in this file are:
++
++.. list-table::
++
++   * - 'Vulnerable'
++     - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
++   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
++     - The system tries to clear the buffers but the microcode might not support the operation.
++   * - 'Mitigation: Clear CPU buffers'
++     - The microcode has been updated to clear the buffers. TSX is still enabled.
++ * - 'Mitigation: TSX disabled' ++ - TSX is disabled. ++ * - 'Not affected' ++ - The CPU is not affected by this issue. ++ ++.. _ucode_needed: ++ ++Best effort mitigation mode ++^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++If the processor is vulnerable, but the availability of the microcode-based ++mitigation mechanism is not advertised via CPUID the kernel selects a best ++effort mitigation mode. This mode invokes the mitigation instructions ++without a guarantee that they clear the CPU buffers. ++ ++This is done to address virtualization scenarios where the host has the ++microcode update applied, but the hypervisor is not yet updated to expose the ++CPUID to the guest. If the host has updated microcode the protection takes ++effect; otherwise a few CPU cycles are wasted pointlessly. ++ ++The state in the tsx_async_abort sysfs file reflects this situation ++accordingly. ++ ++ ++Mitigation mechanism ++-------------------- ++ ++The kernel detects the affected CPUs and the presence of the microcode which is ++required. If a CPU is affected and the microcode is available, then the kernel ++enables the mitigation by default. ++ ++ ++The mitigation can be controlled at boot time via a kernel command line option. ++See :ref:`taa_mitigation_control_command_line`. It also provides a sysfs ++interface. See :ref:`taa_mitigation_sysfs`. ++ ++.. _virt_mechanism: ++ ++Virtualization mitigation ++^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++Affected systems where the host has the TAA microcode and the TAA mitigation is ++ON (with TSX disabled) are not vulnerable regardless of the status of the VMs. ++ ++In all other cases, if the host either does not have the TAA microcode or the ++kernel is not mitigated, the system might be vulnerable. ++ ++ ++.. _taa_mitigation_control_command_line: ++ ++Mitigation control on the kernel command line ++--------------------------------------------- ++ ++The kernel command line allows to control the TAA mitigations at boot time with ++the option "tsx_async_abort=". The valid arguments for this option are: ++ ++ ============ ============================================================= ++ off This option disables the TAA mitigation on affected platforms. ++ If the system has TSX enabled (see next parameter) and the CPU ++ is affected, the system is vulnerable. ++ ++ full TAA mitigation is enabled. If TSX is enabled, on an affected ++ system it will clear CPU buffers on ring transitions. On ++ systems which are MDS-affected and deploy MDS mitigation, ++ TAA is also mitigated. Specifying this option on those ++ systems will have no effect. ++ ++ full,nosmt The same as tsx_async_abort=full, with SMT disabled on ++ vulnerable CPUs that have TSX enabled. This is the complete ++ mitigation. When TSX is disabled, SMT is not disabled because ++ CPU is not vulnerable to cross-thread TAA attacks. ++ ============ ============================================================= ++ ++Not specifying this option is equivalent to "tsx_async_abort=full". ++ ++The kernel command line also allows to control the TSX feature using the ++parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used ++to control the TSX feature and the enumeration of the TSX feature bits (RTM ++and HLE) in CPUID. ++ ++The valid options are: ++ ++ ============ ============================================================= ++ off Disables TSX. ++ ++ on Enables TSX. ++ ++ auto Disables TSX on affected platform, otherwise enables TSX. 
++ ============ ============================================================= ++ ++Not specifying this option is equivalent to "tsx=off". ++ ++The following combinations of the "tsx_async_abort" and "tsx" are possible. For ++affected platforms tsx=auto is equivalent to tsx=off and the result will be: ++ ++ ========= ==================== ========================================= ++ tsx=on tsx_async_abort=full The system will use VERW to clear CPU ++ buffers. ++ tsx=on tsx_async_abort=off The system is vulnerable. ++ tsx=off tsx_async_abort=full TSX is disabled. System is not vulnerable. ++ tsx=off tsx_async_abort=off TSX is disabled. System is not vulnerable. ++ ========= ==================== ========================================= ++ ++For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU ++buffers. For platforms without TSX control "tsx" command line argument has no ++effect. ++ ++ ++Mitigation selection guide ++-------------------------- ++ ++1. Trusted userspace and guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++If all user space applications are from a trusted source and do not execute ++untrusted code which is supplied externally, then the mitigation can be ++disabled. The same applies to virtualized environments with trusted guests. ++ ++ ++2. Untrusted userspace and guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++If there are untrusted applications or guests on the system, enabling TSX ++might allow a malicious actor to leak data from the host or from other ++processes running on the same physical core. ++ ++If the microcode is available and the TSX is disabled on the host, attacks ++are prevented in a virtualized environment as well, even if the VMs do not ++explicitly enable the mitigation. ++ ++ ++.. _taa_default_mitigations: ++ ++Default mitigations ++------------------- ++ ++The kernel's default action for vulnerable processors is: ++ ++ - Deploy TSX disable mitigation (tsx_async_abort=full). +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -2538,6 +2538,7 @@ + spec_store_bypass_disable=off [X86,PPC] + l1tf=off [X86] + mds=off [X86] ++ tsx_async_abort=off [X86] + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT +@@ -2553,6 +2554,7 @@ + be fully mitigated, even if it means losing SMT. + Equivalent to: l1tf=flush,nosmt [X86] + mds=full,nosmt [X86] ++ tsx_async_abort=full,nosmt [X86] + + mminit_loglevel= + [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this +@@ -4528,6 +4530,40 @@ + neutralize any effect of /proc/sys/kernel/sysrq. + Useful for debugging. + ++ tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async ++ Abort (TAA) vulnerability. ++ ++ Similar to Micro-architectural Data Sampling (MDS) ++ certain CPUs that support Transactional ++ Synchronization Extensions (TSX) are vulnerable to an ++ exploit against CPU internal buffers which can forward ++ information to a disclosure gadget under certain ++ conditions. ++ ++ In vulnerable processors, the speculatively forwarded ++ data can be used in a cache side channel attack, to ++ access data to which the attacker does not have direct ++ access. ++ ++ This parameter controls the TAA mitigation. The ++ options are: ++ ++ full - Enable TAA mitigation on vulnerable CPUs ++ full,nosmt - Enable TAA mitigation and disable SMT on ++ vulnerable CPUs. If TSX is disabled, SMT ++ is not disabled because CPU is not ++ vulnerable to cross-thread TAA attacks. 
++			off        - Unconditionally disable TAA mitigation
++
++			Not specifying this option is equivalent to
++			tsx_async_abort=full. On CPUs which are MDS affected
++			and deploy MDS mitigation, TAA mitigation is not
++			required and doesn't provide any additional
++			mitigation.
++
++			For details see:
++			Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
++
+ 	tcpmhash_entries=	[KNL,NET]
+ 			Set the number of tcp_metrics_hash slots.
+ 			Default value is 8192 or 16384 depending on total
+--- a/Documentation/x86/index.rst
++++ b/Documentation/x86/index.rst
+@@ -6,3 +6,4 @@ x86 architecture specifics
+    :maxdepth: 1
+
+    mds
++   tsx_async_abort
+--- /dev/null
++++ b/Documentation/x86/tsx_async_abort.rst
+@@ -0,0 +1,54 @@
++TSX Async Abort (TAA) mitigation
++=================================================
++
++.. _tsx_async_abort:
++
++Overview
++--------
++
++TSX Async Abort (TAA) is a side channel attack on internal buffers in some
++Intel processors similar to Microarchitectural Data Sampling (MDS). In this
++case, certain loads may speculatively pass invalid data to dependent operations
++when an asynchronous abort condition is pending in a Transactional
++Synchronization Extensions (TSX) transaction. This includes loads with no
++fault or assist condition. Such loads may speculatively expose stale data from
++the same uarch data structures as in MDS, with the same scope of exposure,
++i.e. same-thread and cross-thread. This issue affects all current processors
++that support TSX.
++
++Mitigation strategy
++-------------------
++
++a) TSX disable - One of the mitigations is to disable the TSX feature. A new
++MSR, IA32_TSX_CTRL, will be available in future and current processors after
++a microcode update, and can be used to disable TSX. This MSR can be used to
++disable the TSX feature and the enumeration of the TSX feature bits (RTM and
++HLE) in CPUID.
++
++b) CPU clear buffers - Similar to MDS, clearing the CPU buffers mitigates this
++vulnerability. More details on this approach can be found here:
++https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html
++
++Kernel internal mitigation modes
++--------------------------------
++
++ ============= ============================================================
++ off           Mitigation is disabled. Either the CPU is not affected or
++               tsx_async_abort=off is supplied on the kernel command line.
++
++ tsx disabled  Mitigation is enabled. The TSX feature is disabled by default
++               at bootup on processors that support TSX control.
++
++ verw          Mitigation is enabled. CPU is affected and MD_CLEAR is
++               advertised in CPUID.
++
++ ucode needed  Mitigation is enabled. CPU is affected and MD_CLEAR is not
++               advertised in CPUID. That is mainly for virtualization
++               scenarios where the host has the updated microcode but the
++               hypervisor does not expose MD_CLEAR in CPUID. It's a best
++               effort approach without guarantee.
++ ============= ============================================================
++
++If the CPU is affected and the "tsx_async_abort" kernel command line parameter
++is not provided, then the kernel selects an appropriate mitigation depending
++on the status of RTM and MD_CLEAR CPUID bits.
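As a minimal sketch of the mode selection just documented (an illustrative
aside, not part of the quoted patch: the enum and function names here are
invented for the example; the real logic lives in taa_select_mitigation()
in arch/x86/kernel/cpu/bugs.c):

    #include <stdbool.h>

    /* Sketch of the default TAA mitigation selection described above. */
    enum taa_mode { TAA_OFF, TAA_TSX_DISABLED, TAA_VERW, TAA_UCODE_NEEDED };

    enum taa_mode taa_pick_default(bool cpu_affected, bool tsx_enabled,
                                   bool md_clear_in_cpuid)
    {
            if (!cpu_affected)
                    return TAA_OFF;            /* "Not affected" */
            if (!tsx_enabled)
                    return TAA_TSX_DISABLED;   /* "Mitigation: TSX disabled" */
            if (md_clear_in_cpuid)
                    return TAA_VERW;           /* "Mitigation: Clear CPU buffers" */
            return TAA_UCODE_NEEDED;           /* best effort, no guarantee */
    }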
diff --git a/debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch b/debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch
new file mode 100644
index 000000000..9d739b982
--- /dev/null
+++ b/debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch
@@ -0,0 +1,385 @@
+From: speck for Pawan Gupta
+Date: Wed, 9 Oct 2019 16:30:57 -0700
+Subject: TAAv6 9
+
+Transactional Synchronization Extensions (TSX) is an extension to the
+x86 instruction set architecture (ISA) that adds Hardware Transactional
+Memory (HTM) support. Changing the TSX state currently requires a reboot.
+This may not be desirable when rebooting imposes a huge penalty. Add
+support to control the TSX feature via a new sysfs file:
+/sys/devices/system/cpu/hw_tx_mem
+
+- Writing 0|off|N|n to this file disables the TSX feature on all the CPUs.
+  This is equivalent to boot parameter tsx=off.
+- Writing 1|on|Y|y to this file enables the TSX feature on all the CPUs.
+  This is equivalent to boot parameter tsx=on.
+- Reading from this returns the status of the TSX feature.
+- When TSX control is not supported, this interface is not visible in
+  sysfs.
+
+Changing the TSX state from this interface also updates the CPUID.RTM
+feature bit. From the kernel side, this feature bit doesn't result in
+any ALTERNATIVE code patching. No memory allocations are done to
+save/restore user state. No code paths outside of the tests for
+vulnerability to TAA are dependent on the value of the feature bit. In
+general the kernel doesn't care whether RTM is present or not.
+
+Applications typically look at CPUID bits once at startup (or when first
+calling into a library that uses the feature). So we have a couple of
+cases to cover:
+
+1) An application started and saw that RTM was enabled, so began
+   to use it. Then TSX was disabled. Net result in this case is that
+   the application will keep trying to use RTM, but every xbegin() will
+   immediately abort the transaction. This has a performance impact on
+   the application, but it doesn't affect correctness because all users
+   of RTM must have a fallback path for when the transaction aborts. Note
+   that even if an application is in the middle of a transaction when we
+   disable RTM, we are safe. The IPI that we use to update the TSX_CTRL
+   MSR will abort the transaction (just as any interrupt would abort
+   a transaction).
+
+2) An application starts and sees RTM is not available. So it will
+   always use alternative paths. Even if TSX is enabled and RTM is set,
+   applications in general do not re-evaluate their choice so will
+   continue to run in non-TSX mode.
+
+When the TSX state is changed from the sysfs interface, TSX Async Abort
+(TAA) mitigation state also needs to be updated. Set the TAA mitigation
+state as per TSX and VERW static branch state.
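A minimal user-space sketch of the interface described above (an
illustrative aside, not part of the submitted patch: the hw_tx_mem file
only exists on kernels carrying this patch, and only on CPUs with TSX
control; writing to it requires root):

    #include <stdio.h>

    int main(void)
    {
            /* "0" disables TSX at run time, "1" re-enables it. */
            FILE *f = fopen("/sys/devices/system/cpu/hw_tx_mem", "w");

            if (!f) {
                    perror("hw_tx_mem");
                    return 1;
            }
            fputs("0\n", f);
            return fclose(f) ? 1 : 0;
    }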
+ +Signed-off-by: Pawan Gupta +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Tested-by: Neelima Krishnan +[bwh: Backported to 4.19: adjust context] +Signed-off-by: Ben Hutchings +--- + .../ABI/testing/sysfs-devices-system-cpu | 23 ++++ + .../admin-guide/hw-vuln/tsx_async_abort.rst | 29 +++++ + arch/x86/kernel/cpu/bugs.c | 21 +++- + arch/x86/kernel/cpu/cpu.h | 3 +- + arch/x86/kernel/cpu/tsx.c | 100 +++++++++++++++++- + drivers/base/cpu.c | 32 +++++- + include/linux/cpu.h | 6 ++ + 7 files changed, 210 insertions(+), 4 deletions(-) + +diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu +index a1bd0b6766d7..2a98f6c70add 100644 +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -513,3 +513,26 @@ Description: Control Symetric Multi Threading (SMT) + + If control status is "forceoff" or "notsupported" writes + are rejected. ++ ++What: /sys/devices/system/cpu/hw_tx_mem ++Date: August 2019 ++Contact: Pawan Gupta ++ Linux kernel mailing list ++Description: Hardware Transactional Memory (HTM) control. ++ ++ Read/write interface to control HTM feature for all the CPUs in ++ the system. This interface is only present on platforms that ++ support HTM control. HTM is a hardware feature to speed up the ++ execution of multi-threaded software through lock elision. An ++ example of HTM implementation is Intel Transactional ++ Synchronization Extensions (TSX). ++ ++ Read returns the status of HTM feature. ++ ++ 0: HTM is disabled ++ 1: HTM is enabled ++ ++ Write sets the state of HTM feature. ++ ++ 0: Disables HTM ++ 1: Enables HTM +diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst +index 58f24db49615..b62bc749fd8c 100644 +--- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst ++++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst +@@ -207,6 +207,35 @@ buffers. For platforms without TSX control "tsx" command line argument has no + effect. + + ++.. _taa_mitigation_sysfs: ++ ++Mitigation control using sysfs ++------------------------------ ++ ++For those affected systems that can not be frequently rebooted to enable or ++disable TSX, sysfs can be used as an alternative after installing the updates. ++The possible values for the file /sys/devices/system/cpu/hw_tx_mem are: ++ ++ ============ ============================================================= ++ 0 Disable TSX. Upon entering a TSX transactional region, the code ++ will immediately abort, before any instruction executes within ++ the transactional region even speculatively, and continue on ++ the fallback. Equivalent to boot parameter "tsx=off". ++ ++ 1 Enable TSX. Equivalent to boot parameter "tsx=on". ++ ++ ============ ============================================================= ++ ++Reading from this file returns the status of TSX feature. This file is only ++present on systems that support TSX control. ++ ++When disabling TSX by using the sysfs mechanism, applications that are already ++running and use TSX will see their transactional regions aborted and execution ++flow will be redirected to the fallback, losing the benefits of the ++non-blocking path. TSX needs fallback code to guarantee correct execution ++without transactional regions. 
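The fallback requirement mentioned above can be sketched with the RTM
intrinsics (an illustrative aside, not part of the quoted patch;
production lock elision needs more care, this only shows the shape of
the transactional fast path plus fallback):

    #include <immintrin.h>   /* _xbegin()/_xend()/_xabort(); build with -mrtm */
    #include <stdatomic.h>

    static atomic_int fallback_lock;   /* 0 = free, 1 = held */

    void update_counter(long *counter)
    {
            if (_xbegin() == _XBEGIN_STARTED) {
                    /*
                     * Reading the lock pulls it into the transaction's read
                     * set, so a concurrent fallback locker aborts us and the
                     * two paths never touch *counter at the same time.
                     */
                    if (atomic_load(&fallback_lock))
                            _xabort(0xff);
                    (*counter)++;      /* transactional fast path */
                    _xend();
                    return;
            }
            /* Aborted -- e.g. TSX was just disabled under us: take the lock. */
            for (int expected = 0;
                 !atomic_compare_exchange_weak(&fallback_lock, &expected, 1);
                 expected = 0)
                    ;
            (*counter)++;
            atomic_store(&fallback_lock, 0);
    }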
++ ++ + Mitigation selection guide + -------------------------- + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index c435bc5dc19b..f0a998c10056 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -274,7 +274,7 @@ early_param("mds", mds_cmdline); + #define pr_fmt(fmt) "TAA: " fmt + + /* Default mitigation for TAA-affected CPUs */ +-static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; ++static enum taa_mitigations taa_mitigation = TAA_MITIGATION_VERW; + static bool taa_nosmt __ro_after_init; + + static const char * const taa_strings[] = { +@@ -374,6 +374,25 @@ static int __init tsx_async_abort_cmdline(char *str) + } + early_param("tsx_async_abort", tsx_async_abort_cmdline); + ++void taa_update_mitigation(bool tsx_enabled) ++{ ++ /* ++ * When userspace changes the TSX state, update taa_mitigation ++ * so that the updated mitigation state is shown in: ++ * /sys/devices/system/cpu/vulnerabilities/tsx_async_abort ++ * ++ * Check if TSX is disabled. ++ * Check if CPU buffer clear is enabled. ++ * else the system is vulnerable. ++ */ ++ if (!tsx_enabled) ++ taa_mitigation = TAA_MITIGATION_TSX_DISABLE; ++ else if (static_key_count(&mds_user_clear.key)) ++ taa_mitigation = TAA_MITIGATION_VERW; ++ else ++ taa_mitigation = TAA_MITIGATION_OFF; ++} ++ + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V1 : " fmt + +diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h +index 236582c90d3f..57fd603d367f 100644 +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -52,11 +52,12 @@ enum tsx_ctrl_states { + TSX_CTRL_NOT_SUPPORTED, + }; + +-extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; ++extern enum tsx_ctrl_states tsx_ctrl_state; + + extern void __init tsx_init(void); + extern void tsx_enable(void); + extern void tsx_disable(void); ++extern void taa_update_mitigation(bool tsx_enabled); + #else + static inline void tsx_init(void) { } + #endif /* CONFIG_CPU_SUP_INTEL */ +diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c +index e93abe6f0bb9..96320449abb7 100644 +--- a/arch/x86/kernel/cpu/tsx.c ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -10,12 +10,15 @@ + + #include + #include ++#include + + #include + + #include "cpu.h" + +-enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; ++static DEFINE_MUTEX(tsx_mutex); ++ ++enum tsx_ctrl_states tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; + + void tsx_disable(void) + { +@@ -118,3 +121,98 @@ void __init tsx_init(void) + setup_force_cpu_cap(X86_FEATURE_RTM); + } + } ++ ++static void tsx_update_this_cpu(void *arg) ++{ ++ unsigned long enable = (unsigned long)arg; ++ ++ if (enable) ++ tsx_enable(); ++ else ++ tsx_disable(); ++} ++ ++/* Take tsx_mutex lock and update tsx_ctrl_state when calling this function */ ++static void tsx_update_on_each_cpu(bool val) ++{ ++ get_online_cpus(); ++ on_each_cpu(tsx_update_this_cpu, (void *)val, 1); ++ put_online_cpus(); ++} ++ ++ssize_t hw_tx_mem_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%d\n", tsx_ctrl_state == TSX_CTRL_ENABLE ? 
1 : 0);
++}
++
++ssize_t hw_tx_mem_store(struct device *dev, struct device_attribute *attr,
++			const char *buf, size_t count)
++{
++	enum tsx_ctrl_states requested_state;
++	ssize_t ret;
++	bool val;
++
++	ret = kstrtobool(buf, &val);
++	if (ret)
++		return ret;
++
++	mutex_lock(&tsx_mutex);
++
++	if (val)
++		requested_state = TSX_CTRL_ENABLE;
++	else
++		requested_state = TSX_CTRL_DISABLE;
++
++	/* Current state is same as the requested state, do nothing */
++	if (tsx_ctrl_state == requested_state)
++		goto exit;
++
++	tsx_ctrl_state = requested_state;
++
++	/*
++	 * Changing the TSX state from this interface also updates the
++	 * CPUID.RTM feature bit. From the kernel side, this feature bit
++	 * doesn't result in any ALTERNATIVE code patching. No memory
++	 * allocations are done to save/restore user state. No code paths
++	 * outside of the tests for vulnerability to TAA are dependent on
++	 * the value of the feature bit. In general the kernel doesn't care
++	 * whether RTM is present or not.
++	 *
++	 * From the user side it is a bit fuzzier. Applications typically look
++	 * at CPUID bits once at startup (or when first calling into a library
++	 * that uses the feature). So we have a couple of cases to cover:
++	 *
++	 * 1) An application started and saw that RTM was enabled, so began
++	 *    to use it. Then TSX was disabled. Net result in this case is
++	 *    that the application will keep trying to use RTM, but every
++	 *    xbegin() will immediately abort the transaction. This has a
++	 *    performance impact on the application, but it doesn't affect
++	 *    correctness because all users of RTM must have a fallback path
++	 *    for when the transaction aborts. Note that even if an application
++	 *    is in the middle of a transaction when we disable RTM, we are
++	 *    safe. The IPI that we use to update the TSX_CTRL MSR will abort
++	 *    the transaction (just as any interrupt would abort a
++	 *    transaction).
++	 *
++	 * 2) An application starts and sees RTM is not available. So it will
++	 *    always use alternative paths. Even if TSX is enabled and RTM is
++	 *    set, applications in general do not re-evaluate their choice so
++	 *    will continue to run in non-TSX mode.
++ */ ++ tsx_update_on_each_cpu(val); ++ ++ if (boot_cpu_has_bug(X86_BUG_TAA)) ++ taa_update_mitigation(val); ++exit: ++ mutex_unlock(&tsx_mutex); ++ ++ return count; ++} ++ ++umode_t hw_tx_mem_is_visible(void) ++{ ++ if (tsx_ctrl_state == TSX_CTRL_NOT_SUPPORTED) ++ return 0; ++ ++ return 0644; ++} +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c +index e9e7fde0fe00..ebc46fd81762 100644 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -458,6 +458,34 @@ struct device *cpu_device_create(struct device *parent, void *drvdata, + } + EXPORT_SYMBOL_GPL(cpu_device_create); + ++ssize_t __weak hw_tx_mem_show(struct device *dev, struct device_attribute *a, ++ char *buf) ++{ ++ return -ENODEV; ++} ++ ++ssize_t __weak hw_tx_mem_store(struct device *dev, struct device_attribute *a, ++ const char *buf, size_t count) ++{ ++ return -ENODEV; ++} ++ ++DEVICE_ATTR_RW(hw_tx_mem); ++ ++umode_t __weak hw_tx_mem_is_visible(void) ++{ ++ return 0; ++} ++ ++static umode_t cpu_root_attrs_is_visible(struct kobject *kobj, ++ struct attribute *attr, int index) ++{ ++ if (attr == &dev_attr_hw_tx_mem.attr) ++ return hw_tx_mem_is_visible(); ++ ++ return attr->mode; ++} ++ + #ifdef CONFIG_GENERIC_CPU_AUTOPROBE + static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); + #endif +@@ -479,11 +507,13 @@ static struct attribute *cpu_root_attrs[] = { + #ifdef CONFIG_GENERIC_CPU_AUTOPROBE + &dev_attr_modalias.attr, + #endif ++ &dev_attr_hw_tx_mem.attr, + NULL + }; + + static struct attribute_group cpu_root_attr_group = { +- .attrs = cpu_root_attrs, ++ .attrs = cpu_root_attrs, ++ .is_visible = cpu_root_attrs_is_visible, + }; + + static const struct attribute_group *cpu_root_attr_groups[] = { +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 9d8dba19844e..7bd8ced5c000 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -65,6 +65,12 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf); + ++extern ssize_t hw_tx_mem_show(struct device *dev, struct device_attribute *a, ++ char *buf); ++extern ssize_t hw_tx_mem_store(struct device *dev, struct device_attribute *a, ++ const char *buf, size_t count); ++extern umode_t hw_tx_mem_is_visible(void); ++ + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, diff --git a/debian/patches/series b/debian/patches/series index 6dc480d6d..059be0555 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -270,6 +270,15 @@ bugfix/x86//itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch bugfix/x86//itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch bugfix/x86//itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch bugfix/x86//itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +bugfix/x86/taa/0013-TAAv6-1.patch +bugfix/x86/taa/0014-TAAv6-2.patch +bugfix/x86/taa/0015-TAAv6-3.patch +bugfix/x86/taa/0016-TAAv6-4.patch +bugfix/x86/taa/0017-TAAv6-5.patch +bugfix/x86/taa/0018-TAAv6-6.patch +bugfix/x86/taa/0019-TAAv6-7.patch +bugfix/x86/taa/0020-TAAv6-8.patch +bugfix/x86/taa/0021-TAAv6-9.patch # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From b2cc5e7f742b4fda017532cb41d81e277d03622c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 24 Oct 2019 22:48:50 +0100 Subject: [PATCH 04/13] [x86] Update NX patch set to v7 --- ...Add-ITLB_MULTIHIT-bug-infrastructure.patch | 2 +- ...010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch | 23 
++++++++++++++---- ...per-function-for-creating-VM-worker.patch} | 6 +++-- ...Recovery-of-shattered-NX-large-pages.patch | 10 ++++---- debian/patches/series | 24 +++++++++---------- 5 files changed, 41 insertions(+), 24 deletions(-) rename debian/patches/bugfix/x86/itlb_multihit/{0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch => 0011-kvm-Add-helper-function-for-creating-VM-worker.patch} (97%) diff --git a/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch b/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch index 0ff74e465..bbbf5f225 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch @@ -1,5 +1,5 @@ From: Pawan Gupta -Date: Fri, 11 Oct 2019 12:40:12 +0200 +Date: Thu, 24 Oct 2019 18:34:26 +0200 Subject: x86: Add ITLB_MULTIHIT bug infrastructure Some processors may incur a machine check error possibly diff --git a/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch b/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch index 62959cf6c..2f1e70bb8 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch @@ -1,5 +1,5 @@ From: Paolo Bonzini -Date: Fri, 11 Oct 2019 12:40:14 +0200 +Date: Thu, 24 Oct 2019 18:34:28 +0200 Subject: kvm: mmu: ITLB_MULTIHIT mitigation With some Intel processors, putting the same virtual address in the TLB @@ -30,8 +30,8 @@ Signed-off-by: Ben Hutchings arch/x86/kernel/cpu/bugs.c | 13 +- arch/x86/kvm/mmu.c | 135 +++++++++++++++++- arch/x86/kvm/paging_tmpl.h | 29 +++- - arch/x86/kvm/x86.c | 1 + - 6 files changed, 178 insertions(+), 13 deletions(-) + arch/x86/kvm/x86.c | 9 ++ + 6 files changed, 186 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 16607b178b47..b2c1a5c63ab3 100644 @@ -451,7 +451,7 @@ index 3b022b08b577..adf42dc8d38b 100644 out_unlock: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 2714c1a0e59f..406a37aa61c7 100644 +index 2714c1a0e59f..ec80bb27504f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -206,6 +206,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -462,3 +462,18 @@ index 2714c1a0e59f..406a37aa61c7 100644 { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } +@@ -1130,6 +1131,14 @@ u64 kvm_get_arch_capabilities(void) + + rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); + ++ /* ++ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that ++ * the nested hypervisor runs with NX huge pages. If it is not, ++ * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other ++ * L1 guests, so it need not worry about its own (L2) guests. ++ */ ++ data |= ARCH_CAP_PSCHANGE_MC_NO; ++ + /* + * If we're doing cache flushes (either "always" or "cond") + * we will do one whenever the guest does a vmlaunch/vmresume. 
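For orientation, the IA32_ARCH_CAPABILITIES bits that these patch sets
test and advertise, in a small decoding sketch (an illustrative aside,
not part of the quoted patches; the bit positions follow the kernel's
msr-index.h definitions):

    #include <stdint.h>
    #include <stdio.h>

    #define ARCH_CAP_MDS_NO          (1ULL << 5)  /* CPU not affected by MDS */
    #define ARCH_CAP_PSCHANGE_MC_NO  (1ULL << 6)  /* no iTLB multi-hit #MC */
    #define ARCH_CAP_TSX_CTRL_MSR    (1ULL << 7)  /* IA32_TSX_CTRL is present */
    #define ARCH_CAP_TAA_NO          (1ULL << 8)  /* CPU not affected by TAA */

    /* Decode a raw IA32_ARCH_CAPABILITIES (MSR 0x10a) value. */
    void report(uint64_t cap)
    {
            printf("itlb_multihit: %s\n",
                   cap & ARCH_CAP_PSCHANGE_MC_NO ? "not affected" : "affected");
            printf("taa:           %s\n",
                   cap & ARCH_CAP_TAA_NO ? "not affected"
                                         : "affected if TSX or TSX_CTRL present");
            printf("mds:           %s\n",
                   cap & ARCH_CAP_MDS_NO ? "not affected" : "affected");
    }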
diff --git a/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch b/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch similarity index 97% rename from debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch rename to debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch index 81acc63f2..4af53ea23 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch @@ -1,6 +1,8 @@ From: Junaid Shahid -Date: Fri, 11 Oct 2019 12:40:15 +0200 -Subject: kvm: Add helper function for creating VM worker threads +Date: Thu, 24 Oct 2019 18:34:29 +0200 +Subject: kvm: Add helper function for creating VM worker + + threads This adds a function to create a kernel thread associated with a given VM. In particular, it ensures that the worker thread inherits the diff --git a/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch b/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch index dd448bbf7..e1962f7a7 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch @@ -1,5 +1,5 @@ From: Junaid Shahid -Date: Fri, 11 Oct 2019 12:40:16 +0200 +Date: Thu, 24 Oct 2019 18:34:30 +0200 Subject: kvm: x86: mmu: Recovery of shattered NX large pages The page table pages corresponding to broken down large pages are @@ -266,10 +266,10 @@ index 65892288bf51..f7b2de7b6382 100644 + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 406a37aa61c7..1ecadf51f154 100644 +index ec80bb27504f..da688e726632 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -8950,6 +8950,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +@@ -8958,6 +8958,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); @@ -277,7 +277,7 @@ index 406a37aa61c7..1ecadf51f154 100644 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); -@@ -8981,6 +8982,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +@@ -8989,6 +8990,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; } @@ -289,7 +289,7 @@ index 406a37aa61c7..1ecadf51f154 100644 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); -@@ -9082,6 +9088,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) +@@ -9090,6 +9096,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } EXPORT_SYMBOL_GPL(x86_set_memory_region); diff --git a/debian/patches/series b/debian/patches/series index 059be0555..1ffa003fb 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -258,18 +258,18 @@ bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.pa bugfix/all/vhost-make-sure-log_num-in_num.patch bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch 
-bugfix/x86//itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch -bugfix/x86//itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch -bugfix/x86//itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch -bugfix/x86//itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch -bugfix/x86//itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch -bugfix/x86//itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch -bugfix/x86//itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch -bugfix/x86//itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch -bugfix/x86//itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch -bugfix/x86//itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch -bugfix/x86//itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker-threa.patch -bugfix/x86//itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch +bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch +bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch +bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch +bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch +bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch +bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch +bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch +bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch +bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch bugfix/x86/taa/0013-TAAv6-1.patch bugfix/x86/taa/0014-TAAv6-2.patch bugfix/x86/taa/0015-TAAv6-3.patch From 537ad2315a8f7bc8e263912803d3ac4d3281f4ee Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 24 Oct 2019 22:51:45 +0100 Subject: [PATCH 05/13] [x86] Update TAA patch set to v7 --- debian/changelog | 11 +- ...3-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch} | 39 +- ...lper-function-x86_read_arch_cap_msr.patch} | 16 +- ...-cmdline-option-with-TSX-disabled-b.patch} | 59 ++- ...aa-Add-mitigation-for-TSX-Async-Abo.patch} | 169 ++++---- ...aa-Add-sysfs-reporting-for-TSX-Asyn.patch} | 27 +- ...S_NO-0-to-guests-when-TSX-is-enable.patch} | 36 +- ...option-to-the-tsx-cmdline-parameter.patch} | 45 +- ...aa-Add-documentation-for-TSX-Async-.patch} | 164 ++++++-- .../patches/bugfix/x86/taa/0021-TAAv6-9.patch | 385 ------------------ ...onfig-options-to-set-tsx-on-off-auto.patch | 134 ++++++ debian/patches/series | 18 +- 12 files changed, 514 insertions(+), 589 deletions(-) rename debian/patches/bugfix/x86/taa/{0013-TAAv6-1.patch => 0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch} (62%) rename debian/patches/bugfix/x86/taa/{0014-TAAv6-2.patch => 0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch} (74%) rename debian/patches/bugfix/x86/taa/{0015-TAAv6-3.patch => 0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch} (79%) rename debian/patches/bugfix/x86/taa/{0016-TAAv6-4.patch => 0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch} (66%) rename 
debian/patches/bugfix/x86/taa/{0017-TAAv6-5.patch => 0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch} (82%) rename debian/patches/bugfix/x86/taa/{0018-TAAv6-6.patch => 0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch} (56%) rename debian/patches/bugfix/x86/taa/{0019-TAAv6-7.patch => 0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch} (54%) rename debian/patches/bugfix/x86/taa/{0020-TAAv6-8.patch => 0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch} (65%) delete mode 100644 debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch create mode 100644 debian/patches/bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch diff --git a/debian/changelog b/debian/changelog index bbbbbfc5c..baf2fb278 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,7 +14,16 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - kvm: mmu: ITLB_MULTIHIT mitigation - kvm: Add helper function for creating VM worker threads - kvm: x86: mmu: Recovery of shattered NX large pages - * [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135). + * [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135): + - x86/msr: Add the IA32_TSX_CTRL MSR + - x86/cpu: Add a helper function x86_read_arch_cap_msr() + - x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default + - x86/speculation/taa: Add mitigation for TSX Async Abort + - x86/speculation/taa: Add sysfs reporting for TSX Async Abort + - kvm/x86: Export MDS_NO=0 to guests when TSX is enabled + - x86/tsx: Add "auto" option to the tsx= cmdline parameter + - x86/speculation/taa: Add documentation for TSX Async Abort + - x86/tsx: Add config options to set tsx=on|off|auto TSX is now disabled by default; see Documentation/admin-guide/hw-vuln/tsx_async_abort.rst diff --git a/debian/patches/bugfix/x86/taa/0013-TAAv6-1.patch b/debian/patches/bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch similarity index 62% rename from debian/patches/bugfix/x86/taa/0013-TAAv6-1.patch rename to debian/patches/bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch index c05a9f5f5..71d885413 100644 --- a/debian/patches/bugfix/x86/taa/0013-TAAv6-1.patch +++ b/debian/patches/bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch @@ -1,15 +1,17 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:22:56 -0700 -Subject: TAAv6 1 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 10:45:50 +0200 +Subject: x86/msr: Add the IA32_TSX_CTRL MSR Transactional Synchronization Extensions (TSX) may be used on certain processors as part of a speculative side channel attack. A microcode update for existing processors that are vulnerable to this attack will -add a new MSR, IA32_TSX_CTRL to allow the system administrator the -option to disable TSX as one of the possible mitigations. [Note that -future processors that are not vulnerable will also support the -IA32_TSX_CTRL MSR]. Add defines for the new IA32_TSX_CTRL MSR and its -bits. +add a new MSR - IA32_TSX_CTRL to allow the system administrator the +option to disable TSX as one of the possible mitigations. + + [ Note that future processors that are not vulnerable will also + support the IA32_TSX_CTRL MSR. ] + +Add defines for the new IA32_TSX_CTRL MSR and its bits. TSX has two sub-features: @@ -23,27 +25,34 @@ IA32_TSX_CTRL MSR. 
There are two control bits in IA32_TSX_CTRL MSR: - Bit 0: When set it disables the Restricted Transactional Memory (RTM) + Bit 0: When set, it disables the Restricted Transactional Memory (RTM) sub-feature of TSX (will force all transactions to abort on the XBEGIN instruction). - Bit 1: When set it disables the enumeration of the RTM and HLE feature + Bit 1: When set, it disables the enumeration of the RTM and HLE feature (i.e. it will make CPUID(EAX=7).EBX{bit4} and - CPUID(EAX=7).EBX{bit11} read as 0). + CPUID(EAX=7).EBX{bit11} read as 0). -The other TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally -disabled but still enumerated as present by CPUID(EAX=7).EBX{bit4}. +The other TSX sub-feature, Hardware Lock Elision (HLE), is +unconditionally disabled but still enumerated as present by +CPUID(EAX=7).EBX{bit4}. Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov Reviewed-by: Mark Gross Reviewed-by: Tony Luck Tested-by: Neelima Krishnan +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Paolo Bonzini +Cc: Thomas Gleixner +Cc: x86-ml --- arch/x86/include/asm/msr-index.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index f58e6921cbf7..f45ca8aad98f 100644 +index f58e6921cbf7..da7887a9f314 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -91,6 +91,7 @@ @@ -59,7 +68,7 @@ index f58e6921cbf7..f45ca8aad98f 100644 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 -+#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM fxeature */ ++#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + #define MSR_IA32_SYSENTER_CS 0x00000174 diff --git a/debian/patches/bugfix/x86/taa/0014-TAAv6-2.patch b/debian/patches/bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch similarity index 74% rename from debian/patches/bugfix/x86/taa/0014-TAAv6-2.patch rename to debian/patches/bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch index 945d59b85..fe671a4f1 100644 --- a/debian/patches/bugfix/x86/taa/0014-TAAv6-2.patch +++ b/debian/patches/bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch @@ -1,14 +1,20 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:23:56 -0700 -Subject: TAAv6 2 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 10:52:35 +0200 +Subject: x86/cpu: Add a helper function x86_read_arch_cap_msr() -Add a helper function to read IA32_ARCH_CAPABILITIES MSR. If the CPU -doesn't support this MSR return 0. +Add a helper function to read the IA32_ARCH_CAPABILITIES MSR. Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov Reviewed-by: Mark Gross Reviewed-by: Tony Luck Tested-by: Neelima Krishnan +Cc: Andy Lutomirski +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: x86-ml [bwh: Forward-ported on top of NX: Fix conflict (neighbouring changes) in arch/x86/kernel/cpu/common.c] Signed-off-by: Ben Hutchings diff --git a/debian/patches/bugfix/x86/taa/0015-TAAv6-3.patch b/debian/patches/bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch similarity index 79% rename from debian/patches/bugfix/x86/taa/0015-TAAv6-3.patch rename to debian/patches/bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch index 2ec4fc340..c4da1231d 100644 --- a/debian/patches/bugfix/x86/taa/0015-TAAv6-3.patch +++ b/debian/patches/bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch @@ -1,17 +1,40 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:24:56 -0700 -Subject: TAAv6 3 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 11:01:53 +0200 +Subject: x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default -Add kernel cmdline parameter "tsx" to control the Transactional -Synchronization Extensions (TSX) feature. On CPUs that support TSX -control, use "tsx=on|off" to enable or disable TSX. Not specifying this +Add a kernel cmdline parameter "tsx" to control the Transactional +Synchronization Extensions (TSX) feature. On CPUs that support TSX +control, use "tsx=on|off" to enable or disable TSX. Not specifying this option is equivalent to "tsx=off". This is because on certain processors TSX may be used as a part of a speculative side channel attack. +Carve out the TSX controlling functionality into a separate compilation +unit because TSX is a CPU feature while the TSX async abort control +machinery will go to cpu/bugs.c. + + [ bp: Massage, shorten and clear the arg buffer. ] + Signed-off-by: Pawan Gupta -Reviewed-by: Mark Gross -Reviewed-by: Tony Luck -Tested-by: Neelima Krishnan +Signed-off-by: Borislav Petkov +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Babu Moger +Cc: Fenghua Yu +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Jonathan Corbet +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: Kees Cook +Cc: linux-doc@vger.kernel.org +Cc: Peter Zijlstra +Cc: "Rafael J. Wysocki" +Cc: Rahul Tanwar +Cc: Ricardo Neri +Cc: Sean Christopherson +Cc: Thomas Gleixner +Cc: x86-ml +Cc: Zhao Yakui [bwh: Backported to 4.19: adjust context] Signed-off-by: Ben Hutchings --- @@ -20,8 +43,8 @@ Signed-off-by: Ben Hutchings arch/x86/kernel/cpu/common.c | 2 + arch/x86/kernel/cpu/cpu.h | 18 +++ arch/x86/kernel/cpu/intel.c | 5 + - arch/x86/kernel/cpu/tsx.c | 115 ++++++++++++++++++ - 6 files changed, 152 insertions(+), 1 deletion(-) + arch/x86/kernel/cpu/tsx.c | 119 ++++++++++++++++++ + 6 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/tsx.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt @@ -124,10 +147,10 @@ index fc3c07fe7df5..a5287b18a63f 100644 #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c new file mode 100644 -index 000000000000..e39b33b7cef8 +index 000000000000..e5933ef50add --- /dev/null +++ b/arch/x86/kernel/cpu/tsx.c -@@ -0,0 +1,115 @@ +@@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Transactional Synchronization Extensions (TSX) control. 
@@ -138,7 +161,6 @@ index 000000000000..e39b33b7cef8 + * Pawan Gupta + */ + -+#include +#include + +#include @@ -155,6 +177,7 @@ index 000000000000..e39b33b7cef8 + + /* Force all transactions to immediately abort */ + tsx |= TSX_CTRL_RTM_DISABLE; ++ + /* + * Ensure TSX support is not enumerated in CPUID. + * This is visible to userspace and will ensure they @@ -174,6 +197,7 @@ index 000000000000..e39b33b7cef8 + + /* Enable the RTM feature in the cpu */ + tsx &= ~TSX_CTRL_RTM_DISABLE; ++ + /* + * Ensure TSX support is enumerated in CPUID. + * This is visible to userspace and will ensure they @@ -198,7 +222,7 @@ index 000000000000..e39b33b7cef8 + +void __init tsx_init(void) +{ -+ char arg[20]; ++ char arg[4] = {}; + int ret; + + if (!tsx_ctrl_is_supported()) @@ -212,7 +236,7 @@ index 000000000000..e39b33b7cef8 + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; -+ pr_info("tsx: invalid option, defaulting to off\n"); ++ pr_err("tsx: invalid option, defaulting to off\n"); + } + } else { + /* tsx= not provided, defaulting to off */ @@ -221,6 +245,7 @@ index 000000000000..e39b33b7cef8 + + if (tsx_ctrl_state == TSX_CTRL_DISABLE) { + tsx_disable(); ++ + /* + * tsx_disable() will change the state of the + * RTM CPUID bit. Clear it here since it is now @@ -228,6 +253,7 @@ index 000000000000..e39b33b7cef8 + */ + setup_clear_cpu_cap(X86_FEATURE_RTM); + } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { ++ + /* + * HW defaults TSX to be enabled at bootup. + * We may still need the TSX enable support @@ -235,6 +261,7 @@ index 000000000000..e39b33b7cef8 + * kexec after TSX is disabled. + */ + tsx_enable(); ++ + /* + * tsx_enable() will change the state of the + * RTM CPUID bit. Force it here since it is now diff --git a/debian/patches/bugfix/x86/taa/0016-TAAv6-4.patch b/debian/patches/bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch similarity index 66% rename from debian/patches/bugfix/x86/taa/0016-TAAv6-4.patch rename to debian/patches/bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch index fbd8a7e42..845c8f067 100644 --- a/debian/patches/bugfix/x86/taa/0016-TAAv6-4.patch +++ b/debian/patches/bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch @@ -1,29 +1,33 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:25:56 -0700 -Subject: TAAv6 4 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 11:30:45 +0200 +Subject: x86/speculation/taa: Add mitigation for TSX Async Abort TSX Async Abort (TAA) is a side channel vulnerability to the internal buffers in some Intel processors similar to Microachitectural Data -Sampling (MDS). In this case certain loads may speculatively pass +Sampling (MDS). In this case, certain loads may speculatively pass invalid data to dependent operations when an asynchronous abort -condition is pending in a TSX transaction. This includes loads with no -fault or assist condition. Such loads may speculatively expose stale -data from the uarch data structures as in MDS. Scope of exposure is -within the same-thread and cross-thread. This issue affects all current -processors that support TSX, but do not have ARCH_CAP_TAA_NO (bit 8) set -in MSR_IA32_ARCH_CAPABILITIES. +condition is pending in a TSX transaction. + +This includes loads with no fault or assist condition. Such loads may +speculatively expose stale data from the uarch data structures as in +MDS. Scope of exposure is within the same-thread and cross-thread. 
This +issue affects all current processors that support TSX, but do not have +ARCH_CAP_TAA_NO (bit 8) set in MSR_IA32_ARCH_CAPABILITIES. On CPUs which have their IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0, CPUID.MD_CLEAR=1 and the MDS mitigation is clearing the CPU buffers using VERW or L1D_FLUSH, there is no additional mitigation needed for -TAA. +TAA. On affected CPUs with MDS_NO=1 this issue can be mitigated by +disabling the Transactional Synchronization Extensions (TSX) feature. -On affected CPUs with MDS_NO=1 this issue can be mitigated by disabling -Transactional Synchronization Extensions (TSX) feature. A new MSR -IA32_TSX_CTRL in future and current processors after a microcode update -can be used to control TSX feature. TSX_CTRL_RTM_DISABLE bit disables -the TSX sub-feature Restricted Transactional Memory (RTM). -TSX_CTRL_CPUID_CLEAR bit clears the RTM enumeration in CPUID. The other +A new MSR IA32_TSX_CTRL in future and current processors after a +microcode update can be used to control the TSX feature. There are two +bits in that MSR: + +* TSX_CTRL_RTM_DISABLE disables the TSX sub-feature Restricted +Transactional Memory (RTM). + +* TSX_CTRL_CPUID_CLEAR clears the RTM enumeration in CPUID. The other TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally disabled with updated microcode but still enumerated as present by CPUID(EAX=7).EBX{bit4}. @@ -32,17 +36,36 @@ The second mitigation approach is similar to MDS which is clearing the affected CPU buffers on return to user space and when entering a guest. Relevant microcode update is required for the mitigation to work. More details on this approach can be found here: -https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html -TSX feature can be controlled by the "tsx" command line parameter. If -the TSX feature is forced to be enabled then "Clear CPU buffers" (MDS -mitigation) is deployed. The effective mitigation state can be read from -sysfs. + https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html + +The TSX feature can be controlled by the "tsx" command line parameter. +If it is force-enabled then "Clear CPU buffers" (MDS mitigation) is +deployed. The effective mitigation state can be read from sysfs. + + [ bp: + - massage + comments cleanup + - s/TAA_MITIGATION_TSX_DISABLE/TAA_MITIGATION_TSX_DISABLED/g - Josh. + - remove partial TAA mitigation in update_mds_branch_idle() - Josh. + - s/tsx_async_abort_cmdline/tsx_async_abort_parse_cmdline/g + ] Signed-off-by: Pawan Gupta -Reviewed-by: Mark Gross -Reviewed-by: Tony Luck -Tested-by: Neelima Krishnan +Signed-off-by: Borislav Petkov +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Fenghua Yu +Cc: Greg Kroah-Hartman +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Josh Poimboeuf +Cc: Kees Cook +Cc: Paolo Bonzini +Cc: "Peter Zijlstra (Intel)" +Cc: Sean Christopherson +Cc: Thomas Gleixner +Cc: Thomas Lendacky +Cc: x86-ml [bwh: Forward-ported on top of NX: Renumber bug bit after X86_BUG_ITLB_MULTIHIT] [bwh: Backported to 4.19: Add #include "cpu.h" in bugs.c] @@ -52,9 +75,9 @@ Signed-off-by: Ben Hutchings arch/x86/include/asm/msr-index.h | 4 + arch/x86/include/asm/nospec-branch.h | 4 +- arch/x86/include/asm/processor.h | 7 ++ - arch/x86/kernel/cpu/bugs.c | 129 ++++++++++++++++++++++++++- + arch/x86/kernel/cpu/bugs.c | 112 +++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 15 ++++ - 6 files changed, 156 insertions(+), 4 deletions(-) + 6 files changed, 141 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ccad4f183400..5a2eecfed727 100644 @@ -68,7 +91,7 @@ index ccad4f183400..5a2eecfed727 100644 #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index f45ca8aad98f..6d17eb64cc69 100644 +index da7887a9f314..0f4feee6d082 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -92,6 +92,10 @@ @@ -105,7 +128,7 @@ index 28cb2b31527a..09c7466c4880 100644 * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index b54f25697beb..4a163f33a07d 100644 +index b54f25697beb..efb44bd3a714 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1003,4 +1003,11 @@ enum mds_mitigations { @@ -116,12 +139,12 @@ index b54f25697beb..4a163f33a07d 100644 + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, -+ TAA_MITIGATION_TSX_DISABLE, ++ TAA_MITIGATION_TSX_DISABLED, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 1e764992fa64..841f106a277a 100644 +index 1e764992fa64..828b2fe4bc0a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -32,11 +32,14 @@ @@ -147,7 +170,7 @@ index 1e764992fa64..841f106a277a 100644 arch_smt_update(); -@@ -266,6 +270,110 @@ static int __init mds_cmdline(char *str) +@@ -266,6 +270,100 @@ static int __init mds_cmdline(char *str) } early_param("mds", mds_cmdline); @@ -162,44 +185,32 @@ index 1e764992fa64..841f106a277a 100644 + [TAA_MITIGATION_OFF] = "Vulnerable", + [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", -+ [TAA_MITIGATION_TSX_DISABLE] = "Mitigation: TSX disabled", ++ [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled", +}; + +static void __init taa_select_mitigation(void) +{ -+ u64 ia32_cap = x86_read_arch_cap_msr(); ++ u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + -+ /* -+ * As X86_BUG_TAA=1, TSX feature is supported by the hardware. If -+ * TSX was disabled (X86_FEATURE_RTM=0) earlier during tsx_init(). -+ * Select TSX_DISABLE as mitigation. -+ * -+ * This check is ahead of mitigations=off and tsx_async_abort=off -+ * because when TSX is disabled mitigation is already in place. This -+ * ensures sysfs doesn't show "Vulnerable" when TSX is disabled. 
-+ */ ++ /* TSX previously disabled by tsx=off */ + if (!boot_cpu_has(X86_FEATURE_RTM)) { -+ taa_mitigation = TAA_MITIGATION_TSX_DISABLE; -+ pr_info("%s\n", taa_strings[taa_mitigation]); -+ return; ++ taa_mitigation = TAA_MITIGATION_TSX_DISABLED; ++ goto out; + } + -+ /* All mitigations turned off from cmdline (mitigations=off) */ + if (cpu_mitigations_off()) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + -+ /* TAA mitigation is turned off from cmdline (tsx_async_abort=off) */ -+ if (taa_mitigation == TAA_MITIGATION_OFF) { -+ pr_info("%s\n", taa_strings[taa_mitigation]); -+ return; -+ } ++ /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ ++ if (taa_mitigation == TAA_MITIGATION_OFF) ++ goto out; + + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + taa_mitigation = TAA_MITIGATION_VERW; @@ -208,20 +219,21 @@ index 1e764992fa64..841f106a277a 100644 + + /* + * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. -+ * A microcode update fixes this behavior to clear CPU buffers. -+ * Microcode update also adds support for MSR_IA32_TSX_CTRL which -+ * is enumerated by ARCH_CAP_TSX_CTRL_MSR bit. ++ * A microcode update fixes this behavior to clear CPU buffers. It also ++ * adds support for MSR_IA32_TSX_CTRL which is enumerated by the ++ * ARCH_CAP_TSX_CTRL_MSR bit. + * + * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode + * update is required. + */ -+ if ((ia32_cap & ARCH_CAP_MDS_NO) && ++ ia32_cap = x86_read_arch_cap_msr(); ++ if ( (ia32_cap & ARCH_CAP_MDS_NO) && + !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * TSX is enabled, select alternate mitigation for TAA which is -+ * same as MDS. Enable MDS static branch to clear CPU buffers. ++ * the same as MDS. Enable MDS static branch to clear CPU buffers. + * + * For guests that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. @@ -231,10 +243,11 @@ index 1e764992fa64..841f106a277a 100644 + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); + ++out: + pr_info("%s\n", taa_strings[taa_mitigation]); +} + -+static int __init tsx_async_abort_cmdline(char *str) ++static int __init tsx_async_abort_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_TAA)) + return 0; @@ -253,34 +266,12 @@ index 1e764992fa64..841f106a277a 100644 + + return 0; +} -+early_param("tsx_async_abort", tsx_async_abort_cmdline); ++early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt -@@ -751,7 +859,7 @@ static void update_indir_branch_cond(void) - #undef pr_fmt - #define pr_fmt(fmt) fmt - --/* Update the static key controlling the MDS CPU buffer clear in idle */ -+/* Update the static key controlling the MDS and TAA CPU buffer clear in idle */ - static void update_mds_branch_idle(void) - { - /* -@@ -761,8 +869,11 @@ static void update_mds_branch_idle(void) - * The other variants cannot be mitigated when SMT is enabled, so - * clearing the buffers on idle just to prevent the Store Buffer - * repartitioning leak would be a window dressing exercise. -+ * -+ * Apply idle buffer clearing to TAA affected CPUs also. 
- */ -- if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) -+ if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY) && -+ !boot_cpu_has_bug(X86_BUG_TAA)) - return; - - if (sched_smt_active()) -@@ -772,6 +883,7 @@ static void update_mds_branch_idle(void) +@@ -772,6 +870,7 @@ static void update_mds_branch_idle(void) } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" @@ -288,7 +279,7 @@ index 1e764992fa64..841f106a277a 100644 void arch_smt_update(void) { -@@ -804,6 +916,19 @@ void arch_smt_update(void) +@@ -804,6 +903,19 @@ void arch_smt_update(void) break; } @@ -300,7 +291,7 @@ index 1e764992fa64..841f106a277a 100644 + /* TSX is enabled, apply MDS idle buffer clearing. */ + update_mds_branch_idle(); + break; -+ case TAA_MITIGATION_TSX_DISABLE: ++ case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } @@ -309,7 +300,7 @@ index 1e764992fa64..841f106a277a 100644 } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 5f89d78fe132..394bcb0403c9 100644 +index 5f89d78fe132..de1e552c9705 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1058,6 +1058,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) @@ -317,17 +308,17 @@ index 5f89d78fe132..394bcb0403c9 100644 setup_force_cpu_bug(X86_BUG_SWAPGS); + /* -+ * When processor is not mitigated for TAA (TAA_NO=0) set TAA bug when: ++ * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: + * - TSX is supported or -+ * - TSX_CTRL is supported ++ * - TSX_CTRL is present + * + * TSX_CTRL check is needed for cases when TSX could be disabled before -+ * the kernel boot e.g. kexec ++ * the kernel boot e.g. kexec. + * TSX_CTRL check alone is not sufficient for cases when the microcode + * update is not present or running as guest that don't get TSX_CTRL. + */ + if (!(ia32_cap & ARCH_CAP_TAA_NO) && -+ (boot_cpu_has(X86_FEATURE_RTM) || ++ (cpu_has(c, X86_FEATURE_RTM) || + (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + setup_force_cpu_bug(X86_BUG_TAA); + diff --git a/debian/patches/bugfix/x86/taa/0017-TAAv6-5.patch b/debian/patches/bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch similarity index 82% rename from debian/patches/bugfix/x86/taa/0017-TAAv6-5.patch rename to debian/patches/bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch index 2aae2283a..867d136e1 100644 --- a/debian/patches/bugfix/x86/taa/0017-TAAv6-5.patch +++ b/debian/patches/bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch @@ -1,18 +1,27 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:26:56 -0700 -Subject: TAAv6 5 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:19:51 +0200 +Subject: x86/speculation/taa: Add sysfs reporting for TSX Async Abort Add the sysfs reporting file for TSX Async Abort. It exposes the vulnerability and the mitigation state similar to the existing files for the other hardware vulnerabilities. -sysfs file path is: +Sysfs file path is: /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov Reviewed-by: Mark Gross Reviewed-by: Tony Luck Tested-by: Neelima Krishnan +Cc: Greg Kroah-Hartman +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Jiri Kosina +Cc: Josh Poimboeuf +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: x86-ml [bwh: Forward-ported on top of NX: Fix conflicts (neighbouring insertions) in arch/x86/kernel/cpu/bugs.c, drivers/base/cpu.c, include/linux/cpu.h] @@ -24,16 +33,16 @@ Signed-off-by: Ben Hutchings 3 files changed, 35 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 841f106a277a..c435bc5dc19b 100644 +index 828b2fe4bc0a..1ed43b858c52 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -1439,6 +1439,21 @@ static ssize_t mds_show_state(char *buf) +@@ -1426,6 +1426,21 @@ static ssize_t mds_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t tsx_async_abort_show_state(char *buf) +{ -+ if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLE) || ++ if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || + (taa_mitigation == TAA_MITIGATION_OFF)) + return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + @@ -49,7 +58,7 @@ index 841f106a277a..c435bc5dc19b 100644 static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) -@@ -1510,6 +1525,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr +@@ -1497,6 +1512,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITLB_MULTIHIT: return itlb_multihit_show_state(buf); @@ -59,7 +68,7 @@ index 841f106a277a..c435bc5dc19b 100644 default: break; } -@@ -1551,4 +1569,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr +@@ -1538,4 +1556,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr { return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); } diff --git a/debian/patches/bugfix/x86/taa/0018-TAAv6-6.patch b/debian/patches/bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch similarity index 56% rename from debian/patches/bugfix/x86/taa/0018-TAAv6-6.patch rename to debian/patches/bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch index f03d82de9..8d1c54c4d 100644 --- a/debian/patches/bugfix/x86/taa/0018-TAAv6-6.patch +++ b/debian/patches/bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch @@ -1,12 +1,12 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:27:56 -0700 -Subject: TAAv6 6 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:23:33 +0200 +Subject: kvm/x86: Export MDS_NO=0 to guests when TSX is enabled -Export IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX Async -Abort(TAA) affected hosts that have TSX enabled and updated microcode. -This is required so that the guests don't complain, +Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX +Async Abort(TAA) affected hosts that have TSX enabled and updated +microcode. This is required so that the guests don't complain, - "Vulnerable: Clear CPU buffers attempted, no microcode" + "Vulnerable: Clear CPU buffers attempted, no microcode" when the host has the updated microcode to clear CPU buffers. @@ -16,29 +16,39 @@ Guests can't do this check themselves when the ARCH_CAP_TSX_CTRL bit is not exported to the guests. In this case export MDS_NO=0 to the guests. When guests have -CPUID.MD_CLEAR=1 guests deploy MDS mitigation which also mitigates TAA. +CPUID.MD_CLEAR=1, they deploy MDS mitigation which also mitigates TAA. 
Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Tested-by: Neelima Krishnan +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Joerg Roedel +Cc: kvm ML +Cc: Paolo Bonzini +Cc: "Radim Krcmar" +Cc: Sean Christopherson +Cc: Thomas Gleixner +Cc: x86-ml --- arch/x86/kvm/x86.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 1ecadf51f154..5ccf79739b2b 100644 +index da688e726632..c68ee8d1ef8c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -1143,6 +1143,25 @@ u64 kvm_get_arch_capabilities(void) +@@ -1151,6 +1151,25 @@ u64 kvm_get_arch_capabilities(void) if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + /* + * On TAA affected systems, export MDS_NO=0 when: -+ * - TSX is enabled on host, i.e. X86_FEATURE_RTM=1. ++ * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. + * - Updated microcode is present. This is detected by -+ * the presence of ARCH_CAP_TSX_CTRL_MSR. This ensures -+ * VERW clears CPU buffers. ++ * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures ++ * that VERW clears CPU buffers. + * + * When MDS_NO=0 is exported, guests deploy clear CPU buffer + * mitigation and don't complain: diff --git a/debian/patches/bugfix/x86/taa/0019-TAAv6-7.patch b/debian/patches/bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch similarity index 54% rename from debian/patches/bugfix/x86/taa/0019-TAAv6-7.patch rename to debian/patches/bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch index c431406a5..87b428e70 100644 --- a/debian/patches/bugfix/x86/taa/0019-TAAv6-7.patch +++ b/debian/patches/bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch @@ -1,6 +1,6 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:28:56 -0700 -Subject: TAAv6 7 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:28:57 +0200 +Subject: x86/tsx: Add "auto" option to the tsx= cmdline parameter Platforms which are not affected by X86_BUG_TAA may want the TSX feature enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto @@ -9,16 +9,30 @@ disable TSX when X86_BUG_TAA is present, otherwise enable TSX. More details on X86_BUG_TAA can be found here: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html + [ bp: Extend the arg buffer to accommodate "auto\0". ] + Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck -Tested-by: Neelima Krishnan +Cc: "H. Peter Anvin" +Cc: "Paul E. McKenney" +Cc: Andrew Morton +Cc: Ingo Molnar +Cc: Jonathan Corbet +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: linux-doc@vger.kernel.org +Cc: Mark Gross +Cc: Mauro Carvalho Chehab +Cc: Thomas Gleixner +Cc: x86-ml --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ - arch/x86/kernel/cpu/tsx.c | 5 +++++ - 2 files changed, 10 insertions(+) + arch/x86/kernel/cpu/tsx.c | 7 ++++++- + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index f03756d2addb..dffdd4d86f4b 100644 +index f03756d2addb..e6a58cbbfab8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4715,6 +4715,11 @@ @@ -28,16 +42,25 @@ index f03756d2addb..dffdd4d86f4b 100644 + auto - Disable TSX if X86_BUG_TAA is present, + otherwise enable TSX on the system. 
+ -+ More details on X86_BUG_TAA are here: ++ More details on X86_BUG_TAA here: + Documentation/admin-guide/hw-vuln/tsx_async_abort.rst Not specifying this option is equivalent to tsx=off. diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c -index e39b33b7cef8..e93abe6f0bb9 100644 +index e5933ef50add..89ab91eacd4f 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c -@@ -80,6 +80,11 @@ void __init tsx_init(void) +@@ -69,7 +69,7 @@ static bool __init tsx_ctrl_is_supported(void) + + void __init tsx_init(void) + { +- char arg[4] = {}; ++ char arg[5] = {}; + int ret; + + if (!tsx_ctrl_is_supported()) +@@ -81,6 +81,11 @@ void __init tsx_init(void) tsx_ctrl_state = TSX_CTRL_ENABLE; } else if (!strcmp(arg, "off")) { tsx_ctrl_state = TSX_CTRL_DISABLE; @@ -48,4 +71,4 @@ index e39b33b7cef8..e93abe6f0bb9 100644 + tsx_ctrl_state = TSX_CTRL_ENABLE; } else { tsx_ctrl_state = TSX_CTRL_DISABLE; - pr_info("tsx: invalid option, defaulting to off\n"); + pr_err("tsx: invalid option, defaulting to off\n"); diff --git a/debian/patches/bugfix/x86/taa/0020-TAAv6-8.patch b/debian/patches/bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch similarity index 65% rename from debian/patches/bugfix/x86/taa/0020-TAAv6-8.patch rename to debian/patches/bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch index 24a24c010..4eafcb6c3 100644 --- a/debian/patches/bugfix/x86/taa/0020-TAAv6-8.patch +++ b/debian/patches/bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch @@ -1,16 +1,29 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:29:57 -0700 -Subject: TAAv6 8 +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:32:55 +0200 +Subject: x86/speculation/taa: Add documentation for TSX Async Abort Add the documenation for TSX Async Abort. Include the description of the issue, how to check the mitigation state, control the mitigation, guidance for system administrators. -Signed-off-by: Pawan Gupta + [ bp: Add proper SPDX tags, touch ups. ] + Co-developed-by: Antonio Gomez Iglesias +Signed-off-by: Pawan Gupta Signed-off-by: Antonio Gomez Iglesias +Signed-off-by: Borislav Petkov Reviewed-by: Mark Gross Reviewed-by: Tony Luck +Cc: Andrew Morton +Cc: Fenghua Yu +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Jonathan Corbet +Cc: Josh Poimboeuf +Cc: Juergen Gross +Cc: linux-doc@vger.kernel.org +Cc: Thomas Gleixner +Cc: x86-ml [bwh: Forward-ported on top of NX: Fix conflict (neighbouring insertions) in Documentation/ABI/testing/sysfs-devices-system-cpu] [bwh: Backported to 4.19: adjust context] @@ -18,11 +31,11 @@ Signed-off-by: Ben Hutchings --- .../ABI/testing/sysfs-devices-system-cpu | 1 + Documentation/admin-guide/hw-vuln/index.rst | 1 + - .../admin-guide/hw-vuln/tsx_async_abort.rst | 240 ++++++++++++++++++ + .../admin-guide/hw-vuln/tsx_async_abort.rst | 256 ++++++++++++++++++ .../admin-guide/kernel-parameters.txt | 36 +++ Documentation/x86/index.rst | 1 + - Documentation/x86/tsx_async_abort.rst | 54 ++++ - 6 files changed, 333 insertions(+) + Documentation/x86/tsx_async_abort.rst | 117 ++++++++ + 6 files changed, 412 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/tsx_async_abort.rst create mode 100644 Documentation/x86/tsx_async_abort.rst @@ -45,7 +58,9 @@ Signed-off-by: Ben Hutchings + tsx_async_abort --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst -@@ -0,0 +1,240 @@ +@@ -0,0 +1,256 @@ ++.. 
SPDX-License-Identifier: GPL-2.0 ++ +TAA - TSX Asynchronous Abort +====================================== + @@ -59,7 +74,7 @@ Signed-off-by: Ben Hutchings +This vulnerability only affects Intel processors that support Intel +Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8) +is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit -+(bit 5)is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations ++(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations +also mitigate against TAA. + +Whether a processor is affected or not can be read out from the TAA @@ -81,13 +96,13 @@ Signed-off-by: Ben Hutchings +Problem +------- + -+When performing store, load, L1 refill operations, processors write data into -+temporary microarchitectural structures (buffers). The data in the buffer can -+be forwarded to load operations as an optimization. ++When performing store, load or L1 refill operations, processors write ++data into temporary microarchitectural structures (buffers). The data in ++those buffers can be forwarded to load operations as an optimization. + -+Intel TSX are an extension to the x86 instruction set architecture that adds ++Intel TSX is an extension to the x86 instruction set architecture that adds +hardware transactional memory support to improve performance of multi-threaded -+software. TSX lets the processor expose and exploit concurrence hidden in an ++software. TSX lets the processor expose and exploit concurrency hidden in an +application due to dynamically avoiding unnecessary synchronization. + +TSX supports atomic memory transactions that are either committed (success) or @@ -108,8 +123,8 @@ Signed-off-by: Ben Hutchings +attacker needs to begin a TSX transaction and raise an asynchronous abort +to try to leak some of data stored in the buffers. + -+Deeper technical information is available in the TAA specific x86 architecture -+section: :ref:`Documentation/x86/tsx_async_abort.rst `. ++More detailed technical information is available in the TAA specific x86 ++architecture section: :ref:`Documentation/x86/tsx_async_abort.rst `. + + +Attack scenarios @@ -180,8 +195,7 @@ Signed-off-by: Ben Hutchings + + +The mitigation can be controlled at boot time via a kernel command line option. -+See :ref:`taa_mitigation_control_command_line`. It also provides a sysfs -+interface. See :ref:`taa_mitigation_sysfs`. ++See :ref:`taa_mitigation_control_command_line`. + +.. _virt_mechanism: + @@ -254,6 +268,21 @@ Signed-off-by: Ben Hutchings +buffers. For platforms without TSX control "tsx" command line argument has no +effect. + ++For the affected platforms below table indicates the mitigation status for the ++combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO ++and TSX_CTRL_MSR. 
++ ++ ======= ========= ============= ======================================== ++ MDS_NO MD_CLEAR TSX_CTRL_MSR Status ++ ======= ========= ============= ======================================== ++ 0 0 0 Vulnerable (needs ucode) ++ 0 1 0 MDS and TAA mitigated via VERW ++ 1 1 0 MDS fixed, TAA vulnerable if TSX enabled ++ because MD_CLEAR has no meaning and ++ VERW is not guaranteed to clear buffers ++ 1 X 1 MDS fixed, TAA can be mitigated by ++ VERW or TSX_CTRL_MSR ++ ======= ========= ============= ======================================== + +Mitigation selection guide +-------------------------- @@ -285,7 +314,7 @@ Signed-off-by: Ben Hutchings + +The kernel's default action for vulnerable processors is: + -+ - Deploy TSX disable mitigation (tsx_async_abort=full). ++ - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off). --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2538,6 +2538,7 @@ @@ -304,9 +333,9 @@ Signed-off-by: Ben Hutchings mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this -@@ -4528,6 +4530,40 @@ - neutralize any effect of /proc/sys/kernel/sysrq. - Useful for debugging. +@@ -4718,6 +4720,40 @@ + + Not specifying this option is equivalent to tsx=off. + tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async + Abort (TAA) vulnerability. @@ -342,9 +371,9 @@ Signed-off-by: Ben Hutchings + For details see: + Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + - tcpmhash_entries= [KNL,NET] - Set the number of tcp_metrics_hash slots. - Default value is 8192 or 16384 depending on total + turbografx.map[2|3]= [HW,JOY] + TurboGraFX parallel port interface + Format: --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -6,3 +6,4 @@ x86 architecture specifics @@ -354,9 +383,11 @@ Signed-off-by: Ben Hutchings + tsx_async_abort --- /dev/null +++ b/Documentation/x86/tsx_async_abort.rst -@@ -0,0 +1,54 @@ +@@ -0,0 +1,117 @@ ++.. SPDX-License-Identifier: GPL-2.0 ++ +TSX Async Abort (TAA) mitigation -+================================================= ++================================ + +.. _tsx_async_abort: + @@ -376,15 +407,14 @@ Signed-off-by: Ben Hutchings +Mitigation strategy +------------------- + -+a) TSX disable - One of the mitigation is to disable TSX feature. A new MSR -+IA32_TSX_CTRL will be available in future and current processors and after a -+microcode update in which can be used to disable TSX. This MSR can be used to -+disable the TSX feature and the enumeration of the TSX feature bits(RTM and -+HLE) in CPUID. ++a) TSX disable - one of the mitigations is to disable TSX. A new MSR ++IA32_TSX_CTRL will be available in future and current processors after ++microcode update which can be used to disable TSX. In addition, it ++controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID. + -+b) CPU clear buffers - Similar to MDS, clearing the CPU buffers mitigates this -+vulnerability. More details on this approach can be found here -+https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html ++b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this ++vulnerability. More details on this approach can be found in ++:ref:`Documentation/admin-guide/hw-vuln/mds.rst `. + +Kernel internal mitigation modes +-------------------------------- @@ -406,6 +436,68 @@ Signed-off-by: Ben Hutchings + effort approach without guarantee. 
+ ============= ============================================================ + -+If the CPU is affected and "tsx_async_abort" kernel command line parameter is ++If the CPU is affected and the "tsx_async_abort" kernel command line parameter is +not provided then the kernel selects an appropriate mitigation depending on the +status of RTM and MD_CLEAR CPUID bits. ++ ++Below tables indicate the impact of tsx=on|off|auto cmdline options on state of ++TAA mitigation, VERW behavior and TSX feature for various combinations of ++MSR_IA32_ARCH_CAPABILITIES bits. ++ ++1. "tsx=off" ++ ++========= ========= ============ ============ ============== =================== ====================== ++MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off ++---------------------------------- ------------------------------------------------------------------------- ++TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation ++ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full ++========= ========= ============ ============ ============== =================== ====================== ++ 0 0 0 HW default Yes Same as MDS Same as MDS ++ 0 0 1 Invalid case Invalid case Invalid case Invalid case ++ 0 1 0 HW default No Need ucode update Need ucode update ++ 0 1 1 Disabled Yes TSX disabled TSX disabled ++ 1 X 1 Disabled X None needed None needed ++========= ========= ============ ============ ============== =================== ====================== ++ ++2. "tsx=on" ++ ++========= ========= ============ ============ ============== =================== ====================== ++MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on ++---------------------------------- ------------------------------------------------------------------------- ++TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation ++ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full ++========= ========= ============ ============ ============== =================== ====================== ++ 0 0 0 HW default Yes Same as MDS Same as MDS ++ 0 0 1 Invalid case Invalid case Invalid case Invalid case ++ 0 1 0 HW default No Need ucode update Need ucode update ++ 0 1 1 Enabled Yes None Same as MDS ++ 1 X 1 Enabled X None needed None needed ++========= ========= ============ ============ ============== =================== ====================== ++ ++3. "tsx=auto" ++ ++========= ========= ============ ============ ============== =================== ====================== ++MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto ++---------------------------------- ------------------------------------------------------------------------- ++TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation ++ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full ++========= ========= ============ ============ ============== =================== ====================== ++ 0 0 0 HW default Yes Same as MDS Same as MDS ++ 0 0 1 Invalid case Invalid case Invalid case Invalid case ++ 0 1 0 HW default No Need ucode update Need ucode update ++ 0 1 1 Disabled Yes TSX disabled TSX disabled ++ 1 X 1 Enabled X None needed None needed ++========= ========= ============ ============ ============== =================== ====================== ++ ++In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that ++indicates whether MSR_IA32_TSX_CTRL is supported. 
++ ++There are two control bits in IA32_TSX_CTRL MSR: ++ ++ Bit 0: When set it disables the Restricted Transactional Memory (RTM) ++ sub-feature of TSX (will force all transactions to abort on the ++ XBEGIN instruction). ++ ++ Bit 1: When set it disables the enumeration of the RTM and HLE feature ++ (i.e. it will make CPUID(EAX=7).EBX{bit4} and ++ CPUID(EAX=7).EBX{bit11} read as 0). diff --git a/debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch b/debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch deleted file mode 100644 index 9d739b982..000000000 --- a/debian/patches/bugfix/x86/taa/0021-TAAv6-9.patch +++ /dev/null @@ -1,385 +0,0 @@ -From: speck for Pawan Gupta -Date: Wed, 9 Oct 2019 16:30:57 -0700 -Subject: TAAv6 9 - -Transactional Synchronization Extensions (TSX) is an extension to the -x86 instruction set architecture (ISA) that adds Hardware Transactional -Memory (HTM) support. Changing TSX state currently requires a reboot. -This may not be desirable when rebooting imposes a huge penalty. Add -support to control TSX feature via a new sysfs file: -/sys/devices/system/cpu/hw_tx_mem - -- Writing 0|off|N|n to this file disables TSX feature on all the CPUs. - This is equivalent to boot parameter tsx=off. -- Writing 1|on|Y|y to this file enables TSX feature on all the CPUs. - This is equivalent to boot parameter tsx=on. -- Reading from this returns the status of TSX feature. -- When TSX control is not supported this interface is not visible in - sysfs. - -Changing the TSX state from this interface also updates CPUID.RTM -feature bit. From the kernel side, this feature bit doesn't result in -any ALTERNATIVE code patching. No memory allocations are done to -save/restore user state. No code paths in outside of the tests for -vulnerability to TAA are dependent on the value of the feature bit. In -general the kernel doesn't care whether RTM is present or not. - -Applications typically look at CPUID bits once at startup (or when first -calling into a library that uses the feature). So we have a couple of -cases to cover: - -1) An application started and saw that RTM was enabled, so began - to use it. Then TSX was disabled. Net result in this case is that - the application will keep trying to use RTM, but every xbegin() will - immediately abort the transaction. This has a performance impact to - the application, but it doesn't affect correctness because all users - of RTM must have a fallback path for when the transaction aborts. Note - that even if an application is in the middle of a transaction when we - disable RTM, we are safe. The XPI that we use to update the TSX_CTRL - MSR will abort the transaction (just as any interrupt would abort - a transaction). - -2) An application starts and sees RTM is not available. So it will - always use alternative paths. Even if TSX is enabled and RTM is set, - applications in general do not re-evaluate their choice so will - continue to run in non-TSX mode. - -When the TSX state is changed from the sysfs interface, TSX Async Abort -(TAA) mitigation state also needs to be updated. Set the TAA mitigation -state as per TSX and VERW static branch state. 
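To make the interface contract above concrete, a hypothetical userspace
toggle (the sysfs path is the one this patch documents; the helper name and
the minimal error handling are ours):

	#include <stdio.h>

	/* Sketch: flip the system-wide HTM (TSX) state via sysfs. */
	static int set_hw_tx_mem(int enable)
	{
		FILE *f = fopen("/sys/devices/system/cpu/hw_tx_mem", "w");

		if (!f)
			return -1;	/* absent when TSX control is unsupported */
		fprintf(f, "%d\n", enable ? 1 : 0);
		return fclose(f);	/* buffered write reaches the kernel here */
	}

Reading the same file back returns 0 or 1, as described above.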
- -Signed-off-by: Pawan Gupta -Reviewed-by: Mark Gross -Reviewed-by: Tony Luck -Tested-by: Neelima Krishnan -[bwh: Backported to 4.19: adjust context] -Signed-off-by: Ben Hutchings ---- - .../ABI/testing/sysfs-devices-system-cpu | 23 ++++ - .../admin-guide/hw-vuln/tsx_async_abort.rst | 29 +++++ - arch/x86/kernel/cpu/bugs.c | 21 +++- - arch/x86/kernel/cpu/cpu.h | 3 +- - arch/x86/kernel/cpu/tsx.c | 100 +++++++++++++++++- - drivers/base/cpu.c | 32 +++++- - include/linux/cpu.h | 6 ++ - 7 files changed, 210 insertions(+), 4 deletions(-) - -diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu -index a1bd0b6766d7..2a98f6c70add 100644 ---- a/Documentation/ABI/testing/sysfs-devices-system-cpu -+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu -@@ -513,3 +513,26 @@ Description: Control Symetric Multi Threading (SMT) - - If control status is "forceoff" or "notsupported" writes - are rejected. -+ -+What: /sys/devices/system/cpu/hw_tx_mem -+Date: August 2019 -+Contact: Pawan Gupta -+ Linux kernel mailing list -+Description: Hardware Transactional Memory (HTM) control. -+ -+ Read/write interface to control HTM feature for all the CPUs in -+ the system. This interface is only present on platforms that -+ support HTM control. HTM is a hardware feature to speed up the -+ execution of multi-threaded software through lock elision. An -+ example of HTM implementation is Intel Transactional -+ Synchronization Extensions (TSX). -+ -+ Read returns the status of HTM feature. -+ -+ 0: HTM is disabled -+ 1: HTM is enabled -+ -+ Write sets the state of HTM feature. -+ -+ 0: Disables HTM -+ 1: Enables HTM -diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst -index 58f24db49615..b62bc749fd8c 100644 ---- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst -+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst -@@ -207,6 +207,35 @@ buffers. For platforms without TSX control "tsx" command line argument has no - effect. - - -+.. _taa_mitigation_sysfs: -+ -+Mitigation control using sysfs -+------------------------------ -+ -+For those affected systems that can not be frequently rebooted to enable or -+disable TSX, sysfs can be used as an alternative after installing the updates. -+The possible values for the file /sys/devices/system/cpu/hw_tx_mem are: -+ -+ ============ ============================================================= -+ 0 Disable TSX. Upon entering a TSX transactional region, the code -+ will immediately abort, before any instruction executes within -+ the transactional region even speculatively, and continue on -+ the fallback. Equivalent to boot parameter "tsx=off". -+ -+ 1 Enable TSX. Equivalent to boot parameter "tsx=on". -+ -+ ============ ============================================================= -+ -+Reading from this file returns the status of TSX feature. This file is only -+present on systems that support TSX control. -+ -+When disabling TSX by using the sysfs mechanism, applications that are already -+running and use TSX will see their transactional regions aborted and execution -+flow will be redirected to the fallback, losing the benefits of the -+non-blocking path. TSX needs fallback code to guarantee correct execution -+without transactional regions. 
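The fallback requirement stated above is the standard RTM lock-elision
pattern. A self-contained sketch of it (our example, not part of this series;
build with gcc -mrtm and run on RTM-capable hardware):

	#include <immintrin.h>

	static volatile int fallback_lock;	/* 0 = free, 1 = held */

	static void elided_lock(void)
	{
		if (_xbegin() == _XBEGIN_STARTED) {
			/* Keep elision correct: abort if a lock holder is active. */
			if (fallback_lock)
				_xabort(0xff);
			return;		/* critical section runs transactionally */
		}
		/* Aborted - and once TSX is disabled, every _xbegin() aborts. */
		while (__sync_lock_test_and_set(&fallback_lock, 1))
			while (fallback_lock)
				;	/* spin until the lock looks free */
	}

	static void elided_unlock(void)
	{
		if (fallback_lock)
			__sync_lock_release(&fallback_lock);
		else
			_xend();	/* commit the transaction */
	}

Disabling TSX at runtime merely diverts all callers onto the fallback_lock
path, which preserves correctness at some performance cost.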
-+ -+ - Mitigation selection guide - -------------------------- - -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index c435bc5dc19b..f0a998c10056 100644 ---- a/arch/x86/kernel/cpu/bugs.c -+++ b/arch/x86/kernel/cpu/bugs.c -@@ -274,7 +274,7 @@ early_param("mds", mds_cmdline); - #define pr_fmt(fmt) "TAA: " fmt - - /* Default mitigation for TAA-affected CPUs */ --static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; -+static enum taa_mitigations taa_mitigation = TAA_MITIGATION_VERW; - static bool taa_nosmt __ro_after_init; - - static const char * const taa_strings[] = { -@@ -374,6 +374,25 @@ static int __init tsx_async_abort_cmdline(char *str) - } - early_param("tsx_async_abort", tsx_async_abort_cmdline); - -+void taa_update_mitigation(bool tsx_enabled) -+{ -+ /* -+ * When userspace changes the TSX state, update taa_mitigation -+ * so that the updated mitigation state is shown in: -+ * /sys/devices/system/cpu/vulnerabilities/tsx_async_abort -+ * -+ * Check if TSX is disabled. -+ * Check if CPU buffer clear is enabled. -+ * else the system is vulnerable. -+ */ -+ if (!tsx_enabled) -+ taa_mitigation = TAA_MITIGATION_TSX_DISABLE; -+ else if (static_key_count(&mds_user_clear.key)) -+ taa_mitigation = TAA_MITIGATION_VERW; -+ else -+ taa_mitigation = TAA_MITIGATION_OFF; -+} -+ - #undef pr_fmt - #define pr_fmt(fmt) "Spectre V1 : " fmt - -diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h -index 236582c90d3f..57fd603d367f 100644 ---- a/arch/x86/kernel/cpu/cpu.h -+++ b/arch/x86/kernel/cpu/cpu.h -@@ -52,11 +52,12 @@ enum tsx_ctrl_states { - TSX_CTRL_NOT_SUPPORTED, - }; - --extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; -+extern enum tsx_ctrl_states tsx_ctrl_state; - - extern void __init tsx_init(void); - extern void tsx_enable(void); - extern void tsx_disable(void); -+extern void taa_update_mitigation(bool tsx_enabled); - #else - static inline void tsx_init(void) { } - #endif /* CONFIG_CPU_SUP_INTEL */ -diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c -index e93abe6f0bb9..96320449abb7 100644 ---- a/arch/x86/kernel/cpu/tsx.c -+++ b/arch/x86/kernel/cpu/tsx.c -@@ -10,12 +10,15 @@ - - #include - #include -+#include - - #include - - #include "cpu.h" - --enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; -+static DEFINE_MUTEX(tsx_mutex); -+ -+enum tsx_ctrl_states tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; - - void tsx_disable(void) - { -@@ -118,3 +121,98 @@ void __init tsx_init(void) - setup_force_cpu_cap(X86_FEATURE_RTM); - } - } -+ -+static void tsx_update_this_cpu(void *arg) -+{ -+ unsigned long enable = (unsigned long)arg; -+ -+ if (enable) -+ tsx_enable(); -+ else -+ tsx_disable(); -+} -+ -+/* Take tsx_mutex lock and update tsx_ctrl_state when calling this function */ -+static void tsx_update_on_each_cpu(bool val) -+{ -+ get_online_cpus(); -+ on_each_cpu(tsx_update_this_cpu, (void *)val, 1); -+ put_online_cpus(); -+} -+ -+ssize_t hw_tx_mem_show(struct device *dev, struct device_attribute *attr, -+ char *buf) -+{ -+ return sprintf(buf, "%d\n", tsx_ctrl_state == TSX_CTRL_ENABLE ? 
1 : 0); -+} -+ -+ssize_t hw_tx_mem_store(struct device *dev, struct device_attribute *attr, -+ const char *buf, size_t count) -+{ -+ enum tsx_ctrl_states requested_state; -+ ssize_t ret; -+ bool val; -+ -+ ret = kstrtobool(buf, &val); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&tsx_mutex); -+ -+ if (val) -+ requested_state = TSX_CTRL_ENABLE; -+ else -+ requested_state = TSX_CTRL_DISABLE; -+ -+ /* Current state is same as the requested state, do nothing */ -+ if (tsx_ctrl_state == requested_state) -+ goto exit; -+ -+ tsx_ctrl_state = requested_state; -+ -+ /* -+ * Changing the TSX state from this interface also updates CPUID.RTM -+ * feature bit. From the kernel side, this feature bit doesn't result -+ * in any ALTERNATIVE code patching. No memory allocations are done to -+ * save/restore user state. No code paths in outside of the tests for -+ * vulnerability to TAA are dependent on the value of the feature bit. -+ * In general the kernel doesn't care whether RTM is present or not. -+ * -+ * From the user side it is a bit fuzzier. Applications typically look -+ * at CPUID bits once at startup (or when first calling into a library -+ * that uses the feature). So we have a couple of cases to cover: -+ * -+ * 1) An application started and saw that RTM was enabled, so began -+ * to use it. Then TSX was disabled. Net result in this case is -+ * that the application will keep trying to use RTM, but every -+ * xbegin() will immediately abort the transaction. This has a -+ * performance impact to the application, but it doesn't affect -+ * correctness because all users of RTM must have a fallback path -+ * for when the transaction aborts. Note that even if an application -+ * is in the middle of a transaction when we disable RTM, we are -+ * safe. The XPI that we use to update the TSX_CTRL MSR will abort -+ * the transaction (just as any interrupt would abort a -+ * transaction). -+ * -+ * 2) An application starts and sees RTM is not available. So it will -+ * always use alternative paths. Even if TSX is enabled and RTM is -+ * set, applications in general do not re-evaluate their choice so -+ * will continue to run in non-TSX mode. 
-+ */ -+ tsx_update_on_each_cpu(val); -+ -+ if (boot_cpu_has_bug(X86_BUG_TAA)) -+ taa_update_mitigation(val); -+exit: -+ mutex_unlock(&tsx_mutex); -+ -+ return count; -+} -+ -+umode_t hw_tx_mem_is_visible(void) -+{ -+ if (tsx_ctrl_state == TSX_CTRL_NOT_SUPPORTED) -+ return 0; -+ -+ return 0644; -+} -diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c -index e9e7fde0fe00..ebc46fd81762 100644 ---- a/drivers/base/cpu.c -+++ b/drivers/base/cpu.c -@@ -458,6 +458,34 @@ struct device *cpu_device_create(struct device *parent, void *drvdata, - } - EXPORT_SYMBOL_GPL(cpu_device_create); - -+ssize_t __weak hw_tx_mem_show(struct device *dev, struct device_attribute *a, -+ char *buf) -+{ -+ return -ENODEV; -+} -+ -+ssize_t __weak hw_tx_mem_store(struct device *dev, struct device_attribute *a, -+ const char *buf, size_t count) -+{ -+ return -ENODEV; -+} -+ -+DEVICE_ATTR_RW(hw_tx_mem); -+ -+umode_t __weak hw_tx_mem_is_visible(void) -+{ -+ return 0; -+} -+ -+static umode_t cpu_root_attrs_is_visible(struct kobject *kobj, -+ struct attribute *attr, int index) -+{ -+ if (attr == &dev_attr_hw_tx_mem.attr) -+ return hw_tx_mem_is_visible(); -+ -+ return attr->mode; -+} -+ - #ifdef CONFIG_GENERIC_CPU_AUTOPROBE - static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); - #endif -@@ -479,11 +507,13 @@ static struct attribute *cpu_root_attrs[] = { - #ifdef CONFIG_GENERIC_CPU_AUTOPROBE - &dev_attr_modalias.attr, - #endif -+ &dev_attr_hw_tx_mem.attr, - NULL - }; - - static struct attribute_group cpu_root_attr_group = { -- .attrs = cpu_root_attrs, -+ .attrs = cpu_root_attrs, -+ .is_visible = cpu_root_attrs_is_visible, - }; - - static const struct attribute_group *cpu_root_attr_groups[] = { -diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index 9d8dba19844e..7bd8ced5c000 100644 ---- a/include/linux/cpu.h -+++ b/include/linux/cpu.h -@@ -65,6 +65,12 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, - struct device_attribute *attr, - char *buf); - -+extern ssize_t hw_tx_mem_show(struct device *dev, struct device_attribute *a, -+ char *buf); -+extern ssize_t hw_tx_mem_store(struct device *dev, struct device_attribute *a, -+ const char *buf, size_t count); -+extern umode_t hw_tx_mem_is_visible(void); -+ - extern __printf(4, 5) - struct device *cpu_device_create(struct device *parent, void *drvdata, - const struct attribute_group **groups, diff --git a/debian/patches/bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch b/debian/patches/bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch new file mode 100644 index 000000000..e3e1418a4 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch @@ -0,0 +1,134 @@ +From: Michal Hocko +Date: Wed, 23 Oct 2019 12:35:50 +0200 +Subject: x86/tsx: Add config options to set tsx=on|off|auto + +There is a general consensus that TSX usage is not largely spread while +the history shows there is a non trivial space for side channel attacks +possible. Therefore the tsx is disabled by default even on platforms +that might have a safe implementation of TSX according to the current +knowledge. This is a fair trade off to make. + +There are, however, workloads that really do benefit from using TSX and +updating to a newer kernel with TSX disabled might introduce a +noticeable regressions. This would be especially a problem for Linux +distributions which will provide TAA mitigations. 
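A distribution could, for example, ship a default like the following
fragment, using the config symbols introduced in the next paragraph
(hypothetical .config excerpt; the tsx= command line parameter still
overrides the built-in default):

	# Hypothetical distro default: disable TSX only where TAA applies.
	CONFIG_X86_INTEL_TSX_MODE_AUTO=y
	# CONFIG_X86_INTEL_TSX_MODE_OFF is not set
	# CONFIG_X86_INTEL_TSX_MODE_ON is not set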
+ +Introduce config options X86_INTEL_TSX_MODE_OFF, X86_INTEL_TSX_MODE_ON +and X86_INTEL_TSX_MODE_AUTO to control the TSX feature. The config +setting can be overridden by the tsx cmdline options. + +Suggested-by: Borislav Petkov +Signed-off-by: Michal Hocko +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: Tony Luck +Cc: x86-ml +--- + arch/x86/Kconfig | 45 +++++++++++++++++++++++++++++++++++++++ + arch/x86/kernel/cpu/tsx.c | 22 +++++++++++++------ + 2 files changed, 61 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index e76d16ac2776..a47343bb439c 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1903,6 +1903,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS + + If unsure, say y. + ++choice ++ prompt "TSX enable mode" ++ depends on CPU_SUP_INTEL ++ default X86_INTEL_TSX_MODE_OFF ++ help ++ Intel's TSX (Transactional Synchronization Extensions) feature ++ allows to optimize locking protocols through lock elision which ++ can lead to a noticeable performance boost. ++ ++ On the other hand it has been shown that TSX can be exploited ++ to form side channel attacks (e.g. TAA) and chances are there ++ will be more of those attacks discovered in the future. ++ ++ Therefore TSX is not enabled by default (aka tsx=off). An admin ++ might override this decision by tsx=on command line parameter. This ++ has a risk that TSX will get enabled also on platforms which are ++ known to be vulnerable to attacks like TAA and a safer option is to ++ use tsx=auto command line parameter. ++ ++ This options allows to set the default tsx mode between tsx=on, off ++ and auto. See Documentation/admin-guide/kernel-parameters.txt for more ++ details. ++ ++ Say off if not sure, auto if TSX is in use but it should be used on safe ++ platforms or on if TSX is in use and the security aspect of tsx is not ++ relevant. ++ ++config X86_INTEL_TSX_MODE_OFF ++ bool "off" ++ help ++ TSX is always disabled - equals tsx=off command line parameter. ++ ++config X86_INTEL_TSX_MODE_ON ++ bool "on" ++ help ++ TSX is always enabled on TSX capable HW - equals tsx=on command line ++ parameter. ++ ++config X86_INTEL_TSX_MODE_AUTO ++ bool "auto" ++ help ++ TSX is enabled on TSX capable HW that is believed to be safe against ++ side channel attacks- equals tsx=auto command line parameter. 
++endchoice ++ + config EFI + bool "EFI runtime service support" + depends on ACPI +diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c +index 89ab91eacd4f..ab400f8bbfe1 100644 +--- a/arch/x86/kernel/cpu/tsx.c ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -67,6 +67,14 @@ static bool __init tsx_ctrl_is_supported(void) + return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); + } + ++static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) ++{ ++ if (boot_cpu_has_bug(X86_BUG_TAA)) ++ return TSX_CTRL_DISABLE; ++ ++ return TSX_CTRL_ENABLE; ++} ++ + void __init tsx_init(void) + { + char arg[5] = {}; +@@ -82,17 +90,19 @@ void __init tsx_init(void) + } else if (!strcmp(arg, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(arg, "auto")) { +- if (boot_cpu_has_bug(X86_BUG_TAA)) +- tsx_ctrl_state = TSX_CTRL_DISABLE; +- else +- tsx_ctrl_state = TSX_CTRL_ENABLE; ++ tsx_ctrl_state = x86_get_tsx_auto_mode(); + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("tsx: invalid option, defaulting to off\n"); + } + } else { +- /* tsx= not provided, defaulting to off */ +- tsx_ctrl_state = TSX_CTRL_DISABLE; ++ /* tsx= not provided */ ++ if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) ++ tsx_ctrl_state = x86_get_tsx_auto_mode(); ++ else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) ++ tsx_ctrl_state = TSX_CTRL_DISABLE; ++ else ++ tsx_ctrl_state = TSX_CTRL_ENABLE; + } + + if (tsx_ctrl_state == TSX_CTRL_DISABLE) { diff --git a/debian/patches/series b/debian/patches/series index 1ffa003fb..02c30d1c8 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -270,15 +270,15 @@ bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch -bugfix/x86/taa/0013-TAAv6-1.patch -bugfix/x86/taa/0014-TAAv6-2.patch -bugfix/x86/taa/0015-TAAv6-3.patch -bugfix/x86/taa/0016-TAAv6-4.patch -bugfix/x86/taa/0017-TAAv6-5.patch -bugfix/x86/taa/0018-TAAv6-6.patch -bugfix/x86/taa/0019-TAAv6-7.patch -bugfix/x86/taa/0020-TAAv6-8.patch -bugfix/x86/taa/0021-TAAv6-9.patch +bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch +bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch +bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch +bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch +bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch +bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch +bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch +bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch +bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From cd92ab49c40abe28c5f539d16d892e71eff79b14 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Thu, 7 Nov 2019 17:10:09 +0100 Subject: [PATCH 06/13] KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active --- debian/changelog | 1 + ...ys-run-with-EFER.NXE-1-when-shadow-p.patch | 68 +++++++++++++++++++ debian/patches/series | 1 + 3 files changed, 70 insertions(+) create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch diff --git a/debian/changelog 
b/debian/changelog index baf2fb278..dfb3dd632 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,7 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON - KVM: x86: add tracepoints around __direct_map and FNAME(fetch) - kvm: x86, powerpc: do not allow clearing largepages debugfs entry + - KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active - x86: Add ITLB_MULTIHIT bug infrastructure - kvm: mmu: ITLB_MULTIHIT mitigation - kvm: Add helper function for creating VM worker threads diff --git a/debian/patches/bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch b/debian/patches/bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch new file mode 100644 index 000000000..cb958309d --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch @@ -0,0 +1,68 @@ +From: Paolo Bonzini +Date: Sun, 27 Oct 2019 16:23:23 +0100 +Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is + active +Origin: https://git.kernel.org/linus/9167ab79936206118cc60e47dcb926c3489f3bd5 + +VMX already does so if the host has SMEP, in order to support the combination of +CR0.WP=1 and CR4.SMEP=1. However, it is perfectly safe to always do so, and in +fact VMX already ends up running with EFER.NXE=1 on old processors that lack the +"load EFER" controls, because it may help avoiding a slow MSR write. Removing +all the conditionals simplifies the code. + +SVM does not have similar code, but it should since recent AMD processors do +support SMEP. So this patch also makes the code for the two vendors more similar +while fixing NPT=0, CR0.WP=1 and CR4.SMEP=1 on AMD processors. + +Cc: stable@vger.kernel.org +Cc: Joerg Roedel +Signed-off-by: Paolo Bonzini +[Salvatore Bonaccorso: Backport to 4.19: Adjust context, filename change back +to arch/x86/kvm/vmx.c] +--- + arch/x86/kvm/svm.c | 10 ++++++++-- + arch/x86/kvm/vmx.c | 14 +++----------- + 2 files changed, 11 insertions(+), 13 deletions(-) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -736,8 +736,14 @@ static int get_npt_level(struct kvm_vcpu + static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) + { + vcpu->arch.efer = efer; +- if (!npt_enabled && !(efer & EFER_LMA)) +- efer &= ~EFER_LME; ++ ++ if (!npt_enabled) { ++ /* Shadow paging assumes NX to be available. */ ++ efer |= EFER_NX; ++ ++ if (!(efer & EFER_LMA)) ++ efer &= ~EFER_LME; ++ } + + to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; + mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2785,17 +2785,9 @@ static bool update_transition_efer(struc + u64 guest_efer = vmx->vcpu.arch.efer; + u64 ignore_bits = 0; + +- if (!enable_ept) { +- /* +- * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing +- * host CPUID is more efficient than testing guest CPUID +- * or CR4. Host SMEP is anyway a requirement for guest SMEP. +- */ +- if (boot_cpu_has(X86_FEATURE_SMEP)) +- guest_efer |= EFER_NX; +- else if (!(guest_efer & EFER_NX)) +- ignore_bits |= EFER_NX; +- } ++ /* Shadow paging assumes NX to be available. */ ++ if (!enable_ept) ++ guest_efer |= EFER_NX; + + /* + * LMA and LME handled by hardware; SCE meaningless outside long mode. 
diff --git a/debian/patches/series b/debian/patches/series index 02c30d1c8..30e12173f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -266,6 +266,7 @@ bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustmen bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch From 37baed7166355b369cb481236825143aabb02dd3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 7 Nov 2019 18:04:08 +0000 Subject: [PATCH 07/13] [x86] Update TAA (Borislav v2) and NX (v9) fixes The upstream commits for these are now finalised, so we shouldn't need to replace patches after this (but might need to add more). --- debian/changelog | 32 +-- ...kvm_mmu_page-member-to-save-8-bytes.patch} | 2 - ...011-kvm-Convert-kvm_lock-to-a-mutex.patch} | 48 ++--- ...elease-the-page-inside-mmu_set_spte.patch} | 24 +-- ...E-fetch-and-__direct_map-more-simil.patch} | 16 +- ...ow-unneeded-hugepage-gfn-adjustment.patch} | 14 +- ...m_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch} | 4 +- ...points-around-__direct_map-and-FNAM.patch} | 14 +- ...o-not-allow-clearing-largepages-deb.patch} | 14 +- ...s-run-with-EFER.NXE-1-when-shadow-p.patch} | 12 +- ...dd-ITLB_MULTIHIT-bug-infrastructure.patch} | 132 +++++------- ...Uninline-and-export-CPU-mitigations-.patch | 98 +++++++++ ...21-kvm-mmu-ITLB_MULTIHIT-mitigation.patch} | 139 +++++++------ ...nction-for-creating-VM-worker-threa.patch} | 19 +- ...ecovery-of-shattered-NX-large-pages.patch} | 73 +++---- ...tion-Add-ITLB_MULTIHIT-documentation.patch | 194 ++++++++++++++++++ ...1-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch} | 32 +-- ...lper-function-x86_read_arch_cap_msr.patch} | 52 +++-- ...-cmdline-option-with-TSX-disabled-b.patch} | 94 ++++----- ...aa-Add-mitigation-for-TSX-Async-Abo.patch} | 64 ++---- ...aa-Add-sysfs-reporting-for-TSX-Asyn.patch} | 51 ++--- ...S_NO-0-to-guests-when-TSX-is-enable.patch} | 20 +- ...option-to-the-tsx-cmdline-parameter.patch} | 39 ++-- ...aa-Add-documentation-for-TSX-Async-.patch} | 103 ++++++---- ...nfig-options-to-set-tsx-on-off-auto.patch} | 40 ++-- debian/patches/series | 46 +++-- 26 files changed, 785 insertions(+), 591 deletions(-) rename debian/patches/bugfix/x86/itlb_multihit/{0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch => 0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch} (93%) rename debian/patches/bugfix/x86/itlb_multihit/{0002-kvm-Convert-kvm_lock-to-a-mutex.patch => 0011-kvm-Convert-kvm_lock-to-a-mutex.patch} (76%) rename debian/patches/bugfix/x86/itlb_multihit/{0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch => 0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch} (76%) rename debian/patches/bugfix/x86/itlb_multihit/{0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch => 0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch} (88%) rename debian/patches/bugfix/x86/itlb_multihit/{0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch => 0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch} (78%) rename 
debian/patches/bugfix/x86/itlb_multihit/{0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch => 0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch} (90%) rename debian/patches/bugfix/x86/itlb_multihit/{0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch => 0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch} (85%) rename debian/patches/bugfix/x86/itlb_multihit/{0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch => 0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch} (83%) rename debian/patches/bugfix/x86/itlb_multihit/{0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch => 0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch} (87%) rename debian/patches/bugfix/x86/itlb_multihit/{0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch => 0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch} (65%) create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch rename debian/patches/bugfix/x86/itlb_multihit/{0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch => 0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch} (76%) rename debian/patches/bugfix/x86/itlb_multihit/{0011-kvm-Add-helper-function-for-creating-VM-worker.patch => 0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch} (84%) rename debian/patches/bugfix/x86/itlb_multihit/{0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch => 0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch} (80%) create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch rename debian/patches/bugfix/x86/taa/{0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch => 0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch} (75%) rename debian/patches/bugfix/x86/taa/{0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch => 0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch} (50%) rename debian/patches/bugfix/x86/taa/{0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch => 0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch} (71%) rename debian/patches/bugfix/x86/taa/{0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch => 0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch} (81%) rename debian/patches/bugfix/x86/taa/{0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch => 0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch} (63%) rename debian/patches/bugfix/x86/taa/{0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch => 0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch} (80%) rename debian/patches/bugfix/x86/taa/{0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch => 0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch} (58%) rename debian/patches/bugfix/x86/taa/{0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch => 0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch} (87%) rename debian/patches/bugfix/x86/taa/{0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch => 0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch} (76%) diff --git a/debian/changelog b/debian/changelog index dfb3dd632..e8b1dae6a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,20 +1,5 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - * [x86] KVM: Add mitigation for Machine Check Error on Page Size Change - (aka iTLB multi-hit, CVE-2018-12207): - - KVM: x86: adjust kvm_mmu_page member to save 8 bytes 
- - kvm: Convert kvm_lock to a mutex - - kvm: x86: Do not release the page inside mmu_set_spte() - - KVM: x86: make FNAME(fetch) and __direct_map more similar - - KVM: x86: remove now unneeded hugepage gfn adjustment - - KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON - - KVM: x86: add tracepoints around __direct_map and FNAME(fetch) - - kvm: x86, powerpc: do not allow clearing largepages debugfs entry - - KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active - - x86: Add ITLB_MULTIHIT bug infrastructure - - kvm: mmu: ITLB_MULTIHIT mitigation - - kvm: Add helper function for creating VM worker threads - - kvm: x86: mmu: Recovery of shattered NX large pages * [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135): - x86/msr: Add the IA32_TSX_CTRL MSR - x86/cpu: Add a helper function x86_read_arch_cap_msr() @@ -27,6 +12,23 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - x86/tsx: Add config options to set tsx=on|off|auto TSX is now disabled by default; see Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + * [x86] KVM: Add mitigation for Machine Check Error on Page Size Change + (aka iTLB multi-hit, CVE-2018-12207): + - KVM: x86: adjust kvm_mmu_page member to save 8 bytes + - kvm: Convert kvm_lock to a mutex + - kvm: x86: Do not release the page inside mmu_set_spte() + - KVM: x86: make FNAME(fetch) and __direct_map more similar + - KVM: x86: remove now unneeded hugepage gfn adjustment + - KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON + - KVM: x86: add tracepoints around __direct_map and FNAME(fetch) + - kvm: x86, powerpc: do not allow clearing largepages debugfs entry + - KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active + - x86/bugs: Add ITLB_MULTIHIT bug infrastructure + - cpu/speculation: Uninline and export CPU mitigations helpers + - kvm: mmu: ITLB_MULTIHIT mitigation + - kvm: Add helper function for creating VM worker threads + - kvm: x86: mmu: Recovery of shattered NX large pages + - Documentation: Add ITLB_MULTIHIT documentation -- Ben Hutchings Sun, 20 Oct 2019 14:21:28 +0100 diff --git a/debian/patches/bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch b/debian/patches/bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch similarity index 93% rename from debian/patches/bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch rename to debian/patches/bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch index 3a332cb2a..42cd1c622 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch @@ -27,8 +27,6 @@ Signed-off-by: Ben Hutchings arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index 0d3f5cf3ff3e..90dccb5c79d9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -281,18 +281,18 @@ struct kvm_rmap_head { diff --git a/debian/patches/bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch b/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch similarity index 76% rename from debian/patches/bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch rename to 
debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch index d1f52e63d..53466c0e4 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch @@ -21,11 +21,9 @@ Signed-off-by: Ben Hutchings virt/kvm/kvm_main.c | 30 +++++++++++++-------------- 6 files changed, 28 insertions(+), 30 deletions(-) -diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt -index 1bb8bcaf8497..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt -@@ -15,8 +15,6 @@ KVM Lock Overview +@@ -15,8 +15,6 @@ The acquisition orders for mutexes are a On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. @@ -34,7 +32,7 @@ index 1bb8bcaf8497..635cd6eaf714 100644 Everything else is a leaf: no other lock is taken inside the critical sections. -@@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. +@@ -169,7 +167,7 @@ which time it will be set using the Dirt ------------ Name: kvm_lock @@ -43,11 +41,9 @@ index 1bb8bcaf8497..635cd6eaf714 100644 Arch: any Protects: - vm_list -diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c -index fac1d4eaa426..3c317bc6b799 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c -@@ -2110,13 +2110,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +@@ -2108,13 +2108,13 @@ int kvm_arch_init_vm(struct kvm *kvm, un kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; @@ -63,11 +59,9 @@ index fac1d4eaa426..3c317bc6b799 100644 sprintf(debug_name, "kvm-%u", current->pid); -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index 88940261fb53..c9d4e02bd73a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c -@@ -5819,7 +5819,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +@@ -5819,7 +5819,7 @@ mmu_shrink_scan(struct shrinker *shrink, int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; @@ -76,7 +70,7 @@ index 88940261fb53..c9d4e02bd73a 100644 list_for_each_entry(kvm, &vm_list, vm_list) { int idx; -@@ -5869,7 +5869,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +@@ -5869,7 +5869,7 @@ unlock: break; } @@ -85,11 +79,9 @@ index 88940261fb53..c9d4e02bd73a 100644 return freed; } -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 6ae8a013af31..0c085b895e6e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -6502,7 +6502,7 @@ static void kvm_hyperv_tsc_notifier(void) +@@ -6490,7 +6490,7 @@ static void kvm_hyperv_tsc_notifier(void struct kvm_vcpu *vcpu; int cpu; @@ -98,7 +90,7 @@ index 6ae8a013af31..0c085b895e6e 100644 list_for_each_entry(kvm, &vm_list, vm_list) kvm_make_mclock_inprogress_request(kvm); -@@ -6528,7 +6528,7 @@ static void kvm_hyperv_tsc_notifier(void) +@@ -6516,7 +6516,7 @@ static void kvm_hyperv_tsc_notifier(void spin_unlock(&ka->pvclock_gtod_sync_lock); } @@ -107,7 +99,7 @@ index 6ae8a013af31..0c085b895e6e 100644 } #endif -@@ -6586,17 +6586,17 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va +@@ -6574,17 +6574,17 @@ static int kvmclock_cpufreq_notifier(str smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); @@ -128,7 +120,7 @@ index 6ae8a013af31..0c085b895e6e 100644 if (freq->old < freq->new && send_ipi) { /* -@@ -6722,12 +6722,12 @@ static void pvclock_gtod_update_fn(struct work_struct 
*work) +@@ -6710,12 +6710,12 @@ static void pvclock_gtod_update_fn(struc struct kvm_vcpu *vcpu; int i; @@ -143,11 +135,9 @@ index 6ae8a013af31..0c085b895e6e 100644 } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index d42a36e4e6c2..5246a480d15a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h -@@ -141,7 +141,7 @@ static inline bool is_error_page(struct page *page) +@@ -141,7 +141,7 @@ static inline bool is_error_page(struct extern struct kmem_cache *kvm_vcpu_cache; @@ -156,8 +146,6 @@ index d42a36e4e6c2..5246a480d15a 100644 extern struct list_head vm_list; struct kvm_io_range { -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 4a584a575221..6a8fe26198b9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); @@ -169,7 +157,7 @@ index 4a584a575221..6a8fe26198b9 100644 static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); -@@ -684,9 +684,9 @@ static struct kvm *kvm_create_vm(unsigned long type) +@@ -684,9 +684,9 @@ static struct kvm *kvm_create_vm(unsigne if (r) goto out_err; @@ -181,7 +169,7 @@ index 4a584a575221..6a8fe26198b9 100644 preempt_notifier_inc(); -@@ -732,9 +732,9 @@ static void kvm_destroy_vm(struct kvm *kvm) +@@ -732,9 +732,9 @@ static void kvm_destroy_vm(struct kvm *k kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); @@ -193,7 +181,7 @@ index 4a584a575221..6a8fe26198b9 100644 kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); -@@ -3828,13 +3828,13 @@ static int vm_stat_get(void *_offset, u64 *val) +@@ -3828,13 +3828,13 @@ static int vm_stat_get(void *_offset, u6 u64 tmp_val; *val = 0; @@ -209,7 +197,7 @@ index 4a584a575221..6a8fe26198b9 100644 return 0; } -@@ -3847,12 +3847,12 @@ static int vm_stat_clear(void *_offset, u64 val) +@@ -3847,12 +3847,12 @@ static int vm_stat_clear(void *_offset, if (val) return -EINVAL; @@ -224,7 +212,7 @@ index 4a584a575221..6a8fe26198b9 100644 return 0; } -@@ -3867,13 +3867,13 @@ static int vcpu_stat_get(void *_offset, u64 *val) +@@ -3867,13 +3867,13 @@ static int vcpu_stat_get(void *_offset, u64 tmp_val; *val = 0; @@ -240,7 +228,7 @@ index 4a584a575221..6a8fe26198b9 100644 return 0; } -@@ -3886,12 +3886,12 @@ static int vcpu_stat_clear(void *_offset, u64 val) +@@ -3886,12 +3886,12 @@ static int vcpu_stat_clear(void *_offset if (val) return -EINVAL; @@ -255,7 +243,7 @@ index 4a584a575221..6a8fe26198b9 100644 return 0; } -@@ -3912,7 +3912,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) +@@ -3912,7 +3912,7 @@ static void kvm_uevent_notify_change(uns if (!kvm_dev.this_device || !kvm) return; @@ -264,7 +252,7 @@ index 4a584a575221..6a8fe26198b9 100644 if (type == KVM_EVENT_CREATE_VM) { kvm_createvm_count++; kvm_active_vms++; -@@ -3921,7 +3921,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) +@@ -3921,7 +3921,7 @@ static void kvm_uevent_notify_change(uns } created = kvm_createvm_count; active = kvm_active_vms; diff --git a/debian/patches/bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch b/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch similarity index 76% rename from debian/patches/bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch rename to 
debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch index 51cb71d1f..a5373b806 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch @@ -17,11 +17,9 @@ Signed-off-by: Ben Hutchings arch/x86/kvm/paging_tmpl.h | 8 +++----- 2 files changed, 10 insertions(+), 16 deletions(-) -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index c9d4e02bd73a..7dc18fb42168 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c -@@ -3001,8 +3001,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, +@@ -3001,8 +3001,6 @@ static int mmu_set_spte(struct kvm_vcpu } } @@ -30,7 +28,7 @@ index c9d4e02bd73a..7dc18fb42168 100644 return ret; } -@@ -3037,9 +3035,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, +@@ -3037,9 +3035,11 @@ static int direct_pte_prefetch_many(stru if (ret <= 0) return -1; @@ -43,7 +41,7 @@ index c9d4e02bd73a..7dc18fb42168 100644 return 0; } -@@ -3445,6 +3445,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +@@ -3445,6 +3445,7 @@ static int nonpaging_map(struct kvm_vcpu if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) return r; @@ -51,7 +49,7 @@ index c9d4e02bd73a..7dc18fb42168 100644 spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; -@@ -3453,14 +3454,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +@@ -3453,14 +3454,11 @@ static int nonpaging_map(struct kvm_vcpu if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); @@ -67,7 +65,7 @@ index c9d4e02bd73a..7dc18fb42168 100644 } static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, -@@ -4082,6 +4080,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +@@ -4082,6 +4080,7 @@ static int tdp_page_fault(struct kvm_vcp if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) return r; @@ -75,7 +73,7 @@ index c9d4e02bd73a..7dc18fb42168 100644 spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; -@@ -4090,14 +4089,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +@@ -4090,14 +4089,11 @@ static int tdp_page_fault(struct kvm_vcp if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); @@ -91,11 +89,9 @@ index c9d4e02bd73a..7dc18fb42168 100644 } static void nonpaging_init_context(struct kvm_vcpu *vcpu, -diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h -index 14ffd973df54..569c55dae3fa 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h -@@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +@@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vc mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); @@ -103,7 +99,7 @@ index 14ffd973df54..569c55dae3fa 100644 return true; } -@@ -673,7 +674,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +@@ -673,7 +674,6 @@ static int FNAME(fetch)(struct kvm_vcpu return ret; out_gpte_changed: @@ -111,7 +107,7 @@ index 14ffd973df54..569c55dae3fa 100644 return RET_PF_RETRY; } -@@ -821,6 +821,7 @@ static int 
FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +@@ -821,6 +821,7 @@ static int FNAME(page_fault)(struct kvm_ walker.pte_access &= ~ACC_EXEC_MASK; } @@ -119,7 +115,7 @@ index 14ffd973df54..569c55dae3fa 100644 spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; -@@ -834,14 +835,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +@@ -834,14 +835,11 @@ static int FNAME(page_fault)(struct kvm_ level, pfn, map_writable, prefault); ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch b/debian/patches/bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch similarity index 88% rename from debian/patches/bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch rename to debian/patches/bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch index 436fb76b1..143dd440a 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch @@ -18,11 +18,9 @@ Signed-off-by: Ben Hutchings arch/x86/kvm/paging_tmpl.h | 30 ++++++++++----------- 2 files changed, 39 insertions(+), 44 deletions(-) -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index 7dc18fb42168..42a7120323bb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c -@@ -3087,40 +3087,39 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) +@@ -3087,40 +3087,39 @@ static void direct_pte_prefetch(struct k __direct_pte_prefetch(vcpu, sp, sptep); } @@ -86,7 +84,7 @@ index 7dc18fb42168..42a7120323bb 100644 } static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) -@@ -3453,8 +3452,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +@@ -3453,8 +3452,7 @@ static int nonpaging_map(struct kvm_vcpu goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); @@ -96,7 +94,7 @@ index 7dc18fb42168..42a7120323bb 100644 out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); -@@ -4088,8 +4086,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +@@ -4088,8 +4086,7 @@ static int tdp_page_fault(struct kvm_vcp goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); @@ -106,11 +104,9 @@ index 7dc18fb42168..42a7120323bb 100644 out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); -diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h -index 569c55dae3fa..eb95d3672acd 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h -@@ -602,6 +602,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +@@ -602,6 +602,7 @@ static int FNAME(fetch)(struct kvm_vcpu struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; int top_level, ret; @@ -118,7 +114,7 @@ index 569c55dae3fa..eb95d3672acd 100644 direct_access = gw->pte_access; -@@ -646,31 +647,29 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +@@ -646,31 +647,29 @@ static int FNAME(fetch)(struct kvm_vcpu link_shadow_page(vcpu, it.sptep, sp); } @@ -163,7 +159,7 @@ index 569c55dae3fa..eb95d3672acd 100644 return ret; out_gpte_changed: -@@ 
-833,7 +832,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +@@ -833,7 +832,6 @@ static int FNAME(page_fault)(struct kvm_ transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, level, pfn, map_writable, prefault); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch b/debian/patches/bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch similarity index 78% rename from debian/patches/bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch rename to debian/patches/bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch index 37aebee2b..89bb42d1b 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch @@ -15,11 +15,9 @@ Signed-off-by: Ben Hutchings arch/x86/kvm/paging_tmpl.h | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index 42a7120323bb..96803f996819 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c -@@ -3155,11 +3155,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) +@@ -3155,11 +3155,10 @@ static int kvm_handle_bad_page(struct kv } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, @@ -32,7 +30,7 @@ index 42a7120323bb..96803f996819 100644 int level = *levelp; /* -@@ -3186,8 +3185,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, +@@ -3186,8 +3185,6 @@ static void transparent_hugepage_adjust( mask = KVM_PAGES_PER_HPAGE(level) - 1; VM_BUG_ON((gfn & mask) != (pfn & mask)); if (pfn & mask) { @@ -41,7 +39,7 @@ index 42a7120323bb..96803f996819 100644 kvm_release_pfn_clean(pfn); pfn &= ~mask; kvm_get_pfn(pfn); -@@ -3451,7 +3448,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +@@ -3451,7 +3448,7 @@ static int nonpaging_map(struct kvm_vcpu if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) @@ -50,7 +48,7 @@ index 42a7120323bb..96803f996819 100644 r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); -@@ -4085,7 +4082,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +@@ -4085,7 +4082,7 @@ static int tdp_page_fault(struct kvm_vcp if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) @@ -59,11 +57,9 @@ index 42a7120323bb..96803f996819 100644 r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); -diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h -index eb95d3672acd..4aab953f1d31 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h -@@ -829,7 +829,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +@@ -829,7 +829,7 @@ static int FNAME(page_fault)(struct kvm_ if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (!force_pt_level) diff --git a/debian/patches/bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch b/debian/patches/bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch similarity index 90% rename from 
debian/patches/bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch rename to debian/patches/bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch index 58cd52ba5..41ccc7290 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch @@ -15,11 +15,9 @@ Signed-off-by: Ben Hutchings arch/x86/kvm/mmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index 96803f996819..68fa10d890ee 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c -@@ -1027,10 +1027,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) +@@ -1027,10 +1027,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) { diff --git a/debian/patches/bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch b/debian/patches/bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch similarity index 85% rename from debian/patches/bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch rename to debian/patches/bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch index ce11a4504..d7369b867 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch @@ -15,8 +15,6 @@ Signed-off-by: Ben Hutchings arch/x86/kvm/paging_tmpl.h | 2 ++ 3 files changed, 67 insertions(+), 7 deletions(-) -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index 68fa10d890ee..7f9be921df7c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -140,9 +140,6 @@ module_param(dbg, bool, 0644); @@ -29,7 +27,7 @@ index 68fa10d890ee..7f9be921df7c 100644 #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) -@@ -261,9 +258,13 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; +@@ -261,9 +258,13 @@ static u64 __read_mostly shadow_nonprese static void mmu_spte_set(u64 *sptep, u64 spte); @@ -43,7 +41,7 @@ index 68fa10d890ee..7f9be921df7c 100644 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) { BUG_ON((mmio_mask & mmio_value) != mmio_value); -@@ -2992,10 +2993,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, +@@ -2992,10 +2993,7 @@ static int mmu_set_spte(struct kvm_vcpu ret = RET_PF_EMULATE; pgprintk("%s: setting spte %llx\n", __func__, *sptep); @@ -55,7 +53,7 @@ index 68fa10d890ee..7f9be921df7c 100644 if (!was_rmapped && is_large_pte(*sptep)) ++vcpu->kvm->stat.lpages; -@@ -3106,6 +3104,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, +@@ -3106,6 +3104,7 @@ static int __direct_map(struct kvm_vcpu if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return RET_PF_RETRY; @@ -63,8 +61,6 @@ index 68fa10d890ee..7f9be921df7c 100644 for_each_shadow_entry(vcpu, gpa, it) { base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == level) -diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h -index c73bf4e4988c..918b0d5bf272 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -325,6 
+325,65 @@ TRACE_EVENT( @@ -133,11 +129,9 @@ index c73bf4e4988c..918b0d5bf272 100644 #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH -diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h -index 4aab953f1d31..3b022b08b577 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h -@@ -649,6 +649,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +@@ -649,6 +649,8 @@ static int FNAME(fetch)(struct kvm_vcpu base_gfn = gw->gfn; diff --git a/debian/patches/bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch b/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch similarity index 83% rename from debian/patches/bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch rename to debian/patches/bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch index 186eef648..625e3daf3 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch @@ -19,11 +19,9 @@ Signed-off-by: Ben Hutchings virt/kvm/kvm_main.c | 10 +++++++--- 3 files changed, 12 insertions(+), 6 deletions(-) -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 0c085b895e6e..2714c1a0e59f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); +@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif @@ -34,7 +32,7 @@ index 0c085b895e6e..2714c1a0e59f 100644 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) -@@ -205,7 +205,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { +@@ -205,7 +205,7 @@ struct kvm_stats_debugfs_item debugfs_en { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, @@ -43,8 +41,6 @@ index 0c085b895e6e..2714c1a0e59f 100644 { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index 5246a480d15a..553a3115a735 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1034,6 +1034,7 @@ enum kvm_stat_kind { @@ -63,11 +59,9 @@ index 5246a480d15a..553a3115a735 100644 }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 6a8fe26198b9..5482949b452c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c -@@ -616,8 +616,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) +@@ -616,8 +616,9 @@ static int kvm_create_vm_debugfs(struct stat_data->kvm = kvm; stat_data->offset = p->offset; @@ -78,7 +72,7 @@ index 6a8fe26198b9..5482949b452c 100644 stat_data, stat_fops_per_vm[p->kind]); } return 0; -@@ -3714,7 +3715,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, +@@ -3714,7 +3715,9 @@ static int kvm_debugfs_open(struct inode if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; diff --git a/debian/patches/bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch 
b/debian/patches/bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch similarity index 87% rename from debian/patches/bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch rename to debian/patches/bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch index cb958309d..b4b534e23 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch @@ -2,7 +2,8 @@ From: Paolo Bonzini Date: Sun, 27 Oct 2019 16:23:23 +0100 Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active -Origin: https://git.kernel.org/linus/9167ab79936206118cc60e47dcb926c3489f3bd5 + +commit 9167ab79936206118cc60e47dcb926c3489f3bd5 upstream. VMX already does so if the host has SMEP, in order to support the combination of CR0.WP=1 and CR4.SMEP=1. However, it is perfectly safe to always do so, and in @@ -14,14 +15,13 @@ SVM does not have similar code, but it should since recent AMD processors do support SMEP. So this patch also makes the code for the two vendors more similar while fixing NPT=0, CR0.WP=1 and CR4.SMEP=1 on AMD processors. -Cc: stable@vger.kernel.org Cc: Joerg Roedel Signed-off-by: Paolo Bonzini -[Salvatore Bonaccorso: Backport to 4.19: Adjust context, filename change back -to arch/x86/kvm/vmx.c] +[bwh: Backported to 4.19: adjust filename] +Signed-off-by: Ben Hutchings --- - arch/x86/kvm/svm.c | 10 ++++++++-- - arch/x86/kvm/vmx.c | 14 +++----------- + arch/x86/kvm/svm.c | 10 ++++++++-- + arch/x86/kvm/vmx.c | 14 +++----------- 2 files changed, 11 insertions(+), 13 deletions(-) --- a/arch/x86/kvm/svm.c diff --git a/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch b/debian/patches/bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch similarity index 65% rename from debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch rename to debian/patches/bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch index bbbf5f225..95c177c1a 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch @@ -1,36 +1,35 @@ -From: Pawan Gupta -Date: Thu, 24 Oct 2019 18:34:26 +0200 -Subject: x86: Add ITLB_MULTIHIT bug infrastructure +From: Vineela Tummalapalli +Date: Mon, 4 Nov 2019 12:22:01 +0100 +Subject: x86/bugs: Add ITLB_MULTIHIT bug infrastructure -Some processors may incur a machine check error possibly -resulting in an unrecoverable cpu hang when an instruction fetch -encounters a TLB multi-hit in the instruction TLB. This can occur -when the page size is changed along with either the physical -address or cache type [1]. +commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream. + +Some processors may incur a machine check error possibly resulting in an +unrecoverable CPU lockup when an instruction fetch encounters a TLB +multi-hit in the instruction TLB. This can occur when the page size is +changed along with either the physical address or cache type. The relevant +erratum can be found here: + + https://bugzilla.kernel.org/show_bug.cgi?id=205195 + +There are other processors affected for which the erratum does not fully +disclose the impact. 
This issue affects both bare-metal x86 page tables and EPT. -This can be mitigated by either eliminating the use of large -pages or by using careful TLB invalidations when changing the -page size in the page tables. +It can be mitigated by either eliminating the use of large pages or by +using careful TLB invalidations when changing the page size in the page +tables. -Just like Spectre, Meltdown, L1TF and MDS, a new bit has been -allocated in MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will -be set on CPUs which are mitigated against this issue. - -[1] For example please refer to erratum SKL002 in "6th Generation -Intel Processor Family Specification Update" -https://www.intel.com/content/www/us/en/products/docs/processors/core/desktop-6th-gen-core-family-spec-update.html -https://www.google.com/search?q=site:intel.com+SKL002 - -There are a lot of other affected processors outside of Skylake and -that the erratum(referred above) does not fully disclose the issue -and the impact, both on Skylake and across all the affected CPUs. +Just like Spectre, Meltdown, L1TF and MDS, a new bit has been allocated in +MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will be set on CPUs which +are mitigated against this issue. Signed-off-by: Vineela Tummalapalli Co-developed-by: Pawan Gupta Signed-off-by: Pawan Gupta Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner [bwh: Backported to 4.19: - No support for X86_VENDOR_HYGON, ATOM_AIRMONT_NP - Adjust context] @@ -38,38 +37,32 @@ Signed-off-by: Ben Hutchings --- .../ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/include/asm/cpufeatures.h | 1 + - arch/x86/include/asm/msr-index.h | 7 ++ + arch/x86/include/asm/msr-index.h | 7 +++ arch/x86/kernel/cpu/bugs.c | 13 ++++ - arch/x86/kernel/cpu/common.c | 67 ++++++++++--------- + arch/x86/kernel/cpu/common.c | 61 ++++++++++--------- drivers/base/cpu.c | 8 +++ include/linux/cpu.h | 2 + - 7 files changed, 68 insertions(+), 31 deletions(-) + 7 files changed, 65 insertions(+), 28 deletions(-) -diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu -index 8718d4ad227b..a0edcdc7c0b8 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu -@@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabilities - /sys/devices/system/cpu/vulnerabilities/spec_store_bypass +@@ -479,6 +479,7 @@ What: /sys/devices/system/cpu/vulnerabi /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort + /sys/devices/system/cpu/vulnerabilities/itlb_multihit Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index 759f0a176612..ccad4f183400 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h -@@ -389,5 +389,6 @@ - #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +@@ -390,5 +390,6 @@ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -+#define X86_BUG_ITLB_MULTIHIT X86_BUG(22) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ ++#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may 
incur MCE during certain page attribute changes */ #endif /* _ASM_X86_CPUFEATURES_H */ -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index a1d22e4428f6..f58e6921cbf7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -84,6 +84,13 @@ @@ -83,14 +76,12 @@ index a1d22e4428f6..f58e6921cbf7 100644 + * physical address or cache type + * without TLB invalidation. + */ - - #define MSR_IA32_FLUSH_CMD 0x0000010b - #define L1D_FLUSH BIT(0) /* -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index ee7d17611ead..60e47e492c2f 100644 + #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ + #define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -1281,6 +1281,11 @@ static ssize_t l1tf_show_state(char *buf) +@@ -1391,6 +1391,11 @@ static ssize_t l1tf_show_state(char *buf } #endif @@ -102,9 +93,9 @@ index ee7d17611ead..60e47e492c2f 100644 static ssize_t mds_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { -@@ -1366,6 +1371,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr - case X86_BUG_MDS: - return mds_show_state(buf); +@@ -1494,6 +1499,9 @@ static ssize_t cpu_show_common(struct de + case X86_BUG_TAA: + return tsx_async_abort_show_state(buf); + case X86_BUG_ITLB_MULTIHIT: + return itlb_multihit_show_state(buf); @@ -112,9 +103,9 @@ index ee7d17611ead..60e47e492c2f 100644 default: break; } -@@ -1402,4 +1410,9 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu +@@ -1535,4 +1543,9 @@ ssize_t cpu_show_tsx_async_abort(struct { - return cpu_show_common(dev, attr, buf, X86_BUG_MDS); + return cpu_show_common(dev, attr, buf, X86_BUG_TAA); } + +ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) @@ -122,11 +113,9 @@ index ee7d17611ead..60e47e492c2f 100644 + return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); +} #endif -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index b33fdfa0ff49..128808dccd2f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c -@@ -946,13 +946,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +@@ -946,13 +946,14 @@ static void identify_cpu_without_cpuid(s #endif } @@ -148,7 +137,7 @@ index b33fdfa0ff49..128808dccd2f 100644 #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } -@@ -970,26 +971,26 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { +@@ -970,26 +971,26 @@ static const __initconst struct x86_cpu_ VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ @@ -191,7 +180,7 @@ index b33fdfa0ff49..128808dccd2f 100644 /* * Technically, swapgs isn't serializing on AMD (despite it previously -@@ -1000,13 +1001,13 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { +@@ -1000,13 +1001,13 @@ static const __initconst struct x86_cpu_ */ /* AMD Family 0xf - 0x12 */ @@ -210,13 +199,10 @@ index b33fdfa0ff49..128808dccd2f 100644 {} }; -@@ -1021,15 +1022,19 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +@@ -1031,6 +1032,10 @@ static void __init cpu_set_bug_bits(stru { - u64 ia32_cap = 0; + u64 ia32_cap = x86_read_arch_cap_msr(); -+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) -+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); -+ + /* Set ITLB_MULTIHIT bug if cpu is not in 
the whitelist and not mitigated */ + if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); @@ -224,20 +210,9 @@ index b33fdfa0ff49..128808dccd2f 100644 if (cpu_matches(NO_SPECULATION)) return; - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - -- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) -- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); -- - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && - !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) - setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); -diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c -index 2fd6ca1021c2..c21e2aec5cbb 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c -@@ -552,12 +552,19 @@ ssize_t __weak cpu_show_mds(struct device *dev, +@@ -559,6 +559,12 @@ ssize_t __weak cpu_show_tsx_async_abort( return sprintf(buf, "Not affected\n"); } @@ -250,29 +225,28 @@ index 2fd6ca1021c2..c21e2aec5cbb 100644 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); - static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); +@@ -566,6 +572,7 @@ static DEVICE_ATTR(spec_store_bypass, 04 static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); + static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); +static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, -@@ -566,6 +573,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { - &dev_attr_spec_store_bypass.attr, +@@ -575,6 +582,7 @@ static struct attribute *cpu_root_vulner &dev_attr_l1tf.attr, &dev_attr_mds.attr, + &dev_attr_tsx_async_abort.attr, + &dev_attr_itlb_multihit.attr, NULL }; -diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index 006f69f9277b..7bb824b0f30e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h -@@ -59,6 +59,8 @@ extern ssize_t cpu_show_l1tf(struct device *dev, - struct device_attribute *attr, char *buf); - extern ssize_t cpu_show_mds(struct device *dev, - struct device_attribute *attr, char *buf); +@@ -62,6 +62,8 @@ extern ssize_t cpu_show_mds(struct devic + extern ssize_t cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf); +extern ssize_t cpu_show_itlb_multihit(struct device *dev, + struct device_attribute *attr, char *buf); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch b/debian/patches/bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch new file mode 100644 index 000000000..80f62ff4a --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch @@ -0,0 +1,98 @@ +From: Tyler Hicks +Date: Mon, 4 Nov 2019 12:22:02 +0100 +Subject: cpu/speculation: Uninline and export CPU mitigations helpers + +commit 731dc9df975a5da21237a18c3384f811a7a41cc6 upstream. + +A kernel module may need to check the value of the "mitigations=" kernel +command line parameter as part of its setup when the module needs +to perform software mitigations for a CPU flaw. + +Uninline and export the helper functions surrounding the cpu_mitigations +enum to allow for their usage from a module. 
+ +Lastly, privatize the enum and cpu_mitigations variable since the value of +cpu_mitigations can be checked with the exported helper functions. + +Signed-off-by: Tyler Hicks +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +Signed-off-by: Ben Hutchings +--- + include/linux/cpu.h | 25 ++----------------------- + kernel/cpu.c | 27 ++++++++++++++++++++++++++- + 2 files changed, 28 insertions(+), 24 deletions(-) + +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -198,28 +198,7 @@ static inline int cpuhp_smt_enable(void) + static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } + #endif + +-/* +- * These are used for a global "mitigations=" cmdline option for toggling +- * optional CPU mitigations. +- */ +-enum cpu_mitigations { +- CPU_MITIGATIONS_OFF, +- CPU_MITIGATIONS_AUTO, +- CPU_MITIGATIONS_AUTO_NOSMT, +-}; +- +-extern enum cpu_mitigations cpu_mitigations; +- +-/* mitigations=off */ +-static inline bool cpu_mitigations_off(void) +-{ +- return cpu_mitigations == CPU_MITIGATIONS_OFF; +-} +- +-/* mitigations=auto,nosmt */ +-static inline bool cpu_mitigations_auto_nosmt(void) +-{ +- return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +-} ++extern bool cpu_mitigations_off(void); ++extern bool cpu_mitigations_auto_nosmt(void); + + #endif /* _LINUX_CPU_H_ */ +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -2282,7 +2282,18 @@ void __init boot_cpu_hotplug_init(void) + this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); + } + +-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; ++/* ++ * These are used for a global "mitigations=" cmdline option for toggling ++ * optional CPU mitigations. ++ */ ++enum cpu_mitigations { ++ CPU_MITIGATIONS_OFF, ++ CPU_MITIGATIONS_AUTO, ++ CPU_MITIGATIONS_AUTO_NOSMT, ++}; ++ ++static enum cpu_mitigations cpu_mitigations __ro_after_init = ++ CPU_MITIGATIONS_AUTO; + + static int __init mitigations_parse_cmdline(char *arg) + { +@@ -2299,3 +2310,17 @@ static int __init mitigations_parse_cmdl + return 0; + } + early_param("mitigations", mitigations_parse_cmdline); ++ ++/* mitigations=off */ ++bool cpu_mitigations_off(void) ++{ ++ return cpu_mitigations == CPU_MITIGATIONS_OFF; ++} ++EXPORT_SYMBOL_GPL(cpu_mitigations_off); ++ ++/* mitigations=auto,nosmt */ ++bool cpu_mitigations_auto_nosmt(void) ++{ ++ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; ++} ++EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch b/debian/patches/bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch similarity index 76% rename from debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch rename to debian/patches/bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch index 2f1e70bb8..c2e8c3b06 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch @@ -1,62 +1,79 @@ From: Paolo Bonzini -Date: Thu, 24 Oct 2019 18:34:28 +0200 +Date: Mon, 4 Nov 2019 12:22:02 +0100 Subject: kvm: mmu: ITLB_MULTIHIT mitigation +commit b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0 upstream. + With some Intel processors, putting the same virtual address in the TLB as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit -and cause the processor to issue a machine check. 
Unfortunately if EPT
-page tables use huge pages, it possible for a malicious guest to cause
-this situation.
+and cause the processor to issue a machine check resulting in a CPU lockup.

-This patch adds a knob to mark huge pages as non-executable. When the
-nx_huge_pages parameter is enabled (and we are using EPT), all huge pages
-are marked as NX. If the guest attempts to execute in one of those pages,
-the page is broken down into 4K pages, which are then marked executable.
+Unfortunately when EPT page tables use huge pages, it is possible for a
+malicious guest to cause this situation.
+
+Add a knob to mark huge pages as non-executable. When the nx_huge_pages
+parameter is enabled (and we are using EPT), all huge pages are marked as
+NX. If the guest attempts to execute in one of those pages, the page is
+broken down into 4K pages, which are then marked executable.

This is not an issue for shadow paging (except nested EPT), because then
the host is in control of TLB flushes and the problematic situation cannot
-happen. With nested EPT, again the nested guest can cause problems so we
-treat shadow and direct EPT the same.
+happen. With nested EPT, again the nested guest can cause problems, so shadow
+and direct EPT are treated in the same way.

-Signed-off-by: Junaid Shahid
+[ tglx: Fixup default to auto and massage wording a bit ]
+
+Originally-by: Junaid Shahid
Signed-off-by: Paolo Bonzini
+Signed-off-by: Thomas Gleixner
[bwh: Backported to 4.19:
- Use kvm_mmu_invalidate_zap_all_pages() instead of kvm_mmu_zap_all_fast()
- Adjust context]
Signed-off-by: Ben Hutchings
---
.../admin-guide/kernel-parameters.txt | 19 +++
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kernel/cpu/bugs.c | 13 +-
arch/x86/kvm/mmu.c | 141 +++++++++++++++++-
arch/x86/kvm/paging_tmpl.h | 29 +++-
arch/x86/kvm/x86.c | 9 ++
6 files changed, 200 insertions(+), 13 deletions(-)

--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1956,6 +1956,19 @@
KVM MMU at runtime.
Default is 0 (off)
+ kvm.nx_huge_pages=
+ [KVM] Controls the software workaround for the
+ X86_BUG_ITLB_MULTIHIT bug.
+ force : Always deploy workaround.
+ off : Never deploy workaround.
+ auto : Deploy workaround based on the presence of
+ X86_BUG_ITLB_MULTIHIT.
+
+ Default is 'auto'.
+
+ If the software workaround is enabled for the host,
+ guests need not enable it for nested guests.
+
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
Default is 1 (enabled)
+@@ -2522,6 +2535,12 @@
+ l1tf=off [X86]
+ mds=off [X86]
+ tsx_async_abort=off [X86]
++ kvm.nx_huge_pages=off [X86]
++
++ Exceptions:
++ This does not have any effect on
++ kvm.nx_huge_pages when
++ kvm.nx_huge_pages=force.
+ + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -282,6 +282,7 @@ struct kvm_mmu_page { @@ -67,7 +84,7 @@ index 90dccb5c79d9..59b44445ed59 100644 /* * The following two entries are used to key the shadow page in the -@@ -890,6 +891,7 @@ struct kvm_vm_stat { +@@ -887,6 +888,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; @@ -75,11 +92,9 @@ index 90dccb5c79d9..59b44445ed59 100644 ulong max_mmu_page_hash_collisions; }; -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 60e47e492c2f..1e764992fa64 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -1119,6 +1119,9 @@ void x86_spec_ctrl_setup_ap(void) +@@ -1229,6 +1229,9 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } @@ -89,7 +104,7 @@ index 60e47e492c2f..1e764992fa64 100644 #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt -@@ -1274,17 +1277,25 @@ static ssize_t l1tf_show_state(char *buf) +@@ -1384,17 +1387,25 @@ static ssize_t l1tf_show_state(char *buf l1tf_vmx_states[l1tf_vmx_mitigation], sched_smt_active() ? "vulnerable" : "disabled"); } @@ -116,8 +131,6 @@ index 60e47e492c2f..1e764992fa64 100644 static ssize_t mds_show_state(char *buf) { -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index 7f9be921df7c..19c3dc9b05cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -49,6 +49,20 @@ @@ -141,7 +154,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: -@@ -284,6 +298,11 @@ static inline bool spte_ad_enabled(u64 spte) +@@ -284,6 +298,11 @@ static inline bool spte_ad_enabled(u64 s return !(spte & shadow_acc_track_value); } @@ -153,7 +166,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 static inline u64 spte_shadow_accessed_mask(u64 spte) { MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); -@@ -1096,6 +1115,15 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) +@@ -1096,6 +1115,15 @@ static void account_shadowed(struct kvm kvm_mmu_gfn_disallow_lpage(slot, gfn); } @@ -169,7 +182,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; -@@ -1113,6 +1141,12 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) +@@ -1113,6 +1141,12 @@ static void unaccount_shadowed(struct kv kvm_mmu_gfn_allow_lpage(slot, gfn); } @@ -182,7 +195,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, struct kvm_memory_slot *slot) { -@@ -2665,6 +2699,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, +@@ -2665,6 +2699,9 @@ static int kvm_mmu_prepare_zap_page(stru kvm_reload_remote_mmus(kvm); } @@ -192,7 +205,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 sp->role.invalid = 1; return ret; } -@@ -2873,6 +2910,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, +@@ -2873,6 +2910,11 @@ static int set_spte(struct kvm_vcpu *vcp if (!speculative) spte |= spte_shadow_accessed_mask(spte); @@ -204,7 +217,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 if (pte_access & ACC_EXEC_MASK) spte |= shadow_x_mask; else -@@ -3091,9 +3133,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) +@@ -3091,9 +3133,32 @@ static void direct_pte_prefetch(struct k __direct_pte_prefetch(vcpu, sp, sptep); } @@ -238,7 +251,7 @@ index 
7f9be921df7c..19c3dc9b05cb 100644 { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; -@@ -3106,6 +3171,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, +@@ -3106,6 +3171,12 @@ static int __direct_map(struct kvm_vcpu trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { @@ -251,7 +264,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == level) break; -@@ -3116,6 +3187,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, +@@ -3116,6 +3187,8 @@ static int __direct_map(struct kvm_vcpu it.level - 1, true, ACC_ALL); link_shadow_page(vcpu, it.sptep, sp); @@ -260,7 +273,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 } } -@@ -3416,11 +3489,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +@@ -3416,11 +3489,14 @@ static int nonpaging_map(struct kvm_vcpu { int r; int level; @@ -276,7 +289,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { /* -@@ -3454,7 +3530,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +@@ -3454,7 +3530,8 @@ static int nonpaging_map(struct kvm_vcpu goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); @@ -286,7 +299,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); -@@ -4048,6 +4125,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +@@ -4048,6 +4125,8 @@ static int tdp_page_fault(struct kvm_vcp unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; bool map_writable; @@ -295,7 +308,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); -@@ -4058,8 +4137,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +@@ -4058,8 +4137,9 @@ static int tdp_page_fault(struct kvm_vcp if (r) return r; @@ -307,7 +320,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { if (level > PT_DIRECTORY_LEVEL && -@@ -4088,7 +4168,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +@@ -4088,7 +4168,8 @@ static int tdp_page_fault(struct kvm_vcp goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); @@ -317,10 +330,16 @@ index 7f9be921df7c..19c3dc9b05cb 100644 out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); -@@ -5886,10 +5967,52 @@ static void mmu_destroy_caches(void) +@@ -5886,10 +5967,58 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } ++static bool get_nx_auto_mode(void) ++{ ++ /* Return true when CPU has the bug, and mitigations are ON */ ++ return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); ++} ++ +static void __set_nx_huge_pages(bool val) +{ + nx_huge_pages = itlb_multihit_kvm_mitigation = val; @@ -337,7 +356,7 @@ index 7f9be921df7c..19c3dc9b05cb 100644 + else if (sysfs_streq(val, "force")) + new_val = 1; + else if (sysfs_streq(val, "auto")) -+ new_val = boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT); ++ new_val = get_nx_auto_mode(); + else if (strtobool(val, &new_val) < 0) + return -EINVAL; + @@ -365,16 +384,14 @@ index 7f9be921df7c..19c3dc9b05cb 100644 int ret = -ENOMEM; + if (nx_huge_pages == -1) -+ __set_nx_huge_pages(boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT)); ++ 
__set_nx_huge_pages(get_nx_auto_mode()); + kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", -diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h -index 3b022b08b577..adf42dc8d38b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h -@@ -596,13 +596,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, +@@ -596,13 +596,14 @@ static void FNAME(pte_prefetch)(struct k static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, int write_fault, int hlevel, -@@ -647,13 +648,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +@@ -647,13 +648,25 @@ static int FNAME(fetch)(struct kvm_vcpu link_shadow_page(vcpu, it.sptep, sp); } -@@ -665,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +@@ -665,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu sp = kvm_mmu_get_page(vcpu, base_gfn, addr, it.level - 1, true, direct_access); link_shadow_page(vcpu, it.sptep, sp); -@@ -741,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +@@ -741,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_ int r; kvm_pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); -@@ -833,7 +850,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +@@ -833,7 +850,7 @@ static int FNAME(page_fault)(struct kvm_ if (!force_pt_level) transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); out_unlock: -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 2714c1a0e59f..ec80bb27504f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -206,6 +206,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { +@@ -206,6 +206,7 @@ struct kvm_stats_debugfs_en { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } -@@ -1130,6 +1131,14 @@ u64 kvm_get_arch_capabilities(void) - +@@ -1116,6 +1117,14 @@ u64 kvm_get_arch_capabilities(void) rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); -+ /* + /* + * If nx_huge_pages is enabled, KVM's shadow paging will ensure that + * the nested hypervisor runs with NX huge pages. If it is not, + * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other + */ + data |= ARCH_CAP_PSCHANGE_MC_NO; + - /* ++ /* * If we're doing cache flushes (either "always" or "cond") * we will do one whenever the guest does a vmlaunch/vmresume.
+ * If an outer hypervisor is doing the cache flush for us diff --git a/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch b/debian/patches/bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch similarity index 84% rename from debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch rename to debian/patches/bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch index 4af53ea23..f966978a5 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch @@ -1,15 +1,16 @@ From: Junaid Shahid -Date: Thu, 24 Oct 2019 18:34:29 +0200 -Subject: kvm: Add helper function for creating VM worker +Date: Mon, 4 Nov 2019 12:22:02 +0100 +Subject: kvm: Add helper function for creating VM worker threads - threads +commit c57c80467f90e5504c8df9ad3555d2c78800bf94 upstream. -This adds a function to create a kernel thread associated with a given -VM. In particular, it ensures that the worker thread inherits the -priority and cgroups of the calling thread. +Add a function to create a kernel thread associated with a given VM. In +particular, it ensures that the worker thread inherits the priority and +cgroups of the calling thread. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner [bwh: Backported to 4.19: adjust context] Signed-off-by: Ben Hutchings --- @@ -17,11 +18,9 @@ Signed-off-by: Ben Hutchings virt/kvm/kvm_main.c | 84 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index 553a3115a735..96207939d862 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h -@@ -1305,4 +1305,10 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) +@@ -1305,4 +1305,10 @@ static inline int kvm_arch_vcpu_run_pid_ } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ @@ -32,8 +31,6 @@ index 553a3115a735..96207939d862 100644 + struct task_struct **thread_ptr); + #endif -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 5482949b452c..77da54d334b2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,6 +51,7 @@ diff --git a/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch b/debian/patches/bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch similarity index 80% rename from debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch rename to debian/patches/bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch index e1962f7a7..f732d7fcb 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch @@ -1,35 +1,38 @@ From: Junaid Shahid -Date: Thu, 24 Oct 2019 18:34:30 +0200 +Date: Mon, 4 Nov 2019 12:22:03 +0100 Subject: kvm: x86: mmu: Recovery of shattered NX large pages -The page table pages corresponding to broken down large pages are -zapped in FIFO order, so that the large page can potentially -be recovered, if it is no longer being used for execution. This removes -the performance penalty for walking deeper EPT page tables. 
+commit 1aa9b9572b10529c2e64e2b8f44025d86e124308 upstream. + +The page table pages corresponding to broken down large pages are zapped in +FIFO order, so that the large page can potentially be recovered, if it is +no longer being used for execution. This removes the performance penalty +for walking deeper EPT page tables. By default, one large page will last about one hour once the guest reaches a steady state. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini -[bwh: Backported to 4.19: adjust context] +Signed-off-by: Thomas Gleixner +[bwh: Backported to 4.19: + - Update another error path in kvm_create_vm() to use out_err_no_mmu_notifier + - Adjust context] Signed-off-by: Ben Hutchings --- .../admin-guide/kernel-parameters.txt | 6 + - arch/x86/include/asm/kvm_host.h | 5 + + arch/x86/include/asm/kvm_host.h | 4 + arch/x86/kvm/mmu.c | 129 ++++++++++++++++++ arch/x86/kvm/mmu.h | 4 + arch/x86/kvm/x86.c | 11 ++ virt/kvm/kvm_main.c | 30 +++- - 6 files changed, 184 insertions(+), 1 deletion(-) + 6 files changed, 183 insertions(+), 1 deletion(-) -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index b2c1a5c63ab3..efdc471ed0b9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -1967,6 +1967,12 @@ - If the sw workaround is enabled for the host, guests - need not enable it for nested guests. +@@ -1969,6 +1969,12 @@ + If the software workaround is enabled for the host, + guests need not enable it for nested guests. + kvm.nx_huge_pages_recovery_ratio= + [KVM] Controls how many 4KiB pages are periodically zapped + back to huge pages. 0 disables the recovery, otherwise if + the value is N KVM will zap 1/Nth of the 4KiB pages every + minute. The default is 60. + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
Default is 1 (enabled) -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index 59b44445ed59..efe3ba61fc23 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -281,6 +281,8 @@ struct kvm_rmap_head { @@ -53,7 +54,7 @@ index 59b44445ed59..efe3ba61fc23 100644 bool unsync; bool lpage_disallowed; /* Can't be replaced by an equiv large page */ -@@ -808,6 +810,7 @@ struct kvm_arch { +@@ -805,6 +807,7 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -61,17 +62,14 @@ index 59b44445ed59..efe3ba61fc23 100644 struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; -@@ -878,6 +881,8 @@ struct kvm_arch { +@@ -875,6 +878,7 @@ struct kvm_arch { bool x2apic_broadcast_quirk_disabled; bool guest_can_read_msr_platform_info; -+ + struct task_struct *nx_lpage_recovery_thread; }; struct kvm_vm_stat { -diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c -index 19c3dc9b05cb..bafb9001ce94 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -40,6 +40,7 @@ @@ -109,7 +107,7 @@ index 19c3dc9b05cb..bafb9001ce94 100644 /* * When setting this variable to true it enables Two-Dimensional-Paging -@@ -1121,6 +1132,8 @@ static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +@@ -1121,6 +1132,8 @@ static void account_huge_nx_page(struct return; ++kvm->stat.nx_lpage_splits; @@ -118,7 +116,7 @@ index 19c3dc9b05cb..bafb9001ce94 100644 sp->lpage_disallowed = true; } -@@ -1145,6 +1158,7 @@ static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +@@ -1145,6 +1158,7 @@ static void unaccount_huge_nx_page(struc { --kvm->stat.nx_lpage_splits; sp->lpage_disallowed = false; @@ -126,7 +124,7 @@ index 19c3dc9b05cb..bafb9001ce94 100644 } static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, -@@ -5999,6 +6013,8 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) +@@ -6005,6 +6019,8 @@ static int set_nx_huge_pages(const char idx = srcu_read_lock(&kvm->srcu); kvm_mmu_invalidate_zap_all_pages(kvm); srcu_read_unlock(&kvm->srcu, idx); @@ -135,7 +133,7 @@ index 19c3dc9b05cb..bafb9001ce94 100644 } mutex_unlock(&kvm_lock); } -@@ -6079,3 +6095,116 @@ void kvm_mmu_module_exit(void) +@@ -6086,3 +6102,116 @@ void kvm_mmu_module_exit(void) unregister_shrinker(&mmu_shrinker); mmu_audit_disable(); } @@ -252,11 +250,9 @@ index 19c3dc9b05cb..bafb9001ce94 100644 + if (kvm->arch.nx_lpage_recovery_thread) + kthread_stop(kvm->arch.nx_lpage_recovery_thread); +} -diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h -index 65892288bf51..f7b2de7b6382 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h -@@ -216,4 +216,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); +@@ -216,4 +216,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); @@ -265,11 +261,9 @@ index 65892288bf51..f7b2de7b6382 100644 +void kvm_mmu_pre_destroy_vm(struct kvm *kvm); + #endif -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index ec80bb27504f..da688e726632 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -8958,6 +8958,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +@@ -8952,6 +8952,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); @@ -277,7 +271,7 @@ index ec80bb27504f..da688e726632 100644 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); -@@ -8989,6 +8990,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +@@ -8983,6 +8984,11 @@ int kvm_arch_init_vm(struct kvm *kvm, un return 0; } @@ -289,7 +283,7 @@ index ec80bb27504f..da688e726632 100644 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); -@@ -9090,6 +9096,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) +@@ -9084,6 +9090,11 @@ int x86_set_memory_region(struct kvm *kv } EXPORT_SYMBOL_GPL(x86_set_memory_region); @@ -301,11 +295,9 @@ index ec80bb27504f..da688e726632 100644 void kvm_arch_destroy_vm(struct kvm *kvm) { if (current->mm == kvm->mm) { -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 77da54d334b2..7a0d86d52230 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c -@@ -625,6 +625,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) +@@ -625,6 +625,23 @@ static int kvm_create_vm_debugfs(struct return 0; } @@ -329,7 +321,7 @@ index 77da54d334b2..7a0d86d52230 100644 static struct kvm *kvm_create_vm(unsigned long type) { int r, i; -@@ -679,10 +696,14 @@ static struct kvm *kvm_create_vm(unsigned long type) +@@ -679,11 +696,15 @@ static struct kvm *kvm_create_vm(unsigne rcu_assign_pointer(kvm->buses[i], kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); if (!kvm->buses[i]) @@ -338,14 +330,15 @@ index 77da54d334b2..7a0d86d52230 100644 } r = kvm_init_mmu_notifier(kvm); -+ if (r) + if (r) + goto out_err_no_mmu_notifier; + + r = kvm_arch_post_init_vm(kvm); - if (r) ++ if (r) goto out_err; -@@ -695,6 +716,11 @@ static struct kvm *kvm_create_vm(unsigned long type) + mutex_lock(&kvm_lock); +@@ -695,6 +716,11 @@ static struct kvm *kvm_create_vm(unsigne return kvm; out_err: @@ -357,7 +350,7 @@ index 77da54d334b2..7a0d86d52230 100644 cleanup_srcu_struct(&kvm->irq_srcu); out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); -@@ -737,6 +763,8 @@ static void kvm_destroy_vm(struct kvm *kvm) +@@ -737,6 +763,8 @@ static void kvm_destroy_vm(struct kvm *k mutex_lock(&kvm_lock); list_del(&kvm->vm_list); mutex_unlock(&kvm_lock); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch b/debian/patches/bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch new file mode 100644 index 000000000..b67ee61eb --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch @@ -0,0 +1,194 @@ +From: "Gomez Iglesias, Antonio" +Date: Mon, 4 Nov 2019 12:22:03 +0100 +Subject: Documentation: Add ITLB_MULTIHIT documentation + +commit 7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d upstream. + +Add the initial ITLB_MULTIHIT documentation. + +[ tglx: Add it to the index so it gets actually built. 
] + +Signed-off-by: Antonio Gomez Iglesias +Signed-off-by: Nelson D'Souza +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +Signed-off-by: Ben Hutchings +--- + Documentation/admin-guide/hw-vuln/index.rst | 1 + + .../admin-guide/hw-vuln/multihit.rst | 163 ++++++++++++++++++ + 2 files changed, 164 insertions(+) + create mode 100644 Documentation/admin-guide/hw-vuln/multihit.rst + +--- a/Documentation/admin-guide/hw-vuln/index.rst ++++ b/Documentation/admin-guide/hw-vuln/index.rst +@@ -13,3 +13,4 @@ are configurable at compile, boot or run + l1tf + mds + tsx_async_abort ++ multihit.rst +--- /dev/null ++++ b/Documentation/admin-guide/hw-vuln/multihit.rst +@@ -0,0 +1,163 @@ ++iTLB multihit ++============= ++ ++iTLB multihit is an erratum where some processors may incur a machine check ++error, possibly resulting in an unrecoverable CPU lockup, when an ++instruction fetch hits multiple entries in the instruction TLB. This can ++occur when the page size is changed along with either the physical address ++or cache type. A malicious guest running on a virtualized system can ++exploit this erratum to perform a denial of service attack. ++ ++ ++Affected processors ++------------------- ++ ++Variations of this erratum are present on most Intel Core and Xeon processor ++models. The erratum is not present on: ++ ++ - non-Intel processors ++ ++ - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont) ++ ++ - Intel processors that have the PSCHANGE_MC_NO bit set in the ++ IA32_ARCH_CAPABILITIES MSR. ++ ++ ++Related CVEs ++------------ ++ ++The following CVE entry is related to this issue: ++ ++ ============== ================================================= ++ CVE-2018-12207 Machine Check Error Avoidance on Page Size Change ++ ============== ================================================= ++ ++ ++Problem ++------- ++ ++Privileged software, including OS and virtual machine managers (VMM), is in ++charge of memory management. A key component in memory management is the control ++of the page tables. Modern processors use virtual memory, a technique that creates ++the illusion of a very large memory for processors. This virtual space is split ++into pages of a given size. Page tables translate virtual addresses to physical ++addresses. ++ ++To reduce latency when performing a virtual to physical address translation, ++processors include a structure, called TLB, that caches recent translations. ++There are separate TLBs for instruction (iTLB) and data (dTLB). ++ ++Under this erratum, instructions are fetched from a linear address translated ++using a 4 KB translation cached in the iTLB. Privileged software modifies the ++paging structure so that the same linear address is mapped using a large page size (2 MB, 4 ++MB, 1 GB) with a different physical address or memory type. After the page ++structure modification but before the software invalidates any iTLB entries for ++the linear address, a code fetch that happens on the same linear address may ++cause a machine-check error which can result in a system hang or shutdown. ++ ++ ++Attack scenarios ++---------------- ++ ++Attacks against the iTLB multihit erratum can be mounted from malicious ++guests in a virtualized system. ++ ++ ++iTLB multihit system information ++-------------------------------- ++ ++The Linux kernel provides a sysfs interface to enumerate the current iTLB ++multihit status of the system: whether the system is vulnerable and which ++mitigations are active.
The relevant sysfs file is: ++ ++/sys/devices/system/cpu/vulnerabilities/itlb_multihit ++ ++The possible values in this file are: ++ ++.. list-table:: ++ ++ * - Not affected ++ - The processor is not vulnerable. ++ * - KVM: Mitigation: Split huge pages ++ - Software changes mitigate this issue. ++ * - KVM: Vulnerable ++ - The processor is vulnerable, but no mitigation is enabled ++ ++ ++Enumeration of the erratum ++-------------------------------- ++ ++A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) MSR ++and will be set on CPUs which are mitigated against this issue. ++ ++ ======================================= =========== =============================== ++ IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable, check model ++ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable, check model ++ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable ++ ======================================= =========== =============================== ++ ++ ++Mitigation mechanism ++------------------------- ++ ++This erratum can be mitigated by restricting the use of large page sizes to ++non-executable pages. This forces all iTLB entries to be 4K, and removes ++the possibility of multiple hits. ++ ++In order to mitigate the vulnerability, KVM initially marks all huge pages ++as non-executable. If the guest attempts to execute in one of those pages, ++the page is broken down into 4K pages, which are then marked executable. ++ ++If EPT is disabled or not available on the host, KVM is in control of TLB ++flushes and the problematic situation cannot happen. However, the shadow ++EPT paging mechanism used by nested virtualization is vulnerable, because ++the nested guest can trigger multiple iTLB hits by modifying its own ++(non-nested) page tables. For simplicity, KVM will make large pages ++non-executable in all shadow paging modes. ++ ++Mitigation control on the kernel command line and KVM module parameter ++------------------------------------------------------------------------ ++ ++The KVM hypervisor mitigation mechanism for marking huge pages as ++non-executable can be controlled with a module parameter "nx_huge_pages=". ++The kernel command line allows controlling the iTLB multihit mitigations at ++boot time with the option "kvm.nx_huge_pages=". ++ ++The valid arguments for these options are: ++ ++ ========== ================================================================ ++ force Mitigation is enabled. In this case, the mitigation implements ++ non-executable huge pages in Linux kernel KVM module. All huge ++ pages in the EPT are marked as non-executable. ++ If a guest attempts to execute in one of those pages, the page is ++ broken down into 4K pages, which are then marked executable. ++ ++ off Mitigation is disabled. ++ ++ auto Enable mitigation only if the platform is affected and the kernel ++ was not booted with the "mitigations=off" command line parameter. ++ This is the default option. ++ ========== ================================================================ ++ ++ ++Mitigation selection guide ++-------------------------- ++ ++1. No virtualization in use ++^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++ The system is protected by the kernel unconditionally and no further ++ action is required. ++ ++2. Virtualization with trusted guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++ If the guest comes from a trusted source, you may assume that the guest will ++ not attempt to maliciously exploit these errata and no further action is ++ required. ++ ++3.
Virtualization with untrusted guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ If the guest comes from an untrusted source, the host kernel will need ++ to apply iTLB multihit mitigation via the kernel command line or kvm ++ module parameter. diff --git a/debian/patches/bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch b/debian/patches/bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch similarity index 75% rename from debian/patches/bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch rename to debian/patches/bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch index 71d885413..6e8df2a71 100644 --- a/debian/patches/bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch +++ b/debian/patches/bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch @@ -2,12 +2,19 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 10:45:50 +0200 Subject: x86/msr: Add the IA32_TSX_CTRL MSR +commit c2955f270a84762343000f103e0640d29c7a96f3 upstream. + Transactional Synchronization Extensions (TSX) may be used on certain processors as part of a speculative side channel attack. A microcode update for existing processors that are vulnerable to this attack will add a new MSR - IA32_TSX_CTRL to allow the system administrator the option to disable TSX as one of the possible mitigations. +The CPUs which get this new MSR after a microcode upgrade are the ones +which do not set MSR_IA32_ARCH_CAPABILITIES.MDS_NO (bit 5) because those +CPUs have CPUID.MD_CLEAR, i.e., the VERW implementation which clears all +CPU buffers takes care of the TAA case as well. + [ Note that future processors that are not vulnerable will also support the IA32_TSX_CTRL MSR. ] @@ -34,36 +41,33 @@ There are two control bits in IA32_TSX_CTRL MSR: CPUID(EAX=7).EBX{bit11} read as 0). The other TSX sub-feature, Hardware Lock Elision (HLE), is -unconditionally disabled but still enumerated as present by -CPUID(EAX=7).EBX{bit4}.
*/ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* -@@ -101,6 +102,10 @@ +@@ -94,6 +95,10 @@ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/debian/patches/bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch b/debian/patches/bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch similarity index 50% rename from debian/patches/bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch rename to debian/patches/bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch index fe671a4f1..f159a3c56 100644 --- a/debian/patches/bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch +++ b/debian/patches/bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch @@ -2,31 +2,26 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 10:52:35 +0200 Subject: x86/cpu: Add a helper function x86_read_arch_cap_msr() +commit 286836a70433fb64131d2590f4bf512097c255e1 upstream. + Add a helper function to read the IA32_ARCH_CAPABILITIES MSR. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Tested-by: Neelima Krishnan Reviewed-by: Mark Gross Reviewed-by: Tony Luck -Tested-by: Neelima Krishnan -Cc: Andy Lutomirski -Cc: "H. Peter Anvin" -Cc: Ingo Molnar -Cc: Peter Zijlstra -Cc: Thomas Gleixner -Cc: x86-ml -[bwh: Forward-ported on top of NX: Fix conflict (neighbouring changes) - in arch/x86/kernel/cpu/common.c] +Reviewed-by: Josh Poimboeuf Signed-off-by: Ben Hutchings --- - arch/x86/kernel/cpu/common.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) + arch/x86/kernel/cpu/common.c | 15 +++++++++++---- + arch/x86/kernel/cpu/cpu.h | 2 ++ + 2 files changed, 13 insertions(+), 4 deletions(-) -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 128808dccd2f..cee109bd7f00 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c -@@ -1018,13 +1018,20 @@ static bool __init cpu_matches(unsigned long which) +@@ -1017,19 +1017,26 @@ static bool __init cpu_matches(unsigned return m && !!(m->driver_data & which); } @@ -35,10 +30,9 @@ index 128808dccd2f..cee109bd7f00 100644 { u64 ia32_cap = 0; -- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); ++ + return ia32_cap; +} + @@ -46,6 +40,24 @@ index 128808dccd2f..cee109bd7f00 100644 +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + - /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ - if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) - setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); + if (cpu_matches(NO_SPECULATION)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + +- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) +- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); +- + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -65,4 +65,6 @@ unsigned int aperfmperf_get_khz(int cpu) + + extern void x86_spec_ctrl_setup_ap(void); + ++extern u64 x86_read_arch_cap_msr(void); ++ + #endif /* ARCH_X86_CPU_H */ diff --git 
a/debian/patches/bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch b/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch similarity index 71% rename from debian/patches/bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch rename to debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch index c4da1231d..6d0c3a99b 100644 --- a/debian/patches/bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch +++ b/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch @@ -2,6 +2,8 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 11:01:53 +0200 Subject: x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default +commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream. + Add a kernel cmdline parameter "tsx" to control the Transactional Synchronization Extensions (TSX) feature. On CPUs that support TSX control, use "tsx=on|off" to enable or disable TSX. Not specifying this @@ -12,46 +14,29 @@ Carve out the TSX controlling functionality into a separate compilation unit because TSX is a CPU feature while the TSX async abort control machinery will go to cpu/bugs.c. - [ bp: Massage, shorten and clear the arg buffer. ] + [ bp: - Massage, shorten and clear the arg buffer. + - Clarifications of the tsx= possible options - Josh. + - Expand on TSX_CTRL availability - Pawan. ] Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov -Cc: Andrew Morton -Cc: Andy Lutomirski -Cc: Babu Moger -Cc: Fenghua Yu -Cc: "H. Peter Anvin" -Cc: Ingo Molnar -Cc: Jonathan Corbet -Cc: Josh Poimboeuf -Cc: Juergen Gross -Cc: Kees Cook -Cc: linux-doc@vger.kernel.org -Cc: Peter Zijlstra -Cc: "Rafael J. Wysocki" -Cc: Rahul Tanwar -Cc: Ricardo Neri -Cc: Sean Christopherson -Cc: Thomas Gleixner -Cc: x86-ml -Cc: Zhao Yakui +Signed-off-by: Thomas Gleixner +Reviewed-by: Josh Poimboeuf [bwh: Backported to 4.19: adjust context] Signed-off-by: Ben Hutchings --- - .../admin-guide/kernel-parameters.txt | 11 ++ + .../admin-guide/kernel-parameters.txt | 26 ++++ arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 2 + - arch/x86/kernel/cpu/cpu.h | 18 +++ + arch/x86/kernel/cpu/cpu.h | 16 +++ arch/x86/kernel/cpu/intel.c | 5 + - arch/x86/kernel/cpu/tsx.c | 119 ++++++++++++++++++ - 6 files changed, 156 insertions(+), 1 deletion(-) + arch/x86/kernel/cpu/tsx.c | 125 ++++++++++++++++++ + 6 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/tsx.c -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index efdc471ed0b9..f03756d2addb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4707,6 +4707,17 @@ +@@ -4685,6 +4685,32 @@ marks the TSC unconditionally unstable at bootup and avoids any further wobbles once the TSC watchdog notices. @@ -61,16 +46,29 @@ index efdc471ed0b9..f03756d2addb 100644 + + This parameter controls the TSX feature. The options are: + -+ on - Enable TSX on the system. -+ off - Disable TSX on the system. ++ on - Enable TSX on the system. Although there are ++ mitigations for all known security vulnerabilities, ++ TSX has been known to be an accelerator for ++ several previous speculation-related CVEs, and ++ so there may be unknown security risks associated ++ with leaving it enabled. ++ ++ off - Disable TSX on the system. 
(Note that this ++ option takes effect only on newer CPUs which are ++ not vulnerable to MDS, i.e., have ++ MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get ++ the new IA32_TSX_CTRL MSR through a microcode ++ update. This new MSR allows for the reliable ++ deactivation of the TSX functionality.) + + Not specifying this option is equivalent to tsx=off. ++ ++ See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst ++ for more details. + turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface Format: -diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile -index 347137e80bf5..320769b4807b 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o @@ -82,11 +80,9 @@ index 347137e80bf5..320769b4807b 100644 obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index cee109bd7f00..5f89d78fe132 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c -@@ -1487,6 +1487,8 @@ void __init identify_boot_cpu(void) +@@ -1482,6 +1482,8 @@ void __init identify_boot_cpu(void) enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); @@ -95,8 +91,6 @@ index cee109bd7f00..5f89d78fe132 100644 } void identify_secondary_cpu(struct cpuinfo_x86 *c) -diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h -index 7b229afa0a37..236582c90d3f 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -45,6 +45,22 @@ struct _tlb_table { @@ -122,18 +116,9 @@ index 7b229afa0a37..236582c90d3f 100644 extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); -@@ -65,4 +81,6 @@ unsigned int aperfmperf_get_khz(int cpu); - - extern void x86_spec_ctrl_setup_ap(void); - -+extern u64 x86_read_arch_cap_msr(void); -+ - #endif /* ARCH_X86_CPU_H */ -diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c -index fc3c07fe7df5..a5287b18a63f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c -@@ -766,6 +766,11 @@ static void init_intel(struct cpuinfo_x86 *c) +@@ -766,6 +766,11 @@ static void init_intel(struct cpuinfo_x8 init_intel_energy_perf(c); init_intel_misc_features(c); @@ -145,12 +130,9 @@ index fc3c07fe7df5..a5287b18a63f 100644 } #ifdef CONFIG_X86_32 -diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c -new file mode 100644 -index 000000000000..e5933ef50add --- /dev/null +++ b/arch/x86/kernel/cpu/tsx.c -@@ -0,0 +1,119 @@ +@@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Transactional Synchronization Extensions (TSX) control. @@ -213,9 +195,15 @@ index 000000000000..e5933ef50add + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* -+ * TSX is controlled via MSR_IA32_TSX_CTRL. However, -+ * support for this MSR is enumerated by ARCH_CAP_TSX_MSR bit -+ * in MSR_IA32_ARCH_CAPABILITIES. ++ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this ++ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. ++ * ++ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a ++ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES ++ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get ++ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, ++ * tsx= cmdline requests will do nothing on CPUs without ++ * MSR_IA32_TSX_CTRL support. 
+ */ + return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); +} diff --git a/debian/patches/bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch b/debian/patches/bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch similarity index 81% rename from debian/patches/bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch rename to debian/patches/bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch index 845c8f067..e9bc1c9bc 100644 --- a/debian/patches/bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch +++ b/debian/patches/bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch @@ -2,6 +2,8 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 11:30:45 +0200 Subject: x86/speculation/taa: Add mitigation for TSX Async Abort +commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream. + TSX Async Abort (TAA) is a side channel vulnerability to the internal buffers in some Intel processors similar to Microarchitectural Data Sampling (MDS). In this case, certain loads may speculatively pass @@ -52,22 +54,8 @@ deployed. The effective mitigation state can be read from sysfs. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov -Cc: Andrew Morton -Cc: Andy Lutomirski -Cc: Fenghua Yu -Cc: Greg Kroah-Hartman -Cc: "H. Peter Anvin" -Cc: Ingo Molnar -Cc: Josh Poimboeuf -Cc: Kees Cook -Cc: Paolo Bonzini -Cc: "Peter Zijlstra (Intel)" -Cc: Sean Christopherson -Cc: Thomas Gleixner -Cc: Thomas Lendacky -Cc: x86-ml -[bwh: Forward-ported on top of NX: Renumber bug bit after - X86_BUG_ITLB_MULTIHIT] +Signed-off-by: Thomas Gleixner +Reviewed-by: Josh Poimboeuf [bwh: Backported to 4.19: Add #include "cpu.h" in bugs.c] Signed-off-by: Ben Hutchings --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 + arch/x86/include/asm/nospec-branch.h | 4 +- arch/x86/include/asm/processor.h | 7 ++ - arch/x86/kernel/cpu/bugs.c | 112 +++++++++++++++++++++++++++ + arch/x86/kernel/cpu/bugs.c | 110 +++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 15 ++++ - 6 files changed, 141 insertions(+), 2 deletions(-) + 6 files changed, 139 insertions(+), 2 deletions(-) -diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index ccad4f183400..5a2eecfed727 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h -@@ -390,5 +390,6 @@ +@@ -389,5 +389,6 @@ + #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ - #define X86_BUG_ITLB_MULTIHIT X86_BUG(22) /* CPU may incur MCE during certain page attribute changes */ -+#define X86_BUG_TAA X86_BUG(23) /* CPU is affected by TSX Async Abort(TAA) */ ++#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #endif /* _ASM_X86_CPUFEATURES_H */ -diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index da7887a9f314..0f4feee6d082 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h -@@ -92,6 +92,10 @@ - * without TLB invalidation. +@@ -85,6 +85,10 @@ + * Sampling (MDS) vulnerabilities. */ #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available.
*/ +#define ARCH_CAP_TAA_NO BIT(8) /* @@ -105,11 +89,9 @@ index da7887a9f314..0f4feee6d082 100644 #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* -diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index 28cb2b31527a..09c7466c4880 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h -@@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +@@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear) #include /** @@ -118,7 +100,7 @@ index 28cb2b31527a..09c7466c4880 100644 * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the -@@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers(void) +@@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers } /** @@ -127,8 +109,6 @@ index 28cb2b31527a..09c7466c4880 100644 * * Clear CPU buffers if the corresponding static key is enabled */ -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index b54f25697beb..efb44bd3a714 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1003,4 +1003,11 @@ enum mds_mitigations { @@ -143,8 +123,6 @@ index b54f25697beb..efb44bd3a714 100644 +}; + #endif /* _ASM_X86_PROCESSOR_H */ -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 1e764992fa64..828b2fe4bc0a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -32,11 +32,14 @@ @@ -170,11 +148,10 @@ index 1e764992fa64..828b2fe4bc0a 100644 arch_smt_update(); -@@ -266,6 +270,100 @@ static int __init mds_cmdline(char *str) - } +@@ -267,6 +271,100 @@ static int __init mds_cmdline(char *str) early_param("mds", mds_cmdline); -+#undef pr_fmt + #undef pr_fmt +#define pr_fmt(fmt) "TAA: " fmt + +/* Default mitigation for TAA-affected CPUs */ @@ -268,9 +245,10 @@ index 1e764992fa64..828b2fe4bc0a 100644 +} +early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); + - #undef pr_fmt ++#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt + enum spectre_v1_mitigation { @@ -772,6 +870,7 @@ static void update_mds_branch_idle(void) } @@ -279,7 +257,7 @@ index 1e764992fa64..828b2fe4bc0a 100644 void arch_smt_update(void) { -@@ -804,6 +903,19 @@ void arch_smt_update(void) +@@ -804,6 +903,17 @@ void arch_smt_update(void) break; } @@ -288,8 +266,6 @@ index 1e764992fa64..828b2fe4bc0a 100644 + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); -+ /* TSX is enabled, apply MDS idle buffer clearing. 
*/ -+ update_mds_branch_idle(); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: @@ -299,11 +275,9 @@ index 1e764992fa64..828b2fe4bc0a 100644 mutex_unlock(&spec_ctrl_mutex); } -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 5f89d78fe132..de1e552c9705 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c -@@ -1058,6 +1058,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +@@ -1053,6 +1053,21 @@ static void __init cpu_set_bug_bits(stru if (!cpu_matches(NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); diff --git a/debian/patches/bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch b/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch similarity index 63% rename from debian/patches/bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch rename to debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch index 867d136e1..e486aa490 100644 --- a/debian/patches/bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch +++ b/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch @@ -2,6 +2,8 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:19:51 +0200 Subject: x86/speculation/taa: Add sysfs reporting for TSX Async Abort +commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream. + Add the sysfs reporting file for TSX Async Abort. It exposes the vulnerability and the mitigation state similar to the existing files for the other hardware vulnerabilities. @@ -11,20 +13,12 @@ Sysfs file path is: Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Tested-by: Neelima Krishnan Reviewed-by: Mark Gross Reviewed-by: Tony Luck -Tested-by: Neelima Krishnan -Cc: Greg Kroah-Hartman -Cc: "H. Peter Anvin" -Cc: Ingo Molnar -Cc: Jiri Kosina -Cc: Josh Poimboeuf -Cc: Peter Zijlstra -Cc: Thomas Gleixner -Cc: x86-ml -[bwh: Forward-ported on top of NX: Fix conflicts (neighbouring - insertions) in arch/x86/kernel/cpu/bugs.c, drivers/base/cpu.c, - include/linux/cpu.h] +Reviewed-by: Greg Kroah-Hartman +Reviewed-by: Josh Poimboeuf Signed-off-by: Ben Hutchings --- arch/x86/kernel/cpu/bugs.c | 23 +++++++++++++++++++++++ @@ -32,11 +26,9 @@ Signed-off-by: Ben Hutchings include/linux/cpu.h | 3 +++ 3 files changed, 35 insertions(+) -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 828b2fe4bc0a..1ed43b858c52 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -1426,6 +1426,21 @@ static ssize_t mds_show_state(char *buf) +@@ -1408,6 +1408,21 @@ static ssize_t mds_show_state(char *buf) sched_smt_active() ? 
"vulnerable" : "disabled"); } @@ -58,9 +50,9 @@ index 828b2fe4bc0a..1ed43b858c52 100644 static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) -@@ -1497,6 +1512,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr - case X86_BUG_ITLB_MULTIHIT: - return itlb_multihit_show_state(buf); +@@ -1476,6 +1491,9 @@ static ssize_t cpu_show_common(struct de + case X86_BUG_MDS: + return mds_show_state(buf); + case X86_BUG_TAA: + return tsx_async_abort_show_state(buf); @@ -68,9 +60,9 @@ index 828b2fe4bc0a..1ed43b858c52 100644 default: break; } -@@ -1538,4 +1556,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr +@@ -1512,4 +1530,9 @@ ssize_t cpu_show_mds(struct device *dev, { - return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); } + +ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) @@ -78,11 +70,9 @@ index 828b2fe4bc0a..1ed43b858c52 100644 + return cpu_show_common(dev, attr, buf, X86_BUG_TAA); +} #endif -diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c -index c21e2aec5cbb..e9e7fde0fe00 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c -@@ -558,6 +558,13 @@ ssize_t __weak cpu_show_itlb_multihit(struct device *dev, +@@ -552,12 +552,20 @@ ssize_t __weak cpu_show_mds(struct devic return sprintf(buf, "Not affected\n"); } @@ -96,30 +86,27 @@ index c21e2aec5cbb..e9e7fde0fe00 100644 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); -@@ -565,6 +572,7 @@ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); + static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); - static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); +static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, -@@ -574,6 +582,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { +@@ -566,6 +574,7 @@ static struct attribute *cpu_root_vulner + &dev_attr_spec_store_bypass.attr, &dev_attr_l1tf.attr, &dev_attr_mds.attr, - &dev_attr_itlb_multihit.attr, + &dev_attr_tsx_async_abort.attr, NULL }; -diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index 7bb824b0f30e..9d8dba19844e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h -@@ -61,6 +61,9 @@ extern ssize_t cpu_show_mds(struct device *dev, +@@ -59,6 +59,9 @@ extern ssize_t cpu_show_l1tf(struct devi + struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf); - extern ssize_t cpu_show_itlb_multihit(struct device *dev, - struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf); diff --git a/debian/patches/bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch b/debian/patches/bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch similarity index 80% rename from debian/patches/bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch rename to 
debian/patches/bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch index 8d1c54c4d..73cbfb3cc 100644 --- a/debian/patches/bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch +++ b/debian/patches/bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch @@ -2,6 +2,8 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:23:33 +0200 Subject: kvm/x86: Export MDS_NO=0 to guests when TSX is enabled +commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream. + Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX Async Abort(TAA) affected hosts that have TSX enabled and updated microcode. This is required so that the guests don't complain, @@ -20,26 +22,18 @@ CPUID.MD_CLEAR=1, they deploy MDS mitigation which also mitigates TAA. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov -Reviewed-by: Tony Luck +Signed-off-by: Thomas Gleixner Tested-by: Neelima Krishnan -Cc: "H. Peter Anvin" -Cc: Ingo Molnar -Cc: Joerg Roedel -Cc: kvm ML -Cc: Paolo Bonzini -Cc: "Radim Krcmar" -Cc: Sean Christopherson -Cc: Thomas Gleixner -Cc: x86-ml +Reviewed-by: Tony Luck +Reviewed-by: Josh Poimboeuf +Signed-off-by: Ben Hutchings --- arch/x86/kvm/x86.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index da688e726632..c68ee8d1ef8c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -1151,6 +1151,25 @@ u64 kvm_get_arch_capabilities(void) +@@ -1127,6 +1127,25 @@ u64 kvm_get_arch_capabilities(void) if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; diff --git a/debian/patches/bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch b/debian/patches/bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch similarity index 58% rename from debian/patches/bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch rename to debian/patches/bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch index 87b428e70..1f9858b16 100644 --- a/debian/patches/bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch +++ b/debian/patches/bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch @@ -2,6 +2,8 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:28:57 +0200 Subject: x86/tsx: Add "auto" option to the tsx= cmdline parameter +commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream. + Platforms which are not affected by X86_BUG_TAA may want the TSX feature enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto disable TSX when X86_BUG_TAA is present, otherwise enable TSX. @@ -13,45 +15,30 @@ https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck -Cc: "H. Peter Anvin" -Cc: "Paul E. 
McKenney" -Cc: Andrew Morton -Cc: Ingo Molnar -Cc: Jonathan Corbet -Cc: Josh Poimboeuf -Cc: Juergen Gross -Cc: linux-doc@vger.kernel.org -Cc: Mark Gross -Cc: Mauro Carvalho Chehab -Cc: Thomas Gleixner -Cc: x86-ml +Reviewed-by: Josh Poimboeuf +Signed-off-by: Ben Hutchings --- - Documentation/admin-guide/kernel-parameters.txt | 5 +++++ + Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/x86/kernel/cpu/tsx.c | 7 ++++++- - 2 files changed, 11 insertions(+), 1 deletion(-) + 2 files changed, 9 insertions(+), 1 deletion(-) -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index f03756d2addb..e6a58cbbfab8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -4715,6 +4715,11 @@ +@@ -4706,6 +4706,9 @@ + update. This new MSR allows for the reliable + deactivation of the TSX functionality.) - on - Enable TSX on the system. - off - Disable TSX on the system. + auto - Disable TSX if X86_BUG_TAA is present, + otherwise enable TSX on the system. + -+ More details on X86_BUG_TAA here: -+ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst - Not specifying this option is equivalent to tsx=off. -diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c -index e5933ef50add..89ab91eacd4f 100644 + See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c -@@ -69,7 +69,7 @@ static bool __init tsx_ctrl_is_supported(void) +@@ -75,7 +75,7 @@ static bool __init tsx_ctrl_is_supported void __init tsx_init(void) { @@ -60,7 +47,7 @@ index e5933ef50add..89ab91eacd4f 100644 int ret; if (!tsx_ctrl_is_supported()) -@@ -81,6 +81,11 @@ void __init tsx_init(void) +@@ -87,6 +87,11 @@ void __init tsx_init(void) tsx_ctrl_state = TSX_CTRL_ENABLE; } else if (!strcmp(arg, "off")) { tsx_ctrl_state = TSX_CTRL_DISABLE; diff --git a/debian/patches/bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch b/debian/patches/bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch similarity index 87% rename from debian/patches/bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch rename to debian/patches/bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch index 4eafcb6c3..fb18eeed4 100644 --- a/debian/patches/bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch +++ b/debian/patches/bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch @@ -2,49 +2,42 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:32:55 +0200 Subject: x86/speculation/taa: Add documentation for TSX Async Abort +commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream. + Add the documenation for TSX Async Abort. Include the description of the issue, how to check the mitigation state, control the mitigation, guidance for system administrators. - [ bp: Add proper SPDX tags, touch ups. ] + [ bp: Add proper SPDX tags, touch ups by Josh and me. ] Co-developed-by: Antonio Gomez Iglesias + Signed-off-by: Pawan Gupta Signed-off-by: Antonio Gomez Iglesias Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner Reviewed-by: Mark Gross Reviewed-by: Tony Luck -Cc: Andrew Morton -Cc: Fenghua Yu -Cc: "H. 
Peter Anvin" -Cc: Ingo Molnar -Cc: Jonathan Corbet -Cc: Josh Poimboeuf -Cc: Juergen Gross -Cc: linux-doc@vger.kernel.org -Cc: Thomas Gleixner -Cc: x86-ml -[bwh: Forward-ported on top of NX: Fix conflict (neighbouring - insertions) in Documentation/ABI/testing/sysfs-devices-system-cpu] +Reviewed-by: Josh Poimboeuf [bwh: Backported to 4.19: adjust context] Signed-off-by: Ben Hutchings --- .../ABI/testing/sysfs-devices-system-cpu | 1 + Documentation/admin-guide/hw-vuln/index.rst | 1 + - .../admin-guide/hw-vuln/tsx_async_abort.rst | 256 ++++++++++++++++++ - .../admin-guide/kernel-parameters.txt | 36 +++ + .../admin-guide/hw-vuln/tsx_async_abort.rst | 276 ++++++++++++++++++ + .../admin-guide/kernel-parameters.txt | 38 +++ Documentation/x86/index.rst | 1 + Documentation/x86/tsx_async_abort.rst | 117 ++++++++ - 6 files changed, 412 insertions(+) + 6 files changed, 434 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/tsx_async_abort.rst create mode 100644 Documentation/x86/tsx_async_abort.rst --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu -@@ -479,6 +479,7 @@ What: /sys/devices/system/cpu/vulnerabi +@@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabi + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds - /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list @@ -58,7 +51,7 @@ Signed-off-by: Ben Hutchings + tsx_async_abort --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst -@@ -0,0 +1,256 @@ +@@ -0,0 +1,276 @@ +.. SPDX-License-Identifier: GPL-2.0 + +TAA - TSX Asynchronous Abort @@ -121,7 +114,7 @@ Signed-off-by: Ben Hutchings + +The victim of a malicious actor does not need to make use of TSX. Only the +attacker needs to begin a TSX transaction and raise an asynchronous abort -+to try to leak some of data stored in the buffers. ++which in turn potenitally leaks data stored in the buffers. + +More detailed technical information is available in the TAA specific x86 +architecture section: :ref:`Documentation/x86/tsx_async_abort.rst `. @@ -133,9 +126,9 @@ Signed-off-by: Ben Hutchings +Attacks against the TAA vulnerability can be implemented from unprivileged +applications running on hosts or guests. + -+As for MDS, the attacker has no control over the memory addresses that can be -+leaked. Only the victim is responsible for bringing data to the CPU. As a -+result, the malicious actor has to first sample as much data as possible and ++As for MDS, the attacker has no control over the memory addresses that can ++be leaked. Only the victim is responsible for bringing data to the CPU. As ++a result, the malicious actor has to sample as much data as possible and +then postprocess it to try to infer any useful information from it. + +A potential attacker only has read access to the data. Also, there is no direct @@ -202,11 +195,12 @@ Signed-off-by: Ben Hutchings +Virtualization mitigation +^^^^^^^^^^^^^^^^^^^^^^^^^ + -+Affected systems where the host has the TAA microcode and the TAA mitigation is -+ON (with TSX disabled) are not vulnerable regardless of the status of the VMs. ++Affected systems where the host has TAA microcode and TAA is mitigated by ++having disabled TSX previously, are not vulnerable regardless of the status ++of the VMs. 
+ -+In all other cases, if the host either does not have the TAA microcode or the -+kernel is not mitigated, the system might be vulnerable. ++In all other cases, if the host either does not have the TAA microcode or ++the kernel is not mitigated, the system might be vulnerable. + + +.. _taa_mitigation_control_command_line: @@ -244,11 +238,23 @@ Signed-off-by: Ben Hutchings +The valid options are: + + ============ ============================================================= -+ off Disables TSX. ++ off Disables TSX on the system. ++ ++ Note that this option takes effect only on newer CPUs which are ++ not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 ++ and which get the new IA32_TSX_CTRL MSR through a microcode ++ update. This new MSR allows for the reliable deactivation of ++ the TSX functionality. + + on Enables TSX. + -+ auto Disables TSX on affected platform, otherwise enables TSX. ++ Although there are mitigations for all known security ++ vulnerabilities, TSX has been known to be an accelerator for ++ several previous speculation-related CVEs, and so there may be ++ unknown security risks associated with leaving it enabled. ++ ++ auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX ++ on the system. + ============ ============================================================= + +Not specifying this option is equivalent to "tsx=off". @@ -256,17 +262,24 @@ Signed-off-by: Ben Hutchings +The following combinations of the "tsx_async_abort" and "tsx" are possible. For +affected platforms tsx=auto is equivalent to tsx=off and the result will be: + -+ ========= ==================== ========================================= -+ tsx=on tsx_async_abort=full The system will use VERW to clear CPU -+ buffers. -+ tsx=on tsx_async_abort=off The system is vulnerable. -+ tsx=off tsx_async_abort=full TSX is disabled. System is not vulnerable. -+ tsx=off tsx_async_abort=off TSX is disabled. System is not vulnerable. -+ ========= ==================== ========================================= ++ ========= ========================== ========================================= ++ tsx=on tsx_async_abort=full The system will use VERW to clear CPU ++ buffers. Cross-thread attacks are still ++ possible on SMT machines. ++ tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT ++ mitigated. ++ tsx=on tsx_async_abort=off The system is vulnerable. ++ tsx=off tsx_async_abort=full TSX might be disabled if microcode ++ provides a TSX control MSR. If so, ++ system is not vulnerable. ++ tsx=off tsx_async_abort=full,nosmt Ditto ++ tsx=off tsx_async_abort=off ditto ++ ========= ========================== ========================================= ++ + +For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU -+buffers. For platforms without TSX control "tsx" command line argument has no -+effect. ++buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0) ++"tsx" command line argument has no effect. 
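As a concrete way to verify whichever combination is chosen, the sysfs
vulnerability file added earlier in this series can be read at runtime.
A minimal standalone C sketch (illustrative only, not part of any patch
here):

    #include <stdio.h>

    int main(void)
    {
            char buf[256];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/tsx_async_abort", "r");

            if (!f)
                    return 1;       /* no such file: kernel lacks the TAA sysfs reporting */
            if (fgets(buf, sizeof(buf), f))
                    printf("TAA status: %s", buf);  /* e.g. "Mitigation: TSX disabled" */
            fclose(f);
            return 0;
    }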
+ +For the affected platforms below table indicates the mitigation status for the +combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO @@ -275,7 +288,7 @@ Signed-off-by: Ben Hutchings + ======= ========= ============= ======================================== + MDS_NO MD_CLEAR TSX_CTRL_MSR Status + ======= ========= ============= ======================================== -+ 0 0 0 Vulnerable (needs ucode) ++ 0 0 0 Vulnerable (needs microcode) + 0 1 0 MDS and TAA mitigated via VERW + 1 1 0 MDS fixed, TAA vulnerable if TSX enabled + because MD_CLEAR has no meaning and @@ -317,7 +330,7 @@ Signed-off-by: Ben Hutchings + - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off). --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -2538,6 +2538,7 @@ +@@ -2521,6 +2521,7 @@ spec_store_bypass_disable=off [X86,PPC] l1tf=off [X86] mds=off [X86] @@ -325,7 +338,7 @@ Signed-off-by: Ben Hutchings auto (default) Mitigate all CPU vulnerabilities, but leave SMT -@@ -2553,6 +2554,7 @@ +@@ -2536,6 +2537,7 @@ be fully mitigated, even if it means losing SMT. Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] @@ -333,9 +346,9 @@ Signed-off-by: Ben Hutchings mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this -@@ -4718,6 +4720,40 @@ - - Not specifying this option is equivalent to tsx=off. +@@ -4714,6 +4716,42 @@ + See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + for more details. + tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async + Abort (TAA) vulnerability. @@ -356,6 +369,8 @@ Signed-off-by: Ben Hutchings + options are: + + full - Enable TAA mitigation on vulnerable CPUs ++ if TSX is enabled. ++ + full,nosmt - Enable TAA mitigation and disable SMT on + vulnerable CPUs. If TSX is disabled, SMT + is not disabled because CPU is not diff --git a/debian/patches/bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch b/debian/patches/bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch similarity index 76% rename from debian/patches/bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch rename to debian/patches/bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch index e3e1418a4..01800f468 100644 --- a/debian/patches/bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch +++ b/debian/patches/bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch @@ -2,6 +2,8 @@ From: Michal Hocko Date: Wed, 23 Oct 2019 12:35:50 +0200 Subject: x86/tsx: Add config options to set tsx=on|off|auto +commit db616173d787395787ecc93eef075fa975227b10 upstream. + There is a general consensus that TSX usage is not largely spread while the history shows there is a non trivial space for side channel attacks possible. Therefore the tsx is disabled by default even on platforms @@ -17,22 +19,20 @@ Introduce config options X86_INTEL_TSX_MODE_OFF, X86_INTEL_TSX_MODE_ON and X86_INTEL_TSX_MODE_AUTO to control the TSX feature. The config setting can be overridden by the tsx cmdline options. + [ bp: Text cleanups from Josh. ] + Suggested-by: Borislav Petkov Signed-off-by: Michal Hocko Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov -Cc: "H. 
Peter Anvin" -Cc: Ingo Molnar -Cc: Thomas Gleixner -Cc: Tony Luck -Cc: x86-ml +Signed-off-by: Thomas Gleixner +Reviewed-by: Josh Poimboeuf +Signed-off-by: Ben Hutchings --- arch/x86/Kconfig | 45 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/tsx.c | 22 +++++++++++++------ 2 files changed, 61 insertions(+), 6 deletions(-) -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index e76d16ac2776..a47343bb439c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1903,6 +1903,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS @@ -53,13 +53,13 @@ index e76d16ac2776..a47343bb439c 100644 + will be more of those attacks discovered in the future. + + Therefore TSX is not enabled by default (aka tsx=off). An admin -+ might override this decision by tsx=on command line parameter. This -+ has a risk that TSX will get enabled also on platforms which are -+ known to be vulnerable to attacks like TAA and a safer option is to -+ use tsx=auto command line parameter. ++ might override this decision by tsx=on the command line parameter. ++ Even with TSX enabled, the kernel will attempt to enable the best ++ possible TAA mitigation setting depending on the microcode available ++ for the particular machine. + -+ This options allows to set the default tsx mode between tsx=on, off -+ and auto. See Documentation/admin-guide/kernel-parameters.txt for more ++ This option allows to set the default tsx mode between tsx=on, =off ++ and =auto. See Documentation/admin-guide/kernel-parameters.txt for more + details. + + Say off if not sure, auto if TSX is in use but it should be used on safe @@ -69,29 +69,27 @@ index e76d16ac2776..a47343bb439c 100644 +config X86_INTEL_TSX_MODE_OFF + bool "off" + help -+ TSX is always disabled - equals tsx=off command line parameter. ++ TSX is disabled if possible - equals to tsx=off command line parameter. + +config X86_INTEL_TSX_MODE_ON + bool "on" + help -+ TSX is always enabled on TSX capable HW - equals tsx=on command line -+ parameter. ++ TSX is always enabled on TSX capable HW - equals the tsx=on command ++ line parameter. + +config X86_INTEL_TSX_MODE_AUTO + bool "auto" + help + TSX is enabled on TSX capable HW that is believed to be safe against -+ side channel attacks- equals tsx=auto command line parameter. ++ side channel attacks- equals the tsx=auto command line parameter. 
+endchoice + config EFI bool "EFI runtime service support" depends on ACPI -diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c -index 89ab91eacd4f..ab400f8bbfe1 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c -@@ -67,6 +67,14 @@ static bool __init tsx_ctrl_is_supported(void) +@@ -73,6 +73,14 @@ static bool __init tsx_ctrl_is_supported return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); } @@ -106,7 +104,7 @@ index 89ab91eacd4f..ab400f8bbfe1 100644 void __init tsx_init(void) { char arg[5] = {}; -@@ -82,17 +90,19 @@ void __init tsx_init(void) +@@ -88,17 +96,19 @@ void __init tsx_init(void) } else if (!strcmp(arg, "off")) { tsx_ctrl_state = TSX_CTRL_DISABLE; } else if (!strcmp(arg, "auto")) { diff --git a/debian/patches/series b/debian/patches/series index 30e12173f..9b65b8dc2 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -258,28 +258,30 @@ bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.pa bugfix/all/vhost-make-sure-log_num-in_num.patch bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch -bugfix/x86/itlb_multihit/0001-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch -bugfix/x86/itlb_multihit/0002-kvm-Convert-kvm_lock-to-a-mutex.patch -bugfix/x86/itlb_multihit/0003-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch -bugfix/x86/itlb_multihit/0004-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch -bugfix/x86/itlb_multihit/0005-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch -bugfix/x86/itlb_multihit/0006-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch -bugfix/x86/itlb_multihit/0007-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch -bugfix/x86/itlb_multihit/0008-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch -bugfix/x86/itlb_multihit/0009-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch -bugfix/x86/itlb_multihit/0009-x86-Add-ITLB_MULTIHIT-bug-infrastructure.patch -bugfix/x86/itlb_multihit/0010-kvm-mmu-ITLB_MULTIHIT-mitigation.patch -bugfix/x86/itlb_multihit/0011-kvm-Add-helper-function-for-creating-VM-worker.patch -bugfix/x86/itlb_multihit/0012-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch -bugfix/x86/taa/0013-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch -bugfix/x86/taa/0014-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch -bugfix/x86/taa/0015-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch -bugfix/x86/taa/0016-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch -bugfix/x86/taa/0017-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch -bugfix/x86/taa/0018-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch -bugfix/x86/taa/0019-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch -bugfix/x86/taa/0020-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch -bugfix/x86/taa/0021-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch +bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch +bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch +bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch +bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch +bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch +bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch +bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch 
+bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch +bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch +bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch +bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch +bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch +bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch +bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch +bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch +bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch +bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch +bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch +bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch +bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch +bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From be004c1b692503f24b35da3151f38116cacc2d61 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Fri, 8 Nov 2019 00:14:08 +0100 Subject: [PATCH 08/13] x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs --- debian/changelog | 1 + ...taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch | 49 +++++++++++++++++++ debian/patches/series | 1 + 3 files changed, 51 insertions(+) create mode 100644 debian/patches/bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch diff --git a/debian/changelog b/debian/changelog index e8b1dae6a..3b67206b0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,7 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - x86/tsx: Add "auto" option to the tsx= cmdline parameter - x86/speculation/taa: Add documentation for TSX Async Abort - x86/tsx: Add config options to set tsx=on|off|auto + - x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs TSX is now disabled by default; see Documentation/admin-guide/hw-vuln/tsx_async_abort.rst * [x86] KVM: Add mitigation for Machine Check Error on Page Size Change diff --git a/debian/patches/bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch b/debian/patches/bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch new file mode 100644 index 000000000..e2a39e5cf --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch @@ -0,0 +1,49 @@ +From: Josh Poimboeuf +Date: Wed, 6 Nov 2019 20:26:46 -0600 +Subject: x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs +Origin: https://git.kernel.org/linus/012206a822a8b6ac09125bfaa210a95b9eb8f1c1 +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-11135 + +For new IBRS_ALL CPUs, the Enhanced IBRS check at the beginning of +cpu_bugs_smt_update() causes the function to return early, unintentionally +skipping the MDS and TAA logic. 
+ +This is not a problem for MDS, because there appears to be no overlap +between IBRS_ALL and MDS-affected CPUs. So the MDS mitigation would be +disabled and nothing would need to be done in this function anyway. + +But for TAA, the TAA_MSG_SMT string will never get printed on Cascade +Lake and newer. + +The check is superfluous anyway: when 'spectre_v2_enabled' is +SPECTRE_V2_IBRS_ENHANCED, 'spectre_v2_user' is always +SPECTRE_V2_USER_NONE, and so the 'spectre_v2_user' switch statement +handles it appropriately by doing nothing. So just remove the check. + +Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort") +Signed-off-by: Josh Poimboeuf +Signed-off-by: Thomas Gleixner +Reviewed-by: Tyler Hicks +Reviewed-by: Borislav Petkov +--- + arch/x86/kernel/cpu/bugs.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 8237b86ba6dc..10d11586f805 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -886,10 +886,6 @@ static void update_mds_branch_idle(void) + + void arch_smt_update(void) + { +- /* Enhanced IBRS implies STIBP. No update required. */ +- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) +- return; +- + mutex_lock(&spec_ctrl_mutex); + + switch (spectre_v2_user) { +-- +2.24.0 + diff --git a/debian/patches/series b/debian/patches/series index 9b65b8dc2..7052e4b61 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -267,6 +267,7 @@ bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch +bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch From c2443a2e978a8b5e007b7000797fdec1e636cd0d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 9 Nov 2019 20:16:45 +0000 Subject: [PATCH 09/13] [x86] Update TAA and NX fixes to pending stable backports --- debian/changelog | 4 +- ...-kvm_mmu_page-member-to-save-8-bytes.patch | 52 ----------------- ...dd-ITLB_MULTIHIT-bug-infrastructure.patch} | 12 ++-- ...ont-to-the-cpu-vulnerability-whiteli.patch | 30 ++++++++++ ...ninline-and-export-CPU-mitigations-.patch} | 3 +- ...ion-Add-ITLB_MULTIHIT-documentation.patch} | 3 +- ...o-not-allow-clearing-largepages-deb.patch} | 8 +-- ...017-kvm-Convert-kvm_lock-to-a-mutex.patch} | 29 +++++----- ...elease-the-page-inside-mmu_set_spte.patch} | 6 +- ...E-fetch-and-__direct_map-more-simil.patch} | 5 +- ...ow-unneeded-hugepage-gfn-adjustment.patch} | 4 +- ...m_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch} | 4 +- ...points-around-__direct_map-and-FNAM.patch} | 18 +++--- ...s-run-with-EFER.NXE-1-when-shadow-p.patch} | 19 +++--- ...24-kvm-mmu-ITLB_MULTIHIT-mitigation.patch} | 52 ++++++++--------- ...nction-for-creating-VM-worker-threa.patch} | 8 +-- ...ecovery-of-shattered-NX-large-pages.patch} | 36 ++++++------ ...l-speculation-bugs-and-features-as-d.patch | 58 +++++++++++++++++++ ...2-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch} | 3 +- ...lper-function-x86_read_arch_cap_msr.patch} | 3 +- ...-cmdline-option-with-TSX-disabled-b.patch} | 11 ++-- 
...aa-Add-mitigation-for-TSX-Async-Abo.patch} | 4 +- ...aa-Add-sysfs-reporting-for-TSX-Asyn.patch} | 3 +- ...S_NO-0-to-guests-when-TSX-is-enable.patch} | 11 ++-- ...option-to-the-tsx-cmdline-parameter.patch} | 3 +- ...aa-Add-documentation-for-TSX-Async-.patch} | 4 +- ...nfig-options-to-set-tsx-on-off-auto.patch} | 3 +- ...aa-Fix-printing-of-TAA_MSG_SMT-on-I.patch} | 11 +--- debian/patches/series | 51 ++++++++-------- 29 files changed, 233 insertions(+), 225 deletions(-) delete mode 100644 debian/patches/bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch rename debian/patches/bugfix/x86/itlb_multihit/{0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch => 0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch} (96%) create mode 100644 debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch rename debian/patches/bugfix/x86/itlb_multihit/{0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch => 0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch} (96%) rename debian/patches/bugfix/x86/itlb_multihit/{0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch => 0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch} (98%) rename debian/patches/bugfix/x86/itlb_multihit/{0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch => 0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch} (93%) rename debian/patches/bugfix/x86/itlb_multihit/{0011-kvm-Convert-kvm_lock-to-a-mutex.patch => 0017-kvm-Convert-kvm_lock-to-a-mutex.patch} (87%) rename debian/patches/bugfix/x86/itlb_multihit/{0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch => 0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch} (95%) rename debian/patches/bugfix/x86/itlb_multihit/{0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch => 0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch} (97%) rename debian/patches/bugfix/x86/itlb_multihit/{0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch => 0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch} (95%) rename debian/patches/bugfix/x86/itlb_multihit/{0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch => 0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch} (91%) rename debian/patches/bugfix/x86/itlb_multihit/{0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch => 0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch} (88%) rename debian/patches/bugfix/x86/itlb_multihit/{0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch => 0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch} (79%) rename debian/patches/bugfix/x86/itlb_multihit/{0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch => 0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch} (90%) rename debian/patches/bugfix/x86/itlb_multihit/{0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch => 0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch} (94%) rename debian/patches/bugfix/x86/itlb_multihit/{0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch => 0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch} (91%) create mode 100644 debian/patches/bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch rename debian/patches/bugfix/x86/taa/{0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch => 0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch} (96%) rename debian/patches/bugfix/x86/taa/{0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch 
=> 0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch} (94%) rename debian/patches/bugfix/x86/taa/{0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch => 0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch} (96%) rename debian/patches/bugfix/x86/taa/{0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch => 0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch} (98%) rename debian/patches/bugfix/x86/taa/{0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch => 0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch} (97%) rename debian/patches/bugfix/x86/taa/{0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch => 0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch} (86%) rename debian/patches/bugfix/x86/taa/{0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch => 0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch} (95%) rename debian/patches/bugfix/x86/taa/{0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch => 0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch} (99%) rename debian/patches/bugfix/x86/taa/{0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch => 0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch} (97%) rename debian/patches/bugfix/x86/taa/{0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch => 0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch} (81%) diff --git a/debian/changelog b/debian/changelog index 3b67206b0..6e50be20e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,8 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium * [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135): + - KVM: x86: use Intel speculation bugs and features as derived in generic + x86 code - x86/msr: Add the IA32_TSX_CTRL MSR - x86/cpu: Add a helper function x86_read_arch_cap_msr() - x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default @@ -15,7 +17,6 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium Documentation/admin-guide/hw-vuln/tsx_async_abort.rst * [x86] KVM: Add mitigation for Machine Check Error on Page Size Change (aka iTLB multi-hit, CVE-2018-12207): - - KVM: x86: adjust kvm_mmu_page member to save 8 bytes - kvm: Convert kvm_lock to a mutex - kvm: x86: Do not release the page inside mmu_set_spte() - KVM: x86: make FNAME(fetch) and __direct_map more similar @@ -26,6 +27,7 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active - x86/bugs: Add ITLB_MULTIHIT bug infrastructure - cpu/speculation: Uninline and export CPU mitigations helpers + - x86/cpu: Add Tremont to the cpu vulnerability whitelist - kvm: mmu: ITLB_MULTIHIT mitigation - kvm: Add helper function for creating VM worker threads - kvm: x86: mmu: Recovery of shattered NX large pages diff --git a/debian/patches/bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch b/debian/patches/bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch deleted file mode 100644 index 42cd1c622..000000000 --- a/debian/patches/bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Wei Yang -Date: Thu, 6 Sep 2018 05:58:16 +0800 -Subject: KVM: x86: adjust kvm_mmu_page member to save 8 bytes - -commit 3ff519f29d98ecdc1961d825d105d68711093b6b upstream. - -On a 64bits machine, struct is naturally aligned with 8 bytes. 
Since -kvm_mmu_page member *unsync* and *role* are less then 4 bytes, we can -rearrange the sequence to compace the struct. - -As the comment shows, *role* and *gfn* are used to key the shadow page. In -order to keep the comment valid, this patch moves the *unsync* up and -exchange the position of *role* and *gfn*. - -From /proc/slabinfo, it shows the size of kvm_mmu_page is 8 bytes less and -with one more object per slap after applying this patch. - - # name - kvm_mmu_page_header 0 0 168 24 - - kvm_mmu_page_header 0 0 160 25 - -Signed-off-by: Wei Yang -Signed-off-by: Paolo Bonzini -Signed-off-by: Ben Hutchings ---- - arch/x86/include/asm/kvm_host.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -281,18 +281,18 @@ struct kvm_rmap_head { - struct kvm_mmu_page { - struct list_head link; - struct hlist_node hash_link; -+ bool unsync; - - /* - * The following two entries are used to key the shadow page in the - * hash table. - */ -- gfn_t gfn; - union kvm_mmu_page_role role; -+ gfn_t gfn; - - u64 *spt; - /* hold the gfn of each spte inside spt */ - gfn_t *gfns; -- bool unsync; - int root_count; /* Currently serving as active root */ - unsigned int unsync_children; - struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ diff --git a/debian/patches/bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch b/debian/patches/bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch similarity index 96% rename from debian/patches/bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch rename to debian/patches/bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch index 95c177c1a..8917da9e7 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch @@ -2,7 +2,7 @@ From: Vineela Tummalapalli Date: Mon, 4 Nov 2019 12:22:01 +0100 Subject: x86/bugs: Add ITLB_MULTIHIT bug infrastructure -commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream. 
+commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream Some processors may incur a machine check error possibly resulting in an unrecoverable CPU lockup when an instruction fetch encounters a TLB @@ -30,10 +30,6 @@ Co-developed-by: Pawan Gupta Signed-off-by: Pawan Gupta Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner -[bwh: Backported to 4.19: - - No support for X86_VENDOR_HYGON, ATOM_AIRMONT_NP - - Adjust context] -Signed-off-by: Ben Hutchings --- .../ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/include/asm/cpufeatures.h | 1 + @@ -81,7 +77,7 @@ Signed-off-by: Ben Hutchings * Not susceptible to --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -1391,6 +1391,11 @@ static ssize_t l1tf_show_state(char *buf +@@ -1387,6 +1387,11 @@ static ssize_t l1tf_show_state(char *buf } #endif @@ -93,7 +89,7 @@ Signed-off-by: Ben Hutchings static ssize_t mds_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { -@@ -1494,6 +1499,9 @@ static ssize_t cpu_show_common(struct de +@@ -1490,6 +1495,9 @@ static ssize_t cpu_show_common(struct de case X86_BUG_TAA: return tsx_async_abort_show_state(buf); @@ -103,7 +99,7 @@ Signed-off-by: Ben Hutchings default: break; } -@@ -1535,4 +1543,9 @@ ssize_t cpu_show_tsx_async_abort(struct +@@ -1531,4 +1539,9 @@ ssize_t cpu_show_tsx_async_abort(struct { return cpu_show_common(dev, attr, buf, X86_BUG_TAA); } diff --git a/debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch b/debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch new file mode 100644 index 000000000..f9237aadb --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch @@ -0,0 +1,30 @@ +From: Pawan Gupta +Date: Mon, 4 Nov 2019 12:22:01 +0100 +Subject: x86/cpu: Add Tremont to the cpu vulnerability whitelist + +commit cad14885a8d32c1c0d8eaa7bf5c0152a22b6080e upstream + +Add the new cpu family ATOM_TREMONT_D to the cpu vunerability +whitelist. ATOM_TREMONT_D is not affected by X86_BUG_ITLB_MULTIHIT. + +ATOM_TREMONT_D might have mitigations against other issues as well, but +only the ITLB multihit mitigation is confirmed at this point. + +Signed-off-by: Pawan Gupta +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kernel/cpu/common.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1000,6 +1000,8 @@ static const __initconst struct x86_cpu_ + * good enough for our purposes. 
+ */ + ++ VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT), ++ + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), diff --git a/debian/patches/bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch b/debian/patches/bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch similarity index 96% rename from debian/patches/bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch rename to debian/patches/bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch index 80f62ff4a..2d9aafa2e 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch @@ -2,7 +2,7 @@ From: Tyler Hicks Date: Mon, 4 Nov 2019 12:22:02 +0100 Subject: cpu/speculation: Uninline and export CPU mitigations helpers -commit 731dc9df975a5da21237a18c3384f811a7a41cc6 upstream. +commit 731dc9df975a5da21237a18c3384f811a7a41cc6 upstream A kernel module may need to check the value of the "mitigations=" kernel command line parameter as part of its setup when the module needs @@ -17,7 +17,6 @@ cpu_mitigations can be checked with the exported helper functions. Signed-off-by: Tyler Hicks Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner -Signed-off-by: Ben Hutchings --- include/linux/cpu.h | 25 ++----------------------- kernel/cpu.c | 27 ++++++++++++++++++++++++++- diff --git a/debian/patches/bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch b/debian/patches/bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch similarity index 98% rename from debian/patches/bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch rename to debian/patches/bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch index b67ee61eb..79e454194 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch @@ -2,7 +2,7 @@ From: "Gomez Iglesias, Antonio" Date: Mon, 4 Nov 2019 12:22:03 +0100 Subject: Documentation: Add ITLB_MULTIHIT documentation -commit 7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d upstream. +commit 7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d upstream Add the initial ITLB_MULTIHIT documentation. 
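Returning to the mitigation helpers exported a few patches above: a
module consuming them as that commit message describes might look
roughly like this (a hypothetical caller written for illustration; only
cpu_mitigations_off() and cpu_mitigations_auto_nosmt() come from the
actual commit):

    #include <linux/cpu.h>
    #include <linux/module.h>
    #include <linux/printk.h>

    static int __init example_init(void)
    {
            if (cpu_mitigations_off())
                    pr_info("booted with mitigations=off\n");
            else if (cpu_mitigations_auto_nosmt())
                    pr_info("booted with mitigations=auto,nosmt\n");
            return 0;
    }
    module_init(example_init);

    MODULE_LICENSE("GPL");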
@@ -12,7 +12,6 @@ Signed-off-by: Antonio Gomez Iglesias Signed-off-by: Nelson D'Souza Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner -Signed-off-by: Ben Hutchings --- Documentation/admin-guide/hw-vuln/index.rst | 1 + .../admin-guide/hw-vuln/multihit.rst | 163 ++++++++++++++++++ diff --git a/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch b/debian/patches/bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch similarity index 93% rename from debian/patches/bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch rename to debian/patches/bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch index 625e3daf3..cf84618ee 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch @@ -1,8 +1,8 @@ From: Paolo Bonzini -Date: Mon, 30 Sep 2019 18:48:44 +0200 +Date: Fri, 11 Oct 2019 11:59:48 +0200 Subject: kvm: x86, powerpc: do not allow clearing largepages debugfs entry -commit 833b45de69a6016c4b0cebe6765d526a31a81580 upstream. +commit 833b45de69a6016c4b0cebe6765d526a31a81580 upstream The largepages debugfs entry is incremented/decremented as shadow pages are created or destroyed. Clearing it will result in an @@ -11,8 +11,8 @@ misinterpreted by tools that use debugfs information), so make this particular statistic read-only. Signed-off-by: Paolo Bonzini -[bwh: Backported to 4.19: drop powerpc changes and the Cc to kvm-ppc] -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner +Cc: kvm-ppc@vger.kernel.org --- arch/x86/kvm/x86.c | 6 +++--- include/linux/kvm_host.h | 2 ++ diff --git a/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch b/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch similarity index 87% rename from debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch rename to debian/patches/bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch index 53466c0e4..e5b02c07d 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch @@ -2,7 +2,7 @@ From: Junaid Shahid Date: Thu, 3 Jan 2019 17:14:28 -0800 Subject: kvm: Convert kvm_lock to a mutex -commit 0d9ce162cf46c99628cc5da9510b959c7976735b upstream. +commit 0d9ce162cf46c99628cc5da9510b959c7976735b upstream It doesn't seem as if there is any particular need for kvm_lock to be a spinlock, so convert the lock to a mutex so that sleepable functions (in @@ -10,8 +10,7 @@ particular cond_resched()) can be called while holding it. 
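In rough outline, the pattern this conversion enables looks like the
following (a sketch of the kind of sleepable critical section the
message describes, not code from the patch; walk_vm_list() is an
invented name):

    #include <linux/mutex.h>
    #include <linux/sched.h>

    static DEFINE_MUTEX(kvm_lock);      /* previously a spinlock */

    static void walk_vm_list(void)
    {
            mutex_lock(&kvm_lock);
            /* ... potentially long walk over vm_list ... */
            cond_resched();             /* may sleep: fine under a mutex,
                                           forbidden under a spinlock */
            mutex_unlock(&kvm_lock);
    }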
Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini -[bwh: Backported to 4.19: adjust context] -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner --- Documentation/virtual/kvm/locking.txt | 4 +--- arch/s390/kvm/kvm-s390.c | 4 ++-- @@ -81,7 +80,7 @@ Signed-off-by: Ben Hutchings --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -6490,7 +6490,7 @@ static void kvm_hyperv_tsc_notifier(void +@@ -6498,7 +6498,7 @@ static void kvm_hyperv_tsc_notifier(void struct kvm_vcpu *vcpu; int cpu; @@ -90,7 +89,7 @@ Signed-off-by: Ben Hutchings list_for_each_entry(kvm, &vm_list, vm_list) kvm_make_mclock_inprogress_request(kvm); -@@ -6516,7 +6516,7 @@ static void kvm_hyperv_tsc_notifier(void +@@ -6524,7 +6524,7 @@ static void kvm_hyperv_tsc_notifier(void spin_unlock(&ka->pvclock_gtod_sync_lock); } @@ -99,7 +98,7 @@ Signed-off-by: Ben Hutchings } #endif -@@ -6574,17 +6574,17 @@ static int kvmclock_cpufreq_notifier(str +@@ -6582,17 +6582,17 @@ static int kvmclock_cpufreq_notifier(str smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); @@ -120,7 +119,7 @@ Signed-off-by: Ben Hutchings if (freq->old < freq->new && send_ipi) { /* -@@ -6710,12 +6710,12 @@ static void pvclock_gtod_update_fn(struc +@@ -6718,12 +6718,12 @@ static void pvclock_gtod_update_fn(struc struct kvm_vcpu *vcpu; int i; @@ -157,7 +156,7 @@ Signed-off-by: Ben Hutchings static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); -@@ -684,9 +684,9 @@ static struct kvm *kvm_create_vm(unsigne +@@ -685,9 +685,9 @@ static struct kvm *kvm_create_vm(unsigne if (r) goto out_err; @@ -169,7 +168,7 @@ Signed-off-by: Ben Hutchings preempt_notifier_inc(); -@@ -732,9 +732,9 @@ static void kvm_destroy_vm(struct kvm *k +@@ -733,9 +733,9 @@ static void kvm_destroy_vm(struct kvm *k kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); @@ -181,7 +180,7 @@ Signed-off-by: Ben Hutchings kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); -@@ -3828,13 +3828,13 @@ static int vm_stat_get(void *_offset, u6 +@@ -3831,13 +3831,13 @@ static int vm_stat_get(void *_offset, u6 u64 tmp_val; *val = 0; @@ -197,7 +196,7 @@ Signed-off-by: Ben Hutchings return 0; } -@@ -3847,12 +3847,12 @@ static int vm_stat_clear(void *_offset, +@@ -3850,12 +3850,12 @@ static int vm_stat_clear(void *_offset, if (val) return -EINVAL; @@ -212,7 +211,7 @@ Signed-off-by: Ben Hutchings return 0; } -@@ -3867,13 +3867,13 @@ static int vcpu_stat_get(void *_offset, +@@ -3870,13 +3870,13 @@ static int vcpu_stat_get(void *_offset, u64 tmp_val; *val = 0; @@ -228,7 +227,7 @@ Signed-off-by: Ben Hutchings return 0; } -@@ -3886,12 +3886,12 @@ static int vcpu_stat_clear(void *_offset +@@ -3889,12 +3889,12 @@ static int vcpu_stat_clear(void *_offset if (val) return -EINVAL; @@ -243,7 +242,7 @@ Signed-off-by: Ben Hutchings return 0; } -@@ -3912,7 +3912,7 @@ static void kvm_uevent_notify_change(uns +@@ -3915,7 +3915,7 @@ static void kvm_uevent_notify_change(uns if (!kvm_dev.this_device || !kvm) return; @@ -252,7 +251,7 @@ Signed-off-by: Ben Hutchings if (type == KVM_EVENT_CREATE_VM) { kvm_createvm_count++; kvm_active_vms++; -@@ -3921,7 +3921,7 @@ static void kvm_uevent_notify_change(uns +@@ -3924,7 +3924,7 @@ static void kvm_uevent_notify_change(uns } created = kvm_createvm_count; active = kvm_active_vms; diff --git a/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch 
b/debian/patches/bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch similarity index 95% rename from debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch rename to debian/patches/bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch index a5373b806..09f40e362 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch @@ -1,8 +1,8 @@ From: Junaid Shahid Date: Thu, 3 Jan 2019 16:22:21 -0800 -Subject: kvm: x86: Do not release the page inside mmu_set_spte() +Subject: kvm: mmu: Do not release the page inside mmu_set_spte() -commit 43fdcda96e2550c6d1c46fb8a78801aa2f7276ed upstream. +commit 43fdcda96e2550c6d1c46fb8a78801aa2f7276ed upstream Release the page at the call-site where it was originally acquired. This makes the exit code cleaner for most call sites, since they @@ -11,7 +11,7 @@ label. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner --- arch/x86/kvm/mmu.c | 18 +++++++----------- arch/x86/kvm/paging_tmpl.h | 8 +++----- diff --git a/debian/patches/bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch b/debian/patches/bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch similarity index 97% rename from debian/patches/bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch rename to debian/patches/bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch index 143dd440a..4803be6a9 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch @@ -2,7 +2,7 @@ From: Paolo Bonzini Date: Mon, 24 Jun 2019 13:06:21 +0200 Subject: KVM: x86: make FNAME(fetch) and __direct_map more similar -commit 3fcf2d1bdeb6a513523cb2c77012a6b047aa859c upstream. +commit 3fcf2d1bdeb6a513523cb2c77012a6b047aa859c upstream These two functions are basically doing the same thing through kvm_mmu_get_page, link_shadow_page and mmu_set_spte; yet, for historical @@ -11,8 +11,7 @@ best of each and make them very similar, so that it is easy to understand changes that apply to both of them. 
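The shared shape of the two walkers after this change can be modelled
in miniature (a self-contained toy analogue, not the kernel code; the
node/install names are invented):

    #include <stdlib.h>

    #define LEVELS 4
    #define FANOUT 512

    struct node { struct node *slot[FANOUT]; };

    /* Descend level by level, creating intermediate tables on demand
     * (the kvm_mmu_get_page + link_shadow_page step), then install
     * the leaf at the target level (the mmu_set_spte step). */
    static void install(struct node *root, unsigned long gfn,
                        int target_level, struct node *leaf)
    {
            struct node *cur = root;
            int level, idx;

            for (level = LEVELS; level > target_level; level--) {
                    idx = (gfn >> ((level - 1) * 9)) & (FANOUT - 1);
                    if (!cur->slot[idx])
                            cur->slot[idx] = calloc(1, sizeof(*cur));
                    cur = cur->slot[idx];
            }
            idx = (gfn >> ((target_level - 1) * 9)) & (FANOUT - 1);
            cur->slot[idx] = leaf;
    }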
Signed-off-by: Paolo Bonzini -[bwh: Backported to 4.19: adjust context] -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner --- arch/x86/kvm/mmu.c | 53 ++++++++++++++++++-------------------- arch/x86/kvm/paging_tmpl.h | 30 ++++++++++----------- diff --git a/debian/patches/bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch b/debian/patches/bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch similarity index 95% rename from debian/patches/bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch rename to debian/patches/bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch index 89bb42d1b..da7810bd7 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch @@ -2,14 +2,14 @@ From: Paolo Bonzini Date: Sun, 23 Jun 2019 19:15:49 +0200 Subject: KVM: x86: remove now unneeded hugepage gfn adjustment -commit d679b32611c0102ce33b9e1a4e4b94854ed1812a upstream. +commit d679b32611c0102ce33b9e1a4e4b94854ed1812a upstream After the previous patch, the low bits of the gfn are masked in both FNAME(fetch) and __direct_map, so we do not need to clear them in transparent_hugepage_adjust. Signed-off-by: Paolo Bonzini -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner --- arch/x86/kvm/mmu.c | 9 +++------ arch/x86/kvm/paging_tmpl.h | 2 +- diff --git a/debian/patches/bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch b/debian/patches/bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch similarity index 91% rename from debian/patches/bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch rename to debian/patches/bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch index 41ccc7290..c25a411fa 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch @@ -2,7 +2,7 @@ From: Paolo Bonzini Date: Sun, 30 Jun 2019 08:36:21 -0400 Subject: KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON -commit e9f2a760b158551bfbef6db31d2cae45ab8072e5 upstream. +commit e9f2a760b158551bfbef6db31d2cae45ab8072e5 upstream Note that in such a case it is quite likely that KVM will BUG_ON in __pte_list_remove when the VM is closed. However, there is no @@ -10,7 +10,7 @@ immediate risk of memory corruption in the host so a WARN_ON is enough and it lets you gather traces for debugging. 
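The trade-off described here is the usual one between the two macros
(generic kernel idiom, sketched rather than taken from the patch):

    #include <linux/bug.h>
    #include <linux/errno.h>

    static int remove_entry(void *pte)
    {
            if (WARN_ON(!pte))      /* print a backtrace, keep the host running */
                    return -EINVAL; /* callers can still collect traces */
            /* BUG_ON(!pte) would instead stop the kernel on the spot */
            return 0;
    }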
Signed-off-by: Paolo Bonzini -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner --- arch/x86/kvm/mmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/debian/patches/bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch b/debian/patches/bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch similarity index 88% rename from debian/patches/bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch rename to debian/patches/bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch index d7369b867..7d0d7e42a 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch @@ -1,19 +1,18 @@ From: Paolo Bonzini -Date: Mon, 1 Jul 2019 06:22:57 -0400 +Date: Thu, 4 Jul 2019 05:14:13 -0400 Subject: KVM: x86: add tracepoints around __direct_map and FNAME(fetch) -commit 335e192a3fa415e1202c8b9ecdaaecd643f823cc upstream. +commit 335e192a3fa415e1202c8b9ecdaaecd643f823cc upstream These are useful in debugging shadow paging. Signed-off-by: Paolo Bonzini -[bwh: Backported to 4.19: adjust context] -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner --- - arch/x86/kvm/mmu.c | 13 ++++----- + arch/x86/kvm/mmu.c | 14 ++++----- arch/x86/kvm/mmutrace.h | 59 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/paging_tmpl.h | 2 ++ - 3 files changed, 67 insertions(+), 7 deletions(-) + 3 files changed, 68 insertions(+), 7 deletions(-) --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -27,7 +26,7 @@ Signed-off-by: Ben Hutchings #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) -@@ -261,9 +258,13 @@ static u64 __read_mostly shadow_nonprese +@@ -261,9 +258,14 @@ static u64 __read_mostly shadow_nonprese static void mmu_spte_set(u64 *sptep, u64 spte); @@ -37,11 +36,12 @@ Signed-off-by: Ben Hutchings +#define CREATE_TRACE_POINTS +#include "mmutrace.h" ++ + void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) { BUG_ON((mmio_mask & mmio_value) != mmio_value); -@@ -2992,10 +2993,7 @@ static int mmu_set_spte(struct kvm_vcpu +@@ -2992,10 +2994,7 @@ static int mmu_set_spte(struct kvm_vcpu ret = RET_PF_EMULATE; pgprintk("%s: setting spte %llx\n", __func__, *sptep); @@ -53,7 +53,7 @@ Signed-off-by: Ben Hutchings if (!was_rmapped && is_large_pte(*sptep)) ++vcpu->kvm->stat.lpages; -@@ -3106,6 +3104,7 @@ static int __direct_map(struct kvm_vcpu +@@ -3106,6 +3105,7 @@ static int __direct_map(struct kvm_vcpu if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return RET_PF_RETRY; diff --git a/debian/patches/bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch b/debian/patches/bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch similarity index 79% rename from debian/patches/bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch rename to debian/patches/bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch index b4b534e23..ee63fbe1e 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch @@ -1,24 +1,23 @@ 
From: Paolo Bonzini -Date: Sun, 27 Oct 2019 16:23:23 +0100 +Date: Sun, 27 Oct 2019 09:36:37 +0100 Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active -commit 9167ab79936206118cc60e47dcb926c3489f3bd5 upstream. +commit 9167ab79936206118cc60e47dcb926c3489f3bd5 upstream VMX already does so if the host has SMEP, in order to support the combination of CR0.WP=1 and CR4.SMEP=1. However, it is perfectly safe to always do so, and in -fact VMX already ends up running with EFER.NXE=1 on old processors that lack the -"load EFER" controls, because it may help avoiding a slow MSR write. Removing -all the conditionals simplifies the code. +fact VMX also ends up running with EFER.NXE=1 on old processors that lack the +"load EFER" controls, because it may help avoiding a slow MSR write. SVM does not have similar code, but it should since recent AMD processors do -support SMEP. So this patch also makes the code for the two vendors more similar -while fixing NPT=0, CR0.WP=1 and CR4.SMEP=1 on AMD processors. +support SMEP. So this patch makes the code for the two vendors simpler and +more similar, while fixing an issue with CR0.WP=1 and CR4.SMEP=1 on AMD. -Cc: Joerg Roedel Signed-off-by: Paolo Bonzini -[bwh: Backported to 4.19: adjust filename] -Signed-off-by: Ben Hutchings +Signed-off-by: Thomas Gleixner +Cc: Joerg Roedel +Cc: stable@vger.kernel.org --- arch/x86/kvm/svm.c | 10 ++++++++-- arch/x86/kvm/vmx.c | 14 +++----------- diff --git a/debian/patches/bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch b/debian/patches/bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch similarity index 90% rename from debian/patches/bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch rename to debian/patches/bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch index c2e8c3b06..d97596d8b 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch @@ -2,7 +2,7 @@ From: Paolo Bonzini Date: Mon, 4 Nov 2019 12:22:02 +0100 Subject: kvm: mmu: ITLB_MULTIHIT mitigation -commit b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0 upstream. +commit b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0 upstream With some Intel processors, putting the same virtual address in the TLB as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit @@ -26,10 +26,8 @@ and direct EPT is treated in the same way. 
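In outline, the fault-path decision described above amounts to the
following (a deliberately simplified sketch, not the patch body; the
function and parameter names are invented):

    #include <stdbool.h>

    /* If the guest fetches code through what would be a huge mapping,
     * refuse the huge page and fall back to 4 KiB, remembering the
     * page so the recovery worker added later in this series can
     * eventually restore it. */
    static int pick_level(int req_level, bool exec_fault,
                          bool nx_huge_pages, bool *lpage_disallowed)
    {
            if (exec_fault && nx_huge_pages && req_level > 1) {
                    *lpage_disallowed = true;   /* account_huge_nx_page() analogue */
                    return 1;                   /* force a 4 KiB mapping */
            }
            return req_level;
    }

The knob controlling this behaviour upstream is the kvm.nx_huge_pages
module parameter (off, force, auto), with "auto" keyed off the
ITLB_MULTIHIT bug bit.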
Originally-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner -[bwh: Backported to 4.19: - - Use kvm_mmu_invalidate_zap_all_pages() instead of kvm_mmu_zap_all_fast() - - Adjust context] -Signed-off-by: Ben Hutchings +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner --- .../admin-guide/kernel-parameters.txt | 19 +++ arch/x86/include/asm/kvm_host.h | 2 + @@ -76,14 +74,14 @@ Signed-off-by: Ben Hutchings Mitigate all CPU vulnerabilities, but leave SMT --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h -@@ -282,6 +282,7 @@ struct kvm_mmu_page { - struct list_head link; - struct hlist_node hash_link; +@@ -293,6 +293,7 @@ struct kvm_mmu_page { + /* hold the gfn of each spte inside spt */ + gfn_t *gfns; bool unsync; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ - - /* - * The following two entries are used to key the shadow page in the + int root_count; /* Currently serving as active root */ + unsigned int unsync_children; + struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ @@ -887,6 +888,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; @@ -94,7 +92,7 @@ Signed-off-by: Ben Hutchings --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -1229,6 +1229,9 @@ void x86_spec_ctrl_setup_ap(void) +@@ -1225,6 +1225,9 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } @@ -104,7 +102,7 @@ Signed-off-by: Ben Hutchings #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt -@@ -1384,17 +1387,25 @@ static ssize_t l1tf_show_state(char *buf +@@ -1380,17 +1383,25 @@ static ssize_t l1tf_show_state(char *buf l1tf_vmx_states[l1tf_vmx_mitigation], sched_smt_active() ? "vulnerable" : "disabled"); } @@ -154,7 +152,7 @@ Signed-off-by: Ben Hutchings /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: -@@ -284,6 +298,11 @@ static inline bool spte_ad_enabled(u64 s +@@ -285,6 +299,11 @@ static inline bool spte_ad_enabled(u64 s return !(spte & shadow_acc_track_value); } @@ -166,7 +164,7 @@ Signed-off-by: Ben Hutchings static inline u64 spte_shadow_accessed_mask(u64 spte) { MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); -@@ -1096,6 +1115,15 @@ static void account_shadowed(struct kvm +@@ -1097,6 +1116,15 @@ static void account_shadowed(struct kvm kvm_mmu_gfn_disallow_lpage(slot, gfn); } @@ -182,7 +180,7 @@ Signed-off-by: Ben Hutchings static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; -@@ -1113,6 +1141,12 @@ static void unaccount_shadowed(struct kv +@@ -1114,6 +1142,12 @@ static void unaccount_shadowed(struct kv kvm_mmu_gfn_allow_lpage(slot, gfn); } @@ -195,7 +193,7 @@ Signed-off-by: Ben Hutchings static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, struct kvm_memory_slot *slot) { -@@ -2665,6 +2699,9 @@ static int kvm_mmu_prepare_zap_page(stru +@@ -2666,6 +2700,9 @@ static int kvm_mmu_prepare_zap_page(stru kvm_reload_remote_mmus(kvm); } @@ -205,7 +203,7 @@ Signed-off-by: Ben Hutchings sp->role.invalid = 1; return ret; } -@@ -2873,6 +2910,11 @@ static int set_spte(struct kvm_vcpu *vcp +@@ -2874,6 +2911,11 @@ static int set_spte(struct kvm_vcpu *vcp if (!speculative) spte |= spte_shadow_accessed_mask(spte); @@ -217,7 +215,7 @@ Signed-off-by: Ben Hutchings if (pte_access & ACC_EXEC_MASK) spte |= shadow_x_mask; else -@@ -3091,9 +3133,32 @@ static void direct_pte_prefetch(struct k +@@ -3092,9 +3134,32 @@ static void direct_pte_prefetch(struct k 
__direct_pte_prefetch(vcpu, sp, sptep); } @@ -251,7 +249,7 @@ Signed-off-by: Ben Hutchings { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; -@@ -3106,6 +3171,12 @@ static int __direct_map(struct kvm_vcpu +@@ -3107,6 +3172,12 @@ static int __direct_map(struct kvm_vcpu trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { @@ -264,7 +262,7 @@ Signed-off-by: Ben Hutchings base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == level) break; -@@ -3116,6 +3187,8 @@ static int __direct_map(struct kvm_vcpu +@@ -3117,6 +3188,8 @@ static int __direct_map(struct kvm_vcpu it.level - 1, true, ACC_ALL); link_shadow_page(vcpu, it.sptep, sp); @@ -273,7 +271,7 @@ Signed-off-by: Ben Hutchings } } -@@ -3416,11 +3489,14 @@ static int nonpaging_map(struct kvm_vcpu +@@ -3417,11 +3490,14 @@ static int nonpaging_map(struct kvm_vcpu { int r; int level; @@ -289,7 +287,7 @@ Signed-off-by: Ben Hutchings level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { /* -@@ -3454,7 +3530,8 @@ static int nonpaging_map(struct kvm_vcpu +@@ -3455,7 +3531,8 @@ static int nonpaging_map(struct kvm_vcpu goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); @@ -299,7 +297,7 @@ Signed-off-by: Ben Hutchings out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); -@@ -4048,6 +4125,8 @@ static int tdp_page_fault(struct kvm_vcp +@@ -4049,6 +4126,8 @@ static int tdp_page_fault(struct kvm_vcp unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; bool map_writable; @@ -308,7 +306,7 @@ Signed-off-by: Ben Hutchings MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); -@@ -4058,8 +4137,9 @@ static int tdp_page_fault(struct kvm_vcp +@@ -4059,8 +4138,9 @@ static int tdp_page_fault(struct kvm_vcp if (r) return r; @@ -320,7 +318,7 @@ Signed-off-by: Ben Hutchings level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { if (level > PT_DIRECTORY_LEVEL && -@@ -4088,7 +4168,8 @@ static int tdp_page_fault(struct kvm_vcp +@@ -4089,7 +4169,8 @@ static int tdp_page_fault(struct kvm_vcp goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); @@ -330,7 +328,7 @@ Signed-off-by: Ben Hutchings out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); -@@ -5886,10 +5967,58 @@ static void mmu_destroy_caches(void) +@@ -5887,10 +5968,58 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } diff --git a/debian/patches/bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch b/debian/patches/bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch similarity index 94% rename from debian/patches/bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch rename to debian/patches/bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch index f966978a5..7396e3992 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch @@ -1,8 +1,8 @@ From: Junaid Shahid -Date: Mon, 4 Nov 2019 12:22:02 +0100 +Date: Fri, 1 Nov 2019 00:14:08 +0100 Subject: kvm: Add helper function for creating VM worker threads -commit c57c80467f90e5504c8df9ad3555d2c78800bf94 upstream. 
+commit c57c80467f90e5504c8df9ad3555d2c78800bf94 upstream Add a function to create a kernel thread associated with a given VM. In particular, it ensures that the worker thread inherits the priority and @@ -11,8 +11,8 @@ cgroups of the calling thread. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner -[bwh: Backported to 4.19: adjust context] -Signed-off-by: Ben Hutchings +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner --- include/linux/kvm_host.h | 6 +++ virt/kvm/kvm_main.c | 84 ++++++++++++++++++++++++++++++++++++++++ diff --git a/debian/patches/bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch b/debian/patches/bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch similarity index 91% rename from debian/patches/bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch rename to debian/patches/bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch index f732d7fcb..af180b791 100644 --- a/debian/patches/bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +++ b/debian/patches/bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch @@ -1,8 +1,8 @@ From: Junaid Shahid -Date: Mon, 4 Nov 2019 12:22:03 +0100 +Date: Fri, 1 Nov 2019 00:14:14 +0100 Subject: kvm: x86: mmu: Recovery of shattered NX large pages -commit 1aa9b9572b10529c2e64e2b8f44025d86e124308 upstream. +commit 1aa9b9572b10529c2e64e2b8f44025d86e124308 upstream The page table pages corresponding to broken down large pages are zapped in FIFO order, so that the large page can potentially be recovered, if it is @@ -15,10 +15,8 @@ reaches a steady state. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner -[bwh: Backported to 4.19: - - Update another error path in kvm_create_vm() to use out_err_no_mmu_notifier - - Adjust context] -Signed-off-by: Ben Hutchings +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner --- .../admin-guide/kernel-parameters.txt | 6 + arch/x86/include/asm/kvm_host.h | 4 + @@ -45,16 +43,15 @@ Signed-off-by: Ben Hutchings --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h -@@ -281,6 +281,8 @@ struct kvm_rmap_head { +@@ -281,6 +281,7 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head lpage_disallowed_link; -+ - bool unsync; - bool lpage_disallowed; /* Can't be replaced by an equiv large page */ -@@ -805,6 +807,7 @@ struct kvm_arch { + /* + * The following two entries are used to key the shadow page in the +@@ -805,6 +806,7 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -62,10 +59,11 @@ Signed-off-by: Ben Hutchings struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; -@@ -875,6 +878,7 @@ struct kvm_arch { +@@ -875,6 +877,8 @@ struct kvm_arch { bool x2apic_broadcast_quirk_disabled; bool guest_can_read_msr_platform_info; ++ + struct task_struct *nx_lpage_recovery_thread; }; @@ -107,7 +105,7 @@ Signed-off-by: Ben Hutchings /* * When setting this variable to true it enables Two-Dimensional-Paging -@@ -1121,6 +1132,8 @@ static void account_huge_nx_page(struct +@@ -1122,6 +1133,8 @@ static void account_huge_nx_page(struct return; ++kvm->stat.nx_lpage_splits; @@ -116,7 +114,7 @@ Signed-off-by: Ben Hutchings sp->lpage_disallowed = true; } -@@ -1145,6 
+1158,7 @@ static void unaccount_huge_nx_page(struc +@@ -1146,6 +1159,7 @@ static void unaccount_huge_nx_page(struc { --kvm->stat.nx_lpage_splits; sp->lpage_disallowed = false; @@ -124,7 +122,7 @@ Signed-off-by: Ben Hutchings } static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, -@@ -6005,6 +6019,8 @@ static int set_nx_huge_pages(const char +@@ -6006,6 +6020,8 @@ static int set_nx_huge_pages(const char idx = srcu_read_lock(&kvm->srcu); kvm_mmu_invalidate_zap_all_pages(kvm); srcu_read_unlock(&kvm->srcu, idx); @@ -133,7 +131,7 @@ Signed-off-by: Ben Hutchings } mutex_unlock(&kvm_lock); } -@@ -6086,3 +6102,116 @@ void kvm_mmu_module_exit(void) +@@ -6087,3 +6103,116 @@ void kvm_mmu_module_exit(void) unregister_shrinker(&mmu_shrinker); mmu_audit_disable(); } @@ -263,7 +261,7 @@ Signed-off-by: Ben Hutchings #endif --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -8952,6 +8952,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un +@@ -8960,6 +8960,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); @@ -271,7 +269,7 @@ Signed-off-by: Ben Hutchings INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); -@@ -8983,6 +8984,11 @@ int kvm_arch_init_vm(struct kvm *kvm, un +@@ -8991,6 +8992,11 @@ int kvm_arch_init_vm(struct kvm *kvm, un return 0; } @@ -283,7 +281,7 @@ Signed-off-by: Ben Hutchings static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); -@@ -9084,6 +9090,11 @@ int x86_set_memory_region(struct kvm *kv +@@ -9092,6 +9098,11 @@ int x86_set_memory_region(struct kvm *kv } EXPORT_SYMBOL_GPL(x86_set_memory_region); diff --git a/debian/patches/bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch b/debian/patches/bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch new file mode 100644 index 000000000..605dbae1d --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch @@ -0,0 +1,58 @@ +From: Paolo Bonzini +Date: Mon, 19 Aug 2019 17:24:07 +0200 +Subject: KVM: x86: use Intel speculation bugs and features as derived in + generic x86 code + +commit 0c54914d0c52a15db9954a76ce80fee32cf318f4 upstream + +Similar to AMD bits, set the Intel bits from the vendor-independent +feature and bug flags, because KVM_GET_SUPPORTED_CPUID does not care +about the vendor and they should be set on AMD processors as well. + +Suggested-by: Jim Mattson +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kvm/cpuid.c | 8 ++++++++ + arch/x86/kvm/x86.c | 8 ++++++++ + 2 files changed, 16 insertions(+) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -501,8 +501,16 @@ static inline int __do_cpuid_ent(struct + /* PKU is not yet implemented for shadow paging. */ + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) + entry->ecx &= ~F(PKU); ++ + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); ++ if (boot_cpu_has(X86_FEATURE_IBPB) && ++ boot_cpu_has(X86_FEATURE_IBRS)) ++ entry->edx |= F(SPEC_CTRL); ++ if (boot_cpu_has(X86_FEATURE_STIBP)) ++ entry->edx |= F(INTEL_STIBP); ++ if (boot_cpu_has(X86_FEATURE_SSBD)) ++ entry->edx |= F(SPEC_CTRL_SSBD); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. 
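The cpuid.c hunk above derives guest-visible leaf-7 EDX bits from vendor-neutral host feature flags rather than from Intel-specific ones, so they get reported on AMD hosts too. A minimal userspace model of that derivation, with host_has() standing in for boot_cpu_has(); the bit positions follow the leaf-7 EDX layout, everything else is invented for the demo:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define F_SPEC_CTRL      (1u << 26)
#define F_INTEL_STIBP    (1u << 27)
#define F_SPEC_CTRL_SSBD (1u << 31)

enum host_feature { HOST_IBPB, HOST_IBRS, HOST_STIBP, HOST_SSBD };

static bool host_has(enum host_feature f)
{
    (void)f;      /* stand-in probe: pretend everything is present */
    return true;
}

static uint32_t guest_cpuid_7_edx(uint32_t hw_edx)
{
    uint32_t edx = hw_edx;
    if (host_has(HOST_IBPB) && host_has(HOST_IBRS))
        edx |= F_SPEC_CTRL;
    if (host_has(HOST_STIBP))
        edx |= F_INTEL_STIBP;
    if (host_has(HOST_SSBD))
        edx |= F_SPEC_CTRL_SSBD;
    return edx;
}

int main(void)
{
    printf("guest leaf-7 edx = %#x\n", guest_cpuid_7_edx(0));
    return 0;
}
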
+--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1127,8 +1127,16 @@ u64 kvm_get_arch_capabilities(void) + if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) + data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + ++ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) ++ data |= ARCH_CAP_RDCL_NO; ++ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) ++ data |= ARCH_CAP_SSB_NO; ++ if (!boot_cpu_has_bug(X86_BUG_MDS)) ++ data |= ARCH_CAP_MDS_NO; ++ + return data; + } ++ + EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); + + static int kvm_get_msr_feature(struct kvm_msr_entry *msr) diff --git a/debian/patches/bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch b/debian/patches/bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch similarity index 96% rename from debian/patches/bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch rename to debian/patches/bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch index 6e8df2a71..ca6ffcec0 100644 --- a/debian/patches/bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch +++ b/debian/patches/bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 10:45:50 +0200 Subject: x86/msr: Add the IA32_TSX_CTRL MSR -commit c2955f270a84762343000f103e0640d29c7a96f3 upstream. +commit c2955f270a84762343000f103e0640d29c7a96f3 upstream Transactional Synchronization Extensions (TSX) may be used on certain processors as part of a speculative side channel attack. A microcode @@ -52,7 +52,6 @@ Tested-by: Neelima Krishnan Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf -Signed-off-by: Ben Hutchings --- arch/x86/include/asm/msr-index.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/debian/patches/bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch b/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch similarity index 94% rename from debian/patches/bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch rename to debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch index f159a3c56..29cd36f2c 100644 --- a/debian/patches/bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch +++ b/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 10:52:35 +0200 Subject: x86/cpu: Add a helper function x86_read_arch_cap_msr() -commit 286836a70433fb64131d2590f4bf512097c255e1 upstream. +commit 286836a70433fb64131d2590f4bf512097c255e1 upstream Add a helper function to read the IA32_ARCH_CAPABILITIES MSR. 
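The kernel-side helper has a simple userspace analogue: the msr driver exposes a per-CPU MSR file in which the file offset selects the MSR index. A small reader for IA32_ARCH_CAPABILITIES (MSR 0x10a), assuming the msr module is loaded and root privileges; the CPUID availability check is omitted for brevity:

#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_ARCH_CAPABILITIES 0x10a

int main(void)
{
    uint64_t val;
    int fd = open("/dev/cpu/0/msr", O_RDONLY);
    if (fd < 0) {
        perror("open /dev/cpu/0/msr");
        return 1;
    }
    /* the msr driver interprets the file offset as the MSR index */
    if (pread(fd, &val, sizeof(val), MSR_IA32_ARCH_CAPABILITIES)
        != sizeof(val)) {
        perror("pread");
        close(fd);
        return 1;
    }
    printf("IA32_ARCH_CAPABILITIES = %#" PRIx64 "\n", val);
    close(fd);
    return 0;
}
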
@@ -13,7 +13,6 @@ Tested-by: Neelima Krishnan Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf -Signed-off-by: Ben Hutchings --- arch/x86/kernel/cpu/common.c | 15 +++++++++++---- arch/x86/kernel/cpu/cpu.h | 2 ++ diff --git a/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch b/debian/patches/bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch similarity index 96% rename from debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch rename to debian/patches/bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch index 6d0c3a99b..87373788c 100644 --- a/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch +++ b/debian/patches/bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 11:01:53 +0200 Subject: x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default -commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream. +commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream Add a kernel cmdline parameter "tsx" to control the Transactional Synchronization Extensions (TSX) feature. On CPUs that support TSX @@ -22,16 +22,14 @@ Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf -[bwh: Backported to 4.19: adjust context] -Signed-off-by: Ben Hutchings --- .../admin-guide/kernel-parameters.txt | 26 ++++ arch/x86/kernel/cpu/Makefile | 2 +- - arch/x86/kernel/cpu/common.c | 2 + + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/cpu.h | 16 +++ arch/x86/kernel/cpu/intel.c | 5 + arch/x86/kernel/cpu/tsx.c | 125 ++++++++++++++++++ - 6 files changed, 175 insertions(+), 1 deletion(-) + 6 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/tsx.c --- a/Documentation/admin-guide/kernel-parameters.txt @@ -82,11 +80,10 @@ Signed-off-by: Ben Hutchings obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c -@@ -1482,6 +1482,8 @@ void __init identify_boot_cpu(void) +@@ -1482,6 +1482,7 @@ void __init identify_boot_cpu(void) enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); -+ + tsx_init(); } diff --git a/debian/patches/bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch b/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch similarity index 98% rename from debian/patches/bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch rename to debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch index e9bc1c9bc..91ece79d8 100644 --- a/debian/patches/bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch +++ b/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 11:30:45 +0200 Subject: x86/speculation/taa: Add mitigation for TSX Async Abort -commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream. 
+commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream TSX Async Abort (TAA) is a side channel vulnerability to the internal buffers in some Intel processors similar to Microachitectural Data @@ -56,8 +56,6 @@ Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf -[bwh: Backported to 4.19: Add #include "cpu.h" in bugs.c] -Signed-off-by: Ben Hutchings --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 + diff --git a/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch b/debian/patches/bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch similarity index 97% rename from debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch rename to debian/patches/bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch index e486aa490..7f1c4f982 100644 --- a/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch +++ b/debian/patches/bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:19:51 +0200 Subject: x86/speculation/taa: Add sysfs reporting for TSX Async Abort -commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream. +commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream Add the sysfs reporting file for TSX Async Abort. It exposes the vulnerability and the mitigation state similar to the existing files for @@ -19,7 +19,6 @@ Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf -Signed-off-by: Ben Hutchings --- arch/x86/kernel/cpu/bugs.c | 23 +++++++++++++++++++++++ drivers/base/cpu.c | 9 +++++++++ diff --git a/debian/patches/bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch b/debian/patches/bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch similarity index 86% rename from debian/patches/bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch rename to debian/patches/bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch index 73cbfb3cc..869858647 100644 --- a/debian/patches/bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch +++ b/debian/patches/bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:23:33 +0200 Subject: kvm/x86: Export MDS_NO=0 to guests when TSX is enabled -commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream. 
+commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX Async Abort(TAA) affected hosts that have TSX enabled and updated @@ -26,16 +26,15 @@ Signed-off-by: Thomas Gleixner Tested-by: Neelima Krishnan Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf -Signed-off-by: Ben Hutchings --- arch/x86/kvm/x86.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -1127,6 +1127,25 @@ u64 kvm_get_arch_capabilities(void) - if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) - data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; +@@ -1134,6 +1134,25 @@ u64 kvm_get_arch_capabilities(void) + if (!boot_cpu_has_bug(X86_BUG_MDS)) + data |= ARCH_CAP_MDS_NO; + /* + * On TAA affected systems, export MDS_NO=0 when: @@ -58,4 +57,4 @@ Signed-off-by: Ben Hutchings + return data; } - EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); + diff --git a/debian/patches/bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch b/debian/patches/bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch similarity index 95% rename from debian/patches/bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch rename to debian/patches/bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch index 1f9858b16..1f668f4cb 100644 --- a/debian/patches/bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch +++ b/debian/patches/bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:28:57 +0200 Subject: x86/tsx: Add "auto" option to the tsx= cmdline parameter -commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream. +commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream Platforms which are not affected by X86_BUG_TAA may want the TSX feature enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto @@ -18,7 +18,6 @@ Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf -Signed-off-by: Ben Hutchings --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/x86/kernel/cpu/tsx.c | 7 ++++++- diff --git a/debian/patches/bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch b/debian/patches/bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch similarity index 99% rename from debian/patches/bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch rename to debian/patches/bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch index fb18eeed4..5b474ca9b 100644 --- a/debian/patches/bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch +++ b/debian/patches/bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch @@ -2,7 +2,7 @@ From: Pawan Gupta Date: Wed, 23 Oct 2019 12:32:55 +0200 Subject: x86/speculation/taa: Add documentation for TSX Async Abort -commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream. +commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream Add the documenation for TSX Async Abort. 
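Taken together, the tsx= patches above implement a three-way policy: "on" and "off" are explicit, and "auto" disables TSX only where the CPU is affected by TAA, with unrecognized or absent values failing safe to off. A rough standalone model (the enum and function names are ours, not the kernel's):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum tsx_ctrl { TSX_ENABLE, TSX_DISABLE };

static enum tsx_ctrl tsx_policy(const char *arg, bool cpu_has_taa_bug)
{
    if (arg && !strcmp(arg, "on"))
        return TSX_ENABLE;
    if (arg && !strcmp(arg, "auto"))
        return cpu_has_taa_bug ? TSX_DISABLE : TSX_ENABLE;
    /* "off", unknown values and no argument all fail safe */
    return TSX_DISABLE;
}

int main(void)
{
    printf("tsx=auto on an unaffected CPU -> %s\n",
           tsx_policy("auto", false) == TSX_ENABLE ?
           "enabled" : "disabled");
    return 0;
}
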
Include the description of the issue, how to check the mitigation state, control the mitigation, @@ -19,8 +19,6 @@ Signed-off-by: Thomas Gleixner Reviewed-by: Mark Gross Reviewed-by: Tony Luck Reviewed-by: Josh Poimboeuf -[bwh: Backported to 4.19: adjust context] -Signed-off-by: Ben Hutchings --- .../ABI/testing/sysfs-devices-system-cpu | 1 + Documentation/admin-guide/hw-vuln/index.rst | 1 + diff --git a/debian/patches/bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch b/debian/patches/bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch similarity index 97% rename from debian/patches/bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch rename to debian/patches/bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch index 01800f468..ae61df728 100644 --- a/debian/patches/bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch +++ b/debian/patches/bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch @@ -2,7 +2,7 @@ From: Michal Hocko Date: Wed, 23 Oct 2019 12:35:50 +0200 Subject: x86/tsx: Add config options to set tsx=on|off|auto -commit db616173d787395787ecc93eef075fa975227b10 upstream. +commit db616173d787395787ecc93eef075fa975227b10 upstream There is a general consensus that TSX usage is not largely spread while the history shows there is a non trivial space for side channel attacks @@ -27,7 +27,6 @@ Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf -Signed-off-by: Ben Hutchings --- arch/x86/Kconfig | 45 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/tsx.c | 22 +++++++++++++------ diff --git a/debian/patches/bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch b/debian/patches/bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch similarity index 81% rename from debian/patches/bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch rename to debian/patches/bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch index e2a39e5cf..7b58d708b 100644 --- a/debian/patches/bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch +++ b/debian/patches/bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch @@ -1,8 +1,8 @@ From: Josh Poimboeuf Date: Wed, 6 Nov 2019 20:26:46 -0600 Subject: x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs -Origin: https://git.kernel.org/linus/012206a822a8b6ac09125bfaa210a95b9eb8f1c1 -Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-11135 + +commit 012206a822a8b6ac09125bfaa210a95b9eb8f1c1 upstream For new IBRS_ALL CPUs, the Enhanced IBRS check at the beginning of cpu_bugs_smt_update() causes the function to return early, unintentionally @@ -29,11 +29,9 @@ Reviewed-by: Borislav Petkov arch/x86/kernel/cpu/bugs.c | 4 ---- 1 file changed, 4 deletions(-) -diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index 8237b86ba6dc..10d11586f805 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c -@@ -886,10 +886,6 @@ static void update_mds_branch_idle(void) +@@ -874,10 +874,6 @@ static void update_mds_branch_idle(void) void arch_smt_update(void) { @@ -44,6 +42,3 @@ index 8237b86ba6dc..10d11586f805 100644 mutex_lock(&spec_ctrl_mutex); switch (spectre_v2_user) { --- -2.24.0 - diff --git a/debian/patches/series b/debian/patches/series index 
7052e4b61..8ebe2b831 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -258,31 +258,32 @@ bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.pa bugfix/all/vhost-make-sure-log_num-in_num.patch bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch -bugfix/x86/taa/0001-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch -bugfix/x86/taa/0002-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch -bugfix/x86/taa/0003-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch -bugfix/x86/taa/0004-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch -bugfix/x86/taa/0005-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch -bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch -bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch -bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch -bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch -bugfix/x86/taa/0010-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch -bugfix/x86/itlb_multihit/0010-KVM-x86-adjust-kvm_mmu_page-member-to-save-8-bytes.patch -bugfix/x86/itlb_multihit/0011-kvm-Convert-kvm_lock-to-a-mutex.patch -bugfix/x86/itlb_multihit/0012-kvm-x86-Do-not-release-the-page-inside-mmu_set_spte.patch -bugfix/x86/itlb_multihit/0013-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch -bugfix/x86/itlb_multihit/0014-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch -bugfix/x86/itlb_multihit/0015-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch -bugfix/x86/itlb_multihit/0016-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch -bugfix/x86/itlb_multihit/0017-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch -bugfix/x86/itlb_multihit/0018-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch -bugfix/x86/itlb_multihit/0019-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch -bugfix/x86/itlb_multihit/0020-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch -bugfix/x86/itlb_multihit/0021-kvm-mmu-ITLB_MULTIHIT-mitigation.patch -bugfix/x86/itlb_multihit/0022-kvm-Add-helper-function-for-creating-VM-worker-threa.patch -bugfix/x86/itlb_multihit/0023-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch -bugfix/x86/itlb_multihit/0024-Documentation-Add-ITLB_MULTIHIT-documentation.patch +bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch +bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch +bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch +bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch +bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch +bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch +bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch +bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch +bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch +bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch +bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch +bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch +bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch +bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch 
+bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch +bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch +bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch +bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch +bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch +bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch +bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch +bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch +bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch +bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From feec1caa946082b7cb2b1042f4d92460e3d81f9a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 10 Nov 2019 02:53:32 +0000 Subject: [PATCH 10/13] [x86] i915: Add mitigations for two hardware security flaws --- debian/changelog | 14 + ...rm-i915-Rename-gen7-cmdparser-tables.patch | 176 ++++++++ ...i915-Disable-Secure-Batches-for-gen6.patch | 93 ++++ ...-Remove-Master-tables-from-cmdparser.patch | 295 +++++++++++++ ...Add-support-for-mandatory-cmdparsing.patch | 110 +++++ ...-ro-ppgtt-mapped-cmdparser-shadow-bu.patch | 198 +++++++++ ...915-Allow-parsing-of-unsized-batches.patch | 57 +++ ...007-drm-i915-Add-gen9-BCS-cmdparsing.patch | 258 +++++++++++ ...er-Use-explicit-goto-for-error-paths.patch | 94 ++++ ...arser-Add-support-for-backward-jumps.patch | 404 ++++++++++++++++++ ...er-Ignore-Length-operands-during-com.patch | 37 ++ ...r-RM-timeout-to-avoid-DSI-hard-hangs.patch | 72 ++++ ...-i915-gen8-Add-RC6-CTX-corruption-WA.patch | 282 ++++++++++++ debian/patches/series | 12 + 14 files changed, 2102 insertions(+) create mode 100644 debian/patches/bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch create mode 100644 debian/patches/bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch create mode 100644 debian/patches/bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch create mode 100644 debian/patches/bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch create mode 100644 debian/patches/bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch create mode 100644 debian/patches/bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch create mode 100644 debian/patches/bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch create mode 100644 debian/patches/bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch create mode 100644 debian/patches/bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch create mode 100644 debian/patches/bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch create mode 100644 debian/patches/bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch create mode 100644 debian/patches/bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch diff --git a/debian/changelog b/debian/changelog index 6e50be20e..21c508f0a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -32,6 +32,20 
@@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - kvm: Add helper function for creating VM worker threads - kvm: x86: mmu: Recovery of shattered NX large pages - Documentation: Add ITLB_MULTIHIT documentation + * [x86] i915: Mitigate local privilege escalation on gen9 (CVE-2019-0155): + - drm/i915: Rename gen7 cmdparser tables + - drm/i915: Disable Secure Batches for gen6+ + - drm/i915: Remove Master tables from cmdparser + - drm/i915: Add support for mandatory cmdparsing + - drm/i915: Support ro ppgtt mapped cmdparser shadow buffers + - drm/i915: Allow parsing of unsized batches + - drm/i915: Add gen9 BCS cmdparsing + - drm/i915/cmdparser: Use explicit goto for error paths + - drm/i915/cmdparser: Add support for backward jumps + - drm/i915/cmdparser: Ignore Length operands during command matching + * [x86] i915: Mitigate local denial-of-service on gen8/gen9 (CVE-2019-0154): + - drm/i915: Lower RM timeout to avoid DSI hard hangs + - drm/i915/gen8+: Add RC6 CTX corruption WA -- Ben Hutchings Sun, 20 Oct 2019 14:21:28 +0100 diff --git a/debian/patches/bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch b/debian/patches/bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch new file mode 100644 index 000000000..b5cdb94b3 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch @@ -0,0 +1,176 @@ +From: Jon Bloomfield +Date: Fri, 20 Apr 2018 14:26:01 -0700 +Subject: drm/i915: Rename gen7 cmdparser tables +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 0a2f661b6c21815a7fa60e30babe975fee8e73c6 upstream. + +We're about to introduce some new tables for later gens, and the +current naming for the gen7 tables will no longer make sense. + +v2: rebase + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 70 +++++++++++++------------- + 1 file changed, 35 insertions(+), 35 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -211,7 +211,7 @@ struct drm_i915_cmd_table { + + /* Command Mask Fixed Len Action + ---------------------------------------------------------- */ +-static const struct drm_i915_cmd_descriptor common_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), +@@ -244,7 +244,7 @@ static const struct drm_i915_cmd_descrip + CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), + }; + +-static const struct drm_i915_cmd_descriptor render_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), + CMD( MI_PREDICATE, SMI, F, 1, S ), +@@ -328,7 +328,7 @@ static const struct drm_i915_cmd_descrip + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), + }; + +-static const struct drm_i915_cmd_descriptor video_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { + CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), + CMD( MI_SET_APPID, SMI, F, 1, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, +@@ -372,7 +372,7 @@ static const struct drm_i915_cmd_descrip + CMD( MFX_WAIT, SMFX, F, 1, S ), + }; + +-static const struct drm_i915_cmd_descriptor vecs_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { + CMD( 
MI_ARB_ON_OFF, SMI, F, 1, R ), + CMD( MI_SET_APPID, SMI, F, 1, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, +@@ -410,7 +410,7 @@ static const struct drm_i915_cmd_descrip + }}, ), + }; + +-static const struct drm_i915_cmd_descriptor blt_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, + .bits = {{ +@@ -463,35 +463,35 @@ static const struct drm_i915_cmd_descrip + #undef B + #undef M + +-static const struct drm_i915_cmd_table gen7_render_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { render_cmds, ARRAY_SIZE(render_cmds) }, ++static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, + }; + +-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { render_cmds, ARRAY_SIZE(render_cmds) }, ++static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, + { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, + }; + +-static const struct drm_i915_cmd_table gen7_video_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { video_cmds, ARRAY_SIZE(video_cmds) }, ++static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, + }; + +-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, ++static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, + }; + +-static const struct drm_i915_cmd_table gen7_blt_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { blt_cmds, ARRAY_SIZE(blt_cmds) }, ++static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, + }; + +-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { blt_cmds, ARRAY_SIZE(blt_cmds) }, ++static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, + { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, + }; + +@@ -871,12 +871,12 @@ void intel_engine_init_cmd_parser(struct + switch (engine->id) { + case RCS: + if (IS_HASWELL(engine->i915)) { +- cmd_tables = hsw_render_ring_cmds; ++ cmd_tables = hsw_render_ring_cmd_table; + cmd_table_count = +- ARRAY_SIZE(hsw_render_ring_cmds); ++ ARRAY_SIZE(hsw_render_ring_cmd_table); + } else { +- cmd_tables = gen7_render_cmds; +- cmd_table_count = ARRAY_SIZE(gen7_render_cmds); ++ cmd_tables = gen7_render_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); + } + + if (IS_HASWELL(engine->i915)) { +@@ -890,17 +890,17 @@ void intel_engine_init_cmd_parser(struct + engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; + break; + case VCS: +- cmd_tables = gen7_video_cmds; +- cmd_table_count = ARRAY_SIZE(gen7_video_cmds); ++ cmd_tables = gen7_video_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); + engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + 
case BCS: + if (IS_HASWELL(engine->i915)) { +- cmd_tables = hsw_blt_ring_cmds; +- cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); ++ cmd_tables = hsw_blt_ring_cmd_table; ++ cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); + } else { +- cmd_tables = gen7_blt_cmds; +- cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); ++ cmd_tables = gen7_blt_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); + } + + if (IS_HASWELL(engine->i915)) { +@@ -914,8 +914,8 @@ void intel_engine_init_cmd_parser(struct + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + break; + case VECS: +- cmd_tables = hsw_vebox_cmds; +- cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); ++ cmd_tables = hsw_vebox_cmd_table; ++ cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); + /* VECS can use the same length_mask function as VCS */ + engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; diff --git a/debian/patches/bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch b/debian/patches/bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch new file mode 100644 index 000000000..0d347919d --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch @@ -0,0 +1,93 @@ +From: Jon Bloomfield +Date: Fri, 8 Jun 2018 08:53:46 -0700 +Subject: drm/i915: Disable Secure Batches for gen6+ +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 44157641d448cbc0c4b73c5231d2b911f0cb0427 upstream. + +Retroactively stop reporting support for secure batches +through the api for gen6+ so that older binaries trigger +the fallback path instead. + +Older binaries use secure batches pre gen6 to access resources +that are not available to normal usermode processes. However, +all known userspace explicitly checks for HAS_SECURE_BATCHES +before relying on the secure batch feature. + +Since there are no known binaries relying on this for newer gens +we can kill secure batches from gen6, via I915_PARAM_HAS_SECURE_BATCHES. 
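After this patch, userspace can observe the change directly: I915_PARAM_HAS_SECURE_BATCHES reports 0 on gen6+ even for root, which is what steers old binaries onto their fallback path. A minimal probe, assuming the DRM uapi headers are installed and card0 is the i915 device:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

int main(void)
{
    int fd = open("/dev/dri/card0", O_RDWR);
    if (fd < 0) {
        perror("open /dev/dri/card0");
        return 1;
    }
    int value = 0;
    struct drm_i915_getparam gp = {
        .param = I915_PARAM_HAS_SECURE_BATCHES,
        .value = &value,
    };
    if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
        perror("DRM_IOCTL_I915_GETPARAM");
    else
        printf("secure batches available: %d\n", value);
    close(fd);
    return 0;
}
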
+ +v2: rebase (Mika) +v3: rebase (Mika) + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_drv.c | 2 +- + drivers/gpu/drm/i915/i915_drv.h | 2 ++ + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++++++-- + 3 files changed, 13 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_drv.c ++++ b/drivers/gpu/drm/i915/i915_drv.c +@@ -351,7 +351,7 @@ static int i915_getparam_ioctl(struct dr + value = HAS_LEGACY_SEMAPHORES(dev_priv); + break; + case I915_PARAM_HAS_SECURE_BATCHES: +- value = capable(CAP_SYS_ADMIN); ++ value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN); + break; + case I915_PARAM_CMD_PARSER_VERSION: + value = i915_cmd_parser_get_version(dev_priv); +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -2517,6 +2517,8 @@ intel_info(const struct drm_i915_private + + #define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN7(dev_priv) + ++#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) ++ + #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) + #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) + #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -2177,6 +2177,7 @@ i915_gem_do_execbuffer(struct drm_device + struct drm_i915_gem_exec_object2 *exec, + struct drm_syncobj **fences) + { ++ struct drm_i915_private *i915 = to_i915(dev); + struct i915_execbuffer eb; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; +@@ -2187,7 +2188,7 @@ i915_gem_do_execbuffer(struct drm_device + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & + ~__EXEC_OBJECT_UNKNOWN_FLAGS); + +- eb.i915 = to_i915(dev); ++ eb.i915 = i915; + eb.file = file; + eb.args = args; + if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) +@@ -2209,8 +2210,15 @@ i915_gem_do_execbuffer(struct drm_device + + eb.batch_flags = 0; + if (args->flags & I915_EXEC_SECURE) { ++ if (INTEL_GEN(i915) >= 11) ++ return -ENODEV; ++ ++ /* Return -EPERM to trigger fallback code on old binaries. */ ++ if (!HAS_SECURE_BATCHES(i915)) ++ return -EPERM; ++ + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) +- return -EPERM; ++ return -EPERM; + + eb.batch_flags |= I915_DISPATCH_SECURE; + } diff --git a/debian/patches/bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch b/debian/patches/bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch new file mode 100644 index 000000000..dddacff03 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch @@ -0,0 +1,295 @@ +From: Jon Bloomfield +Date: Fri, 8 Jun 2018 10:05:26 -0700 +Subject: drm/i915: Remove Master tables from cmdparser +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 66d8aba1cd6db34af10de465c0d52af679288cb6 upstream. + +The previous patch has killed support for secure batches +on gen6+, and hence the cmdparsers master tables are +now dead code. Remove them. 
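With the master flag gone from the register tables, the whitelist lookup this patch arrives at (see the find_reg() hunk below) searches every table unconditionally. A self-contained model of that shape; the table contents are invented for the demo:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct reg_descriptor { uint32_t addr; };
struct reg_table { const struct reg_descriptor *regs; size_t num; };

static const struct reg_descriptor render_regs[] = { {0x2358}, {0x2350} };
static const struct reg_table tables[] = {
    { render_regs, sizeof(render_regs) / sizeof(render_regs[0]) },
};

static const struct reg_descriptor *find_reg(uint32_t addr)
{
    /* every table is consulted; no per-table privilege check */
    for (size_t t = 0; t < sizeof(tables) / sizeof(tables[0]); t++)
        for (size_t i = 0; i < tables[t].num; i++)
            if (tables[t].regs[i].addr == addr)
                return &tables[t].regs[i];
    return NULL;  /* not whitelisted: the command will be rejected */
}

int main(void)
{
    printf("0x2358 whitelisted: %s\n", find_reg(0x2358) ? "yes" : "no");
    printf("0xdead whitelisted: %s\n", find_reg(0xdead) ? "yes" : "no");
    return 0;
}
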
+ +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 84 ++++++---------------- + drivers/gpu/drm/i915/i915_drv.h | 3 +- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +- + 3 files changed, 26 insertions(+), 68 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -51,13 +51,11 @@ + * granting userspace undue privileges. There are three categories of privilege. + * + * First, commands which are explicitly defined as privileged or which should +- * only be used by the kernel driver. The parser generally rejects such +- * commands, though it may allow some from the drm master process. ++ * only be used by the kernel driver. The parser rejects such commands + * + * Second, commands which access registers. To support correct/enhanced + * userspace functionality, particularly certain OpenGL extensions, the parser +- * provides a whitelist of registers which userspace may safely access (for both +- * normal and drm master processes). ++ * provides a whitelist of registers which userspace may safely access + * + * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). + * The parser always rejects such commands. +@@ -82,9 +80,9 @@ + * in the per-engine command tables. + * + * Other command table entries map fairly directly to high level categories +- * mentioned above: rejected, master-only, register whitelist. The parser +- * implements a number of checks, including the privileged memory checks, via a +- * general bitmasking mechanism. ++ * mentioned above: rejected, register whitelist. The parser implements a number ++ * of checks, including the privileged memory checks, via a general bitmasking ++ * mechanism. + */ + + /* +@@ -102,8 +100,6 @@ struct drm_i915_cmd_descriptor { + * CMD_DESC_REJECT: The command is never allowed + * CMD_DESC_REGISTER: The command should be checked against the + * register whitelist for the appropriate ring +- * CMD_DESC_MASTER: The command is allowed if the submitting process +- * is the DRM master + */ + u32 flags; + #define CMD_DESC_FIXED (1<<0) +@@ -111,7 +107,6 @@ struct drm_i915_cmd_descriptor { + #define CMD_DESC_REJECT (1<<2) + #define CMD_DESC_REGISTER (1<<3) + #define CMD_DESC_BITMASK (1<<4) +-#define CMD_DESC_MASTER (1<<5) + + /* + * The command's unique identification bits and the bitmask to get them. 
+@@ -207,14 +202,13 @@ struct drm_i915_cmd_table { + #define R CMD_DESC_REJECT + #define W CMD_DESC_REGISTER + #define B CMD_DESC_BITMASK +-#define M CMD_DESC_MASTER + + /* Command Mask Fixed Len Action + ---------------------------------------------------------- */ + static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), +- CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), ++ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), +@@ -311,7 +305,7 @@ static const struct drm_i915_cmd_descrip + CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), + CMD( MI_SET_APPID, SMI, F, 1, S ), + CMD( MI_RS_CONTEXT, SMI, F, 1, S ), +- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), ++ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), +@@ -444,7 +438,7 @@ static const struct drm_i915_cmd_descrip + }; + + static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { +- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), ++ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + }; + +@@ -461,7 +455,6 @@ static const struct drm_i915_cmd_descrip + #undef R + #undef W + #undef B +-#undef M + + static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, +@@ -610,47 +603,29 @@ static const struct drm_i915_reg_descrip + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + }; + +-static const struct drm_i915_reg_descriptor ivb_master_regs[] = { +- REG32(FORCEWAKE_MT), +- REG32(DERRMR), +- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), +- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), +- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), +-}; +- +-static const struct drm_i915_reg_descriptor hsw_master_regs[] = { +- REG32(FORCEWAKE_MT), +- REG32(DERRMR), +-}; +- + #undef REG64 + #undef REG32 + + struct drm_i915_reg_table { + const struct drm_i915_reg_descriptor *regs; + int num_regs; +- bool master; + }; + + static const struct drm_i915_reg_table ivb_render_reg_tables[] = { +- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, +- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, ++ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + }; + + static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { +- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, +- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, ++ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, + }; + + static const struct drm_i915_reg_table hsw_render_reg_tables[] = { +- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, +- { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, +- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, ++ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, ++ { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, + }; + + static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { +- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, +- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, ++ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, + }; + + static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) +@@ -1027,22 +1002,16 @@ __find_reg(const struct drm_i915_reg_des + } + + static const struct drm_i915_reg_descriptor * +-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) 
++find_reg(const struct intel_engine_cs *engine, u32 addr) + { + const struct drm_i915_reg_table *table = engine->reg_tables; ++ const struct drm_i915_reg_descriptor *reg = NULL; + int count = engine->reg_table_count; + +- for (; count > 0; ++table, --count) { +- if (!table->master || is_master) { +- const struct drm_i915_reg_descriptor *reg; +- +- reg = __find_reg(table->regs, table->num_regs, addr); +- if (reg != NULL) +- return reg; +- } +- } ++ for (; !reg && (count > 0); ++table, --count) ++ reg = __find_reg(table->regs, table->num_regs, addr); + +- return NULL; ++ return reg; + } + + /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ +@@ -1127,8 +1096,7 @@ unpin_src: + + static bool check_cmd(const struct intel_engine_cs *engine, + const struct drm_i915_cmd_descriptor *desc, +- const u32 *cmd, u32 length, +- const bool is_master) ++ const u32 *cmd, u32 length) + { + if (desc->flags & CMD_DESC_SKIP) + return true; +@@ -1138,12 +1106,6 @@ static bool check_cmd(const struct intel + return false; + } + +- if ((desc->flags & CMD_DESC_MASTER) && !is_master) { +- DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", +- *cmd); +- return false; +- } +- + if (desc->flags & CMD_DESC_REGISTER) { + /* + * Get the distance between individual register offset +@@ -1157,7 +1119,7 @@ static bool check_cmd(const struct intel + offset += step) { + const u32 reg_addr = cmd[offset] & desc->reg.mask; + const struct drm_i915_reg_descriptor *reg = +- find_reg(engine, is_master, reg_addr); ++ find_reg(engine, reg_addr); + + if (!reg) { + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", +@@ -1244,7 +1206,6 @@ static bool check_cmd(const struct intel + * @shadow_batch_obj: copy of the batch buffer in question + * @batch_start_offset: byte offset in the batch at which execution starts + * @batch_len: length of the commands in batch_obj +- * @is_master: is the submitting process the drm master? + * + * Parses the specified batch buffer looking for privilege violations as + * described in the overview. 
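The scanning loop being documented here walks the batch one command at a time, resolves a descriptor, and fails the whole batch on anything rejected. A toy version with an invented opcode table, returning -1 where the real parser returns -EACCES:

#include <stdint.h>
#include <stdio.h>

#define DESC_SKIP   (1u << 0)
#define DESC_REJECT (1u << 1)

struct cmd_desc { uint32_t opcode; uint32_t flags; uint32_t len; };

static const struct cmd_desc table[] = {
    { 0x00, DESC_SKIP,   1 },  /* NOOP-like                */
    { 0x02, DESC_REJECT, 1 },  /* privileged command       */
    { 0x10, 0,           2 },  /* ordinary 2-dword command */
};

static const struct cmd_desc *find_desc(uint32_t cmd)
{
    for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
        if (table[i].opcode == (cmd >> 24))
            return &table[i];
    return NULL;
}

static int parse_batch(const uint32_t *batch, size_t ndwords)
{
    for (size_t i = 0; i < ndwords; ) {
        const struct cmd_desc *desc = find_desc(batch[i]);
        if (!desc || (desc->flags & DESC_REJECT))
            return -1;          /* -EACCES in the real parser */
        i += desc->len;         /* skip the command's operands */
    }
    return 0;
}

int main(void)
{
    uint32_t good[] = { 0x10u << 24, 0, 0x00u << 24 };
    uint32_t bad[]  = { 0x02u << 24 };
    printf("good batch: %d\n", parse_batch(good, 3));
    printf("bad batch:  %d\n", parse_batch(bad, 1));
    return 0;
}
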
+@@ -1256,8 +1217,7 @@ int intel_engine_cmd_parser(struct intel + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, +- u32 batch_len, +- bool is_master) ++ u32 batch_len) + { + u32 *cmd, *batch_end; + struct drm_i915_cmd_descriptor default_desc = noop_desc; +@@ -1323,7 +1283,7 @@ int intel_engine_cmd_parser(struct intel + break; + } + +- if (!check_cmd(engine, desc, cmd, length, is_master)) { ++ if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; + break; + } +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -3343,8 +3343,7 @@ int intel_engine_cmd_parser(struct intel + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, +- u32 batch_len, +- bool is_master); ++ u32 batch_len); + + /* i915_perf.c */ + extern void i915_perf_init(struct drm_i915_private *dev_priv); +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -1893,7 +1893,7 @@ static int i915_reset_gen7_sol_offsets(s + return 0; + } + +-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) ++static struct i915_vma *eb_parse(struct i915_execbuffer *eb) + { + struct drm_i915_gem_object *shadow_batch_obj; + struct i915_vma *vma; +@@ -1908,8 +1908,7 @@ static struct i915_vma *eb_parse(struct + eb->batch->obj, + shadow_batch_obj, + eb->batch_start_offset, +- eb->batch_len, +- is_master); ++ eb->batch_len); + if (err) { + if (err == -EACCES) /* unhandled chained batch */ + vma = NULL; +@@ -2308,7 +2307,7 @@ i915_gem_do_execbuffer(struct drm_device + if (eb_use_cmdparser(&eb)) { + struct i915_vma *vma; + +- vma = eb_parse(&eb, drm_is_current_master(file)); ++ vma = eb_parse(&eb); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_vma; diff --git a/debian/patches/bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch b/debian/patches/bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch new file mode 100644 index 000000000..47ac2482f --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch @@ -0,0 +1,110 @@ +From: Jon Bloomfield +Date: Wed, 1 Aug 2018 09:33:59 -0700 +Subject: drm/i915: Add support for mandatory cmdparsing +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 311a50e76a33d1e029563c24b2ff6db0c02b5afe upstream. + +The existing cmdparser for gen7 can be bypassed by specifying +batch_len=0 in the execbuf call. This is safe because bypassing +simply reduces the cmd-set available. + +In a later patch we will introduce cmdparsing for gen9, as a +security measure, which must be strictly enforced since without +it we are vulnerable to DoS attacks. + +Introduce the concept of 'required' cmd parsing that cannot be +bypassed by submitting zero-length bb's. 
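The fix boils down to one predicate, which the eb_use_cmdparser() hunk below spells out: engines that merely *use* the parser can still be skipped by a zero-length batch, but engines that *require* it cannot. A minimal model using the same flag layout as the diff:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ENGINE_USING_CMD_PARSER    (1u << 0)
#define ENGINE_REQUIRES_CMD_PARSER (1u << 3)

static bool use_cmdparser(uint32_t engine_flags, uint64_t batch_len)
{
    return (engine_flags & ENGINE_REQUIRES_CMD_PARSER) ||
           ((engine_flags & ENGINE_USING_CMD_PARSER) && batch_len);
}

int main(void)
{
    printf("gen7-style engine, len 0: %d\n",
           use_cmdparser(ENGINE_USING_CMD_PARSER, 0));
    printf("gen9-style engine, len 0: %d\n",
           use_cmdparser(ENGINE_REQUIRES_CMD_PARSER, 0));
    return 0;
}
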
+ +v2: rebase (Mika) +v2: rebase (Mika) +v3: fix conflict on engine flags (Mika) + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +++--- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 ++- + drivers/gpu/drm/i915/intel_ringbuffer.h | 17 ++++++++++++----- + 3 files changed, 17 insertions(+), 9 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -916,7 +916,7 @@ void intel_engine_init_cmd_parser(struct + return; + } + +- engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; ++ engine->flags |= I915_ENGINE_USING_CMD_PARSER; + } + + /** +@@ -928,7 +928,7 @@ void intel_engine_init_cmd_parser(struct + */ + void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) + { +- if (!intel_engine_needs_cmd_parser(engine)) ++ if (!intel_engine_using_cmd_parser(engine)) + return; + + fini_hash_table(engine); +@@ -1317,7 +1317,7 @@ int i915_cmd_parser_get_version(struct d + + /* If the command parser is not enabled, report 0 - unsupported */ + for_each_engine(engine, dev_priv, id) { +- if (intel_engine_needs_cmd_parser(engine)) { ++ if (intel_engine_using_cmd_parser(engine)) { + active = true; + break; + } +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -309,7 +309,8 @@ static inline u64 gen8_noncanonical_addr + + static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) + { +- return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; ++ return intel_engine_requires_cmd_parser(eb->engine) || ++ (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len); + } + + static int eb_create(struct i915_execbuffer *eb) +--- a/drivers/gpu/drm/i915/intel_ringbuffer.h ++++ b/drivers/gpu/drm/i915/intel_ringbuffer.h +@@ -584,9 +584,10 @@ struct intel_engine_cs { + + struct intel_engine_hangcheck hangcheck; + +-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +-#define I915_ENGINE_SUPPORTS_STATS BIT(1) +-#define I915_ENGINE_HAS_PREEMPTION BIT(2) ++#define I915_ENGINE_USING_CMD_PARSER BIT(0) ++#define I915_ENGINE_SUPPORTS_STATS BIT(1) ++#define I915_ENGINE_HAS_PREEMPTION BIT(2) ++#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(3) + unsigned int flags; + + /* +@@ -647,9 +648,15 @@ struct intel_engine_cs { + }; + + static inline bool +-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) ++intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) + { +- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; ++ return engine->flags & I915_ENGINE_USING_CMD_PARSER; ++} ++ ++static inline bool ++intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) ++{ ++ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; + } + + static inline bool diff --git a/debian/patches/bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch b/debian/patches/bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch new file mode 100644 index 000000000..d03d396f5 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch @@ -0,0 +1,198 @@ +From: Jon Bloomfield +Date: Tue, 22 May 2018 13:59:06 -0700 +Subject: drm/i915: Support ro ppgtt mapped cmdparser shadow buffers +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 4f7af1948abcb18b4772fe1bcd84d7d27d96258c upstream. 
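As the commit message that follows explains, the shadow-buffer placement differs by generation: gen7 keeps the scanned copy in the global GTT and dispatches it secure, while gen8+ pins a read-only copy in the context's ppgtt and runs it unprivileged. A sketch of that decision; the types are stand-ins for the driver's:

#include <stdbool.h>
#include <stdio.h>

struct shadow_placement {
    bool use_ggtt;   /* global GTT mapping                */
    bool read_only;  /* ppgtt mapping is mapped read-only */
    bool secure;     /* dispatched with I915_DISPATCH_SECURE */
};

static struct shadow_placement place_shadow(int gen)
{
    /* mirrors CMDPARSER_USES_GGTT(): only gen7 takes the old path */
    if (gen == 7)
        return (struct shadow_placement){ true, false, true };
    return (struct shadow_placement){ false, true, false };
}

int main(void)
{
    struct shadow_placement p = place_shadow(9);
    printf("gen9: ggtt=%d ro=%d secure=%d\n",
           p.use_ggtt, p.read_only, p.secure);
    return 0;
}
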
+ +For Gen7, the original cmdparser motive was to permit limited +use of register read/write instructions in unprivileged BB's. +This worked by copying the user supplied bb to a kmd owned +bb, and running it in secure mode, from the ggtt, only if +the scanner finds no unsafe commands or registers. + +For Gen8+ we can't use this same technique because running bb's +from the ggtt also disables access to ppgtt space. But we also +do not actually require 'secure' execution since we are only +trying to reduce the available command/register set. Instead we +will copy the user buffer to a kmd owned read-only bb in ppgtt, +and run in the usual non-secure mode. + +Note that ro pages are only supported by ppgtt (not ggtt), but +luckily that's exactly what we need. + +Add the required paths to map the shadow buffer to ppgtt ro for Gen8+ + +v2: IS_GEN7/IS_GEN (Mika) +v3: rebase +v4: rebase +v5: rebase + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_drv.h | 14 ++++++ + drivers/gpu/drm/i915/i915_gem.c | 16 +++++- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 57 +++++++++++++++------- + 3 files changed, 68 insertions(+), 19 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -2496,6 +2496,12 @@ intel_info(const struct drm_i915_private + #define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) + #define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) + ++/* ++ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution ++ * All later gens can run the final buffer from the ppgtt ++ */ ++#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv) ++ + #define ENGINE_MASK(id) BIT(id) + #define RENDER_RING ENGINE_MASK(RCS) + #define BSD_RING ENGINE_MASK(VCS) +@@ -2946,6 +2952,14 @@ i915_gem_object_ggtt_pin(struct drm_i915 + u64 alignment, + u64 flags); + ++struct i915_vma * __must_check ++i915_gem_object_pin(struct drm_i915_gem_object *obj, ++ struct i915_address_space *vm, ++ const struct i915_ggtt_view *view, ++ u64 size, ++ u64 alignment, ++ u64 flags); ++ + int i915_gem_object_unbind(struct drm_i915_gem_object *obj); + void i915_gem_release_mmap(struct drm_i915_gem_object *obj); + +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -4414,6 +4414,20 @@ i915_gem_object_ggtt_pin(struct drm_i915 + { + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_address_space *vm = &dev_priv->ggtt.vm; ++ ++ return i915_gem_object_pin(obj, vm, view, size, alignment, ++ flags | PIN_GLOBAL); ++} ++ ++struct i915_vma * ++i915_gem_object_pin(struct drm_i915_gem_object *obj, ++ struct i915_address_space *vm, ++ const struct i915_ggtt_view *view, ++ u64 size, ++ u64 alignment, ++ u64 flags) ++{ ++ struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + int ret; + +@@ -4477,7 +4491,7 @@ i915_gem_object_ggtt_pin(struct drm_i915 + return ERR_PTR(ret); + } + +- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); ++ ret = i915_vma_pin(vma, size, alignment, flags); + if (ret) + return ERR_PTR(ret); + +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -1894,6 +1894,33 @@ static int i915_reset_gen7_sol_offsets(s + return 0; + } + ++static struct i915_vma * ++shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) ++{ ++ struct drm_i915_private *dev_priv 
= eb->i915; ++ struct i915_address_space *vm; ++ u64 flags; ++ ++ /* ++ * PPGTT backed shadow buffers must be mapped RO, to prevent ++ * post-scan tampering ++ */ ++ if (CMDPARSER_USES_GGTT(dev_priv)) { ++ flags = PIN_GLOBAL; ++ vm = &dev_priv->ggtt.vm; ++ eb->batch_flags |= I915_DISPATCH_SECURE; ++ } else if (eb->vm->has_read_only) { ++ flags = PIN_USER; ++ vm = eb->vm; ++ i915_gem_object_set_readonly(obj); ++ } else { ++ DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); ++} ++ + static struct i915_vma *eb_parse(struct i915_execbuffer *eb) + { + struct drm_i915_gem_object *shadow_batch_obj; +@@ -1911,14 +1938,21 @@ static struct i915_vma *eb_parse(struct + eb->batch_start_offset, + eb->batch_len); + if (err) { +- if (err == -EACCES) /* unhandled chained batch */ ++ /* ++ * Unsafe GGTT-backed buffers can still be submitted safely ++ * as non-secure. ++ * For PPGTT backing however, we have no choice but to forcibly ++ * reject unsafe buffers ++ */ ++ if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) ++ /* Execute original buffer non-secure */ + vma = NULL; + else + vma = ERR_PTR(err); + goto out; + } + +- vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); ++ vma = shadow_batch_pin(eb, shadow_batch_obj); + if (IS_ERR(vma)) + goto out; + +@@ -1927,7 +1961,9 @@ static struct i915_vma *eb_parse(struct + __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; + vma->exec_flags = &eb->flags[eb->buffer_count]; + eb->buffer_count++; +- ++ eb->batch_start_offset = 0; ++ eb->batch = vma; ++ /* eb->batch_len unchanged */ + out: + i915_gem_object_unpin_pages(shadow_batch_obj); + return vma; +@@ -2313,21 +2349,6 @@ i915_gem_do_execbuffer(struct drm_device + err = PTR_ERR(vma); + goto err_vma; + } +- +- if (vma) { +- /* +- * Batch parsed and accepted: +- * +- * Set the DISPATCH_SECURE bit to remove the NON_SECURE +- * bit from MI_BATCH_BUFFER_START commands issued in +- * the dispatch_execbuffer implementations. We +- * specifically don't want that set on batches the +- * command parser has accepted. +- */ +- eb.batch_flags |= I915_DISPATCH_SECURE; +- eb.batch_start_offset = 0; +- eb.batch = vma; +- } + } + + if (eb.batch_len == 0) diff --git a/debian/patches/bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch b/debian/patches/bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch new file mode 100644 index 000000000..93a70de0f --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch @@ -0,0 +1,57 @@ +From: Jon Bloomfield +Date: Wed, 1 Aug 2018 09:45:50 -0700 +Subject: drm/i915: Allow parsing of unsized batches +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 435e8fc059dbe0eec823a75c22da2972390ba9e0 upstream. + +In "drm/i915: Add support for mandatory cmdparsing" we introduced the +concept of mandatory parsing. This allows the cmdparser to be invoked +even when user passes batch_len=0 to the execbuf ioctl's. + +However, the cmdparser needs to know the extents of the buffer being +scanned. Refactor the code to ensure the cmdparser uses the actual +object size, instead of the incoming length, if user passes 0. 
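+
+Roughly, the length is resolved before the parser decision is taken
+(a sketch, not the verbatim driver code):
+
+        /* A user-supplied batch_len of 0 now means "to the end of the
+         * object" rather than "skip parsing". */
+        if (eb.batch_len == 0)
+                eb.batch_len = eb.batch->size - eb.batch_start_offset;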
+ +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -310,7 +310,8 @@ static inline u64 gen8_noncanonical_addr + static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) + { + return intel_engine_requires_cmd_parser(eb->engine) || +- (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len); ++ (intel_engine_using_cmd_parser(eb->engine) && ++ eb->args->batch_len); + } + + static int eb_create(struct i915_execbuffer *eb) +@@ -2341,6 +2342,9 @@ i915_gem_do_execbuffer(struct drm_device + goto err_vma; + } + ++ if (eb.batch_len == 0) ++ eb.batch_len = eb.batch->size - eb.batch_start_offset; ++ + if (eb_use_cmdparser(&eb)) { + struct i915_vma *vma; + +@@ -2351,9 +2355,6 @@ i915_gem_do_execbuffer(struct drm_device + } + } + +- if (eb.batch_len == 0) +- eb.batch_len = eb.batch->size - eb.batch_start_offset; +- + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. diff --git a/debian/patches/bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch b/debian/patches/bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch new file mode 100644 index 000000000..e9a9981c3 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch @@ -0,0 +1,258 @@ +From: Jon Bloomfield +Date: Mon, 23 Apr 2018 11:12:15 -0700 +Subject: drm/i915: Add gen9 BCS cmdparsing +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 0f2f39758341df70202ae1c42d5a1e4ee392b6d3 upstream. + +For gen9 we enable cmdparsing on the BCS ring, specifically +to catch inadvertent accesses to sensitive registers + +Unlike gen7/hsw, we use the parser only to block certain +registers. We can rely on h/w to block restricted commands, +so the command tables only provide enough info to allow the +parser to delineate each command, and identify commands that +access registers. + +Note: This patch deliberately ignores checkpatch issues in +favour of matching the style of the surrounding code. We'll +correct the entire file in one go in a later patch. + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 116 ++++++++++++++++++++++--- + drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +- + drivers/gpu/drm/i915/i915_reg.h | 4 + + 3 files changed, 112 insertions(+), 11 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -442,6 +442,47 @@ static const struct drm_i915_cmd_descrip + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + }; + ++/* ++ * For Gen9 we can still rely on the h/w to enforce cmd security, and only ++ * need to re-enforce the register access checks. We therefore only need to ++ * teach the cmdparser how to find the end of each command, and identify ++ * register accesses. The table doesn't need to reject any commands, and so ++ * the only commands listed here are: ++ * 1) Those that touch registers ++ * 2) Those that do not have the default 8-bit length ++ * ++ * Note that the default MI length mask chosen for this table is 0xFF, not ++ * the 0x3F used on older devices. 
This is because the vast majority of MI ++ * cmds on Gen9 use a standard 8-bit Length field. ++ * All the Gen9 blitter instructions are standard 0xFF length mask, and ++ * none allow access to non-general registers, so in fact no BLT cmds are ++ * included in the table at all. ++ * ++ */ ++static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { ++ CMD( MI_NOOP, SMI, F, 1, S ), ++ CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), ++ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), ++ CMD( MI_FLUSH, SMI, F, 1, S ), ++ CMD( MI_ARB_CHECK, SMI, F, 1, S ), ++ CMD( MI_REPORT_HEAD, SMI, F, 1, S ), ++ CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), ++ CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), ++ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), ++ CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), ++ CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), ++ CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), ++ CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), ++ CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC } ), ++ CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), ++ CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC } ), ++ CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), ++}; ++ + static const struct drm_i915_cmd_descriptor noop_desc = + CMD(MI_NOOP, SMI, F, 1, S); + +@@ -488,6 +529,11 @@ static const struct drm_i915_cmd_table h + { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, + }; + ++static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { ++ { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, ++}; ++ ++ + /* + * Register whitelists, sorted by increasing register offset. + */ +@@ -603,6 +649,29 @@ static const struct drm_i915_reg_descrip + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + }; + ++static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { ++ REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), ++ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), ++ REG32(BCS_SWCTRL), ++ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), ++ REG64_IDX(BCS_GPR, 0), ++ REG64_IDX(BCS_GPR, 1), ++ REG64_IDX(BCS_GPR, 2), ++ REG64_IDX(BCS_GPR, 3), ++ REG64_IDX(BCS_GPR, 4), ++ REG64_IDX(BCS_GPR, 5), ++ REG64_IDX(BCS_GPR, 6), ++ REG64_IDX(BCS_GPR, 7), ++ REG64_IDX(BCS_GPR, 8), ++ REG64_IDX(BCS_GPR, 9), ++ REG64_IDX(BCS_GPR, 10), ++ REG64_IDX(BCS_GPR, 11), ++ REG64_IDX(BCS_GPR, 12), ++ REG64_IDX(BCS_GPR, 13), ++ REG64_IDX(BCS_GPR, 14), ++ REG64_IDX(BCS_GPR, 15), ++}; ++ + #undef REG64 + #undef REG32 + +@@ -628,6 +697,10 @@ static const struct drm_i915_reg_table h + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, + }; + ++static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { ++ { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, ++}; ++ + static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) + { + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; +@@ -683,6 +756,17 @@ static u32 gen7_blt_get_cmd_length_mask( + return 0; + } + ++static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) ++{ ++ u32 client = cmd_header >> INSTR_CLIENT_SHIFT; ++ ++ if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) ++ return 0xFF; ++ ++ DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 
0x%08X\n", cmd_header); ++ return 0; ++} ++ + static bool validate_cmds_sorted(const struct intel_engine_cs *engine, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) +@@ -840,7 +924,8 @@ void intel_engine_init_cmd_parser(struct + int cmd_table_count; + int ret; + +- if (!IS_GEN7(engine->i915)) ++ if (!IS_GEN7(engine->i915) && !(IS_GEN9(engine->i915) && ++ engine->id == BCS)) + return; + + switch (engine->id) { +@@ -861,7 +946,6 @@ void intel_engine_init_cmd_parser(struct + engine->reg_tables = ivb_render_reg_tables; + engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); + } +- + engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; + break; + case VCS: +@@ -870,7 +954,16 @@ void intel_engine_init_cmd_parser(struct + engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + case BCS: +- if (IS_HASWELL(engine->i915)) { ++ engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; ++ if (IS_GEN9(engine->i915)) { ++ cmd_tables = gen9_blt_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); ++ engine->get_cmd_length_mask = ++ gen9_blt_get_cmd_length_mask; ++ ++ /* BCS Engine unsafe without parser */ ++ engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; ++ } else if (IS_HASWELL(engine->i915)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); + } else { +@@ -878,15 +971,17 @@ void intel_engine_init_cmd_parser(struct + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); + } + +- if (IS_HASWELL(engine->i915)) { ++ if (IS_GEN9(engine->i915)) { ++ engine->reg_tables = gen9_blt_reg_tables; ++ engine->reg_table_count = ++ ARRAY_SIZE(gen9_blt_reg_tables); ++ } else if (IS_HASWELL(engine->i915)) { + engine->reg_tables = hsw_blt_reg_tables; + engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); + } else { + engine->reg_tables = ivb_blt_reg_tables; + engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); + } +- +- engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + break; + case VECS: + cmd_tables = hsw_vebox_cmd_table; +@@ -1260,9 +1355,9 @@ int intel_engine_cmd_parser(struct intel + } + + /* +- * If the batch buffer contains a chained batch, return an +- * error that tells the caller to abort and dispatch the +- * workload as a non-secure batch. ++ * We don't try to handle BATCH_BUFFER_START because it adds ++ * non-trivial complexity. Instead we abort the scan and return ++ * and error to indicate that the batch is unsafe. + */ + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = -EACCES; +@@ -1342,6 +1437,7 @@ int i915_cmd_parser_get_version(struct d + * the parser enabled. + * 9. Don't whitelist or handle oacontrol specially, as ownership + * for oacontrol state is moving to i915-perf. ++ * 10. 
Support for Gen9 BCS Parsing + */ +- return 9; ++ return 10; + } +--- a/drivers/gpu/drm/i915/i915_gem_gtt.c ++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c +@@ -158,7 +158,8 @@ int intel_sanitize_enable_ppgtt(struct d + if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) + return 0; + +- if (enable_ppgtt == 1) ++ /* Full PPGTT is required by the Gen9 cmdparser */ ++ if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9) + return 1; + + if (enable_ppgtt == 2 && has_full_ppgtt) +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -471,6 +471,10 @@ static inline bool i915_mmio_reg_valid(i + */ + #define BCS_SWCTRL _MMIO(0x22200) + ++/* There are 16 GPR registers */ ++#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) ++#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) ++ + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) + #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) + #define HS_INVOCATION_COUNT _MMIO(0x2300) diff --git a/debian/patches/bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch b/debian/patches/bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch new file mode 100644 index 000000000..9677432ba --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch @@ -0,0 +1,94 @@ +From: Jon Bloomfield +Date: Thu, 27 Sep 2018 10:23:17 -0700 +Subject: drm/i915/cmdparser: Use explicit goto for error paths +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 0546a29cd884fb8184731c79ab008927ca8859d0 upstream. + +In the next patch we will be adding a second valid +termination condition which will require a small +amount of refactoring to share logic with the BB_END +case. + +Refactor all error conditions to jump to a dedicated +exit path, with 'break' reserved only for a successful +parse. 
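+
+Schematically (parse_one_cmd() is a placeholder standing in for the
+descriptor lookup and checks, not a real function):
+
+        do {
+                if (*cmd == MI_BATCH_BUFFER_END)
+                        break;          /* the only successful exit */
+
+                if (!parse_one_cmd(cmd, &length)) {
+                        ret = -EINVAL;  /* or -EACCES */
+                        goto err;       /* every failure leaves here */
+                }
+
+                cmd += length;
+        } while (1);
+
+        /* success-only work, e.g. the trailing clflush, goes here */
+
+err:
+        return ret;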
+ +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Jon Bloomfield +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 25 +++++++++++++------------ + 1 file changed, 13 insertions(+), 12 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -1337,21 +1337,15 @@ int intel_engine_cmd_parser(struct intel + do { + u32 length; + +- if (*cmd == MI_BATCH_BUFFER_END) { +- if (needs_clflush_after) { +- void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); +- drm_clflush_virt_range(ptr, +- (void *)(cmd + 1) - ptr); +- } ++ if (*cmd == MI_BATCH_BUFFER_END) + break; +- } + + desc = find_cmd(engine, *cmd, desc, &default_desc); + if (!desc) { + DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", + *cmd); + ret = -EINVAL; +- break; ++ goto err; + } + + /* +@@ -1361,7 +1355,7 @@ int intel_engine_cmd_parser(struct intel + */ + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = -EACCES; +- break; ++ goto err; + } + + if (desc->flags & CMD_DESC_FIXED) +@@ -1375,22 +1369,29 @@ int intel_engine_cmd_parser(struct intel + length, + batch_end - cmd); + ret = -EINVAL; +- break; ++ goto err; + } + + if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; +- break; ++ goto err; + } + + cmd += length; + if (cmd >= batch_end) { + DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + ret = -EINVAL; +- break; ++ goto err; + } + } while (1); + ++ if (needs_clflush_after) { ++ void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); ++ ++ drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); ++ } ++ ++err: + i915_gem_object_unpin_map(shadow_batch_obj); + return ret; + } diff --git a/debian/patches/bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch b/debian/patches/bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch new file mode 100644 index 000000000..ac91adbd2 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch @@ -0,0 +1,404 @@ +From: Jon Bloomfield +Date: Thu, 20 Sep 2018 09:58:36 -0700 +Subject: drm/i915/cmdparser: Add support for backward jumps +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit f8c08d8faee5567803c8c533865296ca30286bbf upstream. + +To keep things manageable, the pre-gen9 cmdparser does not +attempt to track any form of nested BB_START's. This did not +prevent usermode from using nested starts, or even chained +batches because the cmdparser is not strictly enforced pre gen9. + +Instead, the existence of a nested BB_START would cause the batch +to be emitted in insecure mode, and any privileged capabilities +would not be available. + +For Gen9, the cmdparser becomes mandatory (for BCS at least), and +so not providing any form of nested BB_START support becomes +overly restrictive. Any such batch will simply not run. + +We make heavy use of backward jumps in igt, and it is much easier +to add support for this restricted subset of nested jumps, than to +rewrite the whole of our test suite to avoid them. + +Add the required logic to support limited backward jumps, to +instructions that have already been validated by the parser. + +Note that it's not sufficient to simply approve any BB_START +that jumps backwards in the buffer because this would allow an +attacker to embed a rogue instruction sequence within the +operand words of a harmless instruction (say LRI) and jump to +that. 
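+
+Illustrative layout (offsets and contents made up for the example):
+
+        0x00: MI_LOAD_REGISTER_IMM(1)   <- validated as one 3-dword cmd
+        0x04: register offset           <- operand word, never parsed
+        0x08: register value            <- could encode a rogue instruction
+
+A BB_START targeting 0x04 or 0x08 would execute operand words as
+instructions, so only offsets validated as command starts (here 0x00)
+are acceptable jump targets.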
+ +We introduce a bit array to track every instr offset successfully +validated, and test the target of BB_START against this. If the +target offset hits, it is re-written to the same offset in the +shadow buffer and the BB_START cmd is allowed. + +Note: This patch deliberately ignores checkpatch issues in the +cmdtables, in order to match the style of the surrounding code. +We'll correct the entire file in one go in a later patch. + +v2: set dispatch secure late (Mika) +v3: rebase (Mika) +v4: Clear whitelist on each parse +Minor review updates (Chris) +v5: Correct backward jump batching +v6: fix compilation error due to struct eb shuffle (Mika) + +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Jon Bloomfield +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 151 +++++++++++++++++++-- + drivers/gpu/drm/i915/i915_drv.h | 9 +- + drivers/gpu/drm/i915/i915_gem_context.c | 5 + + drivers/gpu/drm/i915/i915_gem_context.h | 6 + + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 +++-- + 5 files changed, 179 insertions(+), 26 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -481,6 +481,19 @@ static const struct drm_i915_cmd_descrip + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), ++ ++ /* ++ * We allow BB_START but apply further checks. We just sanitize the ++ * basic fields here. ++ */ ++#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) ++#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) ++ CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, ++ .bits = {{ ++ .offset = 0, ++ .mask = MI_BB_START_OPERAND_MASK, ++ .expected = MI_BB_START_OPERAND_EXPECT, ++ }}, ), + }; + + static const struct drm_i915_cmd_descriptor noop_desc = +@@ -1292,15 +1305,113 @@ static bool check_cmd(const struct intel + return true; + } + ++static int check_bbstart(const struct i915_gem_context *ctx, ++ u32 *cmd, u32 offset, u32 length, ++ u32 batch_len, ++ u64 batch_start, ++ u64 shadow_batch_start) ++{ ++ u64 jump_offset, jump_target; ++ u32 target_cmd_offset, target_cmd_index; ++ ++ /* For igt compatibility on older platforms */ ++ if (CMDPARSER_USES_GGTT(ctx->i915)) { ++ DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); ++ return -EACCES; ++ } ++ ++ if (length != 3) { ++ DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", ++ length); ++ return -EINVAL; ++ } ++ ++ jump_target = *(u64*)(cmd+1); ++ jump_offset = jump_target - batch_start; ++ ++ /* ++ * Any underflow of jump_target is guaranteed to be outside the range ++ * of a u32, so >= test catches both too large and too small ++ */ ++ if (jump_offset >= batch_len) { ++ DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", ++ jump_target); ++ return -EINVAL; ++ } ++ ++ /* ++ * This cannot overflow a u32 because we already checked jump_offset ++ * is within the BB, and the batch_len is a u32 ++ */ ++ target_cmd_offset = lower_32_bits(jump_offset); ++ target_cmd_index = target_cmd_offset / sizeof(u32); ++ ++ *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; ++ ++ if (target_cmd_index == offset) ++ return 0; ++ ++ if (ctx->jump_whitelist_cmds <= target_cmd_index) { ++ DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); ++ return -EINVAL; ++ } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { ++ DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed 
cmd\n", ++ jump_target); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) ++{ ++ const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); ++ const u32 exact_size = BITS_TO_LONGS(batch_cmds); ++ u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); ++ unsigned long *next_whitelist; ++ ++ if (CMDPARSER_USES_GGTT(ctx->i915)) ++ return; ++ ++ if (batch_cmds <= ctx->jump_whitelist_cmds) { ++ memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32)); ++ return; ++ } ++ ++again: ++ next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); ++ if (next_whitelist) { ++ kfree(ctx->jump_whitelist); ++ ctx->jump_whitelist = next_whitelist; ++ ctx->jump_whitelist_cmds = ++ next_size * BITS_PER_BYTE * sizeof(long); ++ return; ++ } ++ ++ if (next_size > exact_size) { ++ next_size = exact_size; ++ goto again; ++ } ++ ++ DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); ++ memset(ctx->jump_whitelist, 0, ++ BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32)); ++ ++ return; ++} ++ + #define LENGTH_BIAS 2 + + /** + * i915_parse_cmds() - parse a submitted batch buffer for privilege violations ++ * @ctx: the context in which the batch is to execute + * @engine: the engine on which the batch is to execute + * @batch_obj: the batch buffer in question +- * @shadow_batch_obj: copy of the batch buffer in question ++ * @batch_start: Canonical base address of batch + * @batch_start_offset: byte offset in the batch at which execution starts + * @batch_len: length of the commands in batch_obj ++ * @shadow_batch_obj: copy of the batch buffer in question ++ * @shadow_batch_start: Canonical base address of shadow_batch_obj + * + * Parses the specified batch buffer looking for privilege violations as + * described in the overview. +@@ -1308,13 +1419,17 @@ static bool check_cmd(const struct intel + * Return: non-zero if the parser finds violations or otherwise fails; -EACCES + * if the batch appears legal but should use hardware parsing + */ +-int intel_engine_cmd_parser(struct intel_engine_cs *engine, ++ ++int intel_engine_cmd_parser(struct i915_gem_context *ctx, ++ struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, +- struct drm_i915_gem_object *shadow_batch_obj, ++ u64 batch_start, + u32 batch_start_offset, +- u32 batch_len) ++ u32 batch_len, ++ struct drm_i915_gem_object *shadow_batch_obj, ++ u64 shadow_batch_start) + { +- u32 *cmd, *batch_end; ++ u32 *cmd, *batch_end, offset = 0; + struct drm_i915_cmd_descriptor default_desc = noop_desc; + const struct drm_i915_cmd_descriptor *desc = &default_desc; + bool needs_clflush_after = false; +@@ -1328,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel + return PTR_ERR(cmd); + } + ++ init_whitelist(ctx, batch_len); ++ + /* + * We use the batch length as size because the shadow object is as + * large or larger and copy_batch() will write MI_NOPs to the extra +@@ -1348,16 +1465,6 @@ int intel_engine_cmd_parser(struct intel + goto err; + } + +- /* +- * We don't try to handle BATCH_BUFFER_START because it adds +- * non-trivial complexity. Instead we abort the scan and return +- * and error to indicate that the batch is unsafe. 
+- */ +- if (desc->cmd.value == MI_BATCH_BUFFER_START) { +- ret = -EACCES; +- goto err; +- } +- + if (desc->flags & CMD_DESC_FIXED) + length = desc->length.fixed; + else +@@ -1377,7 +1484,21 @@ int intel_engine_cmd_parser(struct intel + goto err; + } + ++ if (desc->cmd.value == MI_BATCH_BUFFER_START) { ++ ret = check_bbstart(ctx, cmd, offset, length, ++ batch_len, batch_start, ++ shadow_batch_start); ++ ++ if (ret) ++ goto err; ++ break; ++ } ++ ++ if (ctx->jump_whitelist_cmds > offset) ++ set_bit(offset, ctx->jump_whitelist); ++ + cmd += length; ++ offset += length; + if (cmd >= batch_end) { + DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + ret = -EINVAL; +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -3353,11 +3353,14 @@ const char *i915_cache_level_str(struct + int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); + void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); + void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); +-int intel_engine_cmd_parser(struct intel_engine_cs *engine, ++int intel_engine_cmd_parser(struct i915_gem_context *cxt, ++ struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, +- struct drm_i915_gem_object *shadow_batch_obj, ++ u64 user_batch_start, + u32 batch_start_offset, +- u32 batch_len); ++ u32 batch_len, ++ struct drm_i915_gem_object *shadow_batch_obj, ++ u64 shadow_batch_start); + + /* i915_perf.c */ + extern void i915_perf_init(struct drm_i915_private *dev_priv); +--- a/drivers/gpu/drm/i915/i915_gem_context.c ++++ b/drivers/gpu/drm/i915/i915_gem_context.c +@@ -124,6 +124,8 @@ static void i915_gem_context_free(struct + + i915_ppgtt_put(ctx->ppgtt); + ++ kfree(ctx->jump_whitelist); ++ + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; + +@@ -339,6 +341,9 @@ __create_hw_context(struct drm_i915_priv + else + ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; + ++ ctx->jump_whitelist = NULL; ++ ctx->jump_whitelist_cmds = 0; ++ + return ctx; + + err_pid: +--- a/drivers/gpu/drm/i915/i915_gem_context.h ++++ b/drivers/gpu/drm/i915/i915_gem_context.h +@@ -183,6 +183,12 @@ struct i915_gem_context { + /** remap_slice: Bitmask of cache lines that need remapping */ + u8 remap_slice; + ++ /** jump_whitelist: Bit array for tracking cmds during cmdparsing */ ++ unsigned long *jump_whitelist; ++ ++ /** jump_whitelist_cmds: No of cmd slots available */ ++ u32 jump_whitelist_cmds; ++ + /** handles_vma: rbtree to look up our context specific obj/vma for + * the user handle. 
(user handles are per fd, but the binding is + * per vm, which may be one per context or shared with the global GTT) +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -1909,7 +1909,6 @@ shadow_batch_pin(struct i915_execbuffer + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + vm = &dev_priv->ggtt.vm; +- eb->batch_flags |= I915_DISPATCH_SECURE; + } else if (eb->vm->has_read_only) { + flags = PIN_USER; + vm = eb->vm; +@@ -1926,6 +1925,8 @@ static struct i915_vma *eb_parse(struct + { + struct drm_i915_gem_object *shadow_batch_obj; + struct i915_vma *vma; ++ u64 batch_start; ++ u64 shadow_batch_start; + int err; + + shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, +@@ -1933,12 +1934,27 @@ static struct i915_vma *eb_parse(struct + if (IS_ERR(shadow_batch_obj)) + return ERR_CAST(shadow_batch_obj); + +- err = intel_engine_cmd_parser(eb->engine, ++ vma = shadow_batch_pin(eb, shadow_batch_obj); ++ if (IS_ERR(vma)) ++ goto out; ++ ++ batch_start = gen8_canonical_addr(eb->batch->node.start) + ++ eb->batch_start_offset; ++ ++ shadow_batch_start = gen8_canonical_addr(vma->node.start); ++ ++ err = intel_engine_cmd_parser(eb->ctx, ++ eb->engine, + eb->batch->obj, +- shadow_batch_obj, ++ batch_start, + eb->batch_start_offset, +- eb->batch_len); ++ eb->batch_len, ++ shadow_batch_obj, ++ shadow_batch_start); ++ + if (err) { ++ i915_vma_unpin(vma); ++ + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. +@@ -1950,12 +1966,9 @@ static struct i915_vma *eb_parse(struct + vma = NULL; + else + vma = ERR_PTR(err); +- goto out; +- } + +- vma = shadow_batch_pin(eb, shadow_batch_obj); +- if (IS_ERR(vma)) + goto out; ++ } + + eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->flags[eb->buffer_count] = +@@ -1964,7 +1977,12 @@ static struct i915_vma *eb_parse(struct + eb->buffer_count++; + eb->batch_start_offset = 0; + eb->batch = vma; ++ + /* eb->batch_len unchanged */ ++ ++ if (CMDPARSER_USES_GGTT(eb->i915)) ++ eb->batch_flags |= I915_DISPATCH_SECURE; ++ + out: + i915_gem_object_unpin_pages(shadow_batch_obj); + return vma; diff --git a/debian/patches/bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch b/debian/patches/bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch new file mode 100644 index 000000000..615e994b5 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch @@ -0,0 +1,37 @@ +From: Jon Bloomfield +Date: Thu, 20 Sep 2018 09:45:10 -0700 +Subject: drm/i915/cmdparser: Ignore Length operands during command matching +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 926abff21a8f29ef159a3ac893b05c6e50e043c3 upstream. + +Some of the gen instruction macros (e.g. MI_DISPLAY_FLIP) have the +length directly encoded in them. Since these are used directly in +the tables, the Length becomes part of the comparison used for +matching during parsing. Thus, if the cmd being parsed has a +different length to that in the table, it is not matched and the +cmd is accepted via the default variable length path. 
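+
+With made-up numbers, the matching test is (header & mask) == value:
+
+        static bool desc_matches(u32 header)
+        {
+                u32 mask  = ~0u << 23;          /* keep only opcode bits */
+                u32 value = (0x14 << 23) | 2;   /* macro w/ embedded length */
+
+                /* The mask strips the length bits from the header but
+                 * not from the stored value, so the compare misses and
+                 * the cmd falls through to the default length path. */
+                return (header & mask) == value;
+        }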
+ +Fix by masking out everything except the Opcode in the cmd tables + +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Jon Bloomfield +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -187,7 +187,7 @@ struct drm_i915_cmd_table { + #define CMD(op, opm, f, lm, fl, ...) \ + { \ + .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ +- .cmd = { (op), ~0u << (opm) }, \ ++ .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ + .length = { (lm) }, \ + __VA_ARGS__ \ + } diff --git a/debian/patches/bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch b/debian/patches/bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch new file mode 100644 index 000000000..84acd8734 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch @@ -0,0 +1,72 @@ +From: Uma Shankar +Date: Tue, 7 Aug 2018 21:15:35 +0530 +Subject: drm/i915: Lower RM timeout to avoid DSI hard hangs +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0154 + +commit 1d85a299c4db57c55e0229615132c964d17aa765 upstream. + +In BXT/APL, device 2 MMIO reads from MIPI controller requires its PLL +to be turned ON. When MIPI PLL is turned off (MIPI Display is not +active or connected), and someone (host or GT engine) tries to read +MIPI registers, it causes hard hang. This is a hardware restriction +or limitation. + +Driver by itself doesn't read MIPI registers when MIPI display is off. +But any userspace application can submit unprivileged batch buffer for +execution. In that batch buffer there can be mmio reads. And these +reads are allowed even for unprivileged applications. If these +register reads are for MIPI DSI controller and MIPI display is not +active during that time, then the MMIO read operation causes system +hard hang and only way to recover is hard reboot. A genuine +process/application won't submit batch buffer like this and doesn't +cause any issue. But on a compromised system, a malign userspace +process/app can generate such batch buffer and can trigger system +hard hang (denial of service attack). + +The fix is to lower the internal MMIO timeout value to an optimum +value of 950us as recommended by hardware team. If the timeout is +beyond 1ms (which will hit for any value we choose if MMIO READ on a +DSI specific register is performed without PLL ON), it causes the +system hang. But if the timeout value is lower than it will be below +the threshold (even if timeout happens) and system will not get into +a hung state. This will avoid a system hang without losing any +programming or GT interrupts, taking the worst case of lowest CDCLK +frequency and early DC5 abort into account. 
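+
+Concretely, with the register and macro added later in this patch, the
+workaround is a single write during BXT clock-gating init:
+
+        #define RM_TIMEOUT           _MMIO(0x42060)
+        #define MMIO_TIMEOUT_US(us)  ((us) << 0)
+
+        I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));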
+ +Signed-off-by: Uma Shankar +Reviewed-by: Jon Bloomfield +--- + drivers/gpu/drm/i915/i915_reg.h | 4 ++++ + drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ + 2 files changed, 12 insertions(+) + +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -7009,6 +7009,10 @@ enum { + #define SKL_CSR_DC5_DC6_COUNT _MMIO(0x8002C) + #define BXT_CSR_DC3_DC5_COUNT _MMIO(0x80038) + ++/* Display Internal Timeout Register */ ++#define RM_TIMEOUT _MMIO(0x42060) ++#define MMIO_TIMEOUT_US(us) ((us) << 0) ++ + /* interrupts */ + #define DE_MASTER_IRQ_CONTROL (1 << 31) + #define DE_SPRITEB_FLIP_DONE (1 << 29) +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -114,6 +114,14 @@ static void bxt_init_clock_gating(struct + */ + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); ++ ++ /* ++ * Lower the display internal timeout. ++ * This is needed to avoid any hard hangs when DSI port PLL ++ * is off and a MMIO access is attempted by any privilege ++ * application, using batch buffers or any other means. ++ */ ++ I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); + } + + static void glk_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/debian/patches/bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch b/debian/patches/bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch new file mode 100644 index 000000000..e555b72d0 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch @@ -0,0 +1,282 @@ +From: Imre Deak +Date: Mon, 9 Jul 2018 18:24:27 +0300 +Subject: drm/i915/gen8+: Add RC6 CTX corruption WA +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0154 + +commit 7e34f4e4aad3fd34c02b294a3cf2321adf5b4438 upstream. + +In some circumstances the RC6 context can get corrupted. We can detect +this and take the required action, that is disable RC6 and runtime PM. +The HW recovers from the corrupted state after a system suspend/resume +cycle, so detect the recovery and re-enable RC6 and runtime PM. + +v2: rebase (Mika) +v3: +- Move intel_suspend_gt_powersave() to the end of the GEM suspend + sequence. +- Add commit message. +v4: +- Rebased on intel_uncore_forcewake_put(i915->uncore, ...) API + change. 
+v5: rebased on gem/gt split (Mika) + +Signed-off-by: Imre Deak +Signed-off-by: Mika Kuoppala +--- + drivers/gpu/drm/i915/i915_drv.c | 3 + + drivers/gpu/drm/i915/i915_drv.h | 7 +- + drivers/gpu/drm/i915/i915_gem.c | 8 +++ + drivers/gpu/drm/i915/i915_reg.h | 2 + + drivers/gpu/drm/i915/intel_drv.h | 3 + + drivers/gpu/drm/i915/intel_pm.c | 107 ++++++++++++++++++++++++++++++- + 6 files changed, 126 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_drv.c ++++ b/drivers/gpu/drm/i915/i915_drv.c +@@ -1621,6 +1621,7 @@ static int i915_drm_suspend_late(struct + i915_gem_suspend_late(dev_priv); + + intel_display_set_init_power(dev_priv, false); ++ i915_rc6_ctx_wa_suspend(dev_priv); + intel_uncore_suspend(dev_priv); + + /* +@@ -1847,6 +1848,8 @@ static int i915_drm_resume_early(struct + else + intel_display_set_init_power(dev_priv, true); + ++ i915_rc6_ctx_wa_resume(dev_priv); ++ + intel_engines_sanitize(dev_priv); + + enable_rpm_wakeref_asserts(dev_priv); +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -801,6 +801,7 @@ struct intel_rps { + + struct intel_rc6 { + bool enabled; ++ bool ctx_corrupted; + u64 prev_hw_residency[4]; + u64 cur_residency[4]; + }; +@@ -2557,10 +2558,12 @@ intel_info(const struct drm_i915_private + /* Early gen2 have a totally busted CS tlb and require pinned batches. */ + #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) + ++#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ ++ (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) == 9) ++ + /* WaRsDisableCoarsePowerGating:skl,cnl */ + #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ +- (IS_CANNONLAKE(dev_priv) || \ +- IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) ++ (IS_CANNONLAKE(dev_priv) || INTEL_GEN(dev_priv) == 9) + + #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) + #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -174,6 +174,11 @@ static u32 __i915_gem_park(struct drm_i9 + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + ++ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { ++ i915_rc6_ctx_wa_check(i915); ++ intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); ++ } ++ + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); + + intel_runtime_pm_put(i915); +@@ -220,6 +225,9 @@ void i915_gem_unpark(struct drm_i915_pri + */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + ++ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) ++ intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); ++ + i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -387,6 +387,8 @@ static inline bool i915_mmio_reg_valid(i + #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) + #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) + ++#define GEN8_RC6_CTX_INFO _MMIO(0x8504) ++ + #define GAC_ECO_BITS _MMIO(0x14090) + #define ECOBITS_SNB_BIT (1 << 13) + #define ECOBITS_PPGTT_CACHE64B (3 << 8) +--- a/drivers/gpu/drm/i915/intel_drv.h ++++ b/drivers/gpu/drm/i915/intel_drv.h +@@ -2064,6 +2064,9 @@ void intel_sanitize_gt_powersave(struct + void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); + void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); + void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); ++bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915); ++void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915); ++void 
i915_rc6_ctx_wa_resume(struct drm_i915_private *i915); + void gen6_rps_busy(struct drm_i915_private *dev_priv); + void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); + void gen6_rps_idle(struct drm_i915_private *dev_priv); +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -8196,6 +8196,95 @@ static void intel_init_emon(struct drm_i + dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); + } + ++static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv) ++{ ++ return !I915_READ(GEN8_RC6_CTX_INFO); ++} ++ ++static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915) ++{ ++ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) ++ return; ++ ++ if (i915_rc6_ctx_corrupted(i915)) { ++ DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); ++ i915->gt_pm.rc6.ctx_corrupted = true; ++ intel_runtime_pm_get(i915); ++ } ++} ++ ++static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915) ++{ ++ if (i915->gt_pm.rc6.ctx_corrupted) { ++ intel_runtime_pm_put(i915); ++ i915->gt_pm.rc6.ctx_corrupted = false; ++ } ++} ++ ++/** ++ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA ++ * @i915: i915 device ++ * ++ * Perform any steps needed to clean up the RC6 CTX WA before system suspend. ++ */ ++void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915) ++{ ++ if (i915->gt_pm.rc6.ctx_corrupted) ++ intel_runtime_pm_put(i915); ++} ++ ++/** ++ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA ++ * @i915: i915 device ++ * ++ * Perform any steps needed to re-init the RC6 CTX WA after system resume. ++ */ ++void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915) ++{ ++ if (!i915->gt_pm.rc6.ctx_corrupted) ++ return; ++ ++ if (i915_rc6_ctx_corrupted(i915)) { ++ intel_runtime_pm_get(i915); ++ return; ++ } ++ ++ DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); ++ i915->gt_pm.rc6.ctx_corrupted = false; ++} ++ ++static void intel_disable_rc6(struct drm_i915_private *dev_priv); ++ ++/** ++ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption ++ * @i915: i915 device ++ * ++ * Check if an RC6 CTX corruption has happened since the last check and if so ++ * disable RC6 and runtime power management. ++ * ++ * Return false if no context corruption has happened since the last call of ++ * this function, true otherwise. 
++*/ ++bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915) ++{ ++ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) ++ return false; ++ ++ if (i915->gt_pm.rc6.ctx_corrupted) ++ return false; ++ ++ if (!i915_rc6_ctx_corrupted(i915)) ++ return false; ++ ++ DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); ++ ++ intel_disable_rc6(i915); ++ i915->gt_pm.rc6.ctx_corrupted = true; ++ intel_runtime_pm_get_noresume(i915); ++ ++ return true; ++} ++ + void intel_init_gt_powersave(struct drm_i915_private *dev_priv) + { + struct intel_rps *rps = &dev_priv->gt_pm.rps; +@@ -8211,6 +8300,8 @@ void intel_init_gt_powersave(struct drm_ + + mutex_lock(&dev_priv->pcu_lock); + ++ i915_rc6_ctx_wa_init(dev_priv); ++ + /* Initialize RPS limits (for userspace) */ + if (IS_CHERRYVIEW(dev_priv)) + cherryview_init_gt_powersave(dev_priv); +@@ -8257,6 +8348,8 @@ void intel_cleanup_gt_powersave(struct d + if (IS_VALLEYVIEW(dev_priv)) + valleyview_cleanup_gt_powersave(dev_priv); + ++ i915_rc6_ctx_wa_cleanup(dev_priv); ++ + if (!HAS_RC6(dev_priv)) + intel_runtime_pm_put(dev_priv); + } +@@ -8301,7 +8394,7 @@ static inline void intel_disable_llc_pst + i915->gt_pm.llc_pstate.enabled = false; + } + +-static void intel_disable_rc6(struct drm_i915_private *dev_priv) ++static void __intel_disable_rc6(struct drm_i915_private *dev_priv) + { + lockdep_assert_held(&dev_priv->pcu_lock); + +@@ -8320,6 +8413,13 @@ static void intel_disable_rc6(struct drm + dev_priv->gt_pm.rc6.enabled = false; + } + ++static void intel_disable_rc6(struct drm_i915_private *dev_priv) ++{ ++ mutex_lock(&dev_priv->pcu_lock); ++ __intel_disable_rc6(dev_priv); ++ mutex_unlock(&dev_priv->pcu_lock); ++} ++ + static void intel_disable_rps(struct drm_i915_private *dev_priv) + { + lockdep_assert_held(&dev_priv->pcu_lock); +@@ -8345,7 +8445,7 @@ void intel_disable_gt_powersave(struct d + { + mutex_lock(&dev_priv->pcu_lock); + +- intel_disable_rc6(dev_priv); ++ __intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_disable_llc_pstate(dev_priv); +@@ -8372,6 +8472,9 @@ static void intel_enable_rc6(struct drm_ + if (dev_priv->gt_pm.rc6.enabled) + return; + ++ if (dev_priv->gt_pm.rc6.ctx_corrupted) ++ return; ++ + if (IS_CHERRYVIEW(dev_priv)) + cherryview_enable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) diff --git a/debian/patches/series b/debian/patches/series index 8ebe2b831..dcef9840b 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -284,6 +284,18 @@ bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch +bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch +bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch +bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch +bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch +bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch +bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch +bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch +bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch 
+bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch +bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch +bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From 6d8b0092bbe65952c60421a7cd8907f91cfa154d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 10 Nov 2019 22:41:41 +0000 Subject: [PATCH 11/13] [x86] drm/i915/cmdparser: Fix jump whitelist clearing Fix a flaw I found in the mitigation for CVE-2019-0155. --- debian/changelog | 1 + ...mdparser-fix-jump-whitelist-clearing.patch | 44 +++++++++++++++++++ debian/patches/series | 1 + 3 files changed, 46 insertions(+) create mode 100644 debian/patches/bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch diff --git a/debian/changelog b/debian/changelog index 21c508f0a..dc72468ff 100644 --- a/debian/changelog +++ b/debian/changelog @@ -43,6 +43,7 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - drm/i915/cmdparser: Use explicit goto for error paths - drm/i915/cmdparser: Add support for backward jumps - drm/i915/cmdparser: Ignore Length operands during command matching + - drm/i915/cmdparser: Fix jump whitelist clearing * [x86] i915: Mitigate local denial-of-service on gen8/gen9 (CVE-2019-0154): - drm/i915: Lower RM timeout to avoid DSI hard hangs - drm/i915/gen8+: Add RC6 CTX corruption WA diff --git a/debian/patches/bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch b/debian/patches/bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch new file mode 100644 index 000000000..210c58c19 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch @@ -0,0 +1,44 @@ +From: Ben Hutchings +Date: Sun, 10 Nov 2019 22:08:12 +0000 +Subject: drm/i915/cmdparser: Fix jump whitelist clearing +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +When a jump_whitelist bitmap is reused, it needs to be cleared. +Currently this is done with memset() and the size calculation assumes +bitmaps are made of 32-bit words, not longs. So on 64-bit +architectures, only the first half of the bitmap is cleared. + +If some whitelist bits are carried over between successive batches +submitted on the same context, this will presumably allow embedding +the rogue instructions that we're trying to reject. + +Use bitmap_zero() instead, which gets the calculation right. + +Fixes: f8c08d8faee5 ("drm/i915/cmdparser: Add support for backward jumps") +Cc: stable@vger.kernel.org +Signed-off-by: Ben Hutchings +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -1374,7 +1374,7 @@ static void init_whitelist(struct i915_g + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { +- memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32)); ++ bitmap_zero(ctx->jump_whitelist, batch_cmds); + return; + } + +@@ -1394,8 +1394,7 @@ again: + } + + DRM_DEBUG("CMD: Failed to extend whitelist. 
BB_START may be disallowed\n"); +- memset(ctx->jump_whitelist, 0, +- BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32)); ++ bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); + + return; + } diff --git a/debian/patches/series b/debian/patches/series index dcef9840b..9cf67909f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -296,6 +296,7 @@ bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch +bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch From 9a2df80e9dec8b79b1c10798c85bf6dec6963938 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 11 Nov 2019 00:29:38 +0000 Subject: [PATCH 12/13] Drop "x86/cpu: Add Tremont to the cpu vulnerability whitelist" We don't have this CPU ID, and I don't see the point in adding it right now. --- debian/changelog | 1 - ...ont-to-the-cpu-vulnerability-whiteli.patch | 30 ------------------- debian/patches/series | 1 - 3 files changed, 32 deletions(-) delete mode 100644 debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch diff --git a/debian/changelog b/debian/changelog index dc72468ff..800b4ec98 100644 --- a/debian/changelog +++ b/debian/changelog @@ -27,7 +27,6 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active - x86/bugs: Add ITLB_MULTIHIT bug infrastructure - cpu/speculation: Uninline and export CPU mitigations helpers - - x86/cpu: Add Tremont to the cpu vulnerability whitelist - kvm: mmu: ITLB_MULTIHIT mitigation - kvm: Add helper function for creating VM worker threads - kvm: x86: mmu: Recovery of shattered NX large pages diff --git a/debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch b/debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch deleted file mode 100644 index f9237aadb..000000000 --- a/debian/patches/bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: Pawan Gupta -Date: Mon, 4 Nov 2019 12:22:01 +0100 -Subject: x86/cpu: Add Tremont to the cpu vulnerability whitelist - -commit cad14885a8d32c1c0d8eaa7bf5c0152a22b6080e upstream - -Add the new cpu family ATOM_TREMONT_D to the cpu vunerability -whitelist. ATOM_TREMONT_D is not affected by X86_BUG_ITLB_MULTIHIT. - -ATOM_TREMONT_D might have mitigations against other issues as well, but -only the ITLB multihit mitigation is confirmed at this point. - -Signed-off-by: Pawan Gupta -Signed-off-by: Paolo Bonzini -Signed-off-by: Thomas Gleixner ---- - arch/x86/kernel/cpu/common.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -1000,6 +1000,8 @@ static const __initconst struct x86_cpu_ - * good enough for our purposes. 
- */ - -+ VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT), -+ - /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), diff --git a/debian/patches/series b/debian/patches/series index 9cf67909f..27ee5a48f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -270,7 +270,6 @@ bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch -bugfix/x86/itlb_multihit/0012-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch From c3649501d025688c60fe60694d04d77b44f14b9e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 11 Nov 2019 00:30:56 +0000 Subject: [PATCH 13/13] Prepare to release linux (4.19.67-2+deb10u2). --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 800b4ec98..fa814a3a3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium +linux (4.19.67-2+deb10u2) buster-security; urgency=high * [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135): - KVM: x86: use Intel speculation bugs and features as derived in generic @@ -47,7 +47,7 @@ linux (4.19.67-2+deb10u2) UNRELEASED; urgency=medium - drm/i915: Lower RM timeout to avoid DSI hard hangs - drm/i915/gen8+: Add RC6 CTX corruption WA - -- Ben Hutchings Sun, 20 Oct 2019 14:21:28 +0100 + -- Ben Hutchings Mon, 11 Nov 2019 00:30:56 +0000 linux (4.19.67-2+deb10u1) buster-security; urgency=high