x86/mm: Add barriers and document switch_mm()-vs-flush synchronization (CVE-2016-2069)
Plus a follow-up fix to the comments.
parent 985fe43552
commit 20ed8bdbac
@@ -54,6 +54,9 @@ linux (4.3.4-1) UNRELEASED; urgency=medium
   [ Ben Hutchings ]
   * fuse: break infinite loop in fuse_fill_write_pages() (CVE-2015-8785)
   * SCSI: fix crashes in sd and sr runtime PM (Closes: #801925)
+  * [x86] mm: Add barriers and document switch_mm()-vs-flush synchronization
+    (CVE-2016-2069)
+  * [x86] mm: Improve switch_mm() barrier comments
 
   [ Salvatore Bonaccorso ]
   * tcp: fix zero cwnd in tcp_cwnd_reduction (CVE-2016-2070)
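The race these patches close is the classic store-buffering pattern: each CPU
performs a store and then a load of the location the other CPU stores to, and
unless a full barrier sits between the two, either load may be satisfied before
the store leaves the store buffer. Below is a minimal user-space sketch of the
same pattern, not taken from the patches: the names are hypothetical, C11
atomics stand in for the PTE write, the mm_cpumask bit, and the TLB fill, and
the observed count is timing-dependent (it may be zero on some machines or
runs).

/* sb_litmus.c - build with: gcc -O2 -pthread sb_litmus.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define ITERS 1000000

static atomic_int pte, cpumask_bit;             /* the two shared locations */
static atomic_int start0, start1, done0, done1; /* per-round handshake */
static int r0, r1;

/* CPU 0: the remote flusher - writes a PTE, then checks mm_cpumask */
static void *flusher(void *arg)
{
        (void)arg;
        for (int i = 0; i < ITERS; i++) {
                while (!atomic_exchange(&start0, 0))
                        ;                       /* wait for this round */
                atomic_store_explicit(&pte, 1, memory_order_relaxed);
                /* The bug: no full barrier here, so this load may be
                 * satisfied before the store above becomes visible. */
                r0 = atomic_load_explicit(&cpumask_bit, memory_order_relaxed);
                atomic_store(&done0, 1);
        }
        return NULL;
}

/* CPU 1: switch_mm() - sets its mm_cpumask bit, then "fills the TLB" */
static void *switcher(void *arg)
{
        (void)arg;
        for (int i = 0; i < ITERS; i++) {
                while (!atomic_exchange(&start1, 0))
                        ;
                atomic_store_explicit(&cpumask_bit, 1, memory_order_relaxed);
                /* In the kernel, the barrier at this spot is the
                 * serializing load_cr3(); here there is none. */
                r1 = atomic_load_explicit(&pte, memory_order_relaxed);
                atomic_store(&done1, 1);
        }
        return NULL;
}

int main(void)
{
        pthread_t t0, t1;
        long stale = 0;

        pthread_create(&t0, NULL, flusher, NULL);
        pthread_create(&t1, NULL, switcher, NULL);
        for (int i = 0; i < ITERS; i++) {
                atomic_store(&pte, 0);
                atomic_store(&cpumask_bit, 0);
                atomic_store(&start0, 1);       /* release both threads */
                atomic_store(&start1, 1);
                while (!atomic_load(&done0) || !atomic_load(&done1))
                        ;
                atomic_store(&done0, 0);
                atomic_store(&done1, 0);
                if (r0 == 0 && r1 == 0)         /* no IPI sent, stale TLB */
                        stale++;
        }
        pthread_join(t0, NULL);
        pthread_join(t1, NULL);
        printf("stale-TLB outcomes: %ld / %d\n", stale, ITERS);
        return 0;
}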
debian/patches/bugfix/x86/x86-mm-Add-barriers-and-document-switch_mm-vs-flush-.patch (new file, 158 lines)
@@ -0,0 +1,158 @@
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 6 Jan 2016 12:21:01 -0800
Subject: x86/mm: Add barriers and document switch_mm()-vs-flush
 synchronization
Origin: https://git.kernel.org/linus/71b3c126e61177eb693423f2e18a1914205b165e

When switch_mm() activates a new PGD, it also sets a bit that
tells other CPUs that the PGD is in use so that TLB flush IPIs
will be sent. In order for that to work correctly, the bit
needs to be visible prior to loading the PGD and therefore
starting to fill the local TLB.

Document all the barriers that make this work correctly and add
a couple that were missing.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 33 ++++++++++++++++++++++++++++++++-
 arch/x86/mm/tlb.c                  | 29 ++++++++++++++++++++++++++---
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 379cd3658799..1edc9cd198b8 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -116,8 +116,34 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 		cpumask_set_cpu(cpu, mm_cpumask(next));
 
-		/* Re-load page tables */
+		/*
+		 * Re-load page tables.
+		 *
+		 * This logic has an ordering constraint:
+		 *
+		 *  CPU 0: Write to a PTE for 'next'
+		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
+		 *  CPU 1: set bit 1 in next's mm_cpumask
+		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+		 *
+		 * We need to prevent an outcome in which CPU 1 observes
+		 * the new PTE value and CPU 0 observes bit 1 clear in
+		 * mm_cpumask.  (If that occurs, then the IPI will never
+		 * be sent, and CPU 0's TLB will contain a stale entry.)
+		 *
+		 * The bad outcome can occur if either CPU's load is
+		 * reordered before that CPU's store, so both CPUs much
+		 * execute full barriers to prevent this from happening.
+		 *
+		 * Thus, switch_mm needs a full barrier between the
+		 * store to mm_cpumask and any operation that could load
+		 * from next->pgd.  This barrier synchronizes with
+		 * remote TLB flushers.  Fortunately, load_cr3 is
+		 * serializing and thus acts as a full barrier.
+		 *
+		 */
 		load_cr3(next->pgd);
+
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 
 		/* Stop flush ipis for the previous mm */
@@ -156,10 +182,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 * schedule, protecting us from simultaneous changes.
 			 */
 			cpumask_set_cpu(cpu, mm_cpumask(next));
+
 			/*
 			 * We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
+			 *
+			 * As above, this is a barrier that forces
+			 * TLB repopulation to be ordered after the
+			 * store to mm_cpumask.
 			 */
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8ddb5d0d66fb..8f4cc3dfac32 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,7 +161,10 @@ void flush_tlb_current_task(void)
 	preempt_disable();
 
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+
+	/* This is an implicit full barrier that synchronizes with switch_mm. */
 	local_flush_tlb();
+
 	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
@@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
 
 	preempt_disable();
-	if (current->active_mm != mm)
+	if (current->active_mm != mm) {
+		/* Synchronize with switch_mm. */
+		smp_mb();
+
 		goto out;
+	}
 
 	if (!current->mm) {
 		leave_mm(smp_processor_id());
+
+		/* Synchronize with switch_mm. */
+		smp_mb();
+
 		goto out;
 	}
 
 	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
 		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
 
+	/*
+	 * Both branches below are implicit full barriers (MOV to CR or
+	 * INVLPG) that synchronize with switch_mm.
+	 */
 	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
 		base_pages_to_flush = TLB_FLUSH_ALL;
 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
@@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 	preempt_disable();
 
 	if (current->active_mm == mm) {
-		if (current->mm)
+		if (current->mm) {
+			/*
+			 * Implicit full barrier (INVLPG) that synchronizes
+			 * with switch_mm.
+			 */
 			__flush_tlb_one(start);
-		else
+		} else {
 			leave_mm(smp_processor_id());
+
+			/* Synchronize with switch_mm. */
+			smp_mb();
+		}
 	}
 
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
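Read against the tlb.c hunks above, the fix is a full barrier between each
side's store and load. In the litmus sketch earlier, a seq_cst fence (MFENCE
on x86) can play the role of the smp_mb() the patch adds to the flush paths;
switch_mm() itself needs no extra instruction because MOV to CR3 is
serializing. Note the caveat from the follow-up patch below: real TLB fills
are not ordered by LOCK or MFENCE, so the fence only suffices here because an
ordinary load models the fill. Hypothetical drop-in replacements for the two
loop bodies in the sketch:

#include <stdatomic.h>

/* One round on the flusher side, with the analog of the patch's smp_mb(). */
static inline void flusher_step(atomic_int *pte, atomic_int *bit, int *r0)
{
        atomic_store_explicit(pte, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);  /* smp_mb() analog */
        *r0 = atomic_load_explicit(bit, memory_order_relaxed);
}

/* One round on the switch_mm() side; the fence stands in for the
 * serializing load_cr3(), which is itself a full barrier. */
static inline void switcher_step(atomic_int *bit, atomic_int *pte, int *r1)
{
        atomic_store_explicit(bit, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);  /* load_cr3() analog */
        *r1 = atomic_load_explicit(pte, memory_order_relaxed);
}

With both fences in place, the r0 == 0 && r1 == 0 outcome is forbidden and
the harness's stale-TLB count drops to zero.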
debian/patches/bugfix/x86/x86-mm-Improve-switch_mm-barrier-comments.patch (new file, 64 lines)
@@ -0,0 +1,64 @@
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 12 Jan 2016 12:47:40 -0800
Subject: x86/mm: Improve switch_mm() barrier comments
Origin: https://git.kernel.org/linus/4eaffdd5a5fe6ff9f95e1ab4de1ac904d5e0fa8b

My previous comments were still a bit confusing and there was a
typo. Fix it up.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: 71b3c126e611 ("x86/mm: Add barriers and document switch_mm()-vs-flush synchronization")
Link: http://lkml.kernel.org/r/0a0b43cdcdd241c5faaaecfbcc91a155ddedc9a1.1452631609.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 1edc9cd198b8..bfd9b2a35a0b 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -132,14 +132,16 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		 * be sent, and CPU 0's TLB will contain a stale entry.)
 		 *
 		 * The bad outcome can occur if either CPU's load is
-		 * reordered before that CPU's store, so both CPUs much
+		 * reordered before that CPU's store, so both CPUs must
 		 * execute full barriers to prevent this from happening.
 		 *
 		 * Thus, switch_mm needs a full barrier between the
 		 * store to mm_cpumask and any operation that could load
-		 * from next->pgd.  This barrier synchronizes with
-		 * remote TLB flushers.  Fortunately, load_cr3 is
-		 * serializing and thus acts as a full barrier.
+		 * from next->pgd.  TLB fills are special and can happen
+		 * due to instruction fetches or for no reason at all,
+		 * and neither LOCK nor MFENCE orders them.
+		 * Fortunately, load_cr3() is serializing and gives the
+		 * ordering guarantee we need.
 		 *
 		 */
 		load_cr3(next->pgd);
@@ -188,9 +190,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
 			 *
-			 * As above, this is a barrier that forces
-			 * TLB repopulation to be ordered after the
-			 * store to mm_cpumask.
+			 * As above, load_cr3() is serializing and orders TLB
+			 * fills with respect to the mm_cpumask write.
 			 */
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
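The follow-up's key point is that TLB fills are ordered only by serializing
instructions, not by LOCK or MFENCE. load_cr3() is privileged, but the same
"serializing instruction acts as a full barrier" property can be illustrated
from user space with CPUID; the sketch below is a hypothetical illustration
(GCC inline asm), not code from either patch.

/* CPUID is architecturally serializing: all prior modifications to
 * registers and memory complete, and buffered writes drain, before the
 * next instruction is fetched - the same property that lets MOV to CR3
 * order TLB fills in switch_mm(). */
static inline void serialize(void)
{
        unsigned int eax = 0, ebx, ecx, edx;

        __asm__ volatile("cpuid"
                         : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                         :: "memory");
}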
debian/patches/series
@@ -145,3 +145,5 @@ bugfix/all/fuse-break-infinite-loop-in-fuse_fill_write_pages.patch
 bugfix/all/tcp-fix-zero-cwnd-in-tcp_cwnd_reduction.patch
 bugfix/all/scsi-fix-crashes-in-sd-and-sr-runtime-pm.patch
 bugfix/all/netfilter-nf_nat_redirect-add-missing-NULL-pointer-c.patch
+bugfix/x86/x86-mm-Add-barriers-and-document-switch_mm-vs-flush-.patch
+bugfix/x86/x86-mm-Improve-switch_mm-barrier-comments.patch