diff --git a/debian/changelog b/debian/changelog
index a258de0e0..1ad6903a4 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -54,6 +54,9 @@ linux (4.3.4-1) UNRELEASED; urgency=medium
   [ Ben Hutchings ]
   * fuse: break infinite loop in fuse_fill_write_pages() (CVE-2015-8785)
   * SCSI: fix crashes in sd and sr runtime PM (Closes: #801925)
+  * [x86] mm: Add barriers and document switch_mm()-vs-flush synchronization
+    (CVE-2016-2069)
+  * [x86] mm: Improve switch_mm() barrier comments
 
   [ Salvatore Bonaccorso ]
   * tcp: fix zero cwnd in tcp_cwnd_reduction (CVE-2016-2070)
diff --git a/debian/patches/bugfix/x86/x86-mm-Add-barriers-and-document-switch_mm-vs-flush-.patch b/debian/patches/bugfix/x86/x86-mm-Add-barriers-and-document-switch_mm-vs-flush-.patch
new file mode 100644
index 000000000..0ef087561
--- /dev/null
+++ b/debian/patches/bugfix/x86/x86-mm-Add-barriers-and-document-switch_mm-vs-flush-.patch
@@ -0,0 +1,158 @@
+From: Andy Lutomirski
+Date: Wed, 6 Jan 2016 12:21:01 -0800
+Subject: x86/mm: Add barriers and document switch_mm()-vs-flush
+ synchronization
+Origin: https://git.kernel.org/linus/71b3c126e61177eb693423f2e18a1914205b165e
+
+When switch_mm() activates a new PGD, it also sets a bit that
+tells other CPUs that the PGD is in use so that TLB flush IPIs
+will be sent. In order for that to work correctly, the bit
+needs to be visible prior to loading the PGD and therefore
+starting to fill the local TLB.
+
+Document all the barriers that make this work correctly and add
+a couple that were missing.
+
+Signed-off-by: Andy Lutomirski
+Cc: Andrew Morton
+Cc: Andy Lutomirski
+Cc: Borislav Petkov
+Cc: Brian Gerst
+Cc: Dave Hansen
+Cc: Denys Vlasenko
+Cc: H. Peter Anvin
+Cc: Linus Torvalds
+Cc: Peter Zijlstra
+Cc: Rik van Riel
+Cc: Thomas Gleixner
+Cc: linux-mm@kvack.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Ingo Molnar
+---
+ arch/x86/include/asm/mmu_context.h | 33 ++++++++++++++++++++++++++++++++-
+ arch/x86/mm/tlb.c                  | 29 ++++++++++++++++++++++++++---
+ 2 files changed, 58 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 379cd3658799..1edc9cd198b8 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -116,8 +116,34 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ #endif
+ 		cpumask_set_cpu(cpu, mm_cpumask(next));
+ 
+-		/* Re-load page tables */
++		/*
++		 * Re-load page tables.
++		 *
++		 * This logic has an ordering constraint:
++		 *
++		 *  CPU 0: Write to a PTE for 'next'
++		 *  CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
++		 *  CPU 1: set bit 1 in next's mm_cpumask
++		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
++		 *
++		 * We need to prevent an outcome in which CPU 1 observes
++		 * the new PTE value and CPU 0 observes bit 1 clear in
++		 * mm_cpumask. (If that occurs, then the IPI will never
++		 * be sent, and CPU 0's TLB will contain a stale entry.)
++		 *
++		 * The bad outcome can occur if either CPU's load is
++		 * reordered before that CPU's store, so both CPUs much
++		 * execute full barriers to prevent this from happening.
++		 *
++		 * Thus, switch_mm needs a full barrier between the
++		 * store to mm_cpumask and any operation that could load
++		 * from next->pgd. This barrier synchronizes with
++		 * remote TLB flushers. Fortunately, load_cr3 is
++		 * serializing and thus acts as a full barrier.
++		 *
++		 */
+ 		load_cr3(next->pgd);
++
+ 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ 
+ 		/* Stop flush ipis for the previous mm */
+@@ -156,10 +182,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ 			 * schedule, protecting us from simultaneous changes.
+ 			 */
+ 			cpumask_set_cpu(cpu, mm_cpumask(next));
++
+ 			/*
+ 			 * We were in lazy tlb mode and leave_mm disabled
+ 			 * tlb flush IPI delivery. We must reload CR3
+ 			 * to make sure to use no freed page tables.
++			 *
++			 * As above, this is a barrier that forces
++			 * TLB repopulation to be ordered after the
++			 * store to mm_cpumask.
+ 			 */
+ 			load_cr3(next->pgd);
+ 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 8ddb5d0d66fb..8f4cc3dfac32 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -161,7 +161,10 @@ void flush_tlb_current_task(void)
+ 	preempt_disable();
+ 
+ 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
++
++	/* This is an implicit full barrier that synchronizes with switch_mm. */
+ 	local_flush_tlb();
++
+ 	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+ 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+@@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ 	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+ 
+ 	preempt_disable();
+-	if (current->active_mm != mm)
++	if (current->active_mm != mm) {
++		/* Synchronize with switch_mm. */
++		smp_mb();
++
+ 		goto out;
++	}
+ 
+ 	if (!current->mm) {
+ 		leave_mm(smp_processor_id());
++
++		/* Synchronize with switch_mm. */
++		smp_mb();
++
+ 		goto out;
+ 	}
+ 
+ 	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+ 		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+ 
++	/*
++	 * Both branches below are implicit full barriers (MOV to CR or
++	 * INVLPG) that synchronize with switch_mm.
++	 */
+ 	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+ 		base_pages_to_flush = TLB_FLUSH_ALL;
+ 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+@@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
+ 	preempt_disable();
+ 
+ 	if (current->active_mm == mm) {
+-		if (current->mm)
++		if (current->mm) {
++			/*
++			 * Implicit full barrier (INVLPG) that synchronizes
++			 * with switch_mm.
++			 */
+ 			__flush_tlb_one(start);
+-		else
++		} else {
+ 			leave_mm(smp_processor_id());
++
++			/* Synchronize with switch_mm. */
++			smp_mb();
++		}
+ 	}
+ 
+ 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
diff --git a/debian/patches/bugfix/x86/x86-mm-Improve-switch_mm-barrier-comments.patch b/debian/patches/bugfix/x86/x86-mm-Improve-switch_mm-barrier-comments.patch
new file mode 100644
index 000000000..5e3f9326c
--- /dev/null
+++ b/debian/patches/bugfix/x86/x86-mm-Improve-switch_mm-barrier-comments.patch
@@ -0,0 +1,64 @@
+From: Andy Lutomirski
+Date: Tue, 12 Jan 2016 12:47:40 -0800
+Subject: x86/mm: Improve switch_mm() barrier comments
+Origin: https://git.kernel.org/linus/4eaffdd5a5fe6ff9f95e1ab4de1ac904d5e0fa8b
+
+My previous comments were still a bit confusing and there was a
+typo. Fix it up.
+
+Reported-by: Peter Zijlstra
+Signed-off-by: Andy Lutomirski
+Cc: Andy Lutomirski
+Cc: Borislav Petkov
+Cc: Brian Gerst
+Cc: Dave Hansen
+Cc: Denys Vlasenko
+Cc: H. Peter Anvin
+Cc: Linus Torvalds
+Cc: Rik van Riel
+Cc: Thomas Gleixner
+Cc: stable@vger.kernel.org
+Fixes: 71b3c126e611 ("x86/mm: Add barriers and document switch_mm()-vs-flush synchronization")
+Link: http://lkml.kernel.org/r/0a0b43cdcdd241c5faaaecfbcc91a155ddedc9a1.1452631609.git.luto@kernel.org
+Signed-off-by: Ingo Molnar
+---
+ arch/x86/include/asm/mmu_context.h | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 1edc9cd198b8..bfd9b2a35a0b 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -132,14 +132,16 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ 		 * be sent, and CPU 0's TLB will contain a stale entry.)
+ 		 *
+ 		 * The bad outcome can occur if either CPU's load is
+-		 * reordered before that CPU's store, so both CPUs much
++		 * reordered before that CPU's store, so both CPUs must
+ 		 * execute full barriers to prevent this from happening.
+ 		 *
+ 		 * Thus, switch_mm needs a full barrier between the
+ 		 * store to mm_cpumask and any operation that could load
+-		 * from next->pgd. This barrier synchronizes with
+-		 * remote TLB flushers. Fortunately, load_cr3 is
+-		 * serializing and thus acts as a full barrier.
++		 * from next->pgd. TLB fills are special and can happen
++		 * due to instruction fetches or for no reason at all,
++		 * and neither LOCK nor MFENCE orders them.
++		 * Fortunately, load_cr3() is serializing and gives the
++		 * ordering guarantee we need.
+ 		 *
+ 		 */
+ 		load_cr3(next->pgd);
+@@ -188,9 +190,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ 			 * tlb flush IPI delivery. We must reload CR3
+ 			 * to make sure to use no freed page tables.
+ 			 *
+-			 * As above, this is a barrier that forces
+-			 * TLB repopulation to be ordered after the
+-			 * store to mm_cpumask.
++			 * As above, load_cr3() is serializing and orders TLB
++			 * fills with respect to the mm_cpumask write.
+ 			 */
+ 			load_cr3(next->pgd);
+ 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
diff --git a/debian/patches/series b/debian/patches/series
index 6c7346ba6..9afb768e7 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -145,3 +145,5 @@ bugfix/all/fuse-break-infinite-loop-in-fuse_fill_write_pages.patch
 bugfix/all/tcp-fix-zero-cwnd-in-tcp_cwnd_reduction.patch
 bugfix/all/scsi-fix-crashes-in-sd-and-sr-runtime-pm.patch
 bugfix/all/netfilter-nf_nat_redirect-add-missing-NULL-pointer-c.patch
+bugfix/x86/x86-mm-Add-barriers-and-document-switch_mm-vs-flush-.patch
+bugfix/x86/x86-mm-Improve-switch_mm-barrier-comments.patch