133 lines
4.6 KiB
Diff
133 lines
4.6 KiB
Diff
|
From 54324ebc2ae2c404f1fe97050af832f0a031287e Mon Sep 17 00:00:00 2001
|
||
|
From: Christian Borntraeger <borntraeger@de.ibm.com>
|
||
|
Date: Wed, 15 Apr 2020 15:21:01 +0200
|
||
|
Subject: [PATCH] s390/mm: fix page table upgrade vs 2ndary address mode
|
||
|
accesses
|
||
|
|
||
|
A page table upgrade in a kernel section that uses secondary address
|
||
|
mode will mess up the kernel instructions as follows:
|
||
|
|
||
|
Consider the following scenario: two threads are sharing memory.
|
||
|
On CPU1 thread 1 does e.g. strnlen_user(). That gets to
|
||
|
old_fs = enable_sacf_uaccess();
|
||
|
len = strnlen_user_srst(src, size);
|
||
|
and
|
||
|
" la %2,0(%1)\n"
|
||
|
" la %3,0(%0,%1)\n"
|
||
|
" slgr %0,%0\n"
|
||
|
" sacf 256\n"
|
||
|
"0: srst %3,%2\n"
|
||
|
in strnlen_user_srst(). At that point we are in secondary space mode,
|
||
|
control register 1 points to kernel page table and instruction fetching
|
||
|
happens via c1, rather than usual c13. Interrupts are not disabled, for
|
||
|
obvious reasons.
|
||
|
|
||
|
On CPU2 thread 2 does MAP_FIXED mmap(), forcing the upgrade of page table
|
||
|
from a 3-level to e.g. a 4-level one. We've allocated a new top-level table,
|
||
|
set it up and now we hit this:
|
||
|
notify = 1;
|
||
|
spin_unlock_bh(&mm->page_table_lock);
|
||
|
}
|
||
|
if (notify)
|
||
|
on_each_cpu(__crst_table_upgrade, mm, 0);
|
||
|
OK, we need to actually change over to use of new page table and we
|
||
|
need that to happen in all threads that are currently running. Which
|
||
|
happens to include the thread 1. IPI is delivered and we have
|
||
|
static void __crst_table_upgrade(void *arg)
|
||
|
{
|
||
|
struct mm_struct *mm = arg;
|
||
|
|
||
|
if (current->active_mm == mm)
|
||
|
set_user_asce(mm);
|
||
|
__tlb_flush_local();
|
||
|
}
|
||
|
run on CPU1. That does
|
||
|
static inline void set_user_asce(struct mm_struct *mm)
|
||
|
{
|
||
|
S390_lowcore.user_asce = mm->context.asce;
|
||
|
OK, user page table address updated...
|
||
|
__ctl_load(S390_lowcore.user_asce, 1, 1);
|
||
|
... and control register 1 set to it.
|
||
|
clear_cpu_flag(CIF_ASCE_PRIMARY);
|
||
|
}
|
||
|
|
||
|
IPI is run in home space mode, so it's fine - insns are fetched
|
||
|
using c13, which always points to kernel page table. But as soon
|
||
|
as we return from the interrupt, previous PSW is restored, putting
|
||
|
CPU1 back into secondary space mode, at which point we no longer
|
||
|
get the kernel instructions from the kernel mapping.
|
||
|
|
||
|
The fix is to only fix up the control registers that are currently in use
|
||
|
for user processes during the page table update. We must also disable
|
||
|
interrupts in enable_sacf_uaccess to synchronize the cr and
|
||
|
thread.mm_segment updates against the on_each_cpu() call.
|
||
|
|
||
|
Fixes: 0aaba41b58bc ("s390: remove all code using the access register mode")
|
||
|
Cc: stable@vger.kernel.org # 4.15+
|
||
|
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
|
||
|
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
|
||
|
References: CVE-2020-11884
|
||
|
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
|
||
|
---
|
||
|
arch/s390/lib/uaccess.c | 4 ++++
|
||
|
arch/s390/mm/pgalloc.c | 16 ++++++++++++++--
|
||
|
2 files changed, 18 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
|
||
|
index c4f8039a35e8..0267405ab7c6 100644
|
||
|
--- a/arch/s390/lib/uaccess.c
|
||
|
+++ b/arch/s390/lib/uaccess.c
|
||
|
@@ -64,10 +64,13 @@ mm_segment_t enable_sacf_uaccess(void)
|
||
|
{
|
||
|
mm_segment_t old_fs;
|
||
|
unsigned long asce, cr;
|
||
|
+ unsigned long flags;
|
||
|
|
||
|
old_fs = current->thread.mm_segment;
|
||
|
if (old_fs & 1)
|
||
|
return old_fs;
|
||
|
+ /* protect against a concurrent page table upgrade */
|
||
|
+ local_irq_save(flags);
|
||
|
current->thread.mm_segment |= 1;
|
||
|
asce = S390_lowcore.kernel_asce;
|
||
|
if (likely(old_fs == USER_DS)) {
|
||
|
@@ -83,6 +86,7 @@ mm_segment_t enable_sacf_uaccess(void)
|
||
|
__ctl_load(asce, 7, 7);
|
||
|
set_cpu_flag(CIF_ASCE_SECONDARY);
|
||
|
}
|
||
|
+ local_irq_restore(flags);
|
||
|
return old_fs;
|
||
|
}
|
||
|
EXPORT_SYMBOL(enable_sacf_uaccess);
|
||
|
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
|
||
|
index 498c98a312f4..fff169d64711 100644
|
||
|
--- a/arch/s390/mm/pgalloc.c
|
||
|
+++ b/arch/s390/mm/pgalloc.c
|
||
|
@@ -70,8 +70,20 @@ static void __crst_table_upgrade(void *arg)
|
||
|
{
|
||
|
struct mm_struct *mm = arg;
|
||
|
|
||
|
- if (current->active_mm == mm)
|
||
|
- set_user_asce(mm);
|
||
|
+ /* we must change all active ASCEs to avoid the creation of new TLBs */
|
||
|
+ if (current->active_mm == mm) {
|
||
|
+ S390_lowcore.user_asce = mm->context.asce;
|
||
|
+ if (current->thread.mm_segment == USER_DS) {
|
||
|
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
|
||
|
+ /* Mark user-ASCE present in CR1 */
|
||
|
+ clear_cpu_flag(CIF_ASCE_PRIMARY);
|
||
|
+ }
|
||
|
+ if (current->thread.mm_segment == USER_DS_SACF) {
|
||
|
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
|
||
|
+ /* enable_sacf_uaccess does all or nothing */
|
||
|
+ WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY));
|
||
|
+ }
|
||
|
+ }
|
||
|
__tlb_flush_local();
|
||
|
}
|
||
|
|
||
|
--
|
||
|
2.25.2
|
||
|
|
||
|
|