From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 8 Jul 2019 11:52:25 -0500
Subject: x86/speculation: Prepare entry code for Spectre v1 swapgs mitigations
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=befb822c062b4c3d93380a58d5fd479395e8b267
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-1125

commit 18ec54fdd6d18d92025af097cd042a75cf0ea24c upstream

Spectre v1 isn't only about array bounds checks. It can affect any
conditional checks. The kernel entry code interrupt, exception, and NMI
handlers all have conditional swapgs checks. Those may be problematic in
the context of Spectre v1, as kernel code can speculatively run with a user
GS.

For example:

	if (coming from user space)
		swapgs
	mov %gs:<percpu_offset>, %reg
	mov (%reg), %reg1

When coming from user space, the CPU can speculatively skip the swapgs, and
then do a speculative percpu load using the user GS value. So the user can
speculatively force a read of any kernel value. If a gadget exists which
uses the percpu value as an address in another load/store, then the
contents of the kernel value may become visible via an L1 side channel
attack.

A similar attack exists when coming from kernel space. The CPU can
speculatively do the swapgs, causing the user GS to get used for the rest
of the speculative window.

The mitigation is similar to a traditional Spectre v1 mitigation, except:

  a) index masking isn't possible because the index (percpu offset)
     isn't user-controlled; and

  b) an lfence is needed in both the "from user" swapgs path and the
     "from kernel" non-swapgs path (because of the two attacks described
     above).

The user entry swapgs paths already have SWITCH_TO_KERNEL_CR3, which has a
CR3 write when PTI is enabled. Since CR3 writes are serializing, the
lfences can be skipped in those cases.

On the other hand, the kernel entry swapgs paths don't depend on PTI.

To avoid unnecessary lfences for the user entry case, create two separate
features for alternative patching:

  X86_FEATURE_FENCE_SWAPGS_USER
  X86_FEATURE_FENCE_SWAPGS_KERNEL

Use these features in entry code to patch in lfences where needed.

The features aren't enabled yet, so there's no functional change.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/entry/calling.h           | 17 +++++++++++++++++
 arch/x86/entry/entry_64.S          | 21 ++++++++++++++++++---
 arch/x86/include/asm/cpufeatures.h |  2 ++
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index e699b2041665..578b5455334f 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -329,6 +329,23 @@ For 32-bit we have the following conventions - kernel is built with
 
 #endif
 
+/*
+ * Mitigate Spectre v1 for conditional swapgs code paths.
+ *
+ * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
+ * prevent a speculative swapgs when coming from kernel space.
+ *
+ * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
+ * to prevent the swapgs from getting speculatively skipped when coming from
+ * user space.
+ */
+.macro FENCE_SWAPGS_USER_ENTRY
+	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
+.endm
+.macro FENCE_SWAPGS_KERNEL_ENTRY
+	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e7572a209fbe..7d8da285e185 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -582,7 +582,7 @@ ENTRY(interrupt_entry)
 	testb	$3, CS-ORIG_RAX+8(%rsp)
 	jz	1f
 	SWAPGS
-
+	FENCE_SWAPGS_USER_ENTRY
 	/*
 	 * Switch to the thread stack. The IRET frame and orig_ax are
 	 * on the stack, as well as the return address. RDI..R12 are
@@ -612,8 +612,10 @@ ENTRY(interrupt_entry)
 	UNWIND_HINT_FUNC
 
 	movq	(%rdi), %rdi
+	jmpq	2f
 1:
-
+	FENCE_SWAPGS_KERNEL_ENTRY
+2:
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
 
@@ -1240,6 +1242,13 @@ ENTRY(paranoid_entry)
 	 */
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
+	/*
+	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
+	 * unconditional CR3 write, even in the PTI case. So do an lfence
+	 * to prevent GS speculation, regardless of whether PTI is enabled.
+	 */
+	FENCE_SWAPGS_KERNEL_ENTRY
+
 	ret
 END(paranoid_entry)
 
@@ -1290,6 +1299,7 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	/* We have user CR3. Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
@@ -1311,6 +1321,8 @@ ENTRY(error_entry)
 	CALL_enter_from_user_mode
 	ret
 
+.Lerror_entry_done_lfence:
+	FENCE_SWAPGS_KERNEL_ENTRY
 .Lerror_entry_done:
 	TRACE_IRQS_OFF
 	ret
@@ -1329,7 +1341,7 @@ ENTRY(error_entry)
 	cmpq	%rax, RIP+8(%rsp)
 	je	.Lbstep_iret
 	cmpq	$.Lgs_change, RIP+8(%rsp)
-	jne	.Lerror_entry_done
+	jne	.Lerror_entry_done_lfence
 
 	/*
 	 * hack: .Lgs_change can fail with user gsbase. If this happens, fix up
@@ -1337,6 +1349,7 @@ ENTRY(error_entry)
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 
@@ -1351,6 +1364,7 @@ ENTRY(error_entry)
 	 * gsbase and CR3. Switch to kernel gsbase and CR3:
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 	/*
@@ -1442,6 +1456,7 @@ ENTRY(nmi)
 
 	swapgs
 	cld
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5041f19918f2..e0f47f6a1017 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -281,6 +281,8 @@
 #define X86_FEATURE_CQM_OCCUP_LLC	(11*32+ 1) /* LLC occupancy monitoring */
 #define X86_FEATURE_CQM_MBM_TOTAL	(11*32+ 2) /* LLC Total MBM monitoring */
 #define X86_FEATURE_CQM_MBM_LOCAL	(11*32+ 3) /* LLC Local MBM monitoring */
+#define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+#define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
-- 
2.20.1