91 lines
3.3 KiB
Diff
From: Andy Lutomirski <luto@kernel.org>
|
|
Date: Wed, 15 Jul 2015 10:29:38 -0700
|
|
Subject: x86/nmi/64: Use DF to avoid userspace RSP confusing nested NMI detection
|
|
Origin: https://git.kernel.org/linus/810bc075f78ff2c221536eb3008eac6a492dba2d
|
|
|
|
We have a tricky bug in the nested NMI code: if we see RSP
|
|
pointing to the NMI stack on NMI entry from kernel mode, we
|
|
assume that we are executing a nested NMI.
|
|
|
|
This isn't quite true. A malicious userspace program can point
|
|
RSP at the NMI stack, issue SYSCALL, and arrange for an NMI to
|
|
happen while RSP is still pointing at the NMI stack.
|
|
|
|
Fix it with a sneaky trick. Set DF in the region of code that
|
|
the RSP check is intended to detect. IRET will clear DF
|
|
atomically.
|
|
|
|
( Note: other than paravirt, there's little need for all this
|
|
complexity. We could check RIP instead of RSP. )
|
|
|
|
Signed-off-by: Andy Lutomirski <luto@kernel.org>
|
|
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
|
|
Cc: Borislav Petkov <bp@suse.de>
|
|
Cc: Linus Torvalds <torvalds@linux-foundation.org>
|
|
Cc: Peter Zijlstra <peterz@infradead.org>
|
|
Cc: Thomas Gleixner <tglx@linutronix.de>
|
|
Cc: stable@vger.kernel.org
|
|
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
|
[bwh: Backported to 4.1: adjust filename, context]
|
|
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
|
---
|
|
arch/x86/kernel/entry_64.S | 29 +++++++++++++++++++++++++----
|
|
1 file changed, 25 insertions(+), 4 deletions(-)
|
|
|
|
--- a/arch/x86/kernel/entry_64.S
|
|
+++ b/arch/x86/kernel/entry_64.S
|
|
@@ -1581,7 +1581,14 @@ ENTRY(nmi)
|
|
/*
|
|
* Now test if the previous stack was an NMI stack. This covers
|
|
* the case where we interrupt an outer NMI after it clears
|
|
- * "NMI executing" but before IRET.
|
|
+ * "NMI executing" but before IRET. We need to be careful, though:
|
|
+ * there is one case in which RSP could point to the NMI stack
|
|
+ * despite there being no NMI active: naughty userspace controls
|
|
+ * RSP at the very beginning of the SYSCALL targets. We can
|
|
+ * pull a fast one on naughty userspace, though: we program
|
|
+ * SYSCALL to mask DF, so userspace cannot cause DF to be set
|
|
+ * if it controls the kernel's RSP. We set DF before we clear
|
|
+ * "NMI executing".
|
|
*/
|
|
lea 6*8(%rsp), %rdx
|
|
/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
|
|
@@ -1592,10 +1599,16 @@ ENTRY(nmi)
|
|
cmpq %rdx, 4*8(%rsp)
|
|
/* If it is below the NMI stack, it is a normal NMI */
|
|
jb first_nmi
|
|
- /* Ah, it is within the NMI stack, treat it as nested */
|
|
+
|
|
+ /* Ah, it is within the NMI stack. */
|
|
+
|
|
+ testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
|
|
+ jz first_nmi /* RSP was user controlled. */
|
|
|
|
CFI_REMEMBER_STATE
|
|
|
|
+ /* This is a nested NMI. */
|
|
+
|
|
nested_nmi:
|
|
/*
|
|
* Modify the "iret" frame to point to repeat_nmi, forcing another
|
|
@@ -1710,8 +1723,16 @@ nmi_restore:
|
|
/* Point RSP at the "iret" frame. */
|
|
REMOVE_PT_GPREGS_FROM_STACK 6*8
|
|
|
|
- /* Clear "NMI executing". */
|
|
- movq $0, 5*8(%rsp)
|
|
+ /*
|
|
+ * Clear "NMI executing". Set DF first so that we can easily
|
|
+ * distinguish the remaining code between here and IRET from
|
|
+ * the SYSCALL entry and exit paths. On a native kernel, we
|
|
+ * could just inspect RIP, but, on paravirt kernels,
|
|
+ * INTERRUPT_RETURN can translate into a jump into a
|
|
+ * hypercall page.
|
|
+ */
|
|
+ std
|
|
+ movq $0, 5*8(%rsp) /* clear "NMI executing" */
|
|
|
|
/*
|
|
* INTERRUPT_RETURN reads the "iret" frame and exits the NMI
|