From ca72e3b523e41094bb28df0710e5a25384f9b30c Mon Sep 17 00:00:00 2001
Message-Id:
In-Reply-To: <5b5a156f9808b1acf1205606e03da117214549ea.1601675151.git.zanussi@kernel.org>
References: <5b5a156f9808b1acf1205606e03da117214549ea.1601675151.git.zanussi@kernel.org>
From: Sebastian Andrzej Siewior
Date: Thu, 30 Nov 2017 13:40:10 +0100
Subject: [PATCH 222/333] crypto: limit more FPU-enabled sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.148-rt64.tar.xz

Those crypto drivers use SSE/AVX/… for their crypto work and in order
to do so in the kernel they need to enable the "FPU" in kernel mode,
which disables preemption. There are two problems with the way they
are used:

- the while loop which processes X bytes may create latency spikes and
  should be avoided or limited.
- the cipher-walk-next part may allocate/free memory and may use
  kmap_atomic().

The whole kernel_fpu_begin()/end() processing probably isn't that
cheap, so it most likely makes sense to process as much data as
possible in one go. The new kernel_fpu_resched() schedules only if a
reschedule is pending. We should probably measure the performance of
those ciphers in pure software mode and with these optimisations to
see whether it makes sense to keep them for RT.

This kernel_fpu_resched() makes the code more preemptible, which might
hurt performance.

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior
---
 arch/x86/crypto/chacha20_glue.c |  9 +++++----
 arch/x86/include/asm/fpu/api.h  |  1 +
 arch/x86/kernel/fpu/core.c      | 12 ++++++++++++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index dce7c5d39c2f..6194160b7fbc 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -81,23 +81,24 @@ static int chacha20_simd(struct skcipher_request *req)
 
 	crypto_chacha20_init(state, ctx, walk.iv);
 
-	kernel_fpu_begin();
-
 	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+		kernel_fpu_begin();
+
 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
 				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+		kernel_fpu_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes % CHACHA20_BLOCK_SIZE);
 	}
 
 	if (walk.nbytes) {
+		kernel_fpu_begin();
 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
 				walk.nbytes);
+		kernel_fpu_end();
 		err = skcipher_walk_done(&walk, 0);
 	}
 
-	kernel_fpu_end();
-
 	return err;
 }
 
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b56d504af654..e51c7094075d 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -20,6 +20,7 @@
  */
 extern void kernel_fpu_begin(void);
 extern void kernel_fpu_end(void);
+extern void kernel_fpu_resched(void);
 extern bool irq_fpu_usable(void);
 
 /*
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 2e5003fef51a..768c53767bb2 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -136,6 +136,18 @@ void kernel_fpu_end(void)
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
 
+void kernel_fpu_resched(void)
+{
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+	if (should_resched(PREEMPT_OFFSET)) {
+		kernel_fpu_end();
+		cond_resched();
+		kernel_fpu_begin();
+	}
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_resched);
+
 /*
  * Save the FPU state (mark it for reload if necessary):
  *
-- 
2.17.1
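
For illustration only, not part of the patch: a minimal sketch of how a
long-running FPU-enabled loop could use the new kernel_fpu_resched()
helper to bound the preempt-disabled section. crypt_buffer(),
process_block() and BLOCK_SIZE are made-up names; only the
kernel_fpu_*() calls are the real API added/used above.

	/*
	 * Hypothetical caller: process_block() stands in for a real
	 * SIMD cipher primitive working on BLOCK_SIZE bytes at a time.
	 */
	static void crypt_buffer(u8 *dst, const u8 *src, unsigned int nbytes)
	{
		kernel_fpu_begin();
		while (nbytes >= BLOCK_SIZE) {
			/* SIMD work on one block */
			process_block(dst, src);
			dst += BLOCK_SIZE;
			src += BLOCK_SIZE;
			nbytes -= BLOCK_SIZE;
			/*
			 * Leaves the FPU section, calls cond_resched() and
			 * re-enters it, but only if a reschedule is pending;
			 * otherwise preemption stays disabled and the loop
			 * continues without the begin()/end() overhead.
			 */
			kernel_fpu_resched();
		}
		kernel_fpu_end();
	}

This keeps the scheduling latency bounded by one block's worth of work
instead of the whole buffer, at the cost of a should_resched() check
per block.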