From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 21 Feb 2014 17:24:04 +0100
Subject: crypto: Reduce preempt disabled regions, more algos
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18.7-rt5.tar.xz

Don Estabrook reported
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2462 migrate_enable+0x17b/0x200()
| kernel: WARNING: CPU: 3 PID: 865 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()

and his backtrace showed some crypto functions which looked fine.

The problem is the following sequence:

glue_xts_crypt_128bit()
{
	blkcipher_walk_virt(); /* normal migrate_disable() */

	glue_fpu_begin(); /* get atomic */

	while (nbytes) {
		__glue_xts_crypt_128bit();
		blkcipher_walk_done(); /* with nbytes = 0, migrate_enable()
					* while we are atomic */
	};
	glue_fpu_end() /* no longer atomic */
}

and this is why the counter gets out of sync and the warning is printed.
The other problem is that we are non-preemptible between
glue_fpu_begin() and glue_fpu_end() and the latency grows. To fix this,
I shorten the FPU-off region and ensure blkcipher_walk_done() is called
with preemption enabled. This might hurt performance because we now
enable/disable the FPU state more often but we gain lower latency and
the bug is gone.
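
With this change the FPU region is opened and closed inside the walk loop,
and glue_fpu_end() is called before skcipher_walk_done(), so the walk
completion always runs preemptible. Roughly, the reworked XTS path then
looks like this (simplified sketch of glue_xts_req_128bit(), details in
the hunk further down):

glue_xts_req_128bit()
{
	skcipher_walk_virt();

	glue_fpu_begin();		/* get atomic, for tweak_fn() */
	tweak_fn();

	while (nbytes) {
		glue_fpu_begin();	/* re-enable if needed */
		__glue_xts_req_128bit();
		glue_fpu_end();		/* no longer atomic */
		skcipher_walk_done();	/* runs preemptible */
	};
}

The cast5 and the ECB/CBC/CTR glue paths get the same treatment:
cast5_fpu_end()/glue_fpu_end() moves in front of skcipher_walk_done()
inside the loop.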

Reported-by: Don Estabrook <don.estabrook@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/x86/crypto/cast5_avx_glue.c |   21 +++++++++------------
 arch/x86/crypto/glue_helper.c    |   31 ++++++++++++++++---------------
 2 files changed, 25 insertions(+), 27 deletions(-)

--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -61,7 +61,7 @@ static inline void cast5_fpu_end(bool fp
 
 static int ecb_crypt(struct skcipher_request *req, bool enc)
 {
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
@@ -76,7 +76,7 @@ static int ecb_crypt(struct skcipher_req
 		u8 *wsrc = walk.src.virt.addr;
 		u8 *wdst = walk.dst.virt.addr;
 
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
 
 		/* Process multi-block batch */
 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
@@ -105,10 +105,9 @@ static int ecb_crypt(struct skcipher_req
 		} while (nbytes >= bsize);
 
 done:
+		cast5_fpu_end(fpu_enabled);
 		err = skcipher_walk_done(&walk, nbytes);
 	}
-
-	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
@@ -212,7 +211,7 @@ static int cbc_decrypt(struct skcipher_r
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;
@@ -220,12 +219,11 @@ static int cbc_decrypt(struct skcipher_r
 	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
 		nbytes = __cbc_decrypt(ctx, &walk);
+		cast5_fpu_end(fpu_enabled);
 		err = skcipher_walk_done(&walk, nbytes);
 	}
-
-	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
@@ -292,7 +290,7 @@ static int ctr_crypt(struct skcipher_req
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;
@@ -300,13 +298,12 @@ static int ctr_crypt(struct skcipher_req
 	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, &walk, nbytes);
 		nbytes = __ctr_crypt(&walk, ctx);
+		cast5_fpu_end(fpu_enabled);
 		err = skcipher_walk_done(&walk, nbytes);
 	}
 
-	cast5_fpu_end(fpu_enabled);
-
 	if (walk.nbytes) {
 		ctr_crypt_final(&walk, ctx);
 		err = skcipher_walk_done(&walk, 0);
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -38,7 +38,7 @@ int glue_ecb_req_128bit(const struct com
 	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
 	struct skcipher_walk walk;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	unsigned int nbytes;
 	int err;
 
@@ -51,7 +51,7 @@ int glue_ecb_req_128bit(const struct com
 		unsigned int i;
 
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     &walk, fpu_enabled, nbytes);
+					     &walk, false, nbytes);
 		for (i = 0; i < gctx->num_funcs; i++) {
 			func_bytes = bsize * gctx->funcs[i].num_blocks;
 
@@ -69,10 +69,9 @@ int glue_ecb_req_128bit(const struct com
 			if (nbytes < bsize)
 				break;
 		}
+		glue_fpu_end(fpu_enabled);
 		err = skcipher_walk_done(&walk, nbytes);
 	}
-
-	glue_fpu_end(fpu_enabled);
 	return err;
 }
 EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
@@ -115,7 +114,7 @@ int glue_cbc_decrypt_req_128bit(const st
 	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
 	struct skcipher_walk walk;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	unsigned int nbytes;
 	int err;
 
@@ -129,7 +128,7 @@ int glue_cbc_decrypt_req_128bit(const st
 		u128 last_iv;
 
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     &walk, fpu_enabled, nbytes);
+					     &walk, false, nbytes);
 		/* Start of the last block. */
 		src += nbytes / bsize - 1;
 		dst += nbytes / bsize - 1;
@@ -161,10 +160,10 @@ int glue_cbc_decrypt_req_128bit(const st
 done:
 		u128_xor(dst, dst, (u128 *)walk.iv);
 		*(u128 *)walk.iv = last_iv;
+		glue_fpu_end(fpu_enabled);
 		err = skcipher_walk_done(&walk, nbytes);
 	}
 
-	glue_fpu_end(fpu_enabled);
 	return err;
 }
 EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
@@ -175,7 +174,7 @@ int glue_ctr_req_128bit(const struct com
 	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
 	struct skcipher_walk walk;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	unsigned int nbytes;
 	int err;
 
@@ -189,7 +188,7 @@ int glue_ctr_req_128bit(const struct com
 		le128 ctrblk;
 
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     &walk, fpu_enabled, nbytes);
+					     &walk, false, nbytes);
 
 		be128_to_le128(&ctrblk, (be128 *)walk.iv);
 
@@ -213,11 +212,10 @@ int glue_ctr_req_128bit(const struct com
 		}
 
 		le128_to_be128((be128 *)walk.iv, &ctrblk);
+		glue_fpu_end(fpu_enabled);
 		err = skcipher_walk_done(&walk, nbytes);
 	}
 
-	glue_fpu_end(fpu_enabled);
-
 	if (nbytes) {
 		le128 ctrblk;
 		u128 tmp;
@@ -278,7 +276,7 @@ int glue_xts_req_128bit(const struct com
 {
 	const unsigned int bsize = 128 / 8;
 	struct skcipher_walk walk;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	unsigned int nbytes;
 	int err;
 
@@ -289,21 +287,24 @@ int glue_xts_req_128bit(const struct com
 
 	/* set minimum length to bsize, for tweak_fn */
 	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-				     &walk, fpu_enabled,
+				     &walk, false,
 				     nbytes < bsize ? bsize : nbytes);
 
 	/* calculate first value of T */
 	tweak_fn(tweak_ctx, walk.iv, walk.iv);
 
 	while (nbytes) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     &walk, fpu_enabled,
+					     nbytes < bsize ? bsize : nbytes);
 		nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
 
+		glue_fpu_end(fpu_enabled);
+		fpu_enabled = false;
 		err = skcipher_walk_done(&walk, nbytes);
 		nbytes = walk.nbytes;
 	}
 
-	glue_fpu_end(fpu_enabled);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(glue_xts_req_128bit);