Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fixes from Herbert Xu:
 "This fixes a potential scheduling latency problem for the algorithms
  used by WireGuard"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: arch/nhpoly1305 - process in explicit 4k chunks
  crypto: arch/lib - limit simd usage to 4k chunks
This commit is contained in:
Linus Torvalds 2020-05-06 10:20:00 -07:00
commit 3c40cdb0e9
11 changed files with 69 additions and 34 deletions

View file

@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
return; return;
} }
kernel_neon_begin(); do {
chacha_doneon(state, dst, src, bytes, nrounds); unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_end();
kernel_neon_begin();
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
bytes -= todo;
src += todo;
dst += todo;
} while (bytes);
} }
EXPORT_SYMBOL(chacha_crypt_arch); EXPORT_SYMBOL(chacha_crypt_arch);

View file

@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen); return crypto_nhpoly1305_update(desc, src, srclen);
do { do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin(); kernel_neon_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);

View file

@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
if (static_branch_likely(&have_neon) && do_neon) { if (static_branch_likely(&have_neon) && do_neon) {
kernel_neon_begin(); do {
poly1305_blocks_neon(&dctx->h, src, len, 1); unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_end();
kernel_neon_begin();
poly1305_blocks_neon(&dctx->h, src, todo, 1);
kernel_neon_end();
len -= todo;
src += todo;
} while (len);
} else { } else {
poly1305_blocks_arm(&dctx->h, src, len, 1); poly1305_blocks_arm(&dctx->h, src, len, 1);
src += len;
} }
src += len;
nbytes %= POLY1305_BLOCK_SIZE; nbytes %= POLY1305_BLOCK_SIZE;
} }

View file

@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
!crypto_simd_usable()) !crypto_simd_usable())
return chacha_crypt_generic(state, dst, src, bytes, nrounds); return chacha_crypt_generic(state, dst, src, bytes, nrounds);
kernel_neon_begin(); do {
chacha_doneon(state, dst, src, bytes, nrounds); unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_end();
kernel_neon_begin();
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
bytes -= todo;
src += todo;
dst += todo;
} while (bytes);
} }
EXPORT_SYMBOL(chacha_crypt_arch); EXPORT_SYMBOL(chacha_crypt_arch);

View file

@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen); return crypto_nhpoly1305_update(desc, src, srclen);
do { do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin(); kernel_neon_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);

View file

@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
if (static_branch_likely(&have_neon) && crypto_simd_usable()) { if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
kernel_neon_begin(); do {
poly1305_blocks_neon(&dctx->h, src, len, 1); unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_end();
kernel_neon_begin();
poly1305_blocks_neon(&dctx->h, src, todo, 1);
kernel_neon_end();
len -= todo;
src += todo;
} while (len);
} else { } else {
poly1305_blocks(&dctx->h, src, len, 1); poly1305_blocks(&dctx->h, src, len, 1);
src += len;
} }
src += len;
nbytes %= POLY1305_BLOCK_SIZE; nbytes %= POLY1305_BLOCK_SIZE;
} }

View file

@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
const u32 inc) const u32 inc)
{ {
/* SIMD disables preemption, so relax after processing each page. */ /* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
blake2s_compress_generic(state, block, nblocks, inc); blake2s_compress_generic(state, block, nblocks, inc);
return; return;
} }
for (;;) { do {
const size_t blocks = min_t(size_t, nblocks, const size_t blocks = min_t(size_t, nblocks,
PAGE_SIZE / BLAKE2S_BLOCK_SIZE); SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin(); kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && if (IS_ENABLED(CONFIG_AS_AVX512) &&
@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
kernel_fpu_end(); kernel_fpu_end();
nblocks -= blocks; nblocks -= blocks;
if (!nblocks)
break;
block += blocks * BLAKE2S_BLOCK_SIZE; block += blocks * BLAKE2S_BLOCK_SIZE;
} } while (nblocks);
} }
EXPORT_SYMBOL(blake2s_compress_arch); EXPORT_SYMBOL(blake2s_compress_arch);

View file

@ -153,9 +153,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
bytes <= CHACHA_BLOCK_SIZE) bytes <= CHACHA_BLOCK_SIZE)
return chacha_crypt_generic(state, dst, src, bytes, nrounds); return chacha_crypt_generic(state, dst, src, bytes, nrounds);
kernel_fpu_begin(); do {
chacha_dosimd(state, dst, src, bytes, nrounds); unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_fpu_end();
kernel_fpu_begin();
chacha_dosimd(state, dst, src, todo, nrounds);
kernel_fpu_end();
bytes -= todo;
src += todo;
dst += todo;
} while (bytes);
} }
EXPORT_SYMBOL(chacha_crypt_arch); EXPORT_SYMBOL(chacha_crypt_arch);

View file

@ -29,7 +29,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen); return crypto_nhpoly1305_update(desc, src, srclen);
do { do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin(); kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);

View file

@ -29,7 +29,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen); return crypto_nhpoly1305_update(desc, src, srclen);
do { do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin(); kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);

View file

@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
struct poly1305_arch_internal *state = ctx; struct poly1305_arch_internal *state = ctx;
/* SIMD disables preemption, so relax after processing each page. */ /* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
PAGE_SIZE % POLY1305_BLOCK_SIZE); SZ_4K % POLY1305_BLOCK_SIZE);
if (!static_branch_likely(&poly1305_use_avx) || if (!static_branch_likely(&poly1305_use_avx) ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
return; return;
} }
for (;;) { do {
const size_t bytes = min_t(size_t, len, PAGE_SIZE); const size_t bytes = min_t(size_t, len, SZ_4K);
kernel_fpu_begin(); kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
else else
poly1305_blocks_avx(ctx, inp, bytes, padbit); poly1305_blocks_avx(ctx, inp, bytes, padbit);
kernel_fpu_end(); kernel_fpu_end();
len -= bytes; len -= bytes;
if (!len)
break;
inp += bytes; inp += bytes;
} } while (len);
} }
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],