crypto: arm64/aes-ccm - avoid by-ref argument for ce_aes_ccm_auth_data

With the SIMD code path removed, we can clean up the CCM auth-only path
a bit further, by passing the 'macp' input buffer pointer by value,
rather than by reference, and taking the output value from the
function's return value.

This way, the compiler is no longer forced to allocate macp on the
stack. This is not expected to make any difference in practice; it just
makes for slightly cleaner code.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Ard Biesheuvel 2021-08-27 09:03:42 +02:00 committed by Herbert Xu
parent 741691c446
commit 898387e40c
2 changed files with 18 additions and 24 deletions

View File

@ -12,22 +12,21 @@
.arch armv8-a+crypto .arch armv8-a+crypto
/* /*
* void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
* u32 *macp, u8 const rk[], u32 rounds); * u32 macp, u8 const rk[], u32 rounds);
*/ */
SYM_FUNC_START(ce_aes_ccm_auth_data) SYM_FUNC_START(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
ld1 {v0.16b}, [x0] /* load mac */ ld1 {v0.16b}, [x0] /* load mac */
cbz w8, 1f cbz w3, 1f
sub w8, w8, #16 sub w3, w3, #16
eor v1.16b, v1.16b, v1.16b eor v1.16b, v1.16b, v1.16b
0: ldrb w7, [x1], #1 /* get 1 byte of input */ 0: ldrb w7, [x1], #1 /* get 1 byte of input */
subs w2, w2, #1 subs w2, w2, #1
add w8, w8, #1 add w3, w3, #1
ins v1.b[0], w7 ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */ beq 8f /* out of input? */
cbnz w8, 0b cbnz w3, 0b
eor v0.16b, v0.16b, v1.16b eor v0.16b, v0.16b, v1.16b
1: ld1 {v3.4s}, [x4] /* load first round key */ 1: ld1 {v3.4s}, [x4] /* load first round key */
prfm pldl1strm, [x1] prfm pldl1strm, [x1]
@ -62,7 +61,7 @@ SYM_FUNC_START(ce_aes_ccm_auth_data)
beq 10f beq 10f
adds w2, w2, #16 adds w2, w2, #16
beq 10f beq 10f
mov w8, w2 mov w3, w2
7: ldrb w7, [x1], #1 7: ldrb w7, [x1], #1
umov w6, v0.b[0] umov w6, v0.b[0]
eor w6, w6, w7 eor w6, w6, w7
@ -71,15 +70,15 @@ SYM_FUNC_START(ce_aes_ccm_auth_data)
beq 10f beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b b 7b
8: cbz w8, 91f 8: cbz w3, 91f
mov w7, w8 mov w7, w3
add w8, w8, #16 add w3, w3, #16
9: ext v1.16b, v1.16b, v1.16b, #1 9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1 adds w7, w7, #1
bne 9b bne 9b
91: eor v0.16b, v0.16b, v1.16b 91: eor v0.16b, v0.16b, v1.16b
st1 {v0.16b}, [x0] st1 {v0.16b}, [x0]
10: str w8, [x3] 10: mov w0, w3
ret ret
SYM_FUNC_END(ce_aes_ccm_auth_data) SYM_FUNC_END(ce_aes_ccm_auth_data)

View File

@ -27,8 +27,8 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
return 6 + ctx->key_length / 4; return 6 + ctx->key_length / 4;
} }
asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, asmlinkage u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
u32 *macp, u32 const rk[], u32 rounds); u32 macp, u32 const rk[], u32 rounds);
asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
u32 const rk[], u32 rounds, u8 mac[], u32 const rk[], u32 rounds, u8 mac[],
@ -94,13 +94,6 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
return 0; return 0;
} }
static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
u32 abytes, u32 *macp)
{
ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
num_rounds(key));
}
static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
{ {
struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_aead *aead = crypto_aead_reqtfm(req);
@ -120,7 +113,8 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
ltag.len = 6; ltag.len = 6;
} }
ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp); macp = ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, macp,
ctx->key_enc, num_rounds(ctx));
scatterwalk_start(&walk, req->src); scatterwalk_start(&walk, req->src);
do { do {
@ -133,13 +127,14 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
} }
n = min_t(u32, n, SZ_4K); /* yield NEON at least every 4k */ n = min_t(u32, n, SZ_4K); /* yield NEON at least every 4k */
p = scatterwalk_map(&walk); p = scatterwalk_map(&walk);
ccm_update_mac(ctx, mac, p, n, &macp);
macp = ce_aes_ccm_auth_data(mac, p, n, macp, ctx->key_enc,
num_rounds(ctx));
if (len / SZ_4K > (len - n) / SZ_4K) { if (len / SZ_4K > (len - n) / SZ_4K) {
kernel_neon_end(); kernel_neon_end();
kernel_neon_begin(); kernel_neon_begin();
} }
len -= n; len -= n;
scatterwalk_unmap(p); scatterwalk_unmap(p);