This update includes the following changes:

API:
 
 - Use kmap_local instead of kmap_atomic.
 - Change request callback to take void pointer.
 - Print FIPS status in /proc/crypto (when enabled).
 
 Algorithms:
 
 - Add rfc4106/gcm support on arm64.
 - Add ARIA AVX2/512 support on x86.
 
 Drivers:
 
 - Add TRNG driver for StarFive SoC.
 - Delete ux500/hash driver (subsumed by stm32/hash).
 - Add zlib support in qat.
 - Add RSA support in aspeed.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEn51F/lCuNhUwmDeSxycdCkmxi6cFAmPzAiwACgkQxycdCkmx
 i6et8xAAoO3w5MZFGXMzWsYhfSZFdceXBEQfDR7JOCdHxpMIQhw0FLlb0uttFk6m
 SeWrdP9wiifBDoCmw7qffFJml8ZftPL/XeXjob2d9v7jKbPyw3lDSIdsNfN/5EEL
 oIc9915zwrgawvahPAa+PQ4Ue03qRjUyOcV42dpd1W3NYhzDVHoK5OUU+mEFYDvx
 Sgw/YUugKf0VXkVDFzG5049+CPcheyRZqclAo9jyl2eZiXujgUyV33nxRCtqIA+t
 7jlHKwi+6QzFHY0CX5BvShR8xyEuH5MLoU3H/jYGXnRb3nEpRYAEO4VZchIHqF0F
 Y6pKIKc6Q8OyIVY8RsjQY3hioCqYnQFZ5Xtc1zGtOYEitVLbkmItMG0mVn0XOfyt
 gJDi6gkEw5uPUbEQdI4R1xEgJ8eCckMsOJ+uRxqTm+uLqNDxPbsB9bohKniMogXV
 lDlVXjU23AA9VeKtqU8FvWjfgqsN47X4aoq1j4/4aI7X9F7P9FOP21TZloP7+ssj
 PFrzNaRXUrMEsvyS1wqPegIh987lj6WkH4hyU0wjzaIq4IQELidHsSXFS12iWIPH
 kTEoC/trAVoYSr0zXKWUCs4h/x0FztVNbjs4KiDP2FLXX1RzeVZ0WlaXZhryHr+n
 1+8yCuS6tVofAbSX0wNkZdf0x5+3CIBw4kqSIvjKDPYYEfIDaT0=
 =dMYe
 -----END PGP SIGNATURE-----

Merge tag 'v6.3-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 "API:
   - Use kmap_local instead of kmap_atomic
   - Change request callback to take void pointer
   - Print FIPS status in /proc/crypto (when enabled)

  Algorithms:
   - Add rfc4106/gcm support on arm64
   - Add ARIA AVX2/512 support on x86

  Drivers:
   - Add TRNG driver for StarFive SoC
   - Delete ux500/hash driver (subsumed by stm32/hash)
   - Add zlib support in qat
   - Add RSA support in aspeed"
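
The first two API bullets above (kmap_local and the void-pointer request callback) are mechanical conversions that account for many of the commits in the shortlog below, including the "Remove completion function scaffolding" series. The sketch that follows is illustrative only and is not taken from this merge: "my_ctx", "my_req_done", "example_copy_from_page" and "example_setup" are hypothetical names, while the kernel APIs referenced (kmap_local_page/kunmap_local and skcipher_request_set_callback) are the real ones these conversions target.

/*
 * Hedged sketch: the old kmap_atomic/kunmap_atomic pair becomes
 * kmap_local_page/kunmap_local, and a completion callback written against
 * the new signature takes an opaque void pointer instead of
 * struct crypto_async_request, so no cast is needed.
 */
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/crypto.h>
#include <crypto/skcipher.h>

struct my_ctx {				/* hypothetical per-request state */
	int status;
};

static void example_copy_from_page(void *dst, struct page *page, size_t len)
{
	void *addr = kmap_local_page(page);	/* was: kmap_atomic(page) */

	memcpy(dst, addr, len);
	kunmap_local(addr);			/* was: kunmap_atomic(addr) */
}

static void my_req_done(void *data, int err)	/* new callback signature */
{
	struct my_ctx *ctx = data;

	ctx->status = err;
}

static void example_setup(struct skcipher_request *req, struct my_ctx *ctx)
{
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      my_req_done, ctx);
}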

* tag 'v6.3-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (156 commits)
  crypto: x86/aria-avx - Do not use avx2 instructions
  crypto: aspeed - Fix modular aspeed-acry
  crypto: hisilicon/qm - fix coding style issues
  crypto: hisilicon/qm - update comments to match function
  crypto: hisilicon/qm - change function names
  crypto: hisilicon/qm - use min() instead of min_t()
  crypto: hisilicon/qm - remove some unused defines
  crypto: proc - Print fips status
  crypto: crypto4xx - Call dma_unmap_page when done
  crypto: octeontx2 - Fix objects shared between several modules
  crypto: nx - Fix sparse warnings
  crypto: ecc - Silence sparse warning
  tls: Pass rec instead of aead_req into tls_encrypt_done
  crypto: api - Remove completion function scaffolding
  tls: Remove completion function scaffolding
  tipc: Remove completion function scaffolding
  net: ipv6: Remove completion function scaffolding
  net: ipv4: Remove completion function scaffolding
  net: macsec: Remove completion function scaffolding
  dm: Remove completion function scaffolding
  ...
Merged by Linus Torvalds on 2023-02-21 18:10:50 -08:00 (commit 36289a03bc).
186 changed files with 6382 additions and 4056 deletions

@ -1,6 +1,6 @@
What: /sys/bus/pci/devices/<BDF>/qat/state
Date: June 2022
KernelVersion: 5.20
KernelVersion: 6.0
Contact: qat-linux@intel.com
Description: (RW) Reports the current state of the QAT device. Write to
the file to start or stop the device.
@ -18,7 +18,7 @@ Description: (RW) Reports the current state of the QAT device. Write to
What: /sys/bus/pci/devices/<BDF>/qat/cfg_services
Date: June 2022
KernelVersion: 5.20
KernelVersion: 6.0
Contact: qat-linux@intel.com
Description: (RW) Reports the current configuration of the QAT device.
Write to the file to change the configured services.


@ -0,0 +1,37 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/bus/aspeed,ast2600-ahbc.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: ASPEED Advanced High-Performance Bus Controller (AHBC)
maintainers:
- Neal Liu <neal_liu@aspeedtech.com>
- Chia-Wei Wang <chiawei_wang@aspeedtech.com>
description: |
Advanced High-performance Bus Controller (AHBC) supports plenty of mechanisms
including a priority arbiter, an address decoder and a data multiplexer
to control the overall operations of Advanced High-performance Bus (AHB).
properties:
compatible:
enum:
- aspeed,ast2600-ahbc
reg:
maxItems: 1
required:
- compatible
- reg
additionalProperties: false
examples:
- |
ahbc@1e600000 {
compatible = "aspeed,ast2600-ahbc";
reg = <0x1e600000 0x100>;
};


@ -14,6 +14,7 @@ properties:
enum:
- allwinner,sun8i-h3-crypto
- allwinner,sun8i-r40-crypto
- allwinner,sun20i-d1-crypto
- allwinner,sun50i-a64-crypto
- allwinner,sun50i-h5-crypto
- allwinner,sun50i-h6-crypto
@ -29,6 +30,7 @@ properties:
- description: Bus clock
- description: Module clock
- description: MBus clock
- description: TRNG clock (RC oscillator)
minItems: 2
clock-names:
@ -36,6 +38,7 @@ properties:
- const: bus
- const: mod
- const: ram
- const: trng
minItems: 2
resets:
@ -44,19 +47,33 @@ properties:
if:
properties:
compatible:
const: allwinner,sun50i-h6-crypto
enum:
- allwinner,sun20i-d1-crypto
then:
properties:
clocks:
minItems: 3
minItems: 4
clock-names:
minItems: 3
minItems: 4
else:
properties:
clocks:
maxItems: 2
clock-names:
maxItems: 2
if:
properties:
compatible:
const: allwinner,sun50i-h6-crypto
then:
properties:
clocks:
minItems: 3
maxItems: 3
clock-names:
minItems: 3
maxItems: 3
else:
properties:
clocks:
maxItems: 2
clock-names:
maxItems: 2
required:
- compatible


@ -0,0 +1,49 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/crypto/aspeed,ast2600-acry.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: ASPEED ACRY ECDSA/RSA Hardware Accelerator Engines
maintainers:
- Neal Liu <neal_liu@aspeedtech.com>
description:
The ACRY ECDSA/RSA engines are designed to accelerate the throughput
of ECDSA/RSA signature and verification. Basically, ACRY can be
divided into two independent engines - ECC Engine and RSA Engine.
properties:
compatible:
enum:
- aspeed,ast2600-acry
reg:
items:
- description: acry base address & size
- description: acry sram base address & size
clocks:
maxItems: 1
interrupts:
maxItems: 1
required:
- compatible
- reg
- clocks
- interrupts
additionalProperties: false
examples:
- |
#include <dt-bindings/clock/ast2600-clock.h>
acry: crypto@1e6fa000 {
compatible = "aspeed,ast2600-acry";
reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>;
interrupts = <160>;
clocks = <&syscon ASPEED_CLK_GATE_RSACLK>;
};


@ -6,12 +6,18 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: STMicroelectronics STM32 HASH
description: The STM32 HASH block is built on the HASH block found in
the STn8820 SoC introduced in 2007, and subsequently used in the U8500
SoC in 2010.
maintainers:
- Lionel Debieve <lionel.debieve@foss.st.com>
properties:
compatible:
enum:
- st,stn8820-hash
- stericsson,ux500-hash
- st,stm32f456-hash
- st,stm32f756-hash
@ -41,11 +47,26 @@ properties:
maximum: 2
default: 0
power-domains:
maxItems: 1
required:
- compatible
- reg
- clocks
- interrupts
allOf:
- if:
properties:
compatible:
items:
const: stericsson,ux500-hash
then:
properties:
interrupts: false
else:
required:
- interrupts
additionalProperties: false


@ -0,0 +1,55 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/rng/starfive,jh7110-trng.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: StarFive SoC TRNG Module
maintainers:
- Jia Jie Ho <jiajie.ho@starfivetech.com>
properties:
compatible:
const: starfive,jh7110-trng
reg:
maxItems: 1
clocks:
items:
- description: Hardware reference clock
- description: AHB reference clock
clock-names:
items:
- const: hclk
- const: ahb
resets:
maxItems: 1
interrupts:
maxItems: 1
required:
- compatible
- reg
- clocks
- clock-names
- resets
- interrupts
additionalProperties: false
examples:
- |
rng: rng@1600C000 {
compatible = "starfive,jh7110-trng";
reg = <0x1600C000 0x4000>;
clocks = <&clk 15>, <&clk 16>;
clock-names = "hclk", "ahb";
resets = <&reset 3>;
interrupts = <30>;
};
...


@ -3149,7 +3149,7 @@ ASPEED CRYPTO DRIVER
M: Neal Liu <neal_liu@aspeedtech.com>
L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers)
S: Maintained
F: Documentation/devicetree/bindings/crypto/aspeed,ast2500-hace.yaml
F: Documentation/devicetree/bindings/crypto/aspeed,*
F: drivers/crypto/aspeed/
ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS
@ -19769,6 +19769,12 @@ F: Documentation/devicetree/bindings/reset/starfive,jh7100-reset.yaml
F: drivers/reset/reset-starfive-jh7100.c
F: include/dt-bindings/reset/starfive-jh7100.h
STARFIVE TRNG DRIVER
M: Jia Jie Ho <jiajie.ho@starfivetech.com>
S: Supported
F: Documentation/devicetree/bindings/rng/starfive*
F: drivers/char/hw_random/jh7110-trng.c
STATIC BRANCH/CALL
M: Peter Zijlstra <peterz@infradead.org>
M: Josh Poimboeuf <jpoimboe@kernel.org>


@ -98,6 +98,11 @@
<0x40466000 0x2000>;
};
ahbc: bus@1e600000 {
compatible = "aspeed,ast2600-ahbc", "syscon";
reg = <0x1e600000 0x100>;
};
fmc: spi@1e620000 {
reg = <0x1e620000 0xc4>, <0x20000000 0x10000000>;
#address-cells = <1>;
@ -431,6 +436,14 @@
reg = <0x1e6f2000 0x1000>;
};
acry: crypto@1e6fa000 {
compatible = "aspeed,ast2600-acry";
reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>;
interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&syscon ASPEED_CLK_GATE_RSACLK>;
aspeed,ahbc = <&ahbc>;
};
video: video@1e700000 {
compatible = "aspeed,ast2600-video-engine";
reg = <0x1e700000 0x1000>;


@ -21,31 +21,29 @@
#include "sha1.h"
asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
asmlinkage void sha1_block_data_order(struct sha1_state *digest,
const u8 *data, int rounds);
int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
/* make sure casting to sha1_block_fn() is safe */
/* make sure signature matches sha1_block_fn() */
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
return sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_block_data_order);
return sha1_base_do_update(desc, data, len, sha1_block_data_order);
}
EXPORT_SYMBOL_GPL(sha1_update_arm);
static int sha1_final(struct shash_desc *desc, u8 *out)
{
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
sha1_base_do_finalize(desc, sha1_block_data_order);
return sha1_base_finish(desc, out);
}
int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_block_data_order);
sha1_base_do_update(desc, data, len, sha1_block_data_order);
return sha1_final(desc, out);
}
EXPORT_SYMBOL_GPL(sha1_finup_arm);


@ -161,43 +161,39 @@ static int ccm_encrypt(struct aead_request *req)
memcpy(buf, req->iv, AES_BLOCK_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, false);
if (unlikely(err))
return err;
kernel_neon_begin();
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
do {
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
bool final = walk.nbytes == walk.total;
if (walk.nbytes == walk.total)
if (final)
tail = 0;
ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
if (walk.nbytes == walk.total)
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
if (!final)
kernel_neon_end();
err = skcipher_walk_done(&walk, tail);
if (!final)
kernel_neon_begin();
}
kernel_neon_end();
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
if (walk.nbytes) {
err = skcipher_walk_done(&walk, tail);
if (unlikely(err))
return err;
if (unlikely(walk.nbytes))
kernel_neon_begin();
}
} while (walk.nbytes);
kernel_neon_end();
/* copy authtag to end of dst */
scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
return 0;
return err;
}
static int ccm_decrypt(struct aead_request *req)
@ -219,37 +215,36 @@ static int ccm_decrypt(struct aead_request *req)
memcpy(buf, req->iv, AES_BLOCK_SIZE);
err = skcipher_walk_aead_decrypt(&walk, req, false);
if (unlikely(err))
return err;
kernel_neon_begin();
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
do {
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
bool final = walk.nbytes == walk.total;
if (walk.nbytes == walk.total)
if (final)
tail = 0;
ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes - tail, ctx->key_enc,
num_rounds(ctx), mac, walk.iv);
if (walk.nbytes == walk.total)
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
if (!final)
kernel_neon_end();
err = skcipher_walk_done(&walk, tail);
if (!final)
kernel_neon_begin();
}
kernel_neon_end();
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
if (walk.nbytes) {
err = skcipher_walk_done(&walk, tail);
if (unlikely(err))
return err;
if (unlikely(walk.nbytes))
kernel_neon_begin();
}
} while (walk.nbytes);
kernel_neon_end();
if (unlikely(err))
return err;
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(buf, req->src,


@ -9,6 +9,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/gcm.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
@ -28,7 +29,8 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
#define GCM_IV_SIZE 12
#define RFC4106_NONCE_SIZE 4
struct ghash_key {
be128 k;
@ -43,6 +45,7 @@ struct ghash_desc_ctx {
struct gcm_aes_ctx {
struct crypto_aes_ctx aes_key;
u8 nonce[RFC4106_NONCE_SIZE];
struct ghash_key ghash_key;
};
@ -226,8 +229,8 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
return 6 + ctx->key_length / 4;
}
static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
unsigned int keylen)
static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
unsigned int keylen)
{
struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
u8 key[GHASH_BLOCK_SIZE];
@ -258,17 +261,9 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
return 0;
}
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
switch (authsize) {
case 4:
case 8:
case 12 ... 16:
break;
default:
return -EINVAL;
}
return 0;
return crypto_gcm_check_authsize(authsize);
}
static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
@ -302,13 +297,12 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
}
}
static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
u8 buf[GHASH_BLOCK_SIZE];
struct scatter_walk walk;
u32 len = req->assoclen;
int buf_count = 0;
scatterwalk_start(&walk, req->src);
@ -338,27 +332,25 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
}
}
static int gcm_encrypt(struct aead_request *req)
static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
u8 buf[AES_BLOCK_SIZE];
u8 iv[AES_BLOCK_SIZE];
u64 dg[2] = {};
be128 lengths;
u8 *tag;
int err;
lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.a = cpu_to_be64(assoclen * 8);
lengths.b = cpu_to_be64(req->cryptlen * 8);
if (req->assoclen)
gcm_calculate_auth_mac(req, dg);
if (assoclen)
gcm_calculate_auth_mac(req, dg, assoclen);
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_AES_IV_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, false);
@ -403,7 +395,7 @@ static int gcm_encrypt(struct aead_request *req)
return 0;
}
static int gcm_decrypt(struct aead_request *req)
static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
@ -412,21 +404,19 @@ static int gcm_decrypt(struct aead_request *req)
struct skcipher_walk walk;
u8 otag[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u8 iv[AES_BLOCK_SIZE];
u64 dg[2] = {};
be128 lengths;
u8 *tag;
int ret;
int err;
lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.a = cpu_to_be64(assoclen * 8);
lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
if (req->assoclen)
gcm_calculate_auth_mac(req, dg);
if (assoclen)
gcm_calculate_auth_mac(req, dg, assoclen);
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_AES_IV_SIZE);
scatterwalk_map_and_copy(otag, req->src,
req->assoclen + req->cryptlen - authsize,
@ -471,14 +461,76 @@ static int gcm_decrypt(struct aead_request *req)
return ret ? -EBADMSG : 0;
}
static struct aead_alg gcm_aes_alg = {
.ivsize = GCM_IV_SIZE,
static int gcm_aes_encrypt(struct aead_request *req)
{
u8 iv[AES_BLOCK_SIZE];
memcpy(iv, req->iv, GCM_AES_IV_SIZE);
return gcm_encrypt(req, iv, req->assoclen);
}
static int gcm_aes_decrypt(struct aead_request *req)
{
u8 iv[AES_BLOCK_SIZE];
memcpy(iv, req->iv, GCM_AES_IV_SIZE);
return gcm_decrypt(req, iv, req->assoclen);
}
static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
unsigned int keylen)
{
struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
int err;
keylen -= RFC4106_NONCE_SIZE;
err = gcm_aes_setkey(tfm, inkey, keylen);
if (err)
return err;
memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
return 0;
}
static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
return crypto_rfc4106_check_authsize(authsize);
}
static int rfc4106_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
u8 iv[AES_BLOCK_SIZE];
memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
return crypto_ipsec_check_assoclen(req->assoclen) ?:
gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}
static int rfc4106_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
u8 iv[AES_BLOCK_SIZE];
memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
return crypto_ipsec_check_assoclen(req->assoclen) ?:
gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}
static struct aead_alg gcm_aes_algs[] = {{
.ivsize = GCM_AES_IV_SIZE,
.chunksize = AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
.setkey = gcm_setkey,
.setauthsize = gcm_setauthsize,
.encrypt = gcm_encrypt,
.decrypt = gcm_decrypt,
.setkey = gcm_aes_setkey,
.setauthsize = gcm_aes_setauthsize,
.encrypt = gcm_aes_encrypt,
.decrypt = gcm_aes_decrypt,
.base.cra_name = "gcm(aes)",
.base.cra_driver_name = "gcm-aes-ce",
@ -487,7 +539,23 @@ static struct aead_alg gcm_aes_alg = {
.base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
4 * sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
};
}, {
.ivsize = GCM_RFC4106_IV_SIZE,
.chunksize = AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
.setkey = rfc4106_setkey,
.setauthsize = rfc4106_setauthsize,
.encrypt = rfc4106_encrypt,
.decrypt = rfc4106_decrypt,
.base.cra_name = "rfc4106(gcm(aes))",
.base.cra_driver_name = "rfc4106-gcm-aes-ce",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
4 * sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
}};
static int __init ghash_ce_mod_init(void)
{
@ -495,7 +563,8 @@ static int __init ghash_ce_mod_init(void)
return -ENODEV;
if (cpu_have_named_feature(PMULL))
return crypto_register_aead(&gcm_aes_alg);
return crypto_register_aeads(gcm_aes_algs,
ARRAY_SIZE(gcm_aes_algs));
return crypto_register_shash(&ghash_alg);
}
@ -503,7 +572,7 @@ static int __init ghash_ce_mod_init(void)
static void __exit ghash_ce_mod_exit(void)
{
if (cpu_have_named_feature(PMULL))
crypto_unregister_aead(&gcm_aes_alg);
crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs));
else
crypto_unregister_shash(&ghash_alg);
}
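
The hunk above turns the single "gcm(aes)" algorithm into an array that also registers "rfc4106(gcm(aes))", with rfc4106_setkey() splitting the last RFC4106_NONCE_SIZE bytes of the key off into ctx->nonce. Below is a minimal, hedged sketch of how a kernel user reaches this through the generic AEAD API (hypothetical function name, error paths trimmed); the key layout is the standard RFC 4106 one, i.e. the AES key followed by a 4-byte salt.

#include <crypto/aead.h>
#include <linux/err.h>

static int example_rfc4106_setup(const u8 *key_and_salt, unsigned int len)
{
	struct crypto_aead *tfm;
	int err;

	tfm = crypto_alloc_aead("rfc4106(gcm(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* len is the AES key length plus the 4 salt bytes, e.g. 16 + 4 */
	err = crypto_aead_setkey(tfm, key_and_salt, len);
	if (!err)
		err = crypto_aead_setauthsize(tfm, 16);	/* 8, 12 or 16 for rfc4106 */

	crypto_free_aead(tfm);
	return err;
}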


@ -166,7 +166,7 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
unsigned int nbytes, u8 *mac))
{
u8 __aligned(8) ctr0[SM4_BLOCK_SIZE];
int err;
int err = 0;
/* preserve the initial ctr0 for the TAG */
memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
@ -177,33 +177,37 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
do {
while (walk->nbytes && walk->nbytes != walk->total) {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
if (walk->nbytes == walk->total)
tail = 0;
if (walk->nbytes - tail)
sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv,
walk->nbytes - tail, mac);
if (walk->nbytes == walk->total)
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
walk->src.virt.addr, walk->iv,
walk->nbytes - tail, mac);
kernel_neon_end();
if (walk->nbytes) {
err = skcipher_walk_done(walk, tail);
if (err)
return err;
if (walk->nbytes)
kernel_neon_begin();
}
} while (walk->nbytes > 0);
err = skcipher_walk_done(walk, tail);
return 0;
kernel_neon_begin();
}
if (walk->nbytes) {
sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
walk->src.virt.addr, walk->iv,
walk->nbytes, mac);
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
kernel_neon_end();
err = skcipher_walk_done(walk, 0);
} else {
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
kernel_neon_end();
}
return err;
}
static int ccm_encrypt(struct aead_request *req)


@ -135,22 +135,23 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[])
}
static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
struct sm4_gcm_ctx *ctx, u8 ghash[],
u8 ghash[], int err,
void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc,
u8 *dst, const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths))
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) iv[SM4_BLOCK_SIZE];
be128 __aligned(8) lengths;
int err;
memset(ghash, 0, SM4_BLOCK_SIZE);
lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.b = cpu_to_be64(walk->total * 8);
memcpy(iv, walk->iv, GCM_IV_SIZE);
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
kernel_neon_begin();
@ -158,49 +159,51 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
if (req->assoclen)
gcm_calculate_auth_mac(req, ghash);
do {
while (walk->nbytes) {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
if (walk->nbytes == walk->total) {
tail = 0;
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes, ghash,
ctx->ghash_table,
(const u8 *)&lengths);
} else if (walk->nbytes - tail) {
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes - tail, ghash,
ctx->ghash_table, NULL);
kernel_neon_end();
return skcipher_walk_done(walk, 0);
}
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes - tail, ghash,
ctx->ghash_table, NULL);
kernel_neon_end();
err = skcipher_walk_done(walk, tail);
if (err)
return err;
if (walk->nbytes)
kernel_neon_begin();
} while (walk->nbytes > 0);
return 0;
kernel_neon_begin();
}
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv,
walk->nbytes, ghash, ctx->ghash_table,
(const u8 *)&lengths);
kernel_neon_end();
return err;
}
static int gcm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_encrypt(&walk, req, false);
if (err)
return err;
err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc);
err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_enc);
if (err)
return err;
@ -215,17 +218,13 @@ static int gcm_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(aead);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
u8 authtag[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_decrypt(&walk, req, false);
if (err)
return err;
err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec);
err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_dec);
if (err)
return err;


@ -398,10 +398,6 @@ static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
if (err)
return err;
/* In fips mode only 128 bit or 256 bit keys are valid */
if (fips_enabled && key_len != 32 && key_len != 64)
return -EINVAL;
/* Pick the correct function code based on the key length */
fc = (key_len == 32) ? CPACF_KM_XTS_128 :
(key_len == 64) ? CPACF_KM_XTS_256 : 0;


@ -474,7 +474,7 @@ static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
return rc;
/*
* xts_check_key verifies the key length is not odd and makes
* xts_verify_key verifies the key length is not odd and makes
* sure that the two keys are not the same. This can be done
* on the two protected keys as well
*/


@ -19,3 +19,8 @@ config AS_TPAUSE
def_bool $(as-instr,tpause %ecx)
help
Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
config AS_GFNI
def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
help
Supported by binutils >= 2.30 and LLVM integrated assembler


@ -304,6 +304,44 @@ config CRYPTO_ARIA_AESNI_AVX_X86_64
Processes 16 blocks in parallel.
config CRYPTO_ARIA_AESNI_AVX2_X86_64
tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX2/GFNI)"
depends on X86 && 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_ARIA
select CRYPTO_ARIA_AESNI_AVX_X86_64
help
Length-preserving cipher: ARIA cipher algorithms
(RFC 5794) with ECB and CTR modes
Architecture: x86_64 using:
- AES-NI (AES New Instructions)
- AVX2 (Advanced Vector Extensions)
- GFNI (Galois Field New Instructions)
Processes 32 blocks in parallel.
config CRYPTO_ARIA_GFNI_AVX512_X86_64
tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)"
depends on X86 && 64BIT && AS_AVX512 && AS_GFNI
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_ARIA
select CRYPTO_ARIA_AESNI_AVX_X86_64
select CRYPTO_ARIA_AESNI_AVX2_X86_64
help
Length-preserving cipher: ARIA cipher algorithms
(RFC 5794) with ECB and CTR modes
Architecture: x86_64 using:
- AVX512 (Advanced Vector Extensions)
- GFNI (Galois Field New Instructions)
Processes 64 blocks in parallel.
config CRYPTO_CHACHA20_X86_64
tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)"
depends on X86 && 64BIT
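
The two new Kconfig entries above only add faster implementations; callers still request the cipher by its generic template name and the crypto API binds to the highest-priority implementation the running CPU supports. A hedged sketch (hypothetical function, requires CONFIG_CRYPTO_ARIA plus CONFIG_CRYPTO_CTR or one of the entries above) of reaching the CTR mode named in the help text:

#include <crypto/skcipher.h>

static struct crypto_skcipher *example_alloc_aria_ctr(void)
{
	/* Resolves to aria-generic, the AVX/AVX2 glue or, with this series,
	 * the GFNI/AVX-512 code, whichever has the highest priority. */
	return crypto_alloc_skcipher("ctr(aria)", 0, 0);
}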


@ -103,6 +103,12 @@ sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) += aria-aesni-avx-x86_64.o
aria-aesni-avx-x86_64-y := aria-aesni-avx-asm_64.o aria_aesni_avx_glue.o
obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX2_X86_64) += aria-aesni-avx2-x86_64.o
aria-aesni-avx2-x86_64-y := aria-aesni-avx2-asm_64.o aria_aesni_avx2_glue.o
obj-$(CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64) += aria-gfni-avx512-x86_64.o
aria-gfni-avx512-x86_64-y := aria-gfni-avx512-asm_64.o aria_gfni_avx512_glue.o
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE


@ -8,13 +8,9 @@
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/asm-offsets.h>
#include <asm/frame.h>
/* struct aria_ctx: */
#define enc_key 0
#define dec_key 272
#define rounds 544
/* register macros */
#define CTX %rdi
@ -271,34 +267,44 @@
#define aria_ark_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, rk, idx, round) \
t0, t1, t2, rk, \
idx, round) \
/* AddRoundKey */ \
vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \
vpxor t0, x0, x0; \
vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \
vpxor t0, x1, x1; \
vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \
vpxor t0, x2, x2; \
vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \
vpxor t0, x3, x3; \
vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \
vpxor t0, x4, x4; \
vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \
vpxor t0, x5, x5; \
vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \
vpxor t0, x6, x6; \
vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \
vpxor t0, x7, x7;
vbroadcastss ((round * 16) + idx + 0)(rk), t0; \
vpsrld $24, t0, t2; \
vpshufb t1, t2, t2; \
vpxor t2, x0, x0; \
vpsrld $16, t0, t2; \
vpshufb t1, t2, t2; \
vpxor t2, x1, x1; \
vpsrld $8, t0, t2; \
vpshufb t1, t2, t2; \
vpxor t2, x2, x2; \
vpshufb t1, t0, t2; \
vpxor t2, x3, x3; \
vbroadcastss ((round * 16) + idx + 4)(rk), t0; \
vpsrld $24, t0, t2; \
vpshufb t1, t2, t2; \
vpxor t2, x4, x4; \
vpsrld $16, t0, t2; \
vpshufb t1, t2, t2; \
vpxor t2, x5, x5; \
vpsrld $8, t0, t2; \
vpshufb t1, t2, t2; \
vpxor t2, x6, x6; \
vpshufb t1, t0, t2; \
vpxor t2, x7, x7;
#ifdef CONFIG_AS_GFNI
#define aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
t4, t5, t6, t7) \
vpbroadcastq .Ltf_s2_bitmatrix, t0; \
vpbroadcastq .Ltf_inv_bitmatrix, t1; \
vpbroadcastq .Ltf_id_bitmatrix, t2; \
vpbroadcastq .Ltf_aff_bitmatrix, t3; \
vpbroadcastq .Ltf_x2_bitmatrix, t4; \
vmovdqa .Ltf_s2_bitmatrix, t0; \
vmovdqa .Ltf_inv_bitmatrix, t1; \
vmovdqa .Ltf_id_bitmatrix, t2; \
vmovdqa .Ltf_aff_bitmatrix, t3; \
vmovdqa .Ltf_x2_bitmatrix, t4; \
vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@ -312,14 +318,15 @@
vgf2p8affineinvqb $0, t2, x3, x3; \
vgf2p8affineinvqb $0, t2, x7, x7
#endif /* CONFIG_AS_GFNI */
#define aria_sbox_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
t4, t5, t6, t7) \
vpxor t7, t7, t7; \
vmovdqa .Linv_shift_row, t0; \
vmovdqa .Lshift_row, t1; \
vpbroadcastd .L0f0f0f0f, t6; \
vbroadcastss .L0f0f0f0f, t6; \
vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \
vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \
vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \
@ -414,8 +421,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, round); \
y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@ -430,7 +438,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, round); \
y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@ -468,8 +476,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, round); \
y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@ -484,7 +493,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, round); \
y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, y4, y5, y6, y7); \
@ -522,14 +531,15 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round, last_round) \
vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, round); \
y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, last_round); \
y0, y7, y2, rk, 8, last_round); \
\
aria_store_state_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@ -539,25 +549,27 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, round); \
y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \
y0, y1, y2, y3, y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, last_round); \
y0, y7, y2, rk, 0, last_round); \
\
aria_load_state_8way(y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, 8);
#ifdef CONFIG_AS_GFNI
#define aria_fe_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, round); \
y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@ -574,7 +586,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, round); \
y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@ -614,8 +626,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round) \
vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, round); \
y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@ -632,7 +645,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, round); \
y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@ -672,8 +685,9 @@
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, rk, round, last_round) \
vpxor y7, y7, y7; \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, round); \
y0, y7, y2, rk, 8, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@ -681,7 +695,7 @@
y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 8, last_round); \
y0, y7, y2, rk, 8, last_round); \
\
aria_store_state_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@ -691,7 +705,7 @@
x4, x5, x6, x7, \
mem_tmp, 0); \
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, round); \
y0, y7, y2, rk, 0, round); \
\
aria_sbox_8way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
@ -699,12 +713,14 @@
y4, y5, y6, y7); \
\
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, rk, 0, last_round); \
y0, y7, y2, rk, 0, last_round); \
\
aria_load_state_8way(y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, 8);
#endif /* CONFIG_AS_GFNI */
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
.section .rodata.cst16, "aM", @progbits, 16
.align 16
@ -756,6 +772,7 @@
.Ltf_hi__x2__and__fwd_aff:
.octa 0x3F893781E95FE1576CDA64D2BA0CB204
#ifdef CONFIG_AS_GFNI
.section .rodata.cst8, "aM", @progbits, 8
.align 8
/* AES affine: */
@ -769,6 +786,14 @@
BV8(0, 1, 1, 1, 1, 1, 0, 0),
BV8(0, 0, 1, 1, 1, 1, 1, 0),
BV8(0, 0, 0, 1, 1, 1, 1, 1))
.quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1),
BV8(1, 1, 0, 0, 0, 1, 1, 1),
BV8(1, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 1, 0, 0, 0, 1),
BV8(1, 1, 1, 1, 1, 0, 0, 0),
BV8(0, 1, 1, 1, 1, 1, 0, 0),
BV8(0, 0, 1, 1, 1, 1, 1, 0),
BV8(0, 0, 0, 1, 1, 1, 1, 1))
/* AES inverse affine: */
#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0)
@ -781,6 +806,14 @@
BV8(0, 0, 1, 0, 1, 0, 0, 1),
BV8(1, 0, 0, 1, 0, 1, 0, 0),
BV8(0, 1, 0, 0, 1, 0, 1, 0))
.quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1),
BV8(1, 0, 0, 1, 0, 0, 1, 0),
BV8(0, 1, 0, 0, 1, 0, 0, 1),
BV8(1, 0, 1, 0, 0, 1, 0, 0),
BV8(0, 1, 0, 1, 0, 0, 1, 0),
BV8(0, 0, 1, 0, 1, 0, 0, 1),
BV8(1, 0, 0, 1, 0, 1, 0, 0),
BV8(0, 1, 0, 0, 1, 0, 1, 0))
/* S2: */
#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1)
@ -793,6 +826,14 @@
BV8(1, 1, 0, 0, 1, 1, 1, 0),
BV8(0, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 1, 0, 1, 1, 0))
.quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1),
BV8(0, 0, 1, 1, 1, 1, 1, 1),
BV8(1, 1, 1, 0, 1, 1, 0, 1),
BV8(1, 1, 0, 0, 0, 0, 1, 1),
BV8(0, 1, 0, 0, 0, 0, 1, 1),
BV8(1, 1, 0, 0, 1, 1, 1, 0),
BV8(0, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 1, 0, 1, 1, 0))
/* X2: */
#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0)
@ -805,6 +846,14 @@
BV8(0, 1, 1, 0, 1, 0, 1, 1),
BV8(1, 0, 1, 1, 1, 1, 0, 1),
BV8(1, 0, 0, 1, 0, 0, 1, 1))
.quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0),
BV8(0, 0, 1, 0, 0, 1, 1, 0),
BV8(0, 0, 0, 0, 1, 0, 1, 0),
BV8(1, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 0, 1, 1, 0, 0),
BV8(0, 1, 1, 0, 1, 0, 1, 1),
BV8(1, 0, 1, 1, 1, 1, 0, 1),
BV8(1, 0, 0, 1, 0, 0, 1, 1))
/* Identity matrix: */
.Ltf_id_bitmatrix:
@ -816,6 +865,15 @@
BV8(0, 0, 0, 0, 0, 1, 0, 0),
BV8(0, 0, 0, 0, 0, 0, 1, 0),
BV8(0, 0, 0, 0, 0, 0, 0, 1))
.quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0),
BV8(0, 1, 0, 0, 0, 0, 0, 0),
BV8(0, 0, 1, 0, 0, 0, 0, 0),
BV8(0, 0, 0, 1, 0, 0, 0, 0),
BV8(0, 0, 0, 0, 1, 0, 0, 0),
BV8(0, 0, 0, 0, 0, 1, 0, 0),
BV8(0, 0, 0, 0, 0, 0, 1, 0),
BV8(0, 0, 0, 0, 0, 0, 0, 1))
#endif /* CONFIG_AS_GFNI */
/* 4-bit mask */
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
@ -874,7 +932,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way)
aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 10);
cmpl $12, rounds(CTX);
cmpl $12, ARIA_CTX_rounds(CTX);
jne .Laria_192;
aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@ -887,7 +945,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way)
aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 12);
cmpl $14, rounds(CTX);
cmpl $14, ARIA_CTX_rounds(CTX);
jne .Laria_256;
aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@ -923,7 +981,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way)
FRAME_BEGIN
leaq enc_key(CTX), %r9;
leaq ARIA_CTX_enc_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@ -948,7 +1006,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way)
FRAME_BEGIN
leaq dec_key(CTX), %r9;
leaq ARIA_CTX_dec_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@ -1056,7 +1114,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
leaq (%rdx), %r11;
leaq (%rcx), %rsi;
leaq (%rcx), %rdx;
leaq enc_key(CTX), %r9;
leaq ARIA_CTX_enc_key(CTX), %r9;
call __aria_aesni_avx_crypt_16way;
@ -1084,6 +1142,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
RET;
SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way)
#ifdef CONFIG_AS_GFNI
SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
/* input:
* %r9: rk
@ -1157,7 +1216,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
%xmm0, %xmm1, %xmm2, %xmm3,
%xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 10);
cmpl $12, rounds(CTX);
cmpl $12, ARIA_CTX_rounds(CTX);
jne .Laria_gfni_192;
aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@ -1174,7 +1233,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
%xmm0, %xmm1, %xmm2, %xmm3,
%xmm4, %xmm5, %xmm6, %xmm7,
%rax, %r9, 12);
cmpl $14, rounds(CTX);
cmpl $14, ARIA_CTX_rounds(CTX);
jne .Laria_gfni_256;
aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
%xmm4, %xmm5, %xmm6, %xmm7,
@ -1218,7 +1277,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
FRAME_BEGIN
leaq enc_key(CTX), %r9;
leaq ARIA_CTX_enc_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@ -1243,7 +1302,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
FRAME_BEGIN
leaq dec_key(CTX), %r9;
leaq ARIA_CTX_dec_key(CTX), %r9;
inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
@ -1275,7 +1334,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
leaq (%rdx), %r11;
leaq (%rcx), %rsi;
leaq (%rcx), %rdx;
leaq enc_key(CTX), %r9;
leaq ARIA_CTX_enc_key(CTX), %r9;
call __aria_aesni_avx_gfni_crypt_16way;
@ -1302,3 +1361,4 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
FRAME_END
RET;
SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way)
#endif /* CONFIG_AS_GFNI */

(File diff suppressed because it is too large.)


@ -5,12 +5,58 @@
#include <linux/types.h>
#define ARIA_AESNI_PARALLEL_BLOCKS 16
#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * 16)
#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_PARALLEL_BLOCKS)
#define ARIA_AESNI_AVX2_PARALLEL_BLOCKS 32
#define ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_AVX2_PARALLEL_BLOCKS)
#define ARIA_GFNI_AVX512_PARALLEL_BLOCKS 64
#define ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_GFNI_AVX512_PARALLEL_BLOCKS)
asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
struct aria_avx_ops {
void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src,
u8 *keystream, u8 *iv);
void (*aria_encrypt_32way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_decrypt_32way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_ctr_crypt_32way)(const void *ctx, u8 *dst, const u8 *src,
u8 *keystream, u8 *iv);
void (*aria_encrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_decrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
void (*aria_ctr_crypt_64way)(const void *ctx, u8 *dst, const u8 *src,
u8 *keystream, u8 *iv);
};
#endif
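
struct aria_avx_ops above is the dispatch table the x86 glue code fills in at module init from the asmlinkage entry points declared in this header. A rough, hedged sketch of that selection follows (not the actual glue file; XSAVE checks, the 64-way AVX-512 path and algorithm registration are omitted, and the header name in the include is assumed).

#include <asm/cpufeature.h>
#include "aria-avx.h"		/* assumed name of the header shown above */

static struct aria_avx_ops aria_ops;

static void example_select_aria_ops(void)
{
	/* Baseline: AES-NI/AVX 16-way entry points. */
	aria_ops.aria_encrypt_16way   = aria_aesni_avx_encrypt_16way;
	aria_ops.aria_decrypt_16way   = aria_aesni_avx_decrypt_16way;
	aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way;

	if (boot_cpu_has(X86_FEATURE_GFNI)) {
		/* GFNI variants of the same 16-way entry points. */
		aria_ops.aria_encrypt_16way   = aria_aesni_avx_gfni_encrypt_16way;
		aria_ops.aria_decrypt_16way   = aria_aesni_avx_gfni_decrypt_16way;
		aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
	}

	if (boot_cpu_has(X86_FEATURE_AVX2)) {
		/* 32-way AVX2 entry points added by this series. */
		aria_ops.aria_encrypt_32way   = aria_aesni_avx2_encrypt_32way;
		aria_ops.aria_decrypt_32way   = aria_aesni_avx2_decrypt_32way;
		aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_ctr_crypt_32way;
	}
}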


@ -0,0 +1,971 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* ARIA Cipher 64-way parallel algorithm (AVX512)
*
* Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
*
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#include <asm/asm-offsets.h>
#include <linux/cfi_types.h>
/* register macros */
#define CTX %rdi
#define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \
( (((a0) & 1) << 0) | \
(((a1) & 1) << 1) | \
(((a2) & 1) << 2) | \
(((a3) & 1) << 3) | \
(((a4) & 1) << 4) | \
(((a5) & 1) << 5) | \
(((a6) & 1) << 6) | \
(((a7) & 1) << 7) )
#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \
( ((l7) << (0 * 8)) | \
((l6) << (1 * 8)) | \
((l5) << (2 * 8)) | \
((l4) << (3 * 8)) | \
((l3) << (4 * 8)) | \
((l2) << (5 * 8)) | \
((l1) << (6 * 8)) | \
((l0) << (7 * 8)) )
#define add_le128(out, in, lo_counter, hi_counter1) \
vpaddq lo_counter, in, out; \
vpcmpuq $1, lo_counter, out, %k1; \
kaddb %k1, %k1, %k1; \
vpaddq hi_counter1, out, out{%k1};
#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
vpandq x, mask4bit, tmp0; \
vpandqn x, mask4bit, x; \
vpsrld $4, x, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxorq tmp0, x, x;
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
vpunpckhdq x1, x0, t2; \
vpunpckldq x1, x0, x0; \
\
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x2; \
\
vpunpckhqdq t1, x0, x1; \
vpunpcklqdq t1, x0, x0; \
\
vpunpckhqdq x2, t2, x3; \
vpunpcklqdq x2, t2, x2;
#define byteslice_16x16b(a0, b0, c0, d0, \
a1, b1, c1, d1, \
a2, b2, c2, d2, \
a3, b3, c3, d3, \
st0, st1) \
vmovdqu64 d2, st0; \
vmovdqu64 d3, st1; \
transpose_4x4(a0, a1, a2, a3, d2, d3); \
transpose_4x4(b0, b1, b2, b3, d2, d3); \
vmovdqu64 st0, d2; \
vmovdqu64 st1, d3; \
\
vmovdqu64 a0, st0; \
vmovdqu64 a1, st1; \
transpose_4x4(c0, c1, c2, c3, a0, a1); \
transpose_4x4(d0, d1, d2, d3, a0, a1); \
\
vbroadcasti64x2 .Lshufb_16x16b, a0; \
vmovdqu64 st1, a1; \
vpshufb a0, a2, a2; \
vpshufb a0, a3, a3; \
vpshufb a0, b0, b0; \
vpshufb a0, b1, b1; \
vpshufb a0, b2, b2; \
vpshufb a0, b3, b3; \
vpshufb a0, a1, a1; \
vpshufb a0, c0, c0; \
vpshufb a0, c1, c1; \
vpshufb a0, c2, c2; \
vpshufb a0, c3, c3; \
vpshufb a0, d0, d0; \
vpshufb a0, d1, d1; \
vpshufb a0, d2, d2; \
vpshufb a0, d3, d3; \
vmovdqu64 d3, st1; \
vmovdqu64 st0, d3; \
vpshufb a0, d3, a0; \
vmovdqu64 d2, st0; \
\
transpose_4x4(a0, b0, c0, d0, d2, d3); \
transpose_4x4(a1, b1, c1, d1, d2, d3); \
vmovdqu64 st0, d2; \
vmovdqu64 st1, d3; \
\
vmovdqu64 b0, st0; \
vmovdqu64 b1, st1; \
transpose_4x4(a2, b2, c2, d2, b0, b1); \
transpose_4x4(a3, b3, c3, d3, b0, b1); \
vmovdqu64 st0, b0; \
vmovdqu64 st1, b1; \
/* does not adjust output bytes inside vectors */
#define debyteslice_16x16b(a0, b0, c0, d0, \
a1, b1, c1, d1, \
a2, b2, c2, d2, \
a3, b3, c3, d3, \
st0, st1) \
vmovdqu64 d2, st0; \
vmovdqu64 d3, st1; \
transpose_4x4(a0, a1, a2, a3, d2, d3); \
transpose_4x4(b0, b1, b2, b3, d2, d3); \
vmovdqu64 st0, d2; \
vmovdqu64 st1, d3; \
\
vmovdqu64 a0, st0; \
vmovdqu64 a1, st1; \
transpose_4x4(c0, c1, c2, c3, a0, a1); \
transpose_4x4(d0, d1, d2, d3, a0, a1); \
\
vbroadcasti64x2 .Lshufb_16x16b, a0; \
vmovdqu64 st1, a1; \
vpshufb a0, a2, a2; \
vpshufb a0, a3, a3; \
vpshufb a0, b0, b0; \
vpshufb a0, b1, b1; \
vpshufb a0, b2, b2; \
vpshufb a0, b3, b3; \
vpshufb a0, a1, a1; \
vpshufb a0, c0, c0; \
vpshufb a0, c1, c1; \
vpshufb a0, c2, c2; \
vpshufb a0, c3, c3; \
vpshufb a0, d0, d0; \
vpshufb a0, d1, d1; \
vpshufb a0, d2, d2; \
vpshufb a0, d3, d3; \
vmovdqu64 d3, st1; \
vmovdqu64 st0, d3; \
vpshufb a0, d3, a0; \
vmovdqu64 d2, st0; \
\
transpose_4x4(c0, d0, a0, b0, d2, d3); \
transpose_4x4(c1, d1, a1, b1, d2, d3); \
vmovdqu64 st0, d2; \
vmovdqu64 st1, d3; \
\
vmovdqu64 b0, st0; \
vmovdqu64 b1, st1; \
transpose_4x4(c2, d2, a2, b2, b0, b1); \
transpose_4x4(c3, d3, a3, b3, b0, b1); \
vmovdqu64 st0, b0; \
vmovdqu64 st1, b1; \
/* does not adjust output bytes inside vectors */
/* load blocks to registers and apply pre-whitening */
#define inpack16_pre(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
rio) \
vmovdqu64 (0 * 64)(rio), x0; \
vmovdqu64 (1 * 64)(rio), x1; \
vmovdqu64 (2 * 64)(rio), x2; \
vmovdqu64 (3 * 64)(rio), x3; \
vmovdqu64 (4 * 64)(rio), x4; \
vmovdqu64 (5 * 64)(rio), x5; \
vmovdqu64 (6 * 64)(rio), x6; \
vmovdqu64 (7 * 64)(rio), x7; \
vmovdqu64 (8 * 64)(rio), y0; \
vmovdqu64 (9 * 64)(rio), y1; \
vmovdqu64 (10 * 64)(rio), y2; \
vmovdqu64 (11 * 64)(rio), y3; \
vmovdqu64 (12 * 64)(rio), y4; \
vmovdqu64 (13 * 64)(rio), y5; \
vmovdqu64 (14 * 64)(rio), y6; \
vmovdqu64 (15 * 64)(rio), y7;
/* byteslice pre-whitened blocks and store to temporary memory */
#define inpack16_post(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_ab, mem_cd) \
byteslice_16x16b(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
(mem_ab), (mem_cd)); \
\
vmovdqu64 x0, 0 * 64(mem_ab); \
vmovdqu64 x1, 1 * 64(mem_ab); \
vmovdqu64 x2, 2 * 64(mem_ab); \
vmovdqu64 x3, 3 * 64(mem_ab); \
vmovdqu64 x4, 4 * 64(mem_ab); \
vmovdqu64 x5, 5 * 64(mem_ab); \
vmovdqu64 x6, 6 * 64(mem_ab); \
vmovdqu64 x7, 7 * 64(mem_ab); \
vmovdqu64 y0, 0 * 64(mem_cd); \
vmovdqu64 y1, 1 * 64(mem_cd); \
vmovdqu64 y2, 2 * 64(mem_cd); \
vmovdqu64 y3, 3 * 64(mem_cd); \
vmovdqu64 y4, 4 * 64(mem_cd); \
vmovdqu64 y5, 5 * 64(mem_cd); \
vmovdqu64 y6, 6 * 64(mem_cd); \
vmovdqu64 y7, 7 * 64(mem_cd);
#define write_output(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem) \
vmovdqu64 x0, 0 * 64(mem); \
vmovdqu64 x1, 1 * 64(mem); \
vmovdqu64 x2, 2 * 64(mem); \
vmovdqu64 x3, 3 * 64(mem); \
vmovdqu64 x4, 4 * 64(mem); \
vmovdqu64 x5, 5 * 64(mem); \
vmovdqu64 x6, 6 * 64(mem); \
vmovdqu64 x7, 7 * 64(mem); \
vmovdqu64 y0, 8 * 64(mem); \
vmovdqu64 y1, 9 * 64(mem); \
vmovdqu64 y2, 10 * 64(mem); \
vmovdqu64 y3, 11 * 64(mem); \
vmovdqu64 y4, 12 * 64(mem); \
vmovdqu64 y5, 13 * 64(mem); \
vmovdqu64 y6, 14 * 64(mem); \
vmovdqu64 y7, 15 * 64(mem); \
#define aria_store_state_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
mem_tmp, idx) \
vmovdqu64 x0, ((idx + 0) * 64)(mem_tmp); \
vmovdqu64 x1, ((idx + 1) * 64)(mem_tmp); \
vmovdqu64 x2, ((idx + 2) * 64)(mem_tmp); \
vmovdqu64 x3, ((idx + 3) * 64)(mem_tmp); \
vmovdqu64 x4, ((idx + 4) * 64)(mem_tmp); \
vmovdqu64 x5, ((idx + 5) * 64)(mem_tmp); \
vmovdqu64 x6, ((idx + 6) * 64)(mem_tmp); \
vmovdqu64 x7, ((idx + 7) * 64)(mem_tmp);
#define aria_load_state_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
mem_tmp, idx) \
vmovdqu64 ((idx + 0) * 64)(mem_tmp), x0; \
vmovdqu64 ((idx + 1) * 64)(mem_tmp), x1; \
vmovdqu64 ((idx + 2) * 64)(mem_tmp), x2; \
vmovdqu64 ((idx + 3) * 64)(mem_tmp), x3; \
vmovdqu64 ((idx + 4) * 64)(mem_tmp), x4; \
vmovdqu64 ((idx + 5) * 64)(mem_tmp), x5; \
vmovdqu64 ((idx + 6) * 64)(mem_tmp), x6; \
vmovdqu64 ((idx + 7) * 64)(mem_tmp), x7;
#define aria_ark_16way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
t0, rk, round) \
/* AddRoundKey */ \
vpbroadcastb ((round * 16) + 3)(rk), t0; \
vpxorq t0, x0, x0; \
vpbroadcastb ((round * 16) + 2)(rk), t0; \
vpxorq t0, x1, x1; \
vpbroadcastb ((round * 16) + 1)(rk), t0; \
vpxorq t0, x2, x2; \
vpbroadcastb ((round * 16) + 0)(rk), t0; \
vpxorq t0, x3, x3; \
vpbroadcastb ((round * 16) + 7)(rk), t0; \
vpxorq t0, x4, x4; \
vpbroadcastb ((round * 16) + 6)(rk), t0; \
vpxorq t0, x5, x5; \
vpbroadcastb ((round * 16) + 5)(rk), t0; \
vpxorq t0, x6, x6; \
vpbroadcastb ((round * 16) + 4)(rk), t0; \
vpxorq t0, x7, x7; \
vpbroadcastb ((round * 16) + 11)(rk), t0; \
vpxorq t0, y0, y0; \
vpbroadcastb ((round * 16) + 10)(rk), t0; \
vpxorq t0, y1, y1; \
vpbroadcastb ((round * 16) + 9)(rk), t0; \
vpxorq t0, y2, y2; \
vpbroadcastb ((round * 16) + 8)(rk), t0; \
vpxorq t0, y3, y3; \
vpbroadcastb ((round * 16) + 15)(rk), t0; \
vpxorq t0, y4, y4; \
vpbroadcastb ((round * 16) + 14)(rk), t0; \
vpxorq t0, y5, y5; \
vpbroadcastb ((round * 16) + 13)(rk), t0; \
vpxorq t0, y6, y6; \
vpbroadcastb ((round * 16) + 12)(rk), t0; \
vpxorq t0, y7, y7;
#define aria_sbox_8way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
t4, t5, t6, t7) \
vpbroadcastq .Ltf_s2_bitmatrix, t0; \
vpbroadcastq .Ltf_inv_bitmatrix, t1; \
vpbroadcastq .Ltf_id_bitmatrix, t2; \
vpbroadcastq .Ltf_aff_bitmatrix, t3; \
vpbroadcastq .Ltf_x2_bitmatrix, t4; \
vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \
vgf2p8affineinvqb $0, t2, x2, x2; \
vgf2p8affineinvqb $0, t2, x6, x6; \
vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \
vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \
vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \
vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \
vgf2p8affineinvqb $0, t2, x3, x3; \
vgf2p8affineinvqb $0, t2, x7, x7;
#define aria_sbox_16way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
t0, t1, t2, t3, \
t4, t5, t6, t7) \
vpbroadcastq .Ltf_s2_bitmatrix, t0; \
vpbroadcastq .Ltf_inv_bitmatrix, t1; \
vpbroadcastq .Ltf_id_bitmatrix, t2; \
vpbroadcastq .Ltf_aff_bitmatrix, t3; \
vpbroadcastq .Ltf_x2_bitmatrix, t4; \
vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \
vgf2p8affineinvqb $0, t2, x2, x2; \
vgf2p8affineinvqb $0, t2, x6, x6; \
vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \
vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \
vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \
vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \
vgf2p8affineinvqb $0, t2, x3, x3; \
vgf2p8affineinvqb $0, t2, x7, x7; \
vgf2p8affineinvqb $(tf_s2_const), t0, y1, y1; \
vgf2p8affineinvqb $(tf_s2_const), t0, y5, y5; \
vgf2p8affineqb $(tf_inv_const), t1, y2, y2; \
vgf2p8affineqb $(tf_inv_const), t1, y6, y6; \
vgf2p8affineinvqb $0, t2, y2, y2; \
vgf2p8affineinvqb $0, t2, y6, y6; \
vgf2p8affineinvqb $(tf_aff_const), t3, y0, y0; \
vgf2p8affineinvqb $(tf_aff_const), t3, y4, y4; \
vgf2p8affineqb $(tf_x2_const), t4, y3, y3; \
vgf2p8affineqb $(tf_x2_const), t4, y7, y7; \
vgf2p8affineinvqb $0, t2, y3, y3; \
vgf2p8affineinvqb $0, t2, y7, y7;
#define aria_diff_m(x0, x1, x2, x3, \
t0, t1, t2, t3) \
/* T = rotr32(X, 8); */ \
/* X ^= T */ \
vpxorq x0, x3, t0; \
vpxorq x1, x0, t1; \
vpxorq x2, x1, t2; \
vpxorq x3, x2, t3; \
/* X = T ^ rotr(X, 16); */ \
vpxorq t2, x0, x0; \
vpxorq x1, t3, t3; \
vpxorq t0, x2, x2; \
vpxorq t1, x3, x1; \
vmovdqu64 t3, x3;
#define aria_diff_word(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7) \
/* t1 ^= t2; */ \
vpxorq y0, x4, x4; \
vpxorq y1, x5, x5; \
vpxorq y2, x6, x6; \
vpxorq y3, x7, x7; \
\
/* t2 ^= t3; */ \
vpxorq y4, y0, y0; \
vpxorq y5, y1, y1; \
vpxorq y6, y2, y2; \
vpxorq y7, y3, y3; \
\
/* t0 ^= t1; */ \
vpxorq x4, x0, x0; \
vpxorq x5, x1, x1; \
vpxorq x6, x2, x2; \
vpxorq x7, x3, x3; \
\
/* t3 ^= t1; */ \
vpxorq x4, y4, y4; \
vpxorq x5, y5, y5; \
vpxorq x6, y6, y6; \
vpxorq x7, y7, y7; \
\
/* t2 ^= t0; */ \
vpxorq x0, y0, y0; \
vpxorq x1, y1, y1; \
vpxorq x2, y2, y2; \
vpxorq x3, y3, y3; \
\
/* t1 ^= t2; */ \
vpxorq y0, x4, x4; \
vpxorq y1, x5, x5; \
vpxorq y2, x6, x6; \
vpxorq y3, x7, x7;
#define aria_fe_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
z0, z1, z2, z3, \
z4, z5, z6, z7, \
mem_tmp, rk, round) \
aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, y4, y5, y6, y7, \
z0, rk, round); \
\
aria_sbox_16way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
y2, y3, y0, y1, \
y6, y7, y4, y5, \
z0, z1, z2, z3, \
z4, z5, z6, z7); \
\
aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \
aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \
aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \
aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \
aria_diff_word(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7); \
/* aria_diff_byte() \
* T3 = ABCD -> BADC \
* T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \
* T0 = ABCD -> CDAB \
* T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \
* T1 = ABCD -> DCBA \
* T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \
*/ \
aria_diff_word(x2, x3, x0, x1, \
x7, x6, x5, x4, \
y0, y1, y2, y3, \
y5, y4, y7, y6); \
#define aria_fo_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
z0, z1, z2, z3, \
z4, z5, z6, z7, \
mem_tmp, rk, round) \
aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, y4, y5, y6, y7, \
z0, rk, round); \
\
aria_sbox_16way_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
z0, z1, z2, z3, \
z4, z5, z6, z7); \
\
aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \
aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \
aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \
aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \
aria_diff_word(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7); \
/* aria_diff_byte() \
* T1 = ABCD -> BADC \
* T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \
* T2 = ABCD -> CDAB \
* T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \
* T3 = ABCD -> DCBA \
* T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \
*/ \
aria_diff_word(x0, x1, x2, x3, \
x5, x4, x7, x6, \
y2, y3, y0, y1, \
y7, y6, y5, y4);
#define aria_ff_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
z0, z1, z2, z3, \
z4, z5, z6, z7, \
mem_tmp, rk, round, last_round) \
aria_ark_16way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
z0, rk, round); \
aria_sbox_16way_gfni(x2, x3, x0, x1, \
x6, x7, x4, x5, \
y2, y3, y0, y1, \
y6, y7, y4, y5, \
z0, z1, z2, z3, \
z4, z5, z6, z7); \
aria_ark_16way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
y0, y1, y2, y3, \
y4, y5, y6, y7, \
z0, rk, last_round);
.section .rodata.cst64, "aM", @progbits, 64
.align 64
.Lcounter0123_lo:
.quad 0, 0
.quad 1, 0
.quad 2, 0
.quad 3, 0
.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
.align 32
#define SHUFB_BYTES(idx) \
0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
.Lshufb_16x16b:
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
.section .rodata.cst16, "aM", @progbits, 16
.align 16
.Lcounter4444_lo:
.quad 4, 0
.Lcounter8888_lo:
.quad 8, 0
.Lcounter16161616_lo:
.quad 16, 0
.Lcounter1111_hi:
.quad 0, 1
/* For CTR-mode IV byteswap */
.Lbswap128_mask:
.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
.section .rodata.cst8, "aM", @progbits, 8
.align 8
/* AES affine: */
#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0)
.Ltf_aff_bitmatrix:
.quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1),
BV8(1, 1, 0, 0, 0, 1, 1, 1),
BV8(1, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 1, 0, 0, 0, 1),
BV8(1, 1, 1, 1, 1, 0, 0, 0),
BV8(0, 1, 1, 1, 1, 1, 0, 0),
BV8(0, 0, 1, 1, 1, 1, 1, 0),
BV8(0, 0, 0, 1, 1, 1, 1, 1))
/* AES inverse affine: */
#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0)
.Ltf_inv_bitmatrix:
.quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1),
BV8(1, 0, 0, 1, 0, 0, 1, 0),
BV8(0, 1, 0, 0, 1, 0, 0, 1),
BV8(1, 0, 1, 0, 0, 1, 0, 0),
BV8(0, 1, 0, 1, 0, 0, 1, 0),
BV8(0, 0, 1, 0, 1, 0, 0, 1),
BV8(1, 0, 0, 1, 0, 1, 0, 0),
BV8(0, 1, 0, 0, 1, 0, 1, 0))
/* S2: */
#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1)
.Ltf_s2_bitmatrix:
.quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1),
BV8(0, 0, 1, 1, 1, 1, 1, 1),
BV8(1, 1, 1, 0, 1, 1, 0, 1),
BV8(1, 1, 0, 0, 0, 0, 1, 1),
BV8(0, 1, 0, 0, 0, 0, 1, 1),
BV8(1, 1, 0, 0, 1, 1, 1, 0),
BV8(0, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 1, 0, 1, 1, 0))
/* X2: */
#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0)
.Ltf_x2_bitmatrix:
.quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0),
BV8(0, 0, 1, 0, 0, 1, 1, 0),
BV8(0, 0, 0, 0, 1, 0, 1, 0),
BV8(1, 1, 1, 0, 0, 0, 1, 1),
BV8(1, 1, 1, 0, 1, 1, 0, 0),
BV8(0, 1, 1, 0, 1, 0, 1, 1),
BV8(1, 0, 1, 1, 1, 1, 0, 1),
BV8(1, 0, 0, 1, 0, 0, 1, 1))
/* Identity matrix: */
.Ltf_id_bitmatrix:
.quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0),
BV8(0, 1, 0, 0, 0, 0, 0, 0),
BV8(0, 0, 1, 0, 0, 0, 0, 0),
BV8(0, 0, 0, 1, 0, 0, 0, 0),
BV8(0, 0, 0, 0, 1, 0, 0, 0),
BV8(0, 0, 0, 0, 0, 1, 0, 0),
BV8(0, 0, 0, 0, 0, 0, 1, 0),
BV8(0, 0, 0, 0, 0, 0, 0, 1))
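The .Ltf_*_bitmatrix constants above are the affine bit-matrices consumed by the GFNI S-box layer (aria_sbox_16way_gfni) through the vgf2p8affineqb/vgf2p8affineinvqb instructions: each .quad packs eight 8-bit matrix rows, and the instruction applies "matrix times byte, plus constant" over GF(2) to every byte of a vector. A scalar sketch of that operation, purely for orientation (the exact bit and row ordering of the real instruction is not modelled here; see the SDM for that):

	/* Illustrative scalar model only -- not taken from this patch. */
	#include <linux/bitops.h>
	#include <linux/types.h>

	static u8 gf2_affine8(u64 matrix, u8 in, u8 constant)
	{
		u8 out = 0;
		int row;

		for (row = 0; row < 8; row++) {
			u8 m = matrix >> (8 * row);	/* one matrix row */

			/* dot product over GF(2) == parity of the AND */
			out |= (u8)(hweight8(m & in) & 1) << row;
		}
		return out ^ constant;
	}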
.text
SYM_FUNC_START_LOCAL(__aria_gfni_avx512_crypt_64way)
/* input:
* %r9: rk
* %rsi: dst
* %rdx: src
* %zmm0..%zmm15: byte-sliced blocks
*/
FRAME_BEGIN
movq %rsi, %rax;
leaq 8 * 64(%rax), %r8;
inpack16_post(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14,
%zmm15, %rax, %r8);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 0);
aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 1);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 2);
aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 3);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 4);
aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 5);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 6);
aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 7);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 8);
aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 9);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 10);
cmpl $12, ARIA_CTX_rounds(CTX);
jne .Laria_gfni_192;
aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 11, 12);
jmp .Laria_gfni_end;
.Laria_gfni_192:
aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 11);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 12);
cmpl $14, ARIA_CTX_rounds(CTX);
jne .Laria_gfni_256;
aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 13, 14);
jmp .Laria_gfni_end;
.Laria_gfni_256:
aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 13);
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3,
%zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 14);
aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0,
%zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10,
%zmm12, %zmm13, %zmm14, %zmm15,
%zmm24, %zmm25, %zmm26, %zmm27,
%zmm28, %zmm29, %zmm30, %zmm31,
%rax, %r9, 15, 16);
.Laria_gfni_end:
debyteslice_16x16b(%zmm9, %zmm12, %zmm3, %zmm6,
%zmm8, %zmm13, %zmm2, %zmm7,
%zmm11, %zmm14, %zmm1, %zmm4,
%zmm10, %zmm15, %zmm0, %zmm5,
(%rax), (%r8));
FRAME_END
RET;
SYM_FUNC_END(__aria_gfni_avx512_crypt_64way)
SYM_TYPED_FUNC_START(aria_gfni_avx512_encrypt_64way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
leaq ARIA_CTX_enc_key(CTX), %r9;
inpack16_pre(%zmm0, %zmm1, %zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14,
%zmm15, %rdx);
call __aria_gfni_avx512_crypt_64way;
write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14,
%zmm15, %rax);
FRAME_END
RET;
SYM_FUNC_END(aria_gfni_avx512_encrypt_64way)
SYM_TYPED_FUNC_START(aria_gfni_avx512_decrypt_64way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
leaq ARIA_CTX_dec_key(CTX), %r9;
inpack16_pre(%zmm0, %zmm1, %zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7,
%zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14,
%zmm15, %rdx);
call __aria_gfni_avx512_crypt_64way;
write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14,
%zmm15, %rax);
FRAME_END
RET;
SYM_FUNC_END(aria_gfni_avx512_decrypt_64way)
SYM_FUNC_START_LOCAL(__aria_gfni_avx512_ctr_gen_keystream_64way)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: keystream
* %r8: iv (big endian, 128bit)
*/
FRAME_BEGIN
vbroadcasti64x2 .Lbswap128_mask (%rip), %zmm19;
vmovdqa64 .Lcounter0123_lo (%rip), %zmm21;
vbroadcasti64x2 .Lcounter4444_lo (%rip), %zmm22;
vbroadcasti64x2 .Lcounter8888_lo (%rip), %zmm23;
vbroadcasti64x2 .Lcounter16161616_lo (%rip), %zmm24;
vbroadcasti64x2 .Lcounter1111_hi (%rip), %zmm25;
/* load IV and byteswap */
movq 8(%r8), %r11;
movq (%r8), %r10;
bswapq %r11;
bswapq %r10;
vbroadcasti64x2 (%r8), %zmm20;
vpshufb %zmm19, %zmm20, %zmm20;
/* check need for handling 64-bit overflow and carry */
cmpq $(0xffffffffffffffff - 64), %r11;
ja .Lload_ctr_carry;
/* construct IVs */
vpaddq %zmm21, %zmm20, %zmm0; /* +0:+1:+2:+3 */
vpaddq %zmm22, %zmm0, %zmm1; /* +4:+5:+6:+7 */
vpaddq %zmm23, %zmm0, %zmm2; /* +8:+9:+10:+11 */
vpaddq %zmm23, %zmm1, %zmm3; /* +12:+13:+14:+15 */
vpaddq %zmm24, %zmm0, %zmm4; /* +16... */
vpaddq %zmm24, %zmm1, %zmm5; /* +20... */
vpaddq %zmm24, %zmm2, %zmm6; /* +24... */
vpaddq %zmm24, %zmm3, %zmm7; /* +28... */
vpaddq %zmm24, %zmm4, %zmm8; /* +32... */
vpaddq %zmm24, %zmm5, %zmm9; /* +36... */
vpaddq %zmm24, %zmm6, %zmm10; /* +40... */
vpaddq %zmm24, %zmm7, %zmm11; /* +44... */
vpaddq %zmm24, %zmm8, %zmm12; /* +48... */
vpaddq %zmm24, %zmm9, %zmm13; /* +52... */
vpaddq %zmm24, %zmm10, %zmm14; /* +56... */
vpaddq %zmm24, %zmm11, %zmm15; /* +60... */
jmp .Lload_ctr_done;
.Lload_ctr_carry:
/* construct IVs */
add_le128(%zmm0, %zmm20, %zmm21, %zmm25); /* +0:+1:+2:+3 */
add_le128(%zmm1, %zmm0, %zmm22, %zmm25); /* +4:+5:+6:+7 */
add_le128(%zmm2, %zmm0, %zmm23, %zmm25); /* +8:+9:+10:+11 */
add_le128(%zmm3, %zmm1, %zmm23, %zmm25); /* +12:+13:+14:+15 */
add_le128(%zmm4, %zmm0, %zmm24, %zmm25); /* +16... */
add_le128(%zmm5, %zmm1, %zmm24, %zmm25); /* +20... */
add_le128(%zmm6, %zmm2, %zmm24, %zmm25); /* +24... */
add_le128(%zmm7, %zmm3, %zmm24, %zmm25); /* +28... */
add_le128(%zmm8, %zmm4, %zmm24, %zmm25); /* +32... */
add_le128(%zmm9, %zmm5, %zmm24, %zmm25); /* +36... */
add_le128(%zmm10, %zmm6, %zmm24, %zmm25); /* +40... */
add_le128(%zmm11, %zmm7, %zmm24, %zmm25); /* +44... */
add_le128(%zmm12, %zmm8, %zmm24, %zmm25); /* +48... */
add_le128(%zmm13, %zmm9, %zmm24, %zmm25); /* +52... */
add_le128(%zmm14, %zmm10, %zmm24, %zmm25); /* +56... */
add_le128(%zmm15, %zmm11, %zmm24, %zmm25); /* +60... */
.Lload_ctr_done:
/* Byte-swap IVs and update counter. */
addq $64, %r11;
adcq $0, %r10;
vpshufb %zmm19, %zmm15, %zmm15;
vpshufb %zmm19, %zmm14, %zmm14;
vpshufb %zmm19, %zmm13, %zmm13;
vpshufb %zmm19, %zmm12, %zmm12;
vpshufb %zmm19, %zmm11, %zmm11;
vpshufb %zmm19, %zmm10, %zmm10;
vpshufb %zmm19, %zmm9, %zmm9;
vpshufb %zmm19, %zmm8, %zmm8;
bswapq %r11;
bswapq %r10;
vpshufb %zmm19, %zmm7, %zmm7;
vpshufb %zmm19, %zmm6, %zmm6;
vpshufb %zmm19, %zmm5, %zmm5;
vpshufb %zmm19, %zmm4, %zmm4;
vpshufb %zmm19, %zmm3, %zmm3;
vpshufb %zmm19, %zmm2, %zmm2;
vpshufb %zmm19, %zmm1, %zmm1;
vpshufb %zmm19, %zmm0, %zmm0;
movq %r11, 8(%r8);
movq %r10, (%r8);
FRAME_END
RET;
SYM_FUNC_END(__aria_gfni_avx512_ctr_gen_keystream_64way)
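The IV handling above treats the caller's 128-bit big-endian IV as a block counter: the two halves are loaded and byte-swapped into %r10/%r11, the per-lane counters are built from the .Lcounter* constants, and the cmpq against 0xffffffffffffffff - 64 only diverts to the slower add_le128 path when the low word could wrap while adding the 64 per-block increments. Before writeback the counter is advanced by the 64 blocks consumed (addq/adcq) and swapped back. A plain C sketch of that final counter update, for reference only (not code from this patch):

	#include <asm/unaligned.h>
	#include <linux/types.h>

	static void ctr_iv_advance(u8 iv[16], u64 nblocks)
	{
		u64 hi = get_unaligned_be64(iv);	/* bswapq (%r8)  -> %r10 */
		u64 lo = get_unaligned_be64(iv + 8);	/* bswapq 8(%r8) -> %r11 */

		lo += nblocks;				/* addq $64, %r11 */
		if (lo < nblocks)			/* unsigned wrap? */
			hi++;				/* adcq $0, %r10  */

		put_unaligned_be64(hi, iv);
		put_unaligned_be64(lo, iv + 8);
	}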
SYM_TYPED_FUNC_START(aria_gfni_avx512_ctr_crypt_64way)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: keystream
* %r8: iv (big endian, 128bit)
*/
FRAME_BEGIN
call __aria_gfni_avx512_ctr_gen_keystream_64way
leaq (%rsi), %r10;
leaq (%rdx), %r11;
leaq (%rcx), %rsi;
leaq (%rcx), %rdx;
leaq ARIA_CTX_enc_key(CTX), %r9;
call __aria_gfni_avx512_crypt_64way;
vpxorq (0 * 64)(%r11), %zmm3, %zmm3;
vpxorq (1 * 64)(%r11), %zmm2, %zmm2;
vpxorq (2 * 64)(%r11), %zmm1, %zmm1;
vpxorq (3 * 64)(%r11), %zmm0, %zmm0;
vpxorq (4 * 64)(%r11), %zmm6, %zmm6;
vpxorq (5 * 64)(%r11), %zmm7, %zmm7;
vpxorq (6 * 64)(%r11), %zmm4, %zmm4;
vpxorq (7 * 64)(%r11), %zmm5, %zmm5;
vpxorq (8 * 64)(%r11), %zmm9, %zmm9;
vpxorq (9 * 64)(%r11), %zmm8, %zmm8;
vpxorq (10 * 64)(%r11), %zmm11, %zmm11;
vpxorq (11 * 64)(%r11), %zmm10, %zmm10;
vpxorq (12 * 64)(%r11), %zmm12, %zmm12;
vpxorq (13 * 64)(%r11), %zmm13, %zmm13;
vpxorq (14 * 64)(%r11), %zmm14, %zmm14;
vpxorq (15 * 64)(%r11), %zmm15, %zmm15;
write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5,
%zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14,
%zmm15, %r10);
FRAME_END
RET;
SYM_FUNC_END(aria_gfni_avx512_ctr_crypt_64way)


@ -0,0 +1,254 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Glue Code for the AVX2/AES-NI/GFNI assembler implementation of the ARIA Cipher
*
* Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
*/
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/aria.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
#include "ecb_cbc_helpers.h"
#include "aria-avx.h"
asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_encrypt_32way);
asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_decrypt_32way);
asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_ctr_crypt_32way);
#ifdef CONFIG_AS_GFNI
asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_encrypt_32way);
asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_decrypt_32way);
asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_ctr_crypt_32way);
#endif /* CONFIG_AS_GFNI */
static struct aria_avx_ops aria_ops;
struct aria_avx2_request_ctx {
u8 keystream[ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE];
};
static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
{
ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way);
ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way);
ECB_BLOCK(1, aria_encrypt);
ECB_WALK_END();
}
static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey)
{
ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way);
ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way);
ECB_BLOCK(1, aria_decrypt);
ECB_WALK_END();
}
static int aria_avx2_ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_encrypt(req, ctx->enc_key[0]);
}
static int aria_avx2_ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_decrypt(req, ctx->dec_key[0]);
}
static int aria_avx2_set_key(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
return aria_set_key(&tfm->base, key, keylen);
}
static int aria_avx2_ctr_encrypt(struct skcipher_request *req)
{
struct aria_avx2_request_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) {
kernel_fpu_begin();
aria_ops.aria_ctr_crypt_32way(ctx, dst, src,
&req_ctx->keystream[0],
walk.iv);
kernel_fpu_end();
dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
}
while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
kernel_fpu_begin();
aria_ops.aria_ctr_crypt_16way(ctx, dst, src,
&req_ctx->keystream[0],
walk.iv);
kernel_fpu_end();
dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
src += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE;
}
while (nbytes >= ARIA_BLOCK_SIZE) {
memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
aria_encrypt(ctx, &req_ctx->keystream[0],
&req_ctx->keystream[0]);
crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
ARIA_BLOCK_SIZE);
dst += ARIA_BLOCK_SIZE;
src += ARIA_BLOCK_SIZE;
nbytes -= ARIA_BLOCK_SIZE;
}
if (walk.nbytes == walk.total && nbytes > 0) {
memcpy(&req_ctx->keystream[0], walk.iv,
ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
aria_encrypt(ctx, &req_ctx->keystream[0],
&req_ctx->keystream[0]);
crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
nbytes);
dst += nbytes;
src += nbytes;
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int aria_avx2_init_tfm(struct crypto_skcipher *tfm)
{
crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx2_request_ctx));
return 0;
}
static struct skcipher_alg aria_algs[] = {
{
.base.cra_name = "__ecb(aria)",
.base.cra_driver_name = "__ecb-aria-avx2",
.base.cra_priority = 500,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = ARIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = ARIA_MIN_KEY_SIZE,
.max_keysize = ARIA_MAX_KEY_SIZE,
.setkey = aria_avx2_set_key,
.encrypt = aria_avx2_ecb_encrypt,
.decrypt = aria_avx2_ecb_decrypt,
}, {
.base.cra_name = "__ctr(aria)",
.base.cra_driver_name = "__ctr-aria-avx2",
.base.cra_priority = 500,
.base.cra_flags = CRYPTO_ALG_INTERNAL |
CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = ARIA_MIN_KEY_SIZE,
.max_keysize = ARIA_MAX_KEY_SIZE,
.ivsize = ARIA_BLOCK_SIZE,
.chunksize = ARIA_BLOCK_SIZE,
.setkey = aria_avx2_set_key,
.encrypt = aria_avx2_ctr_encrypt,
.decrypt = aria_avx2_ctr_encrypt,
.init = aria_avx2_init_tfm,
}
};
static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
static int __init aria_avx2_init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) {
aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way;
aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way;
aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way;
} else {
aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way;
aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way;
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way;
aria_ops.aria_encrypt_32way = aria_aesni_avx2_encrypt_32way;
aria_ops.aria_decrypt_32way = aria_aesni_avx2_decrypt_32way;
aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_ctr_crypt_32way;
}
return simd_register_skciphers_compat(aria_algs,
ARRAY_SIZE(aria_algs),
aria_simd_algs);
}
static void __exit aria_avx2_exit(void)
{
simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
aria_simd_algs);
}
module_init(aria_avx2_init);
module_exit(aria_avx2_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>");
MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX2/AES-NI/GFNI optimized");
MODULE_ALIAS_CRYPTO("aria");
MODULE_ALIAS_CRYPTO("aria-aesni-avx2");
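The skciphers registered above are CRYPTO_ALG_INTERNAL; simd_register_skciphers_compat() wraps them so that ordinary callers reach them under the plain "ecb(aria)"/"ctr(aria)" names through the normal API. A hedged usage sketch (hypothetical caller with error handling trimmed; not part of this patch):

	#include <crypto/skcipher.h>
	#include <linux/crypto.h>
	#include <linux/err.h>
	#include <linux/gfp.h>
	#include <linux/scatterlist.h>

	static int aria_ctr_demo(const u8 *key, unsigned int keylen, u8 iv[16],
				 struct scatterlist *sg, unsigned int len)
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		DECLARE_CRYPTO_WAIT(wait);
		int err;

		tfm = crypto_alloc_skcipher("ctr(aria)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, keylen);
		if (err)
			goto out_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_tfm;
		}

		/* crypto_req_done() is the stock completion used with crypto_wait */
		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
					      crypto_req_done, &wait);
		skcipher_request_set_crypt(req, sg, sg, len, iv);
		err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

		skcipher_request_free(req);
	out_tfm:
		crypto_free_skcipher(tfm);
		return err;
	}

The crypto_wait/crypto_req_done pairing here is the same helper whose signature changes later in this pull, when completion callbacks switch to taking a void pointer.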

@ -18,21 +18,33 @@
asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx_encrypt_16way);
asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx_decrypt_16way);
asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way);
#ifdef CONFIG_AS_GFNI
asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way);
asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_decrypt_16way);
asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way);
#endif /* CONFIG_AS_GFNI */
static struct aria_avx_ops aria_ops;
struct aria_avx_request_ctx {
u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE];
};
static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
{
ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
@ -73,6 +85,7 @@ static int aria_avx_set_key(struct crypto_skcipher *tfm, const u8 *key,
static int aria_avx_ctr_encrypt(struct skcipher_request *req)
{
struct aria_avx_request_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
@ -86,10 +99,9 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr;
while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE];
kernel_fpu_begin();
aria_ops.aria_ctr_crypt_16way(ctx, dst, src, keystream,
aria_ops.aria_ctr_crypt_16way(ctx, dst, src,
&req_ctx->keystream[0],
walk.iv);
kernel_fpu_end();
dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
@ -98,28 +110,29 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req)
}
while (nbytes >= ARIA_BLOCK_SIZE) {
u8 keystream[ARIA_BLOCK_SIZE];
memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE);
memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
aria_encrypt(ctx, keystream, keystream);
aria_encrypt(ctx, &req_ctx->keystream[0],
&req_ctx->keystream[0]);
crypto_xor_cpy(dst, src, keystream, ARIA_BLOCK_SIZE);
crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
ARIA_BLOCK_SIZE);
dst += ARIA_BLOCK_SIZE;
src += ARIA_BLOCK_SIZE;
nbytes -= ARIA_BLOCK_SIZE;
}
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[ARIA_BLOCK_SIZE];
memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE);
memcpy(&req_ctx->keystream[0], walk.iv,
ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
aria_encrypt(ctx, keystream, keystream);
aria_encrypt(ctx, &req_ctx->keystream[0],
&req_ctx->keystream[0]);
crypto_xor_cpy(dst, src, keystream, nbytes);
crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
nbytes);
dst += nbytes;
src += nbytes;
nbytes = 0;
@ -130,6 +143,13 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req)
return err;
}
static int aria_avx_init_tfm(struct crypto_skcipher *tfm)
{
crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx_request_ctx));
return 0;
}
static struct skcipher_alg aria_algs[] = {
{
.base.cra_name = "__ecb(aria)",
@ -160,6 +180,7 @@ static struct skcipher_alg aria_algs[] = {
.setkey = aria_avx_set_key,
.encrypt = aria_avx_ctr_encrypt,
.decrypt = aria_avx_ctr_encrypt,
.init = aria_avx_init_tfm,
}
};
@ -182,7 +203,7 @@ static int __init aria_avx_init(void)
return -ENODEV;
}
if (boot_cpu_has(X86_FEATURE_GFNI)) {
if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) {
aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;

@ -0,0 +1,250 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Glue Code for the AVX512/GFNI assembler implementation of the ARIA Cipher
*
* Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
*/
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/aria.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
#include "ecb_cbc_helpers.h"
#include "aria-avx.h"
asmlinkage void aria_gfni_avx512_encrypt_64way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_gfni_avx512_decrypt_64way(const void *ctx, u8 *dst,
const u8 *src);
asmlinkage void aria_gfni_avx512_ctr_crypt_64way(const void *ctx, u8 *dst,
const u8 *src,
u8 *keystream, u8 *iv);
static struct aria_avx_ops aria_ops;
struct aria_avx512_request_ctx {
u8 keystream[ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE];
};
static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
{
ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_encrypt_64way);
ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way);
ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way);
ECB_BLOCK(1, aria_encrypt);
ECB_WALK_END();
}
static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey)
{
ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_decrypt_64way);
ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way);
ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way);
ECB_BLOCK(1, aria_decrypt);
ECB_WALK_END();
}
static int aria_avx512_ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_encrypt(req, ctx->enc_key[0]);
}
static int aria_avx512_ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_decrypt(req, ctx->dec_key[0]);
}
static int aria_avx512_set_key(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
return aria_set_key(&tfm->base, key, keylen);
}
static int aria_avx512_ctr_encrypt(struct skcipher_request *req)
{
struct aria_avx512_request_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE) {
kernel_fpu_begin();
aria_ops.aria_ctr_crypt_64way(ctx, dst, src,
&req_ctx->keystream[0],
walk.iv);
kernel_fpu_end();
dst += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE;
src += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE;
nbytes -= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE;
}
while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) {
kernel_fpu_begin();
aria_ops.aria_ctr_crypt_32way(ctx, dst, src,
&req_ctx->keystream[0],
walk.iv);
kernel_fpu_end();
dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
}
while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
kernel_fpu_begin();
aria_ops.aria_ctr_crypt_16way(ctx, dst, src,
&req_ctx->keystream[0],
walk.iv);
kernel_fpu_end();
dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
src += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE;
}
while (nbytes >= ARIA_BLOCK_SIZE) {
memcpy(&req_ctx->keystream[0], walk.iv,
ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
aria_encrypt(ctx, &req_ctx->keystream[0],
&req_ctx->keystream[0]);
crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
ARIA_BLOCK_SIZE);
dst += ARIA_BLOCK_SIZE;
src += ARIA_BLOCK_SIZE;
nbytes -= ARIA_BLOCK_SIZE;
}
if (walk.nbytes == walk.total && nbytes > 0) {
memcpy(&req_ctx->keystream[0], walk.iv,
ARIA_BLOCK_SIZE);
crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
aria_encrypt(ctx, &req_ctx->keystream[0],
&req_ctx->keystream[0]);
crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
nbytes);
dst += nbytes;
src += nbytes;
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int aria_avx512_init_tfm(struct crypto_skcipher *tfm)
{
crypto_skcipher_set_reqsize(tfm,
sizeof(struct aria_avx512_request_ctx));
return 0;
}
static struct skcipher_alg aria_algs[] = {
{
.base.cra_name = "__ecb(aria)",
.base.cra_driver_name = "__ecb-aria-avx512",
.base.cra_priority = 600,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = ARIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = ARIA_MIN_KEY_SIZE,
.max_keysize = ARIA_MAX_KEY_SIZE,
.setkey = aria_avx512_set_key,
.encrypt = aria_avx512_ecb_encrypt,
.decrypt = aria_avx512_ecb_decrypt,
}, {
.base.cra_name = "__ctr(aria)",
.base.cra_driver_name = "__ctr-aria-avx512",
.base.cra_priority = 600,
.base.cra_flags = CRYPTO_ALG_INTERNAL |
CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aria_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = ARIA_MIN_KEY_SIZE,
.max_keysize = ARIA_MAX_KEY_SIZE,
.ivsize = ARIA_BLOCK_SIZE,
.chunksize = ARIA_BLOCK_SIZE,
.setkey = aria_avx512_set_key,
.encrypt = aria_avx512_ctr_encrypt,
.decrypt = aria_avx512_ctr_encrypt,
.init = aria_avx512_init_tfm,
}
};
static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
static int __init aria_avx512_init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_AVX512F) ||
!boot_cpu_has(X86_FEATURE_AVX512VL) ||
!boot_cpu_has(X86_FEATURE_GFNI) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX512/GFNI instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, &feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way;
aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way;
aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way;
aria_ops.aria_encrypt_64way = aria_gfni_avx512_encrypt_64way;
aria_ops.aria_decrypt_64way = aria_gfni_avx512_decrypt_64way;
aria_ops.aria_ctr_crypt_64way = aria_gfni_avx512_ctr_crypt_64way;
return simd_register_skciphers_compat(aria_algs,
ARRAY_SIZE(aria_algs),
aria_simd_algs);
}
static void __exit aria_avx512_exit(void)
{
simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
aria_simd_algs);
}
module_init(aria_avx512_init);
module_exit(aria_avx512_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>");
MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX512/GFNI optimized");
MODULE_ALIAS_CRYPTO("aria");
MODULE_ALIAS_CRYPTO("aria-gfni-avx512");

@ -6,7 +6,6 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
.file "blowfish-x86_64-asm.S"
.text
@ -100,16 +99,11 @@
bswapq RX0; \
movq RX0, (RIO);
#define xor_block() \
bswapq RX0; \
xorq RX0, (RIO);
SYM_FUNC_START(__blowfish_enc_blk)
SYM_FUNC_START(blowfish_enc_blk)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
movq %r12, %r11;
@ -130,19 +124,13 @@ SYM_FUNC_START(__blowfish_enc_blk)
add_roundkey_enc(16);
movq %r11, %r12;
movq %r10, RIO;
test %cl, %cl;
jnz .L__enc_xor;
write_block();
RET;
.L__enc_xor:
xor_block();
RET;
SYM_FUNC_END(__blowfish_enc_blk)
SYM_FUNC_END(blowfish_enc_blk)
SYM_TYPED_FUNC_START(blowfish_dec_blk)
SYM_FUNC_START(blowfish_dec_blk)
/* input:
* %rdi: ctx
* %rsi: dst
@ -272,28 +260,26 @@ SYM_FUNC_END(blowfish_dec_blk)
movq RX3, 24(RIO);
#define xor_block4() \
bswapq RX0; \
xorq RX0, (RIO); \
movq (RIO), RT0; \
bswapq RT0; \
xorq RT0, RX1; \
\
bswapq RX1; \
xorq RX1, 8(RIO); \
movq 8(RIO), RT2; \
bswapq RT2; \
xorq RT2, RX2; \
\
bswapq RX2; \
xorq RX2, 16(RIO); \
\
bswapq RX3; \
xorq RX3, 24(RIO);
movq 16(RIO), RT3; \
bswapq RT3; \
xorq RT3, RX3;
SYM_FUNC_START(__blowfish_enc_blk_4way)
SYM_FUNC_START(blowfish_enc_blk_4way)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
pushq %r12;
pushq %rbx;
pushq %rcx;
movq %rdi, CTX
movq %rsi, %r11;
@ -313,37 +299,28 @@ SYM_FUNC_START(__blowfish_enc_blk_4way)
round_enc4(14);
add_preloaded_roundkey4();
popq %r12;
movq %r11, RIO;
test %r12b, %r12b;
jnz .L__enc_xor4;
write_block4();
popq %rbx;
popq %r12;
RET;
SYM_FUNC_END(blowfish_enc_blk_4way)
.L__enc_xor4:
xor_block4();
popq %rbx;
popq %r12;
RET;
SYM_FUNC_END(__blowfish_enc_blk_4way)
SYM_TYPED_FUNC_START(blowfish_dec_blk_4way)
SYM_FUNC_START(__blowfish_dec_blk_4way)
/* input:
* %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: cbc (bool)
*/
pushq %r12;
pushq %rbx;
pushq %rcx;
pushq %rdx;
movq %rdi, CTX;
movq %rsi, %r11
movq %rsi, %r11;
movq %rdx, RIO;
preload_roundkey_dec(17);
@ -359,6 +336,14 @@ SYM_TYPED_FUNC_START(blowfish_dec_blk_4way)
round_dec4(3);
add_preloaded_roundkey4();
popq RIO;
popq %r12;
testq %r12, %r12;
jz .L_no_cbc_xor;
xor_block4();
.L_no_cbc_xor:
movq %r11, RIO;
write_block4();
@ -366,4 +351,4 @@ SYM_TYPED_FUNC_START(blowfish_dec_blk_4way)
popq %r12;
RET;
SYM_FUNC_END(blowfish_dec_blk_4way)
SYM_FUNC_END(__blowfish_dec_blk_4way)

@ -16,26 +16,28 @@
#include <linux/module.h>
#include <linux/types.h>
#include "ecb_cbc_helpers.h"
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
asmlinkage void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
asmlinkage void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void __blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool cbc);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
static inline void blowfish_dec_ecb_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
return __blowfish_dec_blk_4way(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
static inline void blowfish_dec_cbc_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
return __blowfish_dec_blk_4way(ctx, dst, src, true);
}
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@ -54,183 +56,35 @@ static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm,
return blowfish_setkey(&tfm->base, key, keylen);
}
static int ecb_crypt(struct skcipher_request *req,
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
{
unsigned int bsize = BF_BLOCK_SIZE;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
u8 *wsrc = walk.src.virt.addr;
u8 *wdst = walk.dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
fn_4way(ctx, wdst, wsrc);
wsrc += bsize * 4;
wdst += bsize * 4;
nbytes -= bsize * 4;
} while (nbytes >= bsize * 4);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
fn(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int ecb_encrypt(struct skcipher_request *req)
{
return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way);
ECB_WALK_START(req, BF_BLOCK_SIZE, -1);
ECB_BLOCK(4, blowfish_enc_blk_4way);
ECB_BLOCK(1, blowfish_enc_blk);
ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way);
}
static unsigned int __cbc_encrypt(struct bf_ctx *ctx,
struct skcipher_walk *walk)
{
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv;
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv;
return nbytes;
ECB_WALK_START(req, BF_BLOCK_SIZE, -1);
ECB_BLOCK(4, blowfish_dec_ecb_4way);
ECB_BLOCK(1, blowfish_dec_blk);
ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while (walk.nbytes) {
nbytes = __cbc_encrypt(ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct bf_ctx *ctx,
struct skcipher_walk *walk)
{
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ivs[4 - 1];
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
nbytes -= bsize * 4 - bsize;
src -= 4 - 1;
dst -= 4 - 1;
ivs[0] = src[0];
ivs[1] = src[1];
ivs[2] = src[2];
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
dst[1] ^= ivs[0];
dst[2] ^= ivs[1];
dst[3] ^= ivs[2];
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 4);
}
/* Handle leftovers */
for (;;) {
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
CBC_WALK_START(req, BF_BLOCK_SIZE, -1);
CBC_ENC_BLOCK(blowfish_enc_blk);
CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while (walk.nbytes) {
nbytes = __cbc_decrypt(ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
}
return err;
CBC_WALK_START(req, BF_BLOCK_SIZE, -1);
CBC_DEC_BLOCK(4, blowfish_dec_cbc_4way);
CBC_DEC_BLOCK(1, blowfish_dec_blk);
CBC_WALK_END();
}
static struct crypto_alg bf_cipher_alg = {

@ -13,13 +13,14 @@
#define ECB_WALK_START(req, bsize, fpu_blocks) do { \
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \
const int __fpu_blocks = (fpu_blocks); \
const int __bsize = (bsize); \
struct skcipher_walk walk; \
int err = skcipher_walk_virt(&walk, (req), false); \
while (walk.nbytes > 0) { \
unsigned int nbytes = walk.nbytes; \
bool do_fpu = (fpu_blocks) != -1 && \
nbytes >= (fpu_blocks) * __bsize; \
bool do_fpu = __fpu_blocks != -1 && \
nbytes >= __fpu_blocks * __bsize; \
const u8 *src = walk.src.virt.addr; \
u8 *dst = walk.dst.virt.addr; \
u8 __maybe_unused buf[(bsize)]; \
@ -35,7 +36,12 @@
} while (0)
#define ECB_BLOCK(blocks, func) do { \
while (nbytes >= (blocks) * __bsize) { \
const int __blocks = (blocks); \
if (do_fpu && __blocks < __fpu_blocks) { \
kernel_fpu_end(); \
do_fpu = false; \
} \
while (nbytes >= __blocks * __bsize) { \
(func)(ctx, dst, src); \
ECB_WALK_ADVANCE(blocks); \
} \
@ -53,7 +59,12 @@
} while (0)
#define CBC_DEC_BLOCK(blocks, func) do { \
while (nbytes >= (blocks) * __bsize) { \
const int __blocks = (blocks); \
if (do_fpu && __blocks < __fpu_blocks) { \
kernel_fpu_end(); \
do_fpu = false; \
} \
while (nbytes >= __blocks * __bsize) { \
const u8 *__iv = src + ((blocks) - 1) * __bsize; \
if (dst == src) \
__iv = memcpy(buf, __iv, __bsize); \
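With the cached __fpu_blocks, ECB_BLOCK()/CBC_DEC_BLOCK() can now drop out of kernel_fpu_begin() as soon as a step narrower than the FPU threshold is reached, so e.g. the single-block generic fallback no longer runs inside the FPU region. Each step now behaves roughly like this sketch (illustrative, not the literal macro expansion; the real macros also advance the outer walk state):

	#include <asm/fpu/api.h>
	#include <linux/types.h>

	static unsigned int ecb_step(const void *ctx, u8 *dst, const u8 *src,
				     unsigned int nbytes, unsigned int bsize,
				     int blocks, int fpu_blocks, bool *do_fpu,
				     void (*fn)(const void *ctx, u8 *dst,
						const u8 *src))
	{
		/* widths below the FPU threshold do not use SIMD */
		if (*do_fpu && blocks < fpu_blocks) {
			kernel_fpu_end();
			*do_fpu = false;
		}

		while (nbytes >= blocks * bsize) {
			fn(ctx, dst, src);
			dst += blocks * bsize;
			src += blocks * bsize;
			nbytes -= blocks * bsize;
		}
		return nbytes;
	}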

@ -4,7 +4,7 @@
* instructions. This file contains accelerated part of ghash
* implementation. More information about PCLMULQDQ can be found at:
*
* http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
* https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
@ -88,7 +88,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
RET
SYM_FUNC_END(__clmul_gf128mul_ble)
/* void clmul_ghash_mul(char *dst, const u128 *shash) */
/* void clmul_ghash_mul(char *dst, const le128 *shash) */
SYM_FUNC_START(clmul_ghash_mul)
FRAME_BEGIN
movups (%rdi), DATA
@ -104,7 +104,7 @@ SYM_FUNC_END(clmul_ghash_mul)
/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
* const u128 *shash);
* const le128 *shash);
*/
SYM_FUNC_START(clmul_ghash_update)
FRAME_BEGIN

@ -19,21 +19,22 @@
#include <crypto/internal/simd.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
void clmul_ghash_mul(char *dst, const u128 *shash);
void clmul_ghash_mul(char *dst, const le128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
const u128 *shash);
const le128 *shash);
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
struct ghash_ctx {
u128 shash;
le128 shash;
};
struct ghash_desc_ctx {
@ -54,22 +55,40 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
be128 *x = (be128 *)key;
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
/* perform multiplication by 'x' in GF(2^128) */
a = be64_to_cpu(x->a);
b = be64_to_cpu(x->b);
ctx->shash.a = (b << 1) | (a >> 63);
ctx->shash.b = (a << 1) | (b >> 63);
/*
* GHASH maps bits to polynomial coefficients backwards, which makes it
* hard to implement. But it can be shown that the GHASH multiplication
*
* D * K (mod x^128 + x^7 + x^2 + x + 1)
*
* (where D is a data block and K is the key) is equivalent to:
*
* bitreflect(D) * bitreflect(K) * x^(-127)
* (mod x^128 + x^127 + x^126 + x^121 + 1)
*
* So, the code below precomputes:
*
* bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
*
* ... but in Montgomery form (so that Montgomery multiplication can be
* used), i.e. with an extra x^128 factor, which means actually:
*
* bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
*
* The within-a-byte part of bitreflect() cancels out GHASH's built-in
* reflection, and thus bitreflect() is actually a byteswap.
*/
a = get_unaligned_be64(key);
b = get_unaligned_be64(key + 8);
ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
if (a >> 63)
ctx->shash.b ^= ((u64)0xc2) << 56;
ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
return 0;
}

@ -7,6 +7,7 @@
#define COMPILE_OFFSETS
#include <linux/crypto.h>
#include <crypto/aria.h>
#include <linux/sched.h>
#include <linux/stddef.h>
#include <linux/hardirq.h>
@ -111,5 +112,12 @@ static void __used common(void)
#ifdef CONFIG_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
#if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64)
/* Offset for fields in aria_ctx */
BLANK();
OFFSET(ARIA_CTX_enc_key, aria_ctx, enc_key);
OFFSET(ARIA_CTX_dec_key, aria_ctx, dec_key);
OFFSET(ARIA_CTX_rounds, aria_ctx, rounds);
#endif
}
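These OFFSET() entries turn into plain #defines in the generated asm-offsets.h, which is what lets the new ARIA .S files address context fields symbolically (e.g. the cmpl $12, ARIA_CTX_rounds(CTX) checks earlier in this diff). A userspace sketch of the mechanism with a hypothetical stand-in struct (the real values come from offsetof() on the actual struct aria_ctx at build time; field sizes follow the BUILD_BUG_ONs added to aria_set_key() below):

	#include <stddef.h>
	#include <stdio.h>

	/* stand-in only; see include/crypto/aria.h for the real layout */
	struct aria_ctx_like {
		unsigned int enc_key[17][4];	/* 272 bytes */
		unsigned int dec_key[17][4];	/* 272 bytes */
		int rounds;
		int key_length;
	};

	int main(void)
	{
		printf("enc_key %zu, dec_key %zu, rounds %zu\n",
		       offsetof(struct aria_ctx_like, enc_key),
		       offsetof(struct aria_ctx_like, dec_key),
		       offsetof(struct aria_ctx_like, rounds));
		return 0;
	}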

@ -308,10 +308,9 @@ static int adiantum_finish(struct skcipher_request *req)
return 0;
}
static void adiantum_streamcipher_done(struct crypto_async_request *areq,
int err)
static void adiantum_streamcipher_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (!err)
err = adiantum_finish(req);
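This hunk and the ones that follow are the mechanical part of the "change request callback to take void pointer" item from the summary: completion handlers now receive their context pointer directly instead of fishing it out of crypto_async_request::data. For a hypothetical out-of-tree user the conversion looks roughly like this (my_done()/struct my_req are made-up names used only to show the pattern):

	#include <linux/completion.h>
	#include <linux/crypto.h>

	struct my_req {
		struct completion done;
		int err;
	};

	/* old: static void my_done(struct crypto_async_request *areq, int err)
	 *      { struct my_req *r = areq->data; ... }
	 */
	static void my_done(void *data, int err)
	{
		struct my_req *r = data;	/* context pointer arrives directly */

		if (err == -EINPROGRESS)	/* backlog notification, not final */
			return;
		r->err = err;
		complete(&r->done);
	}

	/* Registration is unchanged -- the last argument is what later comes
	 * back as 'data':
	 *	skcipher_request_set_callback(req, flags, my_done, r);
	 */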

@ -1186,7 +1186,7 @@ EXPORT_SYMBOL_GPL(af_alg_free_resources);
/**
* af_alg_async_cb - AIO callback handler
* @_req: async request info
* @data: async request completion data
* @err: if non-zero, error result to be returned via ki_complete();
* otherwise return the AIO output length via ki_complete().
*
@ -1196,9 +1196,9 @@ EXPORT_SYMBOL_GPL(af_alg_free_resources);
* The number of bytes to be generated with the AIO operation must be set
* in areq->outlen before the AIO callback handler is invoked.
*/
void af_alg_async_cb(struct crypto_async_request *_req, int err)
void af_alg_async_cb(void *data, int err)
{
struct af_alg_async_req *areq = _req->data;
struct af_alg_async_req *areq = data;
struct sock *sk = areq->sk;
struct kiocb *iocb = areq->iocb;
unsigned int resultlen;

@ -45,7 +45,7 @@ static int hash_walk_next(struct crypto_hash_walk *walk)
unsigned int nbytes = min(walk->entrylen,
((unsigned int)(PAGE_SIZE)) - offset);
walk->data = kmap_atomic(walk->pg);
walk->data = kmap_local_page(walk->pg);
walk->data += offset;
if (offset & alignmask) {
@ -95,7 +95,7 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err)
}
}
kunmap_atomic(walk->data);
kunmap_local(walk->data);
crypto_yield(walk->flags);
if (err)
@ -190,133 +190,98 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
}
EXPORT_SYMBOL_GPL(crypto_ahash_setkey);
static inline unsigned int ahash_align_buffer_size(unsigned len,
unsigned long mask)
{
return len + (mask & ~(crypto_tfm_ctx_alignment() - 1));
}
static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt,
bool has_state)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
unsigned long alignmask = crypto_ahash_alignmask(tfm);
unsigned int ds = crypto_ahash_digestsize(tfm);
struct ahash_request_priv *priv;
struct ahash_request *subreq;
unsigned int subreq_size;
unsigned int reqsize;
u8 *result;
gfp_t gfp;
u32 flags;
priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask),
(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC);
if (!priv)
subreq_size = sizeof(*subreq);
reqsize = crypto_ahash_reqsize(tfm);
reqsize = ALIGN(reqsize, crypto_tfm_ctx_alignment());
subreq_size += reqsize;
subreq_size += ds;
subreq_size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);
flags = ahash_request_flags(req);
gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC;
subreq = kmalloc(subreq_size, gfp);
if (!subreq)
return -ENOMEM;
/*
* WARNING: Voodoo programming below!
*
* The code below is obscure and hard to understand, thus explanation
* is necessary. See include/crypto/hash.h and include/linux/crypto.h
* to understand the layout of structures used here!
*
* The code here will replace portions of the ORIGINAL request with
* pointers to new code and buffers so the hashing operation can store
* the result in aligned buffer. We will call the modified request
* an ADJUSTED request.
*
* The newly mangled request will look as such:
*
* req {
* .result = ADJUSTED[new aligned buffer]
* .base.complete = ADJUSTED[pointer to completion function]
* .base.data = ADJUSTED[*req (pointer to self)]
* .priv = ADJUSTED[new priv] {
* .result = ORIGINAL(result)
* .complete = ORIGINAL(base.complete)
* .data = ORIGINAL(base.data)
* }
*/
ahash_request_set_tfm(subreq, tfm);
ahash_request_set_callback(subreq, flags, cplt, req);
priv->result = req->result;
priv->complete = req->base.complete;
priv->data = req->base.data;
priv->flags = req->base.flags;
result = (u8 *)(subreq + 1) + reqsize;
result = PTR_ALIGN(result, alignmask + 1);
/*
* WARNING: We do not backup req->priv here! The req->priv
* is for internal use of the Crypto API and the
* user must _NOT_ _EVER_ depend on it's content!
*/
ahash_request_set_crypt(subreq, req->src, result, req->nbytes);
req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1);
req->base.complete = cplt;
req->base.data = req;
req->priv = priv;
if (has_state) {
void *state;
state = kmalloc(crypto_ahash_statesize(tfm), gfp);
if (!state) {
kfree(subreq);
return -ENOMEM;
}
crypto_ahash_export(req, state);
crypto_ahash_import(subreq, state);
kfree_sensitive(state);
}
req->priv = subreq;
return 0;
}
static void ahash_restore_req(struct ahash_request *req, int err)
{
struct ahash_request_priv *priv = req->priv;
struct ahash_request *subreq = req->priv;
if (!err)
memcpy(priv->result, req->result,
memcpy(req->result, subreq->result,
crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
/* Restore the original crypto request. */
req->result = priv->result;
ahash_request_set_callback(req, priv->flags,
priv->complete, priv->data);
req->priv = NULL;
/* Free the req->priv.priv from the ADJUSTED request. */
kfree_sensitive(priv);
kfree_sensitive(subreq);
}
static void ahash_notify_einprogress(struct ahash_request *req)
static void ahash_op_unaligned_done(void *data, int err)
{
struct ahash_request_priv *priv = req->priv;
struct crypto_async_request oreq;
struct ahash_request *areq = data;
oreq.data = priv->data;
priv->complete(&oreq, -EINPROGRESS);
}
static void ahash_op_unaligned_done(struct crypto_async_request *req, int err)
{
struct ahash_request *areq = req->data;
if (err == -EINPROGRESS) {
ahash_notify_einprogress(areq);
return;
}
/*
* Restore the original request, see ahash_op_unaligned() for what
* goes where.
*
* The "struct ahash_request *req" here is in fact the "req.base"
* from the ADJUSTED request from ahash_op_unaligned(), thus as it
* is a pointer to self, it is also the ADJUSTED "req" .
*/
if (err == -EINPROGRESS)
goto out;
/* First copy req->result into req->priv.result */
ahash_restore_req(areq, err);
out:
/* Complete the ORIGINAL request. */
areq->base.complete(&areq->base, err);
ahash_request_complete(areq, err);
}
static int ahash_op_unaligned(struct ahash_request *req,
int (*op)(struct ahash_request *))
int (*op)(struct ahash_request *),
bool has_state)
{
int err;
err = ahash_save_req(req, ahash_op_unaligned_done);
err = ahash_save_req(req, ahash_op_unaligned_done, has_state);
if (err)
return err;
err = op(req);
err = op(req->priv);
if (err == -EINPROGRESS || err == -EBUSY)
return err;
@ -326,13 +291,14 @@ static int ahash_op_unaligned(struct ahash_request *req,
}
static int crypto_ahash_op(struct ahash_request *req,
int (*op)(struct ahash_request *))
int (*op)(struct ahash_request *),
bool has_state)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
unsigned long alignmask = crypto_ahash_alignmask(tfm);
if ((unsigned long)req->result & alignmask)
return ahash_op_unaligned(req, op);
return ahash_op_unaligned(req, op, has_state);
return op(req);
}
@ -345,7 +311,7 @@ int crypto_ahash_final(struct ahash_request *req)
int ret;
crypto_stats_get(alg);
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final, true);
crypto_stats_ahash_final(nbytes, ret, alg);
return ret;
}
@ -359,7 +325,7 @@ int crypto_ahash_finup(struct ahash_request *req)
int ret;
crypto_stats_get(alg);
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup, true);
crypto_stats_ahash_final(nbytes, ret, alg);
return ret;
}
@ -376,32 +342,34 @@ int crypto_ahash_digest(struct ahash_request *req)
if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
ret = -ENOKEY;
else
ret = crypto_ahash_op(req, tfm->digest);
ret = crypto_ahash_op(req, tfm->digest, false);
crypto_stats_ahash_final(nbytes, ret, alg);
return ret;
}
EXPORT_SYMBOL_GPL(crypto_ahash_digest);
static void ahash_def_finup_done2(struct crypto_async_request *req, int err)
static void ahash_def_finup_done2(void *data, int err)
{
struct ahash_request *areq = req->data;
struct ahash_request *areq = data;
if (err == -EINPROGRESS)
return;
ahash_restore_req(areq, err);
areq->base.complete(&areq->base, err);
ahash_request_complete(areq, err);
}
static int ahash_def_finup_finish1(struct ahash_request *req, int err)
{
struct ahash_request *subreq = req->priv;
if (err)
goto out;
req->base.complete = ahash_def_finup_done2;
subreq->base.complete = ahash_def_finup_done2;
err = crypto_ahash_reqtfm(req)->final(req);
err = crypto_ahash_reqtfm(req)->final(subreq);
if (err == -EINPROGRESS || err == -EBUSY)
return err;
@ -410,22 +378,23 @@ out:
return err;
}
static void ahash_def_finup_done1(struct crypto_async_request *req, int err)
static void ahash_def_finup_done1(void *data, int err)
{
struct ahash_request *areq = req->data;
struct ahash_request *areq = data;
struct ahash_request *subreq;
if (err == -EINPROGRESS) {
ahash_notify_einprogress(areq);
return;
}
if (err == -EINPROGRESS)
goto out;
areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
subreq = areq->priv;
subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
err = ahash_def_finup_finish1(areq, err);
if (areq->priv)
if (err == -EINPROGRESS || err == -EBUSY)
return;
areq->base.complete(&areq->base, err);
out:
ahash_request_complete(areq, err);
}
static int ahash_def_finup(struct ahash_request *req)
@ -433,11 +402,11 @@ static int ahash_def_finup(struct ahash_request *req)
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
int err;
err = ahash_save_req(req, ahash_def_finup_done1);
err = ahash_save_req(req, ahash_def_finup_done1, true);
if (err)
return err;
err = tfm->update(req);
err = tfm->update(req->priv);
if (err == -EINPROGRESS || err == -EBUSY)
return err;

View File

@ -643,9 +643,9 @@ int crypto_has_alg(const char *name, u32 type, u32 mask)
}
EXPORT_SYMBOL_GPL(crypto_has_alg);
void crypto_req_done(struct crypto_async_request *req, int err)
void crypto_req_done(void *data, int err)
{
struct crypto_wait *wait = req->data;
struct crypto_wait *wait = data;
if (err == -EINPROGRESS)
return;

@ -178,6 +178,10 @@ int aria_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len)
if (key_len != 16 && key_len != 24 && key_len != 32)
return -EINVAL;
BUILD_BUG_ON(sizeof(ctx->enc_key) != 272);
BUILD_BUG_ON(sizeof(ctx->dec_key) != 272);
BUILD_BUG_ON(sizeof(int) != sizeof(ctx->rounds));
ctx->key_length = key_len;
ctx->rounds = (key_len + 32) / 4;

@ -109,9 +109,9 @@ out:
return err;
}
static void authenc_geniv_ahash_done(struct crypto_async_request *areq, int err)
static void authenc_geniv_ahash_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
struct aead_instance *inst = aead_alg_instance(authenc);
struct authenc_instance_ctx *ictx = aead_instance_ctx(inst);
@ -160,10 +160,9 @@ static int crypto_authenc_genicv(struct aead_request *req, unsigned int flags)
return 0;
}
static void crypto_authenc_encrypt_done(struct crypto_async_request *req,
int err)
static void crypto_authenc_encrypt_done(void *data, int err)
{
struct aead_request *areq = req->data;
struct aead_request *areq = data;
if (err)
goto out;
@ -261,10 +260,9 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req,
return crypto_skcipher_decrypt(skreq);
}
static void authenc_verify_ahash_done(struct crypto_async_request *areq,
int err)
static void authenc_verify_ahash_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;

@ -107,10 +107,9 @@ static int crypto_authenc_esn_genicv_tail(struct aead_request *req,
return 0;
}
static void authenc_esn_geniv_ahash_done(struct crypto_async_request *areq,
int err)
static void authenc_esn_geniv_ahash_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
err = err ?: crypto_authenc_esn_genicv_tail(req, 0);
aead_request_complete(req, err);
@ -153,10 +152,9 @@ static int crypto_authenc_esn_genicv(struct aead_request *req,
}
static void crypto_authenc_esn_encrypt_done(struct crypto_async_request *req,
int err)
static void crypto_authenc_esn_encrypt_done(void *data, int err)
{
struct aead_request *areq = req->data;
struct aead_request *areq = data;
if (!err)
err = crypto_authenc_esn_genicv(areq, 0);
@ -258,10 +256,9 @@ decrypt:
return crypto_skcipher_decrypt(skreq);
}
static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq,
int err)
static void authenc_esn_verify_ahash_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
err = err ?: crypto_authenc_esn_decrypt_tail(req, 0);
authenc_esn_request_complete(req, err);

@ -224,9 +224,9 @@ out:
return err;
}
static void crypto_ccm_encrypt_done(struct crypto_async_request *areq, int err)
static void crypto_ccm_encrypt_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
u8 *odata = pctx->odata;
@ -320,10 +320,9 @@ static int crypto_ccm_encrypt(struct aead_request *req)
return err;
}
static void crypto_ccm_decrypt_done(struct crypto_async_request *areq,
int err)
static void crypto_ccm_decrypt_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(aead);

@ -115,9 +115,9 @@ static int poly_copy_tag(struct aead_request *req)
return 0;
}
static void chacha_decrypt_done(struct crypto_async_request *areq, int err)
static void chacha_decrypt_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_verify_tag);
async_done_continue(data, err, poly_verify_tag);
}
static int chacha_decrypt(struct aead_request *req)
@ -161,9 +161,9 @@ static int poly_tail_continue(struct aead_request *req)
return chacha_decrypt(req);
}
static void poly_tail_done(struct crypto_async_request *areq, int err)
static void poly_tail_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_tail_continue);
async_done_continue(data, err, poly_tail_continue);
}
static int poly_tail(struct aead_request *req)
@ -191,9 +191,9 @@ static int poly_tail(struct aead_request *req)
return poly_tail_continue(req);
}
static void poly_cipherpad_done(struct crypto_async_request *areq, int err)
static void poly_cipherpad_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_tail);
async_done_continue(data, err, poly_tail);
}
static int poly_cipherpad(struct aead_request *req)
@ -220,9 +220,9 @@ static int poly_cipherpad(struct aead_request *req)
return poly_tail(req);
}
static void poly_cipher_done(struct crypto_async_request *areq, int err)
static void poly_cipher_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_cipherpad);
async_done_continue(data, err, poly_cipherpad);
}
static int poly_cipher(struct aead_request *req)
@ -250,9 +250,9 @@ static int poly_cipher(struct aead_request *req)
return poly_cipherpad(req);
}
static void poly_adpad_done(struct crypto_async_request *areq, int err)
static void poly_adpad_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_cipher);
async_done_continue(data, err, poly_cipher);
}
static int poly_adpad(struct aead_request *req)
@ -279,9 +279,9 @@ static int poly_adpad(struct aead_request *req)
return poly_cipher(req);
}
static void poly_ad_done(struct crypto_async_request *areq, int err)
static void poly_ad_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_adpad);
async_done_continue(data, err, poly_adpad);
}
static int poly_ad(struct aead_request *req)
@ -303,9 +303,9 @@ static int poly_ad(struct aead_request *req)
return poly_adpad(req);
}
static void poly_setkey_done(struct crypto_async_request *areq, int err)
static void poly_setkey_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_ad);
async_done_continue(data, err, poly_ad);
}
static int poly_setkey(struct aead_request *req)
@ -329,9 +329,9 @@ static int poly_setkey(struct aead_request *req)
return poly_ad(req);
}
static void poly_init_done(struct crypto_async_request *areq, int err)
static void poly_init_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_setkey);
async_done_continue(data, err, poly_setkey);
}
static int poly_init(struct aead_request *req)
@ -352,9 +352,9 @@ static int poly_init(struct aead_request *req)
return poly_setkey(req);
}
static void poly_genkey_done(struct crypto_async_request *areq, int err)
static void poly_genkey_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_init);
async_done_continue(data, err, poly_init);
}
static int poly_genkey(struct aead_request *req)
@ -391,9 +391,9 @@ static int poly_genkey(struct aead_request *req)
return poly_init(req);
}
static void chacha_encrypt_done(struct crypto_async_request *areq, int err)
static void chacha_encrypt_done(void *data, int err)
{
async_done_continue(areq->data, err, poly_genkey);
async_done_continue(data, err, poly_genkey);
}
static int chacha_encrypt(struct aead_request *req)

@ -72,7 +72,6 @@ struct cryptd_skcipher_ctx {
};
struct cryptd_skcipher_request_ctx {
crypto_completion_t complete;
struct skcipher_request req;
};
@ -83,6 +82,7 @@ struct cryptd_hash_ctx {
struct cryptd_hash_request_ctx {
crypto_completion_t complete;
void *data;
struct shash_desc desc;
};
@ -92,7 +92,7 @@ struct cryptd_aead_ctx {
};
struct cryptd_aead_request_ctx {
crypto_completion_t complete;
struct aead_request req;
};
static void cryptd_queue_worker(struct work_struct *work);
@ -177,8 +177,8 @@ static void cryptd_queue_worker(struct work_struct *work)
return;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
req->complete(req, 0);
crypto_request_complete(backlog, -EINPROGRESS);
crypto_request_complete(req, 0);
if (cpu_queue->queue.qlen)
queue_work(cryptd_wq, &cpu_queue->work);
@ -237,75 +237,76 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
return crypto_skcipher_setkey(child, key, keylen);
}
static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
static struct skcipher_request *cryptd_skcipher_prepare(
struct skcipher_request *req, int err)
{
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct skcipher_request *subreq = &rctx->req;
struct cryptd_skcipher_ctx *ctx;
struct crypto_skcipher *child;
req->base.complete = subreq->base.complete;
req->base.data = subreq->base.data;
if (unlikely(err == -EINPROGRESS))
return NULL;
ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
child = ctx->child;
skcipher_request_set_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
req->iv);
return subreq;
}
static void cryptd_skcipher_complete(struct skcipher_request *req, int err,
crypto_completion_t complete)
{
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct skcipher_request *subreq = &rctx->req;
int refcnt = refcount_read(&ctx->refcnt);
local_bh_disable();
rctx->complete(&req->base, err);
skcipher_request_complete(req, err);
local_bh_enable();
if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt))
if (unlikely(err == -EINPROGRESS)) {
subreq->base.complete = req->base.complete;
subreq->base.data = req->base.data;
req->base.complete = complete;
req->base.data = req;
} else if (refcnt && refcount_dec_and_test(&ctx->refcnt))
crypto_free_skcipher(tfm);
}
static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
int err)
static void cryptd_skcipher_encrypt(void *data, int err)
{
struct skcipher_request *req = skcipher_request_cast(base);
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_request *subreq = &rctx->req;
struct crypto_skcipher *child = ctx->child;
struct skcipher_request *req = data;
struct skcipher_request *subreq;
if (unlikely(err == -EINPROGRESS))
goto out;
subreq = cryptd_skcipher_prepare(req, err);
if (likely(subreq))
err = crypto_skcipher_encrypt(subreq);
skcipher_request_set_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
req->iv);
err = crypto_skcipher_encrypt(subreq);
skcipher_request_zero(subreq);
req->base.complete = rctx->complete;
out:
cryptd_skcipher_complete(req, err);
cryptd_skcipher_complete(req, err, cryptd_skcipher_encrypt);
}
static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
int err)
static void cryptd_skcipher_decrypt(void *data, int err)
{
struct skcipher_request *req = skcipher_request_cast(base);
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_request *subreq = &rctx->req;
struct crypto_skcipher *child = ctx->child;
struct skcipher_request *req = data;
struct skcipher_request *subreq;
if (unlikely(err == -EINPROGRESS))
goto out;
subreq = cryptd_skcipher_prepare(req, err);
if (likely(subreq))
err = crypto_skcipher_decrypt(subreq);
skcipher_request_set_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
req->iv);
err = crypto_skcipher_decrypt(subreq);
skcipher_request_zero(subreq);
req->base.complete = rctx->complete;
out:
cryptd_skcipher_complete(req, err);
cryptd_skcipher_complete(req, err, cryptd_skcipher_decrypt);
}
static int cryptd_skcipher_enqueue(struct skcipher_request *req,
@ -313,11 +314,14 @@ static int cryptd_skcipher_enqueue(struct skcipher_request *req,
{
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct skcipher_request *subreq = &rctx->req;
struct cryptd_queue *queue;
queue = cryptd_get_queue(crypto_skcipher_tfm(tfm));
rctx->complete = req->base.complete;
subreq->base.complete = req->base.complete;
subreq->base.data = req->base.data;
req->base.complete = compl;
req->base.data = req;
return cryptd_enqueue_request(queue, &req->base);
}
@ -470,45 +474,63 @@ static int cryptd_hash_enqueue(struct ahash_request *req,
cryptd_get_queue(crypto_ahash_tfm(tfm));
rctx->complete = req->base.complete;
rctx->data = req->base.data;
req->base.complete = compl;
req->base.data = req;
return cryptd_enqueue_request(queue, &req->base);
}
static void cryptd_hash_complete(struct ahash_request *req, int err)
static struct shash_desc *cryptd_hash_prepare(struct ahash_request *req,
int err)
{
struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
req->base.complete = rctx->complete;
req->base.data = rctx->data;
if (unlikely(err == -EINPROGRESS))
return NULL;
return &rctx->desc;
}
static void cryptd_hash_complete(struct ahash_request *req, int err,
crypto_completion_t complete)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
int refcnt = refcount_read(&ctx->refcnt);
local_bh_disable();
rctx->complete(&req->base, err);
ahash_request_complete(req, err);
local_bh_enable();
if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt))
if (err == -EINPROGRESS) {
req->base.complete = complete;
req->base.data = req;
} else if (refcnt && refcount_dec_and_test(&ctx->refcnt))
crypto_free_ahash(tfm);
}
static void cryptd_hash_init(struct crypto_async_request *req_async, int err)
static void cryptd_hash_init(void *data, int err)
{
struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
struct ahash_request *req = data;
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
struct crypto_shash *child = ctx->child;
struct ahash_request *req = ahash_request_cast(req_async);
struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
struct shash_desc *desc = &rctx->desc;
struct shash_desc *desc;
if (unlikely(err == -EINPROGRESS))
desc = cryptd_hash_prepare(req, err);
if (unlikely(!desc))
goto out;
desc->tfm = child;
err = crypto_shash_init(desc);
req->base.complete = rctx->complete;
out:
cryptd_hash_complete(req, err);
cryptd_hash_complete(req, err, cryptd_hash_init);
}
static int cryptd_hash_init_enqueue(struct ahash_request *req)
@ -516,22 +538,16 @@ static int cryptd_hash_init_enqueue(struct ahash_request *req)
return cryptd_hash_enqueue(req, cryptd_hash_init);
}
static void cryptd_hash_update(struct crypto_async_request *req_async, int err)
static void cryptd_hash_update(void *data, int err)
{
struct ahash_request *req = ahash_request_cast(req_async);
struct cryptd_hash_request_ctx *rctx;
struct ahash_request *req = data;
struct shash_desc *desc;
rctx = ahash_request_ctx(req);
desc = cryptd_hash_prepare(req, err);
if (likely(desc))
err = shash_ahash_update(req, desc);
if (unlikely(err == -EINPROGRESS))
goto out;
err = shash_ahash_update(req, &rctx->desc);
req->base.complete = rctx->complete;
out:
cryptd_hash_complete(req, err);
cryptd_hash_complete(req, err, cryptd_hash_update);
}
static int cryptd_hash_update_enqueue(struct ahash_request *req)
@ -539,20 +555,16 @@ static int cryptd_hash_update_enqueue(struct ahash_request *req)
return cryptd_hash_enqueue(req, cryptd_hash_update);
}
static void cryptd_hash_final(struct crypto_async_request *req_async, int err)
static void cryptd_hash_final(void *data, int err)
{
struct ahash_request *req = ahash_request_cast(req_async);
struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
struct ahash_request *req = data;
struct shash_desc *desc;
if (unlikely(err == -EINPROGRESS))
goto out;
desc = cryptd_hash_prepare(req, err);
if (likely(desc))
err = crypto_shash_final(desc, req->result);
err = crypto_shash_final(&rctx->desc, req->result);
req->base.complete = rctx->complete;
out:
cryptd_hash_complete(req, err);
cryptd_hash_complete(req, err, cryptd_hash_final);
}
static int cryptd_hash_final_enqueue(struct ahash_request *req)
@ -560,20 +572,16 @@ static int cryptd_hash_final_enqueue(struct ahash_request *req)
return cryptd_hash_enqueue(req, cryptd_hash_final);
}
static void cryptd_hash_finup(struct crypto_async_request *req_async, int err)
static void cryptd_hash_finup(void *data, int err)
{
struct ahash_request *req = ahash_request_cast(req_async);
struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
struct ahash_request *req = data;
struct shash_desc *desc;
if (unlikely(err == -EINPROGRESS))
goto out;
desc = cryptd_hash_prepare(req, err);
if (likely(desc))
err = shash_ahash_finup(req, desc);
err = shash_ahash_finup(req, &rctx->desc);
req->base.complete = rctx->complete;
out:
cryptd_hash_complete(req, err);
cryptd_hash_complete(req, err, cryptd_hash_finup);
}
static int cryptd_hash_finup_enqueue(struct ahash_request *req)
@ -581,25 +589,24 @@ static int cryptd_hash_finup_enqueue(struct ahash_request *req)
return cryptd_hash_enqueue(req, cryptd_hash_finup);
}
static void cryptd_hash_digest(struct crypto_async_request *req_async, int err)
static void cryptd_hash_digest(void *data, int err)
{
struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
struct ahash_request *req = data;
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
struct crypto_shash *child = ctx->child;
struct ahash_request *req = ahash_request_cast(req_async);
struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
struct shash_desc *desc = &rctx->desc;
struct shash_desc *desc;
if (unlikely(err == -EINPROGRESS))
desc = cryptd_hash_prepare(req, err);
if (unlikely(!desc))
goto out;
desc->tfm = child;
err = shash_ahash_digest(req, desc);
req->base.complete = rctx->complete;
out:
cryptd_hash_complete(req, err);
cryptd_hash_complete(req, err, cryptd_hash_digest);
}
static int cryptd_hash_digest_enqueue(struct ahash_request *req)
@ -712,56 +719,74 @@ static int cryptd_aead_setauthsize(struct crypto_aead *parent,
}
static void cryptd_aead_crypt(struct aead_request *req,
struct crypto_aead *child,
int err,
int (*crypt)(struct aead_request *req))
struct crypto_aead *child, int err,
int (*crypt)(struct aead_request *req),
crypto_completion_t compl)
{
struct cryptd_aead_request_ctx *rctx;
struct aead_request *subreq;
struct cryptd_aead_ctx *ctx;
crypto_completion_t compl;
struct crypto_aead *tfm;
int refcnt;
rctx = aead_request_ctx(req);
compl = rctx->complete;
subreq = &rctx->req;
req->base.complete = subreq->base.complete;
req->base.data = subreq->base.data;
tfm = crypto_aead_reqtfm(req);
if (unlikely(err == -EINPROGRESS))
goto out;
aead_request_set_tfm(req, child);
err = crypt( req );
aead_request_set_tfm(subreq, child);
aead_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
req->iv);
aead_request_set_ad(subreq, req->assoclen);
err = crypt(subreq);
out:
ctx = crypto_aead_ctx(tfm);
refcnt = refcount_read(&ctx->refcnt);
local_bh_disable();
compl(&req->base, err);
aead_request_complete(req, err);
local_bh_enable();
if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt))
if (err == -EINPROGRESS) {
subreq->base.complete = req->base.complete;
subreq->base.data = req->base.data;
req->base.complete = compl;
req->base.data = req;
} else if (refcnt && refcount_dec_and_test(&ctx->refcnt))
crypto_free_aead(tfm);
}
static void cryptd_aead_encrypt(struct crypto_async_request *areq, int err)
static void cryptd_aead_encrypt(void *data, int err)
{
struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm);
struct crypto_aead *child = ctx->child;
struct aead_request *req;
struct aead_request *req = data;
struct cryptd_aead_ctx *ctx;
struct crypto_aead *child;
req = container_of(areq, struct aead_request, base);
cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt);
ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
child = ctx->child;
cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt,
cryptd_aead_encrypt);
}
static void cryptd_aead_decrypt(struct crypto_async_request *areq, int err)
static void cryptd_aead_decrypt(void *data, int err)
{
struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm);
struct crypto_aead *child = ctx->child;
struct aead_request *req;
struct aead_request *req = data;
struct cryptd_aead_ctx *ctx;
struct crypto_aead *child;
req = container_of(areq, struct aead_request, base);
cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt);
ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
child = ctx->child;
cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt,
cryptd_aead_decrypt);
}
static int cryptd_aead_enqueue(struct aead_request *req,
@ -770,9 +795,12 @@ static int cryptd_aead_enqueue(struct aead_request *req,
struct cryptd_aead_request_ctx *rctx = aead_request_ctx(req);
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct cryptd_queue *queue = cryptd_get_queue(crypto_aead_tfm(tfm));
struct aead_request *subreq = &rctx->req;
rctx->complete = req->base.complete;
subreq->base.complete = req->base.complete;
subreq->base.data = req->base.data;
req->base.complete = compl;
req->base.data = req;
return cryptd_enqueue_request(queue, &req->base);
}
@ -800,8 +828,8 @@ static int cryptd_aead_init_tfm(struct crypto_aead *tfm)
ctx->child = cipher;
crypto_aead_set_reqsize(
tfm, max((unsigned)sizeof(struct cryptd_aead_request_ctx),
crypto_aead_reqsize(cipher)));
tfm, sizeof(struct cryptd_aead_request_ctx) +
crypto_aead_reqsize(cipher));
return 0;
}
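
The cryptd hunks above remove the crypto_completion_t field from the per-request contexts: while a request sits on the cryptd queue, the caller's completion handler and data are parked in the embedded subrequest, and req->base points back at cryptd's own worker. A condensed sketch of that bookkeeping, using hypothetical helper names rather than anything from the patch:

#include <crypto/skcipher.h>

static void park_caller(struct skcipher_request *req,
			struct skcipher_request *subreq,
			crypto_completion_t worker)
{
	/* remember the caller's completion in the subrequest ... */
	subreq->base.complete = req->base.complete;
	subreq->base.data = req->base.data;
	/* ... and let the queued request call back into cryptd itself */
	req->base.complete = worker;
	req->base.data = req;
}

static void unpark_caller(struct skcipher_request *req,
			  struct skcipher_request *subreq)
{
	/* put the caller's completion back before finishing the request */
	req->base.complete = subreq->base.complete;
	req->base.data = subreq->base.data;
}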

@ -54,7 +54,7 @@ static void crypto_finalize_request(struct crypto_engine *engine,
}
}
lockdep_assert_in_softirq();
req->complete(req, err);
crypto_request_complete(req, err);
kthread_queue_work(engine->kworker, &engine->pump_requests);
}
@ -130,7 +130,7 @@ start_request:
engine->cur_req = async_req;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
crypto_request_complete(backlog, -EINPROGRESS);
if (engine->busy)
was_busy = true;
@ -214,7 +214,7 @@ req_err_1:
}
req_err_2:
async_req->complete(async_req, ret);
crypto_request_complete(async_req, ret);
retry:
/* If retry mechanism is supported, send new requests to engine */
@ -499,7 +499,7 @@ EXPORT_SYMBOL_GPL(crypto_engine_stop);
* This has the form:
* callback(struct crypto_engine *engine)
* where:
* @engine: the crypto engine structure.
* engine: the crypto engine structure.
* @rt: whether this queue is set to run as a realtime task
* @qlen: maximum size of the crypto-engine queue
*
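
crypto_request_complete() replaces the open-coded req->complete(req, err) calls above. Judging by its users in this series, the helper simply forwards the request's ->data pointer to the callback; roughly as follows (a sketch of the presumed shape, not a verbatim copy of the header):

static inline void crypto_request_complete(struct crypto_async_request *req,
					   int err)
{
	req->complete(req->data, err);
}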

View File

@ -85,9 +85,9 @@ static int crypto_cts_setkey(struct crypto_skcipher *parent, const u8 *key,
return crypto_skcipher_setkey(child, key, keylen);
}
static void cts_cbc_crypt_done(struct crypto_async_request *areq, int err)
static void cts_cbc_crypt_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (err == -EINPROGRESS)
return;
@ -125,9 +125,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
return crypto_skcipher_encrypt(subreq);
}
static void crypto_cts_encrypt_done(struct crypto_async_request *areq, int err)
static void crypto_cts_encrypt_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (err)
goto out;
@ -219,9 +219,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
return crypto_skcipher_decrypt(subreq);
}
static void crypto_cts_decrypt_done(struct crypto_async_request *areq, int err)
static void crypto_cts_decrypt_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (err)
goto out;

@ -503,10 +503,9 @@ out:
return err;
}
static void dh_safe_prime_complete_req(struct crypto_async_request *dh_req,
int err)
static void dh_safe_prime_complete_req(void *data, int err)
{
struct kpp_request *req = dh_req->data;
struct kpp_request *req = data;
kpp_request_complete(req, err);
}

@ -1384,7 +1384,8 @@ void ecc_point_mult_shamir(const struct ecc_point *result,
num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits));
i = num_bits - 1;
idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1);
idx = !!vli_test_bit(u1, i);
idx |= (!!vli_test_bit(u2, i)) << 1;
point = points[idx];
vli_set(rx, point->x, ndigits);
@ -1394,7 +1395,8 @@ void ecc_point_mult_shamir(const struct ecc_point *result,
for (--i; i >= 0; i--) {
ecc_point_double_jacobian(rx, ry, z, curve);
idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1);
idx = !!vli_test_bit(u1, i);
idx |= (!!vli_test_bit(u2, i)) << 1;
point = points[idx];
if (point) {
u64 tx[ECC_MAX_DIGITS];

@ -131,9 +131,9 @@ static int essiv_aead_setauthsize(struct crypto_aead *tfm,
return crypto_aead_setauthsize(tctx->u.aead, authsize);
}
static void essiv_skcipher_done(struct crypto_async_request *areq, int err)
static void essiv_skcipher_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
skcipher_request_complete(req, err);
}
@ -166,12 +166,17 @@ static int essiv_skcipher_decrypt(struct skcipher_request *req)
return essiv_skcipher_crypt(req, false);
}
static void essiv_aead_done(struct crypto_async_request *areq, int err)
static void essiv_aead_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
if (err == -EINPROGRESS)
goto out;
kfree(rctx->assoc);
out:
aead_request_complete(req, err);
}
@ -247,7 +252,7 @@ static int essiv_aead_crypt(struct aead_request *req, bool enc)
err = enc ? crypto_aead_encrypt(subreq) :
crypto_aead_decrypt(subreq);
if (rctx->assoc && err != -EINPROGRESS)
if (rctx->assoc && err != -EINPROGRESS && err != -EBUSY)
kfree(rctx->assoc);
return err;
}
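
The extra -EBUSY check above matters because a request flagged CRYPTO_TFM_REQ_MAY_BACKLOG may return -EBUSY and still complete asynchronously later. Synchronous callers usually sidestep having to special-case both values by using the crypto_wait helpers; a minimal sketch, assuming a ready-to-use skcipher_request:

#include <crypto/skcipher.h>

static int encrypt_sync(struct skcipher_request *req)
{
	DECLARE_CRYPTO_WAIT(wait);

	skcipher_request_set_callback(req,
				      CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);

	/* crypto_wait_req() absorbs -EINPROGRESS and -EBUSY and sleeps
	 * until crypto_req_done() reports the final status. */
	return crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
}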

@ -197,7 +197,7 @@ static inline unsigned int gcm_remain(unsigned int len)
return len ? 16 - len : 0;
}
static void gcm_hash_len_done(struct crypto_async_request *areq, int err);
static void gcm_hash_len_done(void *data, int err);
static int gcm_hash_update(struct aead_request *req,
crypto_completion_t compl,
@ -246,9 +246,9 @@ static int gcm_hash_len_continue(struct aead_request *req, u32 flags)
return gctx->complete(req, flags);
}
static void gcm_hash_len_done(struct crypto_async_request *areq, int err)
static void gcm_hash_len_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;
@ -267,10 +267,9 @@ static int gcm_hash_crypt_remain_continue(struct aead_request *req, u32 flags)
gcm_hash_len_continue(req, flags);
}
static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq,
int err)
static void gcm_hash_crypt_remain_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;
@ -298,9 +297,9 @@ static int gcm_hash_crypt_continue(struct aead_request *req, u32 flags)
return gcm_hash_crypt_remain_continue(req, flags);
}
static void gcm_hash_crypt_done(struct crypto_async_request *areq, int err)
static void gcm_hash_crypt_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;
@ -326,10 +325,9 @@ static int gcm_hash_assoc_remain_continue(struct aead_request *req, u32 flags)
return gcm_hash_crypt_remain_continue(req, flags);
}
static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq,
int err)
static void gcm_hash_assoc_remain_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;
@ -355,9 +353,9 @@ static int gcm_hash_assoc_continue(struct aead_request *req, u32 flags)
return gcm_hash_assoc_remain_continue(req, flags);
}
static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err)
static void gcm_hash_assoc_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;
@ -380,9 +378,9 @@ static int gcm_hash_init_continue(struct aead_request *req, u32 flags)
return gcm_hash_assoc_remain_continue(req, flags);
}
static void gcm_hash_init_done(struct crypto_async_request *areq, int err)
static void gcm_hash_init_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;
@ -433,9 +431,9 @@ static int gcm_encrypt_continue(struct aead_request *req, u32 flags)
return gcm_hash(req, flags);
}
static void gcm_encrypt_done(struct crypto_async_request *areq, int err)
static void gcm_encrypt_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (err)
goto out;
@ -477,9 +475,9 @@ static int crypto_gcm_verify(struct aead_request *req)
return crypto_memneq(iauth_tag, auth_tag, authsize) ? -EBADMSG : 0;
}
static void gcm_decrypt_done(struct crypto_async_request *areq, int err)
static void gcm_decrypt_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
if (!err)
err = crypto_gcm_verify(req);

@ -252,10 +252,9 @@ static int hctr2_finish(struct skcipher_request *req)
return 0;
}
static void hctr2_xctr_done(struct crypto_async_request *areq,
int err)
static void hctr2_xctr_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (!err)
err = hctr2_finish(req);

@ -205,9 +205,9 @@ static int lrw_xor_tweak_post(struct skcipher_request *req)
return lrw_xor_tweak(req, true);
}
static void lrw_crypt_done(struct crypto_async_request *areq, int err)
static void lrw_crypt_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (!err) {
struct lrw_request_ctx *rctx = skcipher_request_ctx(req);

@ -63,9 +63,9 @@ static void pcrypt_aead_serial(struct padata_priv *padata)
aead_request_complete(req->base.data, padata->info);
}
static void pcrypt_aead_done(struct crypto_async_request *areq, int err)
static void pcrypt_aead_done(void *data, int err)
{
struct aead_request *req = areq->data;
struct aead_request *req = data;
struct pcrypt_request *preq = aead_request_ctx(req);
struct padata_priv *padata = pcrypt_request_padata(preq);

@ -11,6 +11,7 @@
#include <linux/atomic.h>
#include <linux/init.h>
#include <linux/crypto.h>
#include <linux/fips.h>
#include <linux/module.h> /* for module_name() */
#include <linux/rwsem.h>
#include <linux/proc_fs.h>
@ -48,6 +49,11 @@ static int c_show(struct seq_file *m, void *p)
seq_printf(m, "internal : %s\n",
(alg->cra_flags & CRYPTO_ALG_INTERNAL) ?
"yes" : "no");
if (fips_enabled) {
seq_printf(m, "fips : %s\n",
(alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL) ?
"no" : "yes");
}
if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
seq_printf(m, "type : larval\n");
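
With fips=1 this adds one line per /proc/crypto entry, between "internal" and the type-specific fields, reporting whether the algorithm is usable in FIPS mode (CRYPTO_ALG_FIPS_INTERNAL maps to "no"). An illustrative excerpt for a FIPS-approved hash; the values and column widths here are examples only:

selftest     : passed
internal     : no
fips         : yes
type         : shash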

@ -190,7 +190,7 @@ static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err)
if (likely(!pad_len))
goto out;
out_buf = kzalloc(ctx->key_size, GFP_KERNEL);
out_buf = kzalloc(ctx->key_size, GFP_ATOMIC);
err = -ENOMEM;
if (!out_buf)
goto out;
@ -210,20 +210,17 @@ out:
return err;
}
static void pkcs1pad_encrypt_sign_complete_cb(
struct crypto_async_request *child_async_req, int err)
static void pkcs1pad_encrypt_sign_complete_cb(void *data, int err)
{
struct akcipher_request *req = child_async_req->data;
struct crypto_async_request async_req;
struct akcipher_request *req = data;
if (err == -EINPROGRESS)
return;
goto out;
async_req.data = req->base.data;
async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req));
async_req.flags = child_async_req->flags;
req->base.complete(&async_req,
pkcs1pad_encrypt_sign_complete(req, err));
err = pkcs1pad_encrypt_sign_complete(req, err);
out:
akcipher_request_complete(req, err);
}
static int pkcs1pad_encrypt(struct akcipher_request *req)
@ -328,19 +325,17 @@ done:
return err;
}
static void pkcs1pad_decrypt_complete_cb(
struct crypto_async_request *child_async_req, int err)
static void pkcs1pad_decrypt_complete_cb(void *data, int err)
{
struct akcipher_request *req = child_async_req->data;
struct crypto_async_request async_req;
struct akcipher_request *req = data;
if (err == -EINPROGRESS)
return;
goto out;
async_req.data = req->base.data;
async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req));
async_req.flags = child_async_req->flags;
req->base.complete(&async_req, pkcs1pad_decrypt_complete(req, err));
err = pkcs1pad_decrypt_complete(req, err);
out:
akcipher_request_complete(req, err);
}
static int pkcs1pad_decrypt(struct akcipher_request *req)
@ -509,19 +504,17 @@ done:
return err;
}
static void pkcs1pad_verify_complete_cb(
struct crypto_async_request *child_async_req, int err)
static void pkcs1pad_verify_complete_cb(void *data, int err)
{
struct akcipher_request *req = child_async_req->data;
struct crypto_async_request async_req;
struct akcipher_request *req = data;
if (err == -EINPROGRESS)
return;
goto out;
async_req.data = req->base.data;
async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req));
async_req.flags = child_async_req->flags;
req->base.complete(&async_req, pkcs1pad_verify_complete(req, err));
err = pkcs1pad_verify_complete(req, err);
out:
akcipher_request_complete(req, err);
}
/*

@ -23,7 +23,7 @@ static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err)
struct aead_request *subreq = aead_request_ctx(req);
struct crypto_aead *geniv;
if (err == -EINPROGRESS)
if (err == -EINPROGRESS || err == -EBUSY)
return;
if (err)
@ -36,10 +36,9 @@ out:
kfree_sensitive(subreq->iv);
}
static void seqiv_aead_encrypt_complete(struct crypto_async_request *base,
int err)
static void seqiv_aead_encrypt_complete(void *data, int err)
{
struct aead_request *req = base->data;
struct aead_request *req = data;
seqiv_aead_encrypt_complete2(req, err);
aead_request_complete(req, err);

@ -320,10 +320,10 @@ int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc)
nbytes <= min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) {
void *data;
data = kmap_atomic(sg_page(sg));
data = kmap_local_page(sg_page(sg));
err = crypto_shash_digest(desc, data + offset, nbytes,
req->result);
kunmap_atomic(data);
kunmap_local(data);
} else
err = crypto_shash_init(desc) ?:
shash_ahash_finup(req, desc);
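
This hunk is part of the kmap_atomic() to kmap_local_page() conversion called out in the merge description. A hedged sketch of the replacement pattern; example_copy_from_page() is an illustrative helper, not a kernel function:

#include <linux/highmem.h>
#include <linux/string.h>

static void example_copy_from_page(struct page *page, unsigned int offset,
				   void *dst, size_t len)
{
	/* kmap_local_page() gives a CPU-local mapping that, unlike
	 * kmap_atomic(), leaves preemption and page faults enabled;
	 * mappings must still be released in reverse order. */
	void *vaddr = kmap_local_page(page);

	memcpy(dst, vaddr + offset, len);
	kunmap_local(vaddr);
}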

@ -42,38 +42,24 @@ struct skcipher_walk_buffer {
static int skcipher_walk_next(struct skcipher_walk *walk);
static inline void skcipher_unmap(struct scatter_walk *walk, void *vaddr)
{
if (PageHighMem(scatterwalk_page(walk)))
kunmap_atomic(vaddr);
}
static inline void *skcipher_map(struct scatter_walk *walk)
{
struct page *page = scatterwalk_page(walk);
return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) +
offset_in_page(walk->offset);
}
static inline void skcipher_map_src(struct skcipher_walk *walk)
{
walk->src.virt.addr = skcipher_map(&walk->in);
walk->src.virt.addr = scatterwalk_map(&walk->in);
}
static inline void skcipher_map_dst(struct skcipher_walk *walk)
{
walk->dst.virt.addr = skcipher_map(&walk->out);
walk->dst.virt.addr = scatterwalk_map(&walk->out);
}
static inline void skcipher_unmap_src(struct skcipher_walk *walk)
{
skcipher_unmap(&walk->in, walk->src.virt.addr);
scatterwalk_unmap(walk->src.virt.addr);
}
static inline void skcipher_unmap_dst(struct skcipher_walk *walk)
{
skcipher_unmap(&walk->out, walk->dst.virt.addr);
scatterwalk_unmap(walk->dst.virt.addr);
}
static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk)

@ -2044,11 +2044,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 211:
test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec,
NULL, 0, 16, 16, aead_speed_template_20);
NULL, 0, 16, 16, aead_speed_template_20_28_36);
test_aead_speed("gcm(aes)", ENCRYPT, sec,
NULL, 0, 16, 8, speed_template_16_24_32);
test_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec,
NULL, 0, 16, 16, aead_speed_template_20);
NULL, 0, 16, 16, aead_speed_template_20_28_36);
test_aead_speed("gcm(aes)", DECRYPT, sec,
NULL, 0, 16, 8, speed_template_16_24_32);
break;
@ -2074,11 +2074,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 215:
test_mb_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, NULL,
0, 16, 16, aead_speed_template_20, num_mb);
0, 16, 16, aead_speed_template_20_28_36, num_mb);
test_mb_aead_speed("gcm(aes)", ENCRYPT, sec, NULL, 0, 16, 8,
speed_template_16_24_32, num_mb);
test_mb_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec, NULL,
0, 16, 16, aead_speed_template_20, num_mb);
0, 16, 16, aead_speed_template_20_28_36, num_mb);
test_mb_aead_speed("gcm(aes)", DECRYPT, sec, NULL, 0, 16, 8,
speed_template_16_24_32, num_mb);
break;

@ -62,7 +62,7 @@ static u8 speed_template_32[] = {32, 0};
* AEAD speed tests
*/
static u8 aead_speed_template_19[] = {19, 0};
static u8 aead_speed_template_20[] = {20, 0};
static u8 aead_speed_template_20_28_36[] = {20, 28, 36, 0};
static u8 aead_speed_template_36[] = {36, 0};
/*

@ -356,6 +356,14 @@ static const struct testvec_config default_cipher_testvec_configs[] = {
{ .proportion_of_total = 5000 },
{ .proportion_of_total = 5000 },
},
}, {
.name = "one src, two even splits dst",
.inplace_mode = OUT_OF_PLACE,
.src_divs = { { .proportion_of_total = 10000 } },
.dst_divs = {
{ .proportion_of_total = 5000 },
{ .proportion_of_total = 5000 },
},
}, {
.name = "uneven misaligned splits, may sleep",
.req_flags = CRYPTO_TFM_REQ_MAY_SLEEP,
@ -4501,7 +4509,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
#endif
.alg = "cbcmac(aes)",
.fips_allowed = 1,
.test = alg_test_hash,
.suite = {
.hash = __VECS(aes_cbcmac_tv_template)
@ -4782,7 +4789,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
/* covered by drbg_nopr_hmac_sha256 test */
.alg = "drbg_nopr_hmac_sha384",
.fips_allowed = 1,
.test = alg_test_null,
}, {
.alg = "drbg_nopr_hmac_sha512",
@ -4805,7 +4811,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
/* covered by drbg_nopr_sha256 test */
.alg = "drbg_nopr_sha384",
.fips_allowed = 1,
.test = alg_test_null,
}, {
.alg = "drbg_nopr_sha512",
@ -4841,7 +4846,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
/* covered by drbg_pr_hmac_sha256 test */
.alg = "drbg_pr_hmac_sha384",
.fips_allowed = 1,
.test = alg_test_null,
}, {
.alg = "drbg_pr_hmac_sha512",
@ -4861,7 +4865,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
/* covered by drbg_pr_sha256 test */
.alg = "drbg_pr_sha384",
.fips_allowed = 1,
.test = alg_test_null,
}, {
.alg = "drbg_pr_sha512",
@ -5035,12 +5038,14 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ecdsa-nist-p256",
.test = alg_test_akcipher,
.fips_allowed = 1,
.suite = {
.akcipher = __VECS(ecdsa_nist_p256_tv_template)
}
}, {
.alg = "ecdsa-nist-p384",
.test = alg_test_akcipher,
.fips_allowed = 1,
.suite = {
.akcipher = __VECS(ecdsa_nist_p384_tv_template)
}
@ -5126,7 +5131,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ghash",
.test = alg_test_hash,
.fips_allowed = 1,
.suite = {
.hash = __VECS(ghash_tv_template)
}

@ -779,7 +779,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
* The core Whirlpool transform.
*/
static void wp512_process_buffer(struct wp512_ctx *wctx) {
static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) {
int i, r;
u64 K[8]; /* the round key */
u64 block[8]; /* mu(buffer) */

@ -140,9 +140,9 @@ static int xts_xor_tweak_post(struct skcipher_request *req, bool enc)
return xts_xor_tweak(req, true, enc);
}
static void xts_cts_done(struct crypto_async_request *areq, int err)
static void xts_cts_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
le128 b;
if (!err) {
@ -196,19 +196,19 @@ static int xts_cts_final(struct skcipher_request *req,
return 0;
}
static void xts_encrypt_done(struct crypto_async_request *areq, int err)
static void xts_encrypt_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (!err) {
struct xts_request_ctx *rctx = skcipher_request_ctx(req);
rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
err = xts_xor_tweak_post(req, true);
if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
err = xts_cts_final(req, crypto_skcipher_encrypt);
if (err == -EINPROGRESS)
if (err == -EINPROGRESS || err == -EBUSY)
return;
}
}
@ -216,19 +216,19 @@ static void xts_encrypt_done(struct crypto_async_request *areq, int err)
skcipher_request_complete(req, err);
}
static void xts_decrypt_done(struct crypto_async_request *areq, int err)
static void xts_decrypt_done(void *data, int err)
{
struct skcipher_request *req = areq->data;
struct skcipher_request *req = data;
if (!err) {
struct xts_request_ctx *rctx = skcipher_request_ctx(req);
rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
err = xts_xor_tweak_post(req, false);
if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
err = xts_cts_final(req, crypto_skcipher_decrypt);
if (err == -EINPROGRESS)
if (err == -EINPROGRESS || err == -EBUSY)
return;
}
}

@ -549,6 +549,16 @@ config HW_RANDOM_CN10K
To compile this driver as a module, choose M here.
The module will be called cn10k_rng. If unsure, say Y.
config HW_RANDOM_JH7110
tristate "StarFive JH7110 Random Number Generator support"
depends on SOC_STARFIVE || COMPILE_TEST
help
This driver provides support for the True Random Number
Generator in StarFive JH7110 SoCs.
To compile this driver as a module, choose M here.
The module will be called jh7110-trng.
endif # HW_RANDOM
config UML_RANDOM

@ -47,3 +47,4 @@ obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o
obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o
obj-$(CONFIG_HW_RANDOM_CN10K) += cn10k-rng.o
obj-$(CONFIG_HW_RANDOM_POLARFIRE_SOC) += mpfs-rng.o
obj-$(CONFIG_HW_RANDOM_JH7110) += jh7110-trng.o

@ -0,0 +1,393 @@
// SPDX-License-Identifier: GPL-2.0
/*
* TRNG driver for the StarFive JH7110 SoC
*
* Copyright (C) 2022 StarFive Technology Co.
*/
#include <linux/clk.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/hw_random.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/random.h>
#include <linux/reset.h>
/* trng register offset */
#define STARFIVE_CTRL 0x00
#define STARFIVE_STAT 0x04
#define STARFIVE_MODE 0x08
#define STARFIVE_SMODE 0x0C
#define STARFIVE_IE 0x10
#define STARFIVE_ISTAT 0x14
#define STARFIVE_RAND0 0x20
#define STARFIVE_RAND1 0x24
#define STARFIVE_RAND2 0x28
#define STARFIVE_RAND3 0x2C
#define STARFIVE_RAND4 0x30
#define STARFIVE_RAND5 0x34
#define STARFIVE_RAND6 0x38
#define STARFIVE_RAND7 0x3C
#define STARFIVE_AUTO_RQSTS 0x60
#define STARFIVE_AUTO_AGE 0x64
/* CTRL CMD */
#define STARFIVE_CTRL_EXEC_NOP 0x0
#define STARFIVE_CTRL_GENE_RANDNUM 0x1
#define STARFIVE_CTRL_EXEC_RANDRESEED 0x2
/* STAT */
#define STARFIVE_STAT_NONCE_MODE BIT(2)
#define STARFIVE_STAT_R256 BIT(3)
#define STARFIVE_STAT_MISSION_MODE BIT(8)
#define STARFIVE_STAT_SEEDED BIT(9)
#define STARFIVE_STAT_LAST_RESEED(x) ((x) << 16)
#define STARFIVE_STAT_SRVC_RQST BIT(27)
#define STARFIVE_STAT_RAND_GENERATING BIT(30)
#define STARFIVE_STAT_RAND_SEEDING BIT(31)
/* MODE */
#define STARFIVE_MODE_R256 BIT(3)
/* SMODE */
#define STARFIVE_SMODE_NONCE_MODE BIT(2)
#define STARFIVE_SMODE_MISSION_MODE BIT(8)
#define STARFIVE_SMODE_MAX_REJECTS(x) ((x) << 16)
/* IE */
#define STARFIVE_IE_RAND_RDY_EN BIT(0)
#define STARFIVE_IE_SEED_DONE_EN BIT(1)
#define STARFIVE_IE_LFSR_LOCKUP_EN BIT(4)
#define STARFIVE_IE_GLBL_EN BIT(31)
#define STARFIVE_IE_ALL (STARFIVE_IE_GLBL_EN | \
STARFIVE_IE_RAND_RDY_EN | \
STARFIVE_IE_SEED_DONE_EN | \
STARFIVE_IE_LFSR_LOCKUP_EN)
/* ISTAT */
#define STARFIVE_ISTAT_RAND_RDY BIT(0)
#define STARFIVE_ISTAT_SEED_DONE BIT(1)
#define STARFIVE_ISTAT_LFSR_LOCKUP BIT(4)
#define STARFIVE_RAND_LEN sizeof(u32)
#define to_trng(p) container_of(p, struct starfive_trng, rng)
enum reseed {
RANDOM_RESEED,
NONCE_RESEED,
};
enum mode {
PRNG_128BIT,
PRNG_256BIT,
};
struct starfive_trng {
struct device *dev;
void __iomem *base;
struct clk *hclk;
struct clk *ahb;
struct reset_control *rst;
struct hwrng rng;
struct completion random_done;
struct completion reseed_done;
u32 mode;
u32 mission;
u32 reseed;
/* protects against concurrent write to ctrl register */
spinlock_t write_lock;
};
static u16 autoreq;
module_param(autoreq, ushort, 0);
MODULE_PARM_DESC(autoreq, "Auto-reseeding after random number requests by host reaches specified counter:\n"
" 0 - disable counter\n"
" other - reload value for internal counter");
static u16 autoage;
module_param(autoage, ushort, 0);
MODULE_PARM_DESC(autoage, "Auto-reseeding after specified timer countdowns to 0:\n"
" 0 - disable timer\n"
" other - reload value for internal timer");
static inline int starfive_trng_wait_idle(struct starfive_trng *trng)
{
u32 stat;
return readl_relaxed_poll_timeout(trng->base + STARFIVE_STAT, stat,
!(stat & (STARFIVE_STAT_RAND_GENERATING |
STARFIVE_STAT_RAND_SEEDING)),
10, 100000);
}
static inline void starfive_trng_irq_mask_clear(struct starfive_trng *trng)
{
/* clear register: ISTAT */
u32 data = readl(trng->base + STARFIVE_ISTAT);
writel(data, trng->base + STARFIVE_ISTAT);
}
static int starfive_trng_cmd(struct starfive_trng *trng, u32 cmd, bool wait)
{
int wait_time = 1000;
/* allow up to 40 us for wait == 0 */
if (!wait)
wait_time = 40;
switch (cmd) {
case STARFIVE_CTRL_GENE_RANDNUM:
reinit_completion(&trng->random_done);
spin_lock_irq(&trng->write_lock);
writel(cmd, trng->base + STARFIVE_CTRL);
spin_unlock_irq(&trng->write_lock);
if (!wait_for_completion_timeout(&trng->random_done, usecs_to_jiffies(wait_time)))
return -ETIMEDOUT;
break;
case STARFIVE_CTRL_EXEC_RANDRESEED:
reinit_completion(&trng->reseed_done);
spin_lock_irq(&trng->write_lock);
writel(cmd, trng->base + STARFIVE_CTRL);
spin_unlock_irq(&trng->write_lock);
if (!wait_for_completion_timeout(&trng->reseed_done, usecs_to_jiffies(wait_time)))
return -ETIMEDOUT;
break;
default:
return -EINVAL;
}
return 0;
}
static int starfive_trng_init(struct hwrng *rng)
{
struct starfive_trng *trng = to_trng(rng);
u32 mode, intr = 0;
/* setup Auto Request/Age register */
writel(autoage, trng->base + STARFIVE_AUTO_AGE);
writel(autoreq, trng->base + STARFIVE_AUTO_RQSTS);
/* clear register: ISTAT */
starfive_trng_irq_mask_clear(trng);
intr |= STARFIVE_IE_ALL;
writel(intr, trng->base + STARFIVE_IE);
mode = readl(trng->base + STARFIVE_MODE);
switch (trng->mode) {
case PRNG_128BIT:
mode &= ~STARFIVE_MODE_R256;
break;
case PRNG_256BIT:
mode |= STARFIVE_MODE_R256;
break;
default:
mode |= STARFIVE_MODE_R256;
break;
}
writel(mode, trng->base + STARFIVE_MODE);
return starfive_trng_cmd(trng, STARFIVE_CTRL_EXEC_RANDRESEED, 1);
}
static irqreturn_t starfive_trng_irq(int irq, void *priv)
{
u32 status;
struct starfive_trng *trng = (struct starfive_trng *)priv;
status = readl(trng->base + STARFIVE_ISTAT);
if (status & STARFIVE_ISTAT_RAND_RDY) {
writel(STARFIVE_ISTAT_RAND_RDY, trng->base + STARFIVE_ISTAT);
complete(&trng->random_done);
}
if (status & STARFIVE_ISTAT_SEED_DONE) {
writel(STARFIVE_ISTAT_SEED_DONE, trng->base + STARFIVE_ISTAT);
complete(&trng->reseed_done);
}
if (status & STARFIVE_ISTAT_LFSR_LOCKUP) {
writel(STARFIVE_ISTAT_LFSR_LOCKUP, trng->base + STARFIVE_ISTAT);
/* SEU occurred, reseeding required*/
spin_lock(&trng->write_lock);
writel(STARFIVE_CTRL_EXEC_RANDRESEED, trng->base + STARFIVE_CTRL);
spin_unlock(&trng->write_lock);
}
return IRQ_HANDLED;
}
static void starfive_trng_cleanup(struct hwrng *rng)
{
struct starfive_trng *trng = to_trng(rng);
writel(0, trng->base + STARFIVE_CTRL);
reset_control_assert(trng->rst);
clk_disable_unprepare(trng->hclk);
clk_disable_unprepare(trng->ahb);
}
static int starfive_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
struct starfive_trng *trng = to_trng(rng);
int ret;
pm_runtime_get_sync(trng->dev);
if (trng->mode == PRNG_256BIT)
max = min_t(size_t, max, (STARFIVE_RAND_LEN * 8));
else
max = min_t(size_t, max, (STARFIVE_RAND_LEN * 4));
if (wait) {
ret = starfive_trng_wait_idle(trng);
if (ret)
return -ETIMEDOUT;
}
ret = starfive_trng_cmd(trng, STARFIVE_CTRL_GENE_RANDNUM, wait);
if (ret)
return ret;
memcpy_fromio(buf, trng->base + STARFIVE_RAND0, max);
pm_runtime_put_sync_autosuspend(trng->dev);
return max;
}
static int starfive_trng_probe(struct platform_device *pdev)
{
int ret;
int irq;
struct starfive_trng *trng;
trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
if (!trng)
return -ENOMEM;
platform_set_drvdata(pdev, trng);
trng->dev = &pdev->dev;
trng->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(trng->base))
return dev_err_probe(&pdev->dev, PTR_ERR(trng->base),
"Error remapping memory for platform device.\n");
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
init_completion(&trng->random_done);
init_completion(&trng->reseed_done);
spin_lock_init(&trng->write_lock);
ret = devm_request_irq(&pdev->dev, irq, starfive_trng_irq, 0, pdev->name,
(void *)trng);
if (ret)
return dev_err_probe(&pdev->dev, irq,
"Failed to register interrupt handler\n");
trng->hclk = devm_clk_get(&pdev->dev, "hclk");
if (IS_ERR(trng->hclk))
return dev_err_probe(&pdev->dev, PTR_ERR(trng->hclk),
"Error getting hardware reference clock\n");
trng->ahb = devm_clk_get(&pdev->dev, "ahb");
if (IS_ERR(trng->ahb))
return dev_err_probe(&pdev->dev, PTR_ERR(trng->ahb),
"Error getting ahb reference clock\n");
trng->rst = devm_reset_control_get_shared(&pdev->dev, NULL);
if (IS_ERR(trng->rst))
return dev_err_probe(&pdev->dev, PTR_ERR(trng->rst),
"Error getting hardware reset line\n");
clk_prepare_enable(trng->hclk);
clk_prepare_enable(trng->ahb);
reset_control_deassert(trng->rst);
trng->rng.name = dev_driver_string(&pdev->dev);
trng->rng.init = starfive_trng_init;
trng->rng.cleanup = starfive_trng_cleanup;
trng->rng.read = starfive_trng_read;
trng->mode = PRNG_256BIT;
trng->mission = 1;
trng->reseed = RANDOM_RESEED;
pm_runtime_use_autosuspend(&pdev->dev);
pm_runtime_set_autosuspend_delay(&pdev->dev, 100);
pm_runtime_enable(&pdev->dev);
ret = devm_hwrng_register(&pdev->dev, &trng->rng);
if (ret) {
pm_runtime_disable(&pdev->dev);
reset_control_assert(trng->rst);
clk_disable_unprepare(trng->ahb);
clk_disable_unprepare(trng->hclk);
return dev_err_probe(&pdev->dev, ret, "Failed to register hwrng\n");
}
return 0;
}
static int __maybe_unused starfive_trng_suspend(struct device *dev)
{
struct starfive_trng *trng = dev_get_drvdata(dev);
clk_disable_unprepare(trng->hclk);
clk_disable_unprepare(trng->ahb);
return 0;
}
static int __maybe_unused starfive_trng_resume(struct device *dev)
{
struct starfive_trng *trng = dev_get_drvdata(dev);
clk_prepare_enable(trng->hclk);
clk_prepare_enable(trng->ahb);
return 0;
}
static DEFINE_SIMPLE_DEV_PM_OPS(starfive_trng_pm_ops, starfive_trng_suspend,
starfive_trng_resume);
static const struct of_device_id trng_dt_ids[] __maybe_unused = {
{ .compatible = "starfive,jh7110-trng" },
{ }
};
MODULE_DEVICE_TABLE(of, trng_dt_ids);
static struct platform_driver starfive_trng_driver = {
.probe = starfive_trng_probe,
.driver = {
.name = "jh7110-trng",
.pm = &starfive_trng_pm_ops,
.of_match_table = of_match_ptr(trng_dt_ids),
},
};
module_platform_driver(starfive_trng_driver);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("StarFive True Random Number Generator");
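
Once this driver has registered with the hw_random core and been selected as the current source, entropy can be read from userspace through /dev/hwrng. A small stand-alone example, independent of the driver itself and of which backend the core picked:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[32];
	ssize_t n, i;
	int fd = open("/dev/hwrng", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/hwrng");
		return 1;
	}
	n = read(fd, buf, sizeof(buf));
	if (n < 0) {
		perror("read /dev/hwrng");
		close(fd);
		return 1;
	}
	for (i = 0; i < n; i++)
		printf("%02x", buf[i]);
	putchar('\n');
	close(fd);
	return 0;
}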

@ -390,16 +390,6 @@ if CRYPTO_DEV_NX
source "drivers/crypto/nx/Kconfig"
endif
config CRYPTO_DEV_UX500
tristate "Driver for ST-Ericsson UX500 crypto hardware acceleration"
depends on ARCH_U8500
help
Driver for ST-Ericsson UX500 crypto engine.
if CRYPTO_DEV_UX500
source "drivers/crypto/ux500/Kconfig"
endif # if CRYPTO_DEV_UX500
config CRYPTO_DEV_ATMEL_AUTHENC
bool "Support for Atmel IPSEC/SSL hw accelerator"
depends on ARCH_AT91 || COMPILE_TEST

@ -43,7 +43,6 @@ obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
obj-y += stm32/
obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/

@ -118,6 +118,7 @@ static const struct ce_variant ce_d1_variant = {
{ "bus", 0, 200000000 },
{ "mod", 300000000, 0 },
{ "ram", 0, 400000000 },
{ "trng", 0, 0 },
},
.esr = ESR_D1,
.prng = CE_ALG_PRNG,

@ -105,7 +105,7 @@
#define MAX_SG 8
#define CE_MAX_CLOCKS 3
#define CE_MAX_CLOCKS 4
#define MAXFLOW 4

@ -452,7 +452,7 @@ int sun8i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
}
kfree_sensitive(op->key);
op->keylen = keylen;
op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
op->key = kmemdup(key, keylen, GFP_KERNEL);
if (!op->key)
return -ENOMEM;
@ -475,7 +475,7 @@ int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
kfree_sensitive(op->key);
op->keylen = keylen;
op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
op->key = kmemdup(key, keylen, GFP_KERNEL);
if (!op->key)
return -ENOMEM;

@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
@ -527,7 +528,7 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
init_completion(&ss->flows[i].complete);
ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
GFP_KERNEL | GFP_DMA);
GFP_KERNEL);
if (!ss->flows[i].biv) {
err = -ENOMEM;
goto error_engine;
@ -535,7 +536,7 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
for (j = 0; j < MAX_SG; j++) {
ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
GFP_KERNEL | GFP_DMA);
GFP_KERNEL);
if (!ss->flows[i].iv[j]) {
err = -ENOMEM;
goto error_engine;
@ -544,13 +545,15 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
/* the padding could be up to two block. */
ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE,
GFP_KERNEL | GFP_DMA);
GFP_KERNEL);
if (!ss->flows[i].pad) {
err = -ENOMEM;
goto error_engine;
}
ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
GFP_KERNEL | GFP_DMA);
ss->flows[i].result =
devm_kmalloc(ss->dev, max(SHA256_DIGEST_SIZE,
dma_get_cache_alignment()),
GFP_KERNEL);
if (!ss->flows[i].result) {
err = -ENOMEM;
goto error_engine;

@ -79,10 +79,10 @@ int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
memcpy(tfmctx->key, key, keylen);
}
tfmctx->ipad = kzalloc(bs, GFP_KERNEL | GFP_DMA);
tfmctx->ipad = kzalloc(bs, GFP_KERNEL);
if (!tfmctx->ipad)
return -ENOMEM;
tfmctx->opad = kzalloc(bs, GFP_KERNEL | GFP_DMA);
tfmctx->opad = kzalloc(bs, GFP_KERNEL);
if (!tfmctx->opad) {
ret = -ENOMEM;
goto err_opad;

@ -11,6 +11,8 @@
*/
#include "sun8i-ss.h"
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pm_runtime.h>
#include <crypto/internal/rng.h>
@ -25,7 +27,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed,
ctx->seed = NULL;
}
if (!ctx->seed)
ctx->seed = kmalloc(slen, GFP_KERNEL | GFP_DMA);
ctx->seed = kmalloc(slen, GFP_KERNEL);
if (!ctx->seed)
return -ENOMEM;
@ -58,6 +60,7 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
struct rng_alg *alg = crypto_rng_alg(tfm);
struct sun8i_ss_alg_template *algt;
unsigned int todo_with_padding;
struct sun8i_ss_dev *ss;
dma_addr_t dma_iv, dma_dst;
unsigned int todo;
@ -81,7 +84,11 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
todo = dlen + PRNG_SEED_SIZE + PRNG_DATA_SIZE;
todo -= todo % PRNG_DATA_SIZE;
d = kzalloc(todo, GFP_KERNEL | GFP_DMA);
todo_with_padding = ALIGN(todo, dma_get_cache_alignment());
if (todo_with_padding < todo || todo < dlen)
return -EOVERFLOW;
d = kzalloc(todo_with_padding, GFP_KERNEL);
if (!d)
return -ENOMEM;
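
The GFP_DMA removals in the sun8i-ss hunks go together with sizing DMA-bound buffers to the cache alignment, as the ALIGN(todo, dma_get_cache_alignment()) line above shows: a buffer handed to the DMA API must not share a cacheline with unrelated data on non-coherent systems. A minimal sketch of that allocation pattern; alloc_dma_buf() is an illustrative helper, not part of the driver:

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static void *alloc_dma_buf(struct device *dev, size_t len)
{
	/* round up so the buffer covers whole cachelines */
	size_t padded = ALIGN(len, dma_get_cache_alignment());

	return devm_kmalloc(dev, padded, GFP_KERNEL);
}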

@ -522,7 +522,6 @@ static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
{
struct skcipher_request *req;
struct scatterlist *dst;
dma_addr_t addr;
req = skcipher_request_cast(pd_uinfo->async_req);
@ -531,8 +530,8 @@ static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
req->cryptlen, req->dst);
} else {
dst = pd_uinfo->dest_va;
addr = dma_map_page(dev->core_dev->device, sg_page(dst),
dst->offset, dst->length, DMA_FROM_DEVICE);
dma_unmap_page(dev->core_dev->device, pd->dest, dst->length,
DMA_FROM_DEVICE);
}
if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) {
@ -557,10 +556,9 @@ static void crypto4xx_ahash_done(struct crypto4xx_device *dev,
struct ahash_request *ahash_req;
ahash_req = ahash_request_cast(pd_uinfo->async_req);
ctx = crypto_tfm_ctx(ahash_req->base.tfm);
ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(ahash_req));
crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo,
crypto_tfm_ctx(ahash_req->base.tfm));
crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo, ctx);
crypto4xx_ret_sg_desc(dev, pd_uinfo);
if (pd_uinfo->state & PD_ENTRY_BUSY)

@ -46,3 +46,14 @@ config CRYPTO_DEV_ASPEED_HACE_CRYPTO
crypto driver.
Supports AES/DES symmetric-key encryption and decryption
with ECB/CBC/CFB/OFB/CTR options.
config CRYPTO_DEV_ASPEED_ACRY
bool "Enable Aspeed ACRY RSA Engine"
depends on CRYPTO_DEV_ASPEED
select CRYPTO_ENGINE
select CRYPTO_RSA
help
Select here to enable Aspeed ECC/RSA Engine (ACRY)
RSA driver.
Supports 256 bits to 4096 bits RSA encryption/decryption
and signature/verification.

@ -5,3 +5,7 @@ obj-$(CONFIG_CRYPTO_DEV_ASPEED) += aspeed_crypto.o
aspeed_crypto-objs := aspeed-hace.o \
$(hace-hash-y) \
$(hace-crypto-y)
aspeed_acry-$(CONFIG_CRYPTO_DEV_ASPEED_ACRY) += aspeed-acry.o
obj-$(CONFIG_CRYPTO_DEV_ASPEED) += $(aspeed_acry-y)

@ -0,0 +1,828 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2021 Aspeed Technology Inc.
*/
#include <crypto/akcipher.h>
#include <crypto/algapi.h>
#include <crypto/engine.h>
#include <crypto/internal/akcipher.h>
#include <crypto/internal/rsa.h>
#include <crypto/scatterwalk.h>
#include <linux/clk.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/mfd/syscon.h>
#include <linux/interrupt.h>
#include <linux/count_zeros.h>
#include <linux/err.h>
#include <linux/dma-mapping.h>
#include <linux/regmap.h>
#ifdef CONFIG_CRYPTO_DEV_ASPEED_DEBUG
#define ACRY_DBG(d, fmt, ...) \
dev_info((d)->dev, "%s() " fmt, __func__, ##__VA_ARGS__)
#else
#define ACRY_DBG(d, fmt, ...) \
dev_dbg((d)->dev, "%s() " fmt, __func__, ##__VA_ARGS__)
#endif
/*****************************
* *
* ACRY register definitions *
* *
* ***************************/
#define ASPEED_ACRY_TRIGGER 0x000 /* ACRY Engine Control: trigger */
#define ASPEED_ACRY_DMA_CMD 0x048 /* ACRY Engine Control: Command */
#define ASPEED_ACRY_DMA_SRC_BASE 0x04C /* ACRY DRAM base address for DMA */
#define ASPEED_ACRY_DMA_LEN 0x050 /* ACRY Data Length of DMA */
#define ASPEED_ACRY_RSA_KEY_LEN 0x058 /* ACRY RSA Exp/Mod Key Length (Bits) */
#define ASPEED_ACRY_INT_MASK 0x3F8 /* ACRY Interrupt Mask */
#define ASPEED_ACRY_STATUS 0x3FC /* ACRY Interrupt Status */
/* rsa trigger */
#define ACRY_CMD_RSA_TRIGGER BIT(0)
#define ACRY_CMD_DMA_RSA_TRIGGER BIT(1)
/* rsa dma cmd */
#define ACRY_CMD_DMA_SRAM_MODE_RSA (0x3 << 4)
#define ACRY_CMD_DMEM_AHB BIT(8)
#define ACRY_CMD_DMA_SRAM_AHB_ENGINE 0
/* rsa key len */
#define RSA_E_BITS_LEN(x) ((x) << 16)
#define RSA_M_BITS_LEN(x) (x)
/* acry isr */
#define ACRY_RSA_ISR BIT(1)
#define ASPEED_ACRY_BUFF_SIZE 0x1800 /* DMA buffer size */
#define ASPEED_ACRY_SRAM_MAX_LEN 2048 /* ACRY SRAM maximum length (Bytes) */
#define ASPEED_ACRY_RSA_MAX_KEY_LEN 512 /* ACRY RSA maximum key length (Bytes) */
#define CRYPTO_FLAGS_BUSY BIT(1)
#define BYTES_PER_DWORD 4
/*****************************
* *
* AHBC register definitions *
* *
* ***************************/
#define AHBC_REGION_PROT 0x240
#define REGION_ACRYM BIT(23)
#define ast_acry_write(acry, val, offset) \
writel((val), (acry)->regs + (offset))
#define ast_acry_read(acry, offset) \
readl((acry)->regs + (offset))
struct aspeed_acry_dev;
typedef int (*aspeed_acry_fn_t)(struct aspeed_acry_dev *);
struct aspeed_acry_dev {
void __iomem *regs;
struct device *dev;
int irq;
struct clk *clk;
struct regmap *ahbc;
struct akcipher_request *req;
struct tasklet_struct done_task;
aspeed_acry_fn_t resume;
unsigned long flags;
/* ACRY output SRAM buffer */
void __iomem *acry_sram;
/* ACRY input DMA buffer */
void *buf_addr;
dma_addr_t buf_dma_addr;
struct crypto_engine *crypt_engine_rsa;
/* ACRY SRAM memory mapping */
int exp_dw_mapping[ASPEED_ACRY_RSA_MAX_KEY_LEN];
int mod_dw_mapping[ASPEED_ACRY_RSA_MAX_KEY_LEN];
int data_byte_mapping[ASPEED_ACRY_SRAM_MAX_LEN];
};
struct aspeed_acry_ctx {
struct crypto_engine_ctx enginectx;
struct aspeed_acry_dev *acry_dev;
struct rsa_key key;
int enc;
u8 *n;
u8 *e;
u8 *d;
size_t n_sz;
size_t e_sz;
size_t d_sz;
aspeed_acry_fn_t trigger;
struct crypto_akcipher *fallback_tfm;
};
struct aspeed_acry_alg {
struct aspeed_acry_dev *acry_dev;
struct akcipher_alg akcipher;
};
enum aspeed_rsa_key_mode {
ASPEED_RSA_EXP_MODE = 0,
ASPEED_RSA_MOD_MODE,
ASPEED_RSA_DATA_MODE,
};
static inline struct akcipher_request *
akcipher_request_cast(struct crypto_async_request *req)
{
return container_of(req, struct akcipher_request, base);
}
static int aspeed_acry_do_fallback(struct akcipher_request *req)
{
struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req);
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher);
int err;
akcipher_request_set_tfm(req, ctx->fallback_tfm);
if (ctx->enc)
err = crypto_akcipher_encrypt(req);
else
err = crypto_akcipher_decrypt(req);
akcipher_request_set_tfm(req, cipher);
return err;
}
static bool aspeed_acry_need_fallback(struct akcipher_request *req)
{
struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req);
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher);
return ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN;
}
static int aspeed_acry_handle_queue(struct aspeed_acry_dev *acry_dev,
struct akcipher_request *req)
{
if (aspeed_acry_need_fallback(req)) {
ACRY_DBG(acry_dev, "SW fallback\n");
return aspeed_acry_do_fallback(req);
}
return crypto_transfer_akcipher_request_to_engine(acry_dev->crypt_engine_rsa, req);
}
static int aspeed_acry_do_request(struct crypto_engine *engine, void *areq)
{
struct akcipher_request *req = akcipher_request_cast(areq);
struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req);
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher);
struct aspeed_acry_dev *acry_dev = ctx->acry_dev;
acry_dev->req = req;
acry_dev->flags |= CRYPTO_FLAGS_BUSY;
return ctx->trigger(acry_dev);
}
static int aspeed_acry_complete(struct aspeed_acry_dev *acry_dev, int err)
{
struct akcipher_request *req = acry_dev->req;
acry_dev->flags &= ~CRYPTO_FLAGS_BUSY;
crypto_finalize_akcipher_request(acry_dev->crypt_engine_rsa, req, err);
return err;
}
/*
* Copy data to the DMA buffer for the engine to use.
*/
static void aspeed_acry_rsa_sg_copy_to_buffer(struct aspeed_acry_dev *acry_dev,
u8 *buf, struct scatterlist *src,
size_t nbytes)
{
static u8 dram_buffer[ASPEED_ACRY_SRAM_MAX_LEN];
int i = 0, j;
int data_idx;
ACRY_DBG(acry_dev, "\n");
scatterwalk_map_and_copy(dram_buffer, src, 0, nbytes, 0);
for (j = nbytes - 1; j >= 0; j--) {
data_idx = acry_dev->data_byte_mapping[i];
buf[data_idx] = dram_buffer[j];
i++;
}
for (; i < ASPEED_ACRY_SRAM_MAX_LEN; i++) {
data_idx = acry_dev->data_byte_mapping[i];
buf[data_idx] = 0;
}
}
/*
* Copy the exponent/modulus to the DMA buffer for the engine to use.
*
* Params:
* - mode 0 : Exponent
* - mode 1 : Modulus
*
* Example:
* - DRAM memory layout:
* D[0], D[4], D[8], D[12]
* - ACRY SRAM memory layout reverses the order of the source data:
* D[12], D[8], D[4], D[0]
*/
static int aspeed_acry_rsa_ctx_copy(struct aspeed_acry_dev *acry_dev, void *buf,
const void *xbuf, size_t nbytes,
enum aspeed_rsa_key_mode mode)
{
const u8 *src = xbuf;
__le32 *dw_buf = buf;
int nbits, ndw;
int i, j, idx;
u32 data = 0;
ACRY_DBG(acry_dev, "nbytes:%zu, mode:%d\n", nbytes, mode);
if (nbytes > ASPEED_ACRY_RSA_MAX_KEY_LEN)
return -ENOMEM;
/* Remove the leading zeros */
while (nbytes > 0 && src[0] == 0) {
src++;
nbytes--;
}
nbits = nbytes * 8;
if (nbytes > 0)
nbits -= count_leading_zeros(src[0]) - (BITS_PER_LONG - 8);
/* double-word alignment */
ndw = DIV_ROUND_UP(nbytes, BYTES_PER_DWORD);
if (nbytes > 0) {
i = BYTES_PER_DWORD - nbytes % BYTES_PER_DWORD;
i %= BYTES_PER_DWORD;
for (j = ndw; j > 0; j--) {
for (; i < BYTES_PER_DWORD; i++) {
data <<= 8;
data |= *src++;
}
i = 0;
if (mode == ASPEED_RSA_EXP_MODE)
idx = acry_dev->exp_dw_mapping[j - 1];
else if (mode == ASPEED_RSA_MOD_MODE)
idx = acry_dev->mod_dw_mapping[j - 1];
dw_buf[idx] = cpu_to_le32(data);
}
}
return nbits;
}
static int aspeed_acry_rsa_transfer(struct aspeed_acry_dev *acry_dev)
{
struct akcipher_request *req = acry_dev->req;
u8 __iomem *sram_buffer = acry_dev->acry_sram;
struct scatterlist *out_sg = req->dst;
static u8 dram_buffer[ASPEED_ACRY_SRAM_MAX_LEN];
int leading_zero = 1;
int result_nbytes;
int i = 0, j;
int data_idx;
/* Set Data Memory to AHB(CPU) Access Mode */
ast_acry_write(acry_dev, ACRY_CMD_DMEM_AHB, ASPEED_ACRY_DMA_CMD);
/* Disable ACRY SRAM protection */
regmap_update_bits(acry_dev->ahbc, AHBC_REGION_PROT,
REGION_ACRYM, 0);
result_nbytes = ASPEED_ACRY_SRAM_MAX_LEN;
for (j = ASPEED_ACRY_SRAM_MAX_LEN - 1; j >= 0; j--) {
data_idx = acry_dev->data_byte_mapping[j];
if (readb(sram_buffer + data_idx) == 0 && leading_zero) {
result_nbytes--;
} else {
leading_zero = 0;
dram_buffer[i] = readb(sram_buffer + data_idx);
i++;
}
}
ACRY_DBG(acry_dev, "result_nbytes:%d, req->dst_len:%d\n",
result_nbytes, req->dst_len);
if (result_nbytes <= req->dst_len) {
scatterwalk_map_and_copy(dram_buffer, out_sg, 0, result_nbytes,
1);
req->dst_len = result_nbytes;
} else {
dev_err(acry_dev->dev, "RSA engine error!\n");
}
memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE);
return aspeed_acry_complete(acry_dev, 0);
}
static int aspeed_acry_rsa_trigger(struct aspeed_acry_dev *acry_dev)
{
struct akcipher_request *req = acry_dev->req;
struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req);
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher);
int ne, nm;
if (!ctx->n || !ctx->n_sz) {
dev_err(acry_dev->dev, "%s: key n is not set\n", __func__);
return -EINVAL;
}
memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE);
/* Copy source data to DMA buffer */
aspeed_acry_rsa_sg_copy_to_buffer(acry_dev, acry_dev->buf_addr,
req->src, req->src_len);
nm = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr, ctx->n,
ctx->n_sz, ASPEED_RSA_MOD_MODE);
if (ctx->enc) {
if (!ctx->e || !ctx->e_sz) {
dev_err(acry_dev->dev, "%s: key e is not set\n",
__func__);
return -EINVAL;
}
/* Copy key e to DMA buffer */
ne = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr,
ctx->e, ctx->e_sz,
ASPEED_RSA_EXP_MODE);
} else {
if (!ctx->d || !ctx->d_sz) {
dev_err(acry_dev->dev, "%s: key d is not set\n",
__func__);
return -EINVAL;
}
/* Copy key d to DMA buffer */
ne = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr,
ctx->key.d, ctx->key.d_sz,
ASPEED_RSA_EXP_MODE);
}
ast_acry_write(acry_dev, acry_dev->buf_dma_addr,
ASPEED_ACRY_DMA_SRC_BASE);
ast_acry_write(acry_dev, (ne << 16) + nm,
ASPEED_ACRY_RSA_KEY_LEN);
ast_acry_write(acry_dev, ASPEED_ACRY_BUFF_SIZE,
ASPEED_ACRY_DMA_LEN);
acry_dev->resume = aspeed_acry_rsa_transfer;
/* Enable ACRY SRAM protection */
regmap_update_bits(acry_dev->ahbc, AHBC_REGION_PROT,
REGION_ACRYM, REGION_ACRYM);
ast_acry_write(acry_dev, ACRY_RSA_ISR, ASPEED_ACRY_INT_MASK);
ast_acry_write(acry_dev, ACRY_CMD_DMA_SRAM_MODE_RSA |
ACRY_CMD_DMA_SRAM_AHB_ENGINE, ASPEED_ACRY_DMA_CMD);
/* Trigger RSA engines */
ast_acry_write(acry_dev, ACRY_CMD_RSA_TRIGGER |
ACRY_CMD_DMA_RSA_TRIGGER, ASPEED_ACRY_TRIGGER);
return 0;
}
static int aspeed_acry_rsa_enc(struct akcipher_request *req)
{
struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req);
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher);
struct aspeed_acry_dev *acry_dev = ctx->acry_dev;
ctx->trigger = aspeed_acry_rsa_trigger;
ctx->enc = 1;
return aspeed_acry_handle_queue(acry_dev, req);
}
static int aspeed_acry_rsa_dec(struct akcipher_request *req)
{
struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req);
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher);
struct aspeed_acry_dev *acry_dev = ctx->acry_dev;
ctx->trigger = aspeed_acry_rsa_trigger;
ctx->enc = 0;
return aspeed_acry_handle_queue(acry_dev, req);
}
static u8 *aspeed_rsa_key_copy(u8 *src, size_t len)
{
return kmemdup(src, len, GFP_KERNEL);
}
static int aspeed_rsa_set_n(struct aspeed_acry_ctx *ctx, u8 *value,
size_t len)
{
ctx->n_sz = len;
ctx->n = aspeed_rsa_key_copy(value, len);
if (!ctx->n)
return -ENOMEM;
return 0;
}
static int aspeed_rsa_set_e(struct aspeed_acry_ctx *ctx, u8 *value,
size_t len)
{
ctx->e_sz = len;
ctx->e = aspeed_rsa_key_copy(value, len);
if (!ctx->e)
return -ENOMEM;
return 0;
}
static int aspeed_rsa_set_d(struct aspeed_acry_ctx *ctx, u8 *value,
size_t len)
{
ctx->d_sz = len;
ctx->d = aspeed_rsa_key_copy(value, len);
if (!ctx->d)
return -ENOMEM;
return 0;
}
static void aspeed_rsa_key_free(struct aspeed_acry_ctx *ctx)
{
kfree_sensitive(ctx->n);
kfree_sensitive(ctx->e);
kfree_sensitive(ctx->d);
ctx->n_sz = 0;
ctx->e_sz = 0;
ctx->d_sz = 0;
}
static int aspeed_acry_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
unsigned int keylen, int priv)
{
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm);
struct aspeed_acry_dev *acry_dev = ctx->acry_dev;
int ret;
if (priv)
ret = rsa_parse_priv_key(&ctx->key, key, keylen);
else
ret = rsa_parse_pub_key(&ctx->key, key, keylen);
if (ret) {
dev_err(acry_dev->dev, "rsa parse key failed, ret:0x%x\n",
ret);
return ret;
}
/* The Aspeed engine supports key lengths of up to 4096 bits;
* larger keys are handled by the software fallback instead.
*/
if (ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN)
return 0;
ret = aspeed_rsa_set_n(ctx, (u8 *)ctx->key.n, ctx->key.n_sz);
if (ret)
goto err;
ret = aspeed_rsa_set_e(ctx, (u8 *)ctx->key.e, ctx->key.e_sz);
if (ret)
goto err;
if (priv) {
ret = aspeed_rsa_set_d(ctx, (u8 *)ctx->key.d, ctx->key.d_sz);
if (ret)
goto err;
}
return 0;
err:
dev_err(acry_dev->dev, "rsa set key failed\n");
aspeed_rsa_key_free(ctx);
return ret;
}
static int aspeed_acry_rsa_set_pub_key(struct crypto_akcipher *tfm,
const void *key,
unsigned int keylen)
{
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm);
int ret;
ret = crypto_akcipher_set_pub_key(ctx->fallback_tfm, key, keylen);
if (ret)
return ret;
return aspeed_acry_rsa_setkey(tfm, key, keylen, 0);
}
static int aspeed_acry_rsa_set_priv_key(struct crypto_akcipher *tfm,
const void *key,
unsigned int keylen)
{
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm);
int ret;
ret = crypto_akcipher_set_priv_key(ctx->fallback_tfm, key, keylen);
if (ret)
return ret;
return aspeed_acry_rsa_setkey(tfm, key, keylen, 1);
}
static unsigned int aspeed_acry_rsa_max_size(struct crypto_akcipher *tfm)
{
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm);
if (ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN)
return crypto_akcipher_maxsize(ctx->fallback_tfm);
return ctx->n_sz;
}
static int aspeed_acry_rsa_init_tfm(struct crypto_akcipher *tfm)
{
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm);
struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
const char *name = crypto_tfm_alg_name(&tfm->base);
struct aspeed_acry_alg *acry_alg;
acry_alg = container_of(alg, struct aspeed_acry_alg, akcipher);
ctx->acry_dev = acry_alg->acry_dev;
ctx->fallback_tfm = crypto_alloc_akcipher(name, 0, CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->fallback_tfm)) {
dev_err(ctx->acry_dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
name, PTR_ERR(ctx->fallback_tfm));
return PTR_ERR(ctx->fallback_tfm);
}
ctx->enginectx.op.do_one_request = aspeed_acry_do_request;
ctx->enginectx.op.prepare_request = NULL;
ctx->enginectx.op.unprepare_request = NULL;
return 0;
}
static void aspeed_acry_rsa_exit_tfm(struct crypto_akcipher *tfm)
{
struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm);
crypto_free_akcipher(ctx->fallback_tfm);
}
static struct aspeed_acry_alg aspeed_acry_akcipher_algs[] = {
{
.akcipher = {
.encrypt = aspeed_acry_rsa_enc,
.decrypt = aspeed_acry_rsa_dec,
.sign = aspeed_acry_rsa_dec,
.verify = aspeed_acry_rsa_enc,
.set_pub_key = aspeed_acry_rsa_set_pub_key,
.set_priv_key = aspeed_acry_rsa_set_priv_key,
.max_size = aspeed_acry_rsa_max_size,
.init = aspeed_acry_rsa_init_tfm,
.exit = aspeed_acry_rsa_exit_tfm,
.base = {
.cra_name = "rsa",
.cra_driver_name = "aspeed-rsa",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_AKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY |
CRYPTO_ALG_NEED_FALLBACK,
.cra_module = THIS_MODULE,
.cra_ctxsize = sizeof(struct aspeed_acry_ctx),
},
},
},
};
static void aspeed_acry_register(struct aspeed_acry_dev *acry_dev)
{
int i, rc;
for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++) {
aspeed_acry_akcipher_algs[i].acry_dev = acry_dev;
rc = crypto_register_akcipher(&aspeed_acry_akcipher_algs[i].akcipher);
if (rc) {
ACRY_DBG(acry_dev, "Failed to register %s\n",
aspeed_acry_akcipher_algs[i].akcipher.base.cra_name);
}
}
}
static void aspeed_acry_unregister(struct aspeed_acry_dev *acry_dev)
{
int i;
for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++)
crypto_unregister_akcipher(&aspeed_acry_akcipher_algs[i].akcipher);
}
/* ACRY interrupt service routine. */
static irqreturn_t aspeed_acry_irq(int irq, void *dev)
{
struct aspeed_acry_dev *acry_dev = (struct aspeed_acry_dev *)dev;
u32 sts;
sts = ast_acry_read(acry_dev, ASPEED_ACRY_STATUS);
ast_acry_write(acry_dev, sts, ASPEED_ACRY_STATUS);
ACRY_DBG(acry_dev, "irq sts:0x%x\n", sts);
if (sts & ACRY_RSA_ISR) {
/* Stop RSA engine */
ast_acry_write(acry_dev, 0, ASPEED_ACRY_TRIGGER);
if (acry_dev->flags & CRYPTO_FLAGS_BUSY)
tasklet_schedule(&acry_dev->done_task);
else
dev_err(acry_dev->dev, "RSA no active requests.\n");
}
return IRQ_HANDLED;
}
/*
* ACRY SRAM has its own memory layout (interleaved exponent, modulus and
* data regions).  Set up the DRAM-to-SRAM index mapping here for later use.
*/
static void aspeed_acry_sram_mapping(struct aspeed_acry_dev *acry_dev)
{
int i, j = 0;
for (i = 0; i < (ASPEED_ACRY_SRAM_MAX_LEN / BYTES_PER_DWORD); i++) {
acry_dev->exp_dw_mapping[i] = j;
acry_dev->mod_dw_mapping[i] = j + 4;
acry_dev->data_byte_mapping[(i * 4)] = (j + 8) * 4;
acry_dev->data_byte_mapping[(i * 4) + 1] = (j + 8) * 4 + 1;
acry_dev->data_byte_mapping[(i * 4) + 2] = (j + 8) * 4 + 2;
acry_dev->data_byte_mapping[(i * 4) + 3] = (j + 8) * 4 + 3;
j++;
j = j % 4 ? j : j + 8;
}
}
static void aspeed_acry_done_task(unsigned long data)
{
struct aspeed_acry_dev *acry_dev = (struct aspeed_acry_dev *)data;
(void)acry_dev->resume(acry_dev);
}
static const struct of_device_id aspeed_acry_of_matches[] = {
{ .compatible = "aspeed,ast2600-acry", },
{},
};
static int aspeed_acry_probe(struct platform_device *pdev)
{
struct aspeed_acry_dev *acry_dev;
struct device *dev = &pdev->dev;
struct resource *res;
int rc;
acry_dev = devm_kzalloc(dev, sizeof(struct aspeed_acry_dev),
GFP_KERNEL);
if (!acry_dev)
return -ENOMEM;
acry_dev->dev = dev;
platform_set_drvdata(pdev, acry_dev);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
acry_dev->regs = devm_ioremap_resource(dev, res);
if (IS_ERR(acry_dev->regs))
return PTR_ERR(acry_dev->regs);
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
acry_dev->acry_sram = devm_ioremap_resource(dev, res);
if (IS_ERR(acry_dev->acry_sram))
return PTR_ERR(acry_dev->acry_sram);
/* Get irq number and register it */
acry_dev->irq = platform_get_irq(pdev, 0);
if (acry_dev->irq < 0)
return -ENXIO;
rc = devm_request_irq(dev, acry_dev->irq, aspeed_acry_irq, 0,
dev_name(dev), acry_dev);
if (rc) {
dev_err(dev, "Failed to request irq.\n");
return rc;
}
acry_dev->clk = devm_clk_get_enabled(dev, NULL);
if (IS_ERR(acry_dev->clk)) {
dev_err(dev, "Failed to get acry clk\n");
return PTR_ERR(acry_dev->clk);
}
acry_dev->ahbc = syscon_regmap_lookup_by_phandle(dev->of_node,
"aspeed,ahbc");
if (IS_ERR(acry_dev->ahbc)) {
dev_err(dev, "Failed to get AHBC regmap\n");
return -ENODEV;
}
/* Initialize crypto hardware engine structure for RSA */
acry_dev->crypt_engine_rsa = crypto_engine_alloc_init(dev, true);
if (!acry_dev->crypt_engine_rsa) {
rc = -ENOMEM;
goto clk_exit;
}
rc = crypto_engine_start(acry_dev->crypt_engine_rsa);
if (rc)
goto err_engine_rsa_start;
tasklet_init(&acry_dev->done_task, aspeed_acry_done_task,
(unsigned long)acry_dev);
/* Set Data Memory to AHB(CPU) Access Mode */
ast_acry_write(acry_dev, ACRY_CMD_DMEM_AHB, ASPEED_ACRY_DMA_CMD);
/* Initialize ACRY SRAM index */
aspeed_acry_sram_mapping(acry_dev);
acry_dev->buf_addr = dmam_alloc_coherent(dev, ASPEED_ACRY_BUFF_SIZE,
&acry_dev->buf_dma_addr,
GFP_KERNEL);
memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE);
aspeed_acry_register(acry_dev);
dev_info(dev, "Aspeed ACRY Accelerator successfully registered\n");
return 0;
err_engine_rsa_start:
crypto_engine_exit(acry_dev->crypt_engine_rsa);
clk_exit:
clk_disable_unprepare(acry_dev->clk);
return rc;
}
static int aspeed_acry_remove(struct platform_device *pdev)
{
struct aspeed_acry_dev *acry_dev = platform_get_drvdata(pdev);
aspeed_acry_unregister(acry_dev);
crypto_engine_exit(acry_dev->crypt_engine_rsa);
tasklet_kill(&acry_dev->done_task);
clk_disable_unprepare(acry_dev->clk);
return 0;
}
MODULE_DEVICE_TABLE(of, aspeed_acry_of_matches);
static struct platform_driver aspeed_acry_driver = {
.probe = aspeed_acry_probe,
.remove = aspeed_acry_remove,
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = aspeed_acry_of_matches,
},
};
module_platform_driver(aspeed_acry_driver);
MODULE_AUTHOR("Neal Liu <neal_liu@aspeedtech.com>");
MODULE_DESCRIPTION("ASPEED ACRY driver for hardware RSA Engine");
MODULE_LICENSE("GPL");


@ -99,7 +99,6 @@ static int aspeed_hace_probe(struct platform_device *pdev)
const struct of_device_id *hace_dev_id;
struct aspeed_engine_hash *hash_engine;
struct aspeed_hace_dev *hace_dev;
struct resource *res;
int rc;
hace_dev = devm_kzalloc(&pdev->dev, sizeof(struct aspeed_hace_dev),
@ -118,11 +117,9 @@ static int aspeed_hace_probe(struct platform_device *pdev)
hash_engine = &hace_dev->hash_engine;
crypto_engine = &hace_dev->crypto_engine;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
platform_set_drvdata(pdev, hace_dev);
hace_dev->regs = devm_ioremap_resource(&pdev->dev, res);
hace_dev->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
if (IS_ERR(hace_dev->regs))
return PTR_ERR(hace_dev->regs);
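
The hace probe cleanup above folds platform_get_resource() and devm_ioremap_resource() into a single helper. A minimal sketch of the pattern, using a hypothetical driver:

#include <linux/err.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
        void __iomem *regs;

        /*
         * Looks up IORESOURCE_MEM index 0 and ioremaps it in one call; the
         * third argument may return the underlying struct resource if needed.
         */
        regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
        if (IS_ERR(regs))
                return PTR_ERR(regs);

        /* ... program the device through "regs" ... */
        return 0;
}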


@ -183,7 +183,7 @@ struct aspeed_sham_ctx {
struct aspeed_hace_dev *hace_dev;
unsigned long flags; /* hmac flag */
struct aspeed_sha_hmac_ctx base[0];
struct aspeed_sha_hmac_ctx base[];
};
struct aspeed_sham_reqctx {


@ -554,7 +554,7 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
}
if (dd->is_async)
dd->areq->complete(dd->areq, err);
crypto_request_complete(dd->areq, err);
tasklet_schedule(&dd->queue_task);
@ -955,7 +955,7 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
crypto_request_complete(backlog, -EINPROGRESS);
ctx = crypto_tfm_ctx(areq->tfm);
@ -1879,7 +1879,7 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
err = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen);
err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
@ -2510,6 +2510,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
case 0x700:
case 0x600:
case 0x500:
dd->caps.has_dualbuff = 1;
dd->caps.has_cfb64 = 1;
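
The xts setkey hunk above replaces xts_check_key(), which took the underlying crypto_tfm, with xts_verify_key(), which takes the crypto_skcipher handle directly. A minimal sketch, with the hardware-specific key programming left as a placeholder:

#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <linux/types.h>

static int example_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
                              unsigned int keylen)
{
        /*
         * Checks that the key length is even and, in FIPS mode or with
         * CRYPTO_TFM_REQ_FORBID_WEAK_KEYS set, that the two halves differ.
         */
        int err = xts_verify_key(tfm, key, keylen);

        if (err)
                return err;

        /* ... program the two half-keys into the hardware context ... */
        return 0;
}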


@ -313,11 +313,10 @@ static struct kpp_alg atmel_ecdh_nist_p256 = {
static int atmel_ecc_probe(struct i2c_client *client)
{
const struct i2c_device_id *id = i2c_client_get_device_id(client);
struct atmel_i2c_client_priv *i2c_priv;
int ret;
ret = atmel_i2c_probe(client, id);
ret = atmel_i2c_probe(client);
if (ret)
return ret;


@ -59,7 +59,7 @@ void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd)
* Read the word from Configuration zone that contains the lock bytes
* (UserExtra, Selector, LockValue, LockConfig).
*/
cmd->param1 = CONFIG_ZONE;
cmd->param1 = CONFIGURATION_ZONE;
cmd->param2 = cpu_to_le16(DEVICE_LOCK_ADDR);
cmd->count = READ_COUNT;
@ -324,7 +324,7 @@ free_cmd:
return ret;
}
int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
int atmel_i2c_probe(struct i2c_client *client)
{
struct atmel_i2c_client_priv *i2c_priv;
struct device *dev = &client->dev;


@ -63,7 +63,7 @@ struct atmel_i2c_cmd {
#define STATUS_WAKE_SUCCESSFUL 0x11
/* Definitions for eeprom organization */
#define CONFIG_ZONE 0
#define CONFIGURATION_ZONE 0
/* Definitions for Indexes common to all commands */
#define RSP_DATA_IDX 1 /* buffer index of data in response */
@ -167,7 +167,7 @@ struct atmel_i2c_work_data {
struct atmel_i2c_cmd cmd;
};
int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id);
int atmel_i2c_probe(struct i2c_client *client);
void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
void (*cbk)(struct atmel_i2c_work_data *work_data,


@ -292,7 +292,7 @@ static inline int atmel_sha_complete(struct atmel_sha_dev *dd, int err)
clk_disable(dd->iclk);
if ((dd->is_async || dd->force_complete) && req->base.complete)
req->base.complete(&req->base, err);
ahash_request_complete(req, err);
/* handle new request */
tasklet_schedule(&dd->queue_task);
@ -1080,7 +1080,7 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
crypto_request_complete(backlog, -EINPROGRESS);
ctx = crypto_tfm_ctx(async_req->tfm);
@ -2099,10 +2099,9 @@ struct atmel_sha_authenc_reqctx {
unsigned int digestlen;
};
static void atmel_sha_authenc_complete(struct crypto_async_request *areq,
int err)
static void atmel_sha_authenc_complete(void *data, int err)
{
struct ahash_request *req = areq->data;
struct ahash_request *req = data;
struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req);
authctx->cb(authctx->aes_dev, err, authctx->base.dd->is_async);
@ -2509,6 +2508,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
case 0x700:
case 0x600:
case 0x510:
dd->caps.has_dma = 1;
dd->caps.has_dualbuff = 1;
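
The atmel hunks above, like many of the following ones, are part of the tree-wide completion-callback conversion: requests are now finished through type-specific helpers such as ahash_request_complete() or the generic crypto_request_complete(), and callbacks receive their context directly as a void pointer instead of digging it out of a struct crypto_async_request. A hedged before/after sketch with a hypothetical driver callback:

#include <crypto/internal/hash.h>

/*
 * Before: the callback took a struct crypto_async_request and read ->data,
 * and drivers open-coded req->base.complete(&req->base, err).
 * After: the callback gets the data pointer directly and completion goes
 * through the helper.
 */
static void example_op_done(void *data, int err)
{
        struct ahash_request *req = data;       /* was: areq->data */

        /* ... driver-specific cleanup ... */

        ahash_request_complete(req, err);       /* was: req->base.complete() */
}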


@ -93,11 +93,10 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max,
static int atmel_sha204a_probe(struct i2c_client *client)
{
const struct i2c_device_id *id = i2c_client_get_device_id(client);
struct atmel_i2c_client_priv *i2c_priv;
int ret;
ret = atmel_i2c_probe(client, id);
ret = atmel_i2c_probe(client);
if (ret)
return ret;


@ -590,7 +590,7 @@ static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err)
if (!err && (rctx->mode & TDES_FLAGS_OPMODE_MASK) != TDES_FLAGS_ECB)
atmel_tdes_set_iv_as_last_ciphertext_block(dd);
req->base.complete(&req->base, err);
skcipher_request_complete(req, err);
}
static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
@ -619,7 +619,7 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
crypto_request_complete(backlog, -EINPROGRESS);
req = skcipher_request_cast(async_req);


@ -1621,7 +1621,7 @@ artpec6_crypto_xts_set_key(struct crypto_skcipher *cipher, const u8 *key,
crypto_skcipher_ctx(cipher);
int ret;
ret = xts_check_key(&cipher->base, key, keylen);
ret = xts_verify_key(cipher, key, keylen);
if (ret)
return ret;
@ -2143,13 +2143,13 @@ static void artpec6_crypto_task(unsigned long data)
list_for_each_entry_safe(req, n, &complete_in_progress,
complete_in_progress) {
req->req->complete(req->req, -EINPROGRESS);
crypto_request_complete(req->req, -EINPROGRESS);
}
}
static void artpec6_crypto_complete_crypto(struct crypto_async_request *req)
{
req->complete(req, 0);
crypto_request_complete(req, 0);
}
static void
@ -2161,7 +2161,7 @@ artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req)
scatterwalk_map_and_copy(cipher_req->iv, cipher_req->src,
cipher_req->cryptlen - AES_BLOCK_SIZE,
AES_BLOCK_SIZE, 0);
req->complete(req, 0);
skcipher_request_complete(cipher_req, 0);
}
static void
@ -2173,7 +2173,7 @@ artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req)
scatterwalk_map_and_copy(cipher_req->iv, cipher_req->dst,
cipher_req->cryptlen - AES_BLOCK_SIZE,
AES_BLOCK_SIZE, 0);
req->complete(req, 0);
skcipher_request_complete(cipher_req, 0);
}
static void artpec6_crypto_complete_aead(struct crypto_async_request *req)
@ -2211,12 +2211,12 @@ static void artpec6_crypto_complete_aead(struct crypto_async_request *req)
}
}
req->complete(req, result);
aead_request_complete(areq, result);
}
static void artpec6_crypto_complete_hash(struct crypto_async_request *req)
{
req->complete(req, 0);
crypto_request_complete(req, 0);
}


@ -1614,7 +1614,7 @@ static void finish_req(struct iproc_reqctx_s *rctx, int err)
spu_chunk_cleanup(rctx);
if (areq)
areq->complete(areq, err);
crypto_request_complete(areq, err);
}
/**
@ -2570,66 +2570,29 @@ static int aead_need_fallback(struct aead_request *req)
return payload_len > ctx->max_payload;
}
static void aead_complete(struct crypto_async_request *areq, int err)
{
struct aead_request *req =
container_of(areq, struct aead_request, base);
struct iproc_reqctx_s *rctx = aead_request_ctx(req);
struct crypto_aead *aead = crypto_aead_reqtfm(req);
flow_log("%s() err:%d\n", __func__, err);
areq->tfm = crypto_aead_tfm(aead);
areq->complete = rctx->old_complete;
areq->data = rctx->old_data;
areq->complete(areq, err);
}
static int aead_do_fallback(struct aead_request *req, bool is_encrypt)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_tfm *tfm = crypto_aead_tfm(aead);
struct iproc_reqctx_s *rctx = aead_request_ctx(req);
struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm);
int err;
u32 req_flags;
struct aead_request *subreq;
flow_log("%s() enc:%u\n", __func__, is_encrypt);
if (ctx->fallback_cipher) {
/* Store the cipher tfm and then use the fallback tfm */
rctx->old_tfm = tfm;
aead_request_set_tfm(req, ctx->fallback_cipher);
/*
* Save the callback and chain ourselves in, so we can restore
* the tfm
*/
rctx->old_complete = req->base.complete;
rctx->old_data = req->base.data;
req_flags = aead_request_flags(req);
aead_request_set_callback(req, req_flags, aead_complete, req);
err = is_encrypt ? crypto_aead_encrypt(req) :
crypto_aead_decrypt(req);
if (!ctx->fallback_cipher)
return -EINVAL;
if (err == 0) {
/*
* fallback was synchronous (did not return
* -EINPROGRESS). So restore request state here.
*/
aead_request_set_callback(req, req_flags,
rctx->old_complete, req);
req->base.data = rctx->old_data;
aead_request_set_tfm(req, aead);
flow_log("%s() fallback completed successfully\n\n",
__func__);
}
} else {
err = -EINVAL;
}
subreq = &rctx->req;
aead_request_set_tfm(subreq, ctx->fallback_cipher);
aead_request_set_callback(subreq, aead_request_flags(req),
req->base.complete, req->base.data);
aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
req->iv);
aead_request_set_ad(subreq, req->assoclen);
return err;
return is_encrypt ? crypto_aead_encrypt(req) :
crypto_aead_decrypt(req);
}
static int aead_enqueue(struct aead_request *req, bool is_encrypt)
@ -4243,6 +4206,7 @@ static int ahash_cra_init(struct crypto_tfm *tfm)
static int aead_cra_init(struct crypto_aead *aead)
{
unsigned int reqsize = sizeof(struct iproc_reqctx_s);
struct crypto_tfm *tfm = crypto_aead_tfm(aead);
struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm);
struct crypto_alg *alg = tfm->__crt_alg;
@ -4254,7 +4218,6 @@ static int aead_cra_init(struct crypto_aead *aead)
flow_log("%s()\n", __func__);
crypto_aead_set_reqsize(aead, sizeof(struct iproc_reqctx_s));
ctx->is_esp = false;
ctx->salt_len = 0;
ctx->salt_offset = 0;
@ -4263,22 +4226,29 @@ static int aead_cra_init(struct crypto_aead *aead)
get_random_bytes(ctx->iv, MAX_IV_SIZE);
flow_dump(" iv: ", ctx->iv, MAX_IV_SIZE);
if (!err) {
if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
flow_log("%s() creating fallback cipher\n", __func__);
if (err)
goto out;
ctx->fallback_cipher =
crypto_alloc_aead(alg->cra_name, 0,
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->fallback_cipher)) {
pr_err("%s() Error: failed to allocate fallback for %s\n",
__func__, alg->cra_name);
return PTR_ERR(ctx->fallback_cipher);
}
}
if (!(alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK))
goto reqsize;
flow_log("%s() creating fallback cipher\n", __func__);
ctx->fallback_cipher = crypto_alloc_aead(alg->cra_name, 0,
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->fallback_cipher)) {
pr_err("%s() Error: failed to allocate fallback for %s\n",
__func__, alg->cra_name);
return PTR_ERR(ctx->fallback_cipher);
}
reqsize += crypto_aead_reqsize(ctx->fallback_cipher);
reqsize:
crypto_aead_set_reqsize(aead, reqsize);
out:
return err;
}
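
The bcm rework above drops the old approach of temporarily swapping the request's tfm and completion callback, and instead keeps a fallback subrequest inside the request context, growing the request size by the fallback's own crypto_aead_reqsize(). A minimal sketch of the same pattern with hypothetical names:

#include <crypto/internal/aead.h>
#include <linux/err.h>

struct example_aead_ctx {
        struct crypto_aead *fallback;
};

struct example_aead_reqctx {
        /* driver-specific state ... */
        struct aead_request fallback_req;       /* keep last: variable size */
};

static int example_aead_init(struct crypto_aead *aead)
{
        struct example_aead_ctx *ctx = crypto_aead_ctx(aead);
        const char *name = crypto_aead_alg(aead)->base.cra_name;

        ctx->fallback = crypto_alloc_aead(name, 0, CRYPTO_ALG_ASYNC |
                                                   CRYPTO_ALG_NEED_FALLBACK);
        if (IS_ERR(ctx->fallback))
                return PTR_ERR(ctx->fallback);

        /* Room for our own state plus the fallback's request context. */
        crypto_aead_set_reqsize(aead, sizeof(struct example_aead_reqctx) +
                                      crypto_aead_reqsize(ctx->fallback));
        return 0;
}

static int example_aead_fallback(struct aead_request *req, bool enc)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct example_aead_ctx *ctx = crypto_aead_ctx(aead);
        struct example_aead_reqctx *rctx = aead_request_ctx(req);
        struct aead_request *subreq = &rctx->fallback_req;

        aead_request_set_tfm(subreq, ctx->fallback);
        aead_request_set_callback(subreq, aead_request_flags(req),
                                  req->base.complete, req->base.data);
        aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
                               req->iv);
        aead_request_set_ad(subreq, req->assoclen);

        return enc ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq);
}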


@ -339,15 +339,12 @@ struct iproc_reqctx_s {
/* hmac context */
bool is_sw_hmac;
/* aead context */
struct crypto_tfm *old_tfm;
crypto_completion_t old_complete;
void *old_data;
gfp_t gfp;
/* Buffers used to build SPU request and response messages */
struct spu_msg_buf msg_buf;
struct aead_request req;
};
/*


@ -83,7 +83,7 @@ int caam_process_blob(struct caam_blob_priv *priv,
output_len = info->input_len - CAAM_BLOB_OVERHEAD;
}
desc = kzalloc(CAAM_BLOB_DESC_BYTES_MAX, GFP_KERNEL | GFP_DMA);
desc = kzalloc(CAAM_BLOB_DESC_BYTES_MAX, GFP_KERNEL);
if (!desc)
return -ENOMEM;


@ -59,6 +59,8 @@
#include <crypto/engine.h>
#include <crypto/xts.h>
#include <asm/unaligned.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
/*
* crypto alg
@ -1379,8 +1381,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes,
GFP_DMA | flags);
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, flags);
if (!edesc) {
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0);
@ -1608,6 +1609,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
u8 *iv;
int ivsize = crypto_skcipher_ivsize(skcipher);
int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
unsigned int aligned_size;
src_nents = sg_nents_for_len(req->src, req->cryptlen);
if (unlikely(src_nents < 0)) {
@ -1681,15 +1683,18 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
/*
* allocate space for base edesc and hw desc commands, link tables, IV
*/
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
GFP_DMA | flags);
if (!edesc) {
aligned_size = ALIGN(ivsize, __alignof__(*edesc));
aligned_size += sizeof(*edesc) + desc_bytes + sec4_sg_bytes;
aligned_size = ALIGN(aligned_size, dma_get_cache_alignment());
iv = kzalloc(aligned_size, flags);
if (!iv) {
dev_err(jrdev, "could not allocate extended descriptor\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0);
return ERR_PTR(-ENOMEM);
}
edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc)));
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
edesc->mapped_src_nents = mapped_src_nents;
@ -1701,7 +1706,6 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
/* Make sure IV is located in a DMAable area */
if (ivsize) {
iv = (u8 *)edesc->sec4_sg + sec4_sg_bytes;
memcpy(iv, req->iv, ivsize);
iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_BIDIRECTIONAL);
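
The caamalg change above stops relying on GFP_DMA and instead carves the DMA-mapped IV out of the front of the allocation, places the extended descriptor right after it, and rounds the total size up to the DMA cache alignment. A sketch of the size and layout computation with a hypothetical descriptor struct:

#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/slab.h>

struct example_edesc {
        int src_nents;
        /* hw descriptor and S/G table follow in the same allocation */
};

/* Layout: [ IV padded to the edesc alignment | edesc | desc | S/G table ]. */
static struct example_edesc *example_edesc_alloc(unsigned int ivsize,
                                                 unsigned int desc_bytes,
                                                 unsigned int sg_bytes,
                                                 gfp_t flags, u8 **iv_out)
{
        unsigned int aligned_size;
        u8 *iv;

        aligned_size = ALIGN(ivsize, __alignof__(struct example_edesc));
        aligned_size += sizeof(struct example_edesc) + desc_bytes + sg_bytes;
        aligned_size = ALIGN(aligned_size, dma_get_cache_alignment());

        iv = kzalloc(aligned_size, flags);
        if (!iv)
                return NULL;

        *iv_out = iv;
        return (void *)(iv + ALIGN(ivsize, __alignof__(struct example_edesc)));
}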


@ -20,6 +20,8 @@
#include "caamalg_desc.h"
#include <crypto/xts.h>
#include <asm/unaligned.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
/*
* crypto alg
@ -959,7 +961,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
return (struct aead_edesc *)drv_ctx;
/* allocate space for base edesc and hw desc commands, link tables */
edesc = qi_cache_alloc(GFP_DMA | flags);
edesc = qi_cache_alloc(flags);
if (unlikely(!edesc)) {
dev_err(qidev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
@ -1317,8 +1319,9 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents);
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
if (unlikely(offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes +
ivsize > CAAM_QI_MEMCACHE_SIZE)) {
if (unlikely(ALIGN(ivsize, __alignof__(*edesc)) +
offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes >
CAAM_QI_MEMCACHE_SIZE)) {
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_ents, ivsize);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
@ -1327,17 +1330,18 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
}
/* allocate space for base edesc, link tables and IV */
edesc = qi_cache_alloc(GFP_DMA | flags);
if (unlikely(!edesc)) {
iv = qi_cache_alloc(flags);
if (unlikely(!iv)) {
dev_err(qidev, "could not allocate extended descriptor\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc)));
/* Make sure IV is located in a DMAable area */
sg_table = &edesc->sgt[0];
iv = (u8 *)(sg_table + qm_sg_ents);
memcpy(iv, req->iv, ivsize);
iv_dma = dma_map_single(qidev, iv, ivsize, DMA_BIDIRECTIONAL);


@ -16,7 +16,9 @@
#include "caamalg_desc.h"
#include "caamhash_desc.h"
#include "dpseci-debugfs.h"
#include <linux/dma-mapping.h>
#include <linux/fsl/mc.h>
#include <linux/kernel.h>
#include <soc/fsl/dpaa2-io.h>
#include <soc/fsl/dpaa2-fd.h>
#include <crypto/xts.h>
@ -370,7 +372,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
struct dpaa2_sg_entry *sg_table;
/* allocate space for base edesc, link tables and IV */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (unlikely(!edesc)) {
dev_err(dev, "could not allocate extended descriptor\n");
return ERR_PTR(-ENOMEM);
@ -1189,7 +1191,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
}
/* allocate space for base edesc, link tables and IV */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (unlikely(!edesc)) {
dev_err(dev, "could not allocate extended descriptor\n");
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
@ -3220,14 +3222,14 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key,
int ret = -ENOMEM;
struct dpaa2_fl_entry *in_fle, *out_fle;
req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL | GFP_DMA);
req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL);
if (!req_ctx)
return -ENOMEM;
in_fle = &req_ctx->fd_flt[1];
out_fle = &req_ctx->fd_flt[0];
flc = kzalloc(sizeof(*flc), GFP_KERNEL | GFP_DMA);
flc = kzalloc(sizeof(*flc), GFP_KERNEL);
if (!flc)
goto err_flc;
@ -3316,7 +3318,13 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key,
dev_dbg(ctx->dev, "keylen %d blocksize %d\n", keylen, blocksize);
if (keylen > blocksize) {
hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
unsigned int aligned_len =
ALIGN(keylen, dma_get_cache_alignment());
if (aligned_len < keylen)
return -EOVERFLOW;
hashed_key = kmemdup(key, aligned_len, GFP_KERNEL);
if (!hashed_key)
return -ENOMEM;
ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize);
@ -3411,7 +3419,7 @@ static void ahash_done(void *cbk_ctx, u32 status)
DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
ctx->ctx_len, 1);
req->base.complete(&req->base, ecode);
ahash_request_complete(req, ecode);
}
static void ahash_done_bi(void *cbk_ctx, u32 status)
@ -3449,7 +3457,7 @@ static void ahash_done_bi(void *cbk_ctx, u32 status)
DUMP_PREFIX_ADDRESS, 16, 4, req->result,
crypto_ahash_digestsize(ahash), 1);
req->base.complete(&req->base, ecode);
ahash_request_complete(req, ecode);
}
static void ahash_done_ctx_src(void *cbk_ctx, u32 status)
@ -3476,7 +3484,7 @@ static void ahash_done_ctx_src(void *cbk_ctx, u32 status)
DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
ctx->ctx_len, 1);
req->base.complete(&req->base, ecode);
ahash_request_complete(req, ecode);
}
static void ahash_done_ctx_dst(void *cbk_ctx, u32 status)
@ -3514,7 +3522,7 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status)
DUMP_PREFIX_ADDRESS, 16, 4, req->result,
crypto_ahash_digestsize(ahash), 1);
req->base.complete(&req->base, ecode);
ahash_request_complete(req, ecode);
}
static int ahash_update_ctx(struct ahash_request *req)
@ -3560,7 +3568,7 @@ static int ahash_update_ctx(struct ahash_request *req)
}
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc) {
dma_unmap_sg(ctx->dev, req->src, src_nents,
DMA_TO_DEVICE);
@ -3654,7 +3662,7 @@ static int ahash_final_ctx(struct ahash_request *req)
int ret;
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc)
return -ENOMEM;
@ -3743,7 +3751,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
}
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc) {
dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE);
return -ENOMEM;
@ -3836,7 +3844,7 @@ static int ahash_digest(struct ahash_request *req)
}
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc) {
dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE);
return ret;
@ -3913,7 +3921,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
int ret = -ENOMEM;
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc)
return ret;
@ -4012,7 +4020,7 @@ static int ahash_update_no_ctx(struct ahash_request *req)
}
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc) {
dma_unmap_sg(ctx->dev, req->src, src_nents,
DMA_TO_DEVICE);
@ -4125,7 +4133,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
}
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc) {
dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE);
return ret;
@ -4230,7 +4238,7 @@ static int ahash_update_first(struct ahash_request *req)
}
/* allocate space for base edesc and link tables */
edesc = qi_cache_zalloc(GFP_DMA | flags);
edesc = qi_cache_zalloc(flags);
if (!edesc) {
dma_unmap_sg(ctx->dev, req->src, src_nents,
DMA_TO_DEVICE);
@ -4926,6 +4934,7 @@ static int dpaa2_dpseci_congestion_setup(struct dpaa2_caam_priv *priv,
{
struct dpseci_congestion_notification_cfg cong_notif_cfg = { 0 };
struct device *dev = priv->dev;
unsigned int alignmask;
int err;
/*
@ -4936,13 +4945,14 @@ static int dpaa2_dpseci_congestion_setup(struct dpaa2_caam_priv *priv,
!(priv->dpseci_attr.options & DPSECI_OPT_HAS_CG))
return 0;
priv->cscn_mem = kzalloc(DPAA2_CSCN_SIZE + DPAA2_CSCN_ALIGN,
GFP_KERNEL | GFP_DMA);
alignmask = DPAA2_CSCN_ALIGN - 1;
alignmask |= dma_get_cache_alignment() - 1;
priv->cscn_mem = kzalloc(ALIGN(DPAA2_CSCN_SIZE, alignmask + 1),
GFP_KERNEL);
if (!priv->cscn_mem)
return -ENOMEM;
priv->cscn_mem_aligned = PTR_ALIGN(priv->cscn_mem, DPAA2_CSCN_ALIGN);
priv->cscn_dma = dma_map_single(dev, priv->cscn_mem_aligned,
priv->cscn_dma = dma_map_single(dev, priv->cscn_mem,
DPAA2_CSCN_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(dev, priv->cscn_dma)) {
dev_err(dev, "Error mapping CSCN memory area\n");
@ -5174,7 +5184,7 @@ static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev)
priv->domain = iommu_get_domain_for_dev(dev);
qi_cache = kmem_cache_create("dpaa2_caamqicache", CAAM_QI_MEMCACHE_SIZE,
0, SLAB_CACHE_DMA, NULL);
0, 0, NULL);
if (!qi_cache) {
dev_err(dev, "Can't allocate SEC cache\n");
return -ENOMEM;
@ -5451,7 +5461,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req)
dma_sync_single_for_cpu(priv->dev, priv->cscn_dma,
DPAA2_CSCN_SIZE,
DMA_FROM_DEVICE);
if (unlikely(dpaa2_cscn_state_congested(priv->cscn_mem_aligned))) {
if (unlikely(dpaa2_cscn_state_congested(priv->cscn_mem))) {
dev_dbg_ratelimited(dev, "Dropping request\n");
return -EBUSY;
}


@ -7,13 +7,14 @@
#ifndef _CAAMALG_QI2_H_
#define _CAAMALG_QI2_H_
#include <crypto/internal/skcipher.h>
#include <linux/compiler_attributes.h>
#include <soc/fsl/dpaa2-io.h>
#include <soc/fsl/dpaa2-fd.h>
#include <linux/threads.h>
#include <linux/netdevice.h>
#include "dpseci.h"
#include "desc_constr.h"
#include <crypto/skcipher.h>
#define DPAA2_CAAM_STORE_SIZE 16
/* NAPI weight *must* be a multiple of the store size. */
@ -36,8 +37,6 @@
* @tx_queue_attr: array of Tx queue attributes
* @cscn_mem: pointer to memory region containing the congestion SCN
* its size is larger than needed in order to accommodate alignment
* @cscn_mem_aligned: pointer to congestion SCN; it is computed as
* PTR_ALIGN(cscn_mem, DPAA2_CSCN_ALIGN)
* @cscn_dma: dma address used by the QMAN to write CSCN messages
* @dev: device associated with the DPSECI object
* @mc_io: pointer to MC portal's I/O object
@ -58,7 +57,6 @@ struct dpaa2_caam_priv {
/* congestion */
void *cscn_mem;
void *cscn_mem_aligned;
dma_addr_t cscn_dma;
struct device *dev;
@ -158,7 +156,7 @@ struct ahash_edesc {
struct caam_flc {
u32 flc[16];
u32 sh_desc[MAX_SDLEN];
} ____cacheline_aligned;
} __aligned(CRYPTO_DMA_ALIGN);
enum optype {
ENCRYPT = 0,
@ -180,7 +178,7 @@ enum optype {
* @edesc: extended descriptor; points to one of {skcipher,aead}_edesc
*/
struct caam_request {
struct dpaa2_fl_entry fd_flt[2];
struct dpaa2_fl_entry fd_flt[2] __aligned(CRYPTO_DMA_ALIGN);
dma_addr_t fd_flt_dma;
struct caam_flc *flc;
dma_addr_t flc_dma;


@ -66,6 +66,8 @@
#include "key_gen.h"
#include "caamhash_desc.h"
#include <crypto/engine.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#define CAAM_CRA_PRIORITY 3000
@ -365,7 +367,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key,
dma_addr_t key_dma;
int ret;
desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL);
if (!desc) {
dev_err(jrdev, "unable to allocate key input memory\n");
return -ENOMEM;
@ -432,7 +434,13 @@ static int ahash_setkey(struct crypto_ahash *ahash,
dev_dbg(jrdev, "keylen %d\n", keylen);
if (keylen > blocksize) {
hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
unsigned int aligned_len =
ALIGN(keylen, dma_get_cache_alignment());
if (aligned_len < keylen)
return -EOVERFLOW;
hashed_key = kmemdup(key, keylen, GFP_KERNEL);
if (!hashed_key)
return -ENOMEM;
ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize);
@ -606,7 +614,7 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err,
* by CAAM, not crypto engine.
*/
if (!has_bklog)
req->base.complete(&req->base, ecode);
ahash_request_complete(req, ecode);
else
crypto_finalize_hash_request(jrp->engine, req, ecode);
}
@ -668,7 +676,7 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err,
* by CAAM, not crypto engine.
*/
if (!has_bklog)
req->base.complete(&req->base, ecode);
ahash_request_complete(req, ecode);
else
crypto_finalize_hash_request(jrp->engine, req, ecode);
@ -702,7 +710,7 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req,
struct ahash_edesc *edesc;
unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry);
edesc = kzalloc(sizeof(*edesc) + sg_size, GFP_DMA | flags);
edesc = kzalloc(sizeof(*edesc) + sg_size, flags);
if (!edesc) {
dev_err(ctx->jrdev, "could not allocate extended descriptor\n");
return NULL;


@ -16,6 +16,8 @@
#include "desc_constr.h"
#include "sg_sw_sec4.h"
#include "caampkc.h"
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + SIZEOF_RSA_PUB_PDB)
#define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \
@ -310,8 +312,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
/* allocate space for base edesc, hw desc commands and link tables */
edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes,
GFP_DMA | flags);
edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, flags);
if (!edesc)
goto dst_fail;
@ -898,7 +899,7 @@ static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen)
if (!nbytes)
return NULL;
dst = kzalloc(dstlen, GFP_DMA | GFP_KERNEL);
dst = kzalloc(dstlen, GFP_KERNEL);
if (!dst)
return NULL;
@ -910,7 +911,7 @@ static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen)
/**
* caam_read_raw_data - Read a raw byte stream as a positive integer.
* The function skips the buffer's leading zeros, copies the remaining data
* to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns
* to a buffer allocated in the GFP_KERNEL zone and returns
* the address of the new buffer.
*
* @buf : The data to read
@ -923,7 +924,7 @@ static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes)
if (!*nbytes)
return NULL;
return kmemdup(buf, *nbytes, GFP_DMA | GFP_KERNEL);
return kmemdup(buf, *nbytes, GFP_KERNEL);
}
static int caam_rsa_check_key_length(unsigned int len)
@ -949,13 +950,13 @@ static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
return ret;
/* Copy key in DMA zone */
rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL);
rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_KERNEL);
if (!rsa_key->e)
goto err;
/*
* Skip leading zeros and copy the positive integer to a buffer
* allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor
* allocated in the GFP_KERNEL zone. The decryption descriptor
* expects a positive integer for the RSA modulus and uses its length as
* decryption output length.
*/
@ -983,6 +984,7 @@ static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx,
struct caam_rsa_key *rsa_key = &ctx->key;
size_t p_sz = raw_key->p_sz;
size_t q_sz = raw_key->q_sz;
unsigned aligned_size;
rsa_key->p = caam_read_raw_data(raw_key->p, &p_sz);
if (!rsa_key->p)
@ -994,11 +996,13 @@ static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx,
goto free_p;
rsa_key->q_sz = q_sz;
rsa_key->tmp1 = kzalloc(raw_key->p_sz, GFP_DMA | GFP_KERNEL);
aligned_size = ALIGN(raw_key->p_sz, dma_get_cache_alignment());
rsa_key->tmp1 = kzalloc(aligned_size, GFP_KERNEL);
if (!rsa_key->tmp1)
goto free_q;
rsa_key->tmp2 = kzalloc(raw_key->q_sz, GFP_DMA | GFP_KERNEL);
aligned_size = ALIGN(raw_key->q_sz, dma_get_cache_alignment());
rsa_key->tmp2 = kzalloc(aligned_size, GFP_KERNEL);
if (!rsa_key->tmp2)
goto free_tmp1;
@ -1051,17 +1055,17 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
return ret;
/* Copy key in DMA zone */
rsa_key->d = kmemdup(raw_key.d, raw_key.d_sz, GFP_DMA | GFP_KERNEL);
rsa_key->d = kmemdup(raw_key.d, raw_key.d_sz, GFP_KERNEL);
if (!rsa_key->d)
goto err;
rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL);
rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_KERNEL);
if (!rsa_key->e)
goto err;
/*
* Skip leading zeros and copy the positive integer to a buffer
* allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor
* allocated in the GFP_KERNEL zone. The decryption descriptor
* expects a positive integer for the RSA modulus and uses its length as
* decryption output length.
*/
@ -1185,8 +1189,7 @@ int caam_pkc_init(struct device *ctrldev)
return 0;
/* allocate zero buffer, used for padding input */
zero_buffer = kzalloc(CAAM_RSA_MAX_INPUT_SIZE - 1, GFP_DMA |
GFP_KERNEL);
zero_buffer = kzalloc(CAAM_RSA_MAX_INPUT_SIZE - 1, GFP_KERNEL);
if (!zero_buffer)
return -ENOMEM;


@ -8,6 +8,8 @@
#include <linux/completion.h>
#include <crypto/internal/rng.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include "compat.h"
#include "regs.h"
#include "intern.h"
@ -75,6 +77,7 @@ static int caam_prng_generate(struct crypto_rng *tfm,
const u8 *src, unsigned int slen,
u8 *dst, unsigned int dlen)
{
unsigned int aligned_dlen = ALIGN(dlen, dma_get_cache_alignment());
struct caam_prng_ctx ctx;
struct device *jrdev;
dma_addr_t dst_dma;
@ -82,7 +85,10 @@ static int caam_prng_generate(struct crypto_rng *tfm,
u8 *buf;
int ret;
buf = kzalloc(dlen, GFP_KERNEL);
if (aligned_dlen < dlen)
return -EOVERFLOW;
buf = kzalloc(aligned_dlen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@ -94,7 +100,7 @@ static int caam_prng_generate(struct crypto_rng *tfm,
return ret;
}
desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA);
desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL);
if (!desc) {
ret = -ENOMEM;
goto out1;
@ -156,7 +162,7 @@ static int caam_prng_seed(struct crypto_rng *tfm,
return ret;
}
desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA);
desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL);
if (!desc) {
caam_jr_free(jrdev);
return -ENOMEM;


@ -12,6 +12,8 @@
#include <linux/hw_random.h>
#include <linux/completion.h>
#include <linux/atomic.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/kfifo.h>
#include "compat.h"
@ -176,17 +178,18 @@ static int caam_init(struct hwrng *rng)
int err;
ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
GFP_DMA | GFP_KERNEL);
GFP_KERNEL);
if (!ctx->desc_sync)
return -ENOMEM;
ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
GFP_DMA | GFP_KERNEL);
GFP_KERNEL);
if (!ctx->desc_async)
return -ENOMEM;
if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE,
GFP_DMA | GFP_KERNEL))
if (kfifo_alloc(&ctx->fifo, ALIGN(CAAM_RNG_MAX_FIFO_STORE_SIZE,
dma_get_cache_alignment()),
GFP_KERNEL))
return -ENOMEM;
INIT_WORK(&ctx->worker, caam_rng_worker);


@ -199,7 +199,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
u32 *desc, status;
int sh_idx, ret = 0;
desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA);
desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL);
if (!desc)
return -ENOMEM;
@ -276,7 +276,7 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
int ret = 0, sh_idx;
ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA);
desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL);
if (!desc)
return -ENOMEM;


@ -163,7 +163,8 @@ static inline void append_data(u32 * const desc, const void *data, int len)
{
u32 *offset = desc_end(desc);
if (len) /* avoid sparse warning: memcpy with byte count of 0 */
/* Avoid gcc warning: memcpy with data == NULL */
if (!IS_ENABLED(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG) || data)
memcpy(offset, data, len);
(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) +


@ -64,7 +64,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out,
if (local_max > max_keylen)
return -EINVAL;
desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL);
if (!desc) {
dev_err(jrdev, "unable to allocate key input memory\n");
return ret;


@ -614,7 +614,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
struct qman_fq *fq;
int ret;
fq = kzalloc(sizeof(*fq), GFP_KERNEL | GFP_DMA);
fq = kzalloc(sizeof(*fq), GFP_KERNEL);
if (!fq)
return -ENOMEM;
@ -756,7 +756,7 @@ int caam_qi_init(struct platform_device *caam_pdev)
}
qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0,
SLAB_CACHE_DMA, NULL);
0, NULL);
if (!qi_cache) {
dev_err(qidev, "Can't allocate CAAM cache\n");
free_rsp_fqs();

Some files were not shown because too many files have changed in this diff.