Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Algorithms:

   - Add AES-NI/AVX/x86_64 implementation of SM4.

  Drivers:

   - Add Arm SMCCC TRNG based driver"

[ And obviously a lot of random fixes and updates  - Linus]

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (84 commits)
  crypto: sha512 - remove imaginary and mystifying clearing of variables
  crypto: aesni - xts_crypt() return if walk.nbytes is 0
  padata: Remove repeated verbose license text
  crypto: ccp - Add support for new CCP/PSP device ID
  crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation
  crypto: x86/sm4 - export reusable AESNI/AVX functions
  crypto: rmd320 - remove rmd320 in Makefile
  crypto: skcipher - in_irq() cleanup
  crypto: hisilicon - check _PS0 and _PR0 method
  crypto: hisilicon - change parameter passing of debugfs function
  crypto: hisilicon - support runtime PM for accelerator device
  crypto: hisilicon - add runtime PM ops
  crypto: hisilicon - using 'debugfs_create_file' instead of 'debugfs_create_regset32'
  crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm
  crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm
  crypto: tcrypt - Fix missing return value check
  crypto: hisilicon/sec - modify the hardware endian configuration
  crypto: hisilicon/sec - fix the abnormal exiting process
  crypto: qat - store vf.compatible flag
  crypto: qat - do not export adf_iov_putmsg()
  ...
This commit is contained in:
Linus Torvalds 2021-08-30 12:57:10 -07:00
commit 44a7d44411
87 changed files with 3696 additions and 840 deletions

View File

@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = {
.max_size = curve25519_max_size,
};
static int __init mod_init(void)
static int __init arm_curve25519_init(void)
{
if (elf_hwcap & HWCAP_NEON) {
static_branch_enable(&have_neon);
@ -122,14 +122,14 @@ static int __init mod_init(void)
return 0;
}
static void __exit mod_exit(void)
static void __exit arm_curve25519_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
crypto_unregister_kpp(&curve25519_alg);
}
module_init(mod_init);
module_exit(mod_exit);
module_init(arm_curve25519_init);
module_exit(arm_curve25519_exit);
MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-neon");

View File

@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_SM4
select CRYPTO_LIB_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"

View File

@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2");
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
crypto_sm4_encrypt(tfm, out, in);
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
crypto_sm4_decrypt(tfm, out, in);
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = {
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = crypto_sm4_set_key,
.cia_setkey = sm4_ce_setkey,
.cia_encrypt = sm4_ce_encrypt,
.cia_decrypt = sm4_ce_decrypt
}

View File

@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE

View File

@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
if (!walk.nbytes)
return err;
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
skcipher_request_set_crypt(&subreq, req->src, req->dst,
blocks * AES_BLOCK_SIZE, req->iv);
req = &subreq;
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
} else {
tail = 0;
}

View File

@ -0,0 +1,589 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, AES-NI/AVX optimized.
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
* Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
* https://github.com/mjosaarinen/sm4ni
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define rRIP (%rip)
#define RX0 %xmm0
#define RX1 %xmm1
#define MASK_4BIT %xmm2
#define RTMP0 %xmm3
#define RTMP1 %xmm4
#define RTMP2 %xmm5
#define RTMP3 %xmm6
#define RTMP4 %xmm7
#define RA0 %xmm8
#define RA1 %xmm9
#define RA2 %xmm10
#define RA3 %xmm11
#define RB0 %xmm12
#define RB1 %xmm13
#define RB2 %xmm14
#define RB3 %xmm15
#define RNOT %xmm0
#define RBSWAP %xmm1
/* Transpose four 32-bit words between 128-bit vectors. */
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
vpunpckhdq x1, x0, t2; \
vpunpckldq x1, x0, x0; \
\
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x2; \
\
vpunpckhqdq t1, x0, x1; \
vpunpcklqdq t1, x0, x0; \
\
vpunpckhqdq x2, t2, x3; \
vpunpcklqdq x2, t2, x2;
/* pre-SubByte transform. */
#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
vpand x, mask4bit, tmp0; \
vpandn x, mask4bit, x; \
vpsrld $4, x, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxor tmp0, x, x;
/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
* 'vaeslastenc' instruction.
*/
#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
vpandn mask4bit, x, tmp0; \
vpsrld $4, x, x; \
vpand x, mask4bit, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxor tmp0, x, x;
.section .rodata.cst164, "aM", @progbits, 164
.align 16
/*
* Following four affine transform look-up tables are from work by
* Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
*
* These allow exposing SM4 S-Box from AES SubByte.
*/
/* pre-SubByte affine transform, from SM4 field to AES field. */
.Lpre_tf_lo_s:
.quad 0x9197E2E474720701, 0xC7C1B4B222245157
.Lpre_tf_hi_s:
.quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
/* post-SubByte affine transform, from AES field to SM4 field. */
.Lpost_tf_lo_s:
.quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
.Lpost_tf_hi_s:
.quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
/* For isolating SubBytes from AESENCLAST, inverse shift row */
.Linv_shift_row:
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_8:
.byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
.byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_16:
.byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
.byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_24:
.byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
.byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
/* For CTR-mode IV byteswap */
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/* For input word byte-swap */
.Lbswap32_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.align 4
/* 4-bit mask */
.L0f0f0f0f:
.long 0x0f0f0f0f
.text
.align 16
/*
* void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
* const u8 *src, int nblocks)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_crypt4)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (1..4 blocks)
* %rdx: src (1..4 blocks)
* %rcx: num blocks (1..4)
*/
FRAME_BEGIN
vmovdqu 0*16(%rdx), RA0;
vmovdqa RA0, RA1;
vmovdqa RA0, RA2;
vmovdqa RA0, RA3;
cmpq $2, %rcx;
jb .Lblk4_load_input_done;
vmovdqu 1*16(%rdx), RA1;
je .Lblk4_load_input_done;
vmovdqu 2*16(%rdx), RA2;
cmpq $3, %rcx;
je .Lblk4_load_input_done;
vmovdqu 3*16(%rdx), RA3;
.Lblk4_load_input_done:
vmovdqa .Lbswap32_mask rRIP, RTMP2;
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
vmovdqa .Lpre_tf_hi_s rRIP, RB0;
vmovdqa .Lpost_tf_lo_s rRIP, RB1;
vmovdqa .Lpost_tf_hi_s rRIP, RB2;
vmovdqa .Linv_shift_row rRIP, RB3;
vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2;
vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
#define ROUND(round, s0, s1, s2, s3) \
vbroadcastss (4*(round))(%rdi), RX0; \
vpxor s1, RX0, RX0; \
vpxor s2, RX0, RX0; \
vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
\
/* sbox, non-linear part */ \
transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
vaesenclast MASK_4BIT, RX0, RX0; \
transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
\
/* linear part */ \
vpshufb RB3, RX0, RTMP0; \
vpxor RTMP0, s0, s0; /* s0 ^ x */ \
vpshufb RTMP2, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
vpshufb RTMP3, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \
vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
vpslld $2, RTMP0, RTMP1; \
vpsrld $30, RTMP0, RTMP0; \
vpxor RTMP0, s0, s0; \
/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
vpxor RTMP1, s0, s0;
leaq (32*4)(%rdi), %rax;
.align 16
.Lroundloop_blk4:
ROUND(0, RA0, RA1, RA2, RA3);
ROUND(1, RA1, RA2, RA3, RA0);
ROUND(2, RA2, RA3, RA0, RA1);
ROUND(3, RA3, RA0, RA1, RA2);
leaq (4*4)(%rdi), %rdi;
cmpq %rax, %rdi;
jne .Lroundloop_blk4;
#undef ROUND
vmovdqa .Lbswap128_mask rRIP, RTMP2;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vmovdqu RA0, 0*16(%rsi);
cmpq $2, %rcx;
jb .Lblk4_store_output_done;
vmovdqu RA1, 1*16(%rsi);
je .Lblk4_store_output_done;
vmovdqu RA2, 2*16(%rsi);
cmpq $3, %rcx;
je .Lblk4_store_output_done;
vmovdqu RA3, 3*16(%rsi);
.Lblk4_store_output_done:
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx_crypt4)
.align 8
SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
/* input:
* %rdi: round key array, CTX
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
* plaintext blocks
* output:
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
* ciphertext blocks
*/
FRAME_BEGIN
vmovdqa .Lbswap32_mask rRIP, RTMP2;
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vpshufb RTMP2, RB0, RB0;
vpshufb RTMP2, RB1, RB1;
vpshufb RTMP2, RB2, RB2;
vpshufb RTMP2, RB3, RB3;
vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
vbroadcastss (4*(round))(%rdi), RX0; \
vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \
vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \
vmovdqa RX0, RX1; \
vpxor s1, RX0, RX0; \
vpxor s2, RX0, RX0; \
vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \
vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \
vpxor r1, RX1, RX1; \
vpxor r2, RX1, RX1; \
vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
\
/* sbox, non-linear part */ \
transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
vmovdqa .Linv_shift_row rRIP, RTMP4; \
vaesenclast MASK_4BIT, RX0, RX0; \
vaesenclast MASK_4BIT, RX1, RX1; \
transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
\
/* linear part */ \
vpshufb RTMP4, RX0, RTMP0; \
vpxor RTMP0, s0, s0; /* s0 ^ x */ \
vpshufb RTMP4, RX1, RTMP2; \
vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \
vpxor RTMP2, r0, r0; /* r0 ^ x */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
vpshufb RTMP4, RX1, RTMP3; \
vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
vpshufb RTMP4, RX1, RTMP3; \
vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
vpslld $2, RTMP0, RTMP1; \
vpsrld $30, RTMP0, RTMP0; \
vpxor RTMP0, s0, s0; \
vpxor RTMP1, s0, s0; \
vpshufb RTMP4, RX1, RTMP3; \
vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
/* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
vpslld $2, RTMP2, RTMP3; \
vpsrld $30, RTMP2, RTMP2; \
vpxor RTMP2, r0, r0; \
vpxor RTMP3, r0, r0;
leaq (32*4)(%rdi), %rax;
.align 16
.Lroundloop_blk8:
ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
leaq (4*4)(%rdi), %rdi;
cmpq %rax, %rdi;
jne .Lroundloop_blk8;
#undef ROUND
vmovdqa .Lbswap128_mask rRIP, RTMP2;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vpshufb RTMP2, RB0, RB0;
vpshufb RTMP2, RB1, RB1;
vpshufb RTMP2, RB2, RB2;
vpshufb RTMP2, RB3, RB3;
FRAME_END
ret;
SYM_FUNC_END(__sm4_crypt_blk8)
/*
* void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
* const u8 *src, int nblocks)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_crypt8)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (1..8 blocks)
* %rdx: src (1..8 blocks)
* %rcx: num blocks (1..8)
*/
FRAME_BEGIN
cmpq $5, %rcx;
jb sm4_aesni_avx_crypt4;
vmovdqu (0 * 16)(%rdx), RA0;
vmovdqu (1 * 16)(%rdx), RA1;
vmovdqu (2 * 16)(%rdx), RA2;
vmovdqu (3 * 16)(%rdx), RA3;
vmovdqu (4 * 16)(%rdx), RB0;
vmovdqa RB0, RB1;
vmovdqa RB0, RB2;
vmovdqa RB0, RB3;
je .Lblk8_load_input_done;
vmovdqu (5 * 16)(%rdx), RB1;
cmpq $7, %rcx;
jb .Lblk8_load_input_done;
vmovdqu (6 * 16)(%rdx), RB2;
je .Lblk8_load_input_done;
vmovdqu (7 * 16)(%rdx), RB3;
.Lblk8_load_input_done:
call __sm4_crypt_blk8;
cmpq $6, %rcx;
vmovdqu RA0, (0 * 16)(%rsi);
vmovdqu RA1, (1 * 16)(%rsi);
vmovdqu RA2, (2 * 16)(%rsi);
vmovdqu RA3, (3 * 16)(%rsi);
vmovdqu RB0, (4 * 16)(%rsi);
jb .Lblk8_store_output_done;
vmovdqu RB1, (5 * 16)(%rsi);
je .Lblk8_store_output_done;
vmovdqu RB2, (6 * 16)(%rsi);
cmpq $7, %rcx;
je .Lblk8_store_output_done;
vmovdqu RB3, (7 * 16)(%rsi);
.Lblk8_store_output_done:
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx_crypt8)
/*
* void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (8 blocks)
* %rdx: src (8 blocks)
* %rcx: iv (big endian, 128bit)
*/
FRAME_BEGIN
/* load IV and byteswap */
vmovdqu (%rcx), RA0;
vmovdqa .Lbswap128_mask rRIP, RBSWAP;
vpshufb RBSWAP, RA0, RTMP0; /* be => le */
vpcmpeqd RNOT, RNOT, RNOT;
vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpsubq minus_one, x, x; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
/* construct IVs */
inc_le128(RTMP0, RNOT, RTMP2); /* +1 */
vpshufb RBSWAP, RTMP0, RA1;
inc_le128(RTMP0, RNOT, RTMP2); /* +2 */
vpshufb RBSWAP, RTMP0, RA2;
inc_le128(RTMP0, RNOT, RTMP2); /* +3 */
vpshufb RBSWAP, RTMP0, RA3;
inc_le128(RTMP0, RNOT, RTMP2); /* +4 */
vpshufb RBSWAP, RTMP0, RB0;
inc_le128(RTMP0, RNOT, RTMP2); /* +5 */
vpshufb RBSWAP, RTMP0, RB1;
inc_le128(RTMP0, RNOT, RTMP2); /* +6 */
vpshufb RBSWAP, RTMP0, RB2;
inc_le128(RTMP0, RNOT, RTMP2); /* +7 */
vpshufb RBSWAP, RTMP0, RB3;
inc_le128(RTMP0, RNOT, RTMP2); /* +8 */
vpshufb RBSWAP, RTMP0, RTMP1;
/* store new IV */
vmovdqu RTMP1, (%rcx);
call __sm4_crypt_blk8;
vpxor (0 * 16)(%rdx), RA0, RA0;
vpxor (1 * 16)(%rdx), RA1, RA1;
vpxor (2 * 16)(%rdx), RA2, RA2;
vpxor (3 * 16)(%rdx), RA3, RA3;
vpxor (4 * 16)(%rdx), RB0, RB0;
vpxor (5 * 16)(%rdx), RB1, RB1;
vpxor (6 * 16)(%rdx), RB2, RB2;
vpxor (7 * 16)(%rdx), RB3, RB3;
vmovdqu RA0, (0 * 16)(%rsi);
vmovdqu RA1, (1 * 16)(%rsi);
vmovdqu RA2, (2 * 16)(%rsi);
vmovdqu RA3, (3 * 16)(%rsi);
vmovdqu RB0, (4 * 16)(%rsi);
vmovdqu RB1, (5 * 16)(%rsi);
vmovdqu RB2, (6 * 16)(%rsi);
vmovdqu RB3, (7 * 16)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
/*
* void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (8 blocks)
* %rdx: src (8 blocks)
* %rcx: iv
*/
FRAME_BEGIN
vmovdqu (0 * 16)(%rdx), RA0;
vmovdqu (1 * 16)(%rdx), RA1;
vmovdqu (2 * 16)(%rdx), RA2;
vmovdqu (3 * 16)(%rdx), RA3;
vmovdqu (4 * 16)(%rdx), RB0;
vmovdqu (5 * 16)(%rdx), RB1;
vmovdqu (6 * 16)(%rdx), RB2;
vmovdqu (7 * 16)(%rdx), RB3;
call __sm4_crypt_blk8;
vmovdqu (7 * 16)(%rdx), RNOT;
vpxor (%rcx), RA0, RA0;
vpxor (0 * 16)(%rdx), RA1, RA1;
vpxor (1 * 16)(%rdx), RA2, RA2;
vpxor (2 * 16)(%rdx), RA3, RA3;
vpxor (3 * 16)(%rdx), RB0, RB0;
vpxor (4 * 16)(%rdx), RB1, RB1;
vpxor (5 * 16)(%rdx), RB2, RB2;
vpxor (6 * 16)(%rdx), RB3, RB3;
vmovdqu RNOT, (%rcx); /* store new IV */
vmovdqu RA0, (0 * 16)(%rsi);
vmovdqu RA1, (1 * 16)(%rsi);
vmovdqu RA2, (2 * 16)(%rsi);
vmovdqu RA3, (3 * 16)(%rsi);
vmovdqu RB0, (4 * 16)(%rsi);
vmovdqu RB1, (5 * 16)(%rsi);
vmovdqu RB2, (6 * 16)(%rsi);
vmovdqu RB3, (7 * 16)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
/*
* void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (8 blocks)
* %rdx: src (8 blocks)
* %rcx: iv
*/
FRAME_BEGIN
/* Load input */
vmovdqu (%rcx), RA0;
vmovdqu 0 * 16(%rdx), RA1;
vmovdqu 1 * 16(%rdx), RA2;
vmovdqu 2 * 16(%rdx), RA3;
vmovdqu 3 * 16(%rdx), RB0;
vmovdqu 4 * 16(%rdx), RB1;
vmovdqu 5 * 16(%rdx), RB2;
vmovdqu 6 * 16(%rdx), RB3;
/* Update IV */
vmovdqu 7 * 16(%rdx), RNOT;
vmovdqu RNOT, (%rcx);
call __sm4_crypt_blk8;
vpxor (0 * 16)(%rdx), RA0, RA0;
vpxor (1 * 16)(%rdx), RA1, RA1;
vpxor (2 * 16)(%rdx), RA2, RA2;
vpxor (3 * 16)(%rdx), RA3, RA3;
vpxor (4 * 16)(%rdx), RB0, RB0;
vpxor (5 * 16)(%rdx), RB1, RB1;
vpxor (6 * 16)(%rdx), RB2, RB2;
vpxor (7 * 16)(%rdx), RB3, RB3;
vmovdqu RA0, (0 * 16)(%rsi);
vmovdqu RA1, (1 * 16)(%rsi);
vmovdqu RA2, (2 * 16)(%rsi);
vmovdqu RA3, (3 * 16)(%rsi);
vmovdqu RB0, (4 * 16)(%rsi);
vmovdqu RB1, (5 * 16)(%rsi);
vmovdqu RB2, (6 * 16)(%rsi);
vmovdqu RB3, (7 * 16)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8)

View File

@ -0,0 +1,497 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
* Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
* https://github.com/mjosaarinen/sm4ni
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define rRIP (%rip)
/* vector registers */
#define RX0 %ymm0
#define RX1 %ymm1
#define MASK_4BIT %ymm2
#define RTMP0 %ymm3
#define RTMP1 %ymm4
#define RTMP2 %ymm5
#define RTMP3 %ymm6
#define RTMP4 %ymm7
#define RA0 %ymm8
#define RA1 %ymm9
#define RA2 %ymm10
#define RA3 %ymm11
#define RB0 %ymm12
#define RB1 %ymm13
#define RB2 %ymm14
#define RB3 %ymm15
#define RNOT %ymm0
#define RBSWAP %ymm1
#define RX0x %xmm0
#define RX1x %xmm1
#define MASK_4BITx %xmm2
#define RNOTx %xmm0
#define RBSWAPx %xmm1
#define RTMP0x %xmm3
#define RTMP1x %xmm4
#define RTMP2x %xmm5
#define RTMP3x %xmm6
#define RTMP4x %xmm7
/* helper macros */
/* Transpose four 32-bit words between 128-bit vector lanes. */
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
vpunpckhdq x1, x0, t2; \
vpunpckldq x1, x0, x0; \
\
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x2; \
\
vpunpckhqdq t1, x0, x1; \
vpunpcklqdq t1, x0, x0; \
\
vpunpckhqdq x2, t2, x3; \
vpunpcklqdq x2, t2, x2;
/* post-SubByte transform. */
#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
vpand x, mask4bit, tmp0; \
vpandn x, mask4bit, x; \
vpsrld $4, x, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxor tmp0, x, x;
/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
* 'vaeslastenc' instruction. */
#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
vpandn mask4bit, x, tmp0; \
vpsrld $4, x, x; \
vpand x, mask4bit, x; \
\
vpshufb tmp0, lo_t, tmp0; \
vpshufb x, hi_t, x; \
vpxor tmp0, x, x;
.section .rodata.cst164, "aM", @progbits, 164
.align 16
/*
* Following four affine transform look-up tables are from work by
* Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
*
* These allow exposing SM4 S-Box from AES SubByte.
*/
/* pre-SubByte affine transform, from SM4 field to AES field. */
.Lpre_tf_lo_s:
.quad 0x9197E2E474720701, 0xC7C1B4B222245157
.Lpre_tf_hi_s:
.quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
/* post-SubByte affine transform, from AES field to SM4 field. */
.Lpost_tf_lo_s:
.quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
.Lpost_tf_hi_s:
.quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
/* For isolating SubBytes from AESENCLAST, inverse shift row */
.Linv_shift_row:
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_8:
.byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
.byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_16:
.byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
.byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
.Linv_shift_row_rol_24:
.byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
.byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
/* For CTR-mode IV byteswap */
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/* For input word byte-swap */
.Lbswap32_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.align 4
/* 4-bit mask */
.L0f0f0f0f:
.long 0x0f0f0f0f
.text
.align 16
.align 8
SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
/* input:
* %rdi: round key array, CTX
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
* plaintext blocks
* output:
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
* ciphertext blocks
*/
FRAME_BEGIN
vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vpshufb RTMP2, RB0, RB0;
vpshufb RTMP2, RB1, RB1;
vpshufb RTMP2, RB2, RB2;
vpshufb RTMP2, RB3, RB3;
vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
vpbroadcastd (4*(round))(%rdi), RX0; \
vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
vmovdqa RX0, RX1; \
vpxor s1, RX0, RX0; \
vpxor s2, RX0, RX0; \
vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
vpxor r1, RX1, RX1; \
vpxor r2, RX1, RX1; \
vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
\
/* sbox, non-linear part */ \
transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
vextracti128 $1, RX0, RTMP4x; \
vextracti128 $1, RX1, RTMP0x; \
vaesenclast MASK_4BITx, RX0x, RX0x; \
vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
vaesenclast MASK_4BITx, RX1x, RX1x; \
vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
vinserti128 $1, RTMP4x, RX0, RX0; \
vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
vinserti128 $1, RTMP0x, RX1, RX1; \
transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
\
/* linear part */ \
vpshufb RTMP4, RX0, RTMP0; \
vpxor RTMP0, s0, s0; /* s0 ^ x */ \
vpshufb RTMP4, RX1, RTMP2; \
vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
vpxor RTMP2, r0, r0; /* r0 ^ x */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
vpshufb RTMP4, RX1, RTMP3; \
vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
vpshufb RTMP4, RX1, RTMP3; \
vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
vpshufb RTMP4, RX0, RTMP1; \
vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
vpslld $2, RTMP0, RTMP1; \
vpsrld $30, RTMP0, RTMP0; \
vpxor RTMP0, s0, s0; \
/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
vpxor RTMP1, s0, s0; \
vpshufb RTMP4, RX1, RTMP3; \
vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
vpslld $2, RTMP2, RTMP3; \
vpsrld $30, RTMP2, RTMP2; \
vpxor RTMP2, r0, r0; \
/* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
vpxor RTMP3, r0, r0;
leaq (32*4)(%rdi), %rax;
.align 16
.Lroundloop_blk8:
ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
leaq (4*4)(%rdi), %rdi;
cmpq %rax, %rdi;
jne .Lroundloop_blk8;
#undef ROUND
vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
vpshufb RTMP2, RA0, RA0;
vpshufb RTMP2, RA1, RA1;
vpshufb RTMP2, RA2, RA2;
vpshufb RTMP2, RA3, RA3;
vpshufb RTMP2, RB0, RB0;
vpshufb RTMP2, RB1, RB1;
vpshufb RTMP2, RB2, RB2;
vpshufb RTMP2, RB3, RB3;
FRAME_END
ret;
SYM_FUNC_END(__sm4_crypt_blk16)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpsubq minus_one, x, x; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
/*
* void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (big endian, 128bit)
*/
FRAME_BEGIN
movq 8(%rcx), %rax;
bswapq %rax;
vzeroupper;
vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
vpcmpeqd RNOT, RNOT, RNOT;
vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
/* load IV and byteswap */
vmovdqu (%rcx), RTMP4x;
vpshufb RTMP3x, RTMP4x, RTMP4x;
vmovdqa RTMP4x, RTMP0x;
inc_le128(RTMP4x, RNOTx, RTMP1x);
vinserti128 $1, RTMP4x, RTMP0, RTMP0;
vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
/* check need for handling 64-bit overflow and carry */
cmpq $(0xffffffffffffffff - 16), %rax;
ja .Lhandle_ctr_carry;
/* construct IVs */
vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
vpshufb RTMP3, RTMP0, RA1;
vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
vpshufb RTMP3, RTMP0, RA2;
vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
vpshufb RTMP3, RTMP0, RA3;
vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
vpshufb RTMP3, RTMP0, RB0;
vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
vpshufb RTMP3, RTMP0, RB1;
vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
vpshufb RTMP3, RTMP0, RB2;
vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
vpshufb RTMP3, RTMP0, RB3;
vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
vpshufb RTMP3x, RTMP0x, RTMP0x;
jmp .Lctr_carry_done;
.Lhandle_ctr_carry:
/* construct IVs */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
inc_le128(RTMP0, RNOT, RTMP1);
inc_le128(RTMP0, RNOT, RTMP1);
vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
inc_le128(RTMP0, RNOT, RTMP1);
vextracti128 $1, RTMP0, RTMP0x;
vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
.align 4
.Lctr_carry_done:
/* store new IV */
vmovdqu RTMP0x, (%rcx);
call __sm4_crypt_blk16;
vpxor (0 * 32)(%rdx), RA0, RA0;
vpxor (1 * 32)(%rdx), RA1, RA1;
vpxor (2 * 32)(%rdx), RA2, RA2;
vpxor (3 * 32)(%rdx), RA3, RA3;
vpxor (4 * 32)(%rdx), RB0, RB0;
vpxor (5 * 32)(%rdx), RB1, RB1;
vpxor (6 * 32)(%rdx), RB2, RB2;
vpxor (7 * 32)(%rdx), RB3, RB3;
vmovdqu RA0, (0 * 32)(%rsi);
vmovdqu RA1, (1 * 32)(%rsi);
vmovdqu RA2, (2 * 32)(%rsi);
vmovdqu RA3, (3 * 32)(%rsi);
vmovdqu RB0, (4 * 32)(%rsi);
vmovdqu RB1, (5 * 32)(%rsi);
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
/*
* void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv
*/
FRAME_BEGIN
vzeroupper;
vmovdqu (0 * 32)(%rdx), RA0;
vmovdqu (1 * 32)(%rdx), RA1;
vmovdqu (2 * 32)(%rdx), RA2;
vmovdqu (3 * 32)(%rdx), RA3;
vmovdqu (4 * 32)(%rdx), RB0;
vmovdqu (5 * 32)(%rdx), RB1;
vmovdqu (6 * 32)(%rdx), RB2;
vmovdqu (7 * 32)(%rdx), RB3;
call __sm4_crypt_blk16;
vmovdqu (%rcx), RNOTx;
vinserti128 $1, (%rdx), RNOT, RNOT;
vpxor RNOT, RA0, RA0;
vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
vmovdqu RNOTx, (%rcx); /* store new IV */
vmovdqu RA0, (0 * 32)(%rsi);
vmovdqu RA1, (1 * 32)(%rsi);
vmovdqu RA2, (2 * 32)(%rsi);
vmovdqu RA3, (3 * 32)(%rsi);
vmovdqu RB0, (4 * 32)(%rsi);
vmovdqu RB1, (5 * 32)(%rsi);
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
/*
* void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv
*/
FRAME_BEGIN
vzeroupper;
/* Load input */
vmovdqu (%rcx), RNOTx;
vinserti128 $1, (%rdx), RNOT, RA0;
vmovdqu (0 * 32 + 16)(%rdx), RA1;
vmovdqu (1 * 32 + 16)(%rdx), RA2;
vmovdqu (2 * 32 + 16)(%rdx), RA3;
vmovdqu (3 * 32 + 16)(%rdx), RB0;
vmovdqu (4 * 32 + 16)(%rdx), RB1;
vmovdqu (5 * 32 + 16)(%rdx), RB2;
vmovdqu (6 * 32 + 16)(%rdx), RB3;
/* Update IV */
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
vmovdqu RNOTx, (%rcx);
call __sm4_crypt_blk16;
vpxor (0 * 32)(%rdx), RA0, RA0;
vpxor (1 * 32)(%rdx), RA1, RA1;
vpxor (2 * 32)(%rdx), RA2, RA2;
vpxor (3 * 32)(%rdx), RA3, RA3;
vpxor (4 * 32)(%rdx), RB0, RB0;
vpxor (5 * 32)(%rdx), RB1, RB1;
vpxor (6 * 32)(%rdx), RB2, RB2;
vpxor (7 * 32)(%rdx), RB3, RB3;
vmovdqu RA0, (0 * 32)(%rsi);
vmovdqu RA1, (1 * 32)(%rsi);
vmovdqu RA2, (2 * 32)(%rsi);
vmovdqu RA3, (3 * 32)(%rsi);
vmovdqu RB0, (4 * 32)(%rsi);
vmovdqu RB1, (5 * 32)(%rsi);
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
vzeroall;
FRAME_END
ret;
SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)

24
arch/x86/crypto/sm4-avx.h Normal file
View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef ASM_X86_SM4_AVX_H
#define ASM_X86_SM4_AVX_H
#include <linux/types.h>
#include <crypto/sm4.h>
typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv);
int sm4_avx_ecb_encrypt(struct skcipher_request *req);
int sm4_avx_ecb_decrypt(struct skcipher_request *req);
int sm4_cbc_encrypt(struct skcipher_request *req);
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func);
int sm4_cfb_encrypt(struct skcipher_request *req);
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func);
int sm4_avx_ctr_crypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func);
#endif

View File

@ -0,0 +1,169 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (c) 2021, Alibaba Group.
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-avx.h"
#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
sm4_aesni_avx2_cbc_dec_blk16);
}
static int cfb_decrypt(struct skcipher_request *req)
{
return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
sm4_aesni_avx2_cfb_dec_blk16);
}
static int ctr_crypt(struct skcipher_request *req)
{
return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE,
sm4_aesni_avx2_ctr_enc_blk16);
}
static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
{
.base = {
.cra_name = "__ecb(sm4)",
.cra_driver_name = "__ecb-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_avx_ecb_encrypt,
.decrypt = sm4_avx_ecb_decrypt,
}, {
.base = {
.cra_name = "__cbc(sm4)",
.cra_driver_name = "__cbc-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base = {
.cra_name = "__cfb(sm4)",
.cra_driver_name = "__cfb-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cfb_encrypt,
.decrypt = cfb_decrypt,
}, {
.base = {
.cra_name = "__ctr(sm4)",
.cra_driver_name = "__ctr-sm4-aesni-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 16 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}
};
static struct simd_skcipher_alg *
simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
static int __init sm4_init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
ARRAY_SIZE(sm4_aesni_avx2_skciphers),
simd_sm4_aesni_avx2_skciphers);
}
static void __exit sm4_exit(void)
{
simd_unregister_skciphers(sm4_aesni_avx2_skciphers,
ARRAY_SIZE(sm4_aesni_avx2_skciphers),
simd_sm4_aesni_avx2_skciphers);
}
module_init(sm4_init);
module_exit(sm4_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");

View File

@ -0,0 +1,487 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, AES-NI/AVX optimized.
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (c) 2021, Alibaba Group.
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-avx.h"
#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8)
asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
const u8 *src, int nblocks);
asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
const u8 *src, int nblocks);
asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
const u8 *src, u8 *iv);
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
{
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
sm4_aesni_avx_crypt8(rkey, dst, src, 8);
dst += SM4_CRYPT8_BLOCK_SIZE;
src += SM4_CRYPT8_BLOCK_SIZE;
nbytes -= SM4_CRYPT8_BLOCK_SIZE;
}
while (nbytes >= SM4_BLOCK_SIZE) {
unsigned int nblocks = min(nbytes >> 4, 4u);
sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
int sm4_avx_ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_crypt(req, ctx->rkey_enc);
}
EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
int sm4_avx_ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_do_crypt(req, ctx->rkey_dec);
}
EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
int sm4_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *iv = walk.iv;
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= SM4_BLOCK_SIZE) {
crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
sm4_crypt_block(ctx->rkey_enc, dst, dst);
iv = dst;
src += SM4_BLOCK_SIZE;
dst += SM4_BLOCK_SIZE;
nbytes -= SM4_BLOCK_SIZE;
}
if (iv != walk.iv)
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= bsize) {
func(ctx->rkey_dec, dst, src, walk.iv);
dst += bsize;
src += bsize;
nbytes -= bsize;
}
while (nbytes >= SM4_BLOCK_SIZE) {
u8 keystream[SM4_BLOCK_SIZE * 8];
u8 iv[SM4_BLOCK_SIZE];
unsigned int nblocks = min(nbytes >> 4, 8u);
int i;
sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
src, nblocks);
src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
dst += (nblocks - 1) * SM4_BLOCK_SIZE;
memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
for (i = nblocks - 1; i > 0; i--) {
crypto_xor_cpy(dst, src,
&keystream[i * SM4_BLOCK_SIZE],
SM4_BLOCK_SIZE);
src -= SM4_BLOCK_SIZE;
dst -= SM4_BLOCK_SIZE;
}
crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
dst += nblocks * SM4_BLOCK_SIZE;
src += (nblocks + 1) * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
static int cbc_decrypt(struct skcipher_request *req)
{
return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
sm4_aesni_avx_cbc_dec_blk8);
}
int sm4_cfb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
u8 keystream[SM4_BLOCK_SIZE];
const u8 *iv = walk.iv;
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= SM4_BLOCK_SIZE) {
sm4_crypt_block(ctx->rkey_enc, keystream, iv);
crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
iv = dst;
src += SM4_BLOCK_SIZE;
dst += SM4_BLOCK_SIZE;
nbytes -= SM4_BLOCK_SIZE;
}
if (iv != walk.iv)
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= bsize) {
func(ctx->rkey_enc, dst, src, walk.iv);
dst += bsize;
src += bsize;
nbytes -= bsize;
}
while (nbytes >= SM4_BLOCK_SIZE) {
u8 keystream[SM4_BLOCK_SIZE * 8];
unsigned int nblocks = min(nbytes >> 4, 8u);
memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
if (nblocks > 1)
memcpy(&keystream[SM4_BLOCK_SIZE], src,
(nblocks - 1) * SM4_BLOCK_SIZE);
memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
SM4_BLOCK_SIZE);
sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
keystream, nblocks);
crypto_xor_cpy(dst, src, keystream,
nblocks * SM4_BLOCK_SIZE);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
static int cfb_decrypt(struct skcipher_request *req)
{
return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
sm4_aesni_avx_cfb_dec_blk8);
}
int sm4_avx_ctr_crypt(struct skcipher_request *req,
unsigned int bsize, sm4_crypt_func func)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_fpu_begin();
while (nbytes >= bsize) {
func(ctx->rkey_enc, dst, src, walk.iv);
dst += bsize;
src += bsize;
nbytes -= bsize;
}
while (nbytes >= SM4_BLOCK_SIZE) {
u8 keystream[SM4_BLOCK_SIZE * 8];
unsigned int nblocks = min(nbytes >> 4, 8u);
int i;
for (i = 0; i < nblocks; i++) {
memcpy(&keystream[i * SM4_BLOCK_SIZE],
walk.iv, SM4_BLOCK_SIZE);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
}
sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
keystream, nblocks);
crypto_xor_cpy(dst, src, keystream,
nblocks * SM4_BLOCK_SIZE);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
kernel_fpu_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
crypto_xor_cpy(dst, src, keystream, nbytes);
dst += nbytes;
src += nbytes;
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
static int ctr_crypt(struct skcipher_request *req)
{
return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
sm4_aesni_avx_ctr_enc_blk8);
}
static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
{
.base = {
.cra_name = "__ecb(sm4)",
.cra_driver_name = "__ecb-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_avx_ecb_encrypt,
.decrypt = sm4_avx_ecb_decrypt,
}, {
.base = {
.cra_name = "__cbc(sm4)",
.cra_driver_name = "__cbc-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base = {
.cra_name = "__cfb(sm4)",
.cra_driver_name = "__cfb-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = sm4_cfb_encrypt,
.decrypt = cfb_decrypt,
}, {
.base = {
.cra_name = "__ctr(sm4)",
.cra_driver_name = "__ctr-sm4-aesni-avx",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.walksize = 8 * SM4_BLOCK_SIZE,
.setkey = sm4_skcipher_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}
};
static struct simd_skcipher_alg *
simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
static int __init sm4_init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX or AES-NI instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
ARRAY_SIZE(sm4_aesni_avx_skciphers),
simd_sm4_aesni_avx_skciphers);
}
static void __exit sm4_exit(void)
{
simd_unregister_skciphers(sm4_aesni_avx_skciphers,
ARRAY_SIZE(sm4_aesni_avx_skciphers),
simd_sm4_aesni_avx_skciphers);
}
module_init(sm4_init);
module_exit(sm4_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-aesni-avx");

View File

@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64
config CRYPTO_SM4
tristate "SM4 cipher algorithm"
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
@ -1569,6 +1570,49 @@ config CRYPTO_SM4
If unsure, say N.
config CRYPTO_SM4_AESNI_AVX_X86_64
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)"
depends on X86 && 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
SM4 (GBT.32907-2016) is a cryptographic standard issued by the
Organization of State Commercial Administration of China (OSCCA)
as an authorized cryptographic algorithms for the use within China.
This is SM4 optimized implementation using AES-NI/AVX/x86_64
instruction set for block cipher. Through two affine transforms,
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
effect of instruction acceleration.
If unsure, say N.
config CRYPTO_SM4_AESNI_AVX2_X86_64
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)"
depends on X86 && 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
select CRYPTO_SM4_AESNI_AVX_X86_64
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
SM4 (GBT.32907-2016) is a cryptographic standard issued by the
Organization of State Commercial Administration of China (OSCCA)
as an authorized cryptographic algorithms for the use within China.
This is SM4 optimized implementation using AES-NI/AVX2/x86_64
instruction set for block cipher. Through two affine transforms,
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
effect of instruction acceleration.
If unsure, say N.
config CRYPTO_TEA
tristate "TEA, XTEA and XETA cipher algorithms"
depends on CRYPTO_USER_API_ENABLE_OBSOLETE

View File

@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o
obj-$(CONFIG_CRYPTO_MD4) += md4.o
obj-$(CONFIG_CRYPTO_MD5) += md5.o
obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o
obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o
obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o

View File

@ -27,6 +27,7 @@
#define _CRYPTO_ECC_H
#include <crypto/ecc_curve.h>
#include <asm/unaligned.h>
/* One digit is u64 qword. */
#define ECC_CURVE_NIST_P192_DIGITS 3
@ -46,13 +47,13 @@
* @out: Output array
* @ndigits: Number of digits to copy
*/
static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits)
static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits)
{
const __be64 *src = (__force __be64 *)in;
int i;
for (i = 0; i < ndigits; i++)
out[i] = be64_to_cpu(src[ndigits - 1 - i]);
out[i] = get_unaligned_be64(&src[ndigits - 1 - i]);
}
/**

View File

@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input)
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
/* erase our data */
a = b = c = d = e = f = g = h = t1 = t2 = 0;
}
static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,

View File

@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
static int skcipher_walk_first(struct skcipher_walk *walk)
{
if (WARN_ON_ONCE(in_irq()))
if (WARN_ON_ONCE(in_hardirq()))
return -EDEADLK;
walk->buffer = NULL;

View File

@ -16,191 +16,43 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
static const u32 fk[4] = {
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};
static const u8 sbox[256] = {
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
};
static const u32 ck[] = {
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
};
static u32 sm4_t_non_lin_sub(u32 x)
{
int i;
u8 *b = (u8 *)&x;
for (i = 0; i < 4; ++i)
b[i] = sbox[b[i]];
return x;
}
static u32 sm4_key_lin_sub(u32 x)
{
return x ^ rol32(x, 13) ^ rol32(x, 23);
}
static u32 sm4_enc_lin_sub(u32 x)
{
return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
}
static u32 sm4_key_sub(u32 x)
{
return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
}
static u32 sm4_enc_sub(u32 x)
{
return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
}
static u32 sm4_round(const u32 *x, const u32 rk)
{
return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
}
/**
* crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
* @ctx: The location where the computed key will be stored.
* @in_key: The supplied key.
* @key_len: The length of the supplied key.
*
* Returns 0 on success. The function fails only if an invalid key size (or
* pointer) is supplied.
*/
int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
u32 rk[4], t;
const u32 *key = (u32 *)in_key;
int i;
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
for (i = 0; i < 4; ++i)
rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
for (i = 0; i < 32; ++i) {
t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
ctx->rkey_enc[i] = t;
rk[0] = rk[1];
rk[1] = rk[2];
rk[2] = rk[3];
rk[3] = t;
}
for (i = 0; i < 32; ++i)
ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
return 0;
}
EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
/**
* crypto_sm4_set_key - Set the SM4 key.
* sm4_setkey - Set the SM4 key.
* @tfm: The %crypto_tfm that is used in the context.
* @in_key: The input key.
* @key_len: The size of the key.
*
* This function uses crypto_sm4_expand_key() to expand the key.
* &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
* This function uses sm4_expandkey() to expand the key.
* &sm4_ctx _must_ be the private data embedded in @tfm which is
* retrieved with crypto_tfm_ctx().
*
* Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
*/
int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
return crypto_sm4_expand_key(ctx, in_key, key_len);
}
EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
{
u32 x[4], i, t;
for (i = 0; i < 4; ++i)
x[i] = get_unaligned_be32(&in[i]);
for (i = 0; i < 32; ++i) {
t = sm4_round(x, rk[i]);
x[0] = x[1];
x[1] = x[2];
x[2] = x[3];
x[3] = t;
}
for (i = 0; i < 4; ++i)
put_unaligned_be32(x[3 - i], &out[i]);
return sm4_expandkey(ctx, in_key, key_len);
}
/* encrypt a block of text */
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
sm4_crypt_block(ctx->rkey_enc, out, in);
}
EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
/* decrypt a block of text */
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
sm4_crypt_block(ctx->rkey_dec, out, in);
}
EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
static struct crypto_alg sm4_alg = {
.cra_name = "sm4",
@ -208,15 +60,15 @@ static struct crypto_alg sm4_alg = {
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = crypto_sm4_set_key,
.cia_encrypt = crypto_sm4_encrypt,
.cia_decrypt = crypto_sm4_decrypt
.cia_setkey = sm4_setkey,
.cia_encrypt = sm4_encrypt,
.cia_decrypt = sm4_decrypt
}
}
};

View File

@ -77,7 +77,7 @@ static const char *check[] = {
NULL
};
static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 };
static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 };
static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 };
#define XBUFSIZE 8
@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
}
ret = crypto_aead_setauthsize(tfm, authsize);
if (ret) {
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
ret);
goto out_free_tfm;
}
for (i = 0; i < num_mb; ++i)
if (testmgr_alloc_buf(data[i].xbuf)) {
@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
for (i = 0; i < num_mb; ++i) {
data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
if (!data[i].req) {
pr_err("alg: skcipher: Failed to allocate request for %s\n",
pr_err("alg: aead: Failed to allocate request for %s\n",
algo);
while (i--)
aead_request_free(data[i].req);
@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
sgout = &sg[9];
tfm = crypto_alloc_aead(algo, 0, 0);
if (IS_ERR(tfm)) {
pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
PTR_ERR(tfm));
goto out_notfm;
}
ret = crypto_aead_setauthsize(tfm, authsize);
if (ret) {
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
ret);
goto out_noreq;
}
crypto_init_wait(&wait);
printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
break;
}
}
ret = crypto_aead_setkey(tfm, key, *keysize);
ret = crypto_aead_setauthsize(tfm, authsize);
if (ret) {
pr_err("setkey() failed flags=%x: %d\n",
crypto_aead_get_flags(tfm), ret);
goto out;
}
iv_len = crypto_aead_ivsize(tfm);
if (iv_len)
@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
i, *keysize * 8, bs);
memset(tvmem[0], 0xff, PAGE_SIZE);
if (ret) {
pr_err("setkey() failed flags=%x\n",
crypto_aead_get_flags(tfm));
goto out;
}
sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize),
assoc, aad_size);
@ -1907,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("streebog512");
break;
case 55:
ret += tcrypt_test("gcm(sm4)");
break;
case 56:
ret += tcrypt_test("ccm(sm4)");
break;
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@ -1998,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 157:
ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
break;
case 158:
ret += tcrypt_test("cbcmac(sm4)");
break;
case 159:
ret += tcrypt_test("cmac(sm4)");
break;
case 181:
ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
break;
@ -2031,6 +2057,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 191:
ret += tcrypt_test("ecb(sm4)");
ret += tcrypt_test("cbc(sm4)");
ret += tcrypt_test("cfb(sm4)");
ret += tcrypt_test("ctr(sm4)");
break;
case 200:
@ -2289,6 +2316,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
@ -2322,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
NULL, 0, 16, 8, speed_template_16);
break;
case 222:
test_aead_speed("gcm(sm4)", ENCRYPT, sec,
NULL, 0, 16, 8, speed_template_16);
test_aead_speed("gcm(sm4)", DECRYPT, sec,
NULL, 0, 16, 8, speed_template_16);
break;
case 223:
test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec,
NULL, 0, 16, 16, aead_speed_template_19);
test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec,
NULL, 0, 16, 16, aead_speed_template_19);
break;
case 224:
test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8,
speed_template_16, num_mb);
test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8,
speed_template_16, num_mb);
break;
case 225:
test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0,
16, 16, aead_speed_template_19, num_mb);
test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0,
16, 16, aead_speed_template_19, num_mb);
break;
case 300:
if (alg) {
test_hash_speed(alg, sec, generic_hash_speed_template);
@ -2757,6 +2816,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_8_32);
break;
case 518:
test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
break;
case 600:
test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32, num_mb);

View File

@ -4450,6 +4450,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(aes_cbcmac_tv_template)
}
}, {
.alg = "cbcmac(sm4)",
.test = alg_test_hash,
.suite = {
.hash = __VECS(sm4_cbcmac_tv_template)
}
}, {
.alg = "ccm(aes)",
.generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))",
@ -4461,6 +4467,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.einval_allowed = 1,
}
}
}, {
.alg = "ccm(sm4)",
.generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))",
.test = alg_test_aead,
.suite = {
.aead = {
____VECS(sm4_ccm_tv_template),
.einval_allowed = 1,
}
}
}, {
.alg = "cfb(aes)",
.test = alg_test_skcipher,
@ -4494,6 +4510,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(des3_ede_cmac64_tv_template)
}
}, {
.alg = "cmac(sm4)",
.test = alg_test_hash,
.suite = {
.hash = __VECS(sm4_cmac128_tv_template)
}
}, {
.alg = "compress_null",
.test = alg_test_null,
@ -4967,6 +4989,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.aead = __VECS(aes_gcm_tv_template)
}
}, {
.alg = "gcm(sm4)",
.generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)",
.test = alg_test_aead,
.suite = {
.aead = __VECS(sm4_gcm_tv_template)
}
}, {
.alg = "ghash",
.test = alg_test_hash,

View File

@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
}
};
static const struct aead_testvec sm4_gcm_tv_template[] = {
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
.iv = "\x00\x00\x12\x34\x56\x78\x00\x00"
"\x00\x00\xAB\xCD",
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
.plen = 64,
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xAB\xAD\xDA\xD2",
.alen = 20,
.ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE"
"\x19\xD0\xDC\x99\x69\xC4\xBB\x7D"
"\x5F\xD4\x6F\xD3\x75\x64\x89\x06"
"\x91\x57\xB2\x82\xBB\x20\x07\x35"
"\xD8\x27\x10\xCA\x5C\x22\xF0\xCC"
"\xFA\x7C\xBF\x93\xD4\x96\xAC\x15"
"\xA5\x68\x34\xCB\xCF\x98\xC3\x97"
"\xB4\x02\x4A\x26\x91\x23\x3B\x8D"
"\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
"\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
.clen = 80,
}
};
static const struct aead_testvec sm4_ccm_tv_template[] = {
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
.klen = 16,
.iv = "\x02\x00\x00\x12\x34\x56\x78\x00"
"\x00\x00\x00\xAB\xCD\x00\x00\x00",
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
.plen = 64,
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
"\xAB\xAD\xDA\xD2",
.alen = 20,
.ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB"
"\xCD\x41\x4C\xCE\x60\x34\xD8\x95"
"\xDD\xA1\xBF\x8F\x13\x2F\x04\x20"
"\x98\x66\x15\x72\xE7\x48\x30\x94"
"\xFD\x12\xE5\x18\xCE\x06\x2C\x98"
"\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B"
"\xED\x31\xA2\xF0\x44\x76\xC1\x8B"
"\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B"
"\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
"\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
.clen = 80,
}
};
static const struct hash_testvec sm4_cbcmac_tv_template[] = {
{
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
"\x77\x66\x55\x44\x33\x22\x11\x00",
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
.digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f"
"\x86\x81\x0e\x0e\xea\x14\x6d\x73",
.psize = 16,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
"\xee",
.digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22"
"\xa3\x39\x3a\x31\x88\x91\x49\xa1",
.psize = 33,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
.digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12"
"\x64\x0a\x66\x7f\x4e\x25\xca\xd0",
.psize = 63,
.ksize = 16,
}
};
static const struct hash_testvec sm4_cmac128_tv_template[] = {
{
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
"\x77\x66\x55\x44\x33\x22\x11\x00",
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
.digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2"
"\x74\xa9\x00\x55\x13\x54\x2a\xd1",
.psize = 16,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
"\xee",
.digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85"
"\x21\x57\x02\x10\x1a\xbf\x9c\xc6",
.psize = 33,
.ksize = 16,
}, {
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
.digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0"
"\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc",
.psize = 63,
.ksize = 16,
}
};
/* Cast6 test vectors from RFC 2612 */
static const struct cipher_testvec cast6_tv_template[] = {
{

View File

@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
0xca2dbf07ad5a8333ULL,
};
/**
/*
* The core Whirlpool transform.
*/

View File

@ -524,6 +524,20 @@ config HW_RANDOM_XIPHERA
To compile this driver as a module, choose M here: the
module will be called xiphera-trng.
config HW_RANDOM_ARM_SMCCC_TRNG
tristate "Arm SMCCC TRNG firmware interface support"
depends on HAVE_ARM_SMCCC_DISCOVERY
default HW_RANDOM
help
Say 'Y' to enable the True Random Number Generator driver using
the Arm SMCCC TRNG firmware interface. This reads entropy from
higher exception levels (firmware, hypervisor). Uses SMCCC for
communicating with the firmware:
https://developer.arm.com/documentation/den0098/latest/
To compile this driver as a module, choose M here: the
module will be called arm_smccc_trng.
endif # HW_RANDOM
config UML_RANDOM

View File

@ -45,3 +45,4 @@ obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o
obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o
obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o
obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o
obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o

View File

@ -124,7 +124,7 @@ static struct hwrng amd_rng = {
.read = amd_rng_read,
};
static int __init mod_init(void)
static int __init amd_rng_mod_init(void)
{
int err;
struct pci_dev *pdev = NULL;
@ -188,7 +188,7 @@ out:
return err;
}
static void __exit mod_exit(void)
static void __exit amd_rng_mod_exit(void)
{
struct amd768_priv *priv;
@ -203,8 +203,8 @@ static void __exit mod_exit(void)
kfree(priv);
}
module_init(mod_init);
module_exit(mod_exit);
module_init(amd_rng_mod_init);
module_exit(amd_rng_mod_exit);
MODULE_AUTHOR("The Linux Kernel team");
MODULE_DESCRIPTION("H/W RNG driver for AMD chipsets");

View File

@ -0,0 +1,123 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Randomness driver for the ARM SMCCC TRNG Firmware Interface
* https://developer.arm.com/documentation/den0098/latest/
*
* Copyright (C) 2020 Arm Ltd.
*
* The ARM TRNG firmware interface specifies a protocol to read entropy
* from a higher exception level, to abstract from any machine specific
* implemenations and allow easier use in hypervisors.
*
* The firmware interface is realised using the SMCCC specification.
*/
#include <linux/bits.h>
#include <linux/device.h>
#include <linux/hw_random.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/arm-smccc.h>
#ifdef CONFIG_ARM64
#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND64
#define MAX_BITS_PER_CALL (3 * 64UL)
#else
#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND32
#define MAX_BITS_PER_CALL (3 * 32UL)
#endif
/* We don't want to allow the firmware to stall us forever. */
#define SMCCC_TRNG_MAX_TRIES 20
#define SMCCC_RET_TRNG_INVALID_PARAMETER -2
#define SMCCC_RET_TRNG_NO_ENTROPY -3
static int copy_from_registers(char *buf, struct arm_smccc_res *res,
size_t bytes)
{
unsigned int chunk, copied;
if (bytes == 0)
return 0;
chunk = min(bytes, sizeof(long));
memcpy(buf, &res->a3, chunk);
copied = chunk;
if (copied >= bytes)
return copied;
chunk = min((bytes - copied), sizeof(long));
memcpy(&buf[copied], &res->a2, chunk);
copied += chunk;
if (copied >= bytes)
return copied;
chunk = min((bytes - copied), sizeof(long));
memcpy(&buf[copied], &res->a1, chunk);
return copied + chunk;
}
static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
struct arm_smccc_res res;
u8 *buf = data;
unsigned int copied = 0;
int tries = 0;
while (copied < max) {
size_t bits = min_t(size_t, (max - copied) * BITS_PER_BYTE,
MAX_BITS_PER_CALL);
arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND, bits, &res);
if ((int)res.a0 < 0)
return (int)res.a0;
switch ((int)res.a0) {
case SMCCC_RET_SUCCESS:
copied += copy_from_registers(buf + copied, &res,
bits / BITS_PER_BYTE);
tries = 0;
break;
case SMCCC_RET_TRNG_NO_ENTROPY:
if (!wait)
return copied;
tries++;
if (tries >= SMCCC_TRNG_MAX_TRIES)
return copied;
cond_resched();
break;
}
}
return copied;
}
static int smccc_trng_probe(struct platform_device *pdev)
{
struct hwrng *trng;
trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
if (!trng)
return -ENOMEM;
trng->name = "smccc_trng";
trng->read = smccc_trng_read;
platform_set_drvdata(pdev, trng);
return devm_hwrng_register(&pdev->dev, trng);
}
static struct platform_driver smccc_trng_driver = {
.driver = {
.name = "smccc_trng",
},
.probe = smccc_trng_probe,
};
module_platform_driver(smccc_trng_driver);
MODULE_ALIAS("platform:smccc_trng");
MODULE_AUTHOR("Andre Przywara");
MODULE_LICENSE("GPL");

View File

@ -83,7 +83,7 @@ static struct hwrng geode_rng = {
};
static int __init mod_init(void)
static int __init geode_rng_init(void)
{
int err = -ENODEV;
struct pci_dev *pdev = NULL;
@ -124,7 +124,7 @@ err_unmap:
goto out;
}
static void __exit mod_exit(void)
static void __exit geode_rng_exit(void)
{
void __iomem *mem = (void __iomem *)geode_rng.priv;
@ -132,8 +132,8 @@ static void __exit mod_exit(void)
iounmap(mem);
}
module_init(mod_init);
module_exit(mod_exit);
module_init(geode_rng_init);
module_exit(geode_rng_exit);
MODULE_DESCRIPTION("H/W RNG driver for AMD Geode LX CPUs");
MODULE_LICENSE("GPL");

View File

@ -325,7 +325,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n";
}
static int __init mod_init(void)
static int __init intel_rng_mod_init(void)
{
int err = -ENODEV;
int i;
@ -403,7 +403,7 @@ out:
}
static void __exit mod_exit(void)
static void __exit intel_rng_mod_exit(void)
{
void __iomem *mem = (void __iomem *)intel_rng.priv;
@ -411,8 +411,8 @@ static void __exit mod_exit(void)
iounmap(mem);
}
module_init(mod_init);
module_exit(mod_exit);
module_init(intel_rng_mod_init);
module_exit(intel_rng_mod_exit);
MODULE_DESCRIPTION("H/W RNG driver for Intel chipsets");
MODULE_LICENSE("GPL");

View File

@ -192,7 +192,7 @@ static struct hwrng via_rng = {
};
static int __init mod_init(void)
static int __init via_rng_mod_init(void)
{
int err;
@ -209,13 +209,13 @@ static int __init mod_init(void)
out:
return err;
}
module_init(mod_init);
module_init(via_rng_mod_init);
static void __exit mod_exit(void)
static void __exit via_rng_mod_exit(void)
{
hwrng_unregister(&via_rng);
}
module_exit(mod_exit);
module_exit(via_rng_mod_exit);
static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = {
X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL),

View File

@ -26,8 +26,7 @@ void sun8i_ce_prng_exit(struct crypto_tfm *tfm)
{
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
memzero_explicit(ctx->seed, ctx->slen);
kfree(ctx->seed);
kfree_sensitive(ctx->seed);
ctx->seed = NULL;
ctx->slen = 0;
}
@ -38,8 +37,7 @@ int sun8i_ce_prng_seed(struct crypto_rng *tfm, const u8 *seed,
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
if (ctx->seed && ctx->slen != slen) {
memzero_explicit(ctx->seed, ctx->slen);
kfree(ctx->seed);
kfree_sensitive(ctx->seed);
ctx->slen = 0;
ctx->seed = NULL;
}
@ -157,9 +155,8 @@ err_dst:
memcpy(dst, d, dlen);
memcpy(ctx->seed, d + dlen, ctx->slen);
}
memzero_explicit(d, todo);
err_iv:
kfree(d);
kfree_sensitive(d);
err_mem:
return err;
}

View File

@ -95,9 +95,8 @@ err_pm:
memcpy(data, d, max);
err = max;
}
memzero_explicit(d, todo);
err_dst:
kfree(d);
kfree_sensitive(d);
return err;
}

View File

@ -20,8 +20,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed,
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
if (ctx->seed && ctx->slen != slen) {
memzero_explicit(ctx->seed, ctx->slen);
kfree(ctx->seed);
kfree_sensitive(ctx->seed);
ctx->slen = 0;
ctx->seed = NULL;
}
@ -48,8 +47,7 @@ void sun8i_ss_prng_exit(struct crypto_tfm *tfm)
{
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
memzero_explicit(ctx->seed, ctx->slen);
kfree(ctx->seed);
kfree_sensitive(ctx->seed);
ctx->seed = NULL;
ctx->slen = 0;
}
@ -167,9 +165,8 @@ err_iv:
/* Update seed */
memcpy(ctx->seed, d + dlen, ctx->slen);
}
memzero_explicit(d, todo);
err_free:
kfree(d);
kfree_sensitive(d);
return err;
}

View File

@ -143,6 +143,7 @@ struct atmel_aes_xts_ctx {
struct atmel_aes_base_ctx base;
u32 key2[AES_KEYSIZE_256 / sizeof(u32)];
struct crypto_skcipher *fallback_tfm;
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@ -155,6 +156,7 @@ struct atmel_aes_authenc_ctx {
struct atmel_aes_reqctx {
unsigned long mode;
u8 lastc[AES_BLOCK_SIZE];
struct skcipher_request fallback_req;
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@ -418,24 +420,15 @@ static inline size_t atmel_aes_padlen(size_t len, size_t block_size)
return len ? block_size - len : 0;
}
static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_base_ctx *ctx)
static struct atmel_aes_dev *atmel_aes_dev_alloc(struct atmel_aes_base_ctx *ctx)
{
struct atmel_aes_dev *aes_dd = NULL;
struct atmel_aes_dev *tmp;
struct atmel_aes_dev *aes_dd;
spin_lock_bh(&atmel_aes.lock);
if (!ctx->dd) {
list_for_each_entry(tmp, &atmel_aes.dev_list, list) {
aes_dd = tmp;
break;
}
ctx->dd = aes_dd;
} else {
aes_dd = ctx->dd;
}
/* One AES IP per SoC. */
aes_dd = list_first_entry_or_null(&atmel_aes.dev_list,
struct atmel_aes_dev, list);
spin_unlock_bh(&atmel_aes.lock);
return aes_dd;
}
@ -967,7 +960,6 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
ctx = crypto_tfm_ctx(areq->tfm);
dd->areq = areq;
dd->ctx = ctx;
start_async = (areq != new_areq);
dd->is_async = start_async;
@ -1083,12 +1075,48 @@ static int atmel_aes_ctr_start(struct atmel_aes_dev *dd)
return atmel_aes_ctr_transfer(dd);
}
static int atmel_aes_xts_fallback(struct skcipher_request *req, bool enc)
{
struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(
crypto_skcipher_reqtfm(req));
skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
skcipher_request_set_callback(&rctx->fallback_req, req->base.flags,
req->base.complete, req->base.data);
skcipher_request_set_crypt(&rctx->fallback_req, req->src, req->dst,
req->cryptlen, req->iv);
return enc ? crypto_skcipher_encrypt(&rctx->fallback_req) :
crypto_skcipher_decrypt(&rctx->fallback_req);
}
static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
{
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct atmel_aes_reqctx *rctx;
struct atmel_aes_dev *dd;
u32 opmode = mode & AES_FLAGS_OPMODE_MASK;
if (opmode == AES_FLAGS_XTS) {
if (req->cryptlen < XTS_BLOCK_SIZE)
return -EINVAL;
if (!IS_ALIGNED(req->cryptlen, XTS_BLOCK_SIZE))
return atmel_aes_xts_fallback(req,
mode & AES_FLAGS_ENCRYPT);
}
/*
* ECB, CBC, CFB, OFB or CTR mode require the plaintext and ciphertext
* to have a positve integer length.
*/
if (!req->cryptlen && opmode != AES_FLAGS_XTS)
return 0;
if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) &&
!IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher)))
return -EINVAL;
switch (mode & AES_FLAGS_OPMODE_MASK) {
case AES_FLAGS_CFB8:
@ -1113,14 +1141,10 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
}
ctx->is_aead = false;
dd = atmel_aes_find_dev(ctx);
if (!dd)
return -ENODEV;
rctx = skcipher_request_ctx(req);
rctx->mode = mode;
if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB &&
if (opmode != AES_FLAGS_ECB &&
!(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) {
unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
@ -1130,7 +1154,7 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
ivsize, 0);
}
return atmel_aes_handle_queue(dd, &req->base);
return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
@ -1242,8 +1266,15 @@ static int atmel_aes_ctr_decrypt(struct skcipher_request *req)
static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct atmel_aes_dev *dd;
dd = atmel_aes_dev_alloc(&ctx->base);
if (!dd)
return -ENODEV;
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
ctx->base.dd = dd;
ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_start;
return 0;
@ -1252,8 +1283,15 @@ static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct atmel_aes_dev *dd;
dd = atmel_aes_dev_alloc(&ctx->base);
if (!dd)
return -ENODEV;
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
ctx->base.dd = dd;
ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_ctr_start;
return 0;
@ -1290,7 +1328,7 @@ static struct skcipher_alg aes_algs[] = {
{
.base.cra_name = "ofb(aes)",
.base.cra_driver_name = "atmel-ofb-aes",
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct atmel_aes_ctx),
.init = atmel_aes_init_tfm,
@ -1691,20 +1729,15 @@ static int atmel_aes_gcm_crypt(struct aead_request *req,
{
struct atmel_aes_base_ctx *ctx;
struct atmel_aes_reqctx *rctx;
struct atmel_aes_dev *dd;
ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
ctx->block_size = AES_BLOCK_SIZE;
ctx->is_aead = true;
dd = atmel_aes_find_dev(ctx);
if (!dd)
return -ENODEV;
rctx = aead_request_ctx(req);
rctx->mode = AES_FLAGS_GCM | mode;
return atmel_aes_handle_queue(dd, &req->base);
return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
@ -1742,8 +1775,15 @@ static int atmel_aes_gcm_decrypt(struct aead_request *req)
static int atmel_aes_gcm_init(struct crypto_aead *tfm)
{
struct atmel_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
struct atmel_aes_dev *dd;
dd = atmel_aes_dev_alloc(&ctx->base);
if (!dd)
return -ENODEV;
crypto_aead_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
ctx->base.dd = dd;
ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_gcm_start;
return 0;
@ -1819,12 +1859,8 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
* the order of the ciphered tweak bytes need to be reversed before
* writing them into the ODATARx registers.
*/
for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
tweak_bytes[i] = tmp;
}
for (i = 0; i < AES_BLOCK_SIZE/2; ++i)
swap(tweak_bytes[i], tweak_bytes[AES_BLOCK_SIZE - 1 - i]);
/* Process the data. */
atmel_aes_write_ctrl(dd, use_dma, NULL);
@ -1849,6 +1885,13 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (err)
return err;
crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK);
crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags &
CRYPTO_TFM_REQ_MASK);
err = crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
if (err)
return err;
memcpy(ctx->base.key, key, keylen/2);
memcpy(ctx->key2, key + keylen/2, keylen/2);
ctx->base.keylen = keylen/2;
@ -1869,18 +1912,40 @@ static int atmel_aes_xts_decrypt(struct skcipher_request *req)
static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
struct atmel_aes_dev *dd;
const char *tfm_name = crypto_tfm_alg_name(&tfm->base);
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
dd = atmel_aes_dev_alloc(&ctx->base);
if (!dd)
return -ENODEV;
ctx->fallback_tfm = crypto_alloc_skcipher(tfm_name, 0,
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->fallback_tfm))
return PTR_ERR(ctx->fallback_tfm);
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) +
crypto_skcipher_reqsize(ctx->fallback_tfm));
ctx->base.dd = dd;
ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_xts_start;
return 0;
}
static void atmel_aes_xts_exit_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
crypto_free_skcipher(ctx->fallback_tfm);
}
static struct skcipher_alg aes_xts_alg = {
.base.cra_name = "xts(aes)",
.base.cra_driver_name = "atmel-xts-aes",
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct atmel_aes_xts_ctx),
.base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
@ -1889,6 +1954,7 @@ static struct skcipher_alg aes_xts_alg = {
.encrypt = atmel_aes_xts_encrypt,
.decrypt = atmel_aes_xts_decrypt,
.init = atmel_aes_xts_init_tfm,
.exit = atmel_aes_xts_exit_tfm,
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@ -2075,6 +2141,11 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
{
struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm);
unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize();
struct atmel_aes_dev *dd;
dd = atmel_aes_dev_alloc(&ctx->base);
if (!dd)
return -ENODEV;
ctx->auth = atmel_sha_authenc_spawn(auth_mode);
if (IS_ERR(ctx->auth))
@ -2082,6 +2153,8 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) +
auth_reqsize));
ctx->base.dd = dd;
ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_authenc_start;
return 0;
@ -2127,7 +2200,6 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm);
u32 authsize = crypto_aead_authsize(tfm);
bool enc = (mode & AES_FLAGS_ENCRYPT);
struct atmel_aes_dev *dd;
/* Compute text length. */
if (!enc && req->cryptlen < authsize)
@ -2146,11 +2218,7 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
ctx->block_size = AES_BLOCK_SIZE;
ctx->is_aead = true;
dd = atmel_aes_find_dev(ctx);
if (!dd)
return -ENODEV;
return atmel_aes_handle_queue(dd, &req->base);
return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req)
@ -2358,7 +2426,7 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
static void atmel_aes_crypto_alg_init(struct crypto_alg *alg)
{
alg->cra_flags = CRYPTO_ALG_ASYNC;
alg->cra_flags |= CRYPTO_ALG_ASYNC;
alg->cra_alignmask = 0xf;
alg->cra_priority = ATMEL_AES_PRIORITY;
alg->cra_module = THIS_MODULE;

View File

@ -196,23 +196,15 @@ static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset,
atmel_tdes_write(dd, offset, *value);
}
static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx)
static struct atmel_tdes_dev *atmel_tdes_dev_alloc(void)
{
struct atmel_tdes_dev *tdes_dd = NULL;
struct atmel_tdes_dev *tmp;
struct atmel_tdes_dev *tdes_dd;
spin_lock_bh(&atmel_tdes.lock);
if (!ctx->dd) {
list_for_each_entry(tmp, &atmel_tdes.dev_list, list) {
tdes_dd = tmp;
break;
}
ctx->dd = tdes_dd;
} else {
tdes_dd = ctx->dd;
}
/* One TDES IP per SoC. */
tdes_dd = list_first_entry_or_null(&atmel_tdes.dev_list,
struct atmel_tdes_dev, list);
spin_unlock_bh(&atmel_tdes.lock);
return tdes_dd;
}
@ -320,7 +312,7 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd)
dd->buf_out, dd->buflen, dd->dma_size, 1);
if (count != dd->dma_size) {
err = -EINVAL;
pr_err("not all data converted: %zu\n", count);
dev_dbg(dd->dev, "not all data converted: %zu\n", count);
}
}
@ -337,24 +329,24 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd)
dd->buflen &= ~(DES_BLOCK_SIZE - 1);
if (!dd->buf_in || !dd->buf_out) {
dev_err(dd->dev, "unable to alloc pages.\n");
dev_dbg(dd->dev, "unable to alloc pages.\n");
goto err_alloc;
}
/* MAP here */
dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in,
dd->buflen, DMA_TO_DEVICE);
if (dma_mapping_error(dd->dev, dd->dma_addr_in)) {
dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
err = -EINVAL;
err = dma_mapping_error(dd->dev, dd->dma_addr_in);
if (err) {
dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
goto err_map_in;
}
dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out,
dd->buflen, DMA_FROM_DEVICE);
if (dma_mapping_error(dd->dev, dd->dma_addr_out)) {
dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
err = -EINVAL;
err = dma_mapping_error(dd->dev, dd->dma_addr_out);
if (err) {
dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
goto err_map_out;
}
@ -367,8 +359,6 @@ err_map_in:
err_alloc:
free_page((unsigned long)dd->buf_out);
free_page((unsigned long)dd->buf_in);
if (err)
pr_err("error: %d\n", err);
return err;
}
@ -520,14 +510,14 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
if (!err) {
dev_err(dd->dev, "dma_map_sg() error\n");
dev_dbg(dd->dev, "dma_map_sg() error\n");
return -EINVAL;
}
err = dma_map_sg(dd->dev, dd->out_sg, 1,
DMA_FROM_DEVICE);
if (!err) {
dev_err(dd->dev, "dma_map_sg() error\n");
dev_dbg(dd->dev, "dma_map_sg() error\n");
dma_unmap_sg(dd->dev, dd->in_sg, 1,
DMA_TO_DEVICE);
return -EINVAL;
@ -646,7 +636,6 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
rctx->mode &= TDES_FLAGS_MODE_MASK;
dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode;
dd->ctx = ctx;
ctx->dd = dd;
err = atmel_tdes_write_ctrl(dd);
if (!err)
@ -679,7 +668,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
dd->buf_out, dd->buflen, dd->dma_size, 1);
if (count != dd->dma_size) {
err = -EINVAL;
pr_err("not all data converted: %zu\n", count);
dev_dbg(dd->dev, "not all data converted: %zu\n", count);
}
}
}
@ -691,11 +680,15 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req);
struct device *dev = ctx->dd->dev;
if (!req->cryptlen)
return 0;
switch (mode & TDES_FLAGS_OPMODE_MASK) {
case TDES_FLAGS_CFB8:
if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) {
pr_err("request size is not exact amount of CFB8 blocks\n");
dev_dbg(dev, "request size is not exact amount of CFB8 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB8_BLOCK_SIZE;
@ -703,7 +696,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
case TDES_FLAGS_CFB16:
if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) {
pr_err("request size is not exact amount of CFB16 blocks\n");
dev_dbg(dev, "request size is not exact amount of CFB16 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB16_BLOCK_SIZE;
@ -711,7 +704,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
case TDES_FLAGS_CFB32:
if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) {
pr_err("request size is not exact amount of CFB32 blocks\n");
dev_dbg(dev, "request size is not exact amount of CFB32 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB32_BLOCK_SIZE;
@ -719,7 +712,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
default:
if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) {
pr_err("request size is not exact amount of DES blocks\n");
dev_dbg(dev, "request size is not exact amount of DES blocks\n");
return -EINVAL;
}
ctx->block_size = DES_BLOCK_SIZE;
@ -897,14 +890,13 @@ static int atmel_tdes_ofb_decrypt(struct skcipher_request *req)
static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct atmel_tdes_dev *dd;
ctx->dd = atmel_tdes_dev_alloc();
if (!ctx->dd)
return -ENODEV;
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx));
dd = atmel_tdes_find_dev(ctx);
if (!dd)
return -ENODEV;
return 0;
}
@ -999,7 +991,7 @@ static struct skcipher_alg tdes_algs[] = {
{
.base.cra_name = "ofb(des)",
.base.cra_driver_name = "atmel-ofb-des",
.base.cra_blocksize = DES_BLOCK_SIZE,
.base.cra_blocksize = 1,
.base.cra_alignmask = 0x7,
.min_keysize = DES_KEY_SIZE,

View File

@ -300,6 +300,9 @@ static int __sev_platform_shutdown_locked(int *error)
struct sev_device *sev = psp_master->sev_data;
int ret;
if (sev->state == SEV_STATE_UNINIT)
return 0;
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
if (ret)
return ret;
@ -1019,6 +1022,20 @@ e_err:
return ret;
}
static void sev_firmware_shutdown(struct sev_device *sev)
{
sev_platform_shutdown(NULL);
if (sev_es_tmr) {
/* The TMR area was encrypted, flush it from the cache */
wbinvd_on_all_cpus();
free_pages((unsigned long)sev_es_tmr,
get_order(SEV_ES_TMR_SIZE));
sev_es_tmr = NULL;
}
}
void sev_dev_destroy(struct psp_device *psp)
{
struct sev_device *sev = psp->sev_data;
@ -1026,6 +1043,8 @@ void sev_dev_destroy(struct psp_device *psp)
if (!sev)
return;
sev_firmware_shutdown(sev);
if (sev->misc)
kref_put(&misc_dev->refcount, sev_exit);
@ -1056,21 +1075,6 @@ void sev_pci_init(void)
if (sev_get_api_version())
goto err;
/*
* If platform is not in UNINIT state then firmware upgrade and/or
* platform INIT command will fail. These command require UNINIT state.
*
* In a normal boot we should never run into case where the firmware
* is not in UNINIT state on boot. But in case of kexec boot, a reboot
* may not go through a typical shutdown sequence and may leave the
* firmware in INIT or WORKING state.
*/
if (sev->state != SEV_STATE_UNINIT) {
sev_platform_shutdown(NULL);
sev->state = SEV_STATE_UNINIT;
}
if (sev_version_greater_or_equal(0, 15) &&
sev_update_firmware(sev->dev) == 0)
sev_get_api_version();
@ -1115,17 +1119,10 @@ err:
void sev_pci_exit(void)
{
if (!psp_master->sev_data)
struct sev_device *sev = psp_master->sev_data;
if (!sev)
return;
sev_platform_shutdown(NULL);
if (sev_es_tmr) {
/* The TMR area was encrypted, flush it from the cache */
wbinvd_on_all_cpus();
free_pages((unsigned long)sev_es_tmr,
get_order(SEV_ES_TMR_SIZE));
sev_es_tmr = NULL;
}
sev_firmware_shutdown(sev);
}

View File

@ -241,6 +241,17 @@ e_err:
return ret;
}
static void sp_pci_shutdown(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct sp_device *sp = dev_get_drvdata(dev);
if (!sp)
return;
sp_destroy(sp);
}
static void sp_pci_remove(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
@ -349,6 +360,12 @@ static const struct sp_dev_vdata dev_vdata[] = {
#endif
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
.psp_vdata = &pspv3,
#endif
},
{ /* 5 */
.bar = 2,
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
.psp_vdata = &pspv2,
#endif
},
};
@ -359,6 +376,7 @@ static const struct pci_device_id sp_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] },
{ PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] },
{ PCI_VDEVICE(AMD, 0x1649), (kernel_ulong_t)&dev_vdata[4] },
{ PCI_VDEVICE(AMD, 0x14CA), (kernel_ulong_t)&dev_vdata[5] },
/* Last entry must be zero */
{ 0, }
};
@ -371,6 +389,7 @@ static struct pci_driver sp_pci_driver = {
.id_table = sp_pci_table,
.probe = sp_pci_probe,
.remove = sp_pci_remove,
.shutdown = sp_pci_shutdown,
.driver.pm = &sp_pci_pm_ops,
};

View File

@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/topology.h>
#include <linux/uacce.h>
#include "hpre.h"
@ -81,6 +82,16 @@
#define HPRE_PREFETCH_DISABLE BIT(30)
#define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8))
/* clock gate */
#define HPRE_CLKGATE_CTL 0x301a10
#define HPRE_PEH_CFG_AUTO_GATE 0x301a2c
#define HPRE_CLUSTER_DYN_CTL 0x302010
#define HPRE_CORE_SHB_CFG 0x302088
#define HPRE_CLKGATE_CTL_EN BIT(0)
#define HPRE_PEH_CFG_AUTO_GATE_EN BIT(0)
#define HPRE_CLUSTER_DYN_CTL_EN BIT(0)
#define HPRE_CORE_GATE_EN (BIT(30) | BIT(31))
#define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044
#define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0)
#define HPRE_WR_MSI_PORT BIT(2)
@ -417,12 +428,63 @@ static void hpre_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
static void hpre_enable_clock_gate(struct hisi_qm *qm)
{
u32 val;
if (qm->ver < QM_HW_V3)
return;
val = readl(qm->io_base + HPRE_CLKGATE_CTL);
val |= HPRE_CLKGATE_CTL_EN;
writel(val, qm->io_base + HPRE_CLKGATE_CTL);
val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
val |= HPRE_PEH_CFG_AUTO_GATE_EN;
writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
val |= HPRE_CLUSTER_DYN_CTL_EN;
writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
val |= HPRE_CORE_GATE_EN;
writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
}
static void hpre_disable_clock_gate(struct hisi_qm *qm)
{
u32 val;
if (qm->ver < QM_HW_V3)
return;
val = readl(qm->io_base + HPRE_CLKGATE_CTL);
val &= ~HPRE_CLKGATE_CTL_EN;
writel(val, qm->io_base + HPRE_CLKGATE_CTL);
val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
val &= ~HPRE_PEH_CFG_AUTO_GATE_EN;
writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
val &= ~HPRE_CLUSTER_DYN_CTL_EN;
writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
val &= ~HPRE_CORE_GATE_EN;
writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
}
static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
u32 val;
int ret;
/* disabel dynamic clock gate before sram init */
hpre_disable_clock_gate(qm);
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE);
writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG);
@ -473,6 +535,8 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
/* Config data buffer pasid needed by Kunpeng 920 */
hpre_config_pasid(qm);
hpre_enable_clock_gate(qm);
return ret;
}
@ -595,10 +659,15 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct hpre_debugfs_file *file = filp->private_data;
struct hisi_qm *qm = hpre_file_to_qm(file);
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
u32 val;
int ret;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
spin_lock_irq(&file->lock);
switch (file->type) {
case HPRE_CLEAR_ENABLE:
@ -608,18 +677,25 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
val = hpre_cluster_inqry_read(file);
break;
default:
spin_unlock_irq(&file->lock);
return -EINVAL;
goto err_input;
}
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
ret = snprintf(tbuf, HPRE_DBGFS_VAL_MAX_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
err_input:
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
return -EINVAL;
}
static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct hpre_debugfs_file *file = filp->private_data;
struct hisi_qm *qm = hpre_file_to_qm(file);
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
unsigned long val;
int len, ret;
@ -639,6 +715,10 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
spin_lock_irq(&file->lock);
switch (file->type) {
case HPRE_CLEAR_ENABLE:
@ -655,12 +735,12 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
ret = -EINVAL;
goto err_input;
}
spin_unlock_irq(&file->lock);
return count;
ret = count;
err_input:
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
return ret;
}
@ -700,6 +780,24 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
hpre_debugfs_atomic64_set, "%llu\n");
static int hpre_com_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
enum hpre_ctrl_dbgfs_file type, int indx)
{
@ -737,8 +835,11 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm)
regset->regs = hpre_com_dfx_regs;
regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs);
regset->base = qm->io_base;
regset->dev = dev;
debugfs_create_file("regs", 0444, qm->debug.debug_root,
regset, &hpre_com_regs_fops);
debugfs_create_regset32("regs", 0444, qm->debug.debug_root, regset);
return 0;
}
@ -764,8 +865,10 @@ static int hpre_cluster_debugfs_init(struct hisi_qm *qm)
regset->regs = hpre_cluster_dfx_regs;
regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs);
regset->base = qm->io_base + hpre_cluster_offsets[i];
regset->dev = dev;
debugfs_create_regset32("regs", 0444, tmp_d, regset);
debugfs_create_file("regs", 0444, tmp_d, regset,
&hpre_cluster_regs_fops);
ret = hpre_create_debugfs_file(qm, tmp_d, HPRE_CLUSTER_CTRL,
i + HPRE_CLUSTER_CTRL);
if (ret)
@ -1017,6 +1120,8 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_with_alg_register;
}
hisi_qm_pm_init(qm);
return 0;
err_with_alg_register:
@ -1040,6 +1145,7 @@ static void hpre_remove(struct pci_dev *pdev)
struct hisi_qm *qm = pci_get_drvdata(pdev);
int ret;
hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &hpre_devices);
hisi_qm_alg_unregister(qm, &hpre_devices);
if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
@ -1062,6 +1168,10 @@ static void hpre_remove(struct pci_dev *pdev)
hisi_qm_uninit(qm);
}
static const struct dev_pm_ops hpre_pm_ops = {
SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
};
static const struct pci_error_handlers hpre_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@ -1078,6 +1188,7 @@ static struct pci_driver hpre_pci_driver = {
hisi_qm_sriov_configure : NULL,
.err_handler = &hpre_err_handler,
.shutdown = hisi_qm_dev_shutdown,
.driver.pm = &hpre_pm_ops,
};
static void hpre_register_debugfs(void)

View File

@ -4,12 +4,12 @@
#include <linux/acpi.h>
#include <linux/aer.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/idr.h>
#include <linux/io.h>
#include <linux/irqreturn.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uacce.h>
@ -270,6 +270,8 @@
#define QM_QOS_MAX_CIR_S 11
#define QM_QOS_VAL_MAX_LEN 32
#define QM_AUTOSUSPEND_DELAY 3000
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \
@ -734,6 +736,34 @@ static u32 qm_get_irq_num_v3(struct hisi_qm *qm)
return QM_IRQ_NUM_VF_V3;
}
static int qm_pm_get_sync(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
int ret;
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
return 0;
ret = pm_runtime_resume_and_get(dev);
if (ret < 0) {
dev_err(dev, "failed to get_sync(%d).\n", ret);
return ret;
}
return 0;
}
static void qm_pm_put_sync(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
return;
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
}
static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
{
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
@ -1173,16 +1203,13 @@ static struct hisi_qm *file_to_qm(struct debugfs_file *file)
return container_of(debug, struct hisi_qm, debug);
}
static u32 current_q_read(struct debugfs_file *file)
static u32 current_q_read(struct hisi_qm *qm)
{
struct hisi_qm *qm = file_to_qm(file);
return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
}
static int current_q_write(struct debugfs_file *file, u32 val)
static int current_q_write(struct hisi_qm *qm, u32 val)
{
struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val >= qm->debug.curr_qm_qp_num)
@ -1199,18 +1226,14 @@ static int current_q_write(struct debugfs_file *file, u32 val)
return 0;
}
static u32 clear_enable_read(struct debugfs_file *file)
static u32 clear_enable_read(struct hisi_qm *qm)
{
struct hisi_qm *qm = file_to_qm(file);
return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
}
/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
{
struct hisi_qm *qm = file_to_qm(file);
if (rd_clr_ctrl > 1)
return -EINVAL;
@ -1219,16 +1242,13 @@ static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
return 0;
}
static u32 current_qm_read(struct debugfs_file *file)
static u32 current_qm_read(struct hisi_qm *qm)
{
struct hisi_qm *qm = file_to_qm(file);
return readl(qm->io_base + QM_DFX_MB_CNT_VF);
}
static int current_qm_write(struct debugfs_file *file, u32 val)
static int current_qm_write(struct hisi_qm *qm, u32 val)
{
struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val > qm->vfs_num)
@ -1259,29 +1279,39 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf,
{
struct debugfs_file *file = filp->private_data;
enum qm_debug_file index = file->index;
struct hisi_qm *qm = file_to_qm(file);
char tbuf[QM_DBG_TMP_BUF_LEN];
u32 val;
int ret;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
mutex_lock(&file->lock);
switch (index) {
case CURRENT_QM:
val = current_qm_read(file);
val = current_qm_read(qm);
break;
case CURRENT_Q:
val = current_q_read(file);
val = current_q_read(qm);
break;
case CLEAR_ENABLE:
val = clear_enable_read(file);
val = clear_enable_read(qm);
break;
default:
mutex_unlock(&file->lock);
return -EINVAL;
goto err_input;
}
mutex_unlock(&file->lock);
hisi_qm_put_dfx_access(qm);
ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
err_input:
mutex_unlock(&file->lock);
hisi_qm_put_dfx_access(qm);
return -EINVAL;
}
static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
@ -1289,6 +1319,7 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
{
struct debugfs_file *file = filp->private_data;
enum qm_debug_file index = file->index;
struct hisi_qm *qm = file_to_qm(file);
unsigned long val;
char tbuf[QM_DBG_TMP_BUF_LEN];
int len, ret;
@ -1308,22 +1339,28 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
mutex_lock(&file->lock);
switch (index) {
case CURRENT_QM:
ret = current_qm_write(file, val);
ret = current_qm_write(qm, val);
break;
case CURRENT_Q:
ret = current_q_write(file, val);
ret = current_q_write(qm, val);
break;
case CLEAR_ENABLE:
ret = clear_enable_write(file, val);
ret = clear_enable_write(qm, val);
break;
default:
ret = -EINVAL;
}
mutex_unlock(&file->lock);
hisi_qm_put_dfx_access(qm);
if (ret)
return ret;
@ -1337,13 +1374,8 @@ static const struct file_operations qm_debug_fops = {
.write = qm_debug_write,
};
struct qm_dfx_registers {
char *reg_name;
u64 reg_offset;
};
#define CNT_CYC_REGS_NUM 10
static struct qm_dfx_registers qm_dfx_regs[] = {
static const struct debugfs_reg32 qm_dfx_regs[] = {
/* XXX_CNT are reading clear register */
{"QM_ECC_1BIT_CNT ", 0x104000ull},
{"QM_ECC_MBIT_CNT ", 0x104008ull},
@ -1369,31 +1401,59 @@ static struct qm_dfx_registers qm_dfx_regs[] = {
{"QM_DFX_FF_ST5 ", 0x1040dcull},
{"QM_DFX_FF_ST6 ", 0x1040e0ull},
{"QM_IN_IDLE_ST ", 0x1040e4ull},
{ NULL, 0}
};
static struct qm_dfx_registers qm_vf_dfx_regs[] = {
static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
{"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
{ NULL, 0}
};
/**
* hisi_qm_regs_dump() - Dump registers's value.
* @s: debugfs file handle.
* @regset: accelerator registers information.
*
* Dump accelerator registers.
*/
void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
{
struct pci_dev *pdev = to_pci_dev(regset->dev);
struct hisi_qm *qm = pci_get_drvdata(pdev);
const struct debugfs_reg32 *regs = regset->regs;
int regs_len = regset->nregs;
int i, ret;
u32 val;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return;
for (i = 0; i < regs_len; i++) {
val = readl(regset->base + regs[i].offset);
seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
}
hisi_qm_put_dfx_access(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
static int qm_regs_show(struct seq_file *s, void *unused)
{
struct hisi_qm *qm = s->private;
struct qm_dfx_registers *regs;
u32 val;
struct debugfs_regset32 regset;
if (qm->fun_type == QM_HW_PF)
regs = qm_dfx_regs;
else
regs = qm_vf_dfx_regs;
while (regs->reg_name) {
val = readl(qm->io_base + regs->reg_offset);
seq_printf(s, "%s= 0x%08x\n", regs->reg_name, val);
regs++;
if (qm->fun_type == QM_HW_PF) {
regset.regs = qm_dfx_regs;
regset.nregs = ARRAY_SIZE(qm_dfx_regs);
} else {
regset.regs = qm_vf_dfx_regs;
regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs);
}
regset.base = qm->io_base;
regset.dev = &qm->pdev->dev;
hisi_qm_regs_dump(s, &regset);
return 0;
}
@ -1823,16 +1883,24 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
if (*pos)
return 0;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
/* Judge if the instance is being reset. */
if (unlikely(atomic_read(&qm->status.flags) == QM_STOP))
return 0;
if (count > QM_DBG_WRITE_LEN)
return -ENOSPC;
if (count > QM_DBG_WRITE_LEN) {
ret = -ENOSPC;
goto put_dfx_access;
}
cmd_buf = memdup_user_nul(buffer, count);
if (IS_ERR(cmd_buf))
return PTR_ERR(cmd_buf);
if (IS_ERR(cmd_buf)) {
ret = PTR_ERR(cmd_buf);
goto put_dfx_access;
}
cmd_buf_tmp = strchr(cmd_buf, '\n');
if (cmd_buf_tmp) {
@ -1843,12 +1911,16 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
ret = qm_cmd_write_dump(qm, cmd_buf);
if (ret) {
kfree(cmd_buf);
return ret;
goto put_dfx_access;
}
kfree(cmd_buf);
return count;
ret = count;
put_dfx_access:
hisi_qm_put_dfx_access(qm);
return ret;
}
static const struct file_operations qm_cmd_fops = {
@ -2445,11 +2517,19 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
{
struct hisi_qp *qp;
int ret;
ret = qm_pm_get_sync(qm);
if (ret)
return ERR_PTR(ret);
down_write(&qm->qps_lock);
qp = qm_create_qp_nolock(qm, alg_type);
up_write(&qm->qps_lock);
if (IS_ERR(qp))
qm_pm_put_sync(qm);
return qp;
}
EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
@ -2475,6 +2555,8 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
idr_remove(&qm->qp_idr, qp->qp_id);
up_write(&qm->qps_lock);
qm_pm_put_sync(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
@ -3200,6 +3282,10 @@ static void hisi_qm_pre_init(struct hisi_qm *qm)
init_rwsem(&qm->qps_lock);
qm->qp_in_used = 0;
qm->misc_ctl = false;
if (qm->fun_type == QM_HW_PF && qm->ver > QM_HW_V2) {
if (!acpi_device_power_manageable(ACPI_COMPANION(&pdev->dev)))
dev_info(&pdev->dev, "_PS0 and _PR0 are not defined");
}
}
static void qm_cmd_uninit(struct hisi_qm *qm)
@ -4057,10 +4143,15 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
u32 qos_val, ir;
int ret;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
/* Mailbox and reset cannot be operated at the same time */
if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
pci_err(qm->pdev, "dev resetting, read alg qos failed!\n");
return -EAGAIN;
ret = -EAGAIN;
goto err_put_dfx_access;
}
if (qm->fun_type == QM_HW_PF) {
@ -4079,6 +4170,8 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
err_get_status:
clear_bit(QM_RESETTING, &qm->misc_ctl);
err_put_dfx_access:
hisi_qm_put_dfx_access(qm);
return ret;
}
@ -4159,15 +4252,23 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
fun_index = device * 8 + function;
ret = qm_func_shaper_enable(qm, fun_index, val);
ret = qm_pm_get_sync(qm);
if (ret) {
pci_err(qm->pdev, "failed to enable function shaper!\n");
ret = -EINVAL;
goto err_get_status;
}
ret = count;
ret = qm_func_shaper_enable(qm, fun_index, val);
if (ret) {
pci_err(qm->pdev, "failed to enable function shaper!\n");
ret = -EINVAL;
goto err_put_sync;
}
ret = count;
err_put_sync:
qm_pm_put_sync(qm);
err_get_status:
clear_bit(QM_RESETTING, &qm->misc_ctl);
return ret;
@ -4245,7 +4346,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
*/
void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
{
struct qm_dfx_registers *regs;
const struct debugfs_reg32 *regs;
int i;
/* clear current_qm */
@ -4264,7 +4365,7 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
regs = qm_dfx_regs;
for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
readl(qm->io_base + regs->reg_offset);
readl(qm->io_base + regs->offset);
regs++;
}
@ -4287,19 +4388,23 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
struct hisi_qm *qm = pci_get_drvdata(pdev);
int pre_existing_vfs, num_vfs, total_vfs, ret;
ret = qm_pm_get_sync(qm);
if (ret)
return ret;
total_vfs = pci_sriov_get_totalvfs(pdev);
pre_existing_vfs = pci_num_vf(pdev);
if (pre_existing_vfs) {
pci_err(pdev, "%d VFs already enabled. Please disable pre-enabled VFs!\n",
pre_existing_vfs);
return 0;
goto err_put_sync;
}
num_vfs = min_t(int, max_vfs, total_vfs);
ret = qm_vf_q_assign(qm, num_vfs);
if (ret) {
pci_err(pdev, "Can't assign queues for VF!\n");
return ret;
goto err_put_sync;
}
qm->vfs_num = num_vfs;
@ -4308,12 +4413,16 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
if (ret) {
pci_err(pdev, "Can't enable VF!\n");
qm_clear_vft_config(qm);
return ret;
goto err_put_sync;
}
pci_info(pdev, "VF enabled, vfs_num(=%d)!\n", num_vfs);
return num_vfs;
err_put_sync:
qm_pm_put_sync(qm);
return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable);
@ -4328,6 +4437,7 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
int total_vfs = pci_sriov_get_totalvfs(qm->pdev);
int ret;
if (pci_vfs_assigned(pdev)) {
pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n");
@ -4343,8 +4453,13 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
pci_disable_sriov(pdev);
/* clear vf function shaper configure array */
memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs);
ret = qm_clear_vft_config(qm);
if (ret)
return ret;
return qm_clear_vft_config(qm);
qm_pm_put_sync(qm);
return 0;
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable);
@ -5164,11 +5279,18 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
struct hisi_qm *qm = container_of(rst_work, struct hisi_qm, rst_work);
int ret;
ret = qm_pm_get_sync(qm);
if (ret) {
clear_bit(QM_RST_SCHED, &qm->misc_ctl);
return;
}
/* reset pcie device controller */
ret = qm_controller_reset(qm);
if (ret)
dev_err(&qm->pdev->dev, "controller reset failed (%d)\n", ret);
qm_pm_put_sync(qm);
}
static void qm_pf_reset_vf_prepare(struct hisi_qm *qm,
@ -5680,6 +5802,194 @@ err_pci_init:
}
EXPORT_SYMBOL_GPL(hisi_qm_init);
/**
* hisi_qm_get_dfx_access() - Try to get dfx access.
* @qm: pointer to accelerator device.
*
* Try to get dfx access, then user can get message.
*
* If device is in suspended, return failure, otherwise
* bump up the runtime PM usage counter.
*/
int hisi_qm_get_dfx_access(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
if (pm_runtime_suspended(dev)) {
dev_info(dev, "can not read/write - device in suspended.\n");
return -EAGAIN;
}
return qm_pm_get_sync(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_get_dfx_access);
/**
* hisi_qm_put_dfx_access() - Put dfx access.
* @qm: pointer to accelerator device.
*
* Put dfx access, drop runtime PM usage counter.
*/
void hisi_qm_put_dfx_access(struct hisi_qm *qm)
{
qm_pm_put_sync(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_put_dfx_access);
/**
* hisi_qm_pm_init() - Initialize qm runtime PM.
* @qm: pointer to accelerator device.
*
* Function that initialize qm runtime PM.
*/
void hisi_qm_pm_init(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
return;
pm_runtime_set_autosuspend_delay(dev, QM_AUTOSUSPEND_DELAY);
pm_runtime_use_autosuspend(dev);
pm_runtime_put_noidle(dev);
}
EXPORT_SYMBOL_GPL(hisi_qm_pm_init);
/**
* hisi_qm_pm_uninit() - Uninitialize qm runtime PM.
* @qm: pointer to accelerator device.
*
* Function that uninitialize qm runtime PM.
*/
void hisi_qm_pm_uninit(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
return;
pm_runtime_get_noresume(dev);
pm_runtime_dont_use_autosuspend(dev);
}
EXPORT_SYMBOL_GPL(hisi_qm_pm_uninit);
static int qm_prepare_for_suspend(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
int ret;
u32 val;
ret = qm->ops->set_msi(qm, false);
if (ret) {
pci_err(pdev, "failed to disable MSI before suspending!\n");
return ret;
}
/* shutdown OOO register */
writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN,
qm->io_base + ACC_MASTER_GLOBAL_CTRL);
ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN,
val,
(val == ACC_MASTER_TRANS_RETURN_RW),
POLL_PERIOD, POLL_TIMEOUT);
if (ret) {
pci_emerg(pdev, "Bus lock! Please reset system.\n");
return ret;
}
ret = qm_set_pf_mse(qm, false);
if (ret)
pci_err(pdev, "failed to disable MSE before suspending!\n");
return ret;
}
static int qm_rebuild_for_resume(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
int ret;
ret = qm_set_pf_mse(qm, true);
if (ret) {
pci_err(pdev, "failed to enable MSE after resuming!\n");
return ret;
}
ret = qm->ops->set_msi(qm, true);
if (ret) {
pci_err(pdev, "failed to enable MSI after resuming!\n");
return ret;
}
ret = qm_dev_hw_init(qm);
if (ret) {
pci_err(pdev, "failed to init device after resuming\n");
return ret;
}
qm_cmd_init(qm);
hisi_qm_dev_err_init(qm);
return 0;
}
/**
* hisi_qm_suspend() - Runtime suspend of given device.
* @dev: device to suspend.
*
* Function that suspend the device.
*/
int hisi_qm_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct hisi_qm *qm = pci_get_drvdata(pdev);
int ret;
pci_info(pdev, "entering suspended state\n");
ret = hisi_qm_stop(qm, QM_NORMAL);
if (ret) {
pci_err(pdev, "failed to stop qm(%d)\n", ret);
return ret;
}
ret = qm_prepare_for_suspend(qm);
if (ret)
pci_err(pdev, "failed to prepare suspended(%d)\n", ret);
return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_suspend);
/**
* hisi_qm_resume() - Runtime resume of given device.
* @dev: device to resume.
*
* Function that resume the device.
*/
int hisi_qm_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct hisi_qm *qm = pci_get_drvdata(pdev);
int ret;
pci_info(pdev, "resuming from suspend state\n");
ret = qm_rebuild_for_resume(qm);
if (ret) {
pci_err(pdev, "failed to rebuild resume(%d)\n", ret);
return ret;
}
ret = hisi_qm_start(qm);
if (ret)
pci_err(pdev, "failed to start qm(%d)\n", ret);
return 0;
}
EXPORT_SYMBOL_GPL(hisi_qm_resume);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver");

View File

@ -4,6 +4,7 @@
#define HISI_ACC_QM_H
#include <linux/bitfield.h>
#include <linux/debugfs.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/pci.h>
@ -430,4 +431,11 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev);
void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
int hisi_qm_resume(struct device *dev);
int hisi_qm_suspend(struct device *dev);
void hisi_qm_pm_uninit(struct hisi_qm *qm);
void hisi_qm_pm_init(struct hisi_qm *qm);
int hisi_qm_get_dfx_access(struct hisi_qm *qm);
void hisi_qm_put_dfx_access(struct hisi_qm *qm);
void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
#endif

View File

@ -157,11 +157,6 @@ struct sec_ctx {
struct device *dev;
};
enum sec_endian {
SEC_LE = 0,
SEC_32BE,
SEC_64BE
};
enum sec_debug_file_index {
SEC_CLEAR_ENABLE,

View File

@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/topology.h>
#include <linux/uacce.h>
@ -57,10 +58,16 @@
#define SEC_MEM_START_INIT_REG 0x301100
#define SEC_MEM_INIT_DONE_REG 0x301104
/* clock gating */
#define SEC_CONTROL_REG 0x301200
#define SEC_TRNG_EN_SHIFT 8
#define SEC_DYNAMIC_GATE_REG 0x30121c
#define SEC_CORE_AUTO_GATE 0x30212c
#define SEC_DYNAMIC_GATE_EN 0x7bff
#define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0)
#define SEC_CLK_GATE_ENABLE BIT(3)
#define SEC_CLK_GATE_DISABLE (~BIT(3))
#define SEC_TRNG_EN_SHIFT 8
#define SEC_AXI_SHUTDOWN_ENABLE BIT(12)
#define SEC_AXI_SHUTDOWN_DISABLE 0xFFFFEFFF
@ -312,31 +319,20 @@ static const struct pci_device_id sec_dev_ids[] = {
};
MODULE_DEVICE_TABLE(pci, sec_dev_ids);
static u8 sec_get_endian(struct hisi_qm *qm)
static void sec_set_endian(struct hisi_qm *qm)
{
u32 reg;
/*
* As for VF, it is a wrong way to get endian setting by
* reading a register of the engine
*/
if (qm->pdev->is_virtfn) {
dev_err_ratelimited(&qm->pdev->dev,
"cannot access a register in VF!\n");
return SEC_LE;
}
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
/* BD little endian mode */
if (!(reg & BIT(0)))
return SEC_LE;
reg &= ~(BIT(1) | BIT(0));
if (!IS_ENABLED(CONFIG_64BIT))
reg |= BIT(1);
/* BD 32-bits big endian mode */
else if (!(reg & BIT(1)))
return SEC_32BE;
/* BD 64-bits big endian mode */
else
return SEC_64BE;
if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
reg |= BIT(0);
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
}
static void sec_open_sva_prefetch(struct hisi_qm *qm)
@ -378,15 +374,43 @@ static void sec_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
static void sec_enable_clock_gate(struct hisi_qm *qm)
{
u32 val;
if (qm->ver < QM_HW_V3)
return;
val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
val |= SEC_CLK_GATE_ENABLE;
writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
val = readl(qm->io_base + SEC_DYNAMIC_GATE_REG);
val |= SEC_DYNAMIC_GATE_EN;
writel(val, qm->io_base + SEC_DYNAMIC_GATE_REG);
val = readl(qm->io_base + SEC_CORE_AUTO_GATE);
val |= SEC_CORE_AUTO_GATE_EN;
writel(val, qm->io_base + SEC_CORE_AUTO_GATE);
}
static void sec_disable_clock_gate(struct hisi_qm *qm)
{
u32 val;
/* Kunpeng920 needs to close clock gating */
val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
val &= SEC_CLK_GATE_DISABLE;
writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
}
static int sec_engine_init(struct hisi_qm *qm)
{
int ret;
u32 reg;
/* disable clock gate control */
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
reg &= SEC_CLK_GATE_DISABLE;
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
/* disable clock gate control before mem init */
sec_disable_clock_gate(qm);
writel_relaxed(0x1, qm->io_base + SEC_MEM_START_INIT_REG);
@ -429,9 +453,9 @@ static int sec_engine_init(struct hisi_qm *qm)
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
/* config endian */
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
reg |= sec_get_endian(qm);
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
sec_set_endian(qm);
sec_enable_clock_gate(qm);
return 0;
}
@ -533,17 +557,14 @@ static void sec_hw_error_disable(struct hisi_qm *qm)
writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
}
static u32 sec_clear_enable_read(struct sec_debug_file *file)
static u32 sec_clear_enable_read(struct hisi_qm *qm)
{
struct hisi_qm *qm = file->qm;
return readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) &
SEC_CTRL_CNT_CLR_CE_BIT;
}
static int sec_clear_enable_write(struct sec_debug_file *file, u32 val)
static int sec_clear_enable_write(struct hisi_qm *qm, u32 val)
{
struct hisi_qm *qm = file->qm;
u32 tmp;
if (val != 1 && val)
@ -561,24 +582,34 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf,
{
struct sec_debug_file *file = filp->private_data;
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
struct hisi_qm *qm = file->qm;
u32 val;
int ret;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
spin_lock_irq(&file->lock);
switch (file->index) {
case SEC_CLEAR_ENABLE:
val = sec_clear_enable_read(file);
val = sec_clear_enable_read(qm);
break;
default:
spin_unlock_irq(&file->lock);
return -EINVAL;
goto err_input;
}
spin_unlock_irq(&file->lock);
ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
hisi_qm_put_dfx_access(qm);
ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
err_input:
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
return -EINVAL;
}
static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
@ -586,6 +617,7 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
{
struct sec_debug_file *file = filp->private_data;
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
struct hisi_qm *qm = file->qm;
unsigned long val;
int len, ret;
@ -604,11 +636,15 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
spin_lock_irq(&file->lock);
switch (file->index) {
case SEC_CLEAR_ENABLE:
ret = sec_clear_enable_write(file, val);
ret = sec_clear_enable_write(qm, val);
if (ret)
goto err_input;
break;
@ -617,12 +653,11 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
goto err_input;
}
spin_unlock_irq(&file->lock);
return count;
ret = count;
err_input:
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
return ret;
}
@ -653,6 +688,15 @@ static int sec_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get,
sec_debugfs_atomic64_set, "%lld\n");
static int sec_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(sec_regs);
static int sec_core_debug_init(struct hisi_qm *qm)
{
struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
@ -671,9 +715,10 @@ static int sec_core_debug_init(struct hisi_qm *qm)
regset->regs = sec_dfx_regs;
regset->nregs = ARRAY_SIZE(sec_dfx_regs);
regset->base = qm->io_base;
regset->dev = dev;
if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
debugfs_create_regset32("regs", 0444, tmp_d, regset);
debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
@ -981,10 +1026,13 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_alg_unregister;
}
hisi_qm_pm_init(qm);
return 0;
err_alg_unregister:
hisi_qm_alg_unregister(qm, &sec_devices);
if (qm->qp_num >= ctx_q_num)
hisi_qm_alg_unregister(qm, &sec_devices);
err_qm_stop:
sec_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
@ -999,6 +1047,7 @@ static void sec_remove(struct pci_dev *pdev)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &sec_devices);
if (qm->qp_num >= ctx_q_num)
hisi_qm_alg_unregister(qm, &sec_devices);
@ -1018,6 +1067,10 @@ static void sec_remove(struct pci_dev *pdev)
sec_qm_uninit(qm);
}
static const struct dev_pm_ops sec_pm_ops = {
SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
};
static const struct pci_error_handlers sec_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@ -1033,6 +1086,7 @@ static struct pci_driver sec_pci_driver = {
.err_handler = &sec_err_handler,
.sriov_configure = hisi_qm_sriov_configure,
.shutdown = hisi_qm_dev_shutdown,
.driver.pm = &sec_pm_ops,
};
static void sec_register_debugfs(void)

View File

@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/topology.h>
#include <linux/uacce.h>
@ -107,6 +108,14 @@
#define HZIP_DELAY_1_US 1
#define HZIP_POLL_TIMEOUT_US 1000
/* clock gating */
#define HZIP_PEH_CFG_AUTO_GATE 0x3011A8
#define HZIP_PEH_CFG_AUTO_GATE_EN BIT(0)
#define HZIP_CORE_GATED_EN GENMASK(15, 8)
#define HZIP_CORE_GATED_OOO_EN BIT(29)
#define HZIP_CLOCK_GATED_EN (HZIP_CORE_GATED_EN | \
HZIP_CORE_GATED_OOO_EN)
static const char hisi_zip_name[] = "hisi_zip";
static struct dentry *hzip_debugfs_root;
@ -312,6 +321,22 @@ static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
static void hisi_zip_enable_clock_gate(struct hisi_qm *qm)
{
u32 val;
if (qm->ver < QM_HW_V3)
return;
val = readl(qm->io_base + HZIP_CLOCK_GATE_CTRL);
val |= HZIP_CLOCK_GATED_EN;
writel(val, qm->io_base + HZIP_CLOCK_GATE_CTRL);
val = readl(qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
val |= HZIP_PEH_CFG_AUTO_GATE_EN;
writel(val, qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
}
static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
{
void __iomem *base = qm->io_base;
@ -359,6 +384,8 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL);
hisi_zip_enable_clock_gate(qm);
return 0;
}
@ -423,17 +450,14 @@ static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
return &hisi_zip->qm;
}
static u32 clear_enable_read(struct ctrl_debug_file *file)
static u32 clear_enable_read(struct hisi_qm *qm)
{
struct hisi_qm *qm = file_to_qm(file);
return readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) &
HZIP_SOFT_CTRL_CNT_CLR_CE_BIT;
}
static int clear_enable_write(struct ctrl_debug_file *file, u32 val)
static int clear_enable_write(struct hisi_qm *qm, u32 val)
{
struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val != 1 && val != 0)
@ -450,22 +474,33 @@ static ssize_t hisi_zip_ctrl_debug_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct ctrl_debug_file *file = filp->private_data;
struct hisi_qm *qm = file_to_qm(file);
char tbuf[HZIP_BUF_SIZE];
u32 val;
int ret;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
spin_lock_irq(&file->lock);
switch (file->index) {
case HZIP_CLEAR_ENABLE:
val = clear_enable_read(file);
val = clear_enable_read(qm);
break;
default:
spin_unlock_irq(&file->lock);
return -EINVAL;
goto err_input;
}
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
ret = scnprintf(tbuf, sizeof(tbuf), "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
err_input:
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
return -EINVAL;
}
static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
@ -473,6 +508,7 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
size_t count, loff_t *pos)
{
struct ctrl_debug_file *file = filp->private_data;
struct hisi_qm *qm = file_to_qm(file);
char tbuf[HZIP_BUF_SIZE];
unsigned long val;
int len, ret;
@ -491,10 +527,14 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return ret;
spin_lock_irq(&file->lock);
switch (file->index) {
case HZIP_CLEAR_ENABLE:
ret = clear_enable_write(file, val);
ret = clear_enable_write(qm, val);
if (ret)
goto err_input;
break;
@ -502,12 +542,12 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
ret = -EINVAL;
goto err_input;
}
spin_unlock_irq(&file->lock);
return count;
ret = count;
err_input:
spin_unlock_irq(&file->lock);
hisi_qm_put_dfx_access(qm);
return ret;
}
@ -538,6 +578,15 @@ static int zip_debugfs_atomic64_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(zip_atomic64_ops, zip_debugfs_atomic64_get,
zip_debugfs_atomic64_set, "%llu\n");
static int hisi_zip_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hisi_zip_regs);
static int hisi_zip_core_debug_init(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
@ -560,9 +609,11 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm)
regset->regs = hzip_dfx_regs;
regset->nregs = ARRAY_SIZE(hzip_dfx_regs);
regset->base = qm->io_base + core_offsets[i];
regset->dev = dev;
tmp_d = debugfs_create_dir(buf, qm->debug.debug_root);
debugfs_create_regset32("regs", 0444, tmp_d, regset);
debugfs_create_file("regs", 0444, tmp_d, regset,
&hisi_zip_regs_fops);
}
return 0;
@ -898,6 +949,8 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_qm_alg_unregister;
}
hisi_qm_pm_init(qm);
return 0;
err_qm_alg_unregister:
@ -920,6 +973,7 @@ static void hisi_zip_remove(struct pci_dev *pdev)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &zip_devices);
hisi_qm_alg_unregister(qm, &zip_devices);
@ -932,6 +986,10 @@ static void hisi_zip_remove(struct pci_dev *pdev)
hisi_zip_qm_uninit(qm);
}
static const struct dev_pm_ops hisi_zip_pm_ops = {
SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
};
static const struct pci_error_handlers hisi_zip_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@ -948,6 +1006,7 @@ static struct pci_driver hisi_zip_pci_driver = {
hisi_qm_sriov_configure : NULL,
.err_handler = &hisi_zip_err_handler,
.shutdown = hisi_qm_dev_shutdown,
.driver.pm = &hisi_zip_pm_ops,
};
static void hisi_zip_register_debugfs(void)

View File

@ -170,15 +170,19 @@ static struct dcp *global_sdcp;
static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
{
int dma_err;
struct dcp *sdcp = global_sdcp;
const int chan = actx->chan;
uint32_t stat;
unsigned long ret;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
DMA_TO_DEVICE);
dma_err = dma_mapping_error(sdcp->dev, desc_phys);
if (dma_err)
return dma_err;
reinit_completion(&sdcp->completion[chan]);
/* Clear status register. */
@ -216,18 +220,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
struct skcipher_request *req, int init)
{
dma_addr_t key_phys, src_phys, dst_phys;
struct dcp *sdcp = global_sdcp;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
int ret;
dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
2 * AES_KEYSIZE_128,
DMA_TO_DEVICE);
dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
DCP_BUF_SZ, DMA_TO_DEVICE);
dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
DCP_BUF_SZ, DMA_FROM_DEVICE);
key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
2 * AES_KEYSIZE_128, DMA_TO_DEVICE);
ret = dma_mapping_error(sdcp->dev, key_phys);
if (ret)
return ret;
src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
DCP_BUF_SZ, DMA_TO_DEVICE);
ret = dma_mapping_error(sdcp->dev, src_phys);
if (ret)
goto err_src;
dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
DCP_BUF_SZ, DMA_FROM_DEVICE);
ret = dma_mapping_error(sdcp->dev, dst_phys);
if (ret)
goto err_dst;
if (actx->fill % AES_BLOCK_SIZE) {
dev_err(sdcp->dev, "Invalid block size!\n");
@ -265,10 +280,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
ret = mxs_dcp_start_dma(actx);
aes_done_run:
dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
err_dst:
dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
err_src:
dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
DMA_TO_DEVICE);
dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
return ret;
}
@ -283,21 +300,20 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
struct scatterlist *dst = req->dst;
struct scatterlist *src = req->src;
const int nents = sg_nents(req->src);
int dst_nents = sg_nents(dst);
const int out_off = DCP_BUF_SZ;
uint8_t *in_buf = sdcp->coh->aes_in_buf;
uint8_t *out_buf = sdcp->coh->aes_out_buf;
uint8_t *out_tmp, *src_buf, *dst_buf = NULL;
uint32_t dst_off = 0;
uint8_t *src_buf = NULL;
uint32_t last_out_len = 0;
uint8_t *key = sdcp->coh->aes_key;
int ret = 0;
int split = 0;
unsigned int i, len, clen, rem = 0, tlen = 0;
unsigned int i, len, clen, tlen = 0;
int init = 0;
bool limit_hit = false;
@ -315,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
}
for_each_sg(req->src, src, nents, i) {
for_each_sg(req->src, src, sg_nents(src), i) {
src_buf = sg_virt(src);
len = sg_dma_len(src);
tlen += len;
@ -340,34 +356,17 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
* submit the buffer.
*/
if (actx->fill == out_off || sg_is_last(src) ||
limit_hit) {
limit_hit) {
ret = mxs_dcp_run_aes(actx, req, init);
if (ret)
return ret;
init = 0;
out_tmp = out_buf;
sg_pcopy_from_buffer(dst, dst_nents, out_buf,
actx->fill, dst_off);
dst_off += actx->fill;
last_out_len = actx->fill;
while (dst && actx->fill) {
if (!split) {
dst_buf = sg_virt(dst);
dst_off = 0;
}
rem = min(sg_dma_len(dst) - dst_off,
actx->fill);
memcpy(dst_buf + dst_off, out_tmp, rem);
out_tmp += rem;
dst_off += rem;
actx->fill -= rem;
if (dst_off == sg_dma_len(dst)) {
dst = sg_next(dst);
split = 0;
} else {
split = 1;
}
}
actx->fill = 0;
}
} while (len);
@ -557,6 +556,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf,
DCP_BUF_SZ, DMA_TO_DEVICE);
ret = dma_mapping_error(sdcp->dev, buf_phys);
if (ret)
return ret;
/* Fill in the DMA descriptor. */
desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
MXS_DCP_CONTROL0_INTERRUPT |
@ -589,6 +592,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
if (rctx->fini) {
digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf,
DCP_SHA_PAY_SZ, DMA_FROM_DEVICE);
ret = dma_mapping_error(sdcp->dev, digest_phys);
if (ret)
goto done_run;
desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
desc->payload = digest_phys;
}

View File

@ -1175,9 +1175,9 @@ static int omap_aes_probe(struct platform_device *pdev)
spin_lock_init(&dd->lock);
INIT_LIST_HEAD(&dd->list);
spin_lock(&list_lock);
spin_lock_bh(&list_lock);
list_add_tail(&dd->list, &dev_list);
spin_unlock(&list_lock);
spin_unlock_bh(&list_lock);
/* Initialize crypto engine */
dd->engine = crypto_engine_alloc_init(dev, 1);
@ -1264,9 +1264,9 @@ static int omap_aes_remove(struct platform_device *pdev)
if (!dd)
return -ENODEV;
spin_lock(&list_lock);
spin_lock_bh(&list_lock);
list_del(&dd->list);
spin_unlock(&list_lock);
spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {

View File

@ -210,7 +210,7 @@ void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
buf = sg_virt(sg);
pages = get_order(len);
if (orig && (flags & OMAP_CRYPTO_COPY_MASK))
if (orig && (flags & OMAP_CRYPTO_DATA_COPIED))
omap_crypto_copy_data(sg, orig, offset, len);
if (flags & OMAP_CRYPTO_DATA_COPIED)

View File

@ -1033,9 +1033,9 @@ static int omap_des_probe(struct platform_device *pdev)
INIT_LIST_HEAD(&dd->list);
spin_lock(&list_lock);
spin_lock_bh(&list_lock);
list_add_tail(&dd->list, &dev_list);
spin_unlock(&list_lock);
spin_unlock_bh(&list_lock);
/* Initialize des crypto engine */
dd->engine = crypto_engine_alloc_init(dev, 1);
@ -1094,9 +1094,9 @@ static int omap_des_remove(struct platform_device *pdev)
if (!dd)
return -ENODEV;
spin_lock(&list_lock);
spin_lock_bh(&list_lock);
list_del(&dd->list);
spin_unlock(&list_lock);
spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)

View File

@ -105,7 +105,6 @@
#define FLAGS_FINAL 1
#define FLAGS_DMA_ACTIVE 2
#define FLAGS_OUTPUT_READY 3
#define FLAGS_INIT 4
#define FLAGS_CPU 5
#define FLAGS_DMA_READY 6
#define FLAGS_AUTO_XOR 7
@ -368,24 +367,6 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req)
hash[i] = le32_to_cpup((__le32 *)in + i);
}
static int omap_sham_hw_init(struct omap_sham_dev *dd)
{
int err;
err = pm_runtime_resume_and_get(dd->dev);
if (err < 0) {
dev_err(dd->dev, "failed to get sync: %d\n", err);
return err;
}
if (!test_bit(FLAGS_INIT, &dd->flags)) {
set_bit(FLAGS_INIT, &dd->flags);
dd->err = 0;
}
return 0;
}
static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length,
int final, int dma)
{
@ -1093,11 +1074,14 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq)
dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d",
ctx->op, ctx->total, ctx->digcnt, final);
dd->req = req;
err = omap_sham_hw_init(dd);
if (err)
err = pm_runtime_resume_and_get(dd->dev);
if (err < 0) {
dev_err(dd->dev, "failed to get sync: %d\n", err);
return err;
}
dd->err = 0;
dd->req = req;
if (ctx->digcnt)
dd->pdata->copy_hash(req, 0);
@ -1736,7 +1720,7 @@ static void omap_sham_done_task(unsigned long data)
if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags))
goto finish;
} else if (test_bit(FLAGS_DMA_READY, &dd->flags)) {
if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
omap_sham_update_dma_stop(dd);
if (dd->err) {
err = dd->err;
@ -2129,7 +2113,6 @@ static int omap_sham_probe(struct platform_device *pdev)
dd->fallback_sz = OMAP_SHA_DMA_THRESHOLD;
pm_runtime_enable(dev);
pm_runtime_irq_safe(dev);
err = pm_runtime_get_sync(dev);
if (err < 0) {
@ -2144,9 +2127,9 @@ static int omap_sham_probe(struct platform_device *pdev)
(rev & dd->pdata->major_mask) >> dd->pdata->major_shift,
(rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
spin_lock(&sham.lock);
spin_lock_bh(&sham.lock);
list_add_tail(&dd->list, &sham.dev_list);
spin_unlock(&sham.lock);
spin_unlock_bh(&sham.lock);
dd->engine = crypto_engine_alloc_init(dev, 1);
if (!dd->engine) {
@ -2194,10 +2177,11 @@ err_algs:
err_engine_start:
crypto_engine_exit(dd->engine);
err_engine:
spin_lock(&sham.lock);
spin_lock_bh(&sham.lock);
list_del(&dd->list);
spin_unlock(&sham.lock);
spin_unlock_bh(&sham.lock);
err_pm:
pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
if (!dd->polling_mode)
dma_release_channel(dd->dma_lch);
@ -2215,9 +2199,9 @@ static int omap_sham_remove(struct platform_device *pdev)
dd = platform_get_drvdata(pdev);
if (!dd)
return -ENODEV;
spin_lock(&sham.lock);
spin_lock_bh(&sham.lock);
list_del(&dd->list);
spin_unlock(&sham.lock);
spin_unlock_bh(&sham.lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
crypto_unregister_ahash(
@ -2225,6 +2209,7 @@ static int omap_sham_remove(struct platform_device *pdev)
dd->pdata->algs_info[i].registered--;
}
tasklet_kill(&dd->done_task);
pm_runtime_dont_use_autosuspend(&pdev->dev);
pm_runtime_disable(&pdev->dev);
if (!dd->polling_mode)
@ -2235,32 +2220,11 @@ static int omap_sham_remove(struct platform_device *pdev)
return 0;
}
#ifdef CONFIG_PM_SLEEP
static int omap_sham_suspend(struct device *dev)
{
pm_runtime_put_sync(dev);
return 0;
}
static int omap_sham_resume(struct device *dev)
{
int err = pm_runtime_resume_and_get(dev);
if (err < 0) {
dev_err(dev, "failed to get sync: %d\n", err);
return err;
}
return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume);
static struct platform_driver omap_sham_driver = {
.probe = omap_sham_probe,
.remove = omap_sham_remove,
.driver = {
.name = "omap-sham",
.pm = &omap_sham_pm_ops,
.of_match_table = omap_sham_of_match,
},
};

View File

@ -161,7 +161,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0);
}
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
return 0;
}
@ -210,21 +210,21 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
hw_data->fw_mmp_name = ADF_4XXX_MMP;
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK;
hw_data->uof_get_num_objs = uof_get_num_objs;
hw_data->uof_get_name = uof_get_name;
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->set_msix_rttable = set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
hw_data->disable_iov = adf_disable_sriov;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
}

View File

@ -221,16 +221,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* Set DMA identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration.\n");
ret = -EFAULT;
goto out_err;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (ret) {
dev_err(&pdev->dev, "No usable DMA configuration.\n");
goto out_err;
}
/* Get accelerator capabilities mask */

View File

@ -111,11 +111,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C3XXX_PF2VF_OFFSET(i);
}
static u32 get_vintmsk_offset(u32 i)
{
return ADF_C3XXX_VINTMSK_OFFSET(i);
}
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@ -159,8 +154,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_C3XXX_SMIA1_MASK);
}
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
return 0;
}
@ -193,8 +190,6 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->get_sram_bar_id = get_sram_bar_id;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sku = get_sku;
@ -203,16 +198,18 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
hw_data->disable_iov = adf_disable_sriov;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}

View File

@ -29,7 +29,6 @@
#define ADF_C3XXX_ERRSSMSH_EN BIT(3)
#define ADF_C3XXX_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
#define ADF_C3XXX_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48

View File

@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
ret = -EFAULT;
goto out_err_disable;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (ret) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C3XXX_DEVICE_NAME)) {
@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
goto out_err_free_reg;
goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
goto out_err_free_reg;
goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
out_err_disable_aer:
adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:

View File

@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C3XXXIOV_PF2VF_OFFSET;
}
static u32 get_vintmsk_offset(u32 i)
{
return ADF_C3XXXIOV_VINTMSK_OFFSET;
}
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@ -81,10 +76,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
hw_data->send_admin_init = adf_vf2pf_init;
hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
hw_data->disable_iov = adf_vf2pf_shutdown;
hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@ -92,11 +87,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);

View File

@ -13,7 +13,6 @@
#define ADF_C3XXXIOV_ETR_BAR 0
#define ADF_C3XXXIOV_ETR_MAX_BANKS 1
#define ADF_C3XXXIOV_PF2VF_OFFSET 0x200
#define ADF_C3XXXIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);

View File

@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
ret = -EFAULT;
goto out_err_disable;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (ret) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C3XXXVF_DEVICE_NAME)) {
@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);

View File

@ -113,11 +113,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C62X_PF2VF_OFFSET(i);
}
static u32 get_vintmsk_offset(u32 i)
{
return ADF_C62X_VINTMSK_OFFSET(i);
}
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@ -161,8 +156,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_C62X_SMIA1_MASK);
}
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
return 0;
}
@ -195,8 +192,6 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->get_sram_bar_id = get_sram_bar_id;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sku = get_sku;
@ -205,16 +200,18 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
hw_data->disable_iov = adf_disable_sriov;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}

View File

@ -30,7 +30,6 @@
#define ADF_C62X_ERRSSMSH_EN BIT(3)
#define ADF_C62X_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
#define ADF_C62X_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80

View File

@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
ret = -EFAULT;
goto out_err_disable;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (ret) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C62X_DEVICE_NAME)) {
@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
goto out_err_free_reg;
goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
goto out_err_free_reg;
goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
out_err_disable_aer:
adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:

View File

@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C62XIOV_PF2VF_OFFSET;
}
static u32 get_vintmsk_offset(u32 i)
{
return ADF_C62XIOV_VINTMSK_OFFSET;
}
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@ -81,10 +76,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
hw_data->send_admin_init = adf_vf2pf_init;
hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
hw_data->disable_iov = adf_vf2pf_shutdown;
hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@ -92,11 +87,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);

View File

@ -13,7 +13,6 @@
#define ADF_C62XIOV_ETR_BAR 0
#define ADF_C62XIOV_ETR_MAX_BANKS 1
#define ADF_C62XIOV_PF2VF_OFFSET 0x200
#define ADF_C62XIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data);

View File

@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
ret = -EFAULT;
goto out_err_disable;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (ret) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C62XVF_DEVICE_NAME)) {
@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);

View File

@ -18,8 +18,6 @@
#define ADF_4XXX_DEVICE_NAME "4xxx"
#define ADF_4XXX_PCI_DEVICE_ID 0x4940
#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
#define ADF_ERRSOU3 (0x3A000 + 0x0C)
#define ADF_ERRSOU5 (0x3A000 + 0xD8)
#define ADF_DEVICE_FUSECTL_OFFSET 0x40
#define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
#define ADF_DEVICE_FUSECTL_MASK 0x80000000
@ -156,7 +154,6 @@ struct adf_hw_device_data {
u32 (*get_num_aes)(struct adf_hw_device_data *self);
u32 (*get_num_accels)(struct adf_hw_device_data *self);
u32 (*get_pf2vf_offset)(u32 i);
u32 (*get_vintmsk_offset)(u32 i);
void (*get_arb_info)(struct arb_info *arb_csrs_info);
void (*get_admin_info)(struct admin_info *admin_csrs_info);
enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self);
@ -174,7 +171,7 @@ struct adf_hw_device_data {
bool enable);
void (*enable_ints)(struct adf_accel_dev *accel_dev);
void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev);
int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev);
int (*enable_pfvf_comms)(struct adf_accel_dev *accel_dev);
void (*reset_device)(struct adf_accel_dev *accel_dev);
void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
char *(*uof_get_name)(u32 obj_num);
@ -227,7 +224,6 @@ struct adf_fw_loader_data {
struct adf_accel_vf_info {
struct adf_accel_dev *accel_dev;
struct tasklet_struct vf2pf_bh_tasklet;
struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */
struct ratelimit_state vf2pf_ratelimit;
u32 vf_nr;
@ -249,6 +245,8 @@ struct adf_accel_dev {
struct adf_accel_pci accel_pci_dev;
union {
struct {
/* protects VF2PF interrupts access */
spinlock_t vf2pf_ints_lock;
/* vf_info is non-zero when SR-IOV is init'ed */
struct adf_accel_vf_info *vf_info;
} pf;

View File

@ -194,7 +194,7 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev)
EXPORT_SYMBOL_GPL(adf_enable_aer);
/**
* adf_disable_aer() - Enable Advance Error Reporting for acceleration device
* adf_disable_aer() - Disable Advance Error Reporting for acceleration device
* @accel_dev: Pointer to acceleration device.
*
* Function disables PCI Advance Error Reporting for the

View File

@ -193,22 +193,23 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev,
u32 vf_mask);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info);
int adf_vf2pf_init(struct adf_accel_dev *accel_dev);
void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev);
int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev);
void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev);
int adf_init_pf_wq(void);
void adf_exit_pf_wq(void);
int adf_init_vf_wq(void);
void adf_exit_vf_wq(void);
void adf_flush_vf_wq(struct adf_accel_dev *accel_dev);
#else
static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
{
return 0;
}
#define adf_sriov_configure NULL
static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
{
@ -222,12 +223,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
}
static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
{
return 0;
}
static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
{
}
@ -249,5 +250,9 @@ static inline void adf_exit_vf_wq(void)
{
}
static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
{
}
#endif
#endif

View File

@ -61,6 +61,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
struct service_hndl *service;
struct list_head *list_itr;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
int ret;
if (!hw_data) {
dev_err(&GET_DEV(accel_dev),
@ -88,8 +89,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
return -EFAULT;
}
hw_data->enable_ints(accel_dev);
if (adf_ae_init(accel_dev)) {
dev_err(&GET_DEV(accel_dev),
"Failed to initialise Acceleration Engine\n");
@ -110,6 +109,13 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
}
set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
hw_data->enable_ints(accel_dev);
hw_data->enable_error_correction(accel_dev);
ret = hw_data->enable_pfvf_comms(accel_dev);
if (ret)
return ret;
/*
* Subservice initialisation is divided into two stages: init and start.
* This is to facilitate any ordering dependencies between services
@ -126,9 +132,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
set_bit(accel_dev->accel_id, service->init_status);
}
hw_data->enable_error_correction(accel_dev);
hw_data->enable_vf2pf_comms(accel_dev);
return 0;
}
EXPORT_SYMBOL_GPL(adf_dev_init);

View File

@ -15,6 +15,14 @@
#include "adf_transport_access_macros.h"
#include "adf_transport_internal.h"
#define ADF_MAX_NUM_VFS 32
#define ADF_ERRSOU3 (0x3A000 + 0x0C)
#define ADF_ERRSOU5 (0x3A000 + 0xD8)
#define ADF_ERRMSK3 (0x3A000 + 0x1C)
#define ADF_ERRMSK5 (0x3A000 + 0xDC)
#define ADF_ERR_REG_VF2PF_L(vf_src) (((vf_src) & 0x01FFFE00) >> 9)
#define ADF_ERR_REG_VF2PF_U(vf_src) (((vf_src) & 0x0000FFFF) << 16)
static int adf_enable_msix(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@ -71,14 +79,23 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
void __iomem *pmisc_bar_addr = pmisc->virt_addr;
u32 vf_mask;
void __iomem *pmisc_addr = pmisc->virt_addr;
u32 errsou3, errsou5, errmsk3, errmsk5;
unsigned long vf_mask;
/* Get the interrupt sources triggered by VFs */
vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) &
0x0000FFFF) << 16) |
((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU3) &
0x01FFFE00) >> 9);
errsou3 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU3);
errsou5 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU5);
vf_mask = ADF_ERR_REG_VF2PF_L(errsou3);
vf_mask |= ADF_ERR_REG_VF2PF_U(errsou5);
/* To avoid adding duplicate entries to work queue, clear
* vf_int_mask_sets bits that are already masked in ERRMSK register.
*/
errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK3);
errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK5);
vf_mask &= ~ADF_ERR_REG_VF2PF_L(errmsk3);
vf_mask &= ~ADF_ERR_REG_VF2PF_U(errmsk5);
if (vf_mask) {
struct adf_accel_vf_info *vf_info;
@ -86,15 +103,13 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
int i;
/* Disable VF2PF interrupts for VFs with pending ints */
adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask);
/*
* Schedule tasklets to handle VF2PF interrupt BHs
* unless the VF is malicious and is attempting to
* flood the host OS with VF2PF interrupts.
* Handle VF2PF interrupt unless the VF is malicious and
* is attempting to flood the host OS with VF2PF interrupts.
*/
for_each_set_bit(i, (const unsigned long *)&vf_mask,
(sizeof(vf_mask) * BITS_PER_BYTE)) {
for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) {
vf_info = accel_dev->pf.vf_info + i;
if (!__ratelimit(&vf_info->vf2pf_ratelimit)) {
@ -104,8 +119,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
continue;
}
/* Tasklet will re-enable ints from this VF */
tasklet_hi_schedule(&vf_info->vf2pf_bh_tasklet);
adf_schedule_vf2pf_handler(vf_info);
irq_handled = true;
}

View File

@ -11,28 +11,8 @@
#define ADF_DH895XCC_ERRMSK5 (ADF_DH895XCC_EP_OFFSET + 0xDC)
#define ADF_DH895XCC_ERRMSK5_VF2PF_U_MASK(vf_mask) (vf_mask >> 16)
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
void __iomem *pmisc_bar_addr =
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0);
}
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
void __iomem *pmisc_bar_addr =
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2);
}
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask)
static void __adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
@ -55,7 +35,17 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
}
}
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
{
unsigned long flags;
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
__adf_enable_vf2pf_interrupts(accel_dev, vf_mask);
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
static void __adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
@ -78,6 +68,22 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
}
}
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
{
unsigned long flags;
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
__adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, u32 vf_mask)
{
spin_lock(&accel_dev->pf.vf2pf_ints_lock);
__adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
spin_unlock(&accel_dev->pf.vf2pf_ints_lock);
}
static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
{
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
@ -186,7 +192,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
return ret;
}
EXPORT_SYMBOL_GPL(adf_iov_putmsg);
void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
{
@ -216,7 +221,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
ADF_PF2VF_MSGTYPE_SHIFT) |
(ADF_PFVF_COMPATIBILITY_VERSION <<
(ADF_PFVF_COMPAT_THIS_VERSION <<
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
dev_dbg(&GET_DEV(accel_dev),
@ -226,19 +231,19 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
if (vf_compat_ver < hw_data->min_iov_compat_ver) {
dev_err(&GET_DEV(accel_dev),
"VF (vers %d) incompatible with PF (vers %d)\n",
vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_INCOMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
} else if (vf_compat_ver > ADF_PFVF_COMPATIBILITY_VERSION) {
} else if (vf_compat_ver > ADF_PFVF_COMPAT_THIS_VERSION) {
dev_err(&GET_DEV(accel_dev),
"VF (vers %d) compat with PF (vers %d) unkn.\n",
vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_COMPAT_UNKNOWN <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
} else {
dev_dbg(&GET_DEV(accel_dev),
"VF (vers %d) compatible with PF (vers %d)\n",
vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_COMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
}
@ -251,7 +256,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
ADF_PF2VF_MSGTYPE_SHIFT) |
(ADF_PFVF_COMPATIBILITY_VERSION <<
(ADF_PFVF_COMPAT_THIS_VERSION <<
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
resp |= ADF_PF2VF_VF_COMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
@ -284,6 +289,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
/* re-enable interrupt on PF from this VF */
adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr));
return;
err:
dev_dbg(&GET_DEV(accel_dev), "Unknown message from VF%d (0x%x);\n",
@ -313,8 +319,10 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
msg = ADF_VF2PF_MSGORIGIN_SYSTEM;
msg |= ADF_VF2PF_MSGTYPE_COMPAT_VER_REQ << ADF_VF2PF_MSGTYPE_SHIFT;
msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255);
msg |= ADF_PFVF_COMPAT_THIS_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
BUILD_BUG_ON(ADF_PFVF_COMPAT_THIS_VERSION > 255);
reinit_completion(&accel_dev->vf.iov_msg_completion);
/* Send request from VF to PF */
ret = adf_iov_putmsg(accel_dev, msg, 0);
@ -338,14 +346,16 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
break;
case ADF_PF2VF_VF_COMPAT_UNKNOWN:
/* VF is newer than PF and decides whether it is compatible */
if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver)
if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver) {
accel_dev->vf.compatible = ADF_PF2VF_VF_COMPATIBLE;
break;
}
fallthrough;
case ADF_PF2VF_VF_INCOMPATIBLE:
dev_err(&GET_DEV(accel_dev),
"PF (vers %d) and VF (vers %d) are not compatible\n",
accel_dev->vf.pf_version,
ADF_PFVF_COMPATIBILITY_VERSION);
ADF_PFVF_COMPAT_THIS_VERSION);
return -EINVAL;
default:
dev_err(&GET_DEV(accel_dev),

View File

@ -52,7 +52,7 @@
* IN_USE_BY pattern as part of a collision control scheme (see adf_iov_putmsg).
*/
#define ADF_PFVF_COMPATIBILITY_VERSION 0x1 /* PF<->VF compat */
#define ADF_PFVF_COMPAT_THIS_VERSION 0x1 /* PF<->VF compat */
/* PF->VF messages */
#define ADF_PF2VF_INT BIT(0)

View File

@ -24,9 +24,8 @@ static void adf_iov_send_resp(struct work_struct *work)
kfree(pf2vf_resp);
}
static void adf_vf2pf_bh_handler(void *data)
void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info)
{
struct adf_accel_vf_info *vf_info = (struct adf_accel_vf_info *)data;
struct adf_pf2vf_resp *pf2vf_resp;
pf2vf_resp = kzalloc(sizeof(*pf2vf_resp), GFP_ATOMIC);
@ -52,9 +51,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
vf_info->accel_dev = accel_dev;
vf_info->vf_nr = i;
tasklet_init(&vf_info->vf2pf_bh_tasklet,
(void *)adf_vf2pf_bh_handler,
(unsigned long)vf_info);
mutex_init(&vf_info->pf2vf_lock);
ratelimit_state_init(&vf_info->vf2pf_ratelimit,
DEFAULT_RATELIMIT_INTERVAL,
@ -110,8 +106,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
hw_data->configure_iov_threads(accel_dev, false);
for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
tasklet_disable(&vf->vf2pf_bh_tasklet);
tasklet_kill(&vf->vf2pf_bh_tasklet);
mutex_destroy(&vf->pf2vf_lock);
}

View File

@ -5,14 +5,14 @@
#include "adf_pf2vf_msg.h"
/**
* adf_vf2pf_init() - send init msg to PF
* adf_vf2pf_notify_init() - send init msg to PF
* @accel_dev: Pointer to acceleration VF device.
*
* Function sends an init message from the VF to a PF
*
* Return: 0 on success, error code otherwise.
*/
int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
{
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
(ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT));
@ -25,17 +25,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
return 0;
}
EXPORT_SYMBOL_GPL(adf_vf2pf_init);
EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init);
/**
* adf_vf2pf_shutdown() - send shutdown msg to PF
* adf_vf2pf_notify_shutdown() - send shutdown msg to PF
* @accel_dev: Pointer to acceleration VF device.
*
* Function sends a shutdown message from the VF to a PF
*
* Return: void
*/
void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
{
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
(ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT));
@ -45,4 +45,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
dev_err(&GET_DEV(accel_dev),
"Failed to send Shutdown event to PF\n");
}
EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown);
EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown);

View File

@ -18,6 +18,7 @@
#include "adf_pf2vf_msg.h"
#define ADF_VINTSOU_OFFSET 0x204
#define ADF_VINTMSK_OFFSET 0x208
#define ADF_VINTSOU_BUN BIT(0)
#define ADF_VINTSOU_PF2VF BIT(1)
@ -28,6 +29,27 @@ struct adf_vf_stop_data {
struct work_struct work;
};
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
void __iomem *pmisc_bar_addr =
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x0);
}
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
void __iomem *pmisc_bar_addr =
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x2);
}
EXPORT_SYMBOL_GPL(adf_disable_pf2vf_interrupts);
static int adf_enable_msi(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@ -160,11 +182,21 @@ static irqreturn_t adf_isr(int irq, void *privdata)
struct adf_bar *pmisc =
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
void __iomem *pmisc_bar_addr = pmisc->virt_addr;
u32 v_int;
bool handled = false;
u32 v_int, v_mask;
/* Read VF INT source CSR to determine the source of VF interrupt */
v_int = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTSOU_OFFSET);
/* Read VF INT mask CSR to determine which sources are masked */
v_mask = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTMSK_OFFSET);
/*
* Recompute v_int ignoring sources that are masked. This is to
* avoid rescheduling the tasklet for interrupts already handled
*/
v_int &= ~v_mask;
/* Check for PF2VF interrupt */
if (v_int & ADF_VINTSOU_PF2VF) {
/* Disable PF to VF interrupt */
@ -172,7 +204,7 @@ static irqreturn_t adf_isr(int irq, void *privdata)
/* Schedule tasklet to handle interrupt BH */
tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet);
return IRQ_HANDLED;
handled = true;
}
/* Check bundle interrupt */
@ -184,10 +216,10 @@ static irqreturn_t adf_isr(int irq, void *privdata)
csr_ops->write_csr_int_flag_and_col(bank->csr_addr,
bank->bank_number, 0);
tasklet_hi_schedule(&bank->resp_handler);
return IRQ_HANDLED;
handled = true;
}
return IRQ_NONE;
return handled ? IRQ_HANDLED : IRQ_NONE;
}
static int adf_request_msi_irq(struct adf_accel_dev *accel_dev)
@ -285,6 +317,30 @@ err_out:
}
EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc);
/**
* adf_flush_vf_wq() - Flush workqueue for VF
* @accel_dev: Pointer to acceleration device.
*
* Function disables the PF/VF interrupts on the VF so that no new messages
* are received and flushes the workqueue 'adf_vf_stop_wq'.
*
* Return: void.
*/
void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
{
adf_disable_pf2vf_interrupts(accel_dev);
flush_workqueue(adf_vf_stop_wq);
}
EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
/**
* adf_init_vf_wq() - Init workqueue for VF
*
* Function init workqueue 'adf_vf_stop_wq' for VF.
*
* Return: 0 on success, error code otherwise.
*/
int __init adf_init_vf_wq(void)
{
adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0);

View File

@ -131,11 +131,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_DH895XCC_PF2VF_OFFSET(i);
}
static u32 get_vintmsk_offset(u32 i)
{
return ADF_DH895XCC_VINTMSK_OFFSET(i);
}
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@ -180,8 +175,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_DH895XCC_SMIA1_MASK);
}
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
return 0;
}
@ -213,8 +210,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
hw_data->get_num_aes = get_num_aes;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sram_bar_id = get_sram_bar_id;
@ -224,15 +219,17 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_sbr;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
hw_data->disable_iov = adf_disable_sriov;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}

View File

@ -35,7 +35,6 @@
#define ADF_DH895XCC_ERRSSMSH_EN BIT(3)
#define ADF_DH895XCC_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
#define ADF_DH895XCC_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96

View File

@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
ret = -EFAULT;
goto out_err_disable;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (ret) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_DH895XCC_DEVICE_NAME)) {
@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
goto out_err_free_reg;
goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
goto out_err_free_reg;
goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
out_err_disable_aer:
adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:

View File

@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_DH895XCCIOV_PF2VF_OFFSET;
}
static u32 get_vintmsk_offset(u32 i)
{
return ADF_DH895XCCIOV_VINTMSK_OFFSET;
}
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@ -81,10 +76,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
hw_data->send_admin_init = adf_vf2pf_init;
hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
hw_data->disable_iov = adf_vf2pf_shutdown;
hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@ -92,11 +87,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);

View File

@ -13,7 +13,6 @@
#define ADF_DH895XCCIOV_ETR_BAR 0
#define ADF_DH895XCCIOV_ETR_MAX_BANKS 1
#define ADF_DH895XCCIOV_PF2VF_OFFSET 0x200
#define ADF_DH895XCCIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);

View File

@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
ret = -EFAULT;
goto out_err_disable;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (ret) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_DH895XCCVF_DEVICE_NAME)) {
@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);

View File

@ -187,9 +187,9 @@ static int virtcrypto_init_vqs(struct virtio_crypto *vi)
if (ret)
goto err_free;
get_online_cpus();
cpus_read_lock();
virtcrypto_set_affinity(vi);
put_online_cpus();
cpus_read_unlock();
return 0;

View File

@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/arm-smccc.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <asm/archrandom.h>
static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
@ -42,3 +43,19 @@ u32 arm_smccc_get_version(void)
return smccc_version;
}
EXPORT_SYMBOL_GPL(arm_smccc_get_version);
static int __init smccc_devices_init(void)
{
struct platform_device *pdev;
if (smccc_trng_available) {
pdev = platform_device_register_simple("smccc_trng", -1,
NULL, 0);
if (IS_ERR(pdev))
pr_err("smccc_trng: could not register device: %ld\n",
PTR_ERR(pdev));
}
return 0;
}
device_initcall(smccc_devices_init);

View File

@ -3,6 +3,7 @@
/*
* Common values for the SM4 algorithm
* Copyright (C) 2018 ARM Limited or its affiliates.
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#ifndef _CRYPTO_SM4_H
@ -15,17 +16,29 @@
#define SM4_BLOCK_SIZE 16
#define SM4_RKEY_WORDS 32
struct crypto_sm4_ctx {
struct sm4_ctx {
u32 rkey_enc[SM4_RKEY_WORDS];
u32 rkey_dec[SM4_RKEY_WORDS];
};
int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len);
int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
/**
* sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
* @ctx: The location where the computed key will be stored.
* @in_key: The supplied key.
* @key_len: The length of the supplied key.
*
* Returns 0 on success. The function fails only if an invalid key size (or
* pointer) is supplied.
*/
int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
unsigned int key_len);
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
/**
* sm4_crypt_block - Encrypt or decrypt a single SM4 block
* @rk: The rkey_enc for encrypt or rkey_dec for decrypt
* @out: Buffer to store output data
* @in: Buffer containing the input data
*/
void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in);
#endif

View File

@ -12,6 +12,7 @@
#ifndef PADATA_H
#define PADATA_H
#include <linux/refcount.h>
#include <linux/compiler_types.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
@ -96,7 +97,7 @@ struct parallel_data {
struct padata_shell *ps;
struct padata_list __percpu *reorder_list;
struct padata_serial_queue __percpu *squeue;
atomic_t refcnt;
refcount_t refcnt;
unsigned int seq_nr;
unsigned int processed;
int cpu;

View File

@ -9,19 +9,6 @@
*
* Copyright (c) 2020 Oracle and/or its affiliates.
* Author: Daniel Jordan <daniel.m.jordan@oracle.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/completion.h>
@ -211,7 +198,7 @@ int padata_do_parallel(struct padata_shell *ps,
if ((pinst->flags & PADATA_RESET))
goto out;
atomic_inc(&pd->refcnt);
refcount_inc(&pd->refcnt);
padata->pd = pd;
padata->cb_cpu = *cb_cpu;
@ -383,7 +370,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
}
local_bh_enable();
if (atomic_sub_and_test(cnt, &pd->refcnt))
if (refcount_sub_and_test(cnt, &pd->refcnt))
padata_free_pd(pd);
}
@ -593,7 +580,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
padata_init_reorder_list(pd);
padata_init_squeues(pd);
pd->seq_nr = -1;
atomic_set(&pd->refcnt, 1);
refcount_set(&pd->refcnt, 1);
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@ -667,7 +654,7 @@ static int padata_replace(struct padata_instance *pinst)
synchronize_rcu();
list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
if (atomic_dec_and_test(&ps->opd->refcnt))
if (refcount_dec_and_test(&ps->opd->refcnt))
padata_free_pd(ps->opd);
pinst->flags &= ~PADATA_RESET;
@ -733,7 +720,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
struct cpumask *serial_mask, *parallel_mask;
int err = -EINVAL;
get_online_cpus();
cpus_read_lock();
mutex_lock(&pinst->lock);
switch (cpumask_type) {
@ -753,7 +740,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
out:
mutex_unlock(&pinst->lock);
put_online_cpus();
cpus_read_unlock();
return err;
}
@ -992,7 +979,7 @@ struct padata_instance *padata_alloc(const char *name)
if (!pinst->parallel_wq)
goto err_free_inst;
get_online_cpus();
cpus_read_lock();
pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
WQ_CPU_INTENSIVE, 1, name);
@ -1026,7 +1013,7 @@ struct padata_instance *padata_alloc(const char *name)
&pinst->cpu_dead_node);
#endif
put_online_cpus();
cpus_read_unlock();
return pinst;
@ -1036,7 +1023,7 @@ err_free_masks:
err_free_serial_wq:
destroy_workqueue(pinst->serial_wq);
err_put_cpus:
put_online_cpus();
cpus_read_unlock();
destroy_workqueue(pinst->parallel_wq);
err_free_inst:
kfree(pinst);
@ -1074,9 +1061,9 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
ps->pinst = pinst;
get_online_cpus();
cpus_read_lock();
pd = padata_alloc_pd(ps);
put_online_cpus();
cpus_read_unlock();
if (!pd)
goto out_free_ps;

View File

@ -128,3 +128,6 @@ config CRYPTO_LIB_CHACHA20POLY1305
config CRYPTO_LIB_SHA256
tristate
config CRYPTO_LIB_SM4
tristate

View File

@ -38,6 +38,9 @@ libpoly1305-y += poly1305.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o
libsm4-y := sm4.o
ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
libblake2s-y += blake2s-selftest.o
libchacha20poly1305-y += chacha20poly1305-selftest.o

View File

@ -73,7 +73,7 @@ void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
}
EXPORT_SYMBOL(blake2s256_hmac);
static int __init mod_init(void)
static int __init blake2s_mod_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!blake2s_selftest()))
@ -81,12 +81,12 @@ static int __init mod_init(void)
return 0;
}
static void __exit mod_exit(void)
static void __exit blake2s_mod_exit(void)
{
}
module_init(mod_init);
module_exit(mod_exit);
module_init(blake2s_mod_init);
module_exit(blake2s_mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");

View File

@ -354,7 +354,7 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len
}
EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
static int __init mod_init(void)
static int __init chacha20poly1305_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!chacha20poly1305_selftest()))
@ -362,12 +362,12 @@ static int __init mod_init(void)
return 0;
}
static void __exit mod_exit(void)
static void __exit chacha20poly1305_exit(void)
{
}
module_init(mod_init);
module_exit(mod_exit);
module_init(chacha20poly1305_init);
module_exit(chacha20poly1305_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");

View File

@ -13,7 +13,7 @@
#include <linux/module.h>
#include <linux/init.h>
static int __init mod_init(void)
static int __init curve25519_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!curve25519_selftest()))
@ -21,12 +21,12 @@ static int __init mod_init(void)
return 0;
}
static void __exit mod_exit(void)
static void __exit curve25519_exit(void)
{
}
module_init(mod_init);
module_exit(mod_exit);
module_init(curve25519_init);
module_exit(curve25519_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Curve25519 scalar multiplication");

176
lib/crypto/sm4.c Normal file
View File

@ -0,0 +1,176 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4, as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2018 ARM Limited or its affiliates.
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <asm/unaligned.h>
#include <crypto/sm4.h>
static const u32 fk[4] = {
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};
static const u32 __cacheline_aligned ck[32] = {
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
};
static const u8 __cacheline_aligned sbox[256] = {
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
};
static inline u32 sm4_t_non_lin_sub(u32 x)
{
u32 out;
out = (u32)sbox[x & 0xff];
out |= (u32)sbox[(x >> 8) & 0xff] << 8;
out |= (u32)sbox[(x >> 16) & 0xff] << 16;
out |= (u32)sbox[(x >> 24) & 0xff] << 24;
return out;
}
static inline u32 sm4_key_lin_sub(u32 x)
{
return x ^ rol32(x, 13) ^ rol32(x, 23);
}
static inline u32 sm4_enc_lin_sub(u32 x)
{
return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
}
static inline u32 sm4_key_sub(u32 x)
{
return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
}
static inline u32 sm4_enc_sub(u32 x)
{
return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
}
static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
{
return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
}
/**
* sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
* @ctx: The location where the computed key will be stored.
* @in_key: The supplied key.
* @key_len: The length of the supplied key.
*
* Returns 0 on success. The function fails only if an invalid key size (or
* pointer) is supplied.
*/
int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
u32 rk[4];
const u32 *key = (u32 *)in_key;
int i;
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
rk[0] = get_unaligned_be32(&key[0]) ^ fk[0];
rk[1] = get_unaligned_be32(&key[1]) ^ fk[1];
rk[2] = get_unaligned_be32(&key[2]) ^ fk[2];
rk[3] = get_unaligned_be32(&key[3]) ^ fk[3];
for (i = 0; i < 32; i += 4) {
rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
ctx->rkey_enc[i + 0] = rk[0];
ctx->rkey_enc[i + 1] = rk[1];
ctx->rkey_enc[i + 2] = rk[2];
ctx->rkey_enc[i + 3] = rk[3];
ctx->rkey_dec[31 - 0 - i] = rk[0];
ctx->rkey_dec[31 - 1 - i] = rk[1];
ctx->rkey_dec[31 - 2 - i] = rk[2];
ctx->rkey_dec[31 - 3 - i] = rk[3];
}
return 0;
}
EXPORT_SYMBOL_GPL(sm4_expandkey);
/**
* sm4_crypt_block - Encrypt or decrypt a single SM4 block
* @rk: The rkey_enc for encrypt or rkey_dec for decrypt
* @out: Buffer to store output data
* @in: Buffer containing the input data
*/
void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
{
u32 x[4], i;
x[0] = get_unaligned_be32(in + 0 * 4);
x[1] = get_unaligned_be32(in + 1 * 4);
x[2] = get_unaligned_be32(in + 2 * 4);
x[3] = get_unaligned_be32(in + 3 * 4);
for (i = 0; i < 32; i += 4) {
x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
}
put_unaligned_be32(x[3 - 0], out + 0 * 4);
put_unaligned_be32(x[3 - 1], out + 1 * 4);
put_unaligned_be32(x[3 - 2], out + 2 * 4);
put_unaligned_be32(x[3 - 3], out + 3 * 4);
}
EXPORT_SYMBOL_GPL(sm4_crypt_block);
MODULE_DESCRIPTION("Generic SM4 library");
MODULE_LICENSE("GPL v2");

View File

@ -148,7 +148,7 @@ int mpi_resize(MPI a, unsigned nlimbs)
return 0; /* no need to do it */
if (a->d) {
p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
if (!p)
return -ENOMEM;
memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));