diff --git a/arch/arm/mach-omap2/clock2420_data.c b/arch/arm/mach-omap2/clock2420_data.c index 23bc981574f6..37d65d62ed8f 100644 --- a/arch/arm/mach-omap2/clock2420_data.c +++ b/arch/arm/mach-omap2/clock2420_data.c @@ -1836,7 +1836,7 @@ static struct omap_clk omap2420_clks[] = { CLK(NULL, "vlynq_ick", &vlynq_ick, CK_242X), CLK(NULL, "vlynq_fck", &vlynq_fck, CK_242X), CLK(NULL, "des_ick", &des_ick, CK_242X), - CLK(NULL, "sha_ick", &sha_ick, CK_242X), + CLK("omap-sham", "ick", &sha_ick, CK_242X), CLK("omap_rng", "ick", &rng_ick, CK_242X), CLK(NULL, "aes_ick", &aes_ick, CK_242X), CLK(NULL, "pka_ick", &pka_ick, CK_242X), diff --git a/arch/arm/mach-omap2/clock2430_data.c b/arch/arm/mach-omap2/clock2430_data.c index 2df50d97deb2..b33118fb6a87 100644 --- a/arch/arm/mach-omap2/clock2430_data.c +++ b/arch/arm/mach-omap2/clock2430_data.c @@ -1924,7 +1924,7 @@ static struct omap_clk omap2430_clks[] = { CLK(NULL, "sdma_ick", &sdma_ick, CK_243X), CLK(NULL, "sdrc_ick", &sdrc_ick, CK_243X), CLK(NULL, "des_ick", &des_ick, CK_243X), - CLK(NULL, "sha_ick", &sha_ick, CK_243X), + CLK("omap-sham", "ick", &sha_ick, CK_243X), CLK("omap_rng", "ick", &rng_ick, CK_243X), CLK(NULL, "aes_ick", &aes_ick, CK_243X), CLK(NULL, "pka_ick", &pka_ick, CK_243X), diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c index 833be485c89e..41b155acfca7 100644 --- a/arch/arm/mach-omap2/clock3xxx_data.c +++ b/arch/arm/mach-omap2/clock3xxx_data.c @@ -3284,7 +3284,7 @@ static struct omap_clk omap3xxx_clks[] = { CLK("mmci-omap-hs.2", "ick", &mmchs3_ick, CK_3430ES2 | CK_AM35XX), CLK(NULL, "icr_ick", &icr_ick, CK_343X), CLK(NULL, "aes2_ick", &aes2_ick, CK_343X), - CLK(NULL, "sha12_ick", &sha12_ick, CK_343X), + CLK("omap-sham", "ick", &sha12_ick, CK_343X), CLK(NULL, "des2_ick", &des2_ick, CK_343X), CLK("mmci-omap-hs.1", "ick", &mmchs2_ick, CK_3XXX), CLK("mmci-omap-hs.0", "ick", &mmchs1_ick, CK_3XXX), diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 705a7a30a87f..03e6c9ed82a4 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "mux.h" @@ -486,8 +487,10 @@ static void omap_init_pmu(void) } -#ifdef CONFIG_OMAP_SHA1_MD5 -static struct resource sha1_md5_resources[] = { +#if defined(CONFIG_CRYPTO_DEV_OMAP_SHAM) || defined(CONFIG_CRYPTO_DEV_OMAP_SHAM_MODULE) + +#ifdef CONFIG_ARCH_OMAP2 +static struct resource omap2_sham_resources[] = { { .start = OMAP24XX_SEC_SHA1MD5_BASE, .end = OMAP24XX_SEC_SHA1MD5_BASE + 0x64, @@ -498,20 +501,55 @@ static struct resource sha1_md5_resources[] = { .flags = IORESOURCE_IRQ, } }; +static int omap2_sham_resources_sz = ARRAY_SIZE(omap2_sham_resources); +#else +#define omap2_sham_resources NULL +#define omap2_sham_resources_sz 0 +#endif -static struct platform_device sha1_md5_device = { - .name = "OMAP SHA1/MD5", +#ifdef CONFIG_ARCH_OMAP3 +static struct resource omap3_sham_resources[] = { + { + .start = OMAP34XX_SEC_SHA1MD5_BASE, + .end = OMAP34XX_SEC_SHA1MD5_BASE + 0x64, + .flags = IORESOURCE_MEM, + }, + { + .start = INT_34XX_SHA1MD52_IRQ, + .flags = IORESOURCE_IRQ, + }, + { + .start = OMAP34XX_DMA_SHA1MD5_RX, + .flags = IORESOURCE_DMA, + } +}; +static int omap3_sham_resources_sz = ARRAY_SIZE(omap3_sham_resources); +#else +#define omap3_sham_resources NULL +#define omap3_sham_resources_sz 0 +#endif + +static struct platform_device sham_device = { + .name = "omap-sham", .id = -1, - .num_resources = ARRAY_SIZE(sha1_md5_resources), - .resource = sha1_md5_resources, }; -static void omap_init_sha1_md5(void) +static void omap_init_sham(void) { - platform_device_register(&sha1_md5_device); + if (cpu_is_omap24xx()) { + sham_device.resource = omap2_sham_resources; + sham_device.num_resources = omap2_sham_resources_sz; + } else if (cpu_is_omap34xx()) { + sham_device.resource = omap3_sham_resources; + sham_device.num_resources = omap3_sham_resources_sz; + } else { + pr_err("%s: platform not supported\n", __func__); + return; + } + platform_device_register(&sham_device); } #else -static inline void omap_init_sha1_md5(void) { } +static inline void omap_init_sham(void) { } #endif /*-------------------------------------------------------------------------*/ @@ -869,7 +907,7 @@ static int __init omap2_init_devices(void) omap_init_pmu(); omap_hdq_init(); omap_init_sti(); - omap_init_sha1_md5(); + omap_init_sham(); omap_init_vout(); return 0; diff --git a/arch/arm/plat-omap/include/plat/omap34xx.h b/arch/arm/plat-omap/include/plat/omap34xx.h index 2845fdc658b0..98fc8b4a4cc4 100644 --- a/arch/arm/plat-omap/include/plat/omap34xx.h +++ b/arch/arm/plat-omap/include/plat/omap34xx.h @@ -82,5 +82,10 @@ #define OMAP34XX_MAILBOX_BASE (L4_34XX_BASE + 0x94000) +/* Security */ +#define OMAP34XX_SEC_BASE (L4_34XX_BASE + 0xA0000) +#define OMAP34XX_SEC_SHA1MD5_BASE (OMAP34XX_SEC_BASE + 0x23000) +#define OMAP34XX_SEC_AES_BASE (OMAP34XX_SEC_BASE + 0x25000) + #endif /* __ASM_ARCH_OMAP3_H */ diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 20bb0e1ac681..ff16756a51c1 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,9 @@ #define IN IN1 #define KEY %xmm2 #define IV %xmm3 +#define BSWAP_MASK %xmm10 +#define CTR %xmm11 +#define INC %xmm12 #define KEYP %rdi #define OUTP %rsi @@ -42,6 +45,7 @@ #define T1 %r10 #define TKEYP T1 #define T2 %r11 +#define TCTR_LOW T2 _key_expansion_128: _key_expansion_256a: @@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec) movups IV, (IVP) .Lcbc_dec_just_ret: ret + +.align 16 +.Lbswap_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* + * _aesni_inc_init: internal ABI + * setup registers used by _aesni_inc + * input: + * IV + * output: + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + */ +_aesni_inc_init: + movaps .Lbswap_mask, BSWAP_MASK + movaps IV, CTR + PSHUFB_XMM BSWAP_MASK CTR + mov $1, TCTR_LOW + MOVQ_R64_XMM TCTR_LOW INC + MOVQ_R64_XMM CTR TCTR_LOW + ret + +/* + * _aesni_inc: internal ABI + * Increase IV by 1, IV is in big endian + * input: + * IV + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + * output: + * IV: Increase by 1 + * changed: + * CTR: == output IV, in little endian + * TCTR_LOW: == lower qword of CTR + */ +_aesni_inc: + paddq INC, CTR + add $1, TCTR_LOW + jnc .Linc_low + pslldq $8, INC + paddq INC, CTR + psrldq $8, INC +.Linc_low: + movaps CTR, IV + PSHUFB_XMM BSWAP_MASK IV + ret + +/* + * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_ctr_enc) + cmp $16, LEN + jb .Lctr_enc_just_ret + mov 480(KEYP), KLEN + movups (IVP), IV + call _aesni_inc_init + cmp $64, LEN + jb .Lctr_enc_loop1 +.align 4 +.Lctr_enc_loop4: + movaps IV, STATE1 + call _aesni_inc + movups (INP), IN1 + movaps IV, STATE2 + call _aesni_inc + movups 0x10(INP), IN2 + movaps IV, STATE3 + call _aesni_inc + movups 0x20(INP), IN3 + movaps IV, STATE4 + call _aesni_inc + movups 0x30(INP), IN4 + call _aesni_enc4 + pxor IN1, STATE1 + movups STATE1, (OUTP) + pxor IN2, STATE2 + movups STATE2, 0x10(OUTP) + pxor IN3, STATE3 + movups STATE3, 0x20(OUTP) + pxor IN4, STATE4 + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lctr_enc_loop4 + cmp $16, LEN + jb .Lctr_enc_ret +.align 4 +.Lctr_enc_loop1: + movaps IV, STATE + call _aesni_inc + movups (INP), IN + call _aesni_enc1 + pxor IN, STATE + movups STATE, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lctr_enc_loop1 +.Lctr_enc_ret: + movups IV, (IVP) +.Lctr_enc_just_ret: + ret diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 49c552c060e9..2cb3dcc4490a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,8 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) { @@ -321,6 +324,72 @@ static struct crypto_alg blk_cbc_alg = { }, }; +static void ctr_crypt_final(struct crypto_aes_ctx *ctx, + struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[AES_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + aesni_enc(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); +} + +static int ctr_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + if (walk.nbytes) { + ctr_crypt_final(ctx, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_ctr_alg = { + .cra_name = "__ctr-aes-aesni", + .cra_driver_name = "__driver-ctr-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}; + static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) { @@ -467,13 +536,11 @@ static struct crypto_alg ablk_cbc_alg = { }, }; -#ifdef HAS_CTR static int ablk_ctr_init(struct crypto_tfm *tfm) { struct cryptd_ablkcipher *cryptd_tfm; - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", - 0, 0); + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0); if (IS_ERR(cryptd_tfm)) return PTR_ERR(cryptd_tfm); ablk_init_common(tfm, cryptd_tfm); @@ -500,11 +567,50 @@ static struct crypto_alg ablk_ctr_alg = { .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, + .decrypt = ablk_encrypt, .geniv = "chainiv", }, }, }; + +#ifdef HAS_CTR +static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher( + "rfc3686(__driver-ctr-aes-aesni)", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_rfc3686_ctr_alg = { + .cra_name = "rfc3686(ctr(aes))", + .cra_driver_name = "rfc3686-ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list), + .cra_init = ablk_rfc3686_ctr_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, + .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, + .ivsize = CTR_RFC3686_IV_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + .geniv = "seqiv", + }, + }, +}; #endif #ifdef HAS_LRW @@ -640,13 +746,17 @@ static int __init aesni_init(void) goto blk_ecb_err; if ((err = crypto_register_alg(&blk_cbc_alg))) goto blk_cbc_err; + if ((err = crypto_register_alg(&blk_ctr_alg))) + goto blk_ctr_err; if ((err = crypto_register_alg(&ablk_ecb_alg))) goto ablk_ecb_err; if ((err = crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; -#ifdef HAS_CTR if ((err = crypto_register_alg(&ablk_ctr_alg))) goto ablk_ctr_err; +#ifdef HAS_CTR + if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) + goto ablk_rfc3686_ctr_err; #endif #ifdef HAS_LRW if ((err = crypto_register_alg(&ablk_lrw_alg))) @@ -675,13 +785,17 @@ ablk_pcbc_err: ablk_lrw_err: #endif #ifdef HAS_CTR + crypto_unregister_alg(&ablk_rfc3686_ctr_alg); +ablk_rfc3686_ctr_err: +#endif crypto_unregister_alg(&ablk_ctr_alg); ablk_ctr_err: -#endif crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: + crypto_unregister_alg(&blk_ctr_alg); +blk_ctr_err: crypto_unregister_alg(&blk_cbc_alg); blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); @@ -705,10 +819,12 @@ static void __exit aesni_exit(void) crypto_unregister_alg(&ablk_lrw_alg); #endif #ifdef HAS_CTR - crypto_unregister_alg(&ablk_ctr_alg); + crypto_unregister_alg(&ablk_rfc3686_ctr_alg); #endif + crypto_unregister_alg(&ablk_ctr_alg); crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); + crypto_unregister_alg(&blk_ctr_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); crypto_unregister_alg(&__aesni_alg); diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 14cf526091f9..280bf7fb6aba 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -7,7 +7,66 @@ #ifdef __ASSEMBLY__ +#define REG_NUM_INVALID 100 + +#define REG_TYPE_R64 0 +#define REG_TYPE_XMM 1 +#define REG_TYPE_INVALID 100 + + .macro R64_NUM opd r64 + \opd = REG_NUM_INVALID + .ifc \r64,%rax + \opd = 0 + .endif + .ifc \r64,%rcx + \opd = 1 + .endif + .ifc \r64,%rdx + \opd = 2 + .endif + .ifc \r64,%rbx + \opd = 3 + .endif + .ifc \r64,%rsp + \opd = 4 + .endif + .ifc \r64,%rbp + \opd = 5 + .endif + .ifc \r64,%rsi + \opd = 6 + .endif + .ifc \r64,%rdi + \opd = 7 + .endif + .ifc \r64,%r8 + \opd = 8 + .endif + .ifc \r64,%r9 + \opd = 9 + .endif + .ifc \r64,%r10 + \opd = 10 + .endif + .ifc \r64,%r11 + \opd = 11 + .endif + .ifc \r64,%r12 + \opd = 12 + .endif + .ifc \r64,%r13 + \opd = 13 + .endif + .ifc \r64,%r14 + \opd = 14 + .endif + .ifc \r64,%r15 + \opd = 15 + .endif + .endm + .macro XMM_NUM opd xmm + \opd = REG_NUM_INVALID .ifc \xmm,%xmm0 \opd = 0 .endif @@ -58,13 +117,25 @@ .endif .endm + .macro REG_TYPE type reg + R64_NUM reg_type_r64 \reg + XMM_NUM reg_type_xmm \reg + .if reg_type_r64 <> REG_NUM_INVALID + \type = REG_TYPE_R64 + .elseif reg_type_xmm <> REG_NUM_INVALID + \type = REG_TYPE_XMM + .else + \type = REG_TYPE_INVALID + .endif + .endm + .macro PFX_OPD_SIZE .byte 0x66 .endm - .macro PFX_REX opd1 opd2 - .if (\opd1 | \opd2) & 8 - .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) + .macro PFX_REX opd1 opd2 W=0 + .if ((\opd1 | \opd2) & 8) || \W + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) .endif .endm @@ -145,6 +216,25 @@ .byte 0x0f, 0x38, 0xdf MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 .endm + + .macro MOVQ_R64_XMM opd1 opd2 + REG_TYPE movq_r64_xmm_opd1_type \opd1 + .if movq_r64_xmm_opd1_type == REG_TYPE_XMM + XMM_NUM movq_r64_xmm_opd1 \opd1 + R64_NUM movq_r64_xmm_opd2 \opd2 + .else + R64_NUM movq_r64_xmm_opd1 \opd1 + XMM_NUM movq_r64_xmm_opd2 \opd2 + .endif + PFX_OPD_SIZE + PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 + .if movq_r64_xmm_opd1_type == REG_TYPE_XMM + .byte 0x0f, 0x7e + .else + .byte 0x0f, 0x6e + .endif + MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 + .endm #endif #endif diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index fe980dae1727..98a66103f4f2 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -24,10 +24,287 @@ #include #include +#include + #include "internal.h" static const char *skcipher_default_geniv __read_mostly; +struct ablkcipher_buffer { + struct list_head entry; + struct scatter_walk dst; + unsigned int len; + void *data; +}; + +enum { + ABLKCIPHER_WALK_SLOW = 1 << 0, +}; + +static inline void ablkcipher_buffer_write(struct ablkcipher_buffer *p) +{ + scatterwalk_copychunks(p->data, &p->dst, p->len, 1); +} + +void __ablkcipher_walk_complete(struct ablkcipher_walk *walk) +{ + struct ablkcipher_buffer *p, *tmp; + + list_for_each_entry_safe(p, tmp, &walk->buffers, entry) { + ablkcipher_buffer_write(p); + list_del(&p->entry); + kfree(p); + } +} +EXPORT_SYMBOL_GPL(__ablkcipher_walk_complete); + +static inline void ablkcipher_queue_write(struct ablkcipher_walk *walk, + struct ablkcipher_buffer *p) +{ + p->dst = walk->out; + list_add_tail(&p->entry, &walk->buffers); +} + +/* Get a spot of the specified length that does not straddle a page. + * The caller needs to ensure that there is enough space for this operation. + */ +static inline u8 *ablkcipher_get_spot(u8 *start, unsigned int len) +{ + u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK); + return max(start, end_page); +} + +static inline unsigned int ablkcipher_done_slow(struct ablkcipher_walk *walk, + unsigned int bsize) +{ + unsigned int n = bsize; + + for (;;) { + unsigned int len_this_page = scatterwalk_pagelen(&walk->out); + + if (len_this_page > n) + len_this_page = n; + scatterwalk_advance(&walk->out, n); + if (n == len_this_page) + break; + n -= len_this_page; + scatterwalk_start(&walk->out, scatterwalk_sg_next(walk->out.sg)); + } + + return bsize; +} + +static inline unsigned int ablkcipher_done_fast(struct ablkcipher_walk *walk, + unsigned int n) +{ + scatterwalk_advance(&walk->in, n); + scatterwalk_advance(&walk->out, n); + + return n; +} + +static int ablkcipher_walk_next(struct ablkcipher_request *req, + struct ablkcipher_walk *walk); + +int ablkcipher_walk_done(struct ablkcipher_request *req, + struct ablkcipher_walk *walk, int err) +{ + struct crypto_tfm *tfm = req->base.tfm; + unsigned int nbytes = 0; + + if (likely(err >= 0)) { + unsigned int n = walk->nbytes - err; + + if (likely(!(walk->flags & ABLKCIPHER_WALK_SLOW))) + n = ablkcipher_done_fast(walk, n); + else if (WARN_ON(err)) { + err = -EINVAL; + goto err; + } else + n = ablkcipher_done_slow(walk, n); + + nbytes = walk->total - n; + err = 0; + } + + scatterwalk_done(&walk->in, 0, nbytes); + scatterwalk_done(&walk->out, 1, nbytes); + +err: + walk->total = nbytes; + walk->nbytes = nbytes; + + if (nbytes) { + crypto_yield(req->base.flags); + return ablkcipher_walk_next(req, walk); + } + + if (walk->iv != req->info) + memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize); + if (walk->iv_buffer) + kfree(walk->iv_buffer); + + return err; +} +EXPORT_SYMBOL_GPL(ablkcipher_walk_done); + +static inline int ablkcipher_next_slow(struct ablkcipher_request *req, + struct ablkcipher_walk *walk, + unsigned int bsize, + unsigned int alignmask, + void **src_p, void **dst_p) +{ + unsigned aligned_bsize = ALIGN(bsize, alignmask + 1); + struct ablkcipher_buffer *p; + void *src, *dst, *base; + unsigned int n; + + n = ALIGN(sizeof(struct ablkcipher_buffer), alignmask + 1); + n += (aligned_bsize * 3 - (alignmask + 1) + + (alignmask & ~(crypto_tfm_ctx_alignment() - 1))); + + p = kmalloc(n, GFP_ATOMIC); + if (!p) + ablkcipher_walk_done(req, walk, -ENOMEM); + + base = p + 1; + + dst = (u8 *)ALIGN((unsigned long)base, alignmask + 1); + src = dst = ablkcipher_get_spot(dst, bsize); + + p->len = bsize; + p->data = dst; + + scatterwalk_copychunks(src, &walk->in, bsize, 0); + + ablkcipher_queue_write(walk, p); + + walk->nbytes = bsize; + walk->flags |= ABLKCIPHER_WALK_SLOW; + + *src_p = src; + *dst_p = dst; + + return 0; +} + +static inline int ablkcipher_copy_iv(struct ablkcipher_walk *walk, + struct crypto_tfm *tfm, + unsigned int alignmask) +{ + unsigned bs = walk->blocksize; + unsigned int ivsize = tfm->crt_ablkcipher.ivsize; + unsigned aligned_bs = ALIGN(bs, alignmask + 1); + unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) - + (alignmask + 1); + u8 *iv; + + size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + walk->iv_buffer = kmalloc(size, GFP_ATOMIC); + if (!walk->iv_buffer) + return -ENOMEM; + + iv = (u8 *)ALIGN((unsigned long)walk->iv_buffer, alignmask + 1); + iv = ablkcipher_get_spot(iv, bs) + aligned_bs; + iv = ablkcipher_get_spot(iv, bs) + aligned_bs; + iv = ablkcipher_get_spot(iv, ivsize); + + walk->iv = memcpy(iv, walk->iv, ivsize); + return 0; +} + +static inline int ablkcipher_next_fast(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + walk->src.page = scatterwalk_page(&walk->in); + walk->src.offset = offset_in_page(walk->in.offset); + walk->dst.page = scatterwalk_page(&walk->out); + walk->dst.offset = offset_in_page(walk->out.offset); + + return 0; +} + +static int ablkcipher_walk_next(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + struct crypto_tfm *tfm = req->base.tfm; + unsigned int alignmask, bsize, n; + void *src, *dst; + int err; + + alignmask = crypto_tfm_alg_alignmask(tfm); + n = walk->total; + if (unlikely(n < crypto_tfm_alg_blocksize(tfm))) { + req->base.flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; + return ablkcipher_walk_done(req, walk, -EINVAL); + } + + walk->flags &= ~ABLKCIPHER_WALK_SLOW; + src = dst = NULL; + + bsize = min(walk->blocksize, n); + n = scatterwalk_clamp(&walk->in, n); + n = scatterwalk_clamp(&walk->out, n); + + if (n < bsize || + !scatterwalk_aligned(&walk->in, alignmask) || + !scatterwalk_aligned(&walk->out, alignmask)) { + err = ablkcipher_next_slow(req, walk, bsize, alignmask, + &src, &dst); + goto set_phys_lowmem; + } + + walk->nbytes = n; + + return ablkcipher_next_fast(req, walk); + +set_phys_lowmem: + if (err >= 0) { + walk->src.page = virt_to_page(src); + walk->dst.page = virt_to_page(dst); + walk->src.offset = ((unsigned long)src & (PAGE_SIZE - 1)); + walk->dst.offset = ((unsigned long)dst & (PAGE_SIZE - 1)); + } + + return err; +} + +static int ablkcipher_walk_first(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + struct crypto_tfm *tfm = req->base.tfm; + unsigned int alignmask; + + alignmask = crypto_tfm_alg_alignmask(tfm); + if (WARN_ON_ONCE(in_irq())) + return -EDEADLK; + + walk->nbytes = walk->total; + if (unlikely(!walk->total)) + return 0; + + walk->iv_buffer = NULL; + walk->iv = req->info; + if (unlikely(((unsigned long)walk->iv & alignmask))) { + int err = ablkcipher_copy_iv(walk, tfm, alignmask); + if (err) + return err; + } + + scatterwalk_start(&walk->in, walk->in.sg); + scatterwalk_start(&walk->out, walk->out.sg); + + return ablkcipher_walk_next(req, walk); +} + +int ablkcipher_walk_phys(struct ablkcipher_request *req, + struct ablkcipher_walk *walk) +{ + walk->blocksize = crypto_tfm_alg_blocksize(req->base.tfm); + return ablkcipher_walk_first(req, walk); +} +EXPORT_SYMBOL_GPL(ablkcipher_walk_phys); + static int setkey_unaligned(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int keylen) { diff --git a/crypto/algapi.c b/crypto/algapi.c index 76fae27ed01c..c3cf1a69a47a 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -544,7 +544,7 @@ int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg, { int err = -EINVAL; - if (frontend && (alg->cra_flags ^ frontend->type) & frontend->maskset) + if ((alg->cra_flags ^ frontend->type) & frontend->maskset) goto out; spawn->frontend = frontend; diff --git a/crypto/authenc.c b/crypto/authenc.c index 05eb32e0d949..b9884ee0adb6 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -181,6 +181,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq, struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff); + unsigned int cryptlen = req->cryptlen; if (err) goto out; @@ -196,6 +197,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq, goto out; authsize = crypto_aead_authsize(authenc); + cryptlen -= authsize; ihash = ahreq->result + authsize; scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); @@ -209,7 +211,7 @@ static void authenc_verify_ahash_update_done(struct crypto_async_request *areq, ablkcipher_request_set_callback(abreq, aead_request_flags(req), req->base.complete, req->base.data); ablkcipher_request_set_crypt(abreq, req->src, req->dst, - req->cryptlen, req->iv); + cryptlen, req->iv); err = crypto_ablkcipher_decrypt(abreq); @@ -228,11 +230,13 @@ static void authenc_verify_ahash_done(struct crypto_async_request *areq, struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); struct authenc_request_ctx *areq_ctx = aead_request_ctx(req); struct ahash_request *ahreq = (void *)(areq_ctx->tail + ctx->reqoff); + unsigned int cryptlen = req->cryptlen; if (err) goto out; authsize = crypto_aead_authsize(authenc); + cryptlen -= authsize; ihash = ahreq->result + authsize; scatterwalk_map_and_copy(ihash, areq_ctx->sg, areq_ctx->cryptlen, authsize, 0); @@ -246,7 +250,7 @@ static void authenc_verify_ahash_done(struct crypto_async_request *areq, ablkcipher_request_set_callback(abreq, aead_request_flags(req), req->base.complete, req->base.data); ablkcipher_request_set_crypt(abreq, req->src, req->dst, - req->cryptlen, req->iv); + cryptlen, req->iv); err = crypto_ablkcipher_decrypt(abreq); diff --git a/crypto/internal.h b/crypto/internal.h index 2d226362e594..d4384b08ab29 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -6,7 +6,7 @@ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 80201241b698..247178cb98ec 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -315,16 +315,13 @@ out_free_inst: goto out; } -static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb) +static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb, + u32 type, u32 mask) { struct crypto_instance *inst; struct crypto_alg *alg; - struct crypto_attr_type *algt; - algt = crypto_get_attr_type(tb); - - alg = crypto_get_attr_alg(tb, algt->type, - (algt->mask & CRYPTO_ALG_TYPE_MASK)); + alg = crypto_get_attr_alg(tb, type, (mask & CRYPTO_ALG_TYPE_MASK)); if (IS_ERR(alg)) return ERR_CAST(alg); @@ -365,7 +362,7 @@ static struct crypto_instance *pcrypt_alloc(struct rtattr **tb) switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_AEAD: - return pcrypt_alloc_aead(tb); + return pcrypt_alloc_aead(tb, algt->type, algt->mask); } return ERR_PTR(-EINVAL); diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 3de89a424401..41e529af0773 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -68,7 +68,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, void scatterwalk_done(struct scatter_walk *walk, int out, int more) { - if (!offset_in_page(walk->offset) || !more) + if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more) scatterwalk_pagedone(walk, out, more); } EXPORT_SYMBOL_GPL(scatterwalk_done); diff --git a/crypto/shash.c b/crypto/shash.c index 91f7b9d83881..22fd9433141f 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -37,7 +37,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, u8 *buffer, *alignbuffer; int err; - absize = keylen + (alignmask & ~(CRYPTO_MINALIGN - 1)); + absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); buffer = kmalloc(absize, GFP_KERNEL); if (!buffer) return -ENOMEM; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a35159947a26..3ca68f9fc14d 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -394,6 +394,17 @@ out: return 0; } +static void test_hash_sg_init(struct scatterlist *sg) +{ + int i; + + sg_init_table(sg, TVMEMSIZE); + for (i = 0; i < TVMEMSIZE; i++) { + sg_set_buf(sg + i, tvmem[i], PAGE_SIZE); + memset(tvmem[i], 0xff, PAGE_SIZE); + } +} + static void test_hash_speed(const char *algo, unsigned int sec, struct hash_speed *speed) { @@ -423,12 +434,7 @@ static void test_hash_speed(const char *algo, unsigned int sec, goto out; } - sg_init_table(sg, TVMEMSIZE); - for (i = 0; i < TVMEMSIZE; i++) { - sg_set_buf(sg + i, tvmem[i], PAGE_SIZE); - memset(tvmem[i], 0xff, PAGE_SIZE); - } - + test_hash_sg_init(sg); for (i = 0; speed[i].blen != 0; i++) { if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { printk(KERN_ERR @@ -437,6 +443,9 @@ static void test_hash_speed(const char *algo, unsigned int sec, goto out; } + if (speed[i].klen) + crypto_hash_setkey(tfm, tvmem[0], speed[i].klen); + printk(KERN_INFO "test%3u " "(%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); @@ -458,6 +467,250 @@ out: crypto_free_hash(tfm); } +struct tcrypt_result { + struct completion completion; + int err; +}; + +static void tcrypt_complete(struct crypto_async_request *req, int err) +{ + struct tcrypt_result *res = req->data; + + if (err == -EINPROGRESS) + return; + + res->err = err; + complete(&res->completion); +} + +static inline int do_one_ahash_op(struct ahash_request *req, int ret) +{ + if (ret == -EINPROGRESS || ret == -EBUSY) { + struct tcrypt_result *tr = req->base.data; + + ret = wait_for_completion_interruptible(&tr->completion); + if (!ret) + ret = tr->err; + INIT_COMPLETION(tr->completion); + } + return ret; +} + +static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, + char *out, int sec) +{ + unsigned long start, end; + int bcount; + int ret; + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + ret = do_one_ahash_op(req, crypto_ahash_digest(req)); + if (ret) + return ret; + } + + printk("%6u opers/sec, %9lu bytes/sec\n", + bcount / sec, ((long)bcount * blen) / sec); + + return 0; +} + +static int test_ahash_jiffies(struct ahash_request *req, int blen, + int plen, char *out, int sec) +{ + unsigned long start, end; + int bcount, pcount; + int ret; + + if (plen == blen) + return test_ahash_jiffies_digest(req, blen, out, sec); + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + ret = crypto_ahash_init(req); + if (ret) + return ret; + for (pcount = 0; pcount < blen; pcount += plen) { + ret = do_one_ahash_op(req, crypto_ahash_update(req)); + if (ret) + return ret; + } + /* we assume there is enough space in 'out' for the result */ + ret = do_one_ahash_op(req, crypto_ahash_final(req)); + if (ret) + return ret; + } + + pr_cont("%6u opers/sec, %9lu bytes/sec\n", + bcount / sec, ((long)bcount * blen) / sec); + + return 0; +} + +static int test_ahash_cycles_digest(struct ahash_request *req, int blen, + char *out) +{ + unsigned long cycles = 0; + int ret, i; + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + ret = do_one_ahash_op(req, crypto_ahash_digest(req)); + if (ret) + goto out; + } + + /* The real thing. */ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + + ret = do_one_ahash_op(req, crypto_ahash_digest(req)); + if (ret) + goto out; + + end = get_cycles(); + + cycles += end - start; + } + +out: + if (ret) + return ret; + + pr_cont("%6lu cycles/operation, %4lu cycles/byte\n", + cycles / 8, cycles / (8 * blen)); + + return 0; +} + +static int test_ahash_cycles(struct ahash_request *req, int blen, + int plen, char *out) +{ + unsigned long cycles = 0; + int i, pcount, ret; + + if (plen == blen) + return test_ahash_cycles_digest(req, blen, out); + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + ret = crypto_ahash_init(req); + if (ret) + goto out; + for (pcount = 0; pcount < blen; pcount += plen) { + ret = do_one_ahash_op(req, crypto_ahash_update(req)); + if (ret) + goto out; + } + ret = do_one_ahash_op(req, crypto_ahash_final(req)); + if (ret) + goto out; + } + + /* The real thing. */ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + + ret = crypto_ahash_init(req); + if (ret) + goto out; + for (pcount = 0; pcount < blen; pcount += plen) { + ret = do_one_ahash_op(req, crypto_ahash_update(req)); + if (ret) + goto out; + } + ret = do_one_ahash_op(req, crypto_ahash_final(req)); + if (ret) + goto out; + + end = get_cycles(); + + cycles += end - start; + } + +out: + if (ret) + return ret; + + pr_cont("%6lu cycles/operation, %4lu cycles/byte\n", + cycles / 8, cycles / (8 * blen)); + + return 0; +} + +static void test_ahash_speed(const char *algo, unsigned int sec, + struct hash_speed *speed) +{ + struct scatterlist sg[TVMEMSIZE]; + struct tcrypt_result tresult; + struct ahash_request *req; + struct crypto_ahash *tfm; + static char output[1024]; + int i, ret; + + printk(KERN_INFO "\ntesting speed of async %s\n", algo); + + tfm = crypto_alloc_ahash(algo, 0, 0); + if (IS_ERR(tfm)) { + pr_err("failed to load transform for %s: %ld\n", + algo, PTR_ERR(tfm)); + return; + } + + if (crypto_ahash_digestsize(tfm) > sizeof(output)) { + pr_err("digestsize(%u) > outputbuffer(%zu)\n", + crypto_ahash_digestsize(tfm), sizeof(output)); + goto out; + } + + test_hash_sg_init(sg); + req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!req) { + pr_err("ahash request allocation failure\n"); + goto out; + } + + init_completion(&tresult.completion); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &tresult); + + for (i = 0; speed[i].blen != 0; i++) { + if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { + pr_err("template (%u) too big for tvmem (%lu)\n", + speed[i].blen, TVMEMSIZE * PAGE_SIZE); + break; + } + + pr_info("test%3u " + "(%5u byte blocks,%5u bytes per update,%4u updates): ", + i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); + + ahash_request_set_crypt(req, sg, output, speed[i].plen); + + if (sec) + ret = test_ahash_jiffies(req, speed[i].blen, + speed[i].plen, output, sec); + else + ret = test_ahash_cycles(req, speed[i].blen, + speed[i].plen, output); + + if (ret) { + pr_err("hashing failed ret=%d\n", ret); + break; + } + } + + ahash_request_free(req); + +out: + crypto_free_ahash(tfm); +} + static void test_available(void) { char **name = check; @@ -881,9 +1134,87 @@ static int do_test(int m) test_hash_speed("rmd320", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; + case 318: + test_hash_speed("ghash-generic", sec, hash_speed_template_16); + if (mode > 300 && mode < 400) break; + case 399: break; + case 400: + /* fall through */ + + case 401: + test_ahash_speed("md4", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 402: + test_ahash_speed("md5", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 403: + test_ahash_speed("sha1", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 404: + test_ahash_speed("sha256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 405: + test_ahash_speed("sha384", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 406: + test_ahash_speed("sha512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 407: + test_ahash_speed("wp256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 408: + test_ahash_speed("wp384", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 409: + test_ahash_speed("wp512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 410: + test_ahash_speed("tgr128", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 411: + test_ahash_speed("tgr160", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 412: + test_ahash_speed("tgr192", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 413: + test_ahash_speed("sha224", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 414: + test_ahash_speed("rmd128", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 415: + test_ahash_speed("rmd160", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 416: + test_ahash_speed("rmd256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 417: + test_ahash_speed("rmd320", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + + case 499: + break; + case 1000: test_available(); break; diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index 966bbfaf95b1..10cb925132c9 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -25,6 +25,7 @@ struct cipher_speed_template { struct hash_speed { unsigned int blen; /* buffer length */ unsigned int plen; /* per-update length */ + unsigned int klen; /* key length */ }; /* @@ -83,4 +84,32 @@ static struct hash_speed generic_hash_speed_template[] = { { .blen = 0, .plen = 0, } }; +static struct hash_speed hash_speed_template_16[] = { + { .blen = 16, .plen = 16, .klen = 16, }, + { .blen = 64, .plen = 16, .klen = 16, }, + { .blen = 64, .plen = 64, .klen = 16, }, + { .blen = 256, .plen = 16, .klen = 16, }, + { .blen = 256, .plen = 64, .klen = 16, }, + { .blen = 256, .plen = 256, .klen = 16, }, + { .blen = 1024, .plen = 16, .klen = 16, }, + { .blen = 1024, .plen = 256, .klen = 16, }, + { .blen = 1024, .plen = 1024, .klen = 16, }, + { .blen = 2048, .plen = 16, .klen = 16, }, + { .blen = 2048, .plen = 256, .klen = 16, }, + { .blen = 2048, .plen = 1024, .klen = 16, }, + { .blen = 2048, .plen = 2048, .klen = 16, }, + { .blen = 4096, .plen = 16, .klen = 16, }, + { .blen = 4096, .plen = 256, .klen = 16, }, + { .blen = 4096, .plen = 1024, .klen = 16, }, + { .blen = 4096, .plen = 4096, .klen = 16, }, + { .blen = 8192, .plen = 16, .klen = 16, }, + { .blen = 8192, .plen = 256, .klen = 16, }, + { .blen = 8192, .plen = 1024, .klen = 16, }, + { .blen = 8192, .plen = 4096, .klen = 16, }, + { .blen = 8192, .plen = 8192, .klen = 16, }, + + /* End marker */ + { .blen = 0, .plen = 0, .klen = 0, } +}; + #endif /* _CRYPTO_TCRYPT_H */ diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c494d7610be1..5c8aaa0cb0b9 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -153,8 +153,21 @@ static void testmgr_free_buf(char *buf[XBUFSIZE]) free_page((unsigned long)buf[i]); } +static int do_one_async_hash_op(struct ahash_request *req, + struct tcrypt_result *tr, + int ret) +{ + if (ret == -EINPROGRESS || ret == -EBUSY) { + ret = wait_for_completion_interruptible(&tr->completion); + if (!ret) + ret = tr->err; + INIT_COMPLETION(tr->completion); + } + return ret; +} + static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, - unsigned int tcount) + unsigned int tcount, bool use_digest) { const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); unsigned int i, j, k, temp; @@ -206,23 +219,36 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, } ahash_request_set_crypt(req, sg, result, template[i].psize); - ret = crypto_ahash_digest(req); - switch (ret) { - case 0: - break; - case -EINPROGRESS: - case -EBUSY: - ret = wait_for_completion_interruptible( - &tresult.completion); - if (!ret && !(ret = tresult.err)) { - INIT_COMPLETION(tresult.completion); - break; + if (use_digest) { + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_digest(req)); + if (ret) { + pr_err("alg: hash: digest failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; + } + } else { + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_init(req)); + if (ret) { + pr_err("alt: hash: init failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; + } + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_update(req)); + if (ret) { + pr_err("alt: hash: update failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; + } + ret = do_one_async_hash_op(req, &tresult, + crypto_ahash_final(req)); + if (ret) { + pr_err("alt: hash: final failed on test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; } - /* fall through */ - default: - printk(KERN_ERR "alg: hash: digest failed on test %d " - "for %s: ret=%d\n", j, algo, -ret); - goto out; } if (memcmp(result, template[i].digest, @@ -1402,7 +1428,11 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver, return PTR_ERR(tfm); } - err = test_hash(tfm, desc->suite.hash.vecs, desc->suite.hash.count); + err = test_hash(tfm, desc->suite.hash.vecs, + desc->suite.hash.count, true); + if (!err) + err = test_hash(tfm, desc->suite.hash.vecs, + desc->suite.hash.count, false); crypto_free_ahash(tfm); return err; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index fb765173d41c..74e35377fd30 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1669,17 +1669,73 @@ static struct hash_testvec aes_xcbc128_tv_template[] = { } }; -#define VMAC_AES_TEST_VECTORS 1 -static char vmac_string[128] = {'\x01', '\x01', '\x01', '\x01', +#define VMAC_AES_TEST_VECTORS 8 +static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01', '\x02', '\x03', '\x02', '\x02', '\x02', '\x04', '\x01', '\x07', '\x04', '\x01', '\x04', '\x03',}; +static char vmac_string2[128] = {'a', 'b', 'c',}; +static char vmac_string3[128] = {'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + 'a', 'b', 'c', 'a', 'b', 'c', + }; + static struct hash_testvec aes_vmac128_tv_template[] = { { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = NULL, + .digest = "\x07\x58\x80\x35\x77\xa4\x7b\x54", + .psize = 0, + .ksize = 16, + }, { .key = "\x00\x01\x02\x03\x04\x05\x06\x07" "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .plaintext = vmac_string, - .digest = "\xcb\xd7\x8a\xfd\xb7\x33\x79\xe7", + .plaintext = vmac_string1, + .digest = "\xce\xf5\x3c\xd3\xae\x68\x8c\xa1", + .psize = 128, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = vmac_string2, + .digest = "\xc9\x27\xb0\x73\x81\xbd\x14\x2d", + .psize = 128, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = vmac_string3, + .digest = "\x8d\x1a\x95\x8c\x98\x47\x0b\x19", + .psize = 128, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = NULL, + .digest = "\x3b\x89\xa1\x26\x9e\x55\x8f\x84", + .psize = 0, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = vmac_string1, + .digest = "\xab\x5e\xab\xb0\xf6\x8d\x74\xc2", + .psize = 128, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = vmac_string2, + .digest = "\x11\x15\x68\x42\x3d\x7b\x09\xdf", + .psize = 128, + .ksize = 16, + }, { + .key = "abcdefghijklmnop", + .plaintext = vmac_string3, + .digest = "\x8b\x32\x8f\xe1\xed\x8f\xfa\xd4", .psize = 128, .ksize = 16, }, diff --git a/crypto/vmac.c b/crypto/vmac.c index 0a9468e575de..0999274a27ac 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -43,6 +43,8 @@ const u64 m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */ const u64 m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ +#define pe64_to_cpup le64_to_cpup /* Prefer little endian */ + #ifdef __LITTLE_ENDIAN #define INDEX_HIGH 1 #define INDEX_LOW 0 @@ -110,8 +112,8 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh = rl = 0; \ for (i = 0; i < nw; i += 2) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, rl, th, tl); \ } \ } while (0) @@ -121,11 +123,11 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh1 = rl1 = rh = rl = 0; \ for (i = 0; i < nw; i += 2) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2], \ - le64_to_cpup((mp)+i+1)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ ADD128(rh1, rl1, th, tl); \ } \ } while (0) @@ -136,17 +138,17 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh = rl = 0; \ for (i = 0; i < nw; i += 8) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2], \ - le64_to_cpup((mp)+i+3)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4], \ - le64_to_cpup((mp)+i+5)+(kp)[i+5]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ + pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6], \ - le64_to_cpup((mp)+i+7)+(kp)[i+7]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ + pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ ADD128(rh, rl, th, tl); \ } \ } while (0) @@ -156,29 +158,29 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; u64 th, tl; \ rh1 = rl1 = rh = rl = 0; \ for (i = 0; i < nw; i += 8) { \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i], \ - le64_to_cpup((mp)+i+1)+(kp)[i+1]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2], \ - le64_to_cpup((mp)+i+1)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ ADD128(rh1, rl1, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2], \ - le64_to_cpup((mp)+i+3)+(kp)[i+3]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ + pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+4], \ - le64_to_cpup((mp)+i+3)+(kp)[i+5]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+4], \ + pe64_to_cpup((mp)+i+3)+(kp)[i+5]); \ ADD128(rh1, rl1, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4], \ - le64_to_cpup((mp)+i+5)+(kp)[i+5]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ + pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+6], \ - le64_to_cpup((mp)+i+5)+(kp)[i+7]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+6], \ + pe64_to_cpup((mp)+i+5)+(kp)[i+7]); \ ADD128(rh1, rl1, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6], \ - le64_to_cpup((mp)+i+7)+(kp)[i+7]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ + pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ ADD128(rh, rl, th, tl); \ - MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+8], \ - le64_to_cpup((mp)+i+7)+(kp)[i+9]); \ + MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+8], \ + pe64_to_cpup((mp)+i+7)+(kp)[i+9]); \ ADD128(rh1, rl1, th, tl); \ } \ } while (0) @@ -216,8 +218,8 @@ const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ int i; \ rh = rl = t = 0; \ for (i = 0; i < nw; i += 2) { \ - t1 = le64_to_cpup(mp+i) + kp[i]; \ - t2 = le64_to_cpup(mp+i+1) + kp[i+1]; \ + t1 = pe64_to_cpup(mp+i) + kp[i]; \ + t2 = pe64_to_cpup(mp+i+1) + kp[i+1]; \ m2 = MUL32(t1 >> 32, t2); \ m1 = MUL32(t1, t2 >> 32); \ ADD128(rh, rl, MUL32(t1 >> 32, t2 >> 32), \ @@ -322,8 +324,7 @@ static void vhash_abort(struct vmac_ctx *ctx) ctx->first_block_processed = 0; } -static u64 l3hash(u64 p1, u64 p2, - u64 k1, u64 k2, u64 len) +static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) { u64 rh, rl, t, z = 0; @@ -474,7 +475,7 @@ static u64 vmac(unsigned char m[], unsigned int mbytes, } p = be64_to_cpup(out_p + i); h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx); - return p + h; + return le64_to_cpu(p + h); } static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx) @@ -549,10 +550,6 @@ static int vmac_setkey(struct crypto_shash *parent, static int vmac_init(struct shash_desc *pdesc) { - struct crypto_shash *parent = pdesc->tfm; - struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); - - memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); return 0; } diff --git a/drivers/char/random.c b/drivers/char/random.c index 2fd3d39995d5..8d85587b6d4f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -257,6 +257,7 @@ #define INPUT_POOL_WORDS 128 #define OUTPUT_POOL_WORDS 32 #define SEC_XFER_SIZE 512 +#define EXTRACT_SIZE 10 /* * The minimum number of bits of entropy before we wake up a read on @@ -414,7 +415,7 @@ struct entropy_store { unsigned add_ptr; int entropy_count; int input_rotate; - __u8 *last_data; + __u8 last_data[EXTRACT_SIZE]; }; static __u32 input_pool_data[INPUT_POOL_WORDS]; @@ -714,8 +715,6 @@ void add_disk_randomness(struct gendisk *disk) } #endif -#define EXTRACT_SIZE 10 - /********************************************************************* * * Entropy extraction routines @@ -862,7 +861,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, while (nbytes) { extract_buf(r, tmp); - if (r->last_data) { + if (fips_enabled) { spin_lock_irqsave(&r->lock, flags); if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) panic("Hardware RNG duplicated output!\n"); @@ -951,9 +950,6 @@ static void init_std_data(struct entropy_store *r) now = ktime_get_real(); mix_pool_bytes(r, &now, sizeof(now)); mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); - /* Enable continuous test in fips mode */ - if (fips_enabled) - r->last_data = kmalloc(EXTRACT_SIZE, GFP_KERNEL); } static int rand_initialize(void) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index b08403d7d1ca..fbf94cf496f0 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -170,6 +170,18 @@ config CRYPTO_DEV_MV_CESA Currently the driver supports AES in ECB and CBC mode without DMA. +config CRYPTO_DEV_NIAGARA2 + tristate "Niagara2 Stream Processing Unit driver" + select CRYPTO_ALGAPI + depends on SPARC64 + help + Each core of a Niagara2 processor contains a Stream + Processing Unit, which itself contains several cryptographic + sub-units. One set provides the Modular Arithmetic Unit, + used for SSL offload. The other set provides the Cipher + Group, which can perform encryption, decryption, hashing, + checksumming, and raw copies. + config CRYPTO_DEV_HIFN_795X tristate "Driver HIFN 795x crypto accelerator chips" select CRYPTO_DES @@ -222,4 +234,13 @@ config CRYPTO_DEV_PPC4XX help This option allows you to have support for AMCC crypto acceleration. +config CRYPTO_DEV_OMAP_SHAM + tristate "Support for OMAP SHA1/MD5 hw accelerator" + depends on ARCH_OMAP2 || ARCH_OMAP3 + select CRYPTO_SHA1 + select CRYPTO_MD5 + help + OMAP processors have SHA1/MD5 hw accelerator. Select this if you + want to use the OMAP module for SHA1/MD5 algorithms. + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 6ffcb3f7f942..6dbbe00c4524 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1,8 +1,12 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o +obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o +n2_crypto-objs := n2_core.o n2_asm.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ +obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o + diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c index c7a5a43ba691..09389dd2f96b 100644 --- a/drivers/crypto/geode-aes.c +++ b/drivers/crypto/geode-aes.c @@ -15,14 +15,14 @@ #include #include -#include -#include +#include +#include #include "geode-aes.h" /* Static structures */ -static void __iomem * _iobase; +static void __iomem *_iobase; static spinlock_t lock; /* Write a 128 bit field (either a writable key or IV) */ @@ -30,7 +30,7 @@ static inline void _writefield(u32 offset, void *value) { int i; - for(i = 0; i < 4; i++) + for (i = 0; i < 4; i++) iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4)); } @@ -39,7 +39,7 @@ static inline void _readfield(u32 offset, void *value) { int i; - for(i = 0; i < 4; i++) + for (i = 0; i < 4; i++) ((u32 *) value)[i] = ioread32(_iobase + offset + (i * 4)); } @@ -59,7 +59,7 @@ do_crypt(void *src, void *dst, int len, u32 flags) do { status = ioread32(_iobase + AES_INTR_REG); cpu_relax(); - } while(!(status & AES_INTRA_PENDING) && --counter); + } while (!(status & AES_INTRA_PENDING) && --counter); /* Clear the event */ iowrite32((status & 0xFF) | AES_INTRA_PENDING, _iobase + AES_INTR_REG); @@ -317,7 +317,7 @@ geode_cbc_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); op->iv = walk.iv; - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_CBC; @@ -349,7 +349,7 @@ geode_cbc_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); op->iv = walk.iv; - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_CBC; @@ -429,7 +429,7 @@ geode_ecb_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_ECB; @@ -459,7 +459,7 @@ geode_ecb_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - while((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes)) { op->src = walk.src.virt.addr, op->dst = walk.dst.virt.addr; op->mode = AES_MODE_ECB; @@ -518,11 +518,12 @@ static int __devinit geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id) { int ret; - - if ((ret = pci_enable_device(dev))) + ret = pci_enable_device(dev); + if (ret) return ret; - if ((ret = pci_request_regions(dev, "geode-aes"))) + ret = pci_request_regions(dev, "geode-aes"); + if (ret) goto eenable; _iobase = pci_iomap(dev, 0, 0); @@ -537,13 +538,16 @@ geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Clear any pending activity */ iowrite32(AES_INTR_PENDING | AES_INTR_MASK, _iobase + AES_INTR_REG); - if ((ret = crypto_register_alg(&geode_alg))) + ret = crypto_register_alg(&geode_alg); + if (ret) goto eiomap; - if ((ret = crypto_register_alg(&geode_ecb_alg))) + ret = crypto_register_alg(&geode_ecb_alg); + if (ret) goto ealg; - if ((ret = crypto_register_alg(&geode_cbc_alg))) + ret = crypto_register_alg(&geode_cbc_alg); + if (ret) goto eecb; printk(KERN_NOTICE "geode-aes: GEODE AES engine enabled.\n"); diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index 73e8b1713b54..16fce3aadf4d 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -638,7 +638,7 @@ struct hifn_crypto_alg #define ASYNC_FLAGS_MISALIGNED (1<<0) -struct ablkcipher_walk +struct hifn_cipher_walk { struct scatterlist cache[ASYNC_SCATTERLIST_CACHE]; u32 flags; @@ -657,7 +657,7 @@ struct hifn_request_context u8 *iv; unsigned int ivsize; u8 op, type, mode, unused; - struct ablkcipher_walk walk; + struct hifn_cipher_walk walk; }; #define crypto_alg_to_hifn(a) container_of(a, struct hifn_crypto_alg, alg) @@ -1417,7 +1417,7 @@ static int hifn_setup_dma(struct hifn_device *dev, return 0; } -static int ablkcipher_walk_init(struct ablkcipher_walk *w, +static int hifn_cipher_walk_init(struct hifn_cipher_walk *w, int num, gfp_t gfp_flags) { int i; @@ -1442,7 +1442,7 @@ static int ablkcipher_walk_init(struct ablkcipher_walk *w, return i; } -static void ablkcipher_walk_exit(struct ablkcipher_walk *w) +static void hifn_cipher_walk_exit(struct hifn_cipher_walk *w) { int i; @@ -1486,8 +1486,8 @@ static int ablkcipher_add(unsigned int *drestp, struct scatterlist *dst, return idx; } -static int ablkcipher_walk(struct ablkcipher_request *req, - struct ablkcipher_walk *w) +static int hifn_cipher_walk(struct ablkcipher_request *req, + struct hifn_cipher_walk *w) { struct scatterlist *dst, *t; unsigned int nbytes = req->nbytes, offset, copy, diff; @@ -1600,12 +1600,12 @@ static int hifn_setup_session(struct ablkcipher_request *req) } if (rctx->walk.flags & ASYNC_FLAGS_MISALIGNED) { - err = ablkcipher_walk_init(&rctx->walk, idx, GFP_ATOMIC); + err = hifn_cipher_walk_init(&rctx->walk, idx, GFP_ATOMIC); if (err < 0) return err; } - sg_num = ablkcipher_walk(req, &rctx->walk); + sg_num = hifn_cipher_walk(req, &rctx->walk); if (sg_num < 0) { err = sg_num; goto err_out_exit; @@ -1806,7 +1806,7 @@ static void hifn_process_ready(struct ablkcipher_request *req, int error) kunmap_atomic(saddr, KM_SOFTIRQ0); } - ablkcipher_walk_exit(&rctx->walk); + hifn_cipher_walk_exit(&rctx->walk); } req->base.complete(&req->base, error); diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 6f29012bcc43..e095422b58dd 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -15,8 +15,14 @@ #include #include #include +#include +#include #include "mv_cesa.h" + +#define MV_CESA "MV-CESA:" +#define MAX_HW_HASH_SIZE 0xFFFF + /* * STM: * /---------------------------------------\ @@ -39,10 +45,12 @@ enum engine_status { * @dst_sg_it: sg iterator for dst * @sg_src_left: bytes left in src to process (scatter list) * @src_start: offset to add to src start position (scatter list) - * @crypt_len: length of current crypt process + * @crypt_len: length of current hw crypt/hash process + * @hw_nbytes: total bytes to process in hw for this request + * @copy_back: whether to copy data back (crypt) or not (hash) * @sg_dst_left: bytes left dst to process in this scatter list * @dst_start: offset to add to dst start position (scatter list) - * @total_req_bytes: total number of bytes processed (request). + * @hw_processed_bytes: number of bytes processed by hw (request). * * sg helper are used to iterate over the scatterlist. Since the size of the * SRAM may be less than the scatter size, this struct struct is used to keep @@ -51,15 +59,19 @@ enum engine_status { struct req_progress { struct sg_mapping_iter src_sg_it; struct sg_mapping_iter dst_sg_it; + void (*complete) (void); + void (*process) (int is_first); /* src mostly */ int sg_src_left; int src_start; int crypt_len; + int hw_nbytes; /* dst mostly */ + int copy_back; int sg_dst_left; int dst_start; - int total_req_bytes; + int hw_processed_bytes; }; struct crypto_priv { @@ -72,10 +84,12 @@ struct crypto_priv { spinlock_t lock; struct crypto_queue queue; enum engine_status eng_st; - struct ablkcipher_request *cur_req; + struct crypto_async_request *cur_req; struct req_progress p; int max_req_size; int sram_size; + int has_sha1; + int has_hmac_sha1; }; static struct crypto_priv *cpg; @@ -97,6 +111,31 @@ struct mv_req_ctx { int decrypt; }; +enum hash_op { + COP_SHA1, + COP_HMAC_SHA1 +}; + +struct mv_tfm_hash_ctx { + struct crypto_shash *fallback; + struct crypto_shash *base_hash; + u32 ivs[2 * SHA1_DIGEST_SIZE / 4]; + int count_add; + enum hash_op op; +}; + +struct mv_req_hash_ctx { + u64 count; + u32 state[SHA1_DIGEST_SIZE / 4]; + u8 buffer[SHA1_BLOCK_SIZE]; + int first_hash; /* marks that we don't have previous state */ + int last_chunk; /* marks that this is the 'final' request */ + int extra_bytes; /* unprocessed bytes in buffer */ + enum hash_op op; + int count_add; + struct scatterlist dummysg; +}; + static void compute_aes_dec_key(struct mv_ctx *ctx) { struct crypto_aes_ctx gen_aes_key; @@ -144,32 +183,51 @@ static int mv_setkey_aes(struct crypto_ablkcipher *cipher, const u8 *key, return 0; } -static void setup_data_in(struct ablkcipher_request *req) +static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len) { int ret; - void *buf; + void *sbuf; + int copied = 0; - if (!cpg->p.sg_src_left) { - ret = sg_miter_next(&cpg->p.src_sg_it); - BUG_ON(!ret); - cpg->p.sg_src_left = cpg->p.src_sg_it.length; - cpg->p.src_start = 0; + while (1) { + if (!p->sg_src_left) { + ret = sg_miter_next(&p->src_sg_it); + BUG_ON(!ret); + p->sg_src_left = p->src_sg_it.length; + p->src_start = 0; + } + + sbuf = p->src_sg_it.addr + p->src_start; + + if (p->sg_src_left <= len - copied) { + memcpy(dbuf + copied, sbuf, p->sg_src_left); + copied += p->sg_src_left; + p->sg_src_left = 0; + if (copied >= len) + break; + } else { + int copy_len = len - copied; + memcpy(dbuf + copied, sbuf, copy_len); + p->src_start += copy_len; + p->sg_src_left -= copy_len; + break; + } } +} - cpg->p.crypt_len = min(cpg->p.sg_src_left, cpg->max_req_size); - - buf = cpg->p.src_sg_it.addr; - buf += cpg->p.src_start; - - memcpy(cpg->sram + SRAM_DATA_IN_START, buf, cpg->p.crypt_len); - - cpg->p.sg_src_left -= cpg->p.crypt_len; - cpg->p.src_start += cpg->p.crypt_len; +static void setup_data_in(void) +{ + struct req_progress *p = &cpg->p; + int data_in_sram = + min(p->hw_nbytes - p->hw_processed_bytes, cpg->max_req_size); + copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START + p->crypt_len, + data_in_sram - p->crypt_len); + p->crypt_len = data_in_sram; } static void mv_process_current_q(int first_block) { - struct ablkcipher_request *req = cpg->cur_req; + struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req); struct mv_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req); struct sec_accel_config op; @@ -179,6 +237,7 @@ static void mv_process_current_q(int first_block) op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB; break; case COP_AES_CBC: + default: op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC; op.enc_iv = ENC_IV_POINT(SRAM_DATA_IV) | ENC_IV_BUF_POINT(SRAM_DATA_IV_BUF); @@ -211,7 +270,7 @@ static void mv_process_current_q(int first_block) ENC_P_DST(SRAM_DATA_OUT_START); op.enc_key_p = SRAM_DATA_KEY_P; - setup_data_in(req); + setup_data_in(); op.enc_len = cpg->p.crypt_len; memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config)); @@ -228,91 +287,294 @@ static void mv_process_current_q(int first_block) static void mv_crypto_algo_completion(void) { - struct ablkcipher_request *req = cpg->cur_req; + struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req); struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req); + sg_miter_stop(&cpg->p.src_sg_it); + sg_miter_stop(&cpg->p.dst_sg_it); + if (req_ctx->op != COP_AES_CBC) return ; memcpy(req->info, cpg->sram + SRAM_DATA_IV_BUF, 16); } +static void mv_process_hash_current(int first_block) +{ + struct ahash_request *req = ahash_request_cast(cpg->cur_req); + struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req); + struct req_progress *p = &cpg->p; + struct sec_accel_config op = { 0 }; + int is_last; + + switch (req_ctx->op) { + case COP_SHA1: + default: + op.config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1; + break; + case COP_HMAC_SHA1: + op.config = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1; + break; + } + + op.mac_src_p = + MAC_SRC_DATA_P(SRAM_DATA_IN_START) | MAC_SRC_TOTAL_LEN((u32) + req_ctx-> + count); + + setup_data_in(); + + op.mac_digest = + MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len); + op.mac_iv = + MAC_INNER_IV_P(SRAM_HMAC_IV_IN) | + MAC_OUTER_IV_P(SRAM_HMAC_IV_OUT); + + is_last = req_ctx->last_chunk + && (p->hw_processed_bytes + p->crypt_len >= p->hw_nbytes) + && (req_ctx->count <= MAX_HW_HASH_SIZE); + if (req_ctx->first_hash) { + if (is_last) + op.config |= CFG_NOT_FRAG; + else + op.config |= CFG_FIRST_FRAG; + + req_ctx->first_hash = 0; + } else { + if (is_last) + op.config |= CFG_LAST_FRAG; + else + op.config |= CFG_MID_FRAG; + } + + memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config)); + + writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0); + /* GO */ + writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD); + + /* + * XXX: add timer if the interrupt does not occur for some mystery + * reason + */ +} + +static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx, + struct shash_desc *desc) +{ + int i; + struct sha1_state shash_state; + + shash_state.count = ctx->count + ctx->count_add; + for (i = 0; i < 5; i++) + shash_state.state[i] = ctx->state[i]; + memcpy(shash_state.buffer, ctx->buffer, sizeof(shash_state.buffer)); + return crypto_shash_import(desc, &shash_state); +} + +static int mv_hash_final_fallback(struct ahash_request *req) +{ + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req); + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(tfm_ctx->fallback)]; + } desc; + int rc; + + desc.shash.tfm = tfm_ctx->fallback; + desc.shash.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + if (unlikely(req_ctx->first_hash)) { + crypto_shash_init(&desc.shash); + crypto_shash_update(&desc.shash, req_ctx->buffer, + req_ctx->extra_bytes); + } else { + /* only SHA1 for now.... + */ + rc = mv_hash_import_sha1_ctx(req_ctx, &desc.shash); + if (rc) + goto out; + } + rc = crypto_shash_final(&desc.shash, req->result); +out: + return rc; +} + +static void mv_hash_algo_completion(void) +{ + struct ahash_request *req = ahash_request_cast(cpg->cur_req); + struct mv_req_hash_ctx *ctx = ahash_request_ctx(req); + + if (ctx->extra_bytes) + copy_src_to_buf(&cpg->p, ctx->buffer, ctx->extra_bytes); + sg_miter_stop(&cpg->p.src_sg_it); + + ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A); + ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B); + ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C); + ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D); + ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E); + + if (likely(ctx->last_chunk)) { + if (likely(ctx->count <= MAX_HW_HASH_SIZE)) { + memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF, + crypto_ahash_digestsize(crypto_ahash_reqtfm + (req))); + } else + mv_hash_final_fallback(req); + } +} + static void dequeue_complete_req(void) { - struct ablkcipher_request *req = cpg->cur_req; + struct crypto_async_request *req = cpg->cur_req; void *buf; int ret; + cpg->p.hw_processed_bytes += cpg->p.crypt_len; + if (cpg->p.copy_back) { + int need_copy_len = cpg->p.crypt_len; + int sram_offset = 0; + do { + int dst_copy; - cpg->p.total_req_bytes += cpg->p.crypt_len; - do { - int dst_copy; + if (!cpg->p.sg_dst_left) { + ret = sg_miter_next(&cpg->p.dst_sg_it); + BUG_ON(!ret); + cpg->p.sg_dst_left = cpg->p.dst_sg_it.length; + cpg->p.dst_start = 0; + } - if (!cpg->p.sg_dst_left) { - ret = sg_miter_next(&cpg->p.dst_sg_it); - BUG_ON(!ret); - cpg->p.sg_dst_left = cpg->p.dst_sg_it.length; - cpg->p.dst_start = 0; - } + buf = cpg->p.dst_sg_it.addr; + buf += cpg->p.dst_start; - buf = cpg->p.dst_sg_it.addr; - buf += cpg->p.dst_start; + dst_copy = min(need_copy_len, cpg->p.sg_dst_left); - dst_copy = min(cpg->p.crypt_len, cpg->p.sg_dst_left); + memcpy(buf, + cpg->sram + SRAM_DATA_OUT_START + sram_offset, + dst_copy); + sram_offset += dst_copy; + cpg->p.sg_dst_left -= dst_copy; + need_copy_len -= dst_copy; + cpg->p.dst_start += dst_copy; + } while (need_copy_len > 0); + } - memcpy(buf, cpg->sram + SRAM_DATA_OUT_START, dst_copy); - - cpg->p.sg_dst_left -= dst_copy; - cpg->p.crypt_len -= dst_copy; - cpg->p.dst_start += dst_copy; - } while (cpg->p.crypt_len > 0); + cpg->p.crypt_len = 0; BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE); - if (cpg->p.total_req_bytes < req->nbytes) { + if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) { /* process next scatter list entry */ cpg->eng_st = ENGINE_BUSY; - mv_process_current_q(0); + cpg->p.process(0); } else { - sg_miter_stop(&cpg->p.src_sg_it); - sg_miter_stop(&cpg->p.dst_sg_it); - mv_crypto_algo_completion(); + cpg->p.complete(); cpg->eng_st = ENGINE_IDLE; - req->base.complete(&req->base, 0); + local_bh_disable(); + req->complete(req, 0); + local_bh_enable(); } } static int count_sgs(struct scatterlist *sl, unsigned int total_bytes) { int i = 0; + size_t cur_len; - do { - total_bytes -= sl[i].length; - i++; - - } while (total_bytes > 0); + while (1) { + cur_len = sl[i].length; + ++i; + if (total_bytes > cur_len) + total_bytes -= cur_len; + else + break; + } return i; } -static void mv_enqueue_new_req(struct ablkcipher_request *req) +static void mv_start_new_crypt_req(struct ablkcipher_request *req) { + struct req_progress *p = &cpg->p; int num_sgs; - cpg->cur_req = req; - memset(&cpg->p, 0, sizeof(struct req_progress)); + cpg->cur_req = &req->base; + memset(p, 0, sizeof(struct req_progress)); + p->hw_nbytes = req->nbytes; + p->complete = mv_crypto_algo_completion; + p->process = mv_process_current_q; + p->copy_back = 1; num_sgs = count_sgs(req->src, req->nbytes); - sg_miter_start(&cpg->p.src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); + sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); num_sgs = count_sgs(req->dst, req->nbytes); - sg_miter_start(&cpg->p.dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG); + sg_miter_start(&p->dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG); + mv_process_current_q(1); } +static void mv_start_new_hash_req(struct ahash_request *req) +{ + struct req_progress *p = &cpg->p; + struct mv_req_hash_ctx *ctx = ahash_request_ctx(req); + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + int num_sgs, hw_bytes, old_extra_bytes, rc; + cpg->cur_req = &req->base; + memset(p, 0, sizeof(struct req_progress)); + hw_bytes = req->nbytes + ctx->extra_bytes; + old_extra_bytes = ctx->extra_bytes; + + if (unlikely(ctx->extra_bytes)) { + memcpy(cpg->sram + SRAM_DATA_IN_START, ctx->buffer, + ctx->extra_bytes); + p->crypt_len = ctx->extra_bytes; + } + + memcpy(cpg->sram + SRAM_HMAC_IV_IN, tfm_ctx->ivs, sizeof(tfm_ctx->ivs)); + + if (unlikely(!ctx->first_hash)) { + writel(ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A); + writel(ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B); + writel(ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C); + writel(ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D); + writel(ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E); + } + + ctx->extra_bytes = hw_bytes % SHA1_BLOCK_SIZE; + if (ctx->extra_bytes != 0 + && (!ctx->last_chunk || ctx->count > MAX_HW_HASH_SIZE)) + hw_bytes -= ctx->extra_bytes; + else + ctx->extra_bytes = 0; + + num_sgs = count_sgs(req->src, req->nbytes); + sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG); + + if (hw_bytes) { + p->hw_nbytes = hw_bytes; + p->complete = mv_hash_algo_completion; + p->process = mv_process_hash_current; + + mv_process_hash_current(1); + } else { + copy_src_to_buf(p, ctx->buffer + old_extra_bytes, + ctx->extra_bytes - old_extra_bytes); + sg_miter_stop(&p->src_sg_it); + if (ctx->last_chunk) + rc = mv_hash_final_fallback(req); + else + rc = 0; + cpg->eng_st = ENGINE_IDLE; + local_bh_disable(); + req->base.complete(&req->base, rc); + local_bh_enable(); + } +} + static int queue_manag(void *data) { cpg->eng_st = ENGINE_IDLE; do { - struct ablkcipher_request *req; struct crypto_async_request *async_req = NULL; struct crypto_async_request *backlog; @@ -338,9 +600,18 @@ static int queue_manag(void *data) } if (async_req) { - req = container_of(async_req, - struct ablkcipher_request, base); - mv_enqueue_new_req(req); + if (async_req->tfm->__crt_alg->cra_type != + &crypto_ahash_type) { + struct ablkcipher_request *req = + container_of(async_req, + struct ablkcipher_request, + base); + mv_start_new_crypt_req(req); + } else { + struct ahash_request *req = + ahash_request_cast(async_req); + mv_start_new_hash_req(req); + } async_req = NULL; } @@ -350,13 +621,13 @@ static int queue_manag(void *data) return 0; } -static int mv_handle_req(struct ablkcipher_request *req) +static int mv_handle_req(struct crypto_async_request *req) { unsigned long flags; int ret; spin_lock_irqsave(&cpg->lock, flags); - ret = ablkcipher_enqueue_request(&cpg->queue, req); + ret = crypto_enqueue_request(&cpg->queue, req); spin_unlock_irqrestore(&cpg->lock, flags); wake_up_process(cpg->queue_th); return ret; @@ -369,7 +640,7 @@ static int mv_enc_aes_ecb(struct ablkcipher_request *req) req_ctx->op = COP_AES_ECB; req_ctx->decrypt = 0; - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_dec_aes_ecb(struct ablkcipher_request *req) @@ -381,7 +652,7 @@ static int mv_dec_aes_ecb(struct ablkcipher_request *req) req_ctx->decrypt = 1; compute_aes_dec_key(ctx); - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_enc_aes_cbc(struct ablkcipher_request *req) @@ -391,7 +662,7 @@ static int mv_enc_aes_cbc(struct ablkcipher_request *req) req_ctx->op = COP_AES_CBC; req_ctx->decrypt = 0; - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_dec_aes_cbc(struct ablkcipher_request *req) @@ -403,7 +674,7 @@ static int mv_dec_aes_cbc(struct ablkcipher_request *req) req_ctx->decrypt = 1; compute_aes_dec_key(ctx); - return mv_handle_req(req); + return mv_handle_req(&req->base); } static int mv_cra_init(struct crypto_tfm *tfm) @@ -412,6 +683,215 @@ static int mv_cra_init(struct crypto_tfm *tfm) return 0; } +static void mv_init_hash_req_ctx(struct mv_req_hash_ctx *ctx, int op, + int is_last, unsigned int req_len, + int count_add) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->op = op; + ctx->count = req_len; + ctx->first_hash = 1; + ctx->last_chunk = is_last; + ctx->count_add = count_add; +} + +static void mv_update_hash_req_ctx(struct mv_req_hash_ctx *ctx, int is_last, + unsigned req_len) +{ + ctx->last_chunk = is_last; + ctx->count += req_len; +} + +static int mv_hash_init(struct ahash_request *req) +{ + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 0, 0, + tfm_ctx->count_add); + return 0; +} + +static int mv_hash_update(struct ahash_request *req) +{ + if (!req->nbytes) + return 0; + + mv_update_hash_req_ctx(ahash_request_ctx(req), 0, req->nbytes); + return mv_handle_req(&req->base); +} + +static int mv_hash_final(struct ahash_request *req) +{ + struct mv_req_hash_ctx *ctx = ahash_request_ctx(req); + /* dummy buffer of 4 bytes */ + sg_init_one(&ctx->dummysg, ctx->buffer, 4); + /* I think I'm allowed to do that... */ + ahash_request_set_crypt(req, &ctx->dummysg, req->result, 0); + mv_update_hash_req_ctx(ctx, 1, 0); + return mv_handle_req(&req->base); +} + +static int mv_hash_finup(struct ahash_request *req) +{ + if (!req->nbytes) + return mv_hash_final(req); + + mv_update_hash_req_ctx(ahash_request_ctx(req), 1, req->nbytes); + return mv_handle_req(&req->base); +} + +static int mv_hash_digest(struct ahash_request *req) +{ + const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm); + mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 1, + req->nbytes, tfm_ctx->count_add); + return mv_handle_req(&req->base); +} + +static void mv_hash_init_ivs(struct mv_tfm_hash_ctx *ctx, const void *istate, + const void *ostate) +{ + const struct sha1_state *isha1_state = istate, *osha1_state = ostate; + int i; + for (i = 0; i < 5; i++) { + ctx->ivs[i] = cpu_to_be32(isha1_state->state[i]); + ctx->ivs[i + 5] = cpu_to_be32(osha1_state->state[i]); + } +} + +static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key, + unsigned int keylen) +{ + int rc; + struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(&tfm->base); + int bs, ds, ss; + + if (!ctx->base_hash) + return 0; + + rc = crypto_shash_setkey(ctx->fallback, key, keylen); + if (rc) + return rc; + + /* Can't see a way to extract the ipad/opad from the fallback tfm + so I'm basically copying code from the hmac module */ + bs = crypto_shash_blocksize(ctx->base_hash); + ds = crypto_shash_digestsize(ctx->base_hash); + ss = crypto_shash_statesize(ctx->base_hash); + + { + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(ctx->base_hash)]; + } desc; + unsigned int i; + char ipad[ss]; + char opad[ss]; + + desc.shash.tfm = ctx->base_hash; + desc.shash.flags = crypto_shash_get_flags(ctx->base_hash) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + if (keylen > bs) { + int err; + + err = + crypto_shash_digest(&desc.shash, key, keylen, ipad); + if (err) + return err; + + keylen = ds; + } else + memcpy(ipad, key, keylen); + + memset(ipad + keylen, 0, bs - keylen); + memcpy(opad, ipad, bs); + + for (i = 0; i < bs; i++) { + ipad[i] ^= 0x36; + opad[i] ^= 0x5c; + } + + rc = crypto_shash_init(&desc.shash) ? : + crypto_shash_update(&desc.shash, ipad, bs) ? : + crypto_shash_export(&desc.shash, ipad) ? : + crypto_shash_init(&desc.shash) ? : + crypto_shash_update(&desc.shash, opad, bs) ? : + crypto_shash_export(&desc.shash, opad); + + if (rc == 0) + mv_hash_init_ivs(ctx, ipad, opad); + + return rc; + } +} + +static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name, + enum hash_op op, int count_add) +{ + const char *fallback_driver_name = tfm->__crt_alg->cra_name; + struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_shash *fallback_tfm = NULL; + struct crypto_shash *base_hash = NULL; + int err = -ENOMEM; + + ctx->op = op; + ctx->count_add = count_add; + + /* Allocate a fallback and abort if it failed. */ + fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { + printk(KERN_WARNING MV_CESA + "Fallback driver '%s' could not be loaded!\n", + fallback_driver_name); + err = PTR_ERR(fallback_tfm); + goto out; + } + ctx->fallback = fallback_tfm; + + if (base_hash_name) { + /* Allocate a hash to compute the ipad/opad of hmac. */ + base_hash = crypto_alloc_shash(base_hash_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(base_hash)) { + printk(KERN_WARNING MV_CESA + "Base driver '%s' could not be loaded!\n", + base_hash_name); + err = PTR_ERR(fallback_tfm); + goto err_bad_base; + } + } + ctx->base_hash = base_hash; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mv_req_hash_ctx) + + crypto_shash_descsize(ctx->fallback)); + return 0; +err_bad_base: + crypto_free_shash(fallback_tfm); +out: + return err; +} + +static void mv_cra_hash_exit(struct crypto_tfm *tfm) +{ + struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_shash(ctx->fallback); + if (ctx->base_hash) + crypto_free_shash(ctx->base_hash); +} + +static int mv_cra_hash_sha1_init(struct crypto_tfm *tfm) +{ + return mv_cra_hash_init(tfm, NULL, COP_SHA1, 0); +} + +static int mv_cra_hash_hmac_sha1_init(struct crypto_tfm *tfm) +{ + return mv_cra_hash_init(tfm, "sha1", COP_HMAC_SHA1, SHA1_BLOCK_SIZE); +} + irqreturn_t crypto_int(int irq, void *priv) { u32 val; @@ -474,6 +954,53 @@ struct crypto_alg mv_aes_alg_cbc = { }, }; +struct ahash_alg mv_sha1_alg = { + .init = mv_hash_init, + .update = mv_hash_update, + .final = mv_hash_final, + .finup = mv_hash_finup, + .digest = mv_hash_digest, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "mv-sha1", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx), + .cra_init = mv_cra_hash_sha1_init, + .cra_exit = mv_cra_hash_exit, + .cra_module = THIS_MODULE, + } + } +}; + +struct ahash_alg mv_hmac_sha1_alg = { + .init = mv_hash_init, + .update = mv_hash_update, + .final = mv_hash_final, + .finup = mv_hash_finup, + .digest = mv_hash_digest, + .setkey = mv_hash_setkey, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mv-hmac-sha1", + .cra_priority = 300, + .cra_flags = + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx), + .cra_init = mv_cra_hash_hmac_sha1_init, + .cra_exit = mv_cra_hash_exit, + .cra_module = THIS_MODULE, + } + } +}; + static int mv_probe(struct platform_device *pdev) { struct crypto_priv *cp; @@ -482,7 +1009,7 @@ static int mv_probe(struct platform_device *pdev) int ret; if (cpg) { - printk(KERN_ERR "Second crypto dev?\n"); + printk(KERN_ERR MV_CESA "Second crypto dev?\n"); return -EEXIST; } @@ -496,7 +1023,7 @@ static int mv_probe(struct platform_device *pdev) spin_lock_init(&cp->lock); crypto_init_queue(&cp->queue, 50); - cp->reg = ioremap(res->start, res->end - res->start + 1); + cp->reg = ioremap(res->start, resource_size(res)); if (!cp->reg) { ret = -ENOMEM; goto err; @@ -507,7 +1034,7 @@ static int mv_probe(struct platform_device *pdev) ret = -ENXIO; goto err_unmap_reg; } - cp->sram_size = res->end - res->start + 1; + cp->sram_size = resource_size(res); cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE; cp->sram = ioremap(res->start, cp->sram_size); if (!cp->sram) { @@ -546,6 +1073,21 @@ static int mv_probe(struct platform_device *pdev) ret = crypto_register_alg(&mv_aes_alg_cbc); if (ret) goto err_unreg_ecb; + + ret = crypto_register_ahash(&mv_sha1_alg); + if (ret == 0) + cpg->has_sha1 = 1; + else + printk(KERN_WARNING MV_CESA "Could not register sha1 driver\n"); + + ret = crypto_register_ahash(&mv_hmac_sha1_alg); + if (ret == 0) { + cpg->has_hmac_sha1 = 1; + } else { + printk(KERN_WARNING MV_CESA + "Could not register hmac-sha1 driver\n"); + } + return 0; err_unreg_ecb: crypto_unregister_alg(&mv_aes_alg_ecb); @@ -570,6 +1112,10 @@ static int mv_remove(struct platform_device *pdev) crypto_unregister_alg(&mv_aes_alg_ecb); crypto_unregister_alg(&mv_aes_alg_cbc); + if (cp->has_sha1) + crypto_unregister_ahash(&mv_sha1_alg); + if (cp->has_hmac_sha1) + crypto_unregister_ahash(&mv_hmac_sha1_alg); kthread_stop(cp->queue_th); free_irq(cp->irq, cp); memset(cp->sram, 0, cp->sram_size); diff --git a/drivers/crypto/mv_cesa.h b/drivers/crypto/mv_cesa.h index c3e25d3bb171..08fcb1116d90 100644 --- a/drivers/crypto/mv_cesa.h +++ b/drivers/crypto/mv_cesa.h @@ -1,6 +1,10 @@ #ifndef __MV_CRYPTO_H__ #define DIGEST_INITIAL_VAL_A 0xdd00 +#define DIGEST_INITIAL_VAL_B 0xdd04 +#define DIGEST_INITIAL_VAL_C 0xdd08 +#define DIGEST_INITIAL_VAL_D 0xdd0c +#define DIGEST_INITIAL_VAL_E 0xdd10 #define DES_CMD_REG 0xdd58 #define SEC_ACCEL_CMD 0xde00 @@ -70,6 +74,10 @@ struct sec_accel_config { #define CFG_AES_LEN_128 (0 << 24) #define CFG_AES_LEN_192 (1 << 24) #define CFG_AES_LEN_256 (2 << 24) +#define CFG_NOT_FRAG (0 << 30) +#define CFG_FIRST_FRAG (1 << 30) +#define CFG_LAST_FRAG (2 << 30) +#define CFG_MID_FRAG (3 << 30) u32 enc_p; #define ENC_P_SRC(x) (x) @@ -90,7 +98,11 @@ struct sec_accel_config { #define MAC_SRC_TOTAL_LEN(x) ((x) << 16) u32 mac_digest; +#define MAC_DIGEST_P(x) (x) +#define MAC_FRAG_LEN(x) ((x) << 16) u32 mac_iv; +#define MAC_INNER_IV_P(x) (x) +#define MAC_OUTER_IV_P(x) ((x) << 16) }__attribute__ ((packed)); /* * /-----------\ 0 @@ -101,19 +113,37 @@ struct sec_accel_config { * | IV IN | 4 * 4 * |-----------| 0x40 (inplace) * | IV BUF | 4 * 4 - * |-----------| 0x50 + * |-----------| 0x80 * | DATA IN | 16 * x (max ->max_req_size) - * |-----------| 0x50 (inplace operation) + * |-----------| 0x80 (inplace operation) * | DATA OUT | 16 * x (max ->max_req_size) * \-----------/ SRAM size */ + + /* Hashing memory map: + * /-----------\ 0 + * | ACCEL CFG | 4 * 8 + * |-----------| 0x20 + * | Inner IV | 5 * 4 + * |-----------| 0x34 + * | Outer IV | 5 * 4 + * |-----------| 0x48 + * | Output BUF| 5 * 4 + * |-----------| 0x80 + * | DATA IN | 64 * x (max ->max_req_size) + * \-----------/ SRAM size + */ #define SRAM_CONFIG 0x00 #define SRAM_DATA_KEY_P 0x20 #define SRAM_DATA_IV 0x40 #define SRAM_DATA_IV_BUF 0x40 -#define SRAM_DATA_IN_START 0x50 -#define SRAM_DATA_OUT_START 0x50 +#define SRAM_DATA_IN_START 0x80 +#define SRAM_DATA_OUT_START 0x80 -#define SRAM_CFG_SPACE 0x50 +#define SRAM_HMAC_IV_IN 0x20 +#define SRAM_HMAC_IV_OUT 0x34 +#define SRAM_DIGEST_BUF 0x48 + +#define SRAM_CFG_SPACE 0x80 #endif diff --git a/drivers/crypto/n2_asm.S b/drivers/crypto/n2_asm.S new file mode 100644 index 000000000000..f7c793745a1e --- /dev/null +++ b/drivers/crypto/n2_asm.S @@ -0,0 +1,95 @@ +/* n2_asm.S: Hypervisor calls for NCS support. + * + * Copyright (C) 2009 David S. Miller + */ + +#include +#include +#include "n2_core.h" + + /* o0: queue type + * o1: RA of queue + * o2: num entries in queue + * o3: address of queue handle return + */ +ENTRY(sun4v_ncs_qconf) + mov HV_FAST_NCS_QCONF, %o5 + ta HV_FAST_TRAP + stx %o1, [%o3] + retl + nop +ENDPROC(sun4v_ncs_qconf) + + /* %o0: queue handle + * %o1: address of queue type return + * %o2: address of queue base address return + * %o3: address of queue num entries return + */ +ENTRY(sun4v_ncs_qinfo) + mov %o1, %g1 + mov %o2, %g2 + mov %o3, %g3 + mov HV_FAST_NCS_QINFO, %o5 + ta HV_FAST_TRAP + stx %o1, [%g1] + stx %o2, [%g2] + stx %o3, [%g3] + retl + nop +ENDPROC(sun4v_ncs_qinfo) + + /* %o0: queue handle + * %o1: address of head offset return + */ +ENTRY(sun4v_ncs_gethead) + mov %o1, %o2 + mov HV_FAST_NCS_GETHEAD, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + nop +ENDPROC(sun4v_ncs_gethead) + + /* %o0: queue handle + * %o1: address of tail offset return + */ +ENTRY(sun4v_ncs_gettail) + mov %o1, %o2 + mov HV_FAST_NCS_GETTAIL, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + nop +ENDPROC(sun4v_ncs_gettail) + + /* %o0: queue handle + * %o1: new tail offset + */ +ENTRY(sun4v_ncs_settail) + mov HV_FAST_NCS_SETTAIL, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ncs_settail) + + /* %o0: queue handle + * %o1: address of devino return + */ +ENTRY(sun4v_ncs_qhandle_to_devino) + mov %o1, %o2 + mov HV_FAST_NCS_QHANDLE_TO_DEVINO, %o5 + ta HV_FAST_TRAP + stx %o1, [%o2] + retl + nop +ENDPROC(sun4v_ncs_qhandle_to_devino) + + /* %o0: queue handle + * %o1: new head offset + */ +ENTRY(sun4v_ncs_sethead_marker) + mov HV_FAST_NCS_SETHEAD_MARKER, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_ncs_sethead_marker) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c new file mode 100644 index 000000000000..8566be832f51 --- /dev/null +++ b/drivers/crypto/n2_core.c @@ -0,0 +1,2083 @@ +/* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support. + * + * Copyright (C) 2010 David S. Miller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "n2_core.h" + +#define DRV_MODULE_NAME "n2_crypto" +#define DRV_MODULE_VERSION "0.1" +#define DRV_MODULE_RELDATE "April 29, 2010" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + +MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("Niagara2 Crypto driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +#define N2_CRA_PRIORITY 300 + +static DEFINE_MUTEX(spu_lock); + +struct spu_queue { + cpumask_t sharing; + unsigned long qhandle; + + spinlock_t lock; + u8 q_type; + void *q; + unsigned long head; + unsigned long tail; + struct list_head jobs; + + unsigned long devino; + + char irq_name[32]; + unsigned int irq; + + struct list_head list; +}; + +static struct spu_queue **cpu_to_cwq; +static struct spu_queue **cpu_to_mau; + +static unsigned long spu_next_offset(struct spu_queue *q, unsigned long off) +{ + if (q->q_type == HV_NCS_QTYPE_MAU) { + off += MAU_ENTRY_SIZE; + if (off == (MAU_ENTRY_SIZE * MAU_NUM_ENTRIES)) + off = 0; + } else { + off += CWQ_ENTRY_SIZE; + if (off == (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES)) + off = 0; + } + return off; +} + +struct n2_request_common { + struct list_head entry; + unsigned int offset; +}; +#define OFFSET_NOT_RUNNING (~(unsigned int)0) + +/* An async job request records the final tail value it used in + * n2_request_common->offset, test to see if that offset is in + * the range old_head, new_head, inclusive. + */ +static inline bool job_finished(struct spu_queue *q, unsigned int offset, + unsigned long old_head, unsigned long new_head) +{ + if (old_head <= new_head) { + if (offset > old_head && offset <= new_head) + return true; + } else { + if (offset > old_head || offset <= new_head) + return true; + } + return false; +} + +/* When the HEAD marker is unequal to the actual HEAD, we get + * a virtual device INO interrupt. We should process the + * completed CWQ entries and adjust the HEAD marker to clear + * the IRQ. + */ +static irqreturn_t cwq_intr(int irq, void *dev_id) +{ + unsigned long off, new_head, hv_ret; + struct spu_queue *q = dev_id; + + pr_err("CPU[%d]: Got CWQ interrupt for qhdl[%lx]\n", + smp_processor_id(), q->qhandle); + + spin_lock(&q->lock); + + hv_ret = sun4v_ncs_gethead(q->qhandle, &new_head); + + pr_err("CPU[%d]: CWQ gethead[%lx] hv_ret[%lu]\n", + smp_processor_id(), new_head, hv_ret); + + for (off = q->head; off != new_head; off = spu_next_offset(q, off)) { + /* XXX ... XXX */ + } + + hv_ret = sun4v_ncs_sethead_marker(q->qhandle, new_head); + if (hv_ret == HV_EOK) + q->head = new_head; + + spin_unlock(&q->lock); + + return IRQ_HANDLED; +} + +static irqreturn_t mau_intr(int irq, void *dev_id) +{ + struct spu_queue *q = dev_id; + unsigned long head, hv_ret; + + spin_lock(&q->lock); + + pr_err("CPU[%d]: Got MAU interrupt for qhdl[%lx]\n", + smp_processor_id(), q->qhandle); + + hv_ret = sun4v_ncs_gethead(q->qhandle, &head); + + pr_err("CPU[%d]: MAU gethead[%lx] hv_ret[%lu]\n", + smp_processor_id(), head, hv_ret); + + sun4v_ncs_sethead_marker(q->qhandle, head); + + spin_unlock(&q->lock); + + return IRQ_HANDLED; +} + +static void *spu_queue_next(struct spu_queue *q, void *cur) +{ + return q->q + spu_next_offset(q, cur - q->q); +} + +static int spu_queue_num_free(struct spu_queue *q) +{ + unsigned long head = q->head; + unsigned long tail = q->tail; + unsigned long end = (CWQ_ENTRY_SIZE * CWQ_NUM_ENTRIES); + unsigned long diff; + + if (head > tail) + diff = head - tail; + else + diff = (end - tail) + head; + + return (diff / CWQ_ENTRY_SIZE) - 1; +} + +static void *spu_queue_alloc(struct spu_queue *q, int num_entries) +{ + int avail = spu_queue_num_free(q); + + if (avail >= num_entries) + return q->q + q->tail; + + return NULL; +} + +static unsigned long spu_queue_submit(struct spu_queue *q, void *last) +{ + unsigned long hv_ret, new_tail; + + new_tail = spu_next_offset(q, last - q->q); + + hv_ret = sun4v_ncs_settail(q->qhandle, new_tail); + if (hv_ret == HV_EOK) + q->tail = new_tail; + return hv_ret; +} + +static u64 control_word_base(unsigned int len, unsigned int hmac_key_len, + int enc_type, int auth_type, + unsigned int hash_len, + bool sfas, bool sob, bool eob, bool encrypt, + int opcode) +{ + u64 word = (len - 1) & CONTROL_LEN; + + word |= ((u64) opcode << CONTROL_OPCODE_SHIFT); + word |= ((u64) enc_type << CONTROL_ENC_TYPE_SHIFT); + word |= ((u64) auth_type << CONTROL_AUTH_TYPE_SHIFT); + if (sfas) + word |= CONTROL_STORE_FINAL_AUTH_STATE; + if (sob) + word |= CONTROL_START_OF_BLOCK; + if (eob) + word |= CONTROL_END_OF_BLOCK; + if (encrypt) + word |= CONTROL_ENCRYPT; + if (hmac_key_len) + word |= ((u64) (hmac_key_len - 1)) << CONTROL_HMAC_KEY_LEN_SHIFT; + if (hash_len) + word |= ((u64) (hash_len - 1)) << CONTROL_HASH_LEN_SHIFT; + + return word; +} + +#if 0 +static inline bool n2_should_run_async(struct spu_queue *qp, int this_len) +{ + if (this_len >= 64 || + qp->head != qp->tail) + return true; + return false; +} +#endif + +struct n2_base_ctx { + struct list_head list; +}; + +static void n2_base_ctx_init(struct n2_base_ctx *ctx) +{ + INIT_LIST_HEAD(&ctx->list); +} + +struct n2_hash_ctx { + struct n2_base_ctx base; + + struct crypto_ahash *fallback; + + /* These next three members must match the layout created by + * crypto_init_shash_ops_async. This allows us to properly + * plumb requests we can't do in hardware down to the fallback + * operation, providing all of the data structures and layouts + * expected by those paths. + */ + struct ahash_request fallback_req; + struct shash_desc fallback_desc; + union { + struct md5_state md5; + struct sha1_state sha1; + struct sha256_state sha256; + } u; + + unsigned char hash_key[64]; + unsigned char keyed_zero_hash[32]; +}; + +static int n2_hash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_init(&ctx->fallback_req); +} + +static int n2_hash_async_update(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.nbytes = req->nbytes; + ctx->fallback_req.src = req->src; + + return crypto_ahash_update(&ctx->fallback_req); +} + +static int n2_hash_async_final(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.result = req->result; + + return crypto_ahash_final(&ctx->fallback_req); +} + +static int n2_hash_async_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.nbytes = req->nbytes; + ctx->fallback_req.src = req->src; + ctx->fallback_req.result = req->result; + + return crypto_ahash_finup(&ctx->fallback_req); +} + +static int n2_hash_cra_init(struct crypto_tfm *tfm) +{ + const char *fallback_driver_name = tfm->__crt_alg->cra_name; + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct crypto_ahash *fallback_tfm; + int err; + + fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { + pr_warning("Fallback driver '%s' could not be loaded!\n", + fallback_driver_name); + err = PTR_ERR(fallback_tfm); + goto out; + } + + ctx->fallback = fallback_tfm; + return 0; + +out: + return err; +} + +static void n2_hash_cra_exit(struct crypto_tfm *tfm) +{ + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(ahash); + + crypto_free_ahash(ctx->fallback); +} + +static unsigned long wait_for_tail(struct spu_queue *qp) +{ + unsigned long head, hv_ret; + + do { + hv_ret = sun4v_ncs_gethead(qp->qhandle, &head); + if (hv_ret != HV_EOK) { + pr_err("Hypervisor error on gethead\n"); + break; + } + if (head == qp->tail) { + qp->head = head; + break; + } + } while (1); + return hv_ret; +} + +static unsigned long submit_and_wait_for_tail(struct spu_queue *qp, + struct cwq_initial_entry *ent) +{ + unsigned long hv_ret = spu_queue_submit(qp, ent); + + if (hv_ret == HV_EOK) + hv_ret = wait_for_tail(qp); + + return hv_ret; +} + +static int n2_hash_async_digest(struct ahash_request *req, + unsigned int auth_type, unsigned int digest_size, + unsigned int result_size, void *hash_loc) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cwq_initial_entry *ent; + struct crypto_hash_walk walk; + struct spu_queue *qp; + unsigned long flags; + int err = -ENODEV; + int nbytes, cpu; + + /* The total effective length of the operation may not + * exceed 2^16. + */ + if (unlikely(req->nbytes > (1 << 16))) { + ctx->fallback_req.base.tfm = crypto_ahash_tfm(ctx->fallback); + ctx->fallback_req.base.flags = + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP; + ctx->fallback_req.nbytes = req->nbytes; + ctx->fallback_req.src = req->src; + ctx->fallback_req.result = req->result; + + return crypto_ahash_digest(&ctx->fallback_req); + } + + n2_base_ctx_init(&ctx->base); + + nbytes = crypto_hash_walk_first(req, &walk); + + cpu = get_cpu(); + qp = cpu_to_cwq[cpu]; + if (!qp) + goto out; + + spin_lock_irqsave(&qp->lock, flags); + + /* XXX can do better, improve this later by doing a by-hand scatterlist + * XXX walk, etc. + */ + ent = qp->q + qp->tail; + + ent->control = control_word_base(nbytes, 0, 0, + auth_type, digest_size, + false, true, false, false, + OPCODE_INPLACE_BIT | + OPCODE_AUTH_MAC); + ent->src_addr = __pa(walk.data); + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = __pa(hash_loc); + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = 0UL; + ent->enc_iv_addr = 0UL; + ent->dest_addr = __pa(hash_loc); + + nbytes = crypto_hash_walk_done(&walk, 0); + while (nbytes > 0) { + ent = spu_queue_next(qp, ent); + + ent->control = (nbytes - 1); + ent->src_addr = __pa(walk.data); + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = 0UL; + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = 0UL; + ent->enc_iv_addr = 0UL; + ent->dest_addr = 0UL; + + nbytes = crypto_hash_walk_done(&walk, 0); + } + ent->control |= CONTROL_END_OF_BLOCK; + + if (submit_and_wait_for_tail(qp, ent) != HV_EOK) + err = -EINVAL; + else + err = 0; + + spin_unlock_irqrestore(&qp->lock, flags); + + if (!err) + memcpy(req->result, hash_loc, result_size); +out: + put_cpu(); + + return err; +} + +static int n2_md5_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct md5_state *m = &ctx->u.md5; + + if (unlikely(req->nbytes == 0)) { + static const char md5_zero[MD5_DIGEST_SIZE] = { + 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, + 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, + }; + + memcpy(req->result, md5_zero, MD5_DIGEST_SIZE); + return 0; + } + m->hash[0] = cpu_to_le32(0x67452301); + m->hash[1] = cpu_to_le32(0xefcdab89); + m->hash[2] = cpu_to_le32(0x98badcfe); + m->hash[3] = cpu_to_le32(0x10325476); + + return n2_hash_async_digest(req, AUTH_TYPE_MD5, + MD5_DIGEST_SIZE, MD5_DIGEST_SIZE, + m->hash); +} + +static int n2_sha1_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct sha1_state *s = &ctx->u.sha1; + + if (unlikely(req->nbytes == 0)) { + static const char sha1_zero[SHA1_DIGEST_SIZE] = { + 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, + 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8, + 0x07, 0x09 + }; + + memcpy(req->result, sha1_zero, SHA1_DIGEST_SIZE); + return 0; + } + s->state[0] = SHA1_H0; + s->state[1] = SHA1_H1; + s->state[2] = SHA1_H2; + s->state[3] = SHA1_H3; + s->state[4] = SHA1_H4; + + return n2_hash_async_digest(req, AUTH_TYPE_SHA1, + SHA1_DIGEST_SIZE, SHA1_DIGEST_SIZE, + s->state); +} + +static int n2_sha256_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct sha256_state *s = &ctx->u.sha256; + + if (req->nbytes == 0) { + static const char sha256_zero[SHA256_DIGEST_SIZE] = { + 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, + 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, + 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, + 0x1b, 0x78, 0x52, 0xb8, 0x55 + }; + + memcpy(req->result, sha256_zero, SHA256_DIGEST_SIZE); + return 0; + } + s->state[0] = SHA256_H0; + s->state[1] = SHA256_H1; + s->state[2] = SHA256_H2; + s->state[3] = SHA256_H3; + s->state[4] = SHA256_H4; + s->state[5] = SHA256_H5; + s->state[6] = SHA256_H6; + s->state[7] = SHA256_H7; + + return n2_hash_async_digest(req, AUTH_TYPE_SHA256, + SHA256_DIGEST_SIZE, SHA256_DIGEST_SIZE, + s->state); +} + +static int n2_sha224_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct n2_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct sha256_state *s = &ctx->u.sha256; + + if (req->nbytes == 0) { + static const char sha224_zero[SHA224_DIGEST_SIZE] = { + 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, 0x47, + 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, 0x15, 0xa2, + 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, 0xc5, 0xb3, 0xe4, + 0x2f + }; + + memcpy(req->result, sha224_zero, SHA224_DIGEST_SIZE); + return 0; + } + s->state[0] = SHA224_H0; + s->state[1] = SHA224_H1; + s->state[2] = SHA224_H2; + s->state[3] = SHA224_H3; + s->state[4] = SHA224_H4; + s->state[5] = SHA224_H5; + s->state[6] = SHA224_H6; + s->state[7] = SHA224_H7; + + return n2_hash_async_digest(req, AUTH_TYPE_SHA256, + SHA256_DIGEST_SIZE, SHA224_DIGEST_SIZE, + s->state); +} + +struct n2_cipher_context { + int key_len; + int enc_type; + union { + u8 aes[AES_MAX_KEY_SIZE]; + u8 des[DES_KEY_SIZE]; + u8 des3[3 * DES_KEY_SIZE]; + u8 arc4[258]; /* S-box, X, Y */ + } key; +}; + +#define N2_CHUNK_ARR_LEN 16 + +struct n2_crypto_chunk { + struct list_head entry; + unsigned long iv_paddr : 44; + unsigned long arr_len : 20; + unsigned long dest_paddr; + unsigned long dest_final; + struct { + unsigned long src_paddr : 44; + unsigned long src_len : 20; + } arr[N2_CHUNK_ARR_LEN]; +}; + +struct n2_request_context { + struct ablkcipher_walk walk; + struct list_head chunk_list; + struct n2_crypto_chunk chunk; + u8 temp_iv[16]; +}; + +/* The SPU allows some level of flexibility for partial cipher blocks + * being specified in a descriptor. + * + * It merely requires that every descriptor's length field is at least + * as large as the cipher block size. This means that a cipher block + * can span at most 2 descriptors. However, this does not allow a + * partial block to span into the final descriptor as that would + * violate the rule (since every descriptor's length must be at lest + * the block size). So, for example, assuming an 8 byte block size: + * + * 0xe --> 0xa --> 0x8 + * + * is a valid length sequence, whereas: + * + * 0xe --> 0xb --> 0x7 + * + * is not a valid sequence. + */ + +struct n2_cipher_alg { + struct list_head entry; + u8 enc_type; + struct crypto_alg alg; +}; + +static inline struct n2_cipher_alg *n2_cipher_alg(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + + return container_of(alg, struct n2_cipher_alg, alg); +} + +struct n2_cipher_request_context { + struct ablkcipher_walk walk; +}; + +static int n2_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + + ctx->enc_type = (n2alg->enc_type & ENC_TYPE_CHAINING_MASK); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->enc_type |= ENC_TYPE_ALG_AES128; + break; + case AES_KEYSIZE_192: + ctx->enc_type |= ENC_TYPE_ALG_AES192; + break; + case AES_KEYSIZE_256: + ctx->enc_type |= ENC_TYPE_ALG_AES256; + break; + default: + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->key_len = keylen; + memcpy(ctx->key.aes, key, keylen); + return 0; +} + +static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + u32 tmp[DES_EXPKEY_WORDS]; + int err; + + ctx->enc_type = n2alg->enc_type; + + if (keylen != DES_KEY_SIZE) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + err = des_ekey(tmp, key); + if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + ctx->key_len = keylen; + memcpy(ctx->key.des, key, keylen); + return 0; +} + +static int n2_3des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + + ctx->enc_type = n2alg->enc_type; + + if (keylen != (3 * DES_KEY_SIZE)) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + ctx->key_len = keylen; + memcpy(ctx->key.des3, key, keylen); + return 0; +} + +static int n2_arc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm); + u8 *s = ctx->key.arc4; + u8 *x = s + 256; + u8 *y = x + 1; + int i, j, k; + + ctx->enc_type = n2alg->enc_type; + + j = k = 0; + *x = 0; + *y = 0; + for (i = 0; i < 256; i++) + s[i] = i; + for (i = 0; i < 256; i++) { + u8 a = s[i]; + j = (j + key[k] + a) & 0xff; + s[i] = s[j]; + s[j] = a; + if (++k >= keylen) + k = 0; + } + + return 0; +} + +static inline int cipher_descriptor_len(int nbytes, unsigned int block_size) +{ + int this_len = nbytes; + + this_len -= (nbytes & (block_size - 1)); + return this_len > (1 << 16) ? (1 << 16) : this_len; +} + +static int __n2_crypt_chunk(struct crypto_tfm *tfm, struct n2_crypto_chunk *cp, + struct spu_queue *qp, bool encrypt) +{ + struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm); + struct cwq_initial_entry *ent; + bool in_place; + int i; + + ent = spu_queue_alloc(qp, cp->arr_len); + if (!ent) { + pr_info("queue_alloc() of %d fails\n", + cp->arr_len); + return -EBUSY; + } + + in_place = (cp->dest_paddr == cp->arr[0].src_paddr); + + ent->control = control_word_base(cp->arr[0].src_len, + 0, ctx->enc_type, 0, 0, + false, true, false, encrypt, + OPCODE_ENCRYPT | + (in_place ? OPCODE_INPLACE_BIT : 0)); + ent->src_addr = cp->arr[0].src_paddr; + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = 0UL; + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = __pa(&ctx->key); + ent->enc_iv_addr = cp->iv_paddr; + ent->dest_addr = (in_place ? 0UL : cp->dest_paddr); + + for (i = 1; i < cp->arr_len; i++) { + ent = spu_queue_next(qp, ent); + + ent->control = cp->arr[i].src_len - 1; + ent->src_addr = cp->arr[i].src_paddr; + ent->auth_key_addr = 0UL; + ent->auth_iv_addr = 0UL; + ent->final_auth_state_addr = 0UL; + ent->enc_key_addr = 0UL; + ent->enc_iv_addr = 0UL; + ent->dest_addr = 0UL; + } + ent->control |= CONTROL_END_OF_BLOCK; + + return (spu_queue_submit(qp, ent) != HV_EOK) ? -EINVAL : 0; +} + +static int n2_compute_chunks(struct ablkcipher_request *req) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct ablkcipher_walk *walk = &rctx->walk; + struct n2_crypto_chunk *chunk; + unsigned long dest_prev; + unsigned int tot_len; + bool prev_in_place; + int err, nbytes; + + ablkcipher_walk_init(walk, req->dst, req->src, req->nbytes); + err = ablkcipher_walk_phys(req, walk); + if (err) + return err; + + INIT_LIST_HEAD(&rctx->chunk_list); + + chunk = &rctx->chunk; + INIT_LIST_HEAD(&chunk->entry); + + chunk->iv_paddr = 0UL; + chunk->arr_len = 0; + chunk->dest_paddr = 0UL; + + prev_in_place = false; + dest_prev = ~0UL; + tot_len = 0; + + while ((nbytes = walk->nbytes) != 0) { + unsigned long dest_paddr, src_paddr; + bool in_place; + int this_len; + + src_paddr = (page_to_phys(walk->src.page) + + walk->src.offset); + dest_paddr = (page_to_phys(walk->dst.page) + + walk->dst.offset); + in_place = (src_paddr == dest_paddr); + this_len = cipher_descriptor_len(nbytes, walk->blocksize); + + if (chunk->arr_len != 0) { + if (in_place != prev_in_place || + (!prev_in_place && + dest_paddr != dest_prev) || + chunk->arr_len == N2_CHUNK_ARR_LEN || + tot_len + this_len > (1 << 16)) { + chunk->dest_final = dest_prev; + list_add_tail(&chunk->entry, + &rctx->chunk_list); + chunk = kzalloc(sizeof(*chunk), GFP_ATOMIC); + if (!chunk) { + err = -ENOMEM; + break; + } + INIT_LIST_HEAD(&chunk->entry); + } + } + if (chunk->arr_len == 0) { + chunk->dest_paddr = dest_paddr; + tot_len = 0; + } + chunk->arr[chunk->arr_len].src_paddr = src_paddr; + chunk->arr[chunk->arr_len].src_len = this_len; + chunk->arr_len++; + + dest_prev = dest_paddr + this_len; + prev_in_place = in_place; + tot_len += this_len; + + err = ablkcipher_walk_done(req, walk, nbytes - this_len); + if (err) + break; + } + if (!err && chunk->arr_len != 0) { + chunk->dest_final = dest_prev; + list_add_tail(&chunk->entry, &rctx->chunk_list); + } + + return err; +} + +static void n2_chunk_complete(struct ablkcipher_request *req, void *final_iv) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct n2_crypto_chunk *c, *tmp; + + if (final_iv) + memcpy(rctx->walk.iv, final_iv, rctx->walk.blocksize); + + ablkcipher_walk_complete(&rctx->walk); + list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) { + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + +} + +static int n2_do_ecb(struct ablkcipher_request *req, bool encrypt) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct crypto_tfm *tfm = req->base.tfm; + int err = n2_compute_chunks(req); + struct n2_crypto_chunk *c, *tmp; + unsigned long flags, hv_ret; + struct spu_queue *qp; + + if (err) + return err; + + qp = cpu_to_cwq[get_cpu()]; + err = -ENODEV; + if (!qp) + goto out; + + spin_lock_irqsave(&qp->lock, flags); + + list_for_each_entry_safe(c, tmp, &rctx->chunk_list, entry) { + err = __n2_crypt_chunk(tfm, c, qp, encrypt); + if (err) + break; + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + if (!err) { + hv_ret = wait_for_tail(qp); + if (hv_ret != HV_EOK) + err = -EINVAL; + } + + spin_unlock_irqrestore(&qp->lock, flags); + + put_cpu(); + +out: + n2_chunk_complete(req, NULL); + return err; +} + +static int n2_encrypt_ecb(struct ablkcipher_request *req) +{ + return n2_do_ecb(req, true); +} + +static int n2_decrypt_ecb(struct ablkcipher_request *req) +{ + return n2_do_ecb(req, false); +} + +static int n2_do_chaining(struct ablkcipher_request *req, bool encrypt) +{ + struct n2_request_context *rctx = ablkcipher_request_ctx(req); + struct crypto_tfm *tfm = req->base.tfm; + unsigned long flags, hv_ret, iv_paddr; + int err = n2_compute_chunks(req); + struct n2_crypto_chunk *c, *tmp; + struct spu_queue *qp; + void *final_iv_addr; + + final_iv_addr = NULL; + + if (err) + return err; + + qp = cpu_to_cwq[get_cpu()]; + err = -ENODEV; + if (!qp) + goto out; + + spin_lock_irqsave(&qp->lock, flags); + + if (encrypt) { + iv_paddr = __pa(rctx->walk.iv); + list_for_each_entry_safe(c, tmp, &rctx->chunk_list, + entry) { + c->iv_paddr = iv_paddr; + err = __n2_crypt_chunk(tfm, c, qp, true); + if (err) + break; + iv_paddr = c->dest_final - rctx->walk.blocksize; + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + final_iv_addr = __va(iv_paddr); + } else { + list_for_each_entry_safe_reverse(c, tmp, &rctx->chunk_list, + entry) { + if (c == &rctx->chunk) { + iv_paddr = __pa(rctx->walk.iv); + } else { + iv_paddr = (tmp->arr[tmp->arr_len-1].src_paddr + + tmp->arr[tmp->arr_len-1].src_len - + rctx->walk.blocksize); + } + if (!final_iv_addr) { + unsigned long pa; + + pa = (c->arr[c->arr_len-1].src_paddr + + c->arr[c->arr_len-1].src_len - + rctx->walk.blocksize); + final_iv_addr = rctx->temp_iv; + memcpy(rctx->temp_iv, __va(pa), + rctx->walk.blocksize); + } + c->iv_paddr = iv_paddr; + err = __n2_crypt_chunk(tfm, c, qp, false); + if (err) + break; + list_del(&c->entry); + if (unlikely(c != &rctx->chunk)) + kfree(c); + } + } + if (!err) { + hv_ret = wait_for_tail(qp); + if (hv_ret != HV_EOK) + err = -EINVAL; + } + + spin_unlock_irqrestore(&qp->lock, flags); + + put_cpu(); + +out: + n2_chunk_complete(req, err ? NULL : final_iv_addr); + return err; +} + +static int n2_encrypt_chaining(struct ablkcipher_request *req) +{ + return n2_do_chaining(req, true); +} + +static int n2_decrypt_chaining(struct ablkcipher_request *req) +{ + return n2_do_chaining(req, false); +} + +struct n2_cipher_tmpl { + const char *name; + const char *drv_name; + u8 block_size; + u8 enc_type; + struct ablkcipher_alg ablkcipher; +}; + +static const struct n2_cipher_tmpl cipher_tmpls[] = { + /* ARC4: only ECB is supported (chaining bits ignored) */ + { .name = "ecb(arc4)", + .drv_name = "ecb-arc4", + .block_size = 1, + .enc_type = (ENC_TYPE_ALG_RC4_STREAM | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = 1, + .max_keysize = 256, + .setkey = n2_arc4_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + + /* DES: ECB CBC and CFB are supported */ + { .name = "ecb(des)", + .drv_name = "ecb-des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_DES | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = n2_des_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + { .name = "cbc(des)", + .drv_name = "cbc-des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_DES | + ENC_TYPE_CHAINING_CBC), + .ablkcipher = { + .ivsize = DES_BLOCK_SIZE, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = n2_des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + { .name = "cfb(des)", + .drv_name = "cfb-des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_DES | + ENC_TYPE_CHAINING_CFB), + .ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = n2_des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + + /* 3DES: ECB CBC and CFB are supported */ + { .name = "ecb(des3_ede)", + .drv_name = "ecb-3des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_3DES | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = 3 * DES_KEY_SIZE, + .max_keysize = 3 * DES_KEY_SIZE, + .setkey = n2_3des_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + { .name = "cbc(des3_ede)", + .drv_name = "cbc-3des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_3DES | + ENC_TYPE_CHAINING_CBC), + .ablkcipher = { + .ivsize = DES_BLOCK_SIZE, + .min_keysize = 3 * DES_KEY_SIZE, + .max_keysize = 3 * DES_KEY_SIZE, + .setkey = n2_3des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + { .name = "cfb(des3_ede)", + .drv_name = "cfb-3des", + .block_size = DES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_3DES | + ENC_TYPE_CHAINING_CFB), + .ablkcipher = { + .min_keysize = 3 * DES_KEY_SIZE, + .max_keysize = 3 * DES_KEY_SIZE, + .setkey = n2_3des_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + /* AES: ECB CBC and CTR are supported */ + { .name = "ecb(aes)", + .drv_name = "ecb-aes", + .block_size = AES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_AES128 | + ENC_TYPE_CHAINING_ECB), + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = n2_aes_setkey, + .encrypt = n2_encrypt_ecb, + .decrypt = n2_decrypt_ecb, + }, + }, + { .name = "cbc(aes)", + .drv_name = "cbc-aes", + .block_size = AES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_AES128 | + ENC_TYPE_CHAINING_CBC), + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = n2_aes_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_decrypt_chaining, + }, + }, + { .name = "ctr(aes)", + .drv_name = "ctr-aes", + .block_size = AES_BLOCK_SIZE, + .enc_type = (ENC_TYPE_ALG_AES128 | + ENC_TYPE_CHAINING_COUNTER), + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = n2_aes_setkey, + .encrypt = n2_encrypt_chaining, + .decrypt = n2_encrypt_chaining, + }, + }, + +}; +#define NUM_CIPHER_TMPLS ARRAY_SIZE(cipher_tmpls) + +static LIST_HEAD(cipher_algs); + +struct n2_hash_tmpl { + const char *name; + int (*digest)(struct ahash_request *req); + u8 digest_size; + u8 block_size; +}; +static const struct n2_hash_tmpl hash_tmpls[] = { + { .name = "md5", + .digest = n2_md5_async_digest, + .digest_size = MD5_DIGEST_SIZE, + .block_size = MD5_HMAC_BLOCK_SIZE }, + { .name = "sha1", + .digest = n2_sha1_async_digest, + .digest_size = SHA1_DIGEST_SIZE, + .block_size = SHA1_BLOCK_SIZE }, + { .name = "sha256", + .digest = n2_sha256_async_digest, + .digest_size = SHA256_DIGEST_SIZE, + .block_size = SHA256_BLOCK_SIZE }, + { .name = "sha224", + .digest = n2_sha224_async_digest, + .digest_size = SHA224_DIGEST_SIZE, + .block_size = SHA224_BLOCK_SIZE }, +}; +#define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls) + +struct n2_ahash_alg { + struct list_head entry; + struct ahash_alg alg; +}; +static LIST_HEAD(ahash_algs); + +static int algs_registered; + +static void __n2_unregister_algs(void) +{ + struct n2_cipher_alg *cipher, *cipher_tmp; + struct n2_ahash_alg *alg, *alg_tmp; + + list_for_each_entry_safe(cipher, cipher_tmp, &cipher_algs, entry) { + crypto_unregister_alg(&cipher->alg); + list_del(&cipher->entry); + kfree(cipher); + } + list_for_each_entry_safe(alg, alg_tmp, &ahash_algs, entry) { + crypto_unregister_ahash(&alg->alg); + list_del(&alg->entry); + kfree(alg); + } +} + +static int n2_cipher_cra_init(struct crypto_tfm *tfm) +{ + tfm->crt_ablkcipher.reqsize = sizeof(struct n2_request_context); + return 0; +} + +static int __devinit __n2_register_one_cipher(const struct n2_cipher_tmpl *tmpl) +{ + struct n2_cipher_alg *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct crypto_alg *alg; + int err; + + if (!p) + return -ENOMEM; + + alg = &p->alg; + + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->drv_name); + alg->cra_priority = N2_CRA_PRIORITY; + alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; + alg->cra_blocksize = tmpl->block_size; + p->enc_type = tmpl->enc_type; + alg->cra_ctxsize = sizeof(struct n2_cipher_context); + alg->cra_type = &crypto_ablkcipher_type; + alg->cra_u.ablkcipher = tmpl->ablkcipher; + alg->cra_init = n2_cipher_cra_init; + alg->cra_module = THIS_MODULE; + + list_add(&p->entry, &cipher_algs); + err = crypto_register_alg(alg); + if (err) { + list_del(&p->entry); + kfree(p); + } + return err; +} + +static int __devinit __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl) +{ + struct n2_ahash_alg *p = kzalloc(sizeof(*p), GFP_KERNEL); + struct hash_alg_common *halg; + struct crypto_alg *base; + struct ahash_alg *ahash; + int err; + + if (!p) + return -ENOMEM; + + ahash = &p->alg; + ahash->init = n2_hash_async_init; + ahash->update = n2_hash_async_update; + ahash->final = n2_hash_async_final; + ahash->finup = n2_hash_async_finup; + ahash->digest = tmpl->digest; + + halg = &ahash->halg; + halg->digestsize = tmpl->digest_size; + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s-n2", tmpl->name); + base->cra_priority = N2_CRA_PRIORITY; + base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK; + base->cra_blocksize = tmpl->block_size; + base->cra_ctxsize = sizeof(struct n2_hash_ctx); + base->cra_module = THIS_MODULE; + base->cra_init = n2_hash_cra_init; + base->cra_exit = n2_hash_cra_exit; + + list_add(&p->entry, &ahash_algs); + err = crypto_register_ahash(ahash); + if (err) { + list_del(&p->entry); + kfree(p); + } + return err; +} + +static int __devinit n2_register_algs(void) +{ + int i, err = 0; + + mutex_lock(&spu_lock); + if (algs_registered++) + goto out; + + for (i = 0; i < NUM_HASH_TMPLS; i++) { + err = __n2_register_one_ahash(&hash_tmpls[i]); + if (err) { + __n2_unregister_algs(); + goto out; + } + } + for (i = 0; i < NUM_CIPHER_TMPLS; i++) { + err = __n2_register_one_cipher(&cipher_tmpls[i]); + if (err) { + __n2_unregister_algs(); + goto out; + } + } + +out: + mutex_unlock(&spu_lock); + return err; +} + +static void __exit n2_unregister_algs(void) +{ + mutex_lock(&spu_lock); + if (!--algs_registered) + __n2_unregister_algs(); + mutex_unlock(&spu_lock); +} + +/* To map CWQ queues to interrupt sources, the hypervisor API provides + * a devino. This isn't very useful to us because all of the + * interrupts listed in the of_device node have been translated to + * Linux virtual IRQ cookie numbers. + * + * So we have to back-translate, going through the 'intr' and 'ino' + * property tables of the n2cp MDESC node, matching it with the OF + * 'interrupts' property entries, in order to to figure out which + * devino goes to which already-translated IRQ. + */ +static int find_devino_index(struct of_device *dev, struct spu_mdesc_info *ip, + unsigned long dev_ino) +{ + const unsigned int *dev_intrs; + unsigned int intr; + int i; + + for (i = 0; i < ip->num_intrs; i++) { + if (ip->ino_table[i].ino == dev_ino) + break; + } + if (i == ip->num_intrs) + return -ENODEV; + + intr = ip->ino_table[i].intr; + + dev_intrs = of_get_property(dev->node, "interrupts", NULL); + if (!dev_intrs) + return -ENODEV; + + for (i = 0; i < dev->num_irqs; i++) { + if (dev_intrs[i] == intr) + return i; + } + + return -ENODEV; +} + +static int spu_map_ino(struct of_device *dev, struct spu_mdesc_info *ip, + const char *irq_name, struct spu_queue *p, + irq_handler_t handler) +{ + unsigned long herr; + int index; + + herr = sun4v_ncs_qhandle_to_devino(p->qhandle, &p->devino); + if (herr) + return -EINVAL; + + index = find_devino_index(dev, ip, p->devino); + if (index < 0) + return index; + + p->irq = dev->irqs[index]; + + sprintf(p->irq_name, "%s-%d", irq_name, index); + + return request_irq(p->irq, handler, IRQF_SAMPLE_RANDOM, + p->irq_name, p); +} + +static struct kmem_cache *queue_cache[2]; + +static void *new_queue(unsigned long q_type) +{ + return kmem_cache_zalloc(queue_cache[q_type - 1], GFP_KERNEL); +} + +static void free_queue(void *p, unsigned long q_type) +{ + return kmem_cache_free(queue_cache[q_type - 1], p); +} + +static int queue_cache_init(void) +{ + if (!queue_cache[HV_NCS_QTYPE_MAU - 1]) + queue_cache[HV_NCS_QTYPE_MAU - 1] = + kmem_cache_create("cwq_queue", + (MAU_NUM_ENTRIES * + MAU_ENTRY_SIZE), + MAU_ENTRY_SIZE, 0, NULL); + if (!queue_cache[HV_NCS_QTYPE_MAU - 1]) + return -ENOMEM; + + if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) + queue_cache[HV_NCS_QTYPE_CWQ - 1] = + kmem_cache_create("cwq_queue", + (CWQ_NUM_ENTRIES * + CWQ_ENTRY_SIZE), + CWQ_ENTRY_SIZE, 0, NULL); + if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { + kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + return -ENOMEM; + } + return 0; +} + +static void queue_cache_destroy(void) +{ + kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); +} + +static int spu_queue_register(struct spu_queue *p, unsigned long q_type) +{ + cpumask_var_t old_allowed; + unsigned long hv_ret; + + if (cpumask_empty(&p->sharing)) + return -EINVAL; + + if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(old_allowed, ¤t->cpus_allowed); + + set_cpus_allowed_ptr(current, &p->sharing); + + hv_ret = sun4v_ncs_qconf(q_type, __pa(p->q), + CWQ_NUM_ENTRIES, &p->qhandle); + if (!hv_ret) + sun4v_ncs_sethead_marker(p->qhandle, 0); + + set_cpus_allowed_ptr(current, old_allowed); + + free_cpumask_var(old_allowed); + + return (hv_ret ? -EINVAL : 0); +} + +static int spu_queue_setup(struct spu_queue *p) +{ + int err; + + p->q = new_queue(p->q_type); + if (!p->q) + return -ENOMEM; + + err = spu_queue_register(p, p->q_type); + if (err) { + free_queue(p->q, p->q_type); + p->q = NULL; + } + + return err; +} + +static void spu_queue_destroy(struct spu_queue *p) +{ + unsigned long hv_ret; + + if (!p->q) + return; + + hv_ret = sun4v_ncs_qconf(p->q_type, p->qhandle, 0, &p->qhandle); + + if (!hv_ret) + free_queue(p->q, p->q_type); +} + +static void spu_list_destroy(struct list_head *list) +{ + struct spu_queue *p, *n; + + list_for_each_entry_safe(p, n, list, list) { + int i; + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_to_cwq[i] == p) + cpu_to_cwq[i] = NULL; + } + + if (p->irq) { + free_irq(p->irq, p); + p->irq = 0; + } + spu_queue_destroy(p); + list_del(&p->list); + kfree(p); + } +} + +/* Walk the backward arcs of a CWQ 'exec-unit' node, + * gathering cpu membership information. + */ +static int spu_mdesc_walk_arcs(struct mdesc_handle *mdesc, + struct of_device *dev, + u64 node, struct spu_queue *p, + struct spu_queue **table) +{ + u64 arc; + + mdesc_for_each_arc(arc, mdesc, node, MDESC_ARC_TYPE_BACK) { + u64 tgt = mdesc_arc_target(mdesc, arc); + const char *name = mdesc_node_name(mdesc, tgt); + const u64 *id; + + if (strcmp(name, "cpu")) + continue; + id = mdesc_get_property(mdesc, tgt, "id", NULL); + if (table[*id] != NULL) { + dev_err(&dev->dev, "%s: SPU cpu slot already set.\n", + dev->node->full_name); + return -EINVAL; + } + cpu_set(*id, p->sharing); + table[*id] = p; + } + return 0; +} + +/* Process an 'exec-unit' MDESC node of type 'cwq'. */ +static int handle_exec_unit(struct spu_mdesc_info *ip, struct list_head *list, + struct of_device *dev, struct mdesc_handle *mdesc, + u64 node, const char *iname, unsigned long q_type, + irq_handler_t handler, struct spu_queue **table) +{ + struct spu_queue *p; + int err; + + p = kzalloc(sizeof(struct spu_queue), GFP_KERNEL); + if (!p) { + dev_err(&dev->dev, "%s: Could not allocate SPU queue.\n", + dev->node->full_name); + return -ENOMEM; + } + + cpus_clear(p->sharing); + spin_lock_init(&p->lock); + p->q_type = q_type; + INIT_LIST_HEAD(&p->jobs); + list_add(&p->list, list); + + err = spu_mdesc_walk_arcs(mdesc, dev, node, p, table); + if (err) + return err; + + err = spu_queue_setup(p); + if (err) + return err; + + return spu_map_ino(dev, ip, iname, p, handler); +} + +static int spu_mdesc_scan(struct mdesc_handle *mdesc, struct of_device *dev, + struct spu_mdesc_info *ip, struct list_head *list, + const char *exec_name, unsigned long q_type, + irq_handler_t handler, struct spu_queue **table) +{ + int err = 0; + u64 node; + + mdesc_for_each_node_by_name(mdesc, node, "exec-unit") { + const char *type; + + type = mdesc_get_property(mdesc, node, "type", NULL); + if (!type || strcmp(type, exec_name)) + continue; + + err = handle_exec_unit(ip, list, dev, mdesc, node, + exec_name, q_type, handler, table); + if (err) { + spu_list_destroy(list); + break; + } + } + + return err; +} + +static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node, + struct spu_mdesc_info *ip) +{ + const u64 *intr, *ino; + int intr_len, ino_len; + int i; + + intr = mdesc_get_property(mdesc, node, "intr", &intr_len); + if (!intr) + return -ENODEV; + + ino = mdesc_get_property(mdesc, node, "ino", &ino_len); + if (!intr) + return -ENODEV; + + if (intr_len != ino_len) + return -EINVAL; + + ip->num_intrs = intr_len / sizeof(u64); + ip->ino_table = kzalloc((sizeof(struct ino_blob) * + ip->num_intrs), + GFP_KERNEL); + if (!ip->ino_table) + return -ENOMEM; + + for (i = 0; i < ip->num_intrs; i++) { + struct ino_blob *b = &ip->ino_table[i]; + b->intr = intr[i]; + b->ino = ino[i]; + } + + return 0; +} + +static int __devinit grab_mdesc_irq_props(struct mdesc_handle *mdesc, + struct of_device *dev, + struct spu_mdesc_info *ip, + const char *node_name) +{ + const unsigned int *reg; + u64 node; + + reg = of_get_property(dev->node, "reg", NULL); + if (!reg) + return -ENODEV; + + mdesc_for_each_node_by_name(mdesc, node, "virtual-device") { + const char *name; + const u64 *chdl; + + name = mdesc_get_property(mdesc, node, "name", NULL); + if (!name || strcmp(name, node_name)) + continue; + chdl = mdesc_get_property(mdesc, node, "cfg-handle", NULL); + if (!chdl || (*chdl != *reg)) + continue; + ip->cfg_handle = *chdl; + return get_irq_props(mdesc, node, ip); + } + + return -ENODEV; +} + +static unsigned long n2_spu_hvapi_major; +static unsigned long n2_spu_hvapi_minor; + +static int __devinit n2_spu_hvapi_register(void) +{ + int err; + + n2_spu_hvapi_major = 2; + n2_spu_hvapi_minor = 0; + + err = sun4v_hvapi_register(HV_GRP_NCS, + n2_spu_hvapi_major, + &n2_spu_hvapi_minor); + + if (!err) + pr_info("Registered NCS HVAPI version %lu.%lu\n", + n2_spu_hvapi_major, + n2_spu_hvapi_minor); + + return err; +} + +static void n2_spu_hvapi_unregister(void) +{ + sun4v_hvapi_unregister(HV_GRP_NCS); +} + +static int global_ref; + +static int __devinit grab_global_resources(void) +{ + int err = 0; + + mutex_lock(&spu_lock); + + if (global_ref++) + goto out; + + err = n2_spu_hvapi_register(); + if (err) + goto out; + + err = queue_cache_init(); + if (err) + goto out_hvapi_release; + + err = -ENOMEM; + cpu_to_cwq = kzalloc(sizeof(struct spu_queue *) * NR_CPUS, + GFP_KERNEL); + if (!cpu_to_cwq) + goto out_queue_cache_destroy; + + cpu_to_mau = kzalloc(sizeof(struct spu_queue *) * NR_CPUS, + GFP_KERNEL); + if (!cpu_to_mau) + goto out_free_cwq_table; + + err = 0; + +out: + if (err) + global_ref--; + mutex_unlock(&spu_lock); + return err; + +out_free_cwq_table: + kfree(cpu_to_cwq); + cpu_to_cwq = NULL; + +out_queue_cache_destroy: + queue_cache_destroy(); + +out_hvapi_release: + n2_spu_hvapi_unregister(); + goto out; +} + +static void release_global_resources(void) +{ + mutex_lock(&spu_lock); + if (!--global_ref) { + kfree(cpu_to_cwq); + cpu_to_cwq = NULL; + + kfree(cpu_to_mau); + cpu_to_mau = NULL; + + queue_cache_destroy(); + n2_spu_hvapi_unregister(); + } + mutex_unlock(&spu_lock); +} + +static struct n2_crypto * __devinit alloc_n2cp(void) +{ + struct n2_crypto *np = kzalloc(sizeof(struct n2_crypto), GFP_KERNEL); + + if (np) + INIT_LIST_HEAD(&np->cwq_list); + + return np; +} + +static void free_n2cp(struct n2_crypto *np) +{ + if (np->cwq_info.ino_table) { + kfree(np->cwq_info.ino_table); + np->cwq_info.ino_table = NULL; + } + + kfree(np); +} + +static void __devinit n2_spu_driver_version(void) +{ + static int n2_spu_version_printed; + + if (n2_spu_version_printed++ == 0) + pr_info("%s", version); +} + +static int __devinit n2_crypto_probe(struct of_device *dev, + const struct of_device_id *match) +{ + struct mdesc_handle *mdesc; + const char *full_name; + struct n2_crypto *np; + int err; + + n2_spu_driver_version(); + + full_name = dev->node->full_name; + pr_info("Found N2CP at %s\n", full_name); + + np = alloc_n2cp(); + if (!np) { + dev_err(&dev->dev, "%s: Unable to allocate n2cp.\n", + full_name); + return -ENOMEM; + } + + err = grab_global_resources(); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab " + "global resources.\n", full_name); + goto out_free_n2cp; + } + + mdesc = mdesc_grab(); + + if (!mdesc) { + dev_err(&dev->dev, "%s: Unable to grab MDESC.\n", + full_name); + err = -ENODEV; + goto out_free_global; + } + err = grab_mdesc_irq_props(mdesc, dev, &np->cwq_info, "n2cp"); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n", + full_name); + mdesc_release(mdesc); + goto out_free_global; + } + + err = spu_mdesc_scan(mdesc, dev, &np->cwq_info, &np->cwq_list, + "cwq", HV_NCS_QTYPE_CWQ, cwq_intr, + cpu_to_cwq); + mdesc_release(mdesc); + + if (err) { + dev_err(&dev->dev, "%s: CWQ MDESC scan failed.\n", + full_name); + goto out_free_global; + } + + err = n2_register_algs(); + if (err) { + dev_err(&dev->dev, "%s: Unable to register algorithms.\n", + full_name); + goto out_free_spu_list; + } + + dev_set_drvdata(&dev->dev, np); + + return 0; + +out_free_spu_list: + spu_list_destroy(&np->cwq_list); + +out_free_global: + release_global_resources(); + +out_free_n2cp: + free_n2cp(np); + + return err; +} + +static int __devexit n2_crypto_remove(struct of_device *dev) +{ + struct n2_crypto *np = dev_get_drvdata(&dev->dev); + + n2_unregister_algs(); + + spu_list_destroy(&np->cwq_list); + + release_global_resources(); + + free_n2cp(np); + + return 0; +} + +static struct n2_mau * __devinit alloc_ncp(void) +{ + struct n2_mau *mp = kzalloc(sizeof(struct n2_mau), GFP_KERNEL); + + if (mp) + INIT_LIST_HEAD(&mp->mau_list); + + return mp; +} + +static void free_ncp(struct n2_mau *mp) +{ + if (mp->mau_info.ino_table) { + kfree(mp->mau_info.ino_table); + mp->mau_info.ino_table = NULL; + } + + kfree(mp); +} + +static int __devinit n2_mau_probe(struct of_device *dev, + const struct of_device_id *match) +{ + struct mdesc_handle *mdesc; + const char *full_name; + struct n2_mau *mp; + int err; + + n2_spu_driver_version(); + + full_name = dev->node->full_name; + pr_info("Found NCP at %s\n", full_name); + + mp = alloc_ncp(); + if (!mp) { + dev_err(&dev->dev, "%s: Unable to allocate ncp.\n", + full_name); + return -ENOMEM; + } + + err = grab_global_resources(); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab " + "global resources.\n", full_name); + goto out_free_ncp; + } + + mdesc = mdesc_grab(); + + if (!mdesc) { + dev_err(&dev->dev, "%s: Unable to grab MDESC.\n", + full_name); + err = -ENODEV; + goto out_free_global; + } + + err = grab_mdesc_irq_props(mdesc, dev, &mp->mau_info, "ncp"); + if (err) { + dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n", + full_name); + mdesc_release(mdesc); + goto out_free_global; + } + + err = spu_mdesc_scan(mdesc, dev, &mp->mau_info, &mp->mau_list, + "mau", HV_NCS_QTYPE_MAU, mau_intr, + cpu_to_mau); + mdesc_release(mdesc); + + if (err) { + dev_err(&dev->dev, "%s: MAU MDESC scan failed.\n", + full_name); + goto out_free_global; + } + + dev_set_drvdata(&dev->dev, mp); + + return 0; + +out_free_global: + release_global_resources(); + +out_free_ncp: + free_ncp(mp); + + return err; +} + +static int __devexit n2_mau_remove(struct of_device *dev) +{ + struct n2_mau *mp = dev_get_drvdata(&dev->dev); + + spu_list_destroy(&mp->mau_list); + + release_global_resources(); + + free_ncp(mp); + + return 0; +} + +static struct of_device_id n2_crypto_match[] = { + { + .name = "n2cp", + .compatible = "SUNW,n2-cwq", + }, + { + .name = "n2cp", + .compatible = "SUNW,vf-cwq", + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, n2_crypto_match); + +static struct of_platform_driver n2_crypto_driver = { + .name = "n2cp", + .match_table = n2_crypto_match, + .probe = n2_crypto_probe, + .remove = __devexit_p(n2_crypto_remove), +}; + +static struct of_device_id n2_mau_match[] = { + { + .name = "ncp", + .compatible = "SUNW,n2-mau", + }, + { + .name = "ncp", + .compatible = "SUNW,vf-mau", + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, n2_mau_match); + +static struct of_platform_driver n2_mau_driver = { + .name = "ncp", + .match_table = n2_mau_match, + .probe = n2_mau_probe, + .remove = __devexit_p(n2_mau_remove), +}; + +static int __init n2_init(void) +{ + int err = of_register_driver(&n2_crypto_driver, &of_bus_type); + + if (!err) { + err = of_register_driver(&n2_mau_driver, &of_bus_type); + if (err) + of_unregister_driver(&n2_crypto_driver); + } + return err; +} + +static void __exit n2_exit(void) +{ + of_unregister_driver(&n2_mau_driver); + of_unregister_driver(&n2_crypto_driver); +} + +module_init(n2_init); +module_exit(n2_exit); diff --git a/drivers/crypto/n2_core.h b/drivers/crypto/n2_core.h new file mode 100644 index 000000000000..4bcbbeae98f5 --- /dev/null +++ b/drivers/crypto/n2_core.h @@ -0,0 +1,231 @@ +#ifndef _N2_CORE_H +#define _N2_CORE_H + +#ifndef __ASSEMBLY__ + +struct ino_blob { + u64 intr; + u64 ino; +}; + +struct spu_mdesc_info { + u64 cfg_handle; + struct ino_blob *ino_table; + int num_intrs; +}; + +struct n2_crypto { + struct spu_mdesc_info cwq_info; + struct list_head cwq_list; +}; + +struct n2_mau { + struct spu_mdesc_info mau_info; + struct list_head mau_list; +}; + +#define CWQ_ENTRY_SIZE 64 +#define CWQ_NUM_ENTRIES 64 + +#define MAU_ENTRY_SIZE 64 +#define MAU_NUM_ENTRIES 64 + +struct cwq_initial_entry { + u64 control; + u64 src_addr; + u64 auth_key_addr; + u64 auth_iv_addr; + u64 final_auth_state_addr; + u64 enc_key_addr; + u64 enc_iv_addr; + u64 dest_addr; +}; + +struct cwq_ext_entry { + u64 len; + u64 src_addr; + u64 resv1; + u64 resv2; + u64 resv3; + u64 resv4; + u64 resv5; + u64 resv6; +}; + +struct cwq_final_entry { + u64 control; + u64 src_addr; + u64 resv1; + u64 resv2; + u64 resv3; + u64 resv4; + u64 resv5; + u64 resv6; +}; + +#define CONTROL_LEN 0x000000000000ffffULL +#define CONTROL_LEN_SHIFT 0 +#define CONTROL_HMAC_KEY_LEN 0x0000000000ff0000ULL +#define CONTROL_HMAC_KEY_LEN_SHIFT 16 +#define CONTROL_ENC_TYPE 0x00000000ff000000ULL +#define CONTROL_ENC_TYPE_SHIFT 24 +#define ENC_TYPE_ALG_RC4_STREAM 0x00ULL +#define ENC_TYPE_ALG_RC4_NOSTREAM 0x04ULL +#define ENC_TYPE_ALG_DES 0x08ULL +#define ENC_TYPE_ALG_3DES 0x0cULL +#define ENC_TYPE_ALG_AES128 0x10ULL +#define ENC_TYPE_ALG_AES192 0x14ULL +#define ENC_TYPE_ALG_AES256 0x18ULL +#define ENC_TYPE_ALG_RESERVED 0x1cULL +#define ENC_TYPE_ALG_MASK 0x1cULL +#define ENC_TYPE_CHAINING_ECB 0x00ULL +#define ENC_TYPE_CHAINING_CBC 0x01ULL +#define ENC_TYPE_CHAINING_CFB 0x02ULL +#define ENC_TYPE_CHAINING_COUNTER 0x03ULL +#define ENC_TYPE_CHAINING_MASK 0x03ULL +#define CONTROL_AUTH_TYPE 0x0000001f00000000ULL +#define CONTROL_AUTH_TYPE_SHIFT 32 +#define AUTH_TYPE_RESERVED 0x00ULL +#define AUTH_TYPE_MD5 0x01ULL +#define AUTH_TYPE_SHA1 0x02ULL +#define AUTH_TYPE_SHA256 0x03ULL +#define AUTH_TYPE_CRC32 0x04ULL +#define AUTH_TYPE_HMAC_MD5 0x05ULL +#define AUTH_TYPE_HMAC_SHA1 0x06ULL +#define AUTH_TYPE_HMAC_SHA256 0x07ULL +#define AUTH_TYPE_TCP_CHECKSUM 0x08ULL +#define AUTH_TYPE_SSL_HMAC_MD5 0x09ULL +#define AUTH_TYPE_SSL_HMAC_SHA1 0x0aULL +#define AUTH_TYPE_SSL_HMAC_SHA256 0x0bULL +#define CONTROL_STRAND 0x000000e000000000ULL +#define CONTROL_STRAND_SHIFT 37 +#define CONTROL_HASH_LEN 0x0000ff0000000000ULL +#define CONTROL_HASH_LEN_SHIFT 40 +#define CONTROL_INTERRUPT 0x0001000000000000ULL +#define CONTROL_STORE_FINAL_AUTH_STATE 0x0002000000000000ULL +#define CONTROL_RESERVED 0x001c000000000000ULL +#define CONTROL_HV_DONE 0x0004000000000000ULL +#define CONTROL_HV_PROTOCOL_ERROR 0x0008000000000000ULL +#define CONTROL_HV_HARDWARE_ERROR 0x0010000000000000ULL +#define CONTROL_END_OF_BLOCK 0x0020000000000000ULL +#define CONTROL_START_OF_BLOCK 0x0040000000000000ULL +#define CONTROL_ENCRYPT 0x0080000000000000ULL +#define CONTROL_OPCODE 0xff00000000000000ULL +#define CONTROL_OPCODE_SHIFT 56 +#define OPCODE_INPLACE_BIT 0x80ULL +#define OPCODE_SSL_KEYBLOCK 0x10ULL +#define OPCODE_COPY 0x20ULL +#define OPCODE_ENCRYPT 0x40ULL +#define OPCODE_AUTH_MAC 0x41ULL + +#endif /* !(__ASSEMBLY__) */ + +/* NCS v2.0 hypervisor interfaces */ +#define HV_NCS_QTYPE_MAU 0x01 +#define HV_NCS_QTYPE_CWQ 0x02 + +/* ncs_qconf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_QCONF + * ARG0: Queue type (HV_NCS_QTYPE_{MAU,CWQ}) + * ARG1: Real address of queue, or handle for unconfigure + * ARG2: Number of entries in queue, zero for unconfigure + * RET0: status + * RET1: queue handle + * + * Configure a queue in the stream processing unit. + * + * The real address given as the base must be 64-byte + * aligned. + * + * The queue size can range from a minimum of 2 to a maximum + * of 64. The queue size must be a power of two. + * + * To unconfigure a queue, specify a length of zero and place + * the queue handle into ARG1. + * + * On configure success the hypervisor will set the FIRST, HEAD, + * and TAIL registers to the address of the first entry in the + * queue. The LAST register will be set to point to the last + * entry in the queue. + */ +#define HV_FAST_NCS_QCONF 0x111 + +/* ncs_qinfo() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_QINFO + * ARG0: Queue handle + * RET0: status + * RET1: Queue type (HV_NCS_QTYPE_{MAU,CWQ}) + * RET2: Queue base address + * RET3: Number of entries + */ +#define HV_FAST_NCS_QINFO 0x112 + +/* ncs_gethead() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_GETHEAD + * ARG0: Queue handle + * RET0: status + * RET1: queue head offset + */ +#define HV_FAST_NCS_GETHEAD 0x113 + +/* ncs_gettail() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_GETTAIL + * ARG0: Queue handle + * RET0: status + * RET1: queue tail offset + */ +#define HV_FAST_NCS_GETTAIL 0x114 + +/* ncs_settail() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_SETTAIL + * ARG0: Queue handle + * ARG1: New tail offset + * RET0: status + */ +#define HV_FAST_NCS_SETTAIL 0x115 + +/* ncs_qhandle_to_devino() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_QHANDLE_TO_DEVINO + * ARG0: Queue handle + * RET0: status + * RET1: devino + */ +#define HV_FAST_NCS_QHANDLE_TO_DEVINO 0x116 + +/* ncs_sethead_marker() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_NCS_SETHEAD_MARKER + * ARG0: Queue handle + * ARG1: New head offset + * RET0: status + */ +#define HV_FAST_NCS_SETHEAD_MARKER 0x117 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_ncs_qconf(unsigned long queue_type, + unsigned long queue_ra, + unsigned long num_entries, + unsigned long *qhandle); +extern unsigned long sun4v_ncs_qinfo(unsigned long qhandle, + unsigned long *queue_type, + unsigned long *queue_ra, + unsigned long *num_entries); +extern unsigned long sun4v_ncs_gethead(unsigned long qhandle, + unsigned long *head); +extern unsigned long sun4v_ncs_gettail(unsigned long qhandle, + unsigned long *tail); +extern unsigned long sun4v_ncs_settail(unsigned long qhandle, + unsigned long tail); +extern unsigned long sun4v_ncs_qhandle_to_devino(unsigned long qhandle, + unsigned long *devino); +extern unsigned long sun4v_ncs_sethead_marker(unsigned long qhandle, + unsigned long head); +#endif /* !(__ASSEMBLY__) */ + +#endif /* _N2_CORE_H */ diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c new file mode 100644 index 000000000000..8b034337793f --- /dev/null +++ b/drivers/crypto/omap-sham.c @@ -0,0 +1,1259 @@ +/* + * Cryptographic API. + * + * Support for OMAP SHA1/MD5 HW acceleration. + * + * Copyright (c) 2010 Nokia Corporation + * Author: Dmitry Kasatkin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Some ideas are from old omap-sha1-md5.c driver. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define SHA_REG_DIGEST(x) (0x00 + ((x) * 0x04)) +#define SHA_REG_DIN(x) (0x1C + ((x) * 0x04)) + +#define SHA1_MD5_BLOCK_SIZE SHA1_BLOCK_SIZE +#define MD5_DIGEST_SIZE 16 + +#define SHA_REG_DIGCNT 0x14 + +#define SHA_REG_CTRL 0x18 +#define SHA_REG_CTRL_LENGTH (0xFFFFFFFF << 5) +#define SHA_REG_CTRL_CLOSE_HASH (1 << 4) +#define SHA_REG_CTRL_ALGO_CONST (1 << 3) +#define SHA_REG_CTRL_ALGO (1 << 2) +#define SHA_REG_CTRL_INPUT_READY (1 << 1) +#define SHA_REG_CTRL_OUTPUT_READY (1 << 0) + +#define SHA_REG_REV 0x5C +#define SHA_REG_REV_MAJOR 0xF0 +#define SHA_REG_REV_MINOR 0x0F + +#define SHA_REG_MASK 0x60 +#define SHA_REG_MASK_DMA_EN (1 << 3) +#define SHA_REG_MASK_IT_EN (1 << 2) +#define SHA_REG_MASK_SOFTRESET (1 << 1) +#define SHA_REG_AUTOIDLE (1 << 0) + +#define SHA_REG_SYSSTATUS 0x64 +#define SHA_REG_SYSSTATUS_RESETDONE (1 << 0) + +#define DEFAULT_TIMEOUT_INTERVAL HZ + +#define FLAGS_FIRST 0x0001 +#define FLAGS_FINUP 0x0002 +#define FLAGS_FINAL 0x0004 +#define FLAGS_FAST 0x0008 +#define FLAGS_SHA1 0x0010 +#define FLAGS_DMA_ACTIVE 0x0020 +#define FLAGS_OUTPUT_READY 0x0040 +#define FLAGS_CLEAN 0x0080 +#define FLAGS_INIT 0x0100 +#define FLAGS_CPU 0x0200 +#define FLAGS_HMAC 0x0400 + +/* 3rd byte */ +#define FLAGS_BUSY 16 + +#define OP_UPDATE 1 +#define OP_FINAL 2 + +struct omap_sham_dev; + +struct omap_sham_reqctx { + struct omap_sham_dev *dd; + unsigned long flags; + unsigned long op; + + size_t digcnt; + u8 *buffer; + size_t bufcnt; + size_t buflen; + dma_addr_t dma_addr; + + /* walk state */ + struct scatterlist *sg; + unsigned int offset; /* offset in current sg */ + unsigned int total; /* total request */ +}; + +struct omap_sham_hmac_ctx { + struct crypto_shash *shash; + u8 ipad[SHA1_MD5_BLOCK_SIZE]; + u8 opad[SHA1_MD5_BLOCK_SIZE]; +}; + +struct omap_sham_ctx { + struct omap_sham_dev *dd; + + unsigned long flags; + + /* fallback stuff */ + struct crypto_shash *fallback; + + struct omap_sham_hmac_ctx base[0]; +}; + +#define OMAP_SHAM_QUEUE_LENGTH 1 + +struct omap_sham_dev { + struct list_head list; + unsigned long phys_base; + struct device *dev; + void __iomem *io_base; + int irq; + struct clk *iclk; + spinlock_t lock; + int dma; + int dma_lch; + struct tasklet_struct done_task; + struct tasklet_struct queue_task; + + unsigned long flags; + struct crypto_queue queue; + struct ahash_request *req; +}; + +struct omap_sham_drv { + struct list_head dev_list; + spinlock_t lock; + unsigned long flags; +}; + +static struct omap_sham_drv sham = { + .dev_list = LIST_HEAD_INIT(sham.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(sham.lock), +}; + +static inline u32 omap_sham_read(struct omap_sham_dev *dd, u32 offset) +{ + return __raw_readl(dd->io_base + offset); +} + +static inline void omap_sham_write(struct omap_sham_dev *dd, + u32 offset, u32 value) +{ + __raw_writel(value, dd->io_base + offset); +} + +static inline void omap_sham_write_mask(struct omap_sham_dev *dd, u32 address, + u32 value, u32 mask) +{ + u32 val; + + val = omap_sham_read(dd, address); + val &= ~mask; + val |= value; + omap_sham_write(dd, address, val); +} + +static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit) +{ + unsigned long timeout = jiffies + DEFAULT_TIMEOUT_INTERVAL; + + while (!(omap_sham_read(dd, offset) & bit)) { + if (time_is_before_jiffies(timeout)) + return -ETIMEDOUT; + } + + return 0; +} + +static void omap_sham_copy_hash(struct ahash_request *req, int out) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + u32 *hash = (u32 *)req->result; + int i; + + if (likely(ctx->flags & FLAGS_SHA1)) { + /* SHA1 results are in big endian */ + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) + if (out) + hash[i] = be32_to_cpu(omap_sham_read(ctx->dd, + SHA_REG_DIGEST(i))); + else + omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), + cpu_to_be32(hash[i])); + } else { + /* MD5 results are in little endian */ + for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++) + if (out) + hash[i] = le32_to_cpu(omap_sham_read(ctx->dd, + SHA_REG_DIGEST(i))); + else + omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), + cpu_to_le32(hash[i])); + } +} + +static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, + int final, int dma) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + u32 val = length << 5, mask; + + if (unlikely(!ctx->digcnt)) { + + clk_enable(dd->iclk); + + if (!(dd->flags & FLAGS_INIT)) { + omap_sham_write_mask(dd, SHA_REG_MASK, + SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); + + if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, + SHA_REG_SYSSTATUS_RESETDONE)) + return -ETIMEDOUT; + + dd->flags |= FLAGS_INIT; + } + } else { + omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt); + } + + omap_sham_write_mask(dd, SHA_REG_MASK, + SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0), + SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN); + /* + * Setting ALGO_CONST only for the first iteration + * and CLOSE_HASH only for the last one. + */ + if (ctx->flags & FLAGS_SHA1) + val |= SHA_REG_CTRL_ALGO; + if (!ctx->digcnt) + val |= SHA_REG_CTRL_ALGO_CONST; + if (final) + val |= SHA_REG_CTRL_CLOSE_HASH; + + mask = SHA_REG_CTRL_ALGO_CONST | SHA_REG_CTRL_CLOSE_HASH | + SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH; + + omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask); + + return 0; +} + +static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, + size_t length, int final) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + int err, count, len32; + const u32 *buffer = (const u32 *)buf; + + dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n", + ctx->digcnt, length, final); + + err = omap_sham_write_ctrl(dd, length, final, 0); + if (err) + return err; + + if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY)) + return -ETIMEDOUT; + + ctx->digcnt += length; + + if (final) + ctx->flags |= FLAGS_FINAL; /* catch last interrupt */ + + len32 = DIV_ROUND_UP(length, sizeof(u32)); + + for (count = 0; count < len32; count++) + omap_sham_write(dd, SHA_REG_DIN(count), buffer[count]); + + return -EINPROGRESS; +} + +static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, + size_t length, int final) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + int err, len32; + + dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n", + ctx->digcnt, length, final); + + /* flush cache entries related to our page */ + if (dma_addr == ctx->dma_addr) + dma_sync_single_for_device(dd->dev, dma_addr, length, + DMA_TO_DEVICE); + + len32 = DIV_ROUND_UP(length, sizeof(u32)); + + omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32, + 1, OMAP_DMA_SYNC_PACKET, dd->dma, OMAP_DMA_DST_SYNC); + + omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC, + dma_addr, 0, 0); + + err = omap_sham_write_ctrl(dd, length, final, 1); + if (err) + return err; + + ctx->digcnt += length; + + if (final) + ctx->flags |= FLAGS_FINAL; /* catch last interrupt */ + + dd->flags |= FLAGS_DMA_ACTIVE; + + omap_start_dma(dd->dma_lch); + + return -EINPROGRESS; +} + +static size_t omap_sham_append_buffer(struct omap_sham_reqctx *ctx, + const u8 *data, size_t length) +{ + size_t count = min(length, ctx->buflen - ctx->bufcnt); + + count = min(count, ctx->total); + if (count <= 0) + return 0; + memcpy(ctx->buffer + ctx->bufcnt, data, count); + ctx->bufcnt += count; + + return count; +} + +static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx) +{ + size_t count; + + while (ctx->sg) { + count = omap_sham_append_buffer(ctx, + sg_virt(ctx->sg) + ctx->offset, + ctx->sg->length - ctx->offset); + if (!count) + break; + ctx->offset += count; + ctx->total -= count; + if (ctx->offset == ctx->sg->length) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + else + ctx->total = 0; + } + } + + return 0; +} + +static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + unsigned int final; + size_t count; + + if (!ctx->total) + return 0; + + omap_sham_append_sg(ctx); + + final = (ctx->flags & FLAGS_FINUP) && !ctx->total; + + dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n", + ctx->bufcnt, ctx->digcnt, final); + + if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { + count = ctx->bufcnt; + ctx->bufcnt = 0; + return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final); + } + + return 0; +} + +static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + unsigned int length; + + ctx->flags |= FLAGS_FAST; + + length = min(ctx->total, sg_dma_len(ctx->sg)); + ctx->total = length; + + if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(dd->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->total -= length; + + return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); +} + +static int omap_sham_update_cpu(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + int bufcnt; + + omap_sham_append_sg(ctx); + bufcnt = ctx->bufcnt; + ctx->bufcnt = 0; + + return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, 1); +} + +static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + + omap_stop_dma(dd->dma_lch); + if (ctx->flags & FLAGS_FAST) + dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); + + return 0; +} + +static void omap_sham_cleanup(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = ctx->dd; + unsigned long flags; + + spin_lock_irqsave(&dd->lock, flags); + if (ctx->flags & FLAGS_CLEAN) { + spin_unlock_irqrestore(&dd->lock, flags); + return; + } + ctx->flags |= FLAGS_CLEAN; + spin_unlock_irqrestore(&dd->lock, flags); + + if (ctx->digcnt) + clk_disable(dd->iclk); + + if (ctx->dma_addr) + dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, + DMA_TO_DEVICE); + + if (ctx->buffer) + free_page((unsigned long)ctx->buffer); + + dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt); +} + +static int omap_sham_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm); + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = NULL, *tmp; + + spin_lock_bh(&sham.lock); + if (!tctx->dd) { + list_for_each_entry(tmp, &sham.dev_list, list) { + dd = tmp; + break; + } + tctx->dd = dd; + } else { + dd = tctx->dd; + } + spin_unlock_bh(&sham.lock); + + ctx->dd = dd; + + ctx->flags = 0; + + ctx->flags |= FLAGS_FIRST; + + dev_dbg(dd->dev, "init: digest size: %d\n", + crypto_ahash_digestsize(tfm)); + + if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE) + ctx->flags |= FLAGS_SHA1; + + ctx->bufcnt = 0; + ctx->digcnt = 0; + + ctx->buflen = PAGE_SIZE; + ctx->buffer = (void *)__get_free_page( + (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC); + if (!ctx->buffer) + return -ENOMEM; + + ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(dd->dev, ctx->dma_addr)) { + dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen); + free_page((unsigned long)ctx->buffer); + return -EINVAL; + } + + if (tctx->flags & FLAGS_HMAC) { + struct omap_sham_hmac_ctx *bctx = tctx->base; + + memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE); + ctx->bufcnt = SHA1_MD5_BLOCK_SIZE; + ctx->flags |= FLAGS_HMAC; + } + + return 0; + +} + +static int omap_sham_update_req(struct omap_sham_dev *dd) +{ + struct ahash_request *req = dd->req; + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err; + + dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n", + ctx->total, ctx->digcnt, (ctx->flags & FLAGS_FINUP) != 0); + + if (ctx->flags & FLAGS_CPU) + err = omap_sham_update_cpu(dd); + else if (ctx->flags & FLAGS_FAST) + err = omap_sham_update_dma_fast(dd); + else + err = omap_sham_update_dma_slow(dd); + + /* wait for dma completion before can take more data */ + dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt); + + return err; +} + +static int omap_sham_final_req(struct omap_sham_dev *dd) +{ + struct ahash_request *req = dd->req; + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err = 0, use_dma = 1; + + if (ctx->bufcnt <= 64) + /* faster to handle last block with cpu */ + use_dma = 0; + + if (use_dma) + err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1); + else + err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1); + + ctx->bufcnt = 0; + + if (err != -EINPROGRESS) + omap_sham_cleanup(req); + + dev_dbg(dd->dev, "final_req: err: %d\n", err); + + return err; +} + +static int omap_sham_finish_req_hmac(struct ahash_request *req) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct omap_sham_hmac_ctx *bctx = tctx->base; + int bs = crypto_shash_blocksize(bctx->shash); + int ds = crypto_shash_digestsize(bctx->shash); + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(bctx->shash)]; + } desc; + + desc.shash.tfm = bctx->shash; + desc.shash.flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */ + + return crypto_shash_init(&desc.shash) ?: + crypto_shash_update(&desc.shash, bctx->opad, bs) ?: + crypto_shash_finup(&desc.shash, req->result, ds, req->result); +} + +static void omap_sham_finish_req(struct ahash_request *req, int err) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + + if (!err) { + omap_sham_copy_hash(ctx->dd->req, 1); + if (ctx->flags & FLAGS_HMAC) + err = omap_sham_finish_req_hmac(req); + } + + if (ctx->flags & FLAGS_FINAL) + omap_sham_cleanup(req); + + clear_bit(FLAGS_BUSY, &ctx->dd->flags); + + if (req->base.complete) + req->base.complete(&req->base, err); +} + +static int omap_sham_handle_queue(struct omap_sham_dev *dd) +{ + struct crypto_async_request *async_req, *backlog; + struct omap_sham_reqctx *ctx; + struct ahash_request *req, *prev_req; + unsigned long flags; + int err = 0; + + if (test_and_set_bit(FLAGS_BUSY, &dd->flags)) + return 0; + + spin_lock_irqsave(&dd->lock, flags); + backlog = crypto_get_backlog(&dd->queue); + async_req = crypto_dequeue_request(&dd->queue); + if (!async_req) + clear_bit(FLAGS_BUSY, &dd->flags); + spin_unlock_irqrestore(&dd->lock, flags); + + if (!async_req) + return 0; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + + prev_req = dd->req; + dd->req = req; + + ctx = ahash_request_ctx(req); + + dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", + ctx->op, req->nbytes); + + if (req != prev_req && ctx->digcnt) + /* request has changed - restore hash */ + omap_sham_copy_hash(req, 0); + + if (ctx->op == OP_UPDATE) { + err = omap_sham_update_req(dd); + if (err != -EINPROGRESS && (ctx->flags & FLAGS_FINUP)) + /* no final() after finup() */ + err = omap_sham_final_req(dd); + } else if (ctx->op == OP_FINAL) { + err = omap_sham_final_req(dd); + } + + if (err != -EINPROGRESS) { + /* done_task will not finish it, so do it here */ + omap_sham_finish_req(req, err); + tasklet_schedule(&dd->queue_task); + } + + dev_dbg(dd->dev, "exit, err: %d\n", err); + + return err; +} + +static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct omap_sham_dev *dd = tctx->dd; + unsigned long flags; + int err; + + ctx->op = op; + + spin_lock_irqsave(&dd->lock, flags); + err = ahash_enqueue_request(&dd->queue, req); + spin_unlock_irqrestore(&dd->lock, flags); + + omap_sham_handle_queue(dd); + + return err; +} + +static int omap_sham_update(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + + if (!req->nbytes) + return 0; + + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->offset = 0; + + if (ctx->flags & FLAGS_FINUP) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + /* + * OMAP HW accel works only with buffers >= 9 + * will switch to bypass in final() + * final has the same request and data + */ + omap_sham_append_sg(ctx); + return 0; + } else if (ctx->bufcnt + ctx->total <= 64) { + ctx->flags |= FLAGS_CPU; + } else if (!ctx->bufcnt && sg_is_last(ctx->sg)) { + /* may be can use faster functions */ + int aligned = IS_ALIGNED((u32)ctx->sg->offset, + sizeof(u32)); + + if (aligned && (ctx->flags & FLAGS_FIRST)) + /* digest: first and final */ + ctx->flags |= FLAGS_FAST; + + ctx->flags &= ~FLAGS_FIRST; + } + } else if (ctx->bufcnt + ctx->total <= ctx->buflen) { + /* if not finaup -> not fast */ + omap_sham_append_sg(ctx); + return 0; + } + + return omap_sham_enqueue(req, OP_UPDATE); +} + +static int omap_sham_shash_digest(struct crypto_shash *shash, u32 flags, + const u8 *data, unsigned int len, u8 *out) +{ + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(shash)]; + } desc; + + desc.shash.tfm = shash; + desc.shash.flags = flags & CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_shash_digest(&desc.shash, data, len, out); +} + +static int omap_sham_final_shash(struct ahash_request *req) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + + return omap_sham_shash_digest(tctx->fallback, req->base.flags, + ctx->buffer, ctx->bufcnt, req->result); +} + +static int omap_sham_final(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err = 0; + + ctx->flags |= FLAGS_FINUP; + + /* OMAP HW accel works only with buffers >= 9 */ + /* HMAC is always >= 9 because of ipad */ + if ((ctx->digcnt + ctx->bufcnt) < 9) + err = omap_sham_final_shash(req); + else if (ctx->bufcnt) + return omap_sham_enqueue(req, OP_FINAL); + + omap_sham_cleanup(req); + + return err; +} + +static int omap_sham_finup(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int err1, err2; + + ctx->flags |= FLAGS_FINUP; + + err1 = omap_sham_update(req); + if (err1 == -EINPROGRESS) + return err1; + /* + * final() has to be always called to cleanup resources + * even if udpate() failed, except EINPROGRESS + */ + err2 = omap_sham_final(req); + + return err1 ?: err2; +} + +static int omap_sham_digest(struct ahash_request *req) +{ + return omap_sham_init(req) ?: omap_sham_finup(req); +} + +static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm); + struct omap_sham_hmac_ctx *bctx = tctx->base; + int bs = crypto_shash_blocksize(bctx->shash); + int ds = crypto_shash_digestsize(bctx->shash); + int err, i; + err = crypto_shash_setkey(tctx->fallback, key, keylen); + if (err) + return err; + + if (keylen > bs) { + err = omap_sham_shash_digest(bctx->shash, + crypto_shash_get_flags(bctx->shash), + key, keylen, bctx->ipad); + if (err) + return err; + keylen = ds; + } else { + memcpy(bctx->ipad, key, keylen); + } + + memset(bctx->ipad + keylen, 0, bs - keylen); + memcpy(bctx->opad, bctx->ipad, bs); + + for (i = 0; i < bs; i++) { + bctx->ipad[i] ^= 0x36; + bctx->opad[i] ^= 0x5c; + } + + return err; +} + +static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm); + const char *alg_name = crypto_tfm_alg_name(tfm); + + /* Allocate a fallback and abort if it failed. */ + tctx->fallback = crypto_alloc_shash(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(tctx->fallback)) { + pr_err("omap-sham: fallback driver '%s' " + "could not be loaded.\n", alg_name); + return PTR_ERR(tctx->fallback); + } + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct omap_sham_reqctx)); + + if (alg_base) { + struct omap_sham_hmac_ctx *bctx = tctx->base; + tctx->flags |= FLAGS_HMAC; + bctx->shash = crypto_alloc_shash(alg_base, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(bctx->shash)) { + pr_err("omap-sham: base driver '%s' " + "could not be loaded.\n", alg_base); + crypto_free_shash(tctx->fallback); + return PTR_ERR(bctx->shash); + } + + } + + return 0; +} + +static int omap_sham_cra_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, NULL); +} + +static int omap_sham_cra_sha1_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, "sha1"); +} + +static int omap_sham_cra_md5_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, "md5"); +} + +static void omap_sham_cra_exit(struct crypto_tfm *tfm) +{ + struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm); + + crypto_free_shash(tctx->fallback); + tctx->fallback = NULL; + + if (tctx->flags & FLAGS_HMAC) { + struct omap_sham_hmac_ctx *bctx = tctx->base; + crypto_free_shash(bctx->shash); + } +} + +static struct ahash_alg algs[] = { +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "omap-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "md5", + .cra_driver_name = "omap-md5", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .setkey = omap_sham_setkey, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "omap-hmac-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx) + + sizeof(struct omap_sham_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_sha1_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .setkey = omap_sham_setkey, + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(md5)", + .cra_driver_name = "omap-hmac-md5", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx) + + sizeof(struct omap_sham_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_md5_init, + .cra_exit = omap_sham_cra_exit, + } +} +}; + +static void omap_sham_done_task(unsigned long data) +{ + struct omap_sham_dev *dd = (struct omap_sham_dev *)data; + struct ahash_request *req = dd->req; + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + int ready = 1; + + if (ctx->flags & FLAGS_OUTPUT_READY) { + ctx->flags &= ~FLAGS_OUTPUT_READY; + ready = 1; + } + + if (dd->flags & FLAGS_DMA_ACTIVE) { + dd->flags &= ~FLAGS_DMA_ACTIVE; + omap_sham_update_dma_stop(dd); + omap_sham_update_dma_slow(dd); + } + + if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) { + dev_dbg(dd->dev, "update done\n"); + /* finish curent request */ + omap_sham_finish_req(req, 0); + /* start new request */ + omap_sham_handle_queue(dd); + } +} + +static void omap_sham_queue_task(unsigned long data) +{ + struct omap_sham_dev *dd = (struct omap_sham_dev *)data; + + omap_sham_handle_queue(dd); +} + +static irqreturn_t omap_sham_irq(int irq, void *dev_id) +{ + struct omap_sham_dev *dd = dev_id; + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + + if (!ctx) { + dev_err(dd->dev, "unknown interrupt.\n"); + return IRQ_HANDLED; + } + + if (unlikely(ctx->flags & FLAGS_FINAL)) + /* final -> allow device to go to power-saving mode */ + omap_sham_write_mask(dd, SHA_REG_CTRL, 0, SHA_REG_CTRL_LENGTH); + + omap_sham_write_mask(dd, SHA_REG_CTRL, SHA_REG_CTRL_OUTPUT_READY, + SHA_REG_CTRL_OUTPUT_READY); + omap_sham_read(dd, SHA_REG_CTRL); + + ctx->flags |= FLAGS_OUTPUT_READY; + tasklet_schedule(&dd->done_task); + + return IRQ_HANDLED; +} + +static void omap_sham_dma_callback(int lch, u16 ch_status, void *data) +{ + struct omap_sham_dev *dd = data; + + if (likely(lch == dd->dma_lch)) + tasklet_schedule(&dd->done_task); +} + +static int omap_sham_dma_init(struct omap_sham_dev *dd) +{ + int err; + + dd->dma_lch = -1; + + err = omap_request_dma(dd->dma, dev_name(dd->dev), + omap_sham_dma_callback, dd, &dd->dma_lch); + if (err) { + dev_err(dd->dev, "Unable to request DMA channel\n"); + return err; + } + omap_set_dma_dest_params(dd->dma_lch, 0, + OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + SHA_REG_DIN(0), 0, 16); + + omap_set_dma_dest_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_16); + + return 0; +} + +static void omap_sham_dma_cleanup(struct omap_sham_dev *dd) +{ + if (dd->dma_lch >= 0) { + omap_free_dma(dd->dma_lch); + dd->dma_lch = -1; + } +} + +static int __devinit omap_sham_probe(struct platform_device *pdev) +{ + struct omap_sham_dev *dd; + struct device *dev = &pdev->dev; + struct resource *res; + int err, i, j; + + dd = kzalloc(sizeof(struct omap_sham_dev), GFP_KERNEL); + if (dd == NULL) { + dev_err(dev, "unable to alloc data struct.\n"); + err = -ENOMEM; + goto data_err; + } + dd->dev = dev; + platform_set_drvdata(pdev, dd); + + INIT_LIST_HEAD(&dd->list); + spin_lock_init(&dd->lock); + tasklet_init(&dd->done_task, omap_sham_done_task, (unsigned long)dd); + tasklet_init(&dd->queue_task, omap_sham_queue_task, (unsigned long)dd); + crypto_init_queue(&dd->queue, OMAP_SHAM_QUEUE_LENGTH); + + dd->irq = -1; + + /* Get the base address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "no MEM resource info\n"); + err = -ENODEV; + goto res_err; + } + dd->phys_base = res->start; + + /* Get the DMA */ + res = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!res) { + dev_err(dev, "no DMA resource info\n"); + err = -ENODEV; + goto res_err; + } + dd->dma = res->start; + + /* Get the IRQ */ + dd->irq = platform_get_irq(pdev, 0); + if (dd->irq < 0) { + dev_err(dev, "no IRQ resource info\n"); + err = dd->irq; + goto res_err; + } + + err = request_irq(dd->irq, omap_sham_irq, + IRQF_TRIGGER_LOW, dev_name(dev), dd); + if (err) { + dev_err(dev, "unable to request irq.\n"); + goto res_err; + } + + err = omap_sham_dma_init(dd); + if (err) + goto dma_err; + + /* Initializing the clock */ + dd->iclk = clk_get(dev, "ick"); + if (!dd->iclk) { + dev_err(dev, "clock intialization failed.\n"); + err = -ENODEV; + goto clk_err; + } + + dd->io_base = ioremap(dd->phys_base, SZ_4K); + if (!dd->io_base) { + dev_err(dev, "can't ioremap\n"); + err = -ENOMEM; + goto io_err; + } + + clk_enable(dd->iclk); + dev_info(dev, "hw accel on OMAP rev %u.%u\n", + (omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MAJOR) >> 4, + omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MINOR); + clk_disable(dd->iclk); + + spin_lock(&sham.lock); + list_add_tail(&dd->list, &sham.dev_list); + spin_unlock(&sham.lock); + + for (i = 0; i < ARRAY_SIZE(algs); i++) { + err = crypto_register_ahash(&algs[i]); + if (err) + goto err_algs; + } + + return 0; + +err_algs: + for (j = 0; j < i; j++) + crypto_unregister_ahash(&algs[j]); + iounmap(dd->io_base); +io_err: + clk_put(dd->iclk); +clk_err: + omap_sham_dma_cleanup(dd); +dma_err: + if (dd->irq >= 0) + free_irq(dd->irq, dd); +res_err: + kfree(dd); + dd = NULL; +data_err: + dev_err(dev, "initialization failed.\n"); + + return err; +} + +static int __devexit omap_sham_remove(struct platform_device *pdev) +{ + static struct omap_sham_dev *dd; + int i; + + dd = platform_get_drvdata(pdev); + if (!dd) + return -ENODEV; + spin_lock(&sham.lock); + list_del(&dd->list); + spin_unlock(&sham.lock); + for (i = 0; i < ARRAY_SIZE(algs); i++) + crypto_unregister_ahash(&algs[i]); + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); + iounmap(dd->io_base); + clk_put(dd->iclk); + omap_sham_dma_cleanup(dd); + if (dd->irq >= 0) + free_irq(dd->irq, dd); + kfree(dd); + dd = NULL; + + return 0; +} + +static struct platform_driver omap_sham_driver = { + .probe = omap_sham_probe, + .remove = omap_sham_remove, + .driver = { + .name = "omap-sham", + .owner = THIS_MODULE, + }, +}; + +static int __init omap_sham_mod_init(void) +{ + pr_info("loading %s driver\n", "omap-sham"); + + if (!cpu_class_is_omap2() || + omap_type() != OMAP2_DEVICE_TYPE_SEC) { + pr_err("Unsupported cpu\n"); + return -ENODEV; + } + + return platform_driver_register(&omap_sham_driver); +} + +static void __exit omap_sham_mod_exit(void) +{ + platform_driver_unregister(&omap_sham_driver); +} + +module_init(omap_sham_mod_init); +module_exit(omap_sham_mod_exit); + +MODULE_DESCRIPTION("OMAP SHA1/MD5 hw acceleration support."); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Dmitry Kasatkin"); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index dc558a097311..6a0f59d1fc5c 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1,7 +1,7 @@ /* * talitos - Freescale Integrated Security Engine (SEC) device driver * - * Copyright (c) 2008 Freescale Semiconductor, Inc. + * Copyright (c) 2008-2010 Freescale Semiconductor, Inc. * * Scatterlist Crypto API glue code copied from files with the following: * Copyright (c) 2006-2007 Herbert Xu @@ -43,9 +43,12 @@ #include #include #include +#include #include #include #include +#include +#include #include #include "talitos.h" @@ -65,6 +68,13 @@ struct talitos_ptr { __be32 ptr; /* address */ }; +static const struct talitos_ptr zero_entry = { + .len = 0, + .j_extent = 0, + .eptr = 0, + .ptr = 0 +}; + /* descriptor */ struct talitos_desc { __be32 hdr; /* header high bits */ @@ -146,6 +156,7 @@ struct talitos_private { /* .features flag */ #define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001 #define TALITOS_FTR_HW_AUTH_CHECK 0x00000002 +#define TALITOS_FTR_SHA224_HWINIT 0x00000004 static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr) { @@ -692,7 +703,7 @@ static void talitos_unregister_rng(struct device *dev) #define TALITOS_MAX_KEY_SIZE 64 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ -#define MD5_DIGEST_SIZE 16 +#define MD5_BLOCK_SIZE 64 struct talitos_ctx { struct device *dev; @@ -705,6 +716,23 @@ struct talitos_ctx { unsigned int authsize; }; +#define HASH_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE +#define TALITOS_MDEU_MAX_CONTEXT_SIZE TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512 + +struct talitos_ahash_req_ctx { + u64 count; + u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)]; + unsigned int hw_context_size; + u8 buf[HASH_MAX_BLOCK_SIZE]; + u8 bufnext[HASH_MAX_BLOCK_SIZE]; + unsigned int swinit; + unsigned int first; + unsigned int last; + unsigned int to_hash_later; + struct scatterlist bufsl[2]; + struct scatterlist *psrc; +}; + static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { @@ -821,10 +849,14 @@ static void talitos_sg_unmap(struct device *dev, else dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - if (edesc->dst_is_chained) - talitos_unmap_sg_chain(dev, dst, DMA_FROM_DEVICE); - else - dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); + if (dst) { + if (edesc->dst_is_chained) + talitos_unmap_sg_chain(dev, dst, + DMA_FROM_DEVICE); + else + dma_unmap_sg(dev, dst, dst_nents, + DMA_FROM_DEVICE); + } } else if (edesc->src_is_chained) talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL); @@ -1114,12 +1146,67 @@ static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained) return sg_nents; } +/** + * sg_copy_end_to_buffer - Copy end data from SG list to a linear buffer + * @sgl: The SG list + * @nents: Number of SG entries + * @buf: Where to copy to + * @buflen: The number of bytes to copy + * @skip: The number of bytes to skip before copying. + * Note: skip + buflen should equal SG total size. + * + * Returns the number of copied bytes. + * + **/ +static size_t sg_copy_end_to_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen, unsigned int skip) +{ + unsigned int offset = 0; + unsigned int boffset = 0; + struct sg_mapping_iter miter; + unsigned long flags; + unsigned int sg_flags = SG_MITER_ATOMIC; + size_t total_buffer = buflen + skip; + + sg_flags |= SG_MITER_FROM_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + local_irq_save(flags); + + while (sg_miter_next(&miter) && offset < total_buffer) { + unsigned int len; + unsigned int ignore; + + if ((offset + miter.length) > skip) { + if (offset < skip) { + /* Copy part of this segment */ + ignore = skip - offset; + len = miter.length - ignore; + memcpy(buf + boffset, miter.addr + ignore, len); + } else { + /* Copy all of this segment */ + len = miter.length; + memcpy(buf + boffset, miter.addr, len); + } + boffset += len; + } + offset += miter.length; + } + + sg_miter_stop(&miter); + + local_irq_restore(flags); + return boffset; +} + /* * allocate and map the extended descriptor */ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, struct scatterlist *src, struct scatterlist *dst, + int hash_result, unsigned int cryptlen, unsigned int authsize, int icv_stashing, @@ -1139,11 +1226,16 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, src_nents = sg_count(src, cryptlen + authsize, &src_chained); src_nents = (src_nents == 1) ? 0 : src_nents; - if (dst == src) { - dst_nents = src_nents; + if (hash_result) { + dst_nents = 0; } else { - dst_nents = sg_count(dst, cryptlen + authsize, &dst_chained); - dst_nents = (dst_nents == 1) ? 0 : dst_nents; + if (dst == src) { + dst_nents = src_nents; + } else { + dst_nents = sg_count(dst, cryptlen + authsize, + &dst_chained); + dst_nents = (dst_nents == 1) ? 0 : dst_nents; + } } /* @@ -1172,8 +1264,10 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, edesc->src_is_chained = src_chained; edesc->dst_is_chained = dst_chained; edesc->dma_len = dma_len; - edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0], - edesc->dma_len, DMA_BIDIRECTIONAL); + if (dma_len) + edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0], + edesc->dma_len, + DMA_BIDIRECTIONAL); return edesc; } @@ -1184,7 +1278,7 @@ static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct talitos_ctx *ctx = crypto_aead_ctx(authenc); - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, + return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, 0, areq->cryptlen, ctx->authsize, icv_stashing, areq->base.flags); } @@ -1441,8 +1535,8 @@ static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request * struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq); struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher); - return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, areq->nbytes, - 0, 0, areq->base.flags); + return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, 0, + areq->nbytes, 0, 0, areq->base.flags); } static int ablkcipher_encrypt(struct ablkcipher_request *areq) @@ -1478,15 +1572,329 @@ static int ablkcipher_decrypt(struct ablkcipher_request *areq) return common_nonsnoop(edesc, areq, NULL, ablkcipher_done); } +static void common_nonsnoop_hash_unmap(struct device *dev, + struct talitos_edesc *edesc, + struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + + /* When using hashctx-in, must unmap it. */ + if (edesc->desc.ptr[1].len) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], + DMA_TO_DEVICE); + + if (edesc->desc.ptr[2].len) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], + DMA_TO_DEVICE); + + talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL); + + if (edesc->dma_len) + dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, + DMA_BIDIRECTIONAL); + +} + +static void ahash_done(struct device *dev, + struct talitos_desc *desc, void *context, + int err) +{ + struct ahash_request *areq = context; + struct talitos_edesc *edesc = + container_of(desc, struct talitos_edesc, desc); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + if (!req_ctx->last && req_ctx->to_hash_later) { + /* Position any partial block for next update/final/finup */ + memcpy(req_ctx->buf, req_ctx->bufnext, req_ctx->to_hash_later); + } + common_nonsnoop_hash_unmap(dev, edesc, areq); + + kfree(edesc); + + areq->base.complete(&areq->base, err); +} + +static int common_nonsnoop_hash(struct talitos_edesc *edesc, + struct ahash_request *areq, unsigned int length, + void (*callback) (struct device *dev, + struct talitos_desc *desc, + void *context, int error)) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct device *dev = ctx->dev; + struct talitos_desc *desc = &edesc->desc; + int sg_count, ret; + + /* first DWORD empty */ + desc->ptr[0] = zero_entry; + + /* hash context in */ + if (!req_ctx->first || req_ctx->swinit) { + map_single_talitos_ptr(dev, &desc->ptr[1], + req_ctx->hw_context_size, + (char *)req_ctx->hw_context, 0, + DMA_TO_DEVICE); + req_ctx->swinit = 0; + } else { + desc->ptr[1] = zero_entry; + /* Indicate next op is not the first. */ + req_ctx->first = 0; + } + + /* HMAC key */ + if (ctx->keylen) + map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, + (char *)&ctx->key, 0, DMA_TO_DEVICE); + else + desc->ptr[2] = zero_entry; + + /* + * data in + */ + desc->ptr[3].len = cpu_to_be16(length); + desc->ptr[3].j_extent = 0; + + sg_count = talitos_map_sg(dev, req_ctx->psrc, + edesc->src_nents ? : 1, + DMA_TO_DEVICE, + edesc->src_is_chained); + + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[3], sg_dma_address(req_ctx->psrc)); + } else { + sg_count = sg_to_link_tbl(req_ctx->psrc, sg_count, length, + &edesc->link_tbl[0]); + if (sg_count > 1) { + desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl); + dma_sync_single_for_device(ctx->dev, + edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed */ + to_talitos_ptr(&desc->ptr[3], + sg_dma_address(req_ctx->psrc)); + } + } + + /* fifth DWORD empty */ + desc->ptr[4] = zero_entry; + + /* hash/HMAC out -or- hash context out */ + if (req_ctx->last) + map_single_talitos_ptr(dev, &desc->ptr[5], + crypto_ahash_digestsize(tfm), + areq->result, 0, DMA_FROM_DEVICE); + else + map_single_talitos_ptr(dev, &desc->ptr[5], + req_ctx->hw_context_size, + req_ctx->hw_context, 0, DMA_FROM_DEVICE); + + /* last DWORD empty */ + desc->ptr[6] = zero_entry; + + ret = talitos_submit(dev, desc, callback, areq); + if (ret != -EINPROGRESS) { + common_nonsnoop_hash_unmap(dev, edesc, areq); + kfree(edesc); + } + return ret; +} + +static struct talitos_edesc *ahash_edesc_alloc(struct ahash_request *areq, + unsigned int nbytes) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + return talitos_edesc_alloc(ctx->dev, req_ctx->psrc, NULL, 1, + nbytes, 0, 0, areq->base.flags); +} + +static int ahash_init(struct ahash_request *areq) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + /* Initialize the context */ + req_ctx->count = 0; + req_ctx->first = 1; /* first indicates h/w must init its context */ + req_ctx->swinit = 0; /* assume h/w init of context */ + req_ctx->hw_context_size = + (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) + ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 + : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; + + return 0; +} + +/* + * on h/w without explicit sha224 support, we initialize h/w context + * manually with sha224 constants, and tell it to run sha256. + */ +static int ahash_init_sha224_swinit(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + ahash_init(areq); + req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/ + + req_ctx->hw_context[0] = cpu_to_be32(SHA224_H0); + req_ctx->hw_context[1] = cpu_to_be32(SHA224_H1); + req_ctx->hw_context[2] = cpu_to_be32(SHA224_H2); + req_ctx->hw_context[3] = cpu_to_be32(SHA224_H3); + req_ctx->hw_context[4] = cpu_to_be32(SHA224_H4); + req_ctx->hw_context[5] = cpu_to_be32(SHA224_H5); + req_ctx->hw_context[6] = cpu_to_be32(SHA224_H6); + req_ctx->hw_context[7] = cpu_to_be32(SHA224_H7); + + /* init 64-bit count */ + req_ctx->hw_context[8] = 0; + req_ctx->hw_context[9] = 0; + + return 0; +} + +static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct talitos_edesc *edesc; + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int nbytes_to_hash; + unsigned int to_hash_later; + unsigned int index; + int chained; + + index = req_ctx->count & (blocksize - 1); + req_ctx->count += nbytes; + + if (!req_ctx->last && (index + nbytes) < blocksize) { + /* Buffer the partial block */ + sg_copy_to_buffer(areq->src, + sg_count(areq->src, nbytes, &chained), + req_ctx->buf + index, nbytes); + return 0; + } + + if (index) { + /* partial block from previous update; chain it in. */ + sg_init_table(req_ctx->bufsl, (nbytes) ? 2 : 1); + sg_set_buf(req_ctx->bufsl, req_ctx->buf, index); + if (nbytes) + scatterwalk_sg_chain(req_ctx->bufsl, 2, + areq->src); + req_ctx->psrc = req_ctx->bufsl; + } else { + req_ctx->psrc = areq->src; + } + nbytes_to_hash = index + nbytes; + if (!req_ctx->last) { + to_hash_later = (nbytes_to_hash & (blocksize - 1)); + if (to_hash_later) { + int nents; + /* Must copy to_hash_later bytes from the end + * to bufnext (a partial block) for later. + */ + nents = sg_count(areq->src, nbytes, &chained); + sg_copy_end_to_buffer(areq->src, nents, + req_ctx->bufnext, + to_hash_later, + nbytes - to_hash_later); + + /* Adjust count for what will be hashed now */ + nbytes_to_hash -= to_hash_later; + } + req_ctx->to_hash_later = to_hash_later; + } + + /* allocate extended descriptor */ + edesc = ahash_edesc_alloc(areq, nbytes_to_hash); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + edesc->desc.hdr = ctx->desc_hdr_template; + + /* On last one, request SEC to pad; otherwise continue */ + if (req_ctx->last) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD; + else + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT; + + /* request SEC to INIT hash. */ + if (req_ctx->first && !req_ctx->swinit) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT; + + /* When the tfm context has a keylen, it's an HMAC. + * A first or last (ie. not middle) descriptor must request HMAC. + */ + if (ctx->keylen && (req_ctx->first || req_ctx->last)) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC; + + return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, + ahash_done); +} + +static int ahash_update(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->last = 0; + + return ahash_process_req(areq, areq->nbytes); +} + +static int ahash_final(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->last = 1; + + return ahash_process_req(areq, 0); +} + +static int ahash_finup(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + + req_ctx->last = 1; + + return ahash_process_req(areq, areq->nbytes); +} + +static int ahash_digest(struct ahash_request *areq) +{ + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); + + ahash->init(areq); + req_ctx->last = 1; + + return ahash_process_req(areq, areq->nbytes); +} + struct talitos_alg_template { - struct crypto_alg alg; + u32 type; + union { + struct crypto_alg crypto; + struct ahash_alg hash; + } alg; __be32 desc_hdr_template; }; static struct talitos_alg_template driver_algs[] = { /* AEAD algorithms. These use a single-pass ipsec_esp descriptor */ - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha1),cbc(aes))", .cra_driver_name = "authenc-hmac-sha1-cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1511,8 +1919,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha1),cbc(des3_ede))", .cra_driver_name = "authenc-hmac-sha1-cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1538,8 +1946,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha256),cbc(aes))", .cra_driver_name = "authenc-hmac-sha256-cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1564,8 +1972,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(sha256),cbc(des3_ede))", .cra_driver_name = "authenc-hmac-sha256-cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1591,8 +1999,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(md5),cbc(aes))", .cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1617,8 +2025,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_AEAD, + .alg.crypto = { .cra_name = "authenc(hmac(md5),cbc(des3_ede))", .cra_driver_name = "authenc-hmac-md5-cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1645,8 +2053,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_MD5_HMAC, }, /* ABLKCIPHER algorithms. */ - { - .alg = { + { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-talitos", .cra_blocksize = AES_BLOCK_SIZE, @@ -1667,8 +2075,8 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CBC, }, - { - .alg = { + { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { .cra_name = "cbc(des3_ede)", .cra_driver_name = "cbc-3des-talitos", .cra_blocksize = DES3_EDE_BLOCK_SIZE, @@ -1689,14 +2097,140 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_SEL0_DEU | DESC_HDR_MODE0_DEU_CBC | DESC_HDR_MODE0_DEU_3DES, - } + }, + /* AHASH algorithms. */ + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "md5", + .cra_driver_name = "md5-talitos", + .cra_blocksize = MD5_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_MD5, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-talitos", + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA1, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-talitos", + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA224, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-talitos", + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA256, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-talitos", + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUB | + DESC_HDR_MODE0_MDEUB_SHA384, + }, + { .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-talitos", + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + .cra_type = &crypto_ahash_type + } + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUB | + DESC_HDR_MODE0_MDEUB_SHA512, + }, }; struct talitos_crypto_alg { struct list_head entry; struct device *dev; - __be32 desc_hdr_template; - struct crypto_alg crypto_alg; + struct talitos_alg_template algt; }; static int talitos_cra_init(struct crypto_tfm *tfm) @@ -1705,13 +2239,28 @@ static int talitos_cra_init(struct crypto_tfm *tfm) struct talitos_crypto_alg *talitos_alg; struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); - talitos_alg = container_of(alg, struct talitos_crypto_alg, crypto_alg); + if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) + talitos_alg = container_of(__crypto_ahash_alg(alg), + struct talitos_crypto_alg, + algt.alg.hash); + else + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.crypto); /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; /* copy descriptor header template value */ - ctx->desc_hdr_template = talitos_alg->desc_hdr_template; + ctx->desc_hdr_template = talitos_alg->algt.desc_hdr_template; + + return 0; +} + +static int talitos_cra_init_aead(struct crypto_tfm *tfm) +{ + struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); + + talitos_cra_init(tfm); /* random first IV */ get_random_bytes(ctx->iv, TALITOS_MAX_IV_LENGTH); @@ -1719,6 +2268,19 @@ static int talitos_cra_init(struct crypto_tfm *tfm) return 0; } +static int talitos_cra_init_ahash(struct crypto_tfm *tfm) +{ + struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); + + talitos_cra_init(tfm); + + ctx->keylen = 0; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct talitos_ahash_req_ctx)); + + return 0; +} + /* * given the alg's descriptor header template, determine whether descriptor * type and primary/secondary execution units required match the hw @@ -1747,7 +2309,15 @@ static int talitos_remove(struct of_device *ofdev) int i; list_for_each_entry_safe(t_alg, n, &priv->alg_list, entry) { - crypto_unregister_alg(&t_alg->crypto_alg); + switch (t_alg->algt.type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + case CRYPTO_ALG_TYPE_AEAD: + crypto_unregister_alg(&t_alg->algt.alg.crypto); + break; + case CRYPTO_ALG_TYPE_AHASH: + crypto_unregister_ahash(&t_alg->algt.alg.hash); + break; + } list_del(&t_alg->entry); kfree(t_alg); } @@ -1781,6 +2351,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, struct talitos_alg_template *template) { + struct talitos_private *priv = dev_get_drvdata(dev); struct talitos_crypto_alg *t_alg; struct crypto_alg *alg; @@ -1788,16 +2359,36 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, if (!t_alg) return ERR_PTR(-ENOMEM); - alg = &t_alg->crypto_alg; - *alg = template->alg; + t_alg->algt = *template; + + switch (t_alg->algt.type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + alg = &t_alg->algt.alg.crypto; + alg->cra_init = talitos_cra_init; + break; + case CRYPTO_ALG_TYPE_AEAD: + alg = &t_alg->algt.alg.crypto; + alg->cra_init = talitos_cra_init_aead; + break; + case CRYPTO_ALG_TYPE_AHASH: + alg = &t_alg->algt.alg.hash.halg.base; + alg->cra_init = talitos_cra_init_ahash; + if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) && + !strcmp(alg->cra_name, "sha224")) { + t_alg->algt.alg.hash.init = ahash_init_sha224_swinit; + t_alg->algt.desc_hdr_template = + DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_MDEUA | + DESC_HDR_MODE0_MDEU_SHA256; + } + break; + } alg->cra_module = THIS_MODULE; - alg->cra_init = talitos_cra_init; alg->cra_priority = TALITOS_CRA_PRIORITY; alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); - t_alg->desc_hdr_template = template->desc_hdr_template; t_alg->dev = dev; return t_alg; @@ -1877,7 +2468,8 @@ static int talitos_probe(struct of_device *ofdev, priv->features |= TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT; if (of_device_is_compatible(np, "fsl,sec2.1")) - priv->features |= TALITOS_FTR_HW_AUTH_CHECK; + priv->features |= TALITOS_FTR_HW_AUTH_CHECK | + TALITOS_FTR_SHA224_HWINIT; priv->chan = kzalloc(sizeof(struct talitos_channel) * priv->num_channels, GFP_KERNEL); @@ -1931,6 +2523,7 @@ static int talitos_probe(struct of_device *ofdev, for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { if (hw_supports(dev, driver_algs[i].desc_hdr_template)) { struct talitos_crypto_alg *t_alg; + char *name = NULL; t_alg = talitos_alg_alloc(dev, &driver_algs[i]); if (IS_ERR(t_alg)) { @@ -1938,15 +2531,27 @@ static int talitos_probe(struct of_device *ofdev, goto err_out; } - err = crypto_register_alg(&t_alg->crypto_alg); + switch (t_alg->algt.type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + case CRYPTO_ALG_TYPE_AEAD: + err = crypto_register_alg( + &t_alg->algt.alg.crypto); + name = t_alg->algt.alg.crypto.cra_driver_name; + break; + case CRYPTO_ALG_TYPE_AHASH: + err = crypto_register_ahash( + &t_alg->algt.alg.hash); + name = + t_alg->algt.alg.hash.halg.base.cra_driver_name; + break; + } if (err) { dev_err(dev, "%s alg registration failed\n", - t_alg->crypto_alg.cra_driver_name); + name); kfree(t_alg); } else { list_add_tail(&t_alg->entry, &priv->alg_list); - dev_info(dev, "%s\n", - t_alg->crypto_alg.cra_driver_name); + dev_info(dev, "%s\n", name); } } } diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index ff5a1450e145..0b746aca4587 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -1,7 +1,7 @@ /* * Freescale SEC (talitos) device register and descriptor header defines * - * Copyright (c) 2006-2008 Freescale Semiconductor, Inc. + * Copyright (c) 2006-2010 Freescale Semiconductor, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -130,6 +130,9 @@ #define TALITOS_CRCUISR 0xf030 /* cyclic redundancy check unit*/ #define TALITOS_CRCUISR_LO 0xf034 +#define TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 0x28 +#define TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512 0x48 + /* * talitos descriptor header (hdr) bits */ @@ -157,12 +160,16 @@ #define DESC_HDR_MODE0_AESU_CBC cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_DEU_CBC cpu_to_be32(0x00400000) #define DESC_HDR_MODE0_DEU_3DES cpu_to_be32(0x00200000) +#define DESC_HDR_MODE0_MDEU_CONT cpu_to_be32(0x08000000) #define DESC_HDR_MODE0_MDEU_INIT cpu_to_be32(0x01000000) #define DESC_HDR_MODE0_MDEU_HMAC cpu_to_be32(0x00800000) #define DESC_HDR_MODE0_MDEU_PAD cpu_to_be32(0x00400000) +#define DESC_HDR_MODE0_MDEU_SHA224 cpu_to_be32(0x00300000) #define DESC_HDR_MODE0_MDEU_MD5 cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_MDEU_SHA256 cpu_to_be32(0x00100000) #define DESC_HDR_MODE0_MDEU_SHA1 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE0_MDEUB_SHA384 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE0_MDEUB_SHA512 cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_MDEU_MD5_HMAC (DESC_HDR_MODE0_MDEU_MD5 | \ DESC_HDR_MODE0_MDEU_HMAC) #define DESC_HDR_MODE0_MDEU_SHA256_HMAC (DESC_HDR_MODE0_MDEU_SHA256 | \ @@ -181,9 +188,12 @@ #define DESC_HDR_MODE1_MDEU_INIT cpu_to_be32(0x00001000) #define DESC_HDR_MODE1_MDEU_HMAC cpu_to_be32(0x00000800) #define DESC_HDR_MODE1_MDEU_PAD cpu_to_be32(0x00000400) +#define DESC_HDR_MODE1_MDEU_SHA224 cpu_to_be32(0x00000300) #define DESC_HDR_MODE1_MDEU_MD5 cpu_to_be32(0x00000200) #define DESC_HDR_MODE1_MDEU_SHA256 cpu_to_be32(0x00000100) #define DESC_HDR_MODE1_MDEU_SHA1 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE1_MDEUB_SHA384 cpu_to_be32(0x00000000) +#define DESC_HDR_MODE1_MDEUB_SHA512 cpu_to_be32(0x00000200) #define DESC_HDR_MODE1_MDEU_MD5_HMAC (DESC_HDR_MODE1_MDEU_MD5 | \ DESC_HDR_MODE1_MDEU_HMAC) #define DESC_HDR_MODE1_MDEU_SHA256_HMAC (DESC_HDR_MODE1_MDEU_SHA256 | \ diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index fc0d575c71e0..59c3e5bd2c06 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -103,6 +103,23 @@ struct blkcipher_walk { unsigned int blocksize; }; +struct ablkcipher_walk { + struct { + struct page *page; + unsigned int offset; + } src, dst; + + struct scatter_walk in; + unsigned int nbytes; + struct scatter_walk out; + unsigned int total; + struct list_head buffers; + u8 *iv_buffer; + u8 *iv; + int flags; + unsigned int blocksize; +}; + extern const struct crypto_type crypto_ablkcipher_type; extern const struct crypto_type crypto_aead_type; extern const struct crypto_type crypto_blkcipher_type; @@ -173,6 +190,12 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, struct blkcipher_walk *walk, unsigned int blocksize); +int ablkcipher_walk_done(struct ablkcipher_request *req, + struct ablkcipher_walk *walk, int err); +int ablkcipher_walk_phys(struct ablkcipher_request *req, + struct ablkcipher_walk *walk); +void __ablkcipher_walk_complete(struct ablkcipher_walk *walk); + static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm) { return PTR_ALIGN(crypto_tfm_ctx(tfm), @@ -283,6 +306,23 @@ static inline void blkcipher_walk_init(struct blkcipher_walk *walk, walk->total = nbytes; } +static inline void ablkcipher_walk_init(struct ablkcipher_walk *walk, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + walk->in.sg = src; + walk->out.sg = dst; + walk->total = nbytes; + INIT_LIST_HEAD(&walk->buffers); +} + +static inline void ablkcipher_walk_complete(struct ablkcipher_walk *walk) +{ + if (unlikely(!list_empty(&walk->buffers))) + __ablkcipher_walk_complete(walk); +} + static inline struct crypto_async_request *crypto_get_backlog( struct crypto_queue *queue) { diff --git a/include/linux/padata.h b/include/linux/padata.h index 51611da9c498..8d8406246eef 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -24,7 +24,19 @@ #include #include #include +#include +/** + * struct padata_priv - Embedded to the users data structure. + * + * @list: List entry, to attach to the padata lists. + * @pd: Pointer to the internal control structure. + * @cb_cpu: Callback cpu for serializatioon. + * @seq_nr: Sequence number of the parallelized data object. + * @info: Used to pass information from the parallel to the serial function. + * @parallel: Parallel execution function. + * @serial: Serial complete function. + */ struct padata_priv { struct list_head list; struct parallel_data *pd; @@ -35,11 +47,29 @@ struct padata_priv { void (*serial)(struct padata_priv *padata); }; +/** + * struct padata_list + * + * @list: List head. + * @lock: List lock. + */ struct padata_list { struct list_head list; spinlock_t lock; }; +/** + * struct padata_queue - The percpu padata queues. + * + * @parallel: List to wait for parallelization. + * @reorder: List to wait for reordering after parallel processing. + * @serial: List to wait for serialization after reordering. + * @pwork: work struct for parallelization. + * @swork: work struct for serialization. + * @pd: Backpointer to the internal control structure. + * @num_obj: Number of objects that are processed by this cpu. + * @cpu_index: Index of the cpu. + */ struct padata_queue { struct padata_list parallel; struct padata_list reorder; @@ -51,6 +81,20 @@ struct padata_queue { int cpu_index; }; +/** + * struct parallel_data - Internal control structure, covers everything + * that depends on the cpumask in use. + * + * @pinst: padata instance. + * @queue: percpu padata queues. + * @seq_nr: The sequence number that will be attached to the next object. + * @reorder_objects: Number of objects waiting in the reorder queues. + * @refcnt: Number of objects holding a reference on this parallel_data. + * @max_seq_nr: Maximal used sequence number. + * @cpumask: cpumask in use. + * @lock: Reorder lock. + * @timer: Reorder timer. + */ struct parallel_data { struct padata_instance *pinst; struct padata_queue *queue; @@ -60,8 +104,19 @@ struct parallel_data { unsigned int max_seq_nr; cpumask_var_t cpumask; spinlock_t lock; + struct timer_list timer; }; +/** + * struct padata_instance - The overall control structure. + * + * @cpu_notifier: cpu hotplug notifier. + * @wq: The workqueue in use. + * @pd: The internal control structure. + * @cpumask: User supplied cpumask. + * @lock: padata instance lock. + * @flags: padata flags. + */ struct padata_instance { struct notifier_block cpu_notifier; struct workqueue_struct *wq; diff --git a/kernel/padata.c b/kernel/padata.c index fd03513c7327..b1c9857f8402 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -29,7 +29,7 @@ #include #define MAX_SEQ_NR INT_MAX - NR_CPUS -#define MAX_OBJ_NUM 10000 * NR_CPUS +#define MAX_OBJ_NUM 1000 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { @@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work) local_bh_enable(); } -/* +/** * padata_do_parallel - padata parallelization function * * @pinst: padata instance @@ -152,6 +152,23 @@ out: } EXPORT_SYMBOL(padata_do_parallel); +/* + * padata_get_next - Get the next object that needs serialization. + * + * Return values are: + * + * A pointer to the control struct of the next object that needs + * serialization, if present in one of the percpu reorder queues. + * + * NULL, if all percpu reorder queues are empty. + * + * -EINPROGRESS, if the next object that needs serialization will + * be parallel processed by another cpu and is not yet present in + * the cpu's reorder queue. + * + * -ENODATA, if this cpu has to do the parallel processing for + * the next object. + */ static struct padata_priv *padata_get_next(struct parallel_data *pd) { int cpu, num_cpus, empty, calc_seq_nr; @@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) /* * Calculate the seq_nr of the object that should be - * next in this queue. + * next in this reorder queue. */ overrun = 0; calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) @@ -231,7 +248,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) goto out; } - if (next_nr % num_cpus == next_queue->cpu_index) { + queue = per_cpu_ptr(pd->queue, smp_processor_id()); + if (queue->cpu_index == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); goto out; } @@ -247,19 +265,40 @@ static void padata_reorder(struct parallel_data *pd) struct padata_queue *queue; struct padata_instance *pinst = pd->pinst; -try_again: + /* + * We need to ensure that only one cpu can work on dequeueing of + * the reorder queue the time. Calculating in which percpu reorder + * queue the next object will arrive takes some time. A spinlock + * would be highly contended. Also it is not clear in which order + * the objects arrive to the reorder queues. So a cpu could wait to + * get the lock just to notice that there is nothing to do at the + * moment. Therefore we use a trylock and let the holder of the lock + * care for all the objects enqueued during the holdtime of the lock. + */ if (!spin_trylock_bh(&pd->lock)) - goto out; + return; while (1) { padata = padata_get_next(pd); + /* + * All reorder queues are empty, or the next object that needs + * serialization is parallel processed by another cpu and is + * still on it's way to the cpu's reorder queue, nothing to + * do for now. + */ if (!padata || PTR_ERR(padata) == -EINPROGRESS) break; + /* + * This cpu has to do the parallel processing of the next + * object. It's waiting in the cpu's parallelization queue, + * so exit imediately. + */ if (PTR_ERR(padata) == -ENODATA) { + del_timer(&pd->timer); spin_unlock_bh(&pd->lock); - goto out; + return; } queue = per_cpu_ptr(pd->queue, padata->cb_cpu); @@ -273,13 +312,27 @@ try_again: spin_unlock_bh(&pd->lock); - if (atomic_read(&pd->reorder_objects)) - goto try_again; + /* + * The next object that needs serialization might have arrived to + * the reorder queues in the meantime, we will be called again + * from the timer function if noone else cares for it. + */ + if (atomic_read(&pd->reorder_objects) + && !(pinst->flags & PADATA_RESET)) + mod_timer(&pd->timer, jiffies + HZ); + else + del_timer(&pd->timer); -out: return; } +static void padata_reorder_timer(unsigned long arg) +{ + struct parallel_data *pd = (struct parallel_data *)arg; + + padata_reorder(pd); +} + static void padata_serial_worker(struct work_struct *work) { struct padata_queue *queue; @@ -308,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work) local_bh_enable(); } -/* +/** * padata_do_serial - padata serialization function * * @padata: object to be serialized. @@ -338,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata) } EXPORT_SYMBOL(padata_do_serial); +/* Allocate and initialize the internal cpumask dependend resources. */ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, const struct cpumask *cpumask) { @@ -358,17 +412,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) goto err_free_queue; - for_each_possible_cpu(cpu) { + cpumask_and(pd->cpumask, cpumask, cpu_active_mask); + + for_each_cpu(cpu, pd->cpumask) { queue = per_cpu_ptr(pd->queue, cpu); queue->pd = pd; - if (cpumask_test_cpu(cpu, cpumask) - && cpumask_test_cpu(cpu, cpu_active_mask)) { - queue->cpu_index = cpu_index; - cpu_index++; - } else - queue->cpu_index = -1; + queue->cpu_index = cpu_index; + cpu_index++; INIT_LIST_HEAD(&queue->reorder.list); INIT_LIST_HEAD(&queue->parallel.list); @@ -382,11 +434,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, atomic_set(&queue->num_obj, 0); } - cpumask_and(pd->cpumask, cpumask, cpu_active_mask); - num_cpus = cpumask_weight(pd->cpumask); pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; + setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); atomic_set(&pd->refcnt, 0); @@ -410,6 +461,31 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } +/* Flush all objects out of the padata queues. */ +static void padata_flush_queues(struct parallel_data *pd) +{ + int cpu; + struct padata_queue *queue; + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + flush_work(&queue->pwork); + } + + del_timer_sync(&pd->timer); + + if (atomic_read(&pd->reorder_objects)) + padata_reorder(pd); + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + flush_work(&queue->swork); + } + + BUG_ON(atomic_read(&pd->refcnt) != 0); +} + +/* Replace the internal control stucture with a new one. */ static void padata_replace(struct padata_instance *pinst, struct parallel_data *pd_new) { @@ -421,17 +497,13 @@ static void padata_replace(struct padata_instance *pinst, synchronize_rcu(); - while (atomic_read(&pd_old->refcnt) != 0) - yield(); - - flush_workqueue(pinst->wq); - + padata_flush_queues(pd_old); padata_free_pd(pd_old); pinst->flags &= ~PADATA_RESET; } -/* +/** * padata_set_cpumask - set the cpumask that padata should use * * @pinst: padata instance @@ -443,10 +515,10 @@ int padata_set_cpumask(struct padata_instance *pinst, struct parallel_data *pd; int err = 0; - might_sleep(); - mutex_lock(&pinst->lock); + get_online_cpus(); + pd = padata_alloc_pd(pinst, cpumask); if (!pd) { err = -ENOMEM; @@ -458,6 +530,8 @@ int padata_set_cpumask(struct padata_instance *pinst, padata_replace(pinst, pd); out: + put_online_cpus(); + mutex_unlock(&pinst->lock); return err; @@ -479,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) return 0; } -/* +/** * padata_add_cpu - add a cpu to the padata cpumask * * @pinst: padata instance @@ -489,12 +563,12 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu) { int err; - might_sleep(); - mutex_lock(&pinst->lock); + get_online_cpus(); cpumask_set_cpu(cpu, pinst->cpumask); err = __padata_add_cpu(pinst, cpu); + put_online_cpus(); mutex_unlock(&pinst->lock); @@ -517,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) return 0; } -/* +/** * padata_remove_cpu - remove a cpu from the padata cpumask * * @pinst: padata instance @@ -527,12 +601,12 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu) { int err; - might_sleep(); - mutex_lock(&pinst->lock); + get_online_cpus(); cpumask_clear_cpu(cpu, pinst->cpumask); err = __padata_remove_cpu(pinst, cpu); + put_online_cpus(); mutex_unlock(&pinst->lock); @@ -540,38 +614,35 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu) } EXPORT_SYMBOL(padata_remove_cpu); -/* +/** * padata_start - start the parallel processing * * @pinst: padata instance to start */ void padata_start(struct padata_instance *pinst) { - might_sleep(); - mutex_lock(&pinst->lock); pinst->flags |= PADATA_INIT; mutex_unlock(&pinst->lock); } EXPORT_SYMBOL(padata_start); -/* +/** * padata_stop - stop the parallel processing * * @pinst: padata instance to stop */ void padata_stop(struct padata_instance *pinst) { - might_sleep(); - mutex_lock(&pinst->lock); pinst->flags &= ~PADATA_INIT; mutex_unlock(&pinst->lock); } EXPORT_SYMBOL(padata_stop); -static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +#ifdef CONFIG_HOTPLUG_CPU +static int padata_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) { int err; struct padata_instance *pinst; @@ -621,8 +692,9 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } +#endif -/* +/** * padata_alloc - allocate and initialize a padata instance * * @cpumask: cpumask that padata uses for parallelization @@ -631,7 +703,6 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, struct padata_instance *padata_alloc(const struct cpumask *cpumask, struct workqueue_struct *wq) { - int err; struct padata_instance *pinst; struct parallel_data *pd; @@ -639,6 +710,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, if (!pinst) goto err; + get_online_cpus(); + pd = padata_alloc_pd(pinst, cpumask); if (!pd) goto err_free_inst; @@ -654,31 +727,32 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, pinst->flags = 0; +#ifdef CONFIG_HOTPLUG_CPU pinst->cpu_notifier.notifier_call = padata_cpu_callback; pinst->cpu_notifier.priority = 0; - err = register_hotcpu_notifier(&pinst->cpu_notifier); - if (err) - goto err_free_cpumask; + register_hotcpu_notifier(&pinst->cpu_notifier); +#endif + + put_online_cpus(); mutex_init(&pinst->lock); return pinst; -err_free_cpumask: - free_cpumask_var(pinst->cpumask); err_free_pd: padata_free_pd(pd); err_free_inst: kfree(pinst); + put_online_cpus(); err: return NULL; } EXPORT_SYMBOL(padata_alloc); -/* +/** * padata_free - free a padata instance * - * @ padata_inst: padata instance to free + * @padata_inst: padata instance to free */ void padata_free(struct padata_instance *pinst) { @@ -686,10 +760,13 @@ void padata_free(struct padata_instance *pinst) synchronize_rcu(); - while (atomic_read(&pinst->pd->refcnt) != 0) - yield(); - +#ifdef CONFIG_HOTPLUG_CPU unregister_hotcpu_notifier(&pinst->cpu_notifier); +#endif + get_online_cpus(); + padata_flush_queues(pinst->pd); + put_online_cpus(); + padata_free_pd(pinst->pd); free_cpumask_var(pinst->cpumask); kfree(pinst);