crypto: x86/glue_helper - use le128 instead of u128 for CTR mode

The 'u128' type currently used for CTR mode stores its two 'long long' halves
in swapped order on little-endian machines, so SSE/AVX code would need extra
swap operations to work with it. Using le128 instead of u128 makes it easier
to do the IV calculations in vector registers.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Jussi Kivilinna 2012-10-20 15:06:36 +03:00 committed by Herbert Xu
parent e080b17a8c
commit 58990986f1
9 changed files with 64 additions and 58 deletions

View File

@ -1317,21 +1317,21 @@ static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
u128_xor(&dst[1], &dst[1], &iv); u128_xor(&dst[1], &dst[1], &iv);
} }
static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
be128 ctrblk; be128 ctrblk;
if (dst != src) if (dst != src)
*dst = *src; *dst = *src;
u128_to_be128(&ctrblk, iv); le128_to_be128(&ctrblk, iv);
u128_inc(iv); le128_inc(iv);
camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
} }
static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
u128 *iv) le128 *iv)
{ {
be128 ctrblks[2]; be128 ctrblks[2];
@ -1340,10 +1340,10 @@ static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
dst[1] = src[1]; dst[1] = src[1];
} }
u128_to_be128(&ctrblks[0], iv); le128_to_be128(&ctrblks[0], iv);
u128_inc(iv); le128_inc(iv);
u128_to_be128(&ctrblks[1], iv); le128_to_be128(&ctrblks[1], iv);
u128_inc(iv); le128_inc(iv);
camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
} }

View File

@ -78,19 +78,19 @@ static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
} }
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
be128 ctrblk; be128 ctrblk;
u128_to_be128(&ctrblk, iv); le128_to_be128(&ctrblk, iv);
u128_inc(iv); le128_inc(iv);
__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk); u128_xor(dst, src, (u128 *)&ctrblk);
} }
static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv) le128 *iv)
{ {
be128 ctrblks[CAST6_PARALLEL_BLOCKS]; be128 ctrblks[CAST6_PARALLEL_BLOCKS];
unsigned int i; unsigned int i;
@ -99,8 +99,8 @@ static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
if (dst != src) if (dst != src)
dst[i] = src[i]; dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv); le128_to_be128(&ctrblks[i], iv);
u128_inc(iv); le128_inc(iv);
} }
cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);

View File

@ -221,16 +221,16 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
u8 *src = (u8 *)walk->src.virt.addr; u8 *src = (u8 *)walk->src.virt.addr;
u8 *dst = (u8 *)walk->dst.virt.addr; u8 *dst = (u8 *)walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes; unsigned int nbytes = walk->nbytes;
u128 ctrblk; le128 ctrblk;
u128 tmp; u128 tmp;
be128_to_u128(&ctrblk, (be128 *)walk->iv); be128_to_le128(&ctrblk, (be128 *)walk->iv);
memcpy(&tmp, src, nbytes); memcpy(&tmp, src, nbytes);
fn_ctr(ctx, &tmp, &tmp, &ctrblk); fn_ctr(ctx, &tmp, &tmp, &ctrblk);
memcpy(dst, &tmp, nbytes); memcpy(dst, &tmp, nbytes);
u128_to_be128((be128 *)walk->iv, &ctrblk); le128_to_be128((be128 *)walk->iv, &ctrblk);
} }
EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
@ -243,11 +243,11 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
unsigned int nbytes = walk->nbytes; unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr; u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr; u128 *dst = (u128 *)walk->dst.virt.addr;
u128 ctrblk; le128 ctrblk;
unsigned int num_blocks, func_bytes; unsigned int num_blocks, func_bytes;
unsigned int i; unsigned int i;
be128_to_u128(&ctrblk, (be128 *)walk->iv); be128_to_le128(&ctrblk, (be128 *)walk->iv);
/* Process multi-block batch */ /* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) { for (i = 0; i < gctx->num_funcs; i++) {
@ -269,7 +269,7 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
} }
done: done:
u128_to_be128((be128 *)walk->iv, &ctrblk); le128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes; return nbytes;
} }

View File

@ -56,19 +56,19 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
} }
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
be128 ctrblk; be128 ctrblk;
u128_to_be128(&ctrblk, iv); le128_to_be128(&ctrblk, iv);
u128_inc(iv); le128_inc(iv);
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk); u128_xor(dst, src, (u128 *)&ctrblk);
} }
static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv) le128 *iv)
{ {
be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
unsigned int i; unsigned int i;
@ -77,8 +77,8 @@ static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
if (dst != src) if (dst != src)
dst[i] = src[i]; dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv); le128_to_be128(&ctrblks[i], iv);
u128_inc(iv); le128_inc(iv);
} }
serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);

View File

@ -59,19 +59,19 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
} }
static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
be128 ctrblk; be128 ctrblk;
u128_to_be128(&ctrblk, iv); le128_to_be128(&ctrblk, iv);
u128_inc(iv); le128_inc(iv);
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk); u128_xor(dst, src, (u128 *)&ctrblk);
} }
static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv) le128 *iv)
{ {
be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
unsigned int i; unsigned int i;
@ -80,8 +80,8 @@ static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
if (dst != src) if (dst != src)
dst[i] = src[i]; dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv); le128_to_be128(&ctrblks[i], iv);
u128_inc(iv); le128_inc(iv);
} }
serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);

View File

@ -90,7 +90,7 @@ static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
} }
static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src, static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
u128 *iv) le128 *iv)
{ {
be128 ctrblks[TWOFISH_PARALLEL_BLOCKS]; be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
unsigned int i; unsigned int i;
@ -99,8 +99,8 @@ static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
if (dst != src) if (dst != src)
dst[i] = src[i]; dst[i] = src[i];
u128_to_be128(&ctrblks[i], iv); le128_to_be128(&ctrblks[i], iv);
u128_inc(iv); le128_inc(iv);
} }
twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);

View File

@ -62,15 +62,15 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
} }
EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
be128 ctrblk; be128 ctrblk;
if (dst != src) if (dst != src)
*dst = *src; *dst = *src;
u128_to_be128(&ctrblk, iv); le128_to_be128(&ctrblk, iv);
u128_inc(iv); le128_inc(iv);
twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, dst, (u128 *)&ctrblk); u128_xor(dst, dst, (u128 *)&ctrblk);
@ -78,7 +78,7 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
u128 *iv) le128 *iv)
{ {
be128 ctrblks[3]; be128 ctrblks[3];
@ -88,12 +88,12 @@ void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
dst[2] = src[2]; dst[2] = src[2];
} }
u128_to_be128(&ctrblks[0], iv); le128_to_be128(&ctrblks[0], iv);
u128_inc(iv); le128_inc(iv);
u128_to_be128(&ctrblks[1], iv); le128_to_be128(&ctrblks[1], iv);
u128_inc(iv); le128_inc(iv);
u128_to_be128(&ctrblks[2], iv); le128_to_be128(&ctrblks[2], iv);
u128_inc(iv); le128_inc(iv);
twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
} }

View File

@ -13,7 +13,7 @@
typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
u128 *iv); le128 *iv);
#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
@ -71,23 +71,29 @@ static inline void glue_fpu_end(bool fpu_enabled)
kernel_fpu_end(); kernel_fpu_end();
} }
static inline void u128_to_be128(be128 *dst, const u128 *src) static inline void le128_to_be128(be128 *dst, const le128 *src)
{ {
dst->a = cpu_to_be64(src->a); dst->a = cpu_to_be64(le64_to_cpu(src->a));
dst->b = cpu_to_be64(src->b); dst->b = cpu_to_be64(le64_to_cpu(src->b));
} }
static inline void be128_to_u128(u128 *dst, const be128 *src) static inline void be128_to_le128(le128 *dst, const be128 *src)
{ {
dst->a = be64_to_cpu(src->a); dst->a = cpu_to_le64(be64_to_cpu(src->a));
dst->b = be64_to_cpu(src->b); dst->b = cpu_to_le64(be64_to_cpu(src->b));
} }
static inline void u128_inc(u128 *i) static inline void le128_inc(le128 *i)
{ {
i->b++; u64 a = le64_to_cpu(i->a);
if (!i->b) u64 b = le64_to_cpu(i->b);
i->a++;
b++;
if (!b)
a++;
i->a = cpu_to_le64(a);
i->b = cpu_to_le64(b);
} }
extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,

View File

@ -31,9 +31,9 @@ asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
/* helpers from twofish_x86_64-3way module */ /* helpers from twofish_x86_64-3way module */
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
u128 *iv); le128 *iv);
extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
u128 *iv); le128 *iv);
extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen); unsigned int keylen);