linux-stable/arch/x86/crypto/camellia-x86_64-asm_64.S
Thomas Gleixner 1a59d1b8e0 treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 156
Based on 1 normalized pattern(s):

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license as published by
  the free software foundation either version 2 of the license or at
  your option any later version this program is distributed in the
  hope that it will be useful but without any warranty without even
  the implied warranty of merchantability or fitness for a particular
  purpose see the gnu general public license for more details you
  should have received a copy of the gnu general public license along
  with this program if not write to the free software foundation inc
  59 temple place suite 330 boston ma 02111 1307 usa

extracted by the scancode license scanner the SPDX license identifier

  GPL-2.0-or-later

has been chosen to replace the boilerplate/reference in 1334 file(s).

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Allison Randal <allison@lohutok.net>
Reviewed-by: Richard Fontana <rfontana@redhat.com>
Cc: linux-spdx@vger.kernel.org
Link: https://lkml.kernel.org/r/20190527070033.113240726@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-05-30 11:26:35 -07:00

499 lines
10 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Camellia Cipher Algorithm (x86_64)
*
* Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*/
#include <linux/linkage.h>
.file "camellia-x86_64-asm_64.S"
.text
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110
#define CAMELLIA_TABLE_BYTE_LEN 272
/* struct camellia_ctx: */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN
/* register macros */
#define CTX %rdi
#define RIO %rsi
#define RIOd %esi
#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx
#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx
#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl
#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh
#define RT0 %rsi
#define RT1 %r12
#define RT2 %r8
#define RT0d %esi
#define RT1d %r12d
#define RT2d %r8d
#define RT2bl %r8b
#define RXOR %r9
#define RR12 %r10
#define RDST %r11
#define RXORd %r9d
#define RXORbl %r9b
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
movzbl ab ## bl, tmp2 ## d; \
movzbl ab ## bh, tmp1 ## d; \
rorq $16, ab; \
xorq T0(, tmp2, 8), dst; \
xorq T1(, tmp1, 8), dst;
/**********************************************************************
1-way camellia
**********************************************************************/
#define roundsm(ab, subkey, cd) \
movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
\
xorq RT2, cd ## 0;
#define fls(l, r, kl, kr) \
movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
andl l ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, l ## 0; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
orq r ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, r ## 0; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
orq l ## 0, RT2; \
shrq $32, RT2; \
xorq RT2, l ## 0; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
andl r ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, r ## 0;
#define enc_rounds(i) \
roundsm(RAB, i + 2, RCD); \
roundsm(RCD, i + 3, RAB); \
roundsm(RAB, i + 4, RCD); \
roundsm(RCD, i + 5, RAB); \
roundsm(RAB, i + 6, RCD); \
roundsm(RCD, i + 7, RAB);
#define enc_fls(i) \
fls(RAB, RCD, i + 0, i + 1);
#define enc_inpack() \
movq (RIO), RAB0; \
bswapq RAB0; \
rolq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rorq $32, RCD0; \
xorq key_table(CTX), RAB0;
#define enc_outunpack(op, max) \
xorq key_table(CTX, max, 8), RCD0; \
rorq $32, RCD0; \
bswapq RCD0; \
op ## q RCD0, (RIO); \
rolq $32, RAB0; \
bswapq RAB0; \
op ## q RAB0, 4*2(RIO);
#define dec_rounds(i) \
roundsm(RAB, i + 7, RCD); \
roundsm(RCD, i + 6, RAB); \
roundsm(RAB, i + 5, RCD); \
roundsm(RCD, i + 4, RAB); \
roundsm(RAB, i + 3, RCD); \
roundsm(RCD, i + 2, RAB);
#define dec_fls(i) \
fls(RAB, RCD, i + 1, i + 0);
#define dec_inpack(max) \
movq (RIO), RAB0; \
bswapq RAB0; \
rolq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rorq $32, RCD0; \
xorq key_table(CTX, max, 8), RAB0;
#define dec_outunpack() \
xorq key_table(CTX), RCD0; \
rorq $32, RCD0; \
bswapq RCD0; \
movq RCD0, (RIO); \
rolq $32, RAB0; \
bswapq RAB0; \
movq RAB0, 4*2(RIO);
ENTRY(__camellia_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool xor
*/
movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
enc_inpack();
enc_rounds(0);
enc_fls(8);
enc_rounds(8);
enc_fls(16);
enc_rounds(16);
movl $24, RT1d; /* max */
cmpb $16, key_length(CTX);
je .L__enc_done;
enc_fls(24);
enc_rounds(24);
movl $32, RT1d; /* max */
.L__enc_done:
testb RXORbl, RXORbl;
movq RDST, RIO;
jnz .L__enc_xor;
enc_outunpack(mov, RT1);
movq RR12, %r12;
ret;
.L__enc_xor:
enc_outunpack(xor, RT1);
movq RR12, %r12;
ret;
ENDPROC(__camellia_enc_blk)
ENTRY(camellia_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
cmpl $16, key_length(CTX);
movl $32, RT2d;
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
dec_inpack(RT2);
cmpb $24, RT2bl;
je .L__dec_rounds16;
dec_rounds(24);
dec_fls(24);
.L__dec_rounds16:
dec_rounds(16);
dec_fls(16);
dec_rounds(8);
dec_fls(8);
dec_rounds(0);
movq RDST, RIO;
dec_outunpack();
movq RR12, %r12;
ret;
ENDPROC(camellia_dec_blk)
/**********************************************************************
2-way camellia
**********************************************************************/
#define roundsm2(ab, subkey, cd) \
movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
xorq RT2, cd ## 1; \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
\
xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
xorq RT2, cd ## 0; \
xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
#define fls2(l, r, kl, kr) \
movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
andl l ## 0d, RT0d; \
roll $1, RT0d; \
shlq $32, RT0; \
xorq RT0, l ## 0; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
orq r ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, r ## 0; \
\
movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
andl l ## 1d, RT2d; \
roll $1, RT2d; \
shlq $32, RT2; \
xorq RT2, l ## 1; \
movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
orq r ## 1, RT0; \
shrq $32, RT0; \
xorq RT0, r ## 1; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
orq l ## 0, RT1; \
shrq $32, RT1; \
xorq RT1, l ## 0; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
andl r ## 0d, RT2d; \
roll $1, RT2d; \
shlq $32, RT2; \
xorq RT2, r ## 0; \
\
movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
orq l ## 1, RT0; \
shrq $32, RT0; \
xorq RT0, l ## 1; \
movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
andl r ## 1d, RT1d; \
roll $1, RT1d; \
shlq $32, RT1; \
xorq RT1, r ## 1;
#define enc_rounds2(i) \
roundsm2(RAB, i + 2, RCD); \
roundsm2(RCD, i + 3, RAB); \
roundsm2(RAB, i + 4, RCD); \
roundsm2(RCD, i + 5, RAB); \
roundsm2(RAB, i + 6, RCD); \
roundsm2(RCD, i + 7, RAB);
#define enc_fls2(i) \
fls2(RAB, RCD, i + 0, i + 1);
#define enc_inpack2() \
movq (RIO), RAB0; \
bswapq RAB0; \
rorq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rolq $32, RCD0; \
xorq key_table(CTX), RAB0; \
\
movq 8*2(RIO), RAB1; \
bswapq RAB1; \
rorq $32, RAB1; \
movq 12*2(RIO), RCD1; \
bswapq RCD1; \
rolq $32, RCD1; \
xorq key_table(CTX), RAB1;
#define enc_outunpack2(op, max) \
xorq key_table(CTX, max, 8), RCD0; \
rolq $32, RCD0; \
bswapq RCD0; \
op ## q RCD0, (RIO); \
rorq $32, RAB0; \
bswapq RAB0; \
op ## q RAB0, 4*2(RIO); \
\
xorq key_table(CTX, max, 8), RCD1; \
rolq $32, RCD1; \
bswapq RCD1; \
op ## q RCD1, 8*2(RIO); \
rorq $32, RAB1; \
bswapq RAB1; \
op ## q RAB1, 12*2(RIO);
#define dec_rounds2(i) \
roundsm2(RAB, i + 7, RCD); \
roundsm2(RCD, i + 6, RAB); \
roundsm2(RAB, i + 5, RCD); \
roundsm2(RCD, i + 4, RAB); \
roundsm2(RAB, i + 3, RCD); \
roundsm2(RCD, i + 2, RAB);
#define dec_fls2(i) \
fls2(RAB, RCD, i + 1, i + 0);
#define dec_inpack2(max) \
movq (RIO), RAB0; \
bswapq RAB0; \
rorq $32, RAB0; \
movq 4*2(RIO), RCD0; \
bswapq RCD0; \
rolq $32, RCD0; \
xorq key_table(CTX, max, 8), RAB0; \
\
movq 8*2(RIO), RAB1; \
bswapq RAB1; \
rorq $32, RAB1; \
movq 12*2(RIO), RCD1; \
bswapq RCD1; \
rolq $32, RCD1; \
xorq key_table(CTX, max, 8), RAB1;
#define dec_outunpack2() \
xorq key_table(CTX), RCD0; \
rolq $32, RCD0; \
bswapq RCD0; \
movq RCD0, (RIO); \
rorq $32, RAB0; \
bswapq RAB0; \
movq RAB0, 4*2(RIO); \
\
xorq key_table(CTX), RCD1; \
rolq $32, RCD1; \
bswapq RCD1; \
movq RCD1, 8*2(RIO); \
rorq $32, RAB1; \
bswapq RAB1; \
movq RAB1, 12*2(RIO);
ENTRY(__camellia_enc_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool xor
*/
pushq %rbx;
movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
enc_inpack2();
enc_rounds2(0);
enc_fls2(8);
enc_rounds2(8);
enc_fls2(16);
enc_rounds2(16);
movl $24, RT2d; /* max */
cmpb $16, key_length(CTX);
je .L__enc2_done;
enc_fls2(24);
enc_rounds2(24);
movl $32, RT2d; /* max */
.L__enc2_done:
test RXORbl, RXORbl;
movq RDST, RIO;
jnz .L__enc2_xor;
enc_outunpack2(mov, RT2);
movq RR12, %r12;
popq %rbx;
ret;
.L__enc2_xor:
enc_outunpack2(xor, RT2);
movq RR12, %r12;
popq %rbx;
ret;
ENDPROC(__camellia_enc_blk_2way)
ENTRY(camellia_dec_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
cmpl $16, key_length(CTX);
movl $32, RT2d;
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
movq %rbx, RXOR;
movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
dec_inpack2(RT2);
cmpb $24, RT2bl;
je .L__dec2_rounds16;
dec_rounds2(24);
dec_fls2(24);
.L__dec2_rounds16:
dec_rounds2(16);
dec_fls2(16);
dec_rounds2(8);
dec_fls2(8);
dec_rounds2(0);
movq RDST, RIO;
dec_outunpack2();
movq RR12, %r12;
movq RXOR, %rbx;
ret;
ENDPROC(camellia_dec_blk_2way)