Make sha1 / sha256 / sha512 go faster

This commit is contained in:
Justine Tunney 2021-06-26 00:11:12 -07:00
parent 5144c22189
commit 2d79ab6c15
14 changed files with 2299 additions and 93 deletions

View file

@ -119,9 +119,9 @@
#define MBEDTLS_MD5_SMALLER
#define MBEDTLS_SHA1_SMALLER
#ifdef TINY
#define MBEDTLS_SHA256_SMALLER
#define MBEDTLS_SHA512_SMALLER
#ifdef TINY
#define MBEDTLS_AES_ROM_TABLES
#define MBEDTLS_AES_FEWER_TABLES
#else

View file

@ -96,8 +96,7 @@ static int gcm_gen_table( mbedtls_gcm_context *ctx )
#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
/* With CLMUL support, we need only h, not the rest of the table */
if( X86_HAVE( PCLMUL ) )
return( 0 );
if (X86_HAVE(AES) && X86_HAVE(PCLMUL)) return 0;
#endif
/* 0 corresponds to 0 in GF(2^128) */
@ -191,7 +190,7 @@ static void gcm_mult( mbedtls_gcm_context *ctx, const unsigned char x[16],
uint64_t zh, zl;
#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
if( X86_HAVE( PCLMUL ) ) {
if (X86_HAVE(AES) && X86_HAVE(PCLMUL)) {
unsigned char h[16];
PUT_UINT32_BE( ctx->HH[8] >> 32, h, 0 );
@ -240,11 +239,11 @@ static void gcm_mult( mbedtls_gcm_context *ctx, const unsigned char x[16],
}
int mbedtls_gcm_starts( mbedtls_gcm_context *ctx,
int mode,
const unsigned char *iv,
size_t iv_len,
const unsigned char *add,
size_t add_len )
int mode,
const unsigned char *iv,
size_t iv_len,
const unsigned char *add,
size_t add_len )
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
unsigned char work_buf[16];
@ -327,9 +326,9 @@ int mbedtls_gcm_starts( mbedtls_gcm_context *ctx,
}
int mbedtls_gcm_update( mbedtls_gcm_context *ctx,
size_t length,
const unsigned char *input,
unsigned char *output )
size_t length,
const unsigned char *input,
unsigned char *output )
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
unsigned char ectr[16];
@ -390,8 +389,8 @@ int mbedtls_gcm_update( mbedtls_gcm_context *ctx,
}
int mbedtls_gcm_finish( mbedtls_gcm_context *ctx,
unsigned char *tag,
size_t tag_len )
unsigned char *tag,
size_t tag_len )
{
unsigned char work_buf[16];
size_t i;
@ -431,16 +430,16 @@ int mbedtls_gcm_finish( mbedtls_gcm_context *ctx,
}
int mbedtls_gcm_crypt_and_tag( mbedtls_gcm_context *ctx,
int mode,
size_t length,
const unsigned char *iv,
size_t iv_len,
const unsigned char *add,
size_t add_len,
const unsigned char *input,
unsigned char *output,
size_t tag_len,
unsigned char *tag )
int mode,
size_t length,
const unsigned char *iv,
size_t iv_len,
const unsigned char *add,
size_t add_len,
const unsigned char *input,
unsigned char *output,
size_t tag_len,
unsigned char *tag )
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
@ -464,15 +463,15 @@ int mbedtls_gcm_crypt_and_tag( mbedtls_gcm_context *ctx,
}
int mbedtls_gcm_auth_decrypt( mbedtls_gcm_context *ctx,
size_t length,
const unsigned char *iv,
size_t iv_len,
const unsigned char *add,
size_t add_len,
const unsigned char *tag,
size_t tag_len,
const unsigned char *input,
unsigned char *output )
size_t length,
const unsigned char *iv,
size_t iv_len,
const unsigned char *add,
size_t add_len,
const unsigned char *tag,
size_t tag_len,
const unsigned char *input,
unsigned char *output )
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
unsigned char check_tag[16];

View file

@ -1,4 +1,6 @@
#include "libc/bits/bits.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
#include "third_party/mbedtls/common.h"
#include "third_party/mbedtls/endian.h"
@ -37,6 +39,8 @@ asm(".include \"libc/disclaimer.inc\"");
* http://www.itl.nist.gov/fipspubs/fip180-1.htm
*/
void sha1_transform_avx2(mbedtls_sha1_context *, const uint8_t *, int);
#define SHA1_VALIDATE_RET(cond) \
MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_SHA1_BAD_INPUT_DATA )
@ -145,6 +149,11 @@ int mbedtls_internal_sha1_process( mbedtls_sha1_context *ctx,
SHA1_VALIDATE_RET( ctx != NULL );
SHA1_VALIDATE_RET( (const unsigned char *)data != NULL );
if (!IsTiny() && X86_HAVE(AVX2) && X86_HAVE(BMI) && X86_HAVE(BMI2)) {
sha1_transform_avx2(ctx, data, 1);
return 0;
}
#ifdef MBEDTLS_SHA1_SMALLER
#define ROL(a, b) ((a << b) | (a >> (32 - b)))
@ -387,8 +396,8 @@ int mbedtls_sha1_update_ret( mbedtls_sha1_context *ctx,
size_t ilen )
{
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
size_t fill;
uint32_t left;
size_t n, fill;
SHA1_VALIDATE_RET( ctx != NULL );
SHA1_VALIDATE_RET( ilen == 0 || input != NULL );
@ -417,6 +426,12 @@ int mbedtls_sha1_update_ret( mbedtls_sha1_context *ctx,
left = 0;
}
if (!IsTiny() && ilen >= 64 && X86_HAVE(AVX2) && X86_HAVE(BMI) && X86_HAVE(BMI2)) {
sha1_transform_avx2(ctx, input, ilen / 64);
input += ROUNDDOWN(ilen, 64);
ilen -= ROUNDDOWN(ilen, 64);
}
while( ilen >= 64 )
{
if( ( ret = mbedtls_internal_sha1_process( ctx, input ) ) != 0 )

View file

@ -18,8 +18,8 @@ COSMOPOLITAN_C_START_
*/
typedef struct mbedtls_sha1_context
{
uint32_t total[2]; /*!< The number of Bytes processed. */
uint32_t state[5]; /*!< The intermediate digest state. */
uint32_t total[2]; /*!< The number of Bytes processed. */
uint8_t buffer[64]; /*!< The data block being processed. */
}
mbedtls_sha1_context;

View file

@ -1,3 +1,6 @@
#include "libc/dce.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
#include "third_party/mbedtls/common.h"
#include "third_party/mbedtls/endian.h"
@ -40,6 +43,8 @@ asm(".include \"libc/disclaimer.inc\"");
MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_SHA256_BAD_INPUT_DATA )
#define SHA256_VALIDATE(cond) MBEDTLS_INTERNAL_VALIDATE( cond )
void sha256_transform_rorx(mbedtls_sha256_context *, const uint8_t *, int);
#if !defined(MBEDTLS_SHA256_ALT)
void mbedtls_sha256_init( mbedtls_sha256_context *ctx )
@ -151,7 +156,7 @@ static const uint32_t K[] =
} while( 0 )
int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx,
const unsigned char data[64] )
const unsigned char data[64] )
{
struct
{
@ -164,20 +169,22 @@ int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx,
SHA256_VALIDATE_RET( ctx != NULL );
SHA256_VALIDATE_RET( (const unsigned char *)data != NULL );
if (!IsTiny() && X86_HAVE(AVX2) && X86_HAVE(BMI2)) {
sha256_transform_rorx(ctx, data, 1);
return 0;
}
for( i = 0; i < 8; i++ )
local.A[i] = ctx->state[i];
#if defined(MBEDTLS_SHA256_SMALLER)
for( i = 0; i < 64; i++ )
{
for( i = 0; i < 64; i++ ) {
if( i < 16 )
GET_UINT32_BE( local.W[i], data, 4 * i );
else
R( i );
P( local.A[0], local.A[1], local.A[2], local.A[3], local.A[4],
local.A[5], local.A[6], local.A[7], local.W[i], K[i] );
local.temp1 = local.A[7]; local.A[7] = local.A[6];
local.A[6] = local.A[5]; local.A[5] = local.A[4];
local.A[4] = local.A[3]; local.A[3] = local.A[2];
@ -187,9 +194,7 @@ int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx,
#else /* MBEDTLS_SHA256_SMALLER */
for( i = 0; i < 16; i++ )
GET_UINT32_BE( local.W[i], data, 4 * i );
for( i = 0; i < 16; i += 8 )
{
for( i = 0; i < 16; i += 8 ) {
P( local.A[0], local.A[1], local.A[2], local.A[3], local.A[4],
local.A[5], local.A[6], local.A[7], local.W[i+0], K[i+0] );
P( local.A[7], local.A[0], local.A[1], local.A[2], local.A[3],
@ -207,9 +212,7 @@ int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx,
P( local.A[1], local.A[2], local.A[3], local.A[4], local.A[5],
local.A[6], local.A[7], local.A[0], local.W[i+7], K[i+7] );
}
for( i = 16; i < 64; i += 8 )
{
for( i = 16; i < 64; i += 8 ) {
P( local.A[0], local.A[1], local.A[2], local.A[3], local.A[4],
local.A[5], local.A[6], local.A[7], R(i+0), K[i+0] );
P( local.A[7], local.A[0], local.A[1], local.A[2], local.A[3],
@ -278,6 +281,12 @@ int mbedtls_sha256_update_ret( mbedtls_sha256_context *ctx,
left = 0;
}
if (!IsTiny() && ilen >= 64 && X86_HAVE(AVX2) && X86_HAVE(BMI2)) {
sha256_transform_rorx(ctx, input, ilen / 64);
input += ROUNDDOWN(ilen, 64);
ilen -= ROUNDDOWN(ilen, 64);
}
while( ilen >= 64 )
{
if( ( ret = mbedtls_internal_sha256_process( ctx, input ) ) != 0 )

View file

@ -16,8 +16,8 @@ COSMOPOLITAN_C_START_
*/
typedef struct mbedtls_sha256_context
{
uint32_t total[2]; /*!< The number of Bytes processed. */
uint32_t state[8]; /*!< The intermediate digest state. */
uint32_t total[2]; /*!< The number of Bytes processed. */
unsigned char buffer[64]; /*!< The data block being processed. */
int is224; /*!< Determines which function to use:
0: Use SHA-256, or 1: Use SHA-224. */

View file

@ -1,4 +1,6 @@
#include "libc/literal.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
#include "third_party/mbedtls/common.h"
#include "third_party/mbedtls/endian.h"
@ -37,6 +39,8 @@ asm(".include \"libc/disclaimer.inc\"");
* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
*/
void sha512_transform_rorx(mbedtls_sha512_context *, const uint8_t *, int);
#if defined(MBEDTLS_SHA512_C)
#define SHA512_VALIDATE_RET(cond) \
@ -224,12 +228,16 @@ int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx,
SHA512_VALIDATE_RET( ctx != NULL );
SHA512_VALIDATE_RET( (const unsigned char *)data != NULL );
if (!IsTiny() && X86_HAVE(AVX2)) {
sha512_transform_rorx(ctx, data, 1);
return 0;
}
#define SHR(x,n) ((x) >> (n))
#define ROTR(x,n) (SHR((x),(n)) | ((x) << (64 - (n))))
#define S0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7))
#define S1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x, 6))
#define S2(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
#define S3(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
@ -263,10 +271,14 @@ int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx,
P( local.A[0], local.A[1], local.A[2], local.A[3], local.A[4],
local.A[5], local.A[6], local.A[7], local.W[i], K[i] );
local.temp1 = local.A[7]; local.A[7] = local.A[6];
local.A[6] = local.A[5]; local.A[5] = local.A[4];
local.A[4] = local.A[3]; local.A[3] = local.A[2];
local.A[2] = local.A[1]; local.A[1] = local.A[0];
local.temp1 = local.A[7];
local.A[7] = local.A[6];
local.A[6] = local.A[5];
local.A[5] = local.A[4];
local.A[4] = local.A[3];
local.A[3] = local.A[2];
local.A[2] = local.A[1];
local.A[1] = local.A[0];
local.A[0] = local.temp1;
}
#else /* MBEDTLS_SHA512_SMALLER */
@ -362,6 +374,12 @@ int mbedtls_sha512_update_ret( mbedtls_sha512_context *ctx,
left = 0;
}
if (!IsTiny() && ilen >= 128 && X86_HAVE(AVX2)) {
sha512_transform_rorx(ctx, input, ilen / 128);
input += ROUNDDOWN(ilen, 128);
ilen -= ROUNDDOWN(ilen, 128);
}
while( ilen >= 128 )
{
if( ( ret = mbedtls_internal_sha512_process( ctx, input ) ) != 0 )

View file

@ -16,8 +16,8 @@ COSMOPOLITAN_C_START_
*/
typedef struct mbedtls_sha512_context
{
uint64_t total[2]; /*!< The number of Bytes processed. */
uint64_t state[8]; /*!< The intermediate digest state. */
uint64_t total[2]; /*!< The number of Bytes processed. */
unsigned char buffer[128]; /*!< The data block being processed. */
#if !defined(MBEDTLS_SHA512_NO_SHA384)
int is384; /*!< Determines which function to use:

View file

@ -1021,17 +1021,15 @@ int execute_tests(int argc, const char **argv, const char *default_filename) {
if (unmet_dep_count > 0 || ret == DISPATCH_UNSUPPORTED_SUITE) {
total_skipped++;
WRITE("----");
if (1 == option_verbose && ret == DISPATCH_UNSUPPORTED_SUITE) {
WRITE("\n Test Suite not enabled");
}
if (1 == option_verbose && unmet_dep_count > 0) {
WRITE("\n Unmet dependencies: ");
WRITE(" (unmet dependencies: ");
for (i = 0; i < unmet_dep_count; i++) {
WRITE("%d ", unmet_dependencies[i]);
if (i) WRITE(",");
WRITE("%d", unmet_dependencies[i]);
}
if (missing_unmet_dependencies) WRITE("...");
}
WRITE("\n");
WRITE(")\n");
fflush(stdout);
unmet_dep_count = 0;
missing_unmet_dependencies = 0;