mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 23:13:34 +00:00
286 lines
9.5 KiB
C
286 lines
9.5 KiB
C
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│
|
|||
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|||
|
│ Copyright The Mbed TLS Contributors │
|
|||
|
│ │
|
|||
|
│ Licensed under the Apache License, Version 2.0 (the "License"); │
|
|||
|
│ you may not use this file except in compliance with the License. │
|
|||
|
│ You may obtain a copy of the License at │
|
|||
|
│ │
|
|||
|
│ http://www.apache.org/licenses/LICENSE-2.0 │
|
|||
|
│ │
|
|||
|
│ Unless required by applicable law or agreed to in writing, software │
|
|||
|
│ distributed under the License is distributed on an "AS IS" BASIS, │
|
|||
|
│ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. │
|
|||
|
│ See the License for the specific language governing permissions and │
|
|||
|
│ limitations under the License. │
|
|||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|||
|
#include "libc/bits/bits.h"
|
|||
|
#include "libc/log/backtrace.internal.h"
|
|||
|
#include "libc/log/check.h"
|
|||
|
#include "libc/macros.internal.h"
|
|||
|
#include "libc/mem/mem.h"
|
|||
|
#include "libc/nexgen32e/bsr.h"
|
|||
|
#include "libc/nexgen32e/x86feature.h"
|
|||
|
#include "third_party/mbedtls/bignum.h"
|
|||
|
#include "third_party/mbedtls/bignum_internal.h"
|
|||
|
#include "third_party/mbedtls/profile.h"
|
|||
|
/* clang-format off */
|
|||
|
|
|||
|
void Mul(uint64_t *c, uint64_t *A, unsigned n, uint64_t *B, unsigned m)
|
|||
|
{
|
|||
|
if (!m--) return;
|
|||
|
mbedtls_platform_zeroize(c, m * ciL);
|
|||
|
mbedtls_mpi_mul_hlp1(n, A, c + m, B[m]);
|
|||
|
for (; m > 0; m--)
|
|||
|
mbedtls_mpi_mul_hlp(n, A, c + m - 1, B[m - 1]);
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Computes inner loop of multiplication algorithm.
|
|||
|
*/
|
|||
|
void mbedtls_mpi_mul_hlp1(size_t n, const uint64_t *s, uint64_t *d, uint64_t b)
|
|||
|
{
|
|||
|
size_t i;
|
|||
|
uint128_t x;
|
|||
|
uint64_t c, t;
|
|||
|
i = c = 0;
|
|||
|
#ifdef __x86_64__
|
|||
|
#define MULXADOX(i) \
|
|||
|
"mulx\t" #i "*8(%2),%%rax,%%r9\n\t" \
|
|||
|
"adox\t%0,%%rax\n\t" \
|
|||
|
"mov\t%%rax," #i "*8(%1)\n\t" \
|
|||
|
"mov\t%%r9,%0\n\t"
|
|||
|
if (X86_HAVE(BMI2) && X86_HAVE(ADX))
|
|||
|
{
|
|||
|
for (; n >= 8; n -= 8, s += 8, d += 8)
|
|||
|
{
|
|||
|
asm volatile("xor\t%%r8d,%%r8d\n\t" //
|
|||
|
MULXADOX(0) //
|
|||
|
MULXADOX(1) //
|
|||
|
MULXADOX(2) //
|
|||
|
MULXADOX(3) //
|
|||
|
MULXADOX(4) //
|
|||
|
MULXADOX(5) //
|
|||
|
MULXADOX(6) //
|
|||
|
MULXADOX(7) //
|
|||
|
"adcx\t%%r8,%0\n" //
|
|||
|
"adox\t%%r8,%0" //
|
|||
|
: "+r"(c)
|
|||
|
: "r"(d), "S"(s), "d"(b)
|
|||
|
: "rax", "r8", "r9", "memory", "cc");
|
|||
|
}
|
|||
|
for (; n >= 4; n -= 4, s += 4, d += 4)
|
|||
|
{
|
|||
|
asm volatile("xor\t%%r8d,%%r8d\n\t" //
|
|||
|
MULXADOX(0) //
|
|||
|
MULXADOX(1) //
|
|||
|
MULXADOX(2) //
|
|||
|
MULXADOX(3) //
|
|||
|
"adcx\t%%r8,%0\n" //
|
|||
|
"adox\t%%r8,%0" //
|
|||
|
: "+r"(c)
|
|||
|
: "r"(d), "S"(s), "d"(b)
|
|||
|
: "rax", "r8", "r9", "memory", "cc");
|
|||
|
}
|
|||
|
}
|
|||
|
#undef MULXADOX
|
|||
|
#endif
|
|||
|
for (; i < n; ++i)
|
|||
|
{
|
|||
|
x = s[i];
|
|||
|
x *= b;
|
|||
|
x += c;
|
|||
|
c = x >> 64;
|
|||
|
d[i] = x;
|
|||
|
}
|
|||
|
d[i] = c;
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Computes inner loop of multiplication algorithm.
|
|||
|
*/
|
|||
|
void mbedtls_mpi_mul_hlp(size_t n, uint64_t *s, uint64_t *d, uint64_t b)
|
|||
|
{
|
|||
|
size_t i;
|
|||
|
uint128_t x;
|
|||
|
uint64_t c, l, h, t;
|
|||
|
i = c = 0;
|
|||
|
#ifdef __x86_64__
|
|||
|
#define MULADDC(i) \
|
|||
|
"mulx\t" #i "*8(%2),%%rax,%%r9\n\t" \
|
|||
|
"adcx\t" #i "*8(%1),%%rax\n\t" \
|
|||
|
"adox\t%0,%%rax\n\t" \
|
|||
|
"mov\t%%rax," #i "*8(%1)\n\t" \
|
|||
|
"mov\t%%r9,%0\n\t"
|
|||
|
if (X86_HAVE(BMI2) && X86_HAVE(ADX))
|
|||
|
{
|
|||
|
for (; n >= 8; n -= 8, s += 8, d += 8)
|
|||
|
{
|
|||
|
asm volatile("xor\t%%r8d,%%r8d\n\t" //
|
|||
|
MULADDC(0) //
|
|||
|
MULADDC(1) //
|
|||
|
MULADDC(2) //
|
|||
|
MULADDC(3) //
|
|||
|
MULADDC(4) //
|
|||
|
MULADDC(5) //
|
|||
|
MULADDC(6) //
|
|||
|
MULADDC(7) //
|
|||
|
"adcx\t%%r8,%0\n" //
|
|||
|
"adox\t%%r8,%0" //
|
|||
|
: "+r"(c)
|
|||
|
: "r"(d), "S"(s), "d"(b)
|
|||
|
: "rax", "r8", "r9", "memory", "cc");
|
|||
|
}
|
|||
|
|
|||
|
for (; n >= 4; n -= 4, s += 4, d += 4)
|
|||
|
{
|
|||
|
asm volatile("xor\t%%r8d,%%r8d\n\t" //
|
|||
|
MULADDC(0) //
|
|||
|
MULADDC(1) //
|
|||
|
MULADDC(2) //
|
|||
|
MULADDC(3) //
|
|||
|
"adcx\t%%r8,%0\n" //
|
|||
|
"adox\t%%r8,%0" //
|
|||
|
: "+r"(c)
|
|||
|
: "r"(d), "S"(s), "d"(b)
|
|||
|
: "rax", "r8", "r9", "memory", "cc");
|
|||
|
}
|
|||
|
}
|
|||
|
#undef MULADDC
|
|||
|
#endif
|
|||
|
for (; i < n; ++i)
|
|||
|
{
|
|||
|
x = s[i];
|
|||
|
x *= b;
|
|||
|
x += c;
|
|||
|
l = x;
|
|||
|
h = x >> 64;
|
|||
|
t = d[i];
|
|||
|
d[i] = t + l;
|
|||
|
c = (t + l < t) + h;
|
|||
|
}
|
|||
|
do
|
|||
|
{
|
|||
|
d[i] += c;
|
|||
|
} while ((c = d[i++] < c));
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Multiplies big number with unsigned scalar: X = A × b
|
|||
|
*
|
|||
|
* @param X receives result w/ aliasing permitted
|
|||
|
* @param A is left-hand side big number
|
|||
|
* @param B is left-hand side unsigned scalar
|
|||
|
* @return 0 on success or negative on error
|
|||
|
*/
|
|||
|
int mbedtls_mpi_mul_int(mbedtls_mpi *X, const mbedtls_mpi *A,
|
|||
|
mbedtls_mpi_uint b)
|
|||
|
{
|
|||
|
int r;
|
|||
|
size_t n;
|
|||
|
MPI_VALIDATE_RET(X);
|
|||
|
MPI_VALIDATE_RET(A);
|
|||
|
n = mbedtls_mpi_limbs(A);
|
|||
|
if ((r = mbedtls_mpi_grow(X, n + 1))) return r;
|
|||
|
mbedtls_mpi_mul_hlp1(n, A->p, X->p, b);
|
|||
|
X->s = A->s;
|
|||
|
X->n = n + 1;
|
|||
|
return 0;
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Multiplies big numbers: X = A * B
|
|||
|
*
|
|||
|
* @param X is destination mpi
|
|||
|
* @param A is first factor
|
|||
|
* @param B is second factor
|
|||
|
* @return 0 on success or <0 on error
|
|||
|
*/
|
|||
|
int mbedtls_mpi_mul_mpi(mbedtls_mpi *X, const mbedtls_mpi *A,
|
|||
|
const mbedtls_mpi *B)
|
|||
|
{
|
|||
|
int i, j, t, k, ret;
|
|||
|
mbedtls_mpi_uint *K;
|
|||
|
mbedtls_mpi TA, TB, *T;
|
|||
|
MPI_VALIDATE_RET(X);
|
|||
|
MPI_VALIDATE_RET(A);
|
|||
|
MPI_VALIDATE_RET(B);
|
|||
|
|
|||
|
i = mbedtls_mpi_limbs(A);
|
|||
|
j = mbedtls_mpi_limbs(B);
|
|||
|
|
|||
|
if (!i || !j)
|
|||
|
return mbedtls_mpi_lset(X, 0);
|
|||
|
|
|||
|
if( j > i )
|
|||
|
T = A,
|
|||
|
A = B,
|
|||
|
B = T,
|
|||
|
t = i,
|
|||
|
i = j,
|
|||
|
j = t;
|
|||
|
|
|||
|
if (!IsTiny() && j == 1) {
|
|||
|
if (X->n < i + 1)
|
|||
|
if ((ret = mbedtls_mpi_grow(X, i + 1))) return ret;
|
|||
|
else if (X->n > i + 1)
|
|||
|
mbedtls_platform_zeroize(X->p + i + 1, (X->n - (i + 1)) * ciL);
|
|||
|
mbedtls_mpi_mul_hlp1(i, A->p, X->p, B->p[0]);
|
|||
|
X->s = A->s * B->s;
|
|||
|
return 0;
|
|||
|
}
|
|||
|
|
|||
|
if (!IsTiny() && i == j) {
|
|||
|
if (X->n < i * 2)
|
|||
|
if ((ret = mbedtls_mpi_grow(X, i * 2))) return ret;
|
|||
|
else if (X->n > i * 2)
|
|||
|
mbedtls_platform_zeroize(X->p + i * 2, (X->n - (i * 2)) * ciL);
|
|||
|
if (i == 4) {
|
|||
|
Mul4x4(X->p, A->p, B->p);
|
|||
|
X->s = A->s * B->s;
|
|||
|
return 0;
|
|||
|
} else if (i == 6 && X86_HAVE(BMI2) && X86_HAVE(ADX)) {
|
|||
|
Mul6x6Adx(X->p, A->p, B->p);
|
|||
|
X->s = A->s * B->s;
|
|||
|
return 0;
|
|||
|
} else if (i == 8 && X86_HAVE(BMI2) && X86_HAVE(ADX)) {
|
|||
|
Mul8x8Adx(X->p, A->p, B->p);
|
|||
|
X->s = A->s * B->s;
|
|||
|
return 0;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
mbedtls_mpi_init( &TA );
|
|||
|
mbedtls_mpi_init( &TB );
|
|||
|
|
|||
|
if (X->n < i + j)
|
|||
|
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) );
|
|||
|
else if (X->n > i + j)
|
|||
|
mbedtls_platform_zeroize( X->p + i + j, (X->n - (i + j)) * ciL );
|
|||
|
if (X == A) {
|
|||
|
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) );
|
|||
|
A = &TA;
|
|||
|
}
|
|||
|
if (X == B) {
|
|||
|
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
|
|||
|
B = &TB;
|
|||
|
}
|
|||
|
if (!IsTiny() &&
|
|||
|
i >= 16 && i == j && !(i & (i - 1)) &&
|
|||
|
X86_HAVE(BMI2) && X86_HAVE(ADX) &&
|
|||
|
(K = malloc(i * 4 * sizeof(*K)))) {
|
|||
|
Karatsuba(X->p, A->p, B->p, i, K);
|
|||
|
free(K);
|
|||
|
} else {
|
|||
|
Mul(X->p, A->p, i, B->p, j);
|
|||
|
}
|
|||
|
|
|||
|
X->s = A->s * B->s;
|
|||
|
ret = 0;
|
|||
|
cleanup:
|
|||
|
mbedtls_mpi_free(&TB);
|
|||
|
mbedtls_mpi_free(&TA);
|
|||
|
return ret;
|
|||
|
}
|