2021-07-19 21:55:20 +00:00
|
|
|
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│
|
|
|
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
|
│ Copyright The Mbed TLS Contributors │
|
|
|
|
|
│ │
|
|
|
|
|
│ Licensed under the Apache License, Version 2.0 (the "License"); │
|
|
|
|
|
│ you may not use this file except in compliance with the License. │
|
|
|
|
|
│ You may obtain a copy of the License at │
|
|
|
|
|
│ │
|
|
|
|
|
│ http://www.apache.org/licenses/LICENSE-2.0 │
|
|
|
|
|
│ │
|
|
|
|
|
│ Unless required by applicable law or agreed to in writing, software │
|
|
|
|
|
│ distributed under the License is distributed on an "AS IS" BASIS, │
|
|
|
|
|
│ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. │
|
|
|
|
|
│ See the License for the specific language governing permissions and │
|
|
|
|
|
│ limitations under the License. │
|
|
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
|
|
|
#include "libc/bits/bits.h"
|
|
|
|
|
#include "libc/log/backtrace.internal.h"
|
|
|
|
|
#include "libc/log/check.h"
|
|
|
|
|
#include "libc/macros.internal.h"
|
|
|
|
|
#include "libc/mem/mem.h"
|
|
|
|
|
#include "libc/nexgen32e/bsr.h"
|
|
|
|
|
#include "libc/nexgen32e/x86feature.h"
|
|
|
|
|
#include "third_party/mbedtls/bignum.h"
|
|
|
|
|
#include "third_party/mbedtls/bignum_internal.h"
|
|
|
|
|
#include "third_party/mbedtls/profile.h"
|
|
|
|
|
/* clang-format off */
|
|
|
|
|
|
|
|
|
|
void Mul(uint64_t *c, uint64_t *A, unsigned n, uint64_t *B, unsigned m)
|
|
|
|
|
{
|
|
|
|
|
if (!m--) return;
|
|
|
|
|
mbedtls_platform_zeroize(c, m * ciL);
|
|
|
|
|
mbedtls_mpi_mul_hlp1(n, A, c + m, B[m]);
|
|
|
|
|
for (; m > 0; m--)
|
|
|
|
|
mbedtls_mpi_mul_hlp(n, A, c + m - 1, B[m - 1]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Computes inner loop of multiplication algorithm.
|
|
|
|
|
*/
|
|
|
|
|
void mbedtls_mpi_mul_hlp1(size_t n, const uint64_t *s, uint64_t *d, uint64_t b)
|
|
|
|
|
{
|
|
|
|
|
size_t i;
|
|
|
|
|
uint128_t x;
|
2021-08-04 05:42:17 +00:00
|
|
|
|
uint64_t c, t, t1, t2;
|
2021-07-19 21:55:20 +00:00
|
|
|
|
i = c = 0;
|
2021-08-05 21:43:53 +00:00
|
|
|
|
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
2021-08-04 05:42:17 +00:00
|
|
|
|
if( X86_HAVE(BMI2) )
|
2021-07-19 21:55:20 +00:00
|
|
|
|
{
|
2021-08-04 05:42:17 +00:00
|
|
|
|
for( ; i + 8 <= n; i += 8 )
|
2021-07-19 21:55:20 +00:00
|
|
|
|
{
|
2021-08-04 05:42:17 +00:00
|
|
|
|
asm volatile("mulx\t(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"add\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,(%1)\n\t"
|
|
|
|
|
"mulx\t8(%2),%%rax,%0\n\t"
|
|
|
|
|
"adc\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,8(%1)\n\t"
|
|
|
|
|
"mulx\t16(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adc\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,16(%1)\n\t"
|
|
|
|
|
"mulx\t24(%2),%%rax,%0\n\t"
|
|
|
|
|
"adc\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,24(%1)\n\t"
|
|
|
|
|
"mulx\t32(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adc\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,32(%1)\n\t"
|
|
|
|
|
"mulx\t40(%2),%%rax,%0\n\t"
|
|
|
|
|
"adc\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,40(%1)\n\t"
|
|
|
|
|
"mulx\t48(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adc\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,48(%1)\n\t"
|
|
|
|
|
"mulx\t56(%2),%%rax,%0\n\t"
|
|
|
|
|
"adc\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,56(%1)\n\t"
|
|
|
|
|
"adc\t$0,%0"
|
2021-07-19 21:55:20 +00:00
|
|
|
|
: "+r"(c)
|
2021-08-04 05:42:17 +00:00
|
|
|
|
: "r"(d + i), "r"(s + i), "d"(b)
|
|
|
|
|
: "rax", "rbx", "memory", "cc");
|
2021-07-19 21:55:20 +00:00
|
|
|
|
}
|
2021-08-04 05:42:17 +00:00
|
|
|
|
for( ; i + 4 <= n; i += 4 )
|
2021-07-19 21:55:20 +00:00
|
|
|
|
{
|
2021-08-04 05:42:17 +00:00
|
|
|
|
asm volatile("mulx\t(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"add\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,(%1)\n\t"
|
|
|
|
|
"mulx\t8(%2),%%rax,%0\n\t"
|
|
|
|
|
"adc\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,8(%1)\n\t"
|
|
|
|
|
"mulx\t16(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adc\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,16(%1)\n\t"
|
|
|
|
|
"mulx\t24(%2),%%rax,%0\n\t"
|
|
|
|
|
"adc\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,24(%1)\n\t"
|
|
|
|
|
"adc\t$0,%0"
|
2021-07-19 21:55:20 +00:00
|
|
|
|
: "+r"(c)
|
2021-08-04 05:42:17 +00:00
|
|
|
|
: "r"(d + i), "r"(s + i), "d"(b)
|
|
|
|
|
: "rax", "rbx", "memory", "cc");
|
2021-07-19 21:55:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
2021-08-04 05:42:17 +00:00
|
|
|
|
for( ; i < n; ++i )
|
2021-07-19 21:55:20 +00:00
|
|
|
|
{
|
|
|
|
|
x = s[i];
|
|
|
|
|
x *= b;
|
|
|
|
|
x += c;
|
|
|
|
|
c = x >> 64;
|
|
|
|
|
d[i] = x;
|
|
|
|
|
}
|
|
|
|
|
d[i] = c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Computes inner loop of multiplication algorithm.
|
|
|
|
|
*/
|
|
|
|
|
void mbedtls_mpi_mul_hlp(size_t n, uint64_t *s, uint64_t *d, uint64_t b)
|
|
|
|
|
{
|
|
|
|
|
size_t i;
|
|
|
|
|
uint128_t x;
|
|
|
|
|
uint64_t c, l, h, t;
|
|
|
|
|
i = c = 0;
|
2021-08-05 21:43:53 +00:00
|
|
|
|
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
2021-07-19 21:55:20 +00:00
|
|
|
|
if (X86_HAVE(BMI2) && X86_HAVE(ADX))
|
|
|
|
|
{
|
2021-08-04 05:42:17 +00:00
|
|
|
|
for( ; i + 8 <= n; i += 8 )
|
2021-07-19 21:55:20 +00:00
|
|
|
|
{
|
2021-08-04 05:42:17 +00:00
|
|
|
|
asm volatile("xor\t%%r8d,%%r8d\n\t"
|
|
|
|
|
"mulx\t(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adcx\t(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,(%1)\n\t"
|
|
|
|
|
"mulx\t8(%2),%%rax,%0\n\t"
|
|
|
|
|
"adcx\t8(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,8(%1)\n\t"
|
|
|
|
|
"mulx\t16(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adcx\t16(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,16(%1)\n\t"
|
|
|
|
|
"mulx\t24(%2),%%rax,%0\n\t"
|
|
|
|
|
"adcx\t24(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,24(%1)\n\t"
|
|
|
|
|
"mulx\t32(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adcx\t32(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,32(%1)\n\t"
|
|
|
|
|
"mulx\t40(%2),%%rax,%0\n\t"
|
|
|
|
|
"adcx\t40(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,40(%1)\n\t"
|
|
|
|
|
"mulx\t48(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adcx\t48(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,48(%1)\n\t"
|
|
|
|
|
"mulx\t56(%2),%%rax,%0\n\t"
|
|
|
|
|
"adcx\t56(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,56(%1)\n\t"
|
|
|
|
|
"adcx\t%%r8,%0\n\t"
|
|
|
|
|
"adox\t%%r8,%0"
|
2021-07-19 21:55:20 +00:00
|
|
|
|
: "+r"(c)
|
2021-08-04 05:42:17 +00:00
|
|
|
|
: "r"(d + i), "r"(s + i), "d"(b)
|
|
|
|
|
: "rax", "rbx", "r8", "memory", "cc");
|
2021-07-19 21:55:20 +00:00
|
|
|
|
}
|
2021-08-04 05:42:17 +00:00
|
|
|
|
for( ; i + 4 <= n; i += 4 )
|
2021-07-19 21:55:20 +00:00
|
|
|
|
{
|
2021-08-04 05:42:17 +00:00
|
|
|
|
asm volatile("xor\t%%r8d,%%r8d\n\t"
|
|
|
|
|
"mulx\t(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adcx\t(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,(%1)\n\t"
|
|
|
|
|
"mulx\t8(%2),%%rax,%0\n\t"
|
|
|
|
|
"adcx\t8(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,8(%1)\n\t"
|
|
|
|
|
"mulx\t16(%2),%%rax,%%rbx\n\t"
|
|
|
|
|
"adcx\t16(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%0,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,16(%1)\n\t"
|
|
|
|
|
"mulx\t24(%2),%%rax,%0\n\t"
|
|
|
|
|
"adcx\t24(%1),%%rax\n\t"
|
|
|
|
|
"adox\t%%rbx,%%rax\n\t"
|
|
|
|
|
"mov\t%%rax,24(%1)\n\t"
|
|
|
|
|
"adcx\t%%r8,%0\n\t"
|
|
|
|
|
"adox\t%%r8,%0"
|
2021-07-19 21:55:20 +00:00
|
|
|
|
: "+r"(c)
|
2021-08-04 05:42:17 +00:00
|
|
|
|
: "r"(d + i), "r"(s + i), "d"(b)
|
|
|
|
|
: "rax", "rbx", "r8", "memory", "cc");
|
2021-07-19 21:55:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
2021-08-04 05:42:17 +00:00
|
|
|
|
for( ; i < n; ++i )
|
2021-07-19 21:55:20 +00:00
|
|
|
|
{
|
|
|
|
|
x = s[i];
|
|
|
|
|
x *= b;
|
|
|
|
|
x += c;
|
|
|
|
|
l = x;
|
|
|
|
|
h = x >> 64;
|
|
|
|
|
t = d[i];
|
|
|
|
|
d[i] = t + l;
|
|
|
|
|
c = (t + l < t) + h;
|
|
|
|
|
}
|
|
|
|
|
do
|
|
|
|
|
{
|
|
|
|
|
d[i] += c;
|
|
|
|
|
} while ((c = d[i++] < c));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Multiplies big number with unsigned scalar: X = A × b
|
|
|
|
|
*
|
|
|
|
|
* @param X receives result w/ aliasing permitted
|
|
|
|
|
* @param A is left-hand side big number
|
|
|
|
|
* @param B is left-hand side unsigned scalar
|
|
|
|
|
* @return 0 on success or negative on error
|
|
|
|
|
*/
|
|
|
|
|
int mbedtls_mpi_mul_int(mbedtls_mpi *X, const mbedtls_mpi *A,
|
|
|
|
|
mbedtls_mpi_uint b)
|
|
|
|
|
{
|
|
|
|
|
int r;
|
|
|
|
|
size_t n;
|
|
|
|
|
MPI_VALIDATE_RET(X);
|
|
|
|
|
MPI_VALIDATE_RET(A);
|
|
|
|
|
n = mbedtls_mpi_limbs(A);
|
|
|
|
|
if ((r = mbedtls_mpi_grow(X, n + 1))) return r;
|
|
|
|
|
mbedtls_mpi_mul_hlp1(n, A->p, X->p, b);
|
|
|
|
|
X->s = A->s;
|
|
|
|
|
X->n = n + 1;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Multiplies big numbers: X = A * B
|
|
|
|
|
*
|
|
|
|
|
* @param X is destination mpi
|
|
|
|
|
* @param A is first factor
|
|
|
|
|
* @param B is second factor
|
|
|
|
|
* @return 0 on success or <0 on error
|
|
|
|
|
*/
|
|
|
|
|
int mbedtls_mpi_mul_mpi(mbedtls_mpi *X, const mbedtls_mpi *A,
|
|
|
|
|
const mbedtls_mpi *B)
|
|
|
|
|
{
|
|
|
|
|
int i, j, t, k, ret;
|
|
|
|
|
mbedtls_mpi_uint *K;
|
|
|
|
|
mbedtls_mpi TA, TB, *T;
|
|
|
|
|
MPI_VALIDATE_RET(X);
|
|
|
|
|
MPI_VALIDATE_RET(A);
|
|
|
|
|
MPI_VALIDATE_RET(B);
|
|
|
|
|
|
|
|
|
|
i = mbedtls_mpi_limbs(A);
|
|
|
|
|
j = mbedtls_mpi_limbs(B);
|
|
|
|
|
|
|
|
|
|
if (!i || !j)
|
|
|
|
|
return mbedtls_mpi_lset(X, 0);
|
|
|
|
|
|
|
|
|
|
if( j > i )
|
|
|
|
|
T = A,
|
|
|
|
|
A = B,
|
|
|
|
|
B = T,
|
|
|
|
|
t = i,
|
|
|
|
|
i = j,
|
|
|
|
|
j = t;
|
|
|
|
|
|
|
|
|
|
if (!IsTiny() && j == 1) {
|
2021-08-04 05:42:17 +00:00
|
|
|
|
if (X->n < i + 1) {
|
2021-07-19 21:55:20 +00:00
|
|
|
|
if ((ret = mbedtls_mpi_grow(X, i + 1))) return ret;
|
2021-08-04 05:42:17 +00:00
|
|
|
|
} else if (X->n > i + 1) {
|
2021-07-19 21:55:20 +00:00
|
|
|
|
mbedtls_platform_zeroize(X->p + i + 1, (X->n - (i + 1)) * ciL);
|
2021-08-04 05:42:17 +00:00
|
|
|
|
}
|
2021-07-19 21:55:20 +00:00
|
|
|
|
mbedtls_mpi_mul_hlp1(i, A->p, X->p, B->p[0]);
|
|
|
|
|
X->s = A->s * B->s;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!IsTiny() && i == j) {
|
2021-08-04 05:42:17 +00:00
|
|
|
|
if (X->n < i * 2) {
|
2021-07-19 21:55:20 +00:00
|
|
|
|
if ((ret = mbedtls_mpi_grow(X, i * 2))) return ret;
|
2021-08-04 05:42:17 +00:00
|
|
|
|
} else if (X->n > i * 2) {
|
2021-07-19 21:55:20 +00:00
|
|
|
|
mbedtls_platform_zeroize(X->p + i * 2, (X->n - (i * 2)) * ciL);
|
2021-08-04 05:42:17 +00:00
|
|
|
|
}
|
2021-07-19 21:55:20 +00:00
|
|
|
|
if (i == 4) {
|
|
|
|
|
Mul4x4(X->p, A->p, B->p);
|
|
|
|
|
X->s = A->s * B->s;
|
|
|
|
|
return 0;
|
|
|
|
|
} else if (i == 6 && X86_HAVE(BMI2) && X86_HAVE(ADX)) {
|
|
|
|
|
Mul6x6Adx(X->p, A->p, B->p);
|
|
|
|
|
X->s = A->s * B->s;
|
|
|
|
|
return 0;
|
|
|
|
|
} else if (i == 8 && X86_HAVE(BMI2) && X86_HAVE(ADX)) {
|
|
|
|
|
Mul8x8Adx(X->p, A->p, B->p);
|
|
|
|
|
X->s = A->s * B->s;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mbedtls_mpi_init( &TA );
|
|
|
|
|
mbedtls_mpi_init( &TB );
|
|
|
|
|
|
|
|
|
|
if (X->n < i + j)
|
|
|
|
|
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) );
|
|
|
|
|
else if (X->n > i + j)
|
|
|
|
|
mbedtls_platform_zeroize( X->p + i + j, (X->n - (i + j)) * ciL );
|
|
|
|
|
if (X == A) {
|
|
|
|
|
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) );
|
|
|
|
|
A = &TA;
|
|
|
|
|
}
|
|
|
|
|
if (X == B) {
|
|
|
|
|
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
|
|
|
|
|
B = &TB;
|
|
|
|
|
}
|
|
|
|
|
if (!IsTiny() &&
|
|
|
|
|
i >= 16 && i == j && !(i & (i - 1)) &&
|
|
|
|
|
X86_HAVE(BMI2) && X86_HAVE(ADX) &&
|
|
|
|
|
(K = malloc(i * 4 * sizeof(*K)))) {
|
|
|
|
|
Karatsuba(X->p, A->p, B->p, i, K);
|
|
|
|
|
free(K);
|
|
|
|
|
} else {
|
|
|
|
|
Mul(X->p, A->p, i, B->p, j);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
X->s = A->s * B->s;
|
|
|
|
|
ret = 0;
|
|
|
|
|
cleanup:
|
|
|
|
|
mbedtls_mpi_free(&TB);
|
|
|
|
|
mbedtls_mpi_free(&TA);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|