Make stronger crypto nearly as fast

One of the disadvantages of x25519 and ℘256 is that they only provide
126 bits of security, which seems like a weak link in the chain if we
are using ECDHE-ECDSA-AES256-GCM-SHA384. The U.S. government wants
classified data to be encrypted using a curve at least as strong as
℘384, which provides 192 bits of security, but the consensus on Stack
Exchange would give you the impression that ℘384 is three times slower.
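
As a point of reference (this is not part of the commit), a server
built against MbedTLS could pin itself to that suite and curve with
something like the sketch below. It uses upstream 2.x API names; the
vendored fork's include paths may differ, and the rest of the TLS
setup (RNG, certificates, sockets) is omitted.

// Hedged sketch: prefer ECDHE-ECDSA-AES256-GCM-SHA384 over NIST P-384.
#include "mbedtls/ecp.h"
#include "mbedtls/ssl.h"

static const int kCipherSuites[] = {
    MBEDTLS_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
    0,  // zero-terminated list
};

static const mbedtls_ecp_group_id kCurves[] = {
    MBEDTLS_ECP_DP_SECP384R1,  // NIST P-384, 192 bits of security
    MBEDTLS_ECP_DP_NONE,       // list terminator
};

void PreferStrongCrypto(mbedtls_ssl_config *conf) {
  mbedtls_ssl_conf_ciphersuites(conf, kCipherSuites);
  mbedtls_ssl_conf_curves(conf, kCurves);
}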

This change (as well as the previous one) makes ℘384 three times as
fast by tuning its modulus and multiplication subroutines, with new
tests that should convincingly show the optimized code behaves the
same way as the old code. Some of the diff noise from the previous
change has been removed too, so that our vendored fork can be more
easily compared with upstream sources. You can now have stronger
cryptography without compromises.
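
To illustrate the kind of equivalence test meant here, the sketch
below (not the actual test file) checks the optimized 384-bit multiply
against a portable schoolbook reference on random inputs. The
Mul6x6Adx() prototype is an assumption inferred from the assembly's
register usage.

#include <assert.h>
#include <stdint.h>

// Assumed prototype: result gets 12 quadwords, each operand has 6.
void Mul6x6Adx(uint64_t o[12], const uint64_t a[6], const uint64_t b[6]);

// splitmix64, so every bit of every limb gets exercised
static uint64_t Rand64(void) {
  static uint64_t s = 0x9e3779b97f4a7c15ull;
  uint64_t z = (s += 0x9e3779b97f4a7c15ull);
  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ull;
  z = (z ^ (z >> 27)) * 0x94d049bb133111ebull;
  return z ^ (z >> 31);
}

// portable schoolbook 6x6 limb multiply with 128-bit accumulation
static void RefMul6x6(uint64_t o[12], const uint64_t a[6],
                      const uint64_t b[6]) {
  uint64_t t[12] = {0};
  for (int i = 0; i < 6; ++i) {
    unsigned __int128 c = 0;
    for (int j = 0; j < 6; ++j) {
      c += (unsigned __int128)a[i] * b[j] + t[i + j];
      t[i + j] = (uint64_t)c;
      c >>= 64;
    }
    t[i + 6] = (uint64_t)c;
  }
  for (int i = 0; i < 12; ++i) o[i] = t[i];
}

int main(void) {
  for (int n = 0; n < 100000; ++n) {
    uint64_t a[6], b[6], want[12], got[12];
    for (int i = 0; i < 6; ++i) {
      a[i] = Rand64();
      b[i] = Rand64();
    }
    RefMul6x6(want, a, b);
    Mul6x6Adx(got, a, b);
    for (int i = 0; i < 12; ++i) assert(want[i] == got[i]);
  }
}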

℘384 modulus Justine                        l:         28𝑐          9𝑛𝑠
℘384 modulus MbedTLS NIST                   l:        127𝑐         41𝑛𝑠
℘384 modulus MbedTLS MPI                    l:      1,850𝑐        597𝑛𝑠

The benchmarks above show the improvements made to secp384r1(), an
important function since it needs to be called 13,000 times whenever
someone establishes a connection to your web server. The same is true
of Mul6x6Adx(), which is able to multiply 384-bit numbers in 73
cycles, but only if your CPU was purchased after 2014, when Broadwell
was introduced.
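
To put those numbers in perspective, 13,000 reductions at 597ns is
roughly 7.8ms of modulus work per handshake, versus roughly 0.12ms at
9ns. The sketch below (again not the project's benchmark harness, and
reusing the assumed Mul6x6Adx() prototype) shows how a per-call cycle
figure could be approximated; note that RDTSC counts reference cycles
rather than core clock cycles.

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

void Mul6x6Adx(uint64_t o[12], const uint64_t a[6], const uint64_t b[6]);

int main(void) {
  enum { kIterations = 1000000 };
  uint64_t a[6] = {1, 2, 3, 4, 5, 6};
  uint64_t b[6] = {7, 8, 9, 10, 11, 12};
  uint64_t o[12];
  uint64_t t0 = __rdtsc();
  for (int i = 0; i < kIterations; ++i) {
    Mul6x6Adx(o, a, b);  // 384-bit × 384-bit → 768-bit
  }
  uint64_t t1 = __rdtsc();
  printf("~%.1f reference cycles per call\n",
         (double)(t1 - t0) / kIterations);
}
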
Justine Tunney 2021-07-26 15:16:43 -07:00
parent 398f0c16fb
commit ea83cc0ad0
27 changed files with 4291 additions and 3361 deletions


@@ -1,39 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Computes C = A + B
//
// @param rdi is C
// @param rsi is A
// @param rdx is B
// @param rcx is number of additions
// @return al has carry
adc: .leafprologue
test %ecx,%ecx
jz 1f
xor %r9d,%r9d
0: mov (%rsi,%r9,8),%rax
adc (%rdx,%r9,8),%rax
mov %rax,(%rdi,%r9,8)
inc %r9d
loop 0b
1: setb %al
.leafepilogue
.endfn adc,globl


@@ -18,34 +18,47 @@
*/
#include "libc/macros.internal.h"
// Computes 512-bit product of 256-bit and 256-bit numbers.
//
// Instructions: 88
// Total Cycles: 36
// Total uOps: 120
// uOps Per Cycle: 3.33
// IPC: 2.44
// Block RThroughput: 20.0
//
// @param rdi receives 8 quadword result
// @param rsi is left hand side which must have 4 quadwords
// @param rdx is right hand side which must have 4 quadwords
// @note words are host endian while array is little endian
// @mayalias
Mul4x4Adx:
push %rbp
mov %rsp,%rbp
.profilable
push %r15
push %r14
push %r13
push %r12
sub $56,%rsp
mov %r15,-8(%rbp)
mov %r14,-16(%rbp)
mov %r13,-24(%rbp)
mov %r12,-32(%rbp)
mov %rbx,-40(%rbp)
mov %rdx,%r12
push %rbx
sub $16,%rsp
mov (%rdx),%rdx
mov (%rsi),%rax
mov 16(%rsi),%r11
mov 24(%rsi),%r10
xor %r13d,%r13d
mulx %rax,%rbx,%rax
mov %rbx,-48(%rbp)
mov 8(%rsi),%rbx
mulx %rbx,%rdx,%rcx
adox %rdx,%rax
add %rdx,%rax
mov (%r12),%rdx
mulx %r11,%rdx,%r9
adox %rdx,%rcx
adc %rdx,%rcx
mov (%r12),%rdx
mulx %r10,%rdx,%r8
adox %rdx,%r9
adox %r13,%r8
adc %rdx,%r9
adc $0,%r8
xor %r13d,%r13d
mov (%rsi),%r14
mov 8(%r12),%rdx
@@ -105,12 +118,103 @@ Mul4x4Adx:
adox %r14,%r10
mov %rsi,(%rdi)
mov %r10,56(%rdi)
add $16,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
mov -8(%rbp),%r15
mov -16(%rbp),%r14
mov -24(%rbp),%r13
mov -32(%rbp),%r12
mov -40(%rbp),%rbx
leave
ret
.endfn Mul4x4Adx,globl
.end
TIMELINE VIEW 0123456789 012345
Index 0123456789 0123456789
[0,0] DeER . . . . . . . subq $56, %rsp
[0,1] DeER . . . . . . . movq %r15, -8(%rbp)
[0,2] D=eER. . . . . . . movq %r14, -16(%rbp)
[0,3] D==eER . . . . . . movq %r13, -24(%rbp)
[0,4] D===eER . . . . . . movq %r12, -32(%rbp)
[0,5] D====eER . . . . . . movq %rbx, -40(%rbp)
[0,6] .DeE---R . . . . . . movq %rdx, %r12
[0,7] .DeeeeeER . . . . . . movq (%rdx), %rdx
[0,8] .D=eeeeeER. . . . . . movq (%rsi), %rax
[0,9] .D=eeeeeER. . . . . . movq 16(%rsi), %r11
[0,10] .D==eeeeeER . . . . . movq 24(%rsi), %r10
[0,11] . D=====eeeeER . . . . . mulxq %rax, %rbx, %rax
[0,12] . D========eER . . . . . movq %rbx, -48(%rbp)
[0,13] . D=eeeeeE---R . . . . . movq 8(%rsi), %rbx
[0,14] . D=====eeeeER. . . . . mulxq %rbx, %rdx, %rcx
[0,15] . D========eER. . . . . addq %rdx, %rax
[0,16] . D=eeeeeE---R. . . . . movq (%r12), %rdx
[0,17] . D=====eeeeER . . . . mulxq %r11, %rdx, %r9
[0,18] . D========eER . . . . adcq %rdx, %rcx
[0,19] . DeeeeeE----R . . . . movq (%r12), %rdx
[0,20] . D=====eeeeER . . . . mulxq %r10, %rdx, %r8
[0,21] . D========eER . . . . adcq %rdx, %r9
[0,22] . D=========eER . . . . adcq $0, %r8
[0,23] . D-----------R . . . . xorl %r13d, %r13d
[0,24] . .DeeeeeE----R . . . . movq (%rsi), %r14
[0,25] . .DeeeeeE----R . . . . movq 8(%r12), %rdx
[0,26] . .D=====eeeeER . . . . mulxq %r14, %r14, %r15
[0,27] . .D========eER . . . . adoxq %r14, %rax
[0,28] . . D========eER . . . . adcxq %r15, %rcx
[0,29] . . D========eER . . . . movq %rax, -56(%rbp)
[0,30] . . D=====eeeeER . . . . mulxq %rbx, %r14, %rax
[0,31] . . D=========eER. . . . adoxq %r14, %rcx
[0,32] . . D=========eER . . . adcxq %rax, %r9
[0,33] . . D=====eeeeE-R . . . mulxq %r11, %r14, %rax
[0,34] . . D==========eER . . . adoxq %r14, %r9
[0,35] . . D===========eER . . . adcxq %rax, %r8
[0,36] . . D=====eeeeE--R . . . mulxq %r10, %rdx, %rax
[0,37] . . D===========eER . . . adoxq %rdx, %r8
[0,38] . . DeeeeeE-------R . . . movq 16(%r12), %rdx
[0,39] . . D============eER. . . adcxq %r13, %rax
[0,40] . . D============eER . . adoxq %r13, %rax
[0,41] . . DeeeeeE--------R . . movq (%rsi), %r13
[0,42] . . D=====E--------R . . xorl %r15d, %r15d
[0,43] . . D=====eeeeE----R . . mulxq %r13, %r13, %r14
[0,44] . . .D=======eE----R . . adoxq %r13, %rcx
[0,45] . . .D========eE---R . . adcxq %r14, %r9
[0,46] . . .D=====eeeeE---R . . mulxq %rbx, %r14, %r13
[0,47] . . .D=========eE--R . . adoxq %r14, %r9
[0,48] . . . D=========eE-R . . adcxq %r13, %r8
[0,49] . . . D=====eeeeE--R . . mulxq %r11, %r14, %r13
[0,50] . . . D==========eER . . adoxq %r14, %r8
[0,51] . . . D===========eER . . adcxq %r13, %rax
[0,52] . . . DeeeeeE------R . . movq (%rsi), %rsi
[0,53] . . . D=====eeeeE--R . . mulxq %r10, %rdx, %r13
[0,54] . . . D===========eER . . adoxq %rdx, %rax
[0,55] . . . D============eER . . adcxq %r15, %r13
[0,56] . . . DeeeeeE-------R . . movq 24(%r12), %rdx
[0,57] . . . D============eER. . adoxq %r15, %r13
[0,58] . . . D=====eeeeE----R. . mulxq %rsi, %r12, %rsi
[0,59] . . . D======E-------R. . xorl %r14d, %r14d
[0,60] . . . D========eE---R. . adoxq %r12, %r9
[0,61] . . . D=========eE--R. . adcxq %rsi, %r8
[0,62] . . . D=====eeeeE---R. . mulxq %rbx, %rsi, %rbx
[0,63] . . . D==========eE-R. . adoxq %rsi, %r8
[0,64] . . . .D==========eER. . adcxq %rbx, %rax
[0,65] . . . .D=====eeeeE--R. . mulxq %r11, %r11, %rsi
[0,66] . . . .DeeeeeE------R. . movq -56(%rbp), %rbx
[0,67] . . . .D===eE-------R. . movq %rcx, 16(%rdi)
[0,68] . . . . D==========eER . adcxq %rsi, %r13
[0,69] . . . . DeeeeeE------R . movq -48(%rbp), %rsi
[0,70] . . . . D====eE------R . movq %rbx, 8(%rdi)
[0,71] . . . . D===========eER . adoxq %r11, %rax
[0,72] . . . . D=======eE----R . movq %r9, 24(%rdi)
[0,73] . . . . D=========eE--R . movq %r8, 32(%rdi)
[0,74] . . . . D===========eER . movq %rax, 40(%rdi)
[0,75] . . . . D====eeeeE----R . mulxq %r10, %rdx, %r10
[0,76] . . . . D===========eER . adoxq %rdx, %r13
[0,77] . . . . D============eER . adcxq %r14, %r10
[0,78] . . . . D===========eER . movq %r13, 48(%rdi)
[0,79] . . . . D============eER. adoxq %r14, %r10
[0,80] . . . . D============eER. movq %rsi, (%rdi)
[0,81] . . . . D=============eER movq %r10, 56(%rdi)
[0,82] . . . . DeeeeeE---------R movq -8(%rbp), %r15
[0,83] . . . . DeeeeeE---------R movq -16(%rbp), %r14
[0,84] . . . . DeeeeeE--------R movq -24(%rbp), %r13
[0,85] . . . . DeeeeeE--------R movq -32(%rbp), %r12
[0,86] . . . . D=eeeeeE-------R movq -40(%rbp), %rbx
[0,87] . . . . D===eE---------R addq $56, %rsp


@@ -18,37 +18,50 @@
*/
#include "libc/macros.internal.h"
// Computes 768-bit product of 384-bit and 384-bit numbers.
//
// Instructions: 153
// Total Cycles: 73
// Total uOps: 261
// uOps Per Cycle: 3.58
// IPC: 2.10
// Block RThroughput: 43.5
//
// @param rdi receives 12 quadword result
// @param rsi is left hand side which must have 6 quadwords
// @param rdx is right hand side which must have 6 quadwords
// @note words are host endian while array is little endian
// @mayalias
Mul6x6Adx:
push %rbp
mov %rsp,%rbp
.profilable
push %r15
push %r14
push %r13
push %r12
push %rbx
sub $64,%rsp
mov %r15,-8(%rbp)
mov %r14,-16(%rbp)
mov %r13,-24(%rbp)
mov %r12,-32(%rbp)
mov %rbx,-40(%rbp)
mov %rdx,%rbx
sub $24,%rsp
mov (%rdx),%rdx
xor %r8d,%r8d
mulx (%rsi),%rcx,%rax
mulx 8(%rsi),%rdx,%r12
mov %rcx,-48(%rbp)
adox %rdx,%rax
add %rdx,%rax
mov (%rbx),%rdx
mulx 16(%rsi),%rdx,%r15
adox %rdx,%r12
adc %rdx,%r12
mov (%rbx),%rdx
mulx 24(%rsi),%rdx,%r10
adox %rdx,%r15
adc %rdx,%r15
mov (%rbx),%rdx
mulx 32(%rsi),%rdx,%r9
adox %rdx,%r10
adc %rdx,%r10
mov (%rbx),%rdx
mulx 40(%rsi),%rdx,%rcx
adox %rdx,%r9
adc %rdx,%r9
mov 8(%rbx),%rdx
adox %r8,%rcx
adc $0,%rcx
mulx (%rsi),%r13,%r11
xor %r8d,%r8d
adox %r13,%rax
@@ -171,12 +184,167 @@ Mul6x6Adx:
mov %r8,64(%rdi)
mov %r11,72(%rdi)
mov %rdx,88(%rdi)
add $24,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
mov -8(%rbp),%r15
mov -16(%rbp),%r14
mov -24(%rbp),%r13
mov -32(%rbp),%r12
mov -40(%rbp),%rbx
leave
ret
.endfn Mul6x6Adx,globl
.end
SIMULATION 0123456789 0123456789 0123456789 012
Index 0123456789 0123456789 0123456789 0123456789
[0,0] DeER . . . . . . . . . . . . . . . movq %r15, -8(%rbp)
[0,1] D=eER. . . . . . . . . . . . . . . movq %r14, -16(%rbp)
[0,2] D==eER . . . . . . . . . . . . . . movq %r13, -24(%rbp)
[0,3] D===eER . . . . . . . . . . . . . . movq %r12, -32(%rbp)
[0,4] D====eER . . . . . . . . . . . . . . movq %rbx, -40(%rbp)
[0,5] DeE----R . . . . . . . . . . . . . . movq %rdx, %rbx
[0,6] .DeeeeeER . . . . . . . . . . . . . . movq (%rdx), %rdx
[0,7] .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq (%rsi), %rcx, %rax
[0,8] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %r12
[0,9] . D=======eE------R . . . . . . . . . . . . movq %rcx, -48(%rbp)
[0,10] . D=============eER . . . . . . . . . . . . addq %rdx, %rax
[0,11] . DeeeeeE--------R . . . . . . . . . . . . movq (%rbx), %rdx
[0,12] . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 16(%rsi), %rdx, %r15
[0,13] . D=============eER. . . . . . . . . . . . adcq %rdx, %r12
[0,14] . DeeeeeE--------R. . . . . . . . . . . . movq (%rbx), %rdx
[0,15] . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r10
[0,16] . D=============eER . . . . . . . . . . . adcq %rdx, %r15
[0,17] . DeeeeeE--------R . . . . . . . . . . . movq (%rbx), %rdx
[0,18] . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r9
[0,19] . D=============eER . . . . . . . . . . . adcq %rdx, %r10
[0,20] . .DeeeeeE--------R . . . . . . . . . . . movq (%rbx), %rdx
[0,21] . .D=====eeeeeeeeeER . . . . . . . . . . . mulxq 40(%rsi), %rdx, %rcx
[0,22] . .D=============eER . . . . . . . . . . . adcq %rdx, %r9
[0,23] . . DeeeeeE--------R . . . . . . . . . . . movq 8(%rbx), %rdx
[0,24] . . D=============eER . . . . . . . . . . . adcq $0, %rcx
[0,25] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r13, %r11
[0,26] . . D--------------R . . . . . . . . . . . xorl %r8d, %r8d
[0,27] . . D========eE----R . . . . . . . . . . . adoxq %r13, %rax
[0,28] . . D=============eER. . . . . . . . . . . adcxq %r11, %r12
[0,29] . . D=========eE----R. . . . . . . . . . . movq %rax, -56(%rbp)
[0,30] . . D====eeeeeeeeeER. . . . . . . . . . . mulxq 8(%rsi), %r11, %rax
[0,31] . . D=============eER . . . . . . . . . . adoxq %r11, %r12
[0,32] . . D==============eER . . . . . . . . . . adcxq %rax, %r15
[0,33] . . D=============eER . . . . . . . . . . movq %r12, %r14
[0,34] . . D====eeeeeeeeeE-R . . . . . . . . . . mulxq 16(%rsi), %r11, %rax
[0,35] . . D==============eER . . . . . . . . . . adoxq %r11, %r15
[0,36] . . .D==============eER . . . . . . . . . . adcxq %rax, %r10
[0,37] . . .D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r11, %rax
[0,38] . . .D===============eER. . . . . . . . . . adoxq %r11, %r10
[0,39] . . . D===============eER . . . . . . . . . adcxq %rax, %r9
[0,40] . . . D====eeeeeeeeeE---R . . . . . . . . . mulxq 32(%rsi), %r11, %rax
[0,41] . . . D================eER . . . . . . . . . adoxq %r11, %r9
[0,42] . . . D================eER . . . . . . . . . adcxq %rax, %rcx
[0,43] . . . D====eeeeeeeeeE----R . . . . . . . . . mulxq 40(%rsi), %rdx, %rax
[0,44] . . . D=================eER . . . . . . . . . adoxq %rdx, %rcx
[0,45] . . . D=================eER. . . . . . . . . adcxq %r8, %rax
[0,46] . . . DeeeeeE-------------R. . . . . . . . . movq 16(%rbx), %rdx
[0,47] . . . D==================eER . . . . . . . . adoxq %r8, %rax
[0,48] . . . D====eeeeeeeeeE-----R . . . . . . . . mulxq (%rsi), %r13, %r8
[0,49] . . . D====E--------------R . . . . . . . . xorl %r11d, %r11d
[0,50] . . . D=========eE--------R . . . . . . . . adoxq %r13, %r14
[0,51] . . . .D=========eE-------R . . . . . . . . movq %r14, -64(%rbp)
[0,52] . . . .D============eE----R . . . . . . . . adcxq %r8, %r15
[0,53] . . . .D====eeeeeeeeeE----R . . . . . . . . mulxq 8(%rsi), %r12, %r8
[0,54] . . . . D============eE---R . . . . . . . . adoxq %r12, %r15
[0,55] . . . . D=============eE--R . . . . . . . . adcxq %r8, %r10
[0,56] . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 16(%rsi), %r12, %r8
[0,57] . . . . D=============eE-R . . . . . . . . adoxq %r12, %r10
[0,58] . . . . D==============eER . . . . . . . . adcxq %r8, %r9
[0,59] . . . . D====eeeeeeeeeE--R . . . . . . . . mulxq 24(%rsi), %r12, %r8
[0,60] . . . . D==============eER . . . . . . . . adoxq %r12, %r9
[0,61] . . . . D===============eER . . . . . . . . adcxq %r8, %rcx
[0,62] . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r12, %r8
[0,63] . . . . D===============eER . . . . . . . . adoxq %r12, %rcx
[0,64] . . . . D================eER. . . . . . . . adcxq %r8, %rax
[0,65] . . . . D====eeeeeeeeeE----R. . . . . . . . mulxq 40(%rsi), %rdx, %r8
[0,66] . . . . .D================eER . . . . . . . adoxq %rdx, %rax
[0,67] . . . . .D=================eER . . . . . . . adcxq %r11, %r8
[0,68] . . . . .DeeeeeE-------------R . . . . . . . movq 24(%rbx), %rdx
[0,69] . . . . .D==================eER . . . . . . . adoxq %r11, %r8
[0,70] . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq (%rsi), %r13, %r11
[0,71] . . . . . D====E--------------R . . . . . . . xorl %r12d, %r12d
[0,72] . . . . . D===========eE------R . . . . . . . adoxq %r13, %r15
[0,73] . . . . . D============eE----R . . . . . . . adcxq %r11, %r10
[0,74] . . . . . D====eeeeeeeeeE----R . . . . . . . mulxq 8(%rsi), %r13, %r11
[0,75] . . . . . D=============eE---R . . . . . . . adoxq %r13, %r10
[0,76] . . . . . D=============eE--R . . . . . . . adcxq %r11, %r9
[0,77] . . . . . D====eeeeeeeeeE---R . . . . . . . mulxq 16(%rsi), %r13, %r11
[0,78] . . . . . D==============eE-R . . . . . . . adoxq %r13, %r9
[0,79] . . . . . D==============eER . . . . . . . adcxq %r11, %rcx
[0,80] . . . . . D====eeeeeeeeeE--R . . . . . . . mulxq 24(%rsi), %r13, %r11
[0,81] . . . . . D===============eER . . . . . . . adoxq %r13, %rcx
[0,82] . . . . . .D===============eER. . . . . . . adcxq %r11, %rax
[0,83] . . . . . .D====eeeeeeeeeE---R. . . . . . . mulxq 32(%rsi), %r13, %r11
[0,84] . . . . . .D================eER . . . . . . adoxq %r13, %rax
[0,85] . . . . . . D================eER . . . . . . adcxq %r11, %r8
[0,86] . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 40(%rsi), %rdx, %r11
[0,87] . . . . . . D=================eER . . . . . . adoxq %rdx, %r8
[0,88] . . . . . . DeeeeeE------------R . . . . . . movq 32(%rbx), %rdx
[0,89] . . . . . . D=================eER . . . . . . adcxq %r12, %r11
[0,90] . . . . . . D=====eeeeeeeeeE----R . . . . . . mulxq (%rsi), %r14, %r13
[0,91] . . . . . . D=================eER. . . . . . adoxq %r12, %r11
[0,92] . . . . . . D-------------------R. . . . . . xorl %r12d, %r12d
[0,93] . . . . . . D===========eE------R. . . . . . adoxq %r14, %r10
[0,94] . . . . . . D=============eE----R. . . . . . adcxq %r13, %r9
[0,95] . . . . . . D====eeeeeeeeeE----R. . . . . . mulxq 8(%rsi), %r14, %r13
[0,96] . . . . . . D=============eE---R. . . . . . adoxq %r14, %r9
[0,97] . . . . . . D==============eE--R. . . . . . adcxq %r13, %rcx
[0,98] . . . . . . .D====eeeeeeeeeE---R. . . . . . mulxq 16(%rsi), %r14, %r13
[0,99] . . . . . . .D==============eE-R. . . . . . adoxq %r14, %rcx
[0,100] . . . . . . .D===============eER. . . . . . adcxq %r13, %rax
[0,101] . . . . . . . D====eeeeeeeeeE--R. . . . . . mulxq 24(%rsi), %r14, %r13
[0,102] . . . . . . . D===============eER . . . . . adoxq %r14, %rax
[0,103] . . . . . . . D================eER . . . . . adcxq %r13, %r8
[0,104] . . . . . . . D====eeeeeeeeeE---R . . . . . mulxq 32(%rsi), %r14, %r13
[0,105] . . . . . . . D================eER . . . . . adoxq %r14, %r8
[0,106] . . . . . . . D=================eER . . . . . adcxq %r13, %r11
[0,107] . . . . . . . D====eeeeeeeeeE----R . . . . . mulxq 40(%rsi), %rdx, %r13
[0,108] . . . . . . . D=================eER. . . . . adoxq %rdx, %r11
[0,109] . . . . . . . D==================eER . . . . adcxq %r12, %r13
[0,110] . . . . . . . DeeeeeE-------------R . . . . movq 40(%rbx), %rdx
[0,111] . . . . . . . D==================eER . . . . adoxq %r12, %r13
[0,112] . . . . . . . D=====eeeeeeeeeE-----R . . . . mulxq (%rsi), %r14, %rbx
[0,113] . . . . . . . .D-------------------R . . . . xorl %r12d, %r12d
[0,114] . . . . . . . .D===========eE------R . . . . adoxq %r14, %r9
[0,115] . . . . . . . .D=============eE----R . . . . adcxq %rbx, %rcx
[0,116] . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 8(%rsi), %r14, %rbx
[0,117] . . . . . . . . D=============eE---R . . . . adoxq %r14, %rcx
[0,118] . . . . . . . . D==============eE--R . . . . adcxq %rbx, %rax
[0,119] . . . . . . . . D====eeeeeeeeeE---R . . . . mulxq 16(%rsi), %r14, %rbx
[0,120] . . . . . . . . D==============eE-R . . . . adoxq %r14, %rax
[0,121] . . . . . . . . D===============eER . . . . adcxq %rbx, %r8
[0,122] . . . . . . . . D====eeeeeeeeeE--R . . . . mulxq 24(%rsi), %r14, %rbx
[0,123] . . . . . . . . D===============eER . . . . adoxq %r14, %r8
[0,124] . . . . . . . . D================eER . . . . adcxq %rbx, %r11
[0,125] . . . . . . . . D====eeeeeeeeeE---R . . . . mulxq 32(%rsi), %r14, %rbx
[0,126] . . . . . . . . .D====eeeeeeeeeE--R . . . . mulxq 40(%rsi), %rsi, %rdx
[0,127] . . . . . . . . .D===============eER. . . . adoxq %r14, %r11
[0,128] . . . . . . . . .D================eER . . . adcxq %rbx, %r13
[0,129] . . . . . . . . . D================eER . . . adoxq %rsi, %r13
[0,130] . . . . . . . . . D=================eER . . . adcxq %r12, %rdx
[0,131] . . . . . . . . . D==================eER . . . adoxq %r12, %rdx
[0,132] . . . . . . . . . DeeeeeE--------------R . . . movq -48(%rbp), %rsi
[0,133] . . . . . . . . . D=eeeeeE-------------R . . . movq -56(%rbp), %rbx
[0,134] . . . . . . . . . D===eE---------------R . . . movq %r15, 24(%rdi)
[0,135] . . . . . . . . . D=eeeeeE------------R . . . movq -64(%rbp), %r14
[0,136] . . . . . . . . . D================eE-R . . . movq %r13, 80(%rdi)
[0,137] . . . . . . . . . D=================eER . . . movq %rbx, 8(%rdi)
[0,138] . . . . . . . . . D==================eER. . . movq %r14, 16(%rdi)
[0,139] . . . . . . . . . D===================eER . . movq %rsi, (%rdi)
[0,140] . . . . . . . . . D====================eER . . movq %r10, 32(%rdi)
[0,141] . . . . . . . . . D====================eER . . movq %r9, 40(%rdi)
[0,142] . . . . . . . . . D=====================eER . . movq %rcx, 48(%rdi)
[0,143] . . . . . . . . . D======================eER. . movq %rax, 56(%rdi)
[0,144] . . . . . . . . . D=======================eER . movq %r8, 64(%rdi)
[0,145] . . . . . . . . . D========================eER. movq %r11, 72(%rdi)
[0,146] . . . . . . . . . D=========================eER movq %rdx, 88(%rdi)
[0,147] . . . . . . . . . DeeeeeE--------------------R movq -8(%rbp), %r15
[0,148] . . . . . . . . . D=eeeeeE-------------------R movq -16(%rbp), %r14
[0,149] . . . . . . . . . D=eeeeeE-------------------R movq -24(%rbp), %r13
[0,150] . . . . . . . . . D==eeeeeE------------------R movq -32(%rbp), %r12
[0,151] . . . . . . . . . D==eeeeeE------------------R movq -40(%rbp), %rbx


@@ -1,483 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
/ Computes 1024-bit product of 512-bit and 512-bit numbers.
/
/ Instructions: 262
/ Total Cycles: 114
/ Total uOps: 469
/ Dispatch Width: 6
/ uOps Per Cycle: 4.11
/ IPC: 2.30
/ Block RThroughput: 78.2
/
/ @param rdi receives 16 quadword result
/ @param rsi is left hand side which must have 8 quadwords
/ @param rdx is right hand side which must have 8 quadwords
/ @note words are host endian while array is little endian
/ @mayalias
Mul8x8Adx:
push %rbp
mov %rsp,%rbp
.profilable
push %r15
push %r14
push %r13
push %r12
mov %rdx,%r12
push %rbx
sub $64,%rsp
mov (%rdx),%rdx
xor %r13d,%r13d
mulx (%rsi),%rax,%rcx
mov %rdi,-48(%rbp)
mov %rax,-56(%rbp)
mulx 8(%rsi),%rdx,%rax
adox %rdx,%rcx
mov (%r12),%rdx
mulx 16(%rsi),%rdx,%rbx
adox %rdx,%rax
mov (%r12),%rdx
mulx 24(%rsi),%rdx,%r11
adox %rdx,%rbx
mov (%r12),%rdx
mulx 32(%rsi),%rdx,%r10
adox %rdx,%r11
mov (%r12),%rdx
mulx 40(%rsi),%rdx,%r9
adox %rdx,%r10
mov (%r12),%rdx
mulx 48(%rsi),%rdx,%r8
adox %rdx,%r9
mov (%r12),%rdx
mulx 56(%rsi),%rdx,%rdi
adox %rdx,%r8
adox %r13,%rdi
xor %r13d,%r13d
mov 8(%r12),%rdx
mulx (%rsi),%r15,%r14
adox %r15,%rcx
adcx %r14,%rax
mov %rcx,-64(%rbp)
mulx 8(%rsi),%r14,%rcx
adox %r14,%rax
adcx %rcx,%rbx
mulx 16(%rsi),%r14,%rcx
adox %r14,%rbx
adcx %rcx,%r11
mulx 24(%rsi),%r14,%rcx
adox %r14,%r11
adcx %rcx,%r10
mulx 32(%rsi),%r14,%rcx
adox %r14,%r10
adcx %rcx,%r9
mulx 40(%rsi),%r14,%rcx
adox %r14,%r9
adcx %rcx,%r8
mulx 48(%rsi),%r14,%rcx
adox %r14,%r8
adcx %rcx,%rdi
mulx 56(%rsi),%rdx,%rcx
adox %rdx,%rdi
adcx %r13,%rcx
mov 16(%r12),%rdx
adox %r13,%rcx
mulx (%rsi),%r15,%r14
xor %r13d,%r13d
adox %r15,%rax
adcx %r14,%rbx
mov %rax,-72(%rbp)
mulx 8(%rsi),%r14,%rax
adox %r14,%rbx
adcx %rax,%r11
mulx 16(%rsi),%r14,%rax
adox %r14,%r11
adcx %rax,%r10
mulx 24(%rsi),%r14,%rax
adox %r14,%r10
adcx %rax,%r9
mulx 32(%rsi),%r14,%rax
adox %r14,%r9
adcx %rax,%r8
mulx 40(%rsi),%r14,%rax
adox %r14,%r8
adcx %rax,%rdi
mulx 48(%rsi),%r14,%rax
adox %r14,%rdi
adcx %rax,%rcx
mulx 56(%rsi),%rdx,%rax
adox %rdx,%rcx
adcx %r13,%rax
adox %r13,%rax
xor %r13d,%r13d
mov 24(%r12),%rdx
mulx (%rsi),%r15,%r14
adox %r15,%rbx
adcx %r14,%r11
mov %rbx,-80(%rbp)
mov %r11,%r15
mulx 8(%rsi),%r14,%rbx
adox %r14,%r15
adcx %rbx,%r10
mulx 16(%rsi),%rbx,%r11
adox %rbx,%r10
adcx %r11,%r9
mulx 24(%rsi),%rbx,%r11
adox %rbx,%r9
adcx %r11,%r8
mulx 32(%rsi),%rbx,%r11
adox %rbx,%r8
adcx %r11,%rdi
mulx 40(%rsi),%rbx,%r11
adox %rbx,%rdi
adcx %r11,%rcx
mulx 48(%rsi),%rbx,%r11
adox %rbx,%rcx
adcx %r11,%rax
mulx 56(%rsi),%rdx,%r11
adox %rdx,%rax
adcx %r13,%r11
mov 32(%r12),%rdx
adox %r13,%r11
xor %ebx,%ebx
mulx (%rsi),%r14,%r13
adox %r14,%r15
adcx %r13,%r10
mov %r15,-88(%rbp)
mulx 8(%rsi),%r14,%r13
mov %r10,%r15
adcx %r13,%r9
adox %r14,%r15
mulx 16(%rsi),%r13,%r10
adox %r13,%r9
adcx %r10,%r8
mulx 24(%rsi),%r13,%r10
adcx %r10,%rdi
adox %r13,%r8
mulx 32(%rsi),%r13,%r10
adox %r13,%rdi
adcx %r10,%rcx
mulx 40(%rsi),%r13,%r10
adox %r13,%rcx
adcx %r10,%rax
mulx 48(%rsi),%r13,%r10
adox %r13,%rax
adcx %r10,%r11
mulx 56(%rsi),%rdx,%r10
adox %rdx,%r11
adcx %rbx,%r10
mov 40(%r12),%rdx
adox %rbx,%r10
mulx (%rsi),%r14,%r13
xor %ebx,%ebx
adox %r14,%r15
mov %r15,-96(%rbp)
adcx %r13,%r9
mulx 8(%rsi),%r14,%r13
mov %r9,%r15
adox %r14,%r15
adcx %r13,%r8
mulx 16(%rsi),%r13,%r9
adox %r13,%r8
adcx %r9,%rdi
mulx 24(%rsi),%r13,%r9
adox %r13,%rdi
adcx %r9,%rcx
mulx 32(%rsi),%r13,%r9
adox %r13,%rcx
adcx %r9,%rax
mulx 40(%rsi),%r13,%r9
adox %r13,%rax
adcx %r9,%r11
mulx 48(%rsi),%r13,%r9
adox %r13,%r11
adcx %r9,%r10
mulx 56(%rsi),%rdx,%r9
adox %rdx,%r10
adcx %rbx,%r9
adox %rbx,%r9
xor %ebx,%ebx
mov 48(%r12),%rdx
mulx (%rsi),%r14,%r13
adox %r14,%r15
adcx %r13,%r8
mov %r15,-104(%rbp)
mulx 8(%rsi),%r14,%r13
mov %r8,%r15
adcx %r13,%rdi
adox %r14,%r15
mulx 16(%rsi),%r13,%r8
adox %r13,%rdi
adcx %r8,%rcx
mulx 24(%rsi),%r13,%r8
adox %r13,%rcx
adcx %r8,%rax
mulx 32(%rsi),%r13,%r8
adox %r13,%rax
adcx %r8,%r11
mulx 40(%rsi),%r13,%r8
adox %r13,%r11
adcx %r8,%r10
mulx 48(%rsi),%r13,%r8
adox %r13,%r10
adcx %r8,%r9
mulx 56(%rsi),%rdx,%r8
adox %rdx,%r9
mov 56(%r12),%rdx
adcx %rbx,%r8
mulx (%rsi),%r13,%r12
adox %rbx,%r8
xor %ebx,%ebx
adox %r13,%r15
adcx %r12,%rdi
mulx 8(%rsi),%r13,%r12
adox %r13,%rdi
adcx %r12,%rcx
mulx 16(%rsi),%r13,%r12
adox %r13,%rcx
adcx %r12,%rax
mulx 24(%rsi),%r13,%r12
adox %r13,%rax
adcx %r12,%r11
mulx 32(%rsi),%r13,%r12
adox %r13,%r11
adcx %r12,%r10
mulx 40(%rsi),%r13,%r12
adox %r13,%r10
adcx %r12,%r9
mulx 48(%rsi),%r13,%r12
mulx 56(%rsi),%rsi,%rdx
adox %r13,%r9
adcx %r12,%r8
adox %rsi,%r8
adcx %rbx,%rdx
mov -64(%rbp),%rsi
adox %rbx,%rdx
mov -48(%rbp),%rbx
mov -56(%rbp),%r14
mov %rsi,8(%rbx)
mov -72(%rbp),%rsi
mov %r14,(%rbx)
mov %rsi,16(%rbx)
mov -80(%rbp),%rsi
mov %rsi,24(%rbx)
mov -88(%rbp),%rsi
mov %rsi,32(%rbx)
mov -96(%rbp),%rsi
mov %rsi,40(%rbx)
mov -104(%rbp),%rsi
mov %r15,56(%rbx)
mov %rsi,48(%rbx)
mov %rdi,64(%rbx)
mov %rcx,72(%rbx)
mov %rax,80(%rbx)
mov %r11,88(%rbx)
mov %r10,96(%rbx)
mov %r9,104(%rbx)
mov %r8,112(%rbx)
mov %rdx,120(%rbx)
add $64,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn Mul8x8Adx,globl
.end
Timeline view: 0123456789 0123456789 0123456789 0123456789
Index 0123456789 0123456789 0123456789 0123456789
[0,0] DeeER. . . . . . . . . . . . . . . . pushq %r15
[0,1] D==eeER . . . . . . . . . . . . . . . pushq %r14
[0,2] .D===eeER . . . . . . . . . . . . . . . pushq %r13
[0,3] .D=====eeER . . . . . . . . . . . . . . pushq %r12
[0,4] . DeE-----R . . . . . . . . . . . . . . movq %rdx, %r12
[0,5] . D======eeER . . . . . . . . . . . . . . pushq %rbx
[0,6] . D========eER . . . . . . . . . . . . . . subq $64, %rsp
[0,7] . DeeeeeE----R . . . . . . . . . . . . . . movq (%rdx), %rdx
[0,8] . D---------R . . . . . . . . . . . . . . xorl %r13d, %r13d
[0,9] . D====eeeeeeeeeER . . . . . . . . . . . . . mulxq (%rsi), %rax, %rcx
[0,10] . D======eE------R . . . . . . . . . . . . . movq %rdi, -48(%rbp)
[0,11] . D======eE-----R . . . . . . . . . . . . . movq %rax, -56(%rbp)
[0,12] . D====eeeeeeeeeER. . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %rax
[0,13] . D============eER. . . . . . . . . . . . . adoxq %rdx, %rcx
[0,14] . DeeeeeE-------R. . . . . . . . . . . . . movq (%r12), %rdx
[0,15] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 16(%rsi), %rdx, %rbx
[0,16] . D============eE-R . . . . . . . . . . . . adoxq %rdx, %rax
[0,17] . .DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,18] . .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r11
[0,19] . .D=============eER . . . . . . . . . . . . adoxq %rdx, %rbx
[0,20] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,21] . . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r10
[0,22] . . D=============eER . . . . . . . . . . . . adoxq %rdx, %r11
[0,23] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,24] . . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 40(%rsi), %rdx, %r9
[0,25] . . D=============eER. . . . . . . . . . . . adoxq %rdx, %r10
[0,26] . . DeeeeeE--------R. . . . . . . . . . . . movq (%r12), %rdx
[0,27] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 48(%rsi), %rdx, %r8
[0,28] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r9
[0,29] . . DeeeeeE--------R . . . . . . . . . . . movq (%r12), %rdx
[0,30] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 56(%rsi), %rdx, %rdi
[0,31] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r8
[0,32] . . .D=============eER . . . . . . . . . . . adoxq %r13, %rdi
[0,33] . . .D---------------R . . . . . . . . . . . xorl %r13d, %r13d
[0,34] . . .DeeeeeE---------R . . . . . . . . . . . movq 8(%r12), %rdx
[0,35] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r15, %r14
[0,36] . . . D=======eE-----R . . . . . . . . . . . adoxq %r15, %rcx
[0,37] . . . D=============eER . . . . . . . . . . . adcxq %r14, %rax
[0,38] . . . D=======eE-----R . . . . . . . . . . . movq %rcx, -64(%rbp)
[0,39] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq 8(%rsi), %r14, %rcx
[0,40] . . . D=============eER. . . . . . . . . . . adoxq %r14, %rax
[0,41] . . . D=============eER . . . . . . . . . . adcxq %rcx, %rbx
[0,42] . . . D====eeeeeeeeeE-R . . . . . . . . . . mulxq 16(%rsi), %r14, %rcx
[0,43] . . . D==============eER . . . . . . . . . . adoxq %r14, %rbx
[0,44] . . . D==============eER . . . . . . . . . . adcxq %rcx, %r11
[0,45] . . . D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r14, %rcx
[0,46] . . . D===============eER . . . . . . . . . . adoxq %r14, %r11
[0,47] . . . .D===============eER. . . . . . . . . . adcxq %rcx, %r10
[0,48] . . . .D====eeeeeeeeeE---R. . . . . . . . . . mulxq 32(%rsi), %r14, %rcx
[0,49] . . . .D================eER . . . . . . . . . adoxq %r14, %r10
[0,50] . . . . D================eER . . . . . . . . . adcxq %rcx, %r9
[0,51] . . . . D====eeeeeeeeeE----R . . . . . . . . . mulxq 40(%rsi), %r14, %rcx
[0,52] . . . . D=================eER . . . . . . . . . adoxq %r14, %r9
[0,53] . . . . D=================eER . . . . . . . . . adcxq %rcx, %r8
[0,54] . . . . D====eeeeeeeeeE-----R . . . . . . . . . mulxq 48(%rsi), %r14, %rcx
[0,55] . . . . D==================eER. . . . . . . . . adoxq %r14, %r8
[0,56] . . . . D==================eER . . . . . . . . adcxq %rcx, %rdi
[0,57] . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 56(%rsi), %rdx, %rcx
[0,58] . . . . D===================eER . . . . . . . . adoxq %rdx, %rdi
[0,59] . . . . D===================eER . . . . . . . . adcxq %r13, %rcx
[0,60] . . . . DeeeeeE---------------R . . . . . . . . movq 16(%r12), %rdx
[0,61] . . . . D====================eER . . . . . . . . adoxq %r13, %rcx
[0,62] . . . . .D====eeeeeeeeeE-------R . . . . . . . . mulxq (%rsi), %r15, %r14
[0,63] . . . . .D---------------------R . . . . . . . . xorl %r13d, %r13d
[0,64] . . . . .D=======eE------------R . . . . . . . . adoxq %r15, %rax
[0,65] . . . . . D============eE------R . . . . . . . . adcxq %r14, %rbx
[0,66] . . . . . D=======eE-----------R . . . . . . . . movq %rax, -72(%rbp)
[0,67] . . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 8(%rsi), %r14, %rax
[0,68] . . . . . D============eE-----R . . . . . . . . adoxq %r14, %rbx
[0,69] . . . . . D=============eE----R . . . . . . . . adcxq %rax, %r11
[0,70] . . . . . D====eeeeeeeeeE-----R . . . . . . . . mulxq 16(%rsi), %r14, %rax
[0,71] . . . . . D=============eE---R . . . . . . . . adoxq %r14, %r11
[0,72] . . . . . D==============eE--R . . . . . . . . adcxq %rax, %r10
[0,73] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 24(%rsi), %r14, %rax
[0,74] . . . . . D==============eE-R . . . . . . . . adoxq %r14, %r10
[0,75] . . . . . D===============eER . . . . . . . . adcxq %rax, %r9
[0,76] . . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r14, %rax
[0,77] . . . . . .D===============eER. . . . . . . . adoxq %r14, %r9
[0,78] . . . . . .D================eER . . . . . . . adcxq %rax, %r8
[0,79] . . . . . .D====eeeeeeeeeE----R . . . . . . . mulxq 40(%rsi), %r14, %rax
[0,80] . . . . . . D================eER . . . . . . . adoxq %r14, %r8
[0,81] . . . . . . D=================eER . . . . . . . adcxq %rax, %rdi
[0,82] . . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq 48(%rsi), %r14, %rax
[0,83] . . . . . . D=================eER . . . . . . . adoxq %r14, %rdi
[0,84] . . . . . . D==================eER. . . . . . . adcxq %rax, %rcx
[0,85] . . . . . . D====eeeeeeeeeE------R. . . . . . . mulxq 56(%rsi), %rdx, %rax
[0,86] . . . . . . D==================eER . . . . . . adoxq %rdx, %rcx
[0,87] . . . . . . D===================eER . . . . . . adcxq %r13, %rax
[0,88] . . . . . . D====================eER . . . . . . adoxq %r13, %rax
[0,89] . . . . . . D----------------------R . . . . . . xorl %r13d, %r13d
[0,90] . . . . . . DeeeeeE----------------R . . . . . . movq 24(%r12), %rdx
[0,91] . . . . . . D====eeeeeeeeeE-------R . . . . . . mulxq (%rsi), %r15, %r14
[0,92] . . . . . . D===========eE--------R . . . . . . adoxq %r15, %rbx
[0,93] . . . . . . D=============eE------R . . . . . . adcxq %r14, %r11
[0,94] . . . . . . .D===========eE-------R . . . . . . movq %rbx, -80(%rbp)
[0,95] . . . . . . .D=============eE-----R . . . . . . movq %r11, %r15
[0,96] . . . . . . .D====eeeeeeeeeE------R . . . . . . mulxq 8(%rsi), %r14, %rbx
[0,97] . . . . . . . D=============eE----R . . . . . . adoxq %r14, %r15
[0,98] . . . . . . . D==============eE---R . . . . . . adcxq %rbx, %r10
[0,99] . . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 16(%rsi), %rbx, %r11
[0,100] . . . . . . . D==============eE--R . . . . . . adoxq %rbx, %r10
[0,101] . . . . . . . D===============eE-R . . . . . . adcxq %r11, %r9
[0,102] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 24(%rsi), %rbx, %r11
[0,103] . . . . . . . D===============eER . . . . . . adoxq %rbx, %r9
[0,104] . . . . . . . D================eER . . . . . . adcxq %r11, %r8
[0,105] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 32(%rsi), %rbx, %r11
[0,106] . . . . . . . D================eER. . . . . . adoxq %rbx, %r8
[0,107] . . . . . . . D=================eER . . . . . adcxq %r11, %rdi
[0,108] . . . . . . . D====eeeeeeeeeE-----R . . . . . mulxq 40(%rsi), %rbx, %r11
[0,109] . . . . . . . .D=================eER . . . . . adoxq %rbx, %rdi
[0,110] . . . . . . . .D==================eER . . . . . adcxq %r11, %rcx
[0,111] . . . . . . . .D====eeeeeeeeeE------R . . . . . mulxq 48(%rsi), %rbx, %r11
[0,112] . . . . . . . . D==================eER . . . . . adoxq %rbx, %rcx
[0,113] . . . . . . . . D===================eER. . . . . adcxq %r11, %rax
[0,114] . . . . . . . . D====eeeeeeeeeE-------R. . . . . mulxq 56(%rsi), %rdx, %r11
[0,115] . . . . . . . . D===================eER . . . . adoxq %rdx, %rax
[0,116] . . . . . . . . D====================eER . . . . adcxq %r13, %r11
[0,117] . . . . . . . . DeeeeeE----------------R . . . . movq 32(%r12), %rdx
[0,118] . . . . . . . . D=====================eER . . . . adoxq %r13, %r11
[0,119] . . . . . . . . D=====E-----------------R . . . . xorl %ebx, %ebx
[0,120] . . . . . . . . D====eeeeeeeeeE--------R . . . . mulxq (%rsi), %r14, %r13
[0,121] . . . . . . . . D===========eE---------R . . . . adoxq %r14, %r15
[0,122] . . . . . . . . D=============eE-------R . . . . adcxq %r13, %r10
[0,123] . . . . . . . . D===========eE--------R . . . . movq %r15, -88(%rbp)
[0,124] . . . . . . . . D====eeeeeeeeeE-------R . . . . mulxq 8(%rsi), %r14, %r13
[0,125] . . . . . . . . D=============eE------R . . . . movq %r10, %r15
[0,126] . . . . . . . . .D============eE------R . . . . adcxq %r13, %r9
[0,127] . . . . . . . . .D=============eE-----R . . . . adoxq %r14, %r15
[0,128] . . . . . . . . .D====eeeeeeeeeE------R . . . . mulxq 16(%rsi), %r13, %r10
[0,129] . . . . . . . . . D=============eE----R . . . . adoxq %r13, %r9
[0,130] . . . . . . . . . D==============eE---R . . . . adcxq %r10, %r8
[0,131] . . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 24(%rsi), %r13, %r10
[0,132] . . . . . . . . . D==============eE--R . . . . adcxq %r10, %rdi
[0,133] . . . . . . . . . D===============eE-R . . . . adoxq %r13, %r8
[0,134] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 32(%rsi), %r13, %r10
[0,135] . . . . . . . . . D===============eER . . . . adoxq %r13, %rdi
[0,136] . . . . . . . . . D================eER . . . . adcxq %r10, %rcx
[0,137] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 40(%rsi), %r13, %r10
[0,138] . . . . . . . . . D================eER. . . . adoxq %r13, %rcx
[0,139] . . . . . . . . . D=================eER . . . adcxq %r10, %rax
[0,140] . . . . . . . . . D====eeeeeeeeeE-----R . . . mulxq 48(%rsi), %r13, %r10
[0,141] . . . . . . . . . .D=================eER . . . adoxq %r13, %rax
[0,142] . . . . . . . . . .D==================eER . . . adcxq %r10, %r11
[0,143] . . . . . . . . . .D====eeeeeeeeeE------R . . . mulxq 56(%rsi), %rdx, %r10
[0,144] . . . . . . . . . . D==================eER . . . adoxq %rdx, %r11
[0,145] . . . . . . . . . . D===================eER. . . adcxq %rbx, %r10
[0,146] . . . . . . . . . . DeeeeeE---------------R. . . movq 40(%r12), %rdx
[0,147] . . . . . . . . . . D====================eER . . adoxq %rbx, %r10
[0,148] . . . . . . . . . . D====eeeeeeeeeE-------R . . mulxq (%rsi), %r14, %r13
[0,149] . . . . . . . . . . D---------------------R . . xorl %ebx, %ebx
[0,150] . . . . . . . . . . D============eE-------R . . adoxq %r14, %r15
[0,151] . . . . . . . . . . D============eE------R . . movq %r15, -96(%rbp)
[0,152] . . . . . . . . . . D============eE------R . . adcxq %r13, %r9
[0,153] . . . . . . . . . . D=====eeeeeeeeeE-----R . . mulxq 8(%rsi), %r14, %r13
[0,154] . . . . . . . . . . D============eE-----R . . movq %r9, %r15
[0,155] . . . . . . . . . . D=============eE----R . . adoxq %r14, %r15
[0,156] . . . . . . . . . . D==============eE---R . . adcxq %r13, %r8
[0,157] . . . . . . . . . . .D====eeeeeeeeeE----R . . mulxq 16(%rsi), %r13, %r9
[0,158] . . . . . . . . . . .D==============eE--R . . adoxq %r13, %r8
[0,159] . . . . . . . . . . .D===============eE-R . . adcxq %r9, %rdi
[0,160] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 24(%rsi), %r13, %r9
[0,161] . . . . . . . . . . . D===============eER . . adoxq %r13, %rdi
[0,162] . . . . . . . . . . . D================eER . . adcxq %r9, %rcx
[0,163] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 32(%rsi), %r13, %r9
[0,164] . . . . . . . . . . . D================eER . . adoxq %r13, %rcx
[0,165] . . . . . . . . . . . D=================eER . . adcxq %r9, %rax
[0,166] . . . . . . . . . . . D====eeeeeeeeeE----R . . mulxq 40(%rsi), %r13, %r9
[0,167] . . . . . . . . . . . D=================eER. . adoxq %r13, %rax
[0,168] . . . . . . . . . . . D==================eER . adcxq %r9, %r11
[0,169] . . . . . . . . . . . D====eeeeeeeeeE-----R . mulxq 48(%rsi), %r13, %r9
[0,170] . . . . . . . . . . . D==================eER . adoxq %r13, %r11
[0,171] . . . . . . . . . . . D===================eER . adcxq %r9, %r10
[0,172] . . . . . . . . . . . .D====eeeeeeeeeE------R . mulxq 56(%rsi), %rdx, %r9
[0,173] . . . . . . . . . . . .D===================eER. adoxq %rdx, %r10
[0,174] . . . . . . . . . . . .D====================eER adcxq %rbx, %r9

libc/nexgen32e/mul8x8adx.S (new file, 495 lines)

@@ -0,0 +1,495 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Computes 1024-bit product of 512-bit and 512-bit numbers.
//
// Instructions: 260
// Total Cycles: 98
// Total uOps: 452
// uOps Per Cycle: 4.61
// IPC: 2.65
// Block RThroughput: 75.3
//
// @param rdi receives 16 quadword result
// @param rsi is left hand side which must have 8 quadwords
// @param rdx is right hand side which must have 8 quadwords
// @note words are host endian while array is little endian
// @mayalias
Mul8x8Adx:
push %rbp
mov %rsp,%rbp
.profilable
sub $104,%rsp
mov %r15,-8(%rbp)
mov %r14,-16(%rbp)
mov %r13,-24(%rbp)
mov %r12,-32(%rbp)
mov %rbx,-40(%rbp)
mov %rdx,%r12
mov (%rdx),%rdx
mulx (%rsi),%rax,%rcx
mov %rdi,-48(%rbp)
mov %rax,-56(%rbp)
mulx 8(%rsi),%rdx,%rax
add %rdx,%rcx
mov (%r12),%rdx
mulx 16(%rsi),%rdx,%rbx
adc %rdx,%rax
mov (%r12),%rdx
mulx 24(%rsi),%rdx,%r11
adc %rdx,%rbx
mov (%r12),%rdx
mulx 32(%rsi),%rdx,%r10
adc %rdx,%r11
mov (%r12),%rdx
mulx 40(%rsi),%rdx,%r9
adc %rdx,%r10
mov (%r12),%rdx
mulx 48(%rsi),%rdx,%r8
adc %rdx,%r9
mov (%r12),%rdx
mulx 56(%rsi),%rdx,%rdi
adc %rdx,%r8
adc $0,%rdi
xor %r13d,%r13d
mov 8(%r12),%rdx
mulx (%rsi),%r15,%r14
adox %r15,%rcx
adcx %r14,%rax
mov %rcx,-64(%rbp)
mulx 8(%rsi),%r14,%rcx
adox %r14,%rax
adcx %rcx,%rbx
mulx 16(%rsi),%r14,%rcx
adox %r14,%rbx
adcx %rcx,%r11
mulx 24(%rsi),%r14,%rcx
adox %r14,%r11
adcx %rcx,%r10
mulx 32(%rsi),%r14,%rcx
adox %r14,%r10
adcx %rcx,%r9
mulx 40(%rsi),%r14,%rcx
adox %r14,%r9
adcx %rcx,%r8
mulx 48(%rsi),%r14,%rcx
adox %r14,%r8
adcx %rcx,%rdi
mulx 56(%rsi),%rdx,%rcx
adox %rdx,%rdi
adcx %r13,%rcx
mov 16(%r12),%rdx
adox %r13,%rcx
mulx (%rsi),%r15,%r14
xor %r13d,%r13d
adox %r15,%rax
adcx %r14,%rbx
mov %rax,-72(%rbp)
mulx 8(%rsi),%r14,%rax
adox %r14,%rbx
adcx %rax,%r11
mulx 16(%rsi),%r14,%rax
adox %r14,%r11
adcx %rax,%r10
mulx 24(%rsi),%r14,%rax
adox %r14,%r10
adcx %rax,%r9
mulx 32(%rsi),%r14,%rax
adox %r14,%r9
adcx %rax,%r8
mulx 40(%rsi),%r14,%rax
adox %r14,%r8
adcx %rax,%rdi
mulx 48(%rsi),%r14,%rax
adox %r14,%rdi
adcx %rax,%rcx
mulx 56(%rsi),%rdx,%rax
adox %rdx,%rcx
adcx %r13,%rax
adox %r13,%rax
xor %r13d,%r13d
mov 24(%r12),%rdx
mulx (%rsi),%r15,%r14
adox %r15,%rbx
adcx %r14,%r11
mov %rbx,-80(%rbp)
mov %r11,%r15
mulx 8(%rsi),%r14,%rbx
adox %r14,%r15
adcx %rbx,%r10
mulx 16(%rsi),%rbx,%r11
adox %rbx,%r10
adcx %r11,%r9
mulx 24(%rsi),%rbx,%r11
adox %rbx,%r9
adcx %r11,%r8
mulx 32(%rsi),%rbx,%r11
adox %rbx,%r8
adcx %r11,%rdi
mulx 40(%rsi),%rbx,%r11
adox %rbx,%rdi
adcx %r11,%rcx
mulx 48(%rsi),%rbx,%r11
adox %rbx,%rcx
adcx %r11,%rax
mulx 56(%rsi),%rdx,%r11
adox %rdx,%rax
adcx %r13,%r11
mov 32(%r12),%rdx
adox %r13,%r11
xor %ebx,%ebx
mulx (%rsi),%r14,%r13
adox %r14,%r15
adcx %r13,%r10
mov %r15,-88(%rbp)
mulx 8(%rsi),%r14,%r13
mov %r10,%r15
adcx %r13,%r9
adox %r14,%r15
mulx 16(%rsi),%r13,%r10
adox %r13,%r9
adcx %r10,%r8
mulx 24(%rsi),%r13,%r10
adcx %r10,%rdi
adox %r13,%r8
mulx 32(%rsi),%r13,%r10
adox %r13,%rdi
adcx %r10,%rcx
mulx 40(%rsi),%r13,%r10
adox %r13,%rcx
adcx %r10,%rax
mulx 48(%rsi),%r13,%r10
adox %r13,%rax
adcx %r10,%r11
mulx 56(%rsi),%rdx,%r10
adox %rdx,%r11
adcx %rbx,%r10
mov 40(%r12),%rdx
adox %rbx,%r10
mulx (%rsi),%r14,%r13
xor %ebx,%ebx
adox %r14,%r15
mov %r15,-96(%rbp)
adcx %r13,%r9
mulx 8(%rsi),%r14,%r13
mov %r9,%r15
adox %r14,%r15
adcx %r13,%r8
mulx 16(%rsi),%r13,%r9
adox %r13,%r8
adcx %r9,%rdi
mulx 24(%rsi),%r13,%r9
adox %r13,%rdi
adcx %r9,%rcx
mulx 32(%rsi),%r13,%r9
adox %r13,%rcx
adcx %r9,%rax
mulx 40(%rsi),%r13,%r9
adox %r13,%rax
adcx %r9,%r11
mulx 48(%rsi),%r13,%r9
adox %r13,%r11
adcx %r9,%r10
mulx 56(%rsi),%rdx,%r9
adox %rdx,%r10
adcx %rbx,%r9
adox %rbx,%r9
xor %ebx,%ebx
mov 48(%r12),%rdx
mulx (%rsi),%r14,%r13
adox %r14,%r15
adcx %r13,%r8
mov %r15,-104(%rbp)
mulx 8(%rsi),%r14,%r13
mov %r8,%r15
adcx %r13,%rdi
adox %r14,%r15
mulx 16(%rsi),%r13,%r8
adox %r13,%rdi
adcx %r8,%rcx
mulx 24(%rsi),%r13,%r8
adox %r13,%rcx
adcx %r8,%rax
mulx 32(%rsi),%r13,%r8
adox %r13,%rax
adcx %r8,%r11
mulx 40(%rsi),%r13,%r8
adox %r13,%r11
adcx %r8,%r10
mulx 48(%rsi),%r13,%r8
adox %r13,%r10
adcx %r8,%r9
mulx 56(%rsi),%rdx,%r8
adox %rdx,%r9
mov 56(%r12),%rdx
adcx %rbx,%r8
mulx (%rsi),%r13,%r12
adox %rbx,%r8
xor %ebx,%ebx
adox %r13,%r15
adcx %r12,%rdi
mulx 8(%rsi),%r13,%r12
adox %r13,%rdi
adcx %r12,%rcx
mulx 16(%rsi),%r13,%r12
adox %r13,%rcx
adcx %r12,%rax
mulx 24(%rsi),%r13,%r12
adox %r13,%rax
adcx %r12,%r11
mulx 32(%rsi),%r13,%r12
adox %r13,%r11
adcx %r12,%r10
mulx 40(%rsi),%r13,%r12
adox %r13,%r10
adcx %r12,%r9
mulx 48(%rsi),%r13,%r12
mulx 56(%rsi),%rsi,%rdx
adox %r13,%r9
adcx %r12,%r8
adox %rsi,%r8
adcx %rbx,%rdx
mov -64(%rbp),%rsi
adox %rbx,%rdx
mov -48(%rbp),%rbx
mov -56(%rbp),%r14
mov %rsi,8(%rbx)
mov -72(%rbp),%rsi
mov %r14,(%rbx)
mov %rsi,16(%rbx)
mov -80(%rbp),%rsi
mov %rsi,24(%rbx)
mov -88(%rbp),%rsi
mov %rsi,32(%rbx)
mov -96(%rbp),%rsi
mov %rsi,40(%rbx)
mov -104(%rbp),%rsi
mov %r15,56(%rbx)
mov %rsi,48(%rbx)
mov %rdi,64(%rbx)
mov %rcx,72(%rbx)
mov %rax,80(%rbx)
mov %r11,88(%rbx)
mov %r10,96(%rbx)
mov %r9,104(%rbx)
mov %r8,112(%rbx)
mov %rdx,120(%rbx)
mov -8(%rbp),%r15
mov -16(%rbp),%r14
mov -24(%rbp),%r13
mov -32(%rbp),%r12
mov -40(%rbp),%rbx
leave
ret
.endfn Mul8x8Adx,globl
.end
TIMELINE VIEW 0123456789 0123456789 0123456789 0123456789
Index 0123456789 0123456789 0123456789 0123456789
[0,0] DeER . . . . . . . . . . . . . . . . subq $104, %rsp
[0,1] DeER . . . . . . . . . . . . . . . . movq %r15, -8(%rbp)
[0,2] D=eER. . . . . . . . . . . . . . . . movq %r14, -16(%rbp)
[0,3] D==eER . . . . . . . . . . . . . . . movq %r13, -24(%rbp)
[0,4] D===eER . . . . . . . . . . . . . . . movq %r12, -32(%rbp)
[0,5] D====eER . . . . . . . . . . . . . . . movq %rbx, -40(%rbp)
[0,6] .DeE---R . . . . . . . . . . . . . . . movq %rdx, %r12
[0,7] .DeeeeeER . . . . . . . . . . . . . . . movq (%rdx), %rdx
[0,8] .D=====eeeeeeeeeER . . . . . . . . . . . . . mulxq (%rsi), %rax, %rcx
[0,9] . D====eE--------R . . . . . . . . . . . . . movq %rdi, -48(%rbp)
[0,10] . D=======eE-----R . . . . . . . . . . . . . movq %rax, -56(%rbp)
[0,11] . D=====eeeeeeeeeER . . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %rax
[0,12] . D============eER . . . . . . . . . . . . . addq %rdx, %rcx
[0,13] . DeeeeeE--------R . . . . . . . . . . . . . movq (%r12), %rdx
[0,14] . D=====eeeeeeeeeER. . . . . . . . . . . . . mulxq 16(%rsi), %rdx, %rbx
[0,15] . D============eER. . . . . . . . . . . . . adcq %rdx, %rax
[0,16] . DeeeeeE--------R. . . . . . . . . . . . . movq (%r12), %rdx
[0,17] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r11
[0,18] . D============eER . . . . . . . . . . . . adcq %rdx, %rbx
[0,19] . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,20] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r10
[0,21] . .D============eER . . . . . . . . . . . . adcq %rdx, %r11
[0,22] . .DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,23] . .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 40(%rsi), %rdx, %r9
[0,24] . . D============eER . . . . . . . . . . . . adcq %rdx, %r10
[0,25] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,26] . . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 48(%rsi), %rdx, %r8
[0,27] . . D============eER . . . . . . . . . . . . adcq %rdx, %r9
[0,28] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
[0,29] . . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 56(%rsi), %rdx, %rdi
[0,30] . . D============eER. . . . . . . . . . . . adcq %rdx, %r8
[0,31] . . D=============eER . . . . . . . . . . . adcq $0, %rdi
[0,32] . . D---------------R . . . . . . . . . . . xorl %r13d, %r13d
[0,33] . . DeeeeeE---------R . . . . . . . . . . . movq 8(%r12), %rdx
[0,34] . . D====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r15, %r14
[0,35] . . D=======eE-----R . . . . . . . . . . . adoxq %r15, %rcx
[0,36] . . D=============eER . . . . . . . . . . . adcxq %r14, %rax
[0,37] . . .D=======eE-----R . . . . . . . . . . . movq %rcx, -64(%rbp)
[0,38] . . .D====eeeeeeeeeER . . . . . . . . . . . mulxq 8(%rsi), %r14, %rcx
[0,39] . . .D=============eER . . . . . . . . . . . adoxq %r14, %rax
[0,40] . . . D=============eER . . . . . . . . . . . adcxq %rcx, %rbx
[0,41] . . . D====eeeeeeeeeE-R . . . . . . . . . . . mulxq 16(%rsi), %r14, %rcx
[0,42] . . . D==============eER. . . . . . . . . . . adoxq %r14, %rbx
[0,43] . . . D==============eER . . . . . . . . . . adcxq %rcx, %r11
[0,44] . . . D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r14, %rcx
[0,45] . . . D===============eER . . . . . . . . . . adoxq %r14, %r11
[0,46] . . . D===============eER . . . . . . . . . . adcxq %rcx, %r10
[0,47] . . . D====eeeeeeeeeE---R . . . . . . . . . . mulxq 32(%rsi), %r14, %rcx
[0,48] . . . D================eER . . . . . . . . . . adoxq %r14, %r10
[0,49] . . . D================eER. . . . . . . . . . adcxq %rcx, %r9
[0,50] . . . D====eeeeeeeeeE----R. . . . . . . . . . mulxq 40(%rsi), %r14, %rcx
[0,51] . . . D=================eER . . . . . . . . . adoxq %r14, %r9
[0,52] . . . .D=================eER . . . . . . . . . adcxq %rcx, %r8
[0,53] . . . .D====eeeeeeeeeE-----R . . . . . . . . . mulxq 48(%rsi), %r14, %rcx
[0,54] . . . .D==================eER . . . . . . . . . adoxq %r14, %r8
[0,55] . . . . D==================eER . . . . . . . . . adcxq %rcx, %rdi
[0,56] . . . . D====eeeeeeeeeE------R . . . . . . . . . mulxq 56(%rsi), %rdx, %rcx
[0,57] . . . . D===================eER. . . . . . . . . adoxq %rdx, %rdi
[0,58] . . . . D===================eER . . . . . . . . adcxq %r13, %rcx
[0,59] . . . . DeeeeeE---------------R . . . . . . . . movq 16(%r12), %rdx
[0,60] . . . . D====================eER . . . . . . . . adoxq %r13, %rcx
[0,61] . . . . D====eeeeeeeeeE-------R . . . . . . . . mulxq (%rsi), %r15, %r14
[0,62] . . . . D---------------------R . . . . . . . . xorl %r13d, %r13d
[0,63] . . . . D=======eE------------R . . . . . . . . adoxq %r15, %rax
[0,64] . . . . D============eE------R . . . . . . . . adcxq %r14, %rbx
[0,65] . . . . D=======eE-----------R . . . . . . . . movq %rax, -72(%rbp)
[0,66] . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 8(%rsi), %r14, %rax
[0,67] . . . . .D============eE-----R . . . . . . . . adoxq %r14, %rbx
[0,68] . . . . .D=============eE----R . . . . . . . . adcxq %rax, %r11
[0,69] . . . . .D====eeeeeeeeeE-----R . . . . . . . . mulxq 16(%rsi), %r14, %rax
[0,70] . . . . . D=============eE---R . . . . . . . . adoxq %r14, %r11
[0,71] . . . . . D==============eE--R . . . . . . . . adcxq %rax, %r10
[0,72] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 24(%rsi), %r14, %rax
[0,73] . . . . . D==============eE-R . . . . . . . . adoxq %r14, %r10
[0,74] . . . . . D===============eER . . . . . . . . adcxq %rax, %r9
[0,75] . . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r14, %rax
[0,76] . . . . . D===============eER . . . . . . . . adoxq %r14, %r9
[0,77] . . . . . D================eER . . . . . . . . adcxq %rax, %r8
[0,78] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 40(%rsi), %r14, %rax
[0,79] . . . . . D================eER. . . . . . . . adoxq %r14, %r8
[0,80] . . . . . D=================eER . . . . . . . adcxq %rax, %rdi
[0,81] . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq 48(%rsi), %r14, %rax
[0,82] . . . . . .D=================eER . . . . . . . adoxq %r14, %rdi
[0,83] . . . . . .D==================eER . . . . . . . adcxq %rax, %rcx
[0,84] . . . . . .D====eeeeeeeeeE------R . . . . . . . mulxq 56(%rsi), %rdx, %rax
[0,85] . . . . . . D==================eER . . . . . . . adoxq %rdx, %rcx
[0,86] . . . . . . D===================eER. . . . . . . adcxq %r13, %rax
[0,87] . . . . . . D====================eER . . . . . . adoxq %r13, %rax
[0,88] . . . . . . D----------------------R . . . . . . xorl %r13d, %r13d
[0,89] . . . . . . DeeeeeE----------------R . . . . . . movq 24(%r12), %rdx
[0,90] . . . . . . D====eeeeeeeeeE-------R . . . . . . mulxq (%rsi), %r15, %r14
[0,91] . . . . . . D===========eE--------R . . . . . . adoxq %r15, %rbx
[0,92] . . . . . . D=============eE------R . . . . . . adcxq %r14, %r11
[0,93] . . . . . . D===========eE-------R . . . . . . movq %rbx, -80(%rbp)
[0,94] . . . . . . D=============eE-----R . . . . . . movq %r11, %r15
[0,95] . . . . . . D====eeeeeeeeeE------R . . . . . . mulxq 8(%rsi), %r14, %rbx
[0,96] . . . . . . D=============eE----R . . . . . . adoxq %r14, %r15
[0,97] . . . . . . D==============eE---R . . . . . . adcxq %rbx, %r10
[0,98] . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 16(%rsi), %rbx, %r11
[0,99] . . . . . . .D==============eE--R . . . . . . adoxq %rbx, %r10
[0,100] . . . . . . .D===============eE-R . . . . . . adcxq %r11, %r9
[0,101] . . . . . . .D====eeeeeeeeeE----R . . . . . . mulxq 24(%rsi), %rbx, %r11
[0,102] . . . . . . . D===============eER . . . . . . adoxq %rbx, %r9
[0,103] . . . . . . . D================eER . . . . . . adcxq %r11, %r8
[0,104] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 32(%rsi), %rbx, %r11
[0,105] . . . . . . . D================eER . . . . . . adoxq %rbx, %r8
[0,106] . . . . . . . D=================eER . . . . . . adcxq %r11, %rdi
[0,107] . . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 40(%rsi), %rbx, %r11
[0,108] . . . . . . . D=================eER. . . . . . adoxq %rbx, %rdi
[0,109] . . . . . . . D==================eER . . . . . adcxq %r11, %rcx
[0,110] . . . . . . . D====eeeeeeeeeE------R . . . . . mulxq 48(%rsi), %rbx, %r11
[0,111] . . . . . . . D==================eER . . . . . adoxq %rbx, %rcx
[0,112] . . . . . . . D===================eER . . . . . adcxq %r11, %rax
[0,113] . . . . . . . D====eeeeeeeeeE-------R . . . . . mulxq 56(%rsi), %rdx, %r11
[0,114] . . . . . . . .D===================eER . . . . . adoxq %rdx, %rax
[0,115] . . . . . . . .D====================eER. . . . . adcxq %r13, %r11
[0,116] . . . . . . . .DeeeeeE----------------R. . . . . movq 32(%r12), %rdx
[0,117] . . . . . . . .D=====================eER . . . . adoxq %r13, %r11
[0,118] . . . . . . . .D=====E-----------------R . . . . xorl %ebx, %ebx
[0,119] . . . . . . . . D====eeeeeeeeeE--------R . . . . mulxq (%rsi), %r14, %r13
[0,120] . . . . . . . . D===========eE---------R . . . . adoxq %r14, %r15
[0,121] . . . . . . . . D=============eE-------R . . . . adcxq %r13, %r10
[0,122] . . . . . . . . D===========eE--------R . . . . movq %r15, -88(%rbp)
[0,123] . . . . . . . . D====eeeeeeeeeE-------R . . . . mulxq 8(%rsi), %r14, %r13
[0,124] . . . . . . . . D=============eE------R . . . . movq %r10, %r15
[0,125] . . . . . . . . D============eE------R . . . . adcxq %r13, %r9
[0,126] . . . . . . . . D=============eE-----R . . . . adoxq %r14, %r15
[0,127] . . . . . . . . D====eeeeeeeeeE------R . . . . mulxq 16(%rsi), %r13, %r10
[0,128] . . . . . . . . D=============eE----R . . . . adoxq %r13, %r9
[0,129] . . . . . . . . D==============eE---R . . . . adcxq %r10, %r8
[0,130] . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 24(%rsi), %r13, %r10
[0,131] . . . . . . . . .D==============eE--R . . . . adcxq %r10, %rdi
[0,132] . . . . . . . . .D===============eE-R . . . . adoxq %r13, %r8
[0,133] . . . . . . . . .D====eeeeeeeeeE----R . . . . mulxq 32(%rsi), %r13, %r10
[0,134] . . . . . . . . . D===============eER . . . . adoxq %r13, %rdi
[0,135] . . . . . . . . . D================eER . . . . adcxq %r10, %rcx
[0,136] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 40(%rsi), %r13, %r10
[0,137] . . . . . . . . . D================eER . . . . adoxq %r13, %rcx
[0,138] . . . . . . . . . D=================eER . . . . adcxq %r10, %rax
[0,139] . . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 48(%rsi), %r13, %r10
[0,140] . . . . . . . . . D=================eER. . . . adoxq %r13, %rax
[0,141] . . . . . . . . . D==================eER . . . adcxq %r10, %r11
[0,142] . . . . . . . . . D====eeeeeeeeeE------R . . . mulxq 56(%rsi), %rdx, %r10
[0,143] . . . . . . . . . D==================eER . . . adoxq %rdx, %r11
[0,144] . . . . . . . . . D===================eER . . . adcxq %rbx, %r10
[0,145] . . . . . . . . . DeeeeeE---------------R . . . movq 40(%r12), %rdx
[0,146] . . . . . . . . . D====================eER . . . adoxq %rbx, %r10
[0,147] . . . . . . . . . .D====eeeeeeeeeE-------R . . . mulxq (%rsi), %r14, %r13
[0,148] . . . . . . . . . .D---------------------R . . . xorl %ebx, %ebx
[0,149] . . . . . . . . . .D============eE-------R . . . adoxq %r14, %r15
[0,150] . . . . . . . . . . D============eE------R . . . movq %r15, -96(%rbp)
[0,151] . . . . . . . . . . D============eE------R . . . adcxq %r13, %r9
[0,152] . . . . . . . . . . D=====eeeeeeeeeE-----R . . . mulxq 8(%rsi), %r14, %r13
[0,153] . . . . . . . . . . D============eE-----R . . . movq %r9, %r15
[0,154] . . . . . . . . . . D=============eE----R . . . adoxq %r14, %r15
[0,155] . . . . . . . . . . D==============eE---R . . . adcxq %r13, %r8
[0,156] . . . . . . . . . . D====eeeeeeeeeE----R . . . mulxq 16(%rsi), %r13, %r9
[0,157] . . . . . . . . . . D==============eE--R . . . adoxq %r13, %r8
[0,158] . . . . . . . . . . D===============eE-R . . . adcxq %r9, %rdi
[0,159] . . . . . . . . . . D====eeeeeeeeeE---R . . . mulxq 24(%rsi), %r13, %r9
[0,160] . . . . . . . . . . D===============eER . . . adoxq %r13, %rdi
[0,161] . . . . . . . . . . D================eER. . . adcxq %r9, %rcx
[0,162] . . . . . . . . . . .D====eeeeeeeeeE---R. . . mulxq 32(%rsi), %r13, %r9
[0,163] . . . . . . . . . . .D================eER . . adoxq %r13, %rcx
[0,164] . . . . . . . . . . .D=================eER . . adcxq %r9, %rax
[0,165] . . . . . . . . . . . D====eeeeeeeeeE----R . . mulxq 40(%rsi), %r13, %r9
[0,166] . . . . . . . . . . . D=================eER . . adoxq %r13, %rax
[0,167] . . . . . . . . . . . D==================eER . . adcxq %r9, %r11
[0,168] . . . . . . . . . . . D====eeeeeeeeeE-----R . . mulxq 48(%rsi), %r13, %r9
[0,169] . . . . . . . . . . . D==================eER. . adoxq %r13, %r11
[0,170] . . . . . . . . . . . D===================eER . adcxq %r9, %r10
[0,171] . . . . . . . . . . . D====eeeeeeeeeE------R . mulxq 56(%rsi), %rdx, %r9
[0,172] . . . . . . . . . . . D===================eER . adoxq %rdx, %r10
[0,173] . . . . . . . . . . . D====================eER . adcxq %rbx, %r9
[0,174] . . . . . . . . . . . D====================eER. adoxq %rbx, %r9
[0,175] . . . . . . . . . . . D----------------------R. xorl %ebx, %ebx
[0,176] . . . . . . . . . . . DeeeeeE----------------R. movq 48(%r12), %rdx
[0,177] . . . . . . . . . . . .D=====eeeeeeeeeE------R. mulxq (%rsi), %r14, %r13
[0,178] . . . . . . . . . . . .D==========eE---------R. adoxq %r14, %r15
[0,179] . . . . . . . . . . . .D==============eE-----R. adcxq %r13, %r8
[0,180] . . . . . . . . . . . . D==========eE--------R. movq %r15, -104(%rbp)
[0,181] . . . . . . . . . . . . D=====eeeeeeeeeE-----R. mulxq 8(%rsi), %r14, %r13
[0,182] . . . . . . . . . . . . D==============eE----R. movq %r8, %r15
[0,183] . . . . . . . . . . . . D==============eE---R. adcxq %r13, %rdi
[0,184] . . . . . . . . . . . . D===============eE--R. adoxq %r14, %r15
[0,185] . . . . . . . . . . . . D=====eeeeeeeeeE----R. mulxq 16(%rsi), %r13, %r8
[0,186] . . . . . . . . . . . . D===============eE-R. adoxq %r13, %rdi
[0,187] . . . . . . . . . . . . D================eER. adcxq %r8, %rcx
[0,188] . . . . . . . . . . . . D=====eeeeeeeeeE---R. mulxq 24(%rsi), %r13, %r8
[0,189] . . . . . . . . . . . . D================eER adoxq %r13, %rcx
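
The interleaving in the trace above is the standard MULX/ADCX/ADOX pattern: mulx (BMI2) computes a full 64×64→128 product without touching the flags, while adcx propagates carries through CF only and adox through OF only, so the low and high halves of each partial product can ride two independent carry chains inside the same row. Each movq N(%r12),%rdx loads the next word of the second operand and starts another row. A portable model of what one such row computes, with the helper name chosen here purely for illustration, is:

static uint64_t AccumulateRow(uint64_t acc[], const uint64_t a[], uint64_t b,
                              int n) {
  /* multiply the n-limb operand a[] by the single word b and fold the
     partial products into the running accumulator, returning the final
     carry-out word that the assembly parks in a spare register */
  int i;
  uint64_t hi = 0;
  unsigned __int128 t;
  for (i = 0; i < n; ++i) {
    t = (unsigned __int128)a[i] * b + acc[i] + hi;
    acc[i] = (uint64_t)t;
    hi = (uint64_t)(t >> 64);
  }
  return hi;
}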

View file

@ -1,41 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Computes C = A - B
//
// Aliasing such as sbb(A,A,B) or sbb(B,A,B) is OK.
//
// @param rdi is C
// @param rsi is A
// @param rdx is B
// @param rcx is number of subtracts
// @return al is carry
sbb: .leafprologue
test %ecx,%ecx
jz 1f
xor %r9d,%r9d
0: mov (%rsi,%r9,8),%rax
sbb (%rdx,%r9,8),%rax
mov %rax,(%rdi,%r9,8)
inc %r9d
loop 0b
1: setb %al
.leafepilogue
.endfn sbb,globl

View file

@ -39,6 +39,7 @@
#include "third_party/mbedtls/des.h"
#include "third_party/mbedtls/dhm.h"
#include "third_party/mbedtls/ecp.h"
#include "third_party/mbedtls/ecp_internal.h"
#include "third_party/mbedtls/entropy.h"
#include "third_party/mbedtls/error.h"
#include "third_party/mbedtls/gcm.h"
@ -148,17 +149,17 @@ static void P256_MPI(mbedtls_mpi *N) {
static void P256_JUSTINE(mbedtls_mpi *N) {
memcpy(N->p, rng, 8 * 8);
ecp_mod_p256(N);
secp256r1(N->p);
}
static void P384_MPI(mbedtls_mpi *N) {
memcpy(N->p, rng, 8 * 8);
memcpy(N->p, rng, 12 * 8);
ASSERT_EQ(0, mbedtls_mpi_mod_mpi(N, N, &grp.P));
}
static void P384_JUSTINE(mbedtls_mpi *N) {
memcpy(N->p, rng, 8 * 8);
ecp_mod_p384(N);
memcpy(N->p, rng, 12 * 8);
secp384r1(N->p);
}
BENCH(p256, bench) {
@ -166,6 +167,7 @@ BENCH(p256, bench) {
mbedtls_ecp_group_init(&grp);
mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP256R1);
mbedtls_mpi x = {1, 8, gc(calloc(8, 8))};
rngset(x.p, 8 * 8, rand64, -1);
EZBENCH2("P-256 modulus MbedTLS MPI lib", donothing, P256_MPI(&x));
EZBENCH2("P-256 modulus Justine rewrite", donothing, P256_JUSTINE(&x));
mbedtls_ecp_group_free(&grp);
@ -176,10 +178,10 @@ BENCH(p384, bench) {
#ifdef MBEDTLS_ECP_C
mbedtls_ecp_group_init(&grp);
mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP384R1);
uint64_t y[12];
mbedtls_mpi x = {1, 12, gc(calloc(12, 8))};
EZBENCH2("P-384 modulus MbedTLS MPI lib", donothing, P384_MPI(&x));
EZBENCH2("P-384 modulus Justine rewrite", donothing, P384_JUSTINE(&x));
rngset(x.p, 12 * 8, rand64, -1);
mbedtls_ecp_group_free(&grp);
#endif
}
@ -1112,3 +1114,49 @@ BENCH(cmpint, bench) {
EZBENCH2("cmpint 3.1", donothing, mbedtls_mpi_cmp_int(&z, 0));
EZBENCH2("cmpint 3.2", donothing, mbedtls_mpi_cmp_int(&z, 1));
}
mbedtls_mpi_uint F1(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a,
const mbedtls_mpi_uint *b, size_t n) {
size_t i;
unsigned char cf;
mbedtls_mpi_uint c, x;
cf = c = i = 0;
for (; i < n; ++i) SBB(d[i], a[i], b[i], c, c);
return c;
}
mbedtls_mpi_uint F2(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a,
const mbedtls_mpi_uint *b, size_t n) {
size_t i;
unsigned char cf;
mbedtls_mpi_uint c, x;
cf = c = i = 0;
asm volatile("xor\t%1,%1\n\t"
".align\t16\n1:\t"
"mov\t(%5,%3,8),%1\n\t"
"sbb\t(%6,%3,8),%1\n\t"
"mov\t%1,(%4,%3,8)\n\t"
"lea\t1(%3),%3\n\t"
"dec\t%2\n\t"
"jnz\t1b"
: "=@ccb"(cf), "=&r"(x), "+c"(n), "=r"(i)
: "r"(d), "r"(a), "r"(b), "3"(0)
: "cc", "memory");
return cf;
}
TEST(wut, wut) {
uint64_t A[8];
uint64_t B[8];
uint64_t C[8];
uint64_t D[8];
int i;
for (i = 0; i < 1000; ++i) {
rngset(A, sizeof(A), rand64, -1);
rngset(B, sizeof(B), rand64, -1);
int x = F1(C, A, B, 8);
int y = F2(D, A, B, 8);
ASSERT_EQ(x, y);
ASSERT_EQ(0, memcmp(C, D, sizeof(C)));
}
}
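
The SBB macro that F1 relies on is defined outside this hunk. A portable reading consistent with how F1 uses it, where SBB(out, a, b, cin, cout) computes out = a - b - cin and leaves the outgoing borrow in cout, would be the following sketch (the name is illustrative, not the real definition):

#define SBB_PORTABLE(out, a, b, cin, cout)   \
  do {                                       \
    uint64_t a_ = (a), b_ = (b), c_ = (cin); \
    uint64_t t_ = a_ - b_;                   \
    (cout) = (a_ < b_) | (t_ < c_);          \
    (out) = t_ - c_;                         \
  } while (0)

F2 expresses the same loop with the hardware sbb instruction, which keeps the borrow in CF between iterations (the index is advanced with lea and the counter with dec, neither of which clobbers CF), and the test simply asserts that the two implementations agree on random inputs.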

File diff suppressed because it is too large

View file

@ -80,17 +80,17 @@
#ifndef TINY
#define MBEDTLS_ECP_DP_SECP256R1_ENABLED
#define MBEDTLS_ECP_DP_SECP384R1_ENABLED
#define MBEDTLS_ECP_DP_SECP521R1_ENABLED
#define MBEDTLS_ECP_DP_CURVE25519_ENABLED
#define MBEDTLS_ECP_DP_CURVE448_ENABLED
/*#define MBEDTLS_ECP_DP_SECP192R1_ENABLED*/
/*#define MBEDTLS_ECP_DP_SECP224R1_ENABLED*/
/*#define MBEDTLS_ECP_DP_SECP521R1_ENABLED*/
/*#define MBEDTLS_ECP_DP_SECP192K1_ENABLED*/
/*#define MBEDTLS_ECP_DP_SECP224K1_ENABLED*/
/*#define MBEDTLS_ECP_DP_SECP256K1_ENABLED*/
/*#define MBEDTLS_ECP_DP_BP256R1_ENABLED*/
/*#define MBEDTLS_ECP_DP_BP384R1_ENABLED*/
/*#define MBEDTLS_ECP_DP_BP512R1_ENABLED*/
/*#define MBEDTLS_ECP_DP_CURVE448_ENABLED*/
#endif
#define MBEDTLS_X509_CHECK_KEY_USAGE

View file

@ -1,8 +1,8 @@
#ifndef MBEDTLS_ECDH_H
#define MBEDTLS_ECDH_H
#include "third_party/mbedtls/config.h"
#include "third_party/mbedtls/ecdh_everest.h"
#include "third_party/mbedtls/ecp.h"
#include "third_party/mbedtls/everest.h"
/* clang-format off */
#ifdef __cplusplus

279
third_party/mbedtls/ecdh_everest.c vendored Normal file
View file

@ -0,0 +1,279 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright The Mbed TLS Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 │
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "third_party/mbedtls/ecdh_everest.h"
#include "third_party/mbedtls/everest.h"
#if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
#define KEYSIZE 32
asm(".ident\t\"\\n\\n\
Mbed TLS (Apache 2.0)\\n\
Copyright ARM Limited\\n\
Copyright Mbed TLS Contributors\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
/**
* \brief This function sets up the ECDH context with the information
* given.
*
* This function should be called after mbedtls_ecdh_init() but
* before mbedtls_ecdh_make_params(). There is no need to call
* this function before mbedtls_ecdh_read_params().
*
* This is the first function used by a TLS server for
* ECDHE ciphersuites.
*
* \param ctx The ECDH context to set up.
* \param grp_id The group id of the group to set up the context for.
*
* \return \c 0 on success.
*/
int mbedtls_everest_setup(mbedtls_ecdh_context_everest *ctx, int grp_id)
{
if (grp_id != MBEDTLS_ECP_DP_CURVE25519)
return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
mbedtls_platform_zeroize(ctx, sizeof(*ctx));
return 0;
}
/**
* \brief This function frees a context.
*
* \param ctx The context to free.
*/
void mbedtls_everest_free(mbedtls_ecdh_context_everest *ctx)
{
if (!ctx) return;
mbedtls_platform_zeroize(ctx, sizeof(*ctx));
}
/**
* \brief This function generates a public key and a TLS
* ServerKeyExchange payload.
*
* This is the second function used by a TLS server for ECDHE
* ciphersuites. (It is called after mbedtls_ecdh_setup().)
*
* \note This function assumes that the ECP group (grp) of the
* \p ctx context has already been properly set,
* for example, using mbedtls_ecp_group_load().
*
* \see ecp.h
*
* \param ctx The ECDH context.
* \param olen The number of characters written.
* \param buf The destination buffer.
* \param blen The length of the destination buffer.
* \param f_rng The RNG function.
* \param p_rng The RNG context.
*
* \return \c 0 on success.
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
*/
int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *ctx, size_t *olen,
unsigned char *buf, size_t blen,
int (*f_rng)(void *, unsigned char *, size_t),
void *p_rng)
{
int ret = 0;
uint8_t base[KEYSIZE] = {9};
if ((ret = f_rng(p_rng, ctx->our_secret, KEYSIZE)) != 0) return ret;
*olen = KEYSIZE + 4;
if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
*buf++ = MBEDTLS_ECP_TLS_NAMED_CURVE;
*buf++ = MBEDTLS_ECP_TLS_CURVE25519 >> 8;
*buf++ = MBEDTLS_ECP_TLS_CURVE25519 & 0xFF;
*buf++ = KEYSIZE;
curve25519(buf, ctx->our_secret, base);
base[0] = 0;
if (!timingsafe_memcmp(buf, base, KEYSIZE))
return MBEDTLS_ERR_ECP_RANDOM_FAILED;
return 0;
}
/**
* \brief This function parses and processes a TLS ServerKeyExchange
* payload.
*
* This is the first function used by a TLS client for ECDHE
* ciphersuites.
*
* \see ecp.h
*
* \param ctx The ECDH context.
* \param buf The pointer to the start of the input buffer.
* \param end The address for one Byte past the end of the buffer.
*
* \return \c 0 on success.
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
*/
int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *ctx,
const unsigned char **buf,
const unsigned char *end)
{
if (end - *buf < KEYSIZE + 1) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
if ((*(*buf)++ != KEYSIZE)) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
memcpy(ctx->peer_point, *buf, KEYSIZE);
*buf += KEYSIZE;
return 0;
}
/**
* \brief This function sets up an ECDH context from an EC key.
*
* It is used by clients and servers in place of the
* ServerKeyExchange for static ECDH, and imports ECDH
* parameters from the EC key information of a certificate.
*
* \see ecp.h
*
* \param ctx The ECDH context to set up.
* \param key The EC key to use.
* \param side Defines the source of the key: 1: Our key, or
* 0: The key of the peer.
*
* \return \c 0 on success.
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
*/
int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *ctx,
const mbedtls_ecp_keypair *key,
mbedtls_everest_ecdh_side side)
{
size_t olen = 0;
mbedtls_everest_ecdh_side s;
switch (side)
{
case MBEDTLS_EVEREST_ECDH_THEIRS:
return mbedtls_ecp_point_write_binary(&key->grp, &key->Q,
MBEDTLS_ECP_PF_COMPRESSED, &olen,
ctx->peer_point, KEYSIZE);
case MBEDTLS_EVEREST_ECDH_OURS:
return mbedtls_mpi_write_binary_le(&key->d, ctx->our_secret, KEYSIZE);
default:
return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
}
}
/**
* \brief This function generates a public key and a TLS
* ClientKeyExchange payload.
*
* This is the second function used by a TLS client for ECDH(E)
* ciphersuites.
*
* \see ecp.h
*
* \param ctx The ECDH context.
* \param olen The number of Bytes written.
* \param buf The destination buffer.
* \param blen The size of the destination buffer.
* \param f_rng The RNG function.
* \param p_rng The RNG context.
*
* \return \c 0 on success.
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
*/
int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *ctx, size_t *olen,
unsigned char *buf, size_t blen,
int (*f_rng)(void *, unsigned char *, size_t),
void *p_rng)
{
int ret = 0;
unsigned char base[KEYSIZE] = {9};
if (!ctx) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
if ((ret = f_rng(p_rng, ctx->our_secret, KEYSIZE))) return ret;
*olen = KEYSIZE + 1;
if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
*buf++ = KEYSIZE;
curve25519(buf, ctx->our_secret, base);
base[0] = 0;
if (!timingsafe_memcmp(buf, base, KEYSIZE))
return MBEDTLS_ERR_ECP_RANDOM_FAILED;
return ret;
}
/**
* \brief This function parses and processes a TLS ClientKeyExchange
* payload.
*
* This is the third function used by a TLS server for ECDH(E)
* ciphersuites. (It is called after mbedtls_ecdh_setup() and
* mbedtls_ecdh_make_params().)
*
* \see ecp.h
*
* \param ctx The ECDH context.
* \param buf The start of the input buffer.
* \param blen The length of the input buffer.
*
* \return \c 0 on success.
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
*/
int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *ctx,
const unsigned char *buf, size_t blen)
{
if (blen < KEYSIZE + 1) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
if ((*buf++ != KEYSIZE)) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
memcpy(ctx->peer_point, buf, KEYSIZE);
return 0;
}
/**
* \brief This function derives and exports the shared secret.
*
* This is the last function used by both TLS client
* and servers.
*
* \note If \p f_rng is not NULL, it is used to implement
* countermeasures against side-channel attacks.
* For more information, see mbedtls_ecp_mul().
*
* \see ecp.h
*
* \param ctx The ECDH context.
* \param olen The number of Bytes written.
* \param buf The destination buffer.
* \param blen The length of the destination buffer.
* \param f_rng The RNG function.
* \param p_rng The RNG context.
*
* \return \c 0 on success.
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
*/
int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *ctx, size_t *olen,
unsigned char *buf, size_t blen,
int (*f_rng)(void *, unsigned char *, size_t),
void *p_rng)
{
/* f_rng and p_rng are not used here because this implementation does not
need blinding since it has constant trace. (todo(jart): wut?) */
*olen = KEYSIZE;
if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
curve25519(buf, ctx->our_secret, ctx->peer_point);
if (!timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE)) goto wut;
/* Wipe the DH secret and don't let the peer choose a small subgroup point */
mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE);
if (!timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE)) goto wut;
return 0;
wut:
mbedtls_platform_zeroize(buf, KEYSIZE);
mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE);
return MBEDTLS_ERR_ECP_RANDOM_FAILED;
}
#endif
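
Putting the pieces above together, the server-side call sequence that these doc comments describe looks roughly like the sketch below; MyRng and ExampleServerSideEcdhe are illustrative names rather than part of this change, and the ske buffer is assumed to have room for the 36-byte ServerKeyExchange payload that mbedtls_everest_make_params() emits.

#include "libc/rand/rand.h"
#include "third_party/mbedtls/ecdh_everest.h"

static int MyRng(void *ctx, unsigned char *p, size_t n) {
  rngset(p, n, rand64, -1); /* stand-in for a real DRBG callback */
  return 0;
}

int ExampleServerSideEcdhe(unsigned char *ske, size_t skelen, size_t *skewrote,
                           const unsigned char *cke, size_t ckelen,
                           unsigned char shared[32]) {
  int rc;
  size_t olen;
  mbedtls_ecdh_context_everest ctx;
  if ((rc = mbedtls_everest_setup(&ctx, MBEDTLS_ECP_DP_CURVE25519))) return rc;
  /* ServerKeyExchange: picks our_secret and writes curve id + public point */
  if ((rc = mbedtls_everest_make_params(&ctx, skewrote, ske, skelen, MyRng, 0)))
    goto Done;
  /* ClientKeyExchange: copies the peer's public point into the context */
  if ((rc = mbedtls_everest_read_public(&ctx, cke, ckelen))) goto Done;
  /* shared secret is X25519(our_secret, peer_point) */
  rc = mbedtls_everest_calc_secret(&ctx, &olen, shared, 32, MyRng, 0);
Done:
  mbedtls_everest_free(&ctx);
  return rc;
}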

43
third_party/mbedtls/ecdh_everest.h vendored Normal file
View file

@ -0,0 +1,43 @@
#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
#include "third_party/mbedtls/config.h"
#include "third_party/mbedtls/ecp.h"
COSMOPOLITAN_C_START_
#define MBEDTLS_ECP_TLS_CURVE25519 0x1d
#define MBEDTLS_X25519_KEY_SIZE_BYTES 32
typedef enum {
MBEDTLS_EVEREST_ECDH_OURS,
MBEDTLS_EVEREST_ECDH_THEIRS,
} mbedtls_everest_ecdh_side;
typedef struct {
unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES];
unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES];
} mbedtls_ecdh_context_everest;
int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int);
void mbedtls_everest_free(mbedtls_ecdh_context_everest *);
int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *,
unsigned char *, size_t,
int (*)(void *, unsigned char *, size_t),
void *);
int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *,
const unsigned char **, const unsigned char *);
int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *,
const mbedtls_ecp_keypair *,
mbedtls_everest_ecdh_side);
int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *,
unsigned char *, size_t,
int (*)(void *, unsigned char *, size_t),
void *);
int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *,
const unsigned char *, size_t);
int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *,
unsigned char *, size_t,
int (*)(void *, unsigned char *, size_t),
void *);
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */

View file

@ -28,31 +28,12 @@ Mbed TLS (Apache 2.0)\\n\
Copyright ARM Limited\\n\
Copyright Mbed TLS Contributors\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
/*
* Elliptic curve DSA
*
* Copyright The Mbed TLS Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* References:
/**
* @fileoverview Elliptic curve Digital Signature Algorithm
*
* SEC1 http://www.secg.org/index.php?action=secg,docs_secg
* @see SEC1 http://www.secg.org/index.php?action=secg,docs_secg
*/
#if defined(MBEDTLS_ECDSA_C)

View file

@ -511,12 +511,15 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] =
#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
{ MBEDTLS_ECP_DP_CURVE25519, 29, 256, "x25519" },
#endif
#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
{ MBEDTLS_ECP_DP_SECP256R1, 23, 256, "secp256r1" },
#endif
#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
{ MBEDTLS_ECP_DP_SECP384R1, 24, 384, "secp384r1" },
#endif
#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
{ MBEDTLS_ECP_DP_CURVE448, 30, 448, "x448" },
#endif
#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
{ MBEDTLS_ECP_DP_SECP256R1, 23, 256, "secp256r1" },
#endif
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
{ MBEDTLS_ECP_DP_SECP521R1, 25, 521, "secp521r1" },
#endif
@ -543,9 +546,6 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] =
#endif
#if defined(MBEDTLS_ECP_DP_SECP192K1_ENABLED)
{ MBEDTLS_ECP_DP_SECP192K1, 18, 192, "secp192k1" },
#endif
#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
{ MBEDTLS_ECP_DP_CURVE448, 30, 448, "x448" },
#endif
{ MBEDTLS_ECP_DP_NONE, 0, 0, NULL },
};

View file

@ -38,32 +38,15 @@ mbedtls_p256_isz( uint64_t p[4] )
static inline bool
mbedtls_p256_gte( uint64_t p[5] )
{
return( (p[4] ||
p[3] > 0xffffffff00000001 ||
return( ((int64_t)p[4] > 0 ||
(p[3] > 0xffffffff00000001 ||
(p[3] == 0xffffffff00000001 &&
p[2] > 0x0000000000000000 ||
(p[2] > 0x0000000000000000 ||
(p[2] == 0x0000000000000000 &&
p[1] > 0x00000000ffffffff ||
(p[1] > 0x00000000ffffffff ||
(p[1] == 0x00000000ffffffff &&
p[0] > 0xffffffffffffffff ||
(p[0] == 0xffffffffffffffff))))) );
}
static int
mbedtls_p256_cmp( const uint64_t a[5],
const uint64_t b[5] )
{
if( a[4] < b[4] ) return -1;
if( a[4] > b[4] ) return 1;
if( a[3] < b[3] ) return -1;
if( a[3] > b[3] ) return 1;
if( a[2] < b[2] ) return -1;
if( a[2] > b[2] ) return 1;
if( a[1] < b[1] ) return -1;
if( a[1] > b[1] ) return 1;
if( a[0] < b[0] ) return -1;
if( a[0] > b[0] ) return 1;
return 0;
(p[0] > 0xffffffffffffffff ||
(p[0] == 0xffffffffffffffff))))))))) );
}
static inline void
@ -119,125 +102,49 @@ mbedtls_p256_rum( uint64_t p[5] )
mbedtls_p256_red( p );
}
static void
mbedtls_p256_mod(uint64_t X[8])
{
secp256r1(X);
if ((int64_t)X[4] < 0) {
do {
mbedtls_p256_gro(X);
} while ((int64_t)X[4] < 0);
} else {
while (mbedtls_p256_gte(X)) {
mbedtls_p256_red(X);
}
}
}
static inline void
mbedtls_p256_sar( uint64_t p[5] )
{
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
asm("sarq\t32+%0\n\t"
"rcrq\t24+%0\n\t"
"rcrq\t16+%0\n\t"
"rcrq\t8+%0\n\t"
"rcrq\t%0\n\t"
: "+o"(*p)
: /* no inputs */
: "memory", "cc");
#else
p[0] = p[0] >> 1 | p[1] << 63;
p[1] = p[1] >> 1 | p[2] << 63;
p[2] = p[2] >> 1 | p[3] << 63;
p[3] = p[3] >> 1 | p[4] << 63;
p[4] = (int64_t)p[4] >> 1;
#endif
}
static inline void
mbedtls_p256_shl( uint64_t p[5] )
{
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
asm("shlq\t%0\n\t"
"rclq\t8+%0\n\t"
"rclq\t16+%0\n\t"
"rclq\t24+%0\n\t"
"rclq\t32+%0\n\t"
: "+o"(*p)
: /* no inputs */
: "memory", "cc");
#else
p[4] = p[3] >> 63;
p[3] = p[3] << 1 | p[2] >> 63;
p[2] = p[2] << 1 | p[1] >> 63;
p[1] = p[1] << 1 | p[0] >> 63;
p[0] = p[0] << 1;
#endif
mbedtls_p256_rum( p );
}
static inline void
mbedtls_p256_jam( uint64_t p[5] )
{
secp256r1( p );
if( (int64_t)p[4] < 0 )
do
mbedtls_p256_gro( p );
while( (int64_t)p[4] < 0 );
else
mbedtls_p256_rum( p );
}
static void
mbedtls_p256_mul_1x1( uint64_t X[8],
const uint64_t A[4], size_t n,
const uint64_t B[4], size_t m )
{
uint128_t t;
t = A[0];
t *= B[0];
X[ 0] = t;
X[ 1] = t >> 64;
X[ 2] = 0;
X[ 3] = 0;
X[ 4] = 0;
X[ 5] = 0;
X[ 6] = 0;
X[ 7] = 0;
}
static void
mbedtls_p256_mul_nx1( uint64_t X[8],
const uint64_t A[4], size_t n,
const uint64_t B[4], size_t m )
{
mbedtls_mpi_mul_hlp1(n, A, X, B[0]);
mbedtls_platform_zeroize( X + n + m, ( 8 - n - m ) * 8 );
if ( n + m >= 4 )
mbedtls_p256_jam( X );
}
static void
mbedtls_p256_mul_4x4( uint64_t X[8],
const uint64_t A[4], size_t n,
const uint64_t B[4], size_t m )
{
Mul4x4( X, A, B );
mbedtls_p256_jam( X );
}
static void
mbedtls_p256_mul_nxm( uint64_t X[8],
const uint64_t A[4], size_t n,
const uint64_t B[4], size_t m )
{
if (A == X) A = gc(memcpy(malloc(4 * 8), A, 4 * 8));
if (B == X) B = gc(memcpy(malloc(4 * 8), B, 4 * 8));
Mul( X, A, n, B, m );
mbedtls_platform_zeroize( X + n + m, (8 - n - m) * 8 );
if ( n + m >= 4 )
mbedtls_p256_jam( X );
}
static void
mbedtls_p256_mul( uint64_t X[8],
const uint64_t A[4], size_t n,
const uint64_t B[4], size_t m )
{
if( n == 4 && m == 4 )
mbedtls_p256_mul_4x4( X, A, n, B, m );
else if( m == 1 && n == 1 )
mbedtls_p256_mul_1x1( X, A, n, B, m );
else if( m == 1 )
mbedtls_p256_mul_nx1( X, A, n, B, m );
else
mbedtls_p256_mul_nxm( X, A, n, B, m );
Mul4x4( X, A, B );
mbedtls_p256_mod( X );
}
static void

View file

@ -36,42 +36,20 @@ mbedtls_p384_isz( uint64_t p[6] )
}
static inline bool
mbedtls_p384_gte( uint64_t p[7] )
{
return( (p[6] ||
p[5] > 0xffffffffffffffff ||
mbedtls_p384_gte( uint64_t p[7] ) {
return( ((int64_t)p[6] > 0 ||
(p[5] > 0xffffffffffffffff ||
(p[5] == 0xffffffffffffffff &&
p[4] > 0xffffffffffffffff ||
(p[4] > 0xffffffffffffffff ||
(p[4] == 0xffffffffffffffff &&
p[3] > 0xffffffffffffffff ||
(p[3] > 0xffffffffffffffff ||
(p[3] == 0xffffffffffffffff &&
p[2] > 0xfffffffffffffffe ||
(p[2] > 0xfffffffffffffffe ||
(p[2] == 0xfffffffffffffffe &&
p[1] > 0xffffffff00000000 ||
(p[1] > 0xffffffff00000000 ||
(p[1] == 0xffffffff00000000 &&
p[0] > 0x00000000ffffffff ||
(p[0] == 0x00000000ffffffff))))))) );
}
static int
mbedtls_p384_cmp( const uint64_t a[7],
const uint64_t b[7] )
{
if( a[6] < b[6] ) return -1;
if( a[6] > b[6] ) return 1;
if( a[5] < b[5] ) return -1;
if( a[5] > b[5] ) return 1;
if( a[4] < b[4] ) return -1;
if( a[4] > b[4] ) return 1;
if( a[3] < b[3] ) return -1;
if( a[3] > b[3] ) return 1;
if( a[2] < b[2] ) return -1;
if( a[2] > b[2] ) return 1;
if( a[1] < b[1] ) return -1;
if( a[1] > b[1] ) return 1;
if( a[0] < b[0] ) return -1;
if( a[0] > b[0] ) return 1;
return 0;
(p[0] > 0x00000000ffffffff ||
(p[0] == 0x00000000ffffffff))))))))))))) );
}
static inline void
@ -101,7 +79,7 @@ mbedtls_p384_red( uint64_t p[7] )
#endif
}
static noinline void
static inline void
mbedtls_p384_gro( uint64_t p[7] )
{
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
@ -128,28 +106,31 @@ mbedtls_p384_gro( uint64_t p[7] )
#endif
}
static void
static inline void
mbedtls_p384_rum( uint64_t p[7] )
{
while( mbedtls_p384_gte( p ) )
mbedtls_p384_red( p );
}
static inline void
mbedtls_p384_mod(uint64_t X[12])
{
secp384r1(X);
if ((int64_t)X[6] < 0) {
do {
mbedtls_p384_gro(X);
} while ((int64_t)X[6] < 0);
} else {
while (mbedtls_p384_gte(X)) {
mbedtls_p384_red(X);
}
}
}
static inline void
mbedtls_p384_sar( uint64_t p[7] )
{
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
asm("sarq\t48+%0\n\t"
"rcrq\t40+%0\n\t"
"rcrq\t32+%0\n\t"
"rcrq\t24+%0\n\t"
"rcrq\t16+%0\n\t"
"rcrq\t8+%0\n\t"
"rcrq\t%0\n\t"
: "+o"(*p)
: /* no inputs */
: "memory", "cc");
#else
p[0] = p[0] >> 1 | p[1] << 63;
p[1] = p[1] >> 1 | p[2] << 63;
p[2] = p[2] >> 1 | p[3] << 63;
@ -157,24 +138,11 @@ mbedtls_p384_sar( uint64_t p[7] )
p[4] = p[4] >> 1 | p[5] << 63;
p[5] = p[5] >> 1 | p[6] << 63;
p[6] = (int64_t)p[6] >> 1;
#endif
}
static inline void
mbedtls_p384_shl( uint64_t p[7] )
{
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
asm("shlq\t%0\n\t"
"rclq\t8+%0\n\t"
"rclq\t16+%0\n\t"
"rclq\t24+%0\n\t"
"rclq\t32+%0\n\t"
"rclq\t40+%0\n\t"
"rclq\t48+%0\n\t"
: "+o"(*p)
: /* no inputs */
: "memory", "cc");
#else
p[6] = p[5] >> 63;
p[5] = p[5] << 1 | p[4] >> 63;
p[4] = p[4] << 1 | p[3] >> 63;
@ -182,90 +150,24 @@ mbedtls_p384_shl( uint64_t p[7] )
p[2] = p[2] << 1 | p[1] >> 63;
p[1] = p[1] << 1 | p[0] >> 63;
p[0] = p[0] << 1;
#endif
mbedtls_p384_rum( p );
}
static inline void
mbedtls_p384_jam( uint64_t p[7] )
{
secp384r1( p );
if( (int64_t)p[6] < 0 )
do
mbedtls_p384_gro( p );
while( (int64_t)p[6] < 0 );
else
mbedtls_p384_rum( p );
}
static void
mbedtls_p384_mul_1x1( uint64_t X[12],
const uint64_t A[6], size_t n,
const uint64_t B[6], size_t m )
{
uint128_t t;
t = A[0];
t *= B[0];
X[ 0] = t;
X[ 1] = t >> 64;
X[ 2] = 0;
X[ 3] = 0;
X[ 4] = 0;
X[ 5] = 0;
X[ 6] = 0;
X[ 7] = 0;
X[ 8] = 0;
X[ 9] = 0;
X[10] = 0;
X[11] = 0;
}
static void
mbedtls_p384_mul_nx1( uint64_t X[12],
const uint64_t A[6], size_t n,
const uint64_t B[6], size_t m )
{
mbedtls_mpi_mul_hlp1(n, A, X, B[0]);
mbedtls_platform_zeroize( X + n + m, ( 12 - n - m ) * 8 );
if ( n + m >= 6 )
mbedtls_p384_jam( X );
}
static void
mbedtls_p384_mul_6x6( uint64_t X[12],
const uint64_t A[6], size_t n,
const uint64_t B[6], size_t m )
{
Mul6x6Adx( X, A, B );
mbedtls_p384_jam( X );
}
static void
mbedtls_p384_mul_nxm( uint64_t X[12],
const uint64_t A[6], size_t n,
const uint64_t B[6], size_t m )
{
if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8));
if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8));
Mul( X, A, n, B, m );
mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 );
if ( n + m >= 6 )
mbedtls_p384_jam( X );
}
static void
mbedtls_p384_mul( uint64_t X[12],
const uint64_t A[6], size_t n,
const uint64_t B[6], size_t m )
{
if( n == 6 && m == 6 && X86_HAVE(ADX) && X86_HAVE(BMI2) )
mbedtls_p384_mul_6x6( X, A, n, B, m );
else if( m == 1 && n == 1 )
mbedtls_p384_mul_1x1( X, A, n, B, m );
else if( m == 1 )
mbedtls_p384_mul_nx1( X, A, n, B, m );
if( X86_HAVE(ADX) && X86_HAVE(BMI2) )
Mul6x6Adx( X, A, B );
else
mbedtls_p384_mul_nxm( X, A, n, B, m );
{
if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8));
if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8));
Mul( X, A, n, B, m );
mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 );
}
mbedtls_p384_mod( X );
}
static void

View file

@ -46,7 +46,7 @@ asm(".include \"libc/disclaimer.inc\"");
* limitations under the License.
*/
/* #if defined(MBEDTLS_ECP_C) */
#if defined(MBEDTLS_ECP_C)
#if !defined(MBEDTLS_ECP_ALT)
@ -635,12 +635,7 @@ static int ecp_group_load( mbedtls_ecp_group *grp,
#endif /* ECP_LOAD_GROUP */
#if defined(MBEDTLS_ECP_NIST_OPTIM)
#define NIST_MODP( P ) grp->modp = ecp_mod_ ## P;
#else
#define NIST_MODP( P )
#endif
#if defined(MBEDTLS_ECP_NIST_OPTIM)
/* Forward declarations */
#if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED)
static int ecp_mod_p192( mbedtls_mpi * );
#endif
@ -650,8 +645,13 @@ static int ecp_mod_p224( mbedtls_mpi * );
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
static int ecp_mod_p521( mbedtls_mpi * );
#endif
#define NIST_MODP( P ) grp->modp = ecp_mod_ ## P;
#else
#define NIST_MODP( P )
#endif /* MBEDTLS_ECP_NIST_OPTIM */
/* Additional forward declarations */
#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
static int ecp_mod_p255( mbedtls_mpi * );
#endif
@ -771,8 +771,6 @@ cleanup:
}
#endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */
#if defined(MBEDTLS_ECP_C)
/**
* \brief This function sets up an ECP group context
* from a standardized set of domain parameters.
@ -879,7 +877,6 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id )
return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
}
}
#endif /* MBEDTLS_ECP_C */
#if defined(MBEDTLS_ECP_NIST_OPTIM)
/*
@ -892,6 +889,7 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id )
* MPI remains loose, since these functions can be deactivated at will.
*/
#if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED)
/*
* Compared to the way things are presented in FIPS 186-3 D.2,
* we proceed in columns, from right (least significant chunk) to left,
@ -942,13 +940,17 @@ static int ecp_mod_p192( mbedtls_mpi *N )
int ret = MBEDTLS_ERR_THIS_CORRUPTION;
mbedtls_mpi_uint c = 0;
mbedtls_mpi_uint *p, *end;
/* Make sure we have enough blocks so that A(5) is legal */
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( N, 6 * WIDTH ) );
p = N->p;
end = p + N->n;
ADD( 3 ); ADD( 5 ); NEXT; // A0 += A3 + A5
ADD( 3 ); ADD( 4 ); ADD( 5 ); NEXT; // A1 += A3 + A4 + A5
ADD( 4 ); ADD( 5 ); LAST; // A2 += A4 + A5
cleanup:
return( ret );
}
@ -958,7 +960,11 @@ cleanup:
#undef ADD
#undef NEXT
#undef LAST
#endif /* MBEDTLS_ECP_DP_SECP192R1_ENABLED */
#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) || \
defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) || \
defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
/*
* The reader is advised to first understand ecp_mod_p192() since the same
* general structure is used here, but with additional complications:
@ -1059,6 +1065,7 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, size_t bits )
{
int ret = MBEDTLS_ERR_THIS_CORRUPTION;
/* C = - c * 2^(bits + 32) */
#if !defined(MBEDTLS_HAVE_INT64)
((void) bits);
@ -1068,19 +1075,24 @@ static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, s
else
#endif
C->p[ C->n - 1 ] = (mbedtls_mpi_uint) -c;
/* N = - ( C - N ) */
MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( N, C, N ) );
N->s = -1;
cleanup:
return( ret );
}
#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
/*
* Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2)
*/
static int ecp_mod_p224( mbedtls_mpi *N )
{
INIT( 224 );
SUB( 7 ); SUB( 11 ); NEXT; // A0 += -A7 - A11
SUB( 8 ); SUB( 12 ); NEXT; // A1 += -A8 - A12
SUB( 9 ); SUB( 13 ); NEXT; // A2 += -A9 - A13
@ -1088,9 +1100,97 @@ static int ecp_mod_p224( mbedtls_mpi *N )
SUB( 11 ); ADD( 8 ); ADD( 12 ); NEXT; // A4 += -A11 + A8 + A12
SUB( 12 ); ADD( 9 ); ADD( 13 ); NEXT; // A5 += -A12 + A9 + A13
SUB( 13 ); ADD( 10 ); LAST; // A6 += -A13 + A10
cleanup:
return( ret );
}
#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
/*
* Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3)
*/
int ecp_mod_p256_old( mbedtls_mpi *N )
{
INIT( 256 );
ADD( 8 ); ADD( 9 );
SUB( 11 ); SUB( 12 ); SUB( 13 ); SUB( 14 ); NEXT; // A0
ADD( 9 ); ADD( 10 );
SUB( 12 ); SUB( 13 ); SUB( 14 ); SUB( 15 ); NEXT; // A1
ADD( 10 ); ADD( 11 );
SUB( 13 ); SUB( 14 ); SUB( 15 ); NEXT; // A2
ADD( 11 ); ADD( 11 ); ADD( 12 ); ADD( 12 ); ADD( 13 );
SUB( 15 ); SUB( 8 ); SUB( 9 ); NEXT; // A3
ADD( 12 ); ADD( 12 ); ADD( 13 ); ADD( 13 ); ADD( 14 );
SUB( 9 ); SUB( 10 ); NEXT; // A4
ADD( 13 ); ADD( 13 ); ADD( 14 ); ADD( 14 ); ADD( 15 );
SUB( 10 ); SUB( 11 ); NEXT; // A5
ADD( 14 ); ADD( 14 ); ADD( 15 ); ADD( 15 ); ADD( 14 ); ADD( 13 );
SUB( 8 ); SUB( 9 ); NEXT; // A6
ADD( 15 ); ADD( 15 ); ADD( 15 ); ADD( 8 );
SUB( 10 ); SUB( 11 ); SUB( 12 ); SUB( 13 ); LAST; // A7
cleanup:
return( ret );
}
#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED */
#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
/*
* Fast quasi-reduction modulo p384 (FIPS 186-3 D.2.4)
*/
int ecp_mod_p384_old( mbedtls_mpi *N )
{
INIT( 384 );
ADD( 12 ); ADD( 21 ); ADD( 20 );
SUB( 23 ); NEXT; // A0
ADD( 13 ); ADD( 22 ); ADD( 23 );
SUB( 12 ); SUB( 20 ); NEXT; // A1
ADD( 14 ); ADD( 23 );
SUB( 13 ); SUB( 21 ); NEXT; // A2
ADD( 15 ); ADD( 12 ); ADD( 20 ); ADD( 21 );
SUB( 14 ); SUB( 22 ); SUB( 23 ); NEXT; // A3
ADD( 21 ); ADD( 21 ); ADD( 16 ); ADD( 13 ); ADD( 12 ); ADD( 20 ); ADD( 22 );
SUB( 15 ); SUB( 23 ); SUB( 23 ); NEXT; // A4
ADD( 22 ); ADD( 22 ); ADD( 17 ); ADD( 14 ); ADD( 13 ); ADD( 21 ); ADD( 23 );
SUB( 16 ); NEXT; // A5
ADD( 23 ); ADD( 23 ); ADD( 18 ); ADD( 15 ); ADD( 14 ); ADD( 22 );
SUB( 17 ); NEXT; // A6
ADD( 19 ); ADD( 16 ); ADD( 15 ); ADD( 23 );
SUB( 18 ); NEXT; // A7
ADD( 20 ); ADD( 17 ); ADD( 16 );
SUB( 19 ); NEXT; // A8
ADD( 21 ); ADD( 18 ); ADD( 17 );
SUB( 20 ); NEXT; // A9
ADD( 22 ); ADD( 19 ); ADD( 18 );
SUB( 21 ); NEXT; // A10
ADD( 23 ); ADD( 20 ); ADD( 19 );
SUB( 22 ); LAST; // A11
cleanup:
return( ret );
}
#endif /* MBEDTLS_ECP_DP_SECP384R1_ENABLED */
#undef A
#undef LOAD32
@ -1100,6 +1200,10 @@ cleanup:
#undef NEXT
#undef LAST
#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED ||
MBEDTLS_ECP_DP_SECP256R1_ENABLED ||
MBEDTLS_ECP_DP_SECP384R1_ENABLED */
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
/*
* Here we have an actual Mersenne prime, so things are more straightforward.
@ -1156,6 +1260,8 @@ cleanup:
#endif /* MBEDTLS_ECP_NIST_OPTIM */
#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
/* Size of p255 in terms of mbedtls_mpi_uint */
#define P255_WIDTH ( 255 / 8 / sizeof( mbedtls_mpi_uint ) + 1 )
@ -1169,28 +1275,34 @@ static int ecp_mod_p255( mbedtls_mpi *N )
size_t i;
mbedtls_mpi M;
mbedtls_mpi_uint Mp[P255_WIDTH + 2];
if( N->n < P255_WIDTH )
return( 0 );
/* M = A1 */
M.s = 1;
M.n = N->n - ( P255_WIDTH - 1 );
if( M.n > P255_WIDTH + 1 )
return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
M.p = Mp;
mbedtls_platform_zeroize( Mp, sizeof Mp );
memset( Mp, 0, sizeof Mp );
memcpy( Mp, N->p + P255_WIDTH - 1, M.n * sizeof( mbedtls_mpi_uint ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, 255 % ( 8 * sizeof( mbedtls_mpi_uint ) ) ) );
M.n++; /* Make room for multiplication by 19 */
/* N = A0 */
MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( N, 255, 0 ) );
for( i = P255_WIDTH; i < N->n; i++ )
N->p[i] = 0;
/* N = A0 + 19 * A1 */
MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M, &M, 19 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( N, N, &M ) );
cleanup:
return( ret );
}
#endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */
#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
@ -1231,7 +1343,7 @@ static int ecp_mod_p448( mbedtls_mpi *N )
/* Shouldn't be called with N larger than 2^896! */
return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
M.p = Mp;
mbedtls_platform_zeroize( Mp, sizeof( Mp ) );
memset( Mp, 0, sizeof( Mp ) );
memcpy( Mp, N->p + P448_WIDTH, M.n * sizeof( mbedtls_mpi_uint ) );
/* N = A0 */
@ -1299,7 +1411,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t
M.n = N->n - ( p_limbs - adjust );
if( M.n > p_limbs + adjust )
M.n = p_limbs + adjust;
mbedtls_platform_zeroize( Mp, sizeof Mp );
memset( Mp, 0, sizeof Mp );
memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) );
if( shift != 0 )
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) );
@ -1321,7 +1433,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t
M.n = N->n - ( p_limbs - adjust );
if( M.n > p_limbs + adjust )
M.n = p_limbs + adjust;
mbedtls_platform_zeroize( Mp, sizeof Mp );
memset( Mp, 0, sizeof Mp );
memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) );
if( shift != 0 )
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) );
@ -1392,4 +1504,4 @@ static int ecp_mod_p256k1( mbedtls_mpi *N )
#endif /* !MBEDTLS_ECP_ALT */
/* #endif /\* MBEDTLS_ECP_C *\/ */
#endif /* MBEDTLS_ECP_C */
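
A note on the ADD()/SUB() column schedules above: each line assembles one word of the quasi-reduced result from other words of the input (32-bit words A₀…A₂₃ for p224/p256/p384, full 64-bit limbs for p192), with NEXT/LAST carrying into the following word and fix_negative() handling the case where the subtractions win. The p192 case shows why the bookkeeping works: since p₁₉₂ = 2¹⁹² - 2⁶⁴ - 1, we have 2¹⁹² ≡ 2⁶⁴ + 1 (mod p₁₉₂), so a 384-bit input A₅‖A₄‖A₃‖A₂‖A₁‖A₀ folds down to A₂‖A₁‖A₀ plus the three terms (0‖A₃‖A₃) + (A₄‖A₄‖0) + (A₅‖A₅‖A₅), which is exactly the annotated schedule A₀ += A₃ + A₅, A₁ += A₃ + A₄ + A₅, A₂ += A₄ + A₅. The output is only congruent to the input, not fully reduced, which is why the callers keep the compare-and-subtract (and, for negative intermediates, add-p-back) fix-up loops seen in the p256.c and p384.c hunks.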

File diff suppressed because it is too large

View file

@ -1,52 +1,10 @@
#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
#include "third_party/mbedtls/config.h"
#include "third_party/mbedtls/ecp.h"
#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_
#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define MBEDTLS_ECP_TLS_CURVE25519 0x1d
#define MBEDTLS_X25519_KEY_SIZE_BYTES 32
typedef enum {
MBEDTLS_X25519_ECDH_OURS,
MBEDTLS_X25519_ECDH_THEIRS,
} mbedtls_x25519_ecdh_side;
typedef struct {
unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES];
unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES];
} mbedtls_x25519_context;
typedef enum {
MBEDTLS_EVEREST_ECDH_OURS,
MBEDTLS_EVEREST_ECDH_THEIRS,
} mbedtls_everest_ecdh_side;
typedef struct {
mbedtls_x25519_context ctx;
} mbedtls_ecdh_context_everest;
int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int);
void mbedtls_everest_free(mbedtls_ecdh_context_everest *);
int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *,
unsigned char *, size_t,
int (*)(void *, unsigned char *, size_t),
void *);
int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *,
const unsigned char **, const unsigned char *);
int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *,
const mbedtls_ecp_keypair *,
mbedtls_everest_ecdh_side);
int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *,
unsigned char *, size_t,
int (*)(void *, unsigned char *, size_t),
void *);
int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *,
const unsigned char *, size_t);
int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *,
unsigned char *, size_t,
int (*)(void *, unsigned char *, size_t),
void *);
void curve25519(uint8_t[32], const uint8_t[32], const uint8_t[32]);
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_ */

View file

@ -55,7 +55,7 @@ $(THIRD_PARTY_MBEDTLS_A_OBJS): \
o/$(MODE)/third_party/mbedtls/everest.o: \
OVERRIDE_CFLAGS += \
-Os
-O3
o/$(MODE)/third_party/mbedtls/bigmul4.o \
o/$(MODE)/third_party/mbedtls/bigmul6.o: \
@ -70,11 +70,6 @@ o/$(MODE)/third_party/mbedtls/shiftright2-avx.o: \
OVERRIDE_CFLAGS += \
-O3 -mavx
# tail recursion is so important because everest was written in f*
o/$(MODE)/third_party/mbedtls/everest.o: \
OVERRIDE_CFLAGS += \
-foptimize-sibling-calls
THIRD_PARTY_MBEDTLS_LIBS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)))
THIRD_PARTY_MBEDTLS_SRCS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_MBEDTLS_HDRS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_HDRS))

View file

@ -26,7 +26,7 @@
#define H(w) (w & 0xffffffff00000000)
/**
* Fastest quasi-reduction modulo NIST P-256.
* Fastest quasi-reduction modulo ℘256.
*
* p = 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1
* B = T + 2×S₁ + 2×S₂ + S₃ + S₄ - D₁ - D₂ - D₃ - D₄ mod p

View file

@ -24,7 +24,7 @@
#define Q(i) p[i >> 1]
/**
* Fastest quasi-reduction modulo Prime 384.
* Fastest quasi-reduction modulo ℘384.
*
* p = 2³⁸⁴ - 2¹²⁸ - 2⁹⁶ + 2³² - 1
* B = T + 2×S₁ + S₂ + S₃ + S₄ + S₅ + S₆ - D₁ - D₂ - D₃ mod p
@ -44,8 +44,7 @@
void secp384r1(uint64_t p[12]) {
int r;
char o;
signed char G;
uint64_t A, B, C, D, E, F, a, b, c;
uint64_t A, B, C, D, E, F, G, a, b, c;
A = Q(0);
B = Q(2);
C = Q(4);
@ -57,8 +56,8 @@ void secp384r1(uint64_t p[12]) {
a = Q(22) << 32 | Q(21) >> 32;
b = Q(23) >> 32;
ADC(C, C, a << 1, 0, o);
ADC(D, D, (b << 1 | a >> 63), o, o);
ADC(E, E, (b >> 63), o, o);
ADC(D, D, b << 1 | a >> 63, o, o);
ADC(E, E, b >> 63, o, o);
ADC(F, F, o, o, o);
G += o;
ADC(A, A, Q(12), 0, o);
@ -118,91 +117,105 @@ void secp384r1(uint64_t p[12]) {
asm volatile(/* S₁ = (0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖0 ‖0 ‖0 ‖0 ) */
"mov\t21*4(%9),%7\n\t"
"mov\t23*4(%9),%k8\n\t"
"mov\t%7,%%r12\n\t"
"shr\t$63,%%r12\n\t"
"shl\t%7\n\t"
"rcl\t%8\n\t"
"shl\t%8\n\t"
"or\t%%r12,%8\n\t"
"mov\t13*4(%9),%%r12\n\t"
"add\t%7,%2\n\t"
"mov\t23*4(%9),%k7\n\t"
"adc\t%8,%3\n\t"
"mov\t15*4(%9),%%r13\n\t"
"adc\t$0,%4\n\t"
"mov\t12*4(%9),%k8\n\t"
"adc\t$0,%5\n\t"
"adc\t$0,%b6\n\t"
/* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */
"add\t12*4(%9),%0\n\t"
"adc\t14*4(%9),%1\n\t"
"adc\t16*4(%9),%2\n\t"
"adc\t18*4(%9),%3\n\t"
"adc\t20*4(%9),%4\n\t"
"adc\t22*4(%9),%5\n\t"
"adc\t$0,%b6\n\t"
/* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */
"mov\t12*4(%9),%k7\n\t"
"mov\t17*4(%9),%%r14\n\t"
"adc\t$0,%6\n\t"
"mov\t19*4(%9),%%r15\n\t"
/* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */
"shl\t$32,%8\n\t"
"or\t%8,%7\n\t"
"mov\t23*4(%9),%k8\n\t"
"sub\t%7,%0\n\t"
"mov\t21*4(%9),%7\n\t"
"sbb\t%%r12,%1\n\t"
"sbb\t%%r13,%2\n\t"
"sbb\t%%r14,%3\n\t"
"sbb\t%%r15,%4\n\t"
"sbb\t%7,%5\n\t"
"mov\t12*4(%9),%k7\n\t"
"sbb\t$0,%6\n\t"
/* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */
"shl\t$32,%7\n\t"
"or\t%7,%8\n\t"
"add\t21*4(%9),%0\n\t"
"adc\t%8,%1\n\t"
"adc\t13*4(%9),%2\n\t"
"adc\t15*4(%9),%3\n\t"
"adc\t17*4(%9),%4\n\t"
"adc\t19*4(%9),%5\n\t"
"adc\t$0,%b6\n\t"
/* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0 ‖A₂₃‖0 ) */
"mov\t23*4(%9),%k7\n\t"
"adc\t%8,%1\n\t"
"mov\t20*4(%9),%k8\n\t"
"adc\t%%r12,%2\n\t"
"mov\t12*4(%9),%%r12\n\t"
"adc\t%%r13,%3\n\t"
"mov\t14*4(%9),%%r13\n\t"
"adc\t%%r14,%4\n\t"
"mov\t16*4(%9),%%r14\n\t"
"adc\t%%r15,%5\n\t"
"mov\t18*4(%9),%%r15\n\t"
"adc\t$0,%6\n\t"
/* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0 ‖A₂₃‖0 ) */
"shl\t$32,%7\n\t"
"shl\t$32,%8\n\t"
"add\t%7,%0\n\t"
"adc\t%8,%1\n\t"
"adc\t12*4(%9),%2\n\t"
"adc\t14*4(%9),%3\n\t"
"adc\t16*4(%9),%4\n\t"
"adc\t18*4(%9),%5\n\t"
"adc\t$0,%b6\n\t"
"adc\t%%r12,%2\n\t"
"adc\t%%r13,%3\n\t"
"adc\t%%r14,%4\n\t"
"adc\t%%r15,%5\n\t"
"adc\t$0,%6\n\t"
/* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */
"add\t%%r12,%0\n\t"
"mov\t20*4(%9),%%r12\n\t"
"adc\t%%r13,%1\n\t"
"mov\t22*4(%9),%%r13\n\t"
"adc\t%%r14,%2\n\t"
"adc\t%%r15,%3\n\t"
"adc\t%%r12,%4\n\t"
"adc\t%%r13,%5\n\t"
"adc\t$0,%6\n\t"
/* S₅ = (0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0 ‖0 ‖0 ‖0 ) */
"mov\t23*4(%9),%k7\n\t"
"mov\t20*4(%9),%k8\n\t"
"shl\t$32,%7\n\t"
"shl\t$32,%8\n\t"
"add\t20*4(%9),%2\n\t"
"adc\t22*4(%9),%3\n\t"
"add\t%%r12,%2\n\t"
"adc\t%%r13,%3\n\t"
"adc\t$0,%4\n\t"
"adc\t$0,%5\n\t"
"adc\t$0,%b6\n\t"
"adc\t$0,%6\n\t"
/* S₆ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖0 ‖0 ‖A₂₀) */
"mov\t20*4(%9),%k7\n\t"
"mov\t21*4(%9),%k8\n\t"
"mov\t%%r12d,%k7\n\t"
"mov\t%%r12,%8\n\t"
"shr\t$32,%8\n\t"
"shl\t$32,%8\n\t"
"add\t%7,%0\n\t"
"adc\t%8,%1\n\t"
"adc\t22*4(%9),%2\n\t"
"adc\t%%r13,%2\n\t"
"adc\t$0,%3\n\t"
"adc\t$0,%4\n\t"
"adc\t$0,%5\n\t"
"adc\t$0,%b6\n\t"
/* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */
"mov\t23*4(%9),%k7\n\t"
"mov\t12*4(%9),%k8\n\t"
"shl\t$32,%8\n\t"
"or\t%8,%7\n\t"
"sub\t%7,%0\n\t"
"sbb\t13*4(%9),%1\n\t"
"sbb\t15*4(%9),%2\n\t"
"sbb\t17*4(%9),%3\n\t"
"sbb\t19*4(%9),%4\n\t"
"sbb\t21*4(%9),%5\n\t"
"sbb\t$0,%b6\n\t"
"adc\t$0,%6\n\t"
/* D₂ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0 ) */
"mov\t20*4(%9),%k7\n\t"
"mov\t23*4(%9),%k8\n\t"
"mov\t%%r12d,%k7\n\t"
"mov\t21*4(%9),%%r12\n\t"
"mov\t%%r13,%8\n\t"
"shr\t$32,%8\n\t"
"shl\t$32,%7\n\t"
"sub\t%7,%0\n\t"
"sbb\t21*4(%9),%1\n\t"
"sbb\t%%r12,%1\n\t"
"sbb\t%8,%2\n\t"
"sbb\t$0,%3\n\t"
"sbb\t$0,%4\n\t"
"sbb\t$0,%5\n\t"
"sbb\t$0,%b6\n\t"
"sbb\t$0,%6\n\t"
/* D₃ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₃‖0 ‖0 ‖0 ) */
"mov\t23*4(%9),%k7\n\t"
"mov\t%%r13,%7\n\t"
"shr\t$32,%7\n\t"
"mov\t%k7,%k8\n\t"
"shl\t$32,%7\n\t"
"sub\t%7,%1\n\t"
@ -210,11 +223,11 @@ void secp384r1(uint64_t p[12]) {
"sbb\t$0,%3\n\t"
"sbb\t$0,%4\n\t"
"sbb\t$0,%5\n\t"
"sbb\t$0,%b6\n\t"
"sbb\t$0,%6"
: "+r"(A), "+r"(B), "+r"(C), "+r"(D), "+r"(E), "+r"(F), "+q"(G),
"=&r"(a), "=&r"(b)
: "r"(p)
: "memory");
: "memory", "r12", "r13", "r14", "r15");
#endif
p[0] = A;
p[1] = B;
@ -223,11 +236,12 @@ void secp384r1(uint64_t p[12]) {
p[4] = E;
p[5] = F;
p[6] = G;
p[7] = 0;
p[8] = 0;
p[9] = 0;
p[10] = 0;
p[11] = 0;
G = CONCEAL("r", 0L);
p[7] = G;
p[8] = G;
p[9] = G;
p[10] = G;
p[11] = G;
}
int ecp_mod_p384(mbedtls_mpi *N) {
@ -249,3 +263,130 @@ int ecp_mod_p384(mbedtls_mpi *N) {
}
return 0;
}
/*
Instructions: 115
Total Cycles: 46
Total uOps: 116
uOps Per Cycle: 2.52
IPC: 2.50
Block RThroughput: 31.0
SIMULATION 0123456789 0123456789
Index 0123456789 0123456789 012345
[0,0] DR . . . . . . . . . xorl %r10d, %r10d
[0,1] DeeeeeER . . . . . . . . movq (%rdi), %r9
[0,2] DeeeeeER . . . . . . . . movq 8(%rdi), %r8
[0,3] D=eeeeeER . . . . . . . . movq 16(%rdi), %rsi
[0,4] D=eeeeeER . . . . . . . . movq 24(%rdi), %rcx
[0,5] D==eeeeeER. . . . . . . . movq 32(%rdi), %rdx
[0,6] .D==eeeeeER . . . . . . . movq 40(%rdi), %rax
[0,7] .D=eeeeeE-R . . . . . . . movq 84(%rdi), %r11
[0,8] .D==eeeeeER . . . . . . . movl 92(%rdi), %ebx
[0,9] .D======eER . . . . . . . movq %r11, %r12
[0,10] .D=======eER . . . . . . . shrq $63, %r12
[0,11] .D======eE-R . . . . . . . shlq %r11
[0,12] . D======eER . . . . . . . shlq %rbx
[0,13] . D=======eER . . . . . . . orq %r12, %rbx
[0,14] . D==eeeeeE-R . . . . . . . movq 52(%rdi), %r12
[0,15] . D======eE-R . . . . . . . addq %r11, %rsi
[0,16] . D==eeeeeE-R . . . . . . . movl 92(%rdi), %r11d
[0,17] . D========eER . . . . . . . adcq %rbx, %rcx
[0,18] . D==eeeeeE-R . . . . . . . movq 60(%rdi), %r13
[0,19] . D========eER. . . . . . . adcq $0, %rdx
[0,20] . D==eeeeeE--R. . . . . . . movl 48(%rdi), %ebx
[0,21] . D=========eER . . . . . . adcq $0, %rax
[0,22] . D===eeeeeE--R . . . . . . movq 68(%rdi), %r14
[0,23] . D==========eER . . . . . . adcq $0, %r10
[0,24] . D==eeeeeE---R . . . . . . movq 76(%rdi), %r15
[0,25] . D======eE---R . . . . . . shlq $32, %rbx
[0,26] . D=======eE--R . . . . . . orq %rbx, %r11
[0,27] . D===eeeeeE--R . . . . . . movl 92(%rdi), %ebx
[0,28] . D========eE-R . . . . . . subq %r11, %r9
[0,29] . D===eeeeeE--R . . . . . . movq 84(%rdi), %r11
[0,30] . D========eER . . . . . . sbbq %r12, %r8
[0,31] . D=========eER . . . . . . sbbq %r13, %rsi
[0,32] . D==========eER . . . . . . sbbq %r14, %rcx
[0,33] . D===========eER. . . . . . sbbq %r15, %rdx
[0,34] . D============eER . . . . . sbbq %r11, %rax
[0,35] . D===eeeeeE-----R . . . . . movl 48(%rdi), %r11d
[0,36] . .D============eER . . . . . sbbq $0, %r10
[0,37] . .D========eE----R . . . . . shlq $32, %r11
[0,38] . .D=========eE---R . . . . . orq %r11, %rbx
[0,39] . .D==eeeeeE------R . . . . . movl 92(%rdi), %r11d
[0,40] . .D======eeeeeeE-R . . . . . addq 84(%rdi), %r9
[0,41] . . D===========eER . . . . . adcq %rbx, %r8
[0,42] . . D==eeeeeE-----R . . . . . movl 80(%rdi), %ebx
[0,43] . . D============eER . . . . . adcq %r12, %rsi
[0,44] . . D==eeeeeE------R . . . . . movq 48(%rdi), %r12
[0,45] . . D=============eER . . . . . adcq %r13, %rcx
[0,46] . . D===eeeeeE------R . . . . . movq 56(%rdi), %r13
[0,47] . . D=============eER. . . . . adcq %r14, %rdx
[0,48] . . D==eeeeeE-------R. . . . . movq 64(%rdi), %r14
[0,49] . . D==============eER . . . . adcq %r15, %rax
[0,50] . . D===eeeeeE-------R . . . . movq 72(%rdi), %r15
[0,51] . . D===============eER . . . . adcq $0, %r10
[0,52] . . D=======eE--------R . . . . shlq $32, %r11
[0,53] . . D=======eE-------R . . . . shlq $32, %rbx
[0,54] . . D=========eE-----R . . . . addq %r11, %r9
[0,55] . . D==========eE----R . . . . adcq %rbx, %r8
[0,56] . . D===========eE---R . . . . adcq %r12, %rsi
[0,57] . . D============eE--R . . . . adcq %r13, %rcx
[0,58] . . D=============eE-R . . . . adcq %r14, %rdx
[0,59] . . D=============eER . . . . adcq %r15, %rax
[0,60] . . D==============eER . . . . adcq $0, %r10
[0,61] . . D=========eE-----R . . . . addq %r12, %r9
[0,62] . . D=eeeeeE---------R . . . . movq 80(%rdi), %r12
[0,63] . . D==============eER . . . . adcq %r13, %r8
[0,64] . . D==eeeeeE--------R . . . . movq 88(%rdi), %r13
[0,65] . . .D==============eER . . . . adcq %r14, %rsi
[0,66] . . .D===============eER. . . . adcq %r15, %rcx
[0,67] . . .D================eER . . . adcq %r12, %rdx
[0,68] . . .D=================eER . . . adcq %r13, %rax
[0,69] . . .D==================eER . . . adcq $0, %r10
[0,70] . . .D===============eE---R . . . addq %r12, %rsi
[0,71] . . . D===============eE--R . . . adcq %r13, %rcx
[0,72] . . . D================eE-R . . . adcq $0, %rdx
[0,73] . . . D=================eER . . . adcq $0, %rax
[0,74] . . . D==================eER . . . adcq $0, %r10
[0,75] . . . D====eE--------------R . . . movl %r12d, %r11d
[0,76] . . . D====eE--------------R . . . movq %r12, %rbx
[0,77] . . . D====eE-------------R . . . shrq $32, %rbx
[0,78] . . . D============eE-----R . . . shlq $32, %rbx
[0,79] . . . D=======eE----------R . . . addq %r11, %r9
[0,80] . . . D=============eE----R . . . adcq %rbx, %r8
[0,81] . . . D=================eER . . . adcq %r13, %rsi
[0,82] . . . D==================eER. . . adcq $0, %rcx
[0,83] . . . D==================eER . . adcq $0, %rdx
[0,84] . . . D===================eER . . adcq $0, %rax
[0,85] . . . D====================eER . . adcq $0, %r10
[0,86] . . . D===eE-----------------R . . movl %r12d, %r11d
[0,87] . . . DeeeeeE----------------R . . movq 84(%rdi), %r12
[0,88] . . . D===eE-----------------R . . movq %r13, %rbx
[0,89] . . . D================eE---R . . shrq $32, %rbx
[0,90] . . . D=================eE--R . . shlq $32, %r11
[0,91] . . . D==================eE-R . . subq %r11, %r9
[0,92] . . . D===================eER . . sbbq %r12, %r8
[0,93] . . . D====================eER . . sbbq %rbx, %rsi
[0,94] . . . D=====================eER. . sbbq $0, %rcx
[0,95] . . . .D=====================eER . sbbq $0, %rdx
[0,96] . . . .D======================eER . sbbq $0, %rax
[0,97] . . . .D=======================eER . sbbq $0, %r10
[0,98] . . . .D==eE---------------------R . movq %r13, %r11
[0,99] . . . .D=================eE------R . shrq $32, %r11
[0,100] . . . .D==================eE-----R . movl %r11d, %ebx
[0,101] . . . . D==================eE----R . shlq $32, %r11
[0,102] . . . . D===================eE---R . subq %r11, %r8
[0,103] . . . . D====================eE--R . sbbq %rbx, %rsi
[0,104] . . . . D=====================eE-R . sbbq $0, %rcx
[0,105] . . . . D======================eER . sbbq $0, %rdx
[0,106] . . . . D=======================eER . sbbq $0, %rax
[0,107] . . . . D=======================eER. sbbq $0, %r10
[0,108] . . . . D================eE-------R. movq %r9, (%rdi)
[0,109] . . . . D===================eE----R. movq %r8, 8(%rdi)
[0,110] . . . . D====================eE---R. movq %rsi, 16(%rdi)
[0,111] . . . . D=====================eE--R. movq %rcx, 24(%rdi)
[0,112] . . . . D======================eE-R. movq %rdx, 32(%rdi)
[0,113] . . . . D======================eER. movq %rax, 40(%rdi)
[0,114] . . . . D=======================eER movq %r10, 48(%rdi)
*/
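
One detail in the tail of secp384r1() above: the change clears p[7] through p[11] by storing G = CONCEAL("r", 0L) instead of the literal zeros used before. CONCEAL is presumably an asm-laundering macro along the lines of the sketch below (illustrative, not the actual definition), which makes the zero opaque to the optimizer so the stores are emitted as ordinary runtime stores rather than being specialized around a known compile-time constant:

#define CONCEAL_SKETCH(constraint, x)  \
  ({                                   \
    long v_ = (x);                     \
    asm("" : "+" constraint(v_));      \
    v_;                                \
  })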

View file

@ -61,7 +61,6 @@ static const uint16_t ciphersuite_preference[] =
MBEDTLS_TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CCM,
MBEDTLS_TLS_DHE_RSA_WITH_AES_256_CCM,
/* weakened perfect forward secrecy */
MBEDTLS_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
MBEDTLS_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256,

77
third_party/mbedtls/test/everest_test.c vendored Normal file
View file

@ -0,0 +1,77 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/rand/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "third_party/mbedtls/config.h"
#include "third_party/mbedtls/endian.h"
void Hacl_Curve25519_crypto_scalarmult(uint8_t *, uint8_t *, uint8_t *);
void curve25519(uint8_t[32], uint8_t[32], uint8_t[32]);
const uint64_t kNumbers[] = {
0x0000000000000000, //
0x0000000000000001, //
0x0000000000001000, //
0x0000000002000000, //
0x0000004000000000, //
0x0008000000000000, //
0x8000000000000000, //
0x0007ffffffffffff, //
0x0000003fffffffff, //
0x0000000001ffffff, //
0x0000000000000fff, //
0xffffffffffffffff, //
0xfff8000000000000, //
};
TEST(everest, tinierVersionBehavesTheSame) {
size_t i;
uint8_t secret[32], bpoint[32], public[2][32];
for (i = 0; i < 500; ++i) {
rngset(secret, sizeof(secret), rand64, -1);
rngset(bpoint, sizeof(bpoint), rand64, -1);
Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint);
curve25519(public[1], secret, bpoint);
ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0])));
}
for (i = 0; i < 500; ++i) {
Write64le(secret + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Write64le(secret + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Write64le(secret + 020, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Write64le(secret + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Write64le(bpoint + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Write64le(bpoint + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Write64le(bpoint + 020, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Write64le(bpoint + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]);
Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint);
curve25519(public[1], secret, bpoint);
ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0])));
}
}
BENCH(everest, bench) {
uint8_t secret[32], bpoint[32], public[32];
rngset(secret, sizeof(secret), rand64, -1);
rngset(bpoint, sizeof(bpoint), rand64, -1);
EZBENCH2("everest", donothing,
Hacl_Curve25519_crypto_scalarmult(public, secret, bpoint));
EZBENCH2("mariana", donothing, curve25519(public, secret, bpoint));
}

View file

@ -0,0 +1,899 @@
#include "libc/bits/bits.h"
#include "libc/limits.h"
#include "third_party/mbedtls/asn1.h"
#include "third_party/mbedtls/bignum.h"
#include "third_party/mbedtls/common.h"
#include "third_party/mbedtls/error.h"
#include "third_party/mbedtls/platform.h"
asm(".ident\t\"\\n\\n\
Everest (Apache 2.0)\\n\
Copyright 2016-2018 INRIA and Microsoft Corporation\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */
/*
* ECDH with curve-optimized implementation multiplexing
*
* Copyright 2016-2018 INRIA and Microsoft Corporation
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of mbed TLS (https://tls.mbed.org)
*/
#ifdef memcpy
#undef memcpy
#endif
#define memcpy(x,y,z) __builtin_memcpy(x,y,z)
#define load64_le(b) READ64LE(b)
#define store64_le(b, i) WRITE64LE(b, i)
#define KRML_HOST_EXIT exit
#define KRML_HOST_PRINTF printf
#define KRML_EXIT \
do { \
KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \
KRML_HOST_EXIT(254); \
} while (0)
#define _KRML_CHECK_SIZE_PRAGMA \
_Pragma("GCC diagnostic ignored \"-Wtype-limits\"")
#define KRML_CHECK_SIZE(size_elt, sz) \
do { \
_KRML_CHECK_SIZE_PRAGMA \
if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \
KRML_HOST_PRINTF( \
"Maximum allocatable size exceeded, aborting before overflow at " \
"%s:%d\n", \
__FILE__, __LINE__); \
KRML_HOST_EXIT(253); \
} \
} while (0)
typedef const char *Prims_string;
typedef struct {
uint32_t length;
const char *data;
} FStar_Bytes_bytes;
typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int,
krml_checked_int_t;
/* Prims_nat not yet in scope */
inline static int32_t krml_time() {
return (int32_t)time(NULL);
}
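/* Branch-free comparison helpers: eq_mask returns all-ones when a == b
   and zero otherwise; gte_mask returns all-ones when a >= b. Keeping
   them branchless avoids data-dependent timing. */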
static uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b)
{
uint64_t x = a ^ b;
uint64_t minus_x = ~x + (uint64_t)1U;
uint64_t x_or_minus_x = x | minus_x;
uint64_t xnx = x_or_minus_x >> (uint32_t)63U;
return xnx - (uint64_t)1U;
}
static uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b)
{
uint64_t x = a;
uint64_t y = b;
uint64_t x_xor_y = x ^ y;
uint64_t x_sub_y = x - y;
uint64_t x_sub_y_xor_y = x_sub_y ^ y;
uint64_t q = x_xor_y | x_sub_y_xor_y;
uint64_t x_xor_q = x ^ q;
uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U;
return x_xor_q_ - (uint64_t)1U;
}
static uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b)
{
uint32_t x = a ^ b;
uint32_t minus_x = ~x + (uint32_t)1U;
uint32_t x_or_minus_x = x | minus_x;
uint32_t xnx = x_or_minus_x >> (uint32_t)31U;
return xnx - (uint32_t)1U;
}
static uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b)
{
uint32_t x = a;
uint32_t y = b;
uint32_t x_xor_y = x ^ y;
uint32_t x_sub_y = x - y;
uint32_t x_sub_y_xor_y = x_sub_y ^ y;
uint32_t q = x_xor_y | x_sub_y_xor_y;
uint32_t x_xor_q = x ^ q;
uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U;
return x_xor_q_ - (uint32_t)1U;
}
static uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b)
{
uint16_t x = a ^ b;
uint16_t minus_x = ~x + (uint16_t)1U;
uint16_t x_or_minus_x = x | minus_x;
uint16_t xnx = x_or_minus_x >> (uint32_t)15U;
return xnx - (uint16_t)1U;
}
static uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b)
{
uint16_t x = a;
uint16_t y = b;
uint16_t x_xor_y = x ^ y;
uint16_t x_sub_y = x - y;
uint16_t x_sub_y_xor_y = x_sub_y ^ y;
uint16_t q = x_xor_y | x_sub_y_xor_y;
uint16_t x_xor_q = x ^ q;
uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U;
return x_xor_q_ - (uint16_t)1U;
}
static uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b)
{
uint8_t x = a ^ b;
uint8_t minus_x = ~x + (uint8_t)1U;
uint8_t x_or_minus_x = x | minus_x;
uint8_t xnx = x_or_minus_x >> (uint32_t)7U;
return xnx - (uint8_t)1U;
}
static uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b)
{
uint8_t x = a;
uint8_t y = b;
uint8_t x_xor_y = x ^ y;
uint8_t x_sub_y = x - y;
uint8_t x_sub_y_xor_y = x_sub_y ^ y;
uint8_t q = x_xor_y | x_sub_y_xor_y;
uint8_t x_xor_q = x ^ q;
uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U;
return x_xor_q_ - (uint8_t)1U;
}
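/* Field elements are five 51-bit limbs mod 2^255-19. Since 2^255 == 19
   (mod p), the bits above position 51 in the top limb are folded back
   into the bottom limb multiplied by 19. */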
static void Hacl_Bignum_Modulo_carry_top(uint64_t *b)
{
uint64_t b4 = b[4U];
uint64_t b0 = b[0U];
uint64_t b4_ = b4 & (uint64_t)0x7ffffffffffffU;
uint64_t b0_ = b0 + (uint64_t)19U * (b4 >> (uint32_t)51U);
b[4U] = b4_;
b[0U] = b0_;
}
inline static void Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, uint128_t *input)
{
uint32_t i;
for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
{
uint128_t xi = input[i];
output[i] = (uint64_t)xi;
}
}
inline static void
Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint128_t *output, uint64_t *input, uint64_t s)
{
uint32_t i;
for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
{
uint128_t xi = output[i];
uint64_t yi = input[i];
output[i] = xi + (uint128_t)yi * s;
}
}
inline static void Hacl_Bignum_Fproduct_carry_wide_(uint128_t *tmp)
{
uint32_t i;
for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
{
uint32_t ctr = i;
uint128_t tctr = tmp[ctr];
uint128_t tctrp1 = tmp[ctr + (uint32_t)1U];
uint64_t r0 = (uint64_t)tctr & (uint64_t)0x7ffffffffffffU;
uint128_t c = tctr >> (uint32_t)51U;
tmp[ctr] = (uint128_t)r0;
tmp[ctr + (uint32_t)1U] = tctrp1 + c;
}
}
inline static void Hacl_Bignum_Fmul_shift_reduce(uint64_t *output)
{
uint64_t tmp = output[4U];
uint64_t b0;
{
uint32_t i;
for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
{
uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U;
uint64_t z = output[ctr - (uint32_t)1U];
output[ctr] = z;
}
}
output[0U] = tmp;
b0 = output[0U];
output[0U] = (uint64_t)19U * b0;
}
static void
Hacl_Bignum_Fmul_mul_shift_reduce_(uint128_t *output, uint64_t *input, uint64_t *input2)
{
uint32_t i;
uint64_t input2i;
{
uint32_t i0;
for (i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0 = i0 + (uint32_t)1U)
{
uint64_t input2i0 = input2[i0];
Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i0);
Hacl_Bignum_Fmul_shift_reduce(input);
}
}
i = (uint32_t)4U;
input2i = input2[i];
Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
}
inline static void Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2)
{
uint64_t tmp[5U] = { 0U };
memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]);
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
{
uint128_t t[5U];
{
uint32_t _i;
for (_i = 0U; _i < (uint32_t)5U; ++_i)
t[_i] = (uint128_t)(uint64_t)0U;
}
{
uint128_t b4;
uint128_t b0;
uint128_t b4_;
uint128_t b0_;
uint64_t i0;
uint64_t i1;
uint64_t i0_;
uint64_t i1_;
Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2);
Hacl_Bignum_Fproduct_carry_wide_(t);
b4 = t[4U];
b0 = t[0U];
b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
t[4U] = b4_;
t[0U] = b0_;
Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
i0 = output[0U];
i1 = output[1U];
i0_ = i0 & (uint64_t)0x7ffffffffffffU;
i1_ = i1 + (i0 >> (uint32_t)51U);
output[0U] = i0_;
output[1U] = i1_;
}
}
}
inline static void Hacl_Bignum_Fsquare_fsquare__(uint128_t *tmp, uint64_t *output)
{
uint64_t r0 = output[0U];
uint64_t r1 = output[1U];
uint64_t r2 = output[2U];
uint64_t r3 = output[3U];
uint64_t r4 = output[4U];
uint64_t d0 = r0 * (uint64_t)2U;
uint64_t d1 = r1 * (uint64_t)2U;
uint64_t d2 = r2 * (uint64_t)2U * (uint64_t)19U;
uint64_t d419 = r4 * (uint64_t)19U;
uint64_t d4 = d419 * (uint64_t)2U;
uint128_t s0 = (uint128_t)r0 * r0 + (uint128_t)d4 * r1 + (uint128_t)d2 * r3;
uint128_t s1 = (uint128_t)d0 * r1 + (uint128_t)d4 * r2 + (uint128_t)(r3 * (uint64_t)19U) * r3;
uint128_t s2 = (uint128_t)d0 * r2 + (uint128_t)r1 * r1 + (uint128_t)d4 * r3;
uint128_t s3 = (uint128_t)d0 * r3 + (uint128_t)d1 * r2 + (uint128_t)r4 * d419;
uint128_t s4 = (uint128_t)d0 * r4 + (uint128_t)d1 * r3 + (uint128_t)r2 * r2;
tmp[0U] = s0;
tmp[1U] = s1;
tmp[2U] = s2;
tmp[3U] = s3;
tmp[4U] = s4;
}
inline static void Hacl_Bignum_Fsquare_fsquare_(uint128_t *tmp, uint64_t *output)
{
uint128_t b4;
uint128_t b0;
uint128_t b4_;
uint128_t b0_;
uint64_t i0;
uint64_t i1;
uint64_t i0_;
uint64_t i1_;
Hacl_Bignum_Fsquare_fsquare__(tmp, output);
Hacl_Bignum_Fproduct_carry_wide_(tmp);
b4 = tmp[4U];
b0 = tmp[0U];
b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
tmp[4U] = b4_;
tmp[0U] = b0_;
Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
i0 = output[0U];
i1 = output[1U];
i0_ = i0 & (uint64_t)0x7ffffffffffffU;
i1_ = i1 + (i0 >> (uint32_t)51U);
output[0U] = i0_;
output[1U] = i1_;
}
static void
Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, uint128_t *tmp, uint32_t count1)
{
uint32_t i;
Hacl_Bignum_Fsquare_fsquare_(tmp, input);
for (i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U)
Hacl_Bignum_Fsquare_fsquare_(tmp, input);
}
inline static void
Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1)
{
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
{
uint128_t t[5U];
{
uint32_t _i;
for (_i = 0U; _i < (uint32_t)5U; ++_i)
t[_i] = (uint128_t)(uint64_t)0U;
}
memcpy(output, input, (uint32_t)5U * sizeof input[0U]);
Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
}
}
inline static void Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1)
{
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
{
uint128_t t[5U];
{
uint32_t _i;
for (_i = 0U; _i < (uint32_t)5U; ++_i)
t[_i] = (uint128_t)(uint64_t)0U;
}
Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
}
}
inline static void Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z)
{
uint64_t buf[20U] = { 0U };
uint64_t *a0 = buf;
uint64_t *t00 = buf + (uint32_t)5U;
uint64_t *b0 = buf + (uint32_t)10U;
uint64_t *t01;
uint64_t *b1;
uint64_t *c0;
uint64_t *a;
uint64_t *t0;
uint64_t *b;
uint64_t *c;
Hacl_Bignum_Fsquare_fsquare_times(a0, z, (uint32_t)1U);
Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)2U);
Hacl_Bignum_Fmul_fmul(b0, t00, z);
Hacl_Bignum_Fmul_fmul(a0, b0, a0);
Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)1U);
Hacl_Bignum_Fmul_fmul(b0, t00, b0);
Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5U);
t01 = buf + (uint32_t)5U;
b1 = buf + (uint32_t)10U;
c0 = buf + (uint32_t)15U;
Hacl_Bignum_Fmul_fmul(b1, t01, b1);
Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10U);
Hacl_Bignum_Fmul_fmul(c0, t01, b1);
Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20U);
Hacl_Bignum_Fmul_fmul(t01, t01, c0);
Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10U);
Hacl_Bignum_Fmul_fmul(b1, t01, b1);
Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50U);
a = buf;
t0 = buf + (uint32_t)5U;
b = buf + (uint32_t)10U;
c = buf + (uint32_t)15U;
Hacl_Bignum_Fmul_fmul(c, t0, b);
Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100U);
Hacl_Bignum_Fmul_fmul(t0, t0, c);
Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50U);
Hacl_Bignum_Fmul_fmul(t0, t0, b);
Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5U);
Hacl_Bignum_Fmul_fmul(out, t0, a);
}
inline static void Hacl_Bignum_fsum(uint64_t *a, uint64_t *b)
{
uint32_t i;
for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
{
uint64_t xi = a[i];
uint64_t yi = b[i];
a[i] = xi + yi;
}
}
inline static void Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b)
{
uint64_t tmp[5U] = { 0U };
uint64_t b0;
uint64_t b1;
uint64_t b2;
uint64_t b3;
uint64_t b4;
memcpy(tmp, b, (uint32_t)5U * sizeof b[0U]);
b0 = tmp[0U];
b1 = tmp[1U];
b2 = tmp[2U];
b3 = tmp[3U];
b4 = tmp[4U];
tmp[0U] = b0 + (uint64_t)0x3fffffffffff68U;
tmp[1U] = b1 + (uint64_t)0x3ffffffffffff8U;
tmp[2U] = b2 + (uint64_t)0x3ffffffffffff8U;
tmp[3U] = b3 + (uint64_t)0x3ffffffffffff8U;
tmp[4U] = b4 + (uint64_t)0x3ffffffffffff8U;
{
uint32_t i;
for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
{
uint64_t xi = a[i];
uint64_t yi = tmp[i];
a[i] = yi - xi;
}
}
}
inline static void Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s)
{
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
{
uint128_t tmp[5U];
{
uint32_t _i;
for (_i = 0U; _i < (uint32_t)5U; ++_i)
tmp[_i] = (uint128_t)(uint64_t)0U;
}
{
uint128_t b4;
uint128_t b0;
uint128_t b4_;
uint128_t b0_;
{
uint32_t i;
for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
{
uint64_t xi = b[i];
tmp[i] = (uint128_t)xi * s;
}
}
Hacl_Bignum_Fproduct_carry_wide_(tmp);
b4 = tmp[4U];
b0 = tmp[0U];
b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
tmp[4U] = b4_;
tmp[0U] = b0_;
Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
}
}
}
inline static void Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b)
{
Hacl_Bignum_Fmul_fmul(output, a, b);
}
inline static void Hacl_Bignum_crecip(uint64_t *output, uint64_t *input)
{
Hacl_Bignum_Crecip_crecip(output, input);
}
static void
Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
{
uint32_t i = ctr - (uint32_t)1U;
uint64_t ai = a[i];
uint64_t bi = b[i];
uint64_t x = swap1 & (ai ^ bi);
uint64_t ai1 = ai ^ x;
uint64_t bi1 = bi ^ x;
a[i] = ai1;
b[i] = bi1;
}
static void
Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
{
if (!(ctr == (uint32_t)0U))
{
uint32_t i;
Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr);
i = ctr - (uint32_t)1U;
Hacl_EC_Point_swap_conditional_(a, b, swap1, i);
}
}
static void Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap)
{
uint64_t swap1 = (uint64_t)0U - iswap;
Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5U);
Hacl_EC_Point_swap_conditional_(a + (uint32_t)5U, b + (uint32_t)5U, swap1, (uint32_t)5U);
}
static void Hacl_EC_Point_copy(uint64_t *output, uint64_t *input)
{
memcpy(output, input, (uint32_t)5U * sizeof input[0U]);
memcpy(output + (uint32_t)5U,
input + (uint32_t)5U,
(uint32_t)5U * sizeof (input + (uint32_t)5U)[0U]);
}
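/* Unpacks a 32-byte little-endian field element into five 51-bit limbs
   using overlapping 64-bit loads at byte offsets 0, 6, 12, 19, 24. */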
static void Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input)
{
uint64_t i0 = load64_le(input);
uint8_t *x00 = input + (uint32_t)6U;
uint64_t i1 = load64_le(x00);
uint8_t *x01 = input + (uint32_t)12U;
uint64_t i2 = load64_le(x01);
uint8_t *x02 = input + (uint32_t)19U;
uint64_t i3 = load64_le(x02);
uint8_t *x0 = input + (uint32_t)24U;
uint64_t i4 = load64_le(x0);
uint64_t output0 = i0 & (uint64_t)0x7ffffffffffffU;
uint64_t output1 = i1 >> (uint32_t)3U & (uint64_t)0x7ffffffffffffU;
uint64_t output2 = i2 >> (uint32_t)6U & (uint64_t)0x7ffffffffffffU;
uint64_t output3 = i3 >> (uint32_t)1U & (uint64_t)0x7ffffffffffffU;
uint64_t output4 = i4 >> (uint32_t)12U & (uint64_t)0x7ffffffffffffU;
output[0U] = output0;
output[1U] = output1;
output[2U] = output2;
output[3U] = output3;
output[4U] = output4;
}
static void Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input)
{
uint64_t t0 = input[0U];
uint64_t t1 = input[1U];
uint64_t t2 = input[2U];
uint64_t t3 = input[3U];
uint64_t t4 = input[4U];
uint64_t t1_ = t1 + (t0 >> (uint32_t)51U);
uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU;
uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U);
uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU;
uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U);
uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU;
uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U);
uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU;
input[0U] = t0_;
input[1U] = t1__;
input[2U] = t2__;
input[3U] = t3__;
input[4U] = t4_;
}
static void Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input)
{
Hacl_EC_Format_fcontract_first_carry_pass(input);
Hacl_Bignum_Modulo_carry_top(input);
}
static void Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input)
{
uint64_t t0 = input[0U];
uint64_t t1 = input[1U];
uint64_t t2 = input[2U];
uint64_t t3 = input[3U];
uint64_t t4 = input[4U];
uint64_t t1_ = t1 + (t0 >> (uint32_t)51U);
uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU;
uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U);
uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU;
uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U);
uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU;
uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U);
uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU;
input[0U] = t0_;
input[1U] = t1__;
input[2U] = t2__;
input[3U] = t3__;
input[4U] = t4_;
}
static void Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input)
{
uint64_t i0;
uint64_t i1;
uint64_t i0_;
uint64_t i1_;
Hacl_EC_Format_fcontract_second_carry_pass(input);
Hacl_Bignum_Modulo_carry_top(input);
i0 = input[0U];
i1 = input[1U];
i0_ = i0 & (uint64_t)0x7ffffffffffffU;
i1_ = i1 + (i0 >> (uint32_t)51U);
input[0U] = i0_;
input[1U] = i1_;
}
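/* Constant-time final subtraction: the mask is all-ones exactly when
   the value is >= 2^255-19, in which case the prime is subtracted once. */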
static void Hacl_EC_Format_fcontract_trim(uint64_t *input)
{
uint64_t a0 = input[0U];
uint64_t a1 = input[1U];
uint64_t a2 = input[2U];
uint64_t a3 = input[3U];
uint64_t a4 = input[4U];
uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffedU);
uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffffU);
uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffffU);
uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffffU);
uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffffU);
uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffedU & mask);
uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffffU & mask);
uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffffU & mask);
uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffffU & mask);
uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffffU & mask);
input[0U] = a0_;
input[1U] = a1_;
input[2U] = a2_;
input[3U] = a3_;
input[4U] = a4_;
}
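/* Packs five fully reduced 51-bit limbs back into 32 little-endian
   bytes. */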
static void Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input)
{
uint64_t t0 = input[0U];
uint64_t t1 = input[1U];
uint64_t t2 = input[2U];
uint64_t t3 = input[3U];
uint64_t t4 = input[4U];
uint64_t o0 = t1 << (uint32_t)51U | t0;
uint64_t o1 = t2 << (uint32_t)38U | t1 >> (uint32_t)13U;
uint64_t o2 = t3 << (uint32_t)25U | t2 >> (uint32_t)26U;
uint64_t o3 = t4 << (uint32_t)12U | t3 >> (uint32_t)39U;
uint8_t *b0 = output;
uint8_t *b1 = output + (uint32_t)8U;
uint8_t *b2 = output + (uint32_t)16U;
uint8_t *b3 = output + (uint32_t)24U;
store64_le(b0, o0);
store64_le(b1, o1);
store64_le(b2, o2);
store64_le(b3, o3);
}
static void Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input)
{
Hacl_EC_Format_fcontract_first_carry_full(input);
Hacl_EC_Format_fcontract_second_carry_full(input);
Hacl_EC_Format_fcontract_trim(input);
Hacl_EC_Format_fcontract_store(output, input);
}
static void Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point)
{
uint64_t *x = point;
uint64_t *z = point + (uint32_t)5U;
uint64_t buf[10U] = { 0U };
uint64_t *zmone = buf;
uint64_t *sc = buf + (uint32_t)5U;
Hacl_Bignum_crecip(zmone, z);
Hacl_Bignum_fmul(sc, x, zmone);
Hacl_EC_Format_fcontract(scalar, sc);
}
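/* Montgomery differential add-and-double: given nQ in p, (n+1)Q in pq,
   and the affine x-coordinate of Q in qmqp, computes 2nQ into pp and
   (2n+1)Q into ppq. 121665 = (486662-2)/4 is the Curve25519 constant. */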
static void
Hacl_EC_AddAndDouble_fmonty(
uint64_t *pp,
uint64_t *ppq,
uint64_t *p,
uint64_t *pq,
uint64_t *qmqp
)
{
uint64_t *qx = qmqp;
uint64_t *x2 = pp;
uint64_t *z2 = pp + (uint32_t)5U;
uint64_t *x3 = ppq;
uint64_t *z3 = ppq + (uint32_t)5U;
uint64_t *x = p;
uint64_t *z = p + (uint32_t)5U;
uint64_t *xprime = pq;
uint64_t *zprime = pq + (uint32_t)5U;
uint64_t buf[40U] = { 0U };
uint64_t *origx = buf;
uint64_t *origxprime0 = buf + (uint32_t)5U;
uint64_t *xxprime0 = buf + (uint32_t)25U;
uint64_t *zzprime0 = buf + (uint32_t)30U;
uint64_t *origxprime;
uint64_t *xx0;
uint64_t *zz0;
uint64_t *xxprime;
uint64_t *zzprime;
uint64_t *zzzprime;
uint64_t *zzz;
uint64_t *xx;
uint64_t *zz;
uint64_t scalar;
memcpy(origx, x, (uint32_t)5U * sizeof x[0U]);
Hacl_Bignum_fsum(x, z);
Hacl_Bignum_fdifference(z, origx);
memcpy(origxprime0, xprime, (uint32_t)5U * sizeof xprime[0U]);
Hacl_Bignum_fsum(xprime, zprime);
Hacl_Bignum_fdifference(zprime, origxprime0);
Hacl_Bignum_fmul(xxprime0, xprime, z);
Hacl_Bignum_fmul(zzprime0, x, zprime);
origxprime = buf + (uint32_t)5U;
xx0 = buf + (uint32_t)15U;
zz0 = buf + (uint32_t)20U;
xxprime = buf + (uint32_t)25U;
zzprime = buf + (uint32_t)30U;
zzzprime = buf + (uint32_t)35U;
memcpy(origxprime, xxprime, (uint32_t)5U * sizeof xxprime[0U]);
Hacl_Bignum_fsum(xxprime, zzprime);
Hacl_Bignum_fdifference(zzprime, origxprime);
Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1U);
Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1U);
Hacl_Bignum_fmul(z3, zzzprime, qx);
Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1U);
Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1U);
zzz = buf + (uint32_t)10U;
xx = buf + (uint32_t)15U;
zz = buf + (uint32_t)20U;
Hacl_Bignum_fmul(x2, xx, zz);
Hacl_Bignum_fdifference(zz, xx);
scalar = (uint64_t)121665U;
Hacl_Bignum_fscalar(zzz, zz, scalar);
Hacl_Bignum_fsum(zzz, xx);
Hacl_Bignum_fmul(z2, zzz, zz);
}
static void
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
uint64_t *nq,
uint64_t *nqpq,
uint64_t *nq2,
uint64_t *nqpq2,
uint64_t *q,
uint8_t byt
)
{
uint64_t bit0 = (uint64_t)(byt >> (uint32_t)7U);
uint64_t bit;
Hacl_EC_Point_swap_conditional(nq, nqpq, bit0);
Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q);
bit = (uint64_t)(byt >> (uint32_t)7U);
Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit);
}
static void
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(
uint64_t *nq,
uint64_t *nqpq,
uint64_t *nq2,
uint64_t *nqpq2,
uint64_t *q,
uint8_t byt
)
{
uint8_t byt1;
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
byt1 = byt << (uint32_t)1U;
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1);
}
static void
Hacl_EC_Ladder_SmallLoop_cmult_small_loop(
uint64_t *nq,
uint64_t *nqpq,
uint64_t *nq2,
uint64_t *nqpq2,
uint64_t *q,
uint8_t byt,
uint32_t i
)
{
if (!(i == (uint32_t)0U))
{
uint32_t i_ = i - (uint32_t)1U;
uint8_t byt_;
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt);
byt_ = byt << (uint32_t)2U;
Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt_, i_);
}
}
static void
Hacl_EC_Ladder_BigLoop_cmult_big_loop(
uint8_t *n1,
uint64_t *nq,
uint64_t *nqpq,
uint64_t *nq2,
uint64_t *nqpq2,
uint64_t *q,
uint32_t i
)
{
if (!(i == (uint32_t)0U))
{
uint32_t i1 = i - (uint32_t)1U;
uint8_t byte = n1[i1];
Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, (uint32_t)4U);
Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1);
}
}
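/* Montgomery ladder: processes the 32 scalar bytes starting from the
   most significant one, performing a constant-time conditional swap and
   one add-and-double per scalar bit. */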
static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q)
{
uint64_t point_buf[40U] = { 0U };
uint64_t *nq = point_buf;
uint64_t *nqpq = point_buf + (uint32_t)10U;
uint64_t *nq2 = point_buf + (uint32_t)20U;
uint64_t *nqpq2 = point_buf + (uint32_t)30U;
Hacl_EC_Point_copy(nqpq, q);
nq[0U] = (uint64_t)1U;
Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, (uint32_t)32U);
Hacl_EC_Point_copy(result, nq);
}
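/* X25519 scalar multiplication: the secret scalar is clamped per
   RFC 7748 (clear the low 3 bits, clear bit 255, set bit 254) before
   running the ladder and converting the result back to affine form. */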
void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
{
uint64_t buf0[10U] = { 0U };
uint64_t *x0 = buf0;
uint64_t *z = buf0 + (uint32_t)5U;
uint64_t *q;
Hacl_EC_Format_fexpand(x0, basepoint);
z[0U] = (uint64_t)1U;
q = buf0;
{
uint8_t e[32U] = { 0U };
uint8_t e0;
uint8_t e31;
uint8_t e01;
uint8_t e311;
uint8_t e312;
uint8_t *scalar;
memcpy(e, secret, (uint32_t)32U * sizeof secret[0U]);
e0 = e[0U];
e31 = e[31U];
e01 = e0 & (uint8_t)248U;
e311 = e31 & (uint8_t)127U;
e312 = e311 | (uint8_t)64U;
e[0U] = e01;
e[31U] = e312;
scalar = e;
{
uint64_t buf[15U] = { 0U };
uint64_t *nq = buf;
uint64_t *x = nq;
x[0U] = (uint64_t)1U;
Hacl_EC_Ladder_cmult(nq, scalar, q);
Hacl_EC_Format_scalar_of_point(mypublic, nq);
}
}
}

View file

@ -0,0 +1,294 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/rand/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "third_party/mbedtls/bignum.h"
#include "third_party/mbedtls/ecp.h"
#include "third_party/mbedtls/ecp_internal.h"
#include "third_party/mbedtls/math.h"
#ifdef MBEDTLS_ECP_C
int ecp_mod_p384_old(mbedtls_mpi *);
int GetEntropy(void *c, unsigned char *p, size_t n) {
rngset(p, n, rand64, -1);
return 0;
}
TEST(secp384r1, testIsTheSame) {
int i;
mbedtls_mpi A, B;
mbedtls_mpi_init(&A);
mbedtls_mpi_init(&B);
mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0);
mbedtls_mpi_copy(&B, &A);
ecp_mod_p384(&A);
ecp_mod_p384_old(&B);
for (i = 0; i < 1000; ++i) {
if (memcmp(A.p, B.p, 12 * 8)) {
for (i = 0; i < 12; ++i) {
printf("0x%016lx vs. 0x%016lx %d\n", A.p[i], B.p[i], A.p[i] == B.p[i]);
}
exit(1);
}
}
mbedtls_mpi_free(&B);
mbedtls_mpi_free(&A);
}
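// Returns true if the 7-limb (448-bit) value is >= the P-384 prime
// p = 2^384 - 2^128 - 2^96 + 2^32 - 1, whose little-endian limbs are
// {0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe, ~0, ~0, ~0}.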
static inline bool mbedtls_p384_gte(uint64_t p[7]) {
return (((int64_t)p[6] > 0 ||
(p[5] > 0xffffffffffffffff ||
(p[5] == 0xffffffffffffffff &&
(p[4] > 0xffffffffffffffff ||
(p[4] == 0xffffffffffffffff &&
(p[3] > 0xffffffffffffffff ||
(p[3] == 0xffffffffffffffff &&
(p[2] > 0xfffffffffffffffe ||
(p[2] == 0xfffffffffffffffe &&
(p[1] > 0xffffffff00000000 ||
(p[1] == 0xffffffff00000000 &&
(p[0] > 0x00000000ffffffff ||
(p[0] == 0x00000000ffffffff))))))))))))));
}
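// Adds p once; mbedtls_p384_mod() uses this to correct upward while the
// signed top limb is negative.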
static inline void mbedtls_p384_gro(uint64_t p[7]) {
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
asm("addq\t%1,%0\n\t"
"adcq\t%2,8+%0\n\t"
"adcq\t%3,16+%0\n\t"
"adcq\t%4,24+%0\n\t"
"adcq\t%4,32+%0\n\t"
"adcq\t%4,40+%0\n\t"
"adcq\t$0,48+%0"
: "+o"(*p)
: "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
"i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
: "memory", "cc");
#else
uint64_t c;
ADC(p[0], p[0], 0x00000000ffffffff, 0, c);
ADC(p[1], p[1], 0xffffffff00000000, c, c);
ADC(p[2], p[2], 0xfffffffffffffffe, c, c);
ADC(p[3], p[3], 0xffffffffffffffff, c, c);
ADC(p[4], p[4], 0xffffffffffffffff, c, c);
ADC(p[5], p[5], 0xffffffffffffffff, c, c);
ADC(p[6], p[6], 0, c, c);
#endif
}
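// Subtracts p once; used to correct downward while the value is >= p.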
static inline void mbedtls_p384_red(uint64_t p[7]) {
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
asm("subq\t%1,%0\n\t"
"sbbq\t%2,8+%0\n\t"
"sbbq\t%3,16+%0\n\t"
"sbbq\t%4,24+%0\n\t"
"sbbq\t%4,32+%0\n\t"
"sbbq\t%4,40+%0\n\t"
"sbbq\t$0,48+%0"
: "+o"(*p)
: "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
"i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
: "memory", "cc");
#else
uint64_t c;
SBB(p[0], p[0], 0x00000000ffffffff, 0, c);
SBB(p[1], p[1], 0xffffffff00000000, c, c);
SBB(p[2], p[2], 0xfffffffffffffffe, c, c);
SBB(p[3], p[3], 0xffffffffffffffff, c, c);
SBB(p[4], p[4], 0xffffffffffffffff, c, c);
SBB(p[5], p[5], 0xffffffffffffffff, c, c);
SBB(p[6], p[6], 0, c, c);
#endif
}
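// Subtracts p repeatedly until the value drops below the modulus.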
static inline void mbedtls_p384_rum(uint64_t p[7]) {
while (mbedtls_p384_gte(p)) mbedtls_p384_red(p);
}
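// Full reduction: secp384r1() performs the fast folding step, which may
// leave the result negative or above p, so a few corrective additions
// or subtractions finish the job.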
static inline void mbedtls_p384_mod(uint64_t X[12]) {
secp384r1(X);
if ((int64_t)X[6] < 0) {
do {
mbedtls_p384_gro(X);
} while ((int64_t)X[6] < 0);
} else {
while (mbedtls_p384_gte(X)) {
mbedtls_p384_red(X);
}
}
}
TEST(secp384r1, needsDownwardCorrection) {
int i;
uint64_t P[6] = {
0x00000000ffffffff, //
0xffffffff00000000, //
0xfffffffffffffffe, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
};
uint64_t X[12] = {
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
};
uint64_t W[12] /* == X mod P */ = {
0xfffffffe00000000, //
0x0000000200000000, //
0xfffffffe00000000, //
0x0000000200000000, //
0x0000000000000001, //
};
mbedtls_p384_mod(X);
if (memcmp(W, X, 12 * 8)) {
for (i = 0; i < 12; ++i) {
printf("0x%016lx vs. 0x%016lx %d\n", W[i], X[i], W[i] == X[i]);
}
exit(1);
}
}
TEST(secp384r1, needsUpwardCorrection) {
int i;
uint64_t P[6] = {
0x00000000ffffffff, //
0xffffffff00000000, //
0xfffffffffffffffe, //
0xffffffffffffffff, //
0xffffffffffffffff, //
0xffffffffffffffff, //
};
uint64_t X[12] = {
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x0000000000000000, //
0x00000000ffffffff, //
};
uint64_t W[12] /* == X mod P */ = {
0xffffffffffffffff, //
0x0000000000000000, //
0xfffffffefffffffd, //
0x0000000100000000, //
0x0000000000000000, //
0x00000001ffffffff, //
};
mbedtls_p384_mod(X);
if (memcmp(W, X, 12 * 8)) {
for (i = 0; i < 12; ++i) {
printf("0x%016lx vs. 0x%016lx %d\n", W[i], X[i], W[i] == X[i]);
}
exit(1);
}
}
BENCH(secp384r1, bench) {
mbedtls_mpi A;
mbedtls_mpi_init(&A);
mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0);
EZBENCH2("secp384r1", donothing, secp384r1(A.p));
EZBENCH2("ecp_mod_p384", donothing, ecp_mod_p384(&A));
EZBENCH2("ecp_mod_p384_old", donothing, ecp_mod_p384_old(&A));
mbedtls_mpi_free(&A);
}
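// Two ways of doubling a 448-bit value mod p, benchmarked below: an
// inline shl/rcl carry chain versus portable shifts, each followed by
// the same conditional reduction.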
void mbedtls_p384_shl_a(uint64_t p[7]) {
asm("shlq\t%0\n\t"
"rclq\t8+%0\n\t"
"rclq\t16+%0\n\t"
"rclq\t24+%0\n\t"
"rclq\t32+%0\n\t"
"rclq\t40+%0\n\t"
"rclq\t48+%0\n\t"
: "+o"(*p)
: /* no inputs */
: "memory", "cc");
mbedtls_p384_rum(p);
}
void mbedtls_p384_shl_b(uint64_t p[7]) {
p[6] = p[5] >> 63;
p[5] = p[5] << 1 | p[4] >> 63;
p[4] = p[4] << 1 | p[3] >> 63;
p[3] = p[3] << 1 | p[2] >> 63;
p[2] = p[2] << 1 | p[1] >> 63;
p[1] = p[1] << 1 | p[0] >> 63;
p[0] = p[0] << 1;
mbedtls_p384_rum(p);
}
BENCH(shl, bench) {
uint64_t A[7] = {0};
EZBENCH2("mbedtls_p384_shl_a", donothing, mbedtls_p384_shl_a(A));
EZBENCH2("mbedtls_p384_shl_b", donothing, mbedtls_p384_shl_b(A));
}
void mbedtls_p384_red_a(uint64_t p[7]) {
asm("subq\t%1,%0\n\t"
"sbbq\t%2,8+%0\n\t"
"sbbq\t%3,16+%0\n\t"
"sbbq\t%4,24+%0\n\t"
"sbbq\t%4,32+%0\n\t"
"sbbq\t%4,40+%0\n\t"
"sbbq\t$0,48+%0"
: "+o"(*p)
: "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
"i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
: "memory", "cc");
}
void mbedtls_p384_red_b(uint64_t p[7]) {
uint64_t c;
SBB(p[0], p[0], 0x00000000ffffffff, 0, c);
SBB(p[1], p[1], 0xffffffff00000000, c, c);
SBB(p[2], p[2], 0xfffffffffffffffe, c, c);
SBB(p[3], p[3], 0xffffffffffffffff, c, c);
SBB(p[4], p[4], 0xffffffffffffffff, c, c);
SBB(p[5], p[5], 0xffffffffffffffff, c, c);
SBB(p[6], p[6], 0, c, c);
}
BENCH(red, bench) {
uint64_t A[7] = {0};
EZBENCH2("mbedtls_p384_red_a", donothing, mbedtls_p384_red_a(A));
EZBENCH2("mbedtls_p384_red_b", donothing, mbedtls_p384_red_b(A));
}
#endif /* MBEDTLS_ECP_C */

View file

@ -78,7 +78,9 @@ THIRD_PARTY_MBEDTLS_TEST_COMS = \
o/$(MODE)/third_party/mbedtls/test/test_suite_timing.com \
o/$(MODE)/third_party/mbedtls/test/test_suite_version.com \
o/$(MODE)/third_party/mbedtls/test/test_suite_x509parse.com \
o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com
o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com \
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com \
o/$(MODE)/third_party/mbedtls/test/everest_test.com
THIRD_PARTY_MBEDTLS_TEST_TESTS = \
$(THIRD_PARTY_MBEDTLS_TEST_COMS:%=%.ok)
@ -1340,3 +1342,22 @@ o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com.dbg: \
$(CRT) \
$(APE)
@$(APELINK)
o/$(MODE)/third_party/mbedtls/test/everest_test.com: o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg
o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg: \
$(THIRD_PARTY_MBEDTLS_TEST_DEPS) \
o/$(MODE)/third_party/mbedtls/test/everest_test.o \
o/$(MODE)/third_party/mbedtls/test/everest_unravaged.o \
$(LIBC_TESTMAIN) \
$(CRT) \
$(APE)
@$(APELINK)
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com: o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg: \
$(THIRD_PARTY_MBEDTLS_TEST_DEPS) \
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.o \
$(LIBC_TESTMAIN) \
$(CRT) \
$(APE)
@$(APELINK)