mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Make stronger crypto nearly as fast
One of the disadvantages of x25519 and ℘256 is that they only provide 126 bits of security, so that seems like a weak link in the chain if we're using ECDHE-ECDSA-AES256-GCM-SHA384. The U.S. government wants classified data to be encrypted using a curve at least as strong as ℘384, which provides 192 bits of security, but if you read the consensus of Stack Exchange it would give you the impression that ℘384 is three times slower. This change (as well as the previous one) makes ℘384 three times as fast by tuning its modulus and multiplication subroutines with new tests that should convincingly show that the optimized code behaves the same way as the old code. Some of the diff noise from the previous change is now removed too, so that our vendored fork can be more easily compared with upstream sources. So you can now have stronger cryptography without compromises. ℘384 modulus Justine l: 28𝑐 9𝑛𝑠 ℘384 modulus MbedTLS NIST l: 127𝑐 41𝑛𝑠 ℘384 modulus MbedTLS MPI l: 1,850𝑐 597𝑛𝑠 The benchmarks above show the improvements made by secp384r1() which is an important function since it needs to be called 13,000 times whenever someone establishes a connection to your web server. The same's true of Mul6x6Adx() which is able to multiply 384-bit numbers in 73 cycles, but only if your CPU was purchased after 2014 when Broadwell was introduced.
This commit is contained in:
parent
398f0c16fb
commit
ea83cc0ad0
27 changed files with 4291 additions and 3361 deletions
|
@ -1,39 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
//	Computes C = A + B.
//
//	Adds two equally sized arrays of 64-bit words, starting at index
//	zero and propagating the carry of each word into the next one.
//
//	@param	rdi is C
//	@param	rsi is A
//	@param	rdx is B
//	@param	rcx is number of additions (64-bit count, may be zero)
//	@return	al has carry out of the last addition, or 0 if rcx is 0
adc:	.leafprologue
	test	%rcx,%rcx		// check the whole 64-bit count; clears CF
	jz	1f
	xor	%r9d,%r9d		// r9 = word index; clears CF for first adc
0:	mov	(%rsi,%r9,8),%rax
	adc	(%rdx,%r9,8),%rax	// A[i] + B[i] + carry from previous word
	mov	%rax,(%rdi,%r9,8)
	lea	1(%r9),%r9		// 64-bit index bump that leaves CF alone
	dec	%rcx			// dec sets ZF but preserves CF
	jnz	0b
1:	setb	%al			// CF is still that of the last adc (or 0)
	.leafepilogue
	.endfn	adc,globl
|
|
@ -18,34 +18,47 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Computes 512-bit product of 256-bit and 256-bit numbers.
|
||||
//
|
||||
// Instructions: 88
|
||||
// Total Cycles: 36
|
||||
// Total uOps: 120
|
||||
// uOps Per Cycle: 3.33
|
||||
// IPC: 2.44
|
||||
// Block RThroughput: 20.0
|
||||
//
|
||||
// @param rdi receives 8 quadword result
|
||||
// @param rsi is left hand side which must have 4 quadwords
|
||||
// @param rdx is right hand side which must have 4 quadwords
|
||||
// @note words are host endian while array is little endian
|
||||
// @mayalias
|
||||
Mul4x4Adx:
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
push %r15
|
||||
push %r14
|
||||
push %r13
|
||||
push %r12
|
||||
sub $56,%rsp
|
||||
mov %r15,-8(%rbp)
|
||||
mov %r14,-16(%rbp)
|
||||
mov %r13,-24(%rbp)
|
||||
mov %r12,-32(%rbp)
|
||||
mov %rbx,-40(%rbp)
|
||||
mov %rdx,%r12
|
||||
push %rbx
|
||||
sub $16,%rsp
|
||||
mov (%rdx),%rdx
|
||||
mov (%rsi),%rax
|
||||
mov 16(%rsi),%r11
|
||||
mov 24(%rsi),%r10
|
||||
xor %r13d,%r13d
|
||||
mulx %rax,%rbx,%rax
|
||||
mov %rbx,-48(%rbp)
|
||||
mov 8(%rsi),%rbx
|
||||
mulx %rbx,%rdx,%rcx
|
||||
adox %rdx,%rax
|
||||
add %rdx,%rax
|
||||
mov (%r12),%rdx
|
||||
mulx %r11,%rdx,%r9
|
||||
adox %rdx,%rcx
|
||||
adc %rdx,%rcx
|
||||
mov (%r12),%rdx
|
||||
mulx %r10,%rdx,%r8
|
||||
adox %rdx,%r9
|
||||
adox %r13,%r8
|
||||
adc %rdx,%r9
|
||||
adc $0,%r8
|
||||
xor %r13d,%r13d
|
||||
mov (%rsi),%r14
|
||||
mov 8(%r12),%rdx
|
||||
|
@ -105,12 +118,103 @@ Mul4x4Adx:
|
|||
adox %r14,%r10
|
||||
mov %rsi,(%rdi)
|
||||
mov %r10,56(%rdi)
|
||||
add $16,%rsp
|
||||
pop %rbx
|
||||
pop %r12
|
||||
pop %r13
|
||||
pop %r14
|
||||
pop %r15
|
||||
pop %rbp
|
||||
mov -8(%rbp),%r15
|
||||
mov -16(%rbp),%r14
|
||||
mov -24(%rbp),%r13
|
||||
mov -32(%rbp),%r12
|
||||
mov -40(%rbp),%rbx
|
||||
leave
|
||||
ret
|
||||
.endfn Mul4x4Adx,globl
|
||||
|
||||
.end
|
||||
TIMELINE VIEW 0123456789 012345
|
||||
Index 0123456789 0123456789
|
||||
[0,0] DeER . . . . . . . subq $56, %rsp
|
||||
[0,1] DeER . . . . . . . movq %r15, -8(%rbp)
|
||||
[0,2] D=eER. . . . . . . movq %r14, -16(%rbp)
|
||||
[0,3] D==eER . . . . . . movq %r13, -24(%rbp)
|
||||
[0,4] D===eER . . . . . . movq %r12, -32(%rbp)
|
||||
[0,5] D====eER . . . . . . movq %rbx, -40(%rbp)
|
||||
[0,6] .DeE---R . . . . . . movq %rdx, %r12
|
||||
[0,7] .DeeeeeER . . . . . . movq (%rdx), %rdx
|
||||
[0,8] .D=eeeeeER. . . . . . movq (%rsi), %rax
|
||||
[0,9] .D=eeeeeER. . . . . . movq 16(%rsi), %r11
|
||||
[0,10] .D==eeeeeER . . . . . movq 24(%rsi), %r10
|
||||
[0,11] . D=====eeeeER . . . . . mulxq %rax, %rbx, %rax
|
||||
[0,12] . D========eER . . . . . movq %rbx, -48(%rbp)
|
||||
[0,13] . D=eeeeeE---R . . . . . movq 8(%rsi), %rbx
|
||||
[0,14] . D=====eeeeER. . . . . mulxq %rbx, %rdx, %rcx
|
||||
[0,15] . D========eER. . . . . addq %rdx, %rax
|
||||
[0,16] . D=eeeeeE---R. . . . . movq (%r12), %rdx
|
||||
[0,17] . D=====eeeeER . . . . mulxq %r11, %rdx, %r9
|
||||
[0,18] . D========eER . . . . adcq %rdx, %rcx
|
||||
[0,19] . DeeeeeE----R . . . . movq (%r12), %rdx
|
||||
[0,20] . D=====eeeeER . . . . mulxq %r10, %rdx, %r8
|
||||
[0,21] . D========eER . . . . adcq %rdx, %r9
|
||||
[0,22] . D=========eER . . . . adcq $0, %r8
|
||||
[0,23] . D-----------R . . . . xorl %r13d, %r13d
|
||||
[0,24] . .DeeeeeE----R . . . . movq (%rsi), %r14
|
||||
[0,25] . .DeeeeeE----R . . . . movq 8(%r12), %rdx
|
||||
[0,26] . .D=====eeeeER . . . . mulxq %r14, %r14, %r15
|
||||
[0,27] . .D========eER . . . . adoxq %r14, %rax
|
||||
[0,28] . . D========eER . . . . adcxq %r15, %rcx
|
||||
[0,29] . . D========eER . . . . movq %rax, -56(%rbp)
|
||||
[0,30] . . D=====eeeeER . . . . mulxq %rbx, %r14, %rax
|
||||
[0,31] . . D=========eER. . . . adoxq %r14, %rcx
|
||||
[0,32] . . D=========eER . . . adcxq %rax, %r9
|
||||
[0,33] . . D=====eeeeE-R . . . mulxq %r11, %r14, %rax
|
||||
[0,34] . . D==========eER . . . adoxq %r14, %r9
|
||||
[0,35] . . D===========eER . . . adcxq %rax, %r8
|
||||
[0,36] . . D=====eeeeE--R . . . mulxq %r10, %rdx, %rax
|
||||
[0,37] . . D===========eER . . . adoxq %rdx, %r8
|
||||
[0,38] . . DeeeeeE-------R . . . movq 16(%r12), %rdx
|
||||
[0,39] . . D============eER. . . adcxq %r13, %rax
|
||||
[0,40] . . D============eER . . adoxq %r13, %rax
|
||||
[0,41] . . DeeeeeE--------R . . movq (%rsi), %r13
|
||||
[0,42] . . D=====E--------R . . xorl %r15d, %r15d
|
||||
[0,43] . . D=====eeeeE----R . . mulxq %r13, %r13, %r14
|
||||
[0,44] . . .D=======eE----R . . adoxq %r13, %rcx
|
||||
[0,45] . . .D========eE---R . . adcxq %r14, %r9
|
||||
[0,46] . . .D=====eeeeE---R . . mulxq %rbx, %r14, %r13
|
||||
[0,47] . . .D=========eE--R . . adoxq %r14, %r9
|
||||
[0,48] . . . D=========eE-R . . adcxq %r13, %r8
|
||||
[0,49] . . . D=====eeeeE--R . . mulxq %r11, %r14, %r13
|
||||
[0,50] . . . D==========eER . . adoxq %r14, %r8
|
||||
[0,51] . . . D===========eER . . adcxq %r13, %rax
|
||||
[0,52] . . . DeeeeeE------R . . movq (%rsi), %rsi
|
||||
[0,53] . . . D=====eeeeE--R . . mulxq %r10, %rdx, %r13
|
||||
[0,54] . . . D===========eER . . adoxq %rdx, %rax
|
||||
[0,55] . . . D============eER . . adcxq %r15, %r13
|
||||
[0,56] . . . DeeeeeE-------R . . movq 24(%r12), %rdx
|
||||
[0,57] . . . D============eER. . adoxq %r15, %r13
|
||||
[0,58] . . . D=====eeeeE----R. . mulxq %rsi, %r12, %rsi
|
||||
[0,59] . . . D======E-------R. . xorl %r14d, %r14d
|
||||
[0,60] . . . D========eE---R. . adoxq %r12, %r9
|
||||
[0,61] . . . D=========eE--R. . adcxq %rsi, %r8
|
||||
[0,62] . . . D=====eeeeE---R. . mulxq %rbx, %rsi, %rbx
|
||||
[0,63] . . . D==========eE-R. . adoxq %rsi, %r8
|
||||
[0,64] . . . .D==========eER. . adcxq %rbx, %rax
|
||||
[0,65] . . . .D=====eeeeE--R. . mulxq %r11, %r11, %rsi
|
||||
[0,66] . . . .DeeeeeE------R. . movq -56(%rbp), %rbx
|
||||
[0,67] . . . .D===eE-------R. . movq %rcx, 16(%rdi)
|
||||
[0,68] . . . . D==========eER . adcxq %rsi, %r13
|
||||
[0,69] . . . . DeeeeeE------R . movq -48(%rbp), %rsi
|
||||
[0,70] . . . . D====eE------R . movq %rbx, 8(%rdi)
|
||||
[0,71] . . . . D===========eER . adoxq %r11, %rax
|
||||
[0,72] . . . . D=======eE----R . movq %r9, 24(%rdi)
|
||||
[0,73] . . . . D=========eE--R . movq %r8, 32(%rdi)
|
||||
[0,74] . . . . D===========eER . movq %rax, 40(%rdi)
|
||||
[0,75] . . . . D====eeeeE----R . mulxq %r10, %rdx, %r10
|
||||
[0,76] . . . . D===========eER . adoxq %rdx, %r13
|
||||
[0,77] . . . . D============eER . adcxq %r14, %r10
|
||||
[0,78] . . . . D===========eER . movq %r13, 48(%rdi)
|
||||
[0,79] . . . . D============eER. adoxq %r14, %r10
|
||||
[0,80] . . . . D============eER. movq %rsi, (%rdi)
|
||||
[0,81] . . . . D=============eER movq %r10, 56(%rdi)
|
||||
[0,82] . . . . DeeeeeE---------R movq -8(%rbp), %r15
|
||||
[0,83] . . . . DeeeeeE---------R movq -16(%rbp), %r14
|
||||
[0,84] . . . . DeeeeeE--------R movq -24(%rbp), %r13
|
||||
[0,85] . . . . DeeeeeE--------R movq -32(%rbp), %r12
|
||||
[0,86] . . . . D=eeeeeE-------R movq -40(%rbp), %rbx
|
||||
[0,87] . . . . D===eE---------R addq $56, %rsp
|
||||
|
|
|
@ -18,37 +18,50 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Computes 768-bit product of 384-bit and 384-bit numbers.
|
||||
//
|
||||
// Instructions: 153
|
||||
// Total Cycles: 73
|
||||
// Total uOps: 261
|
||||
// uOps Per Cycle: 3.58
|
||||
// IPC: 2.10
|
||||
// Block RThroughput: 43.5
|
||||
//
|
||||
// @param rdi receives 12 quadword result
|
||||
// @param rsi is left hand side which must have 6 quadwords
|
||||
// @param rdx is right hand side which must have 6 quadwords
|
||||
// @note words are host endian while array is little endian
|
||||
// @mayalias
|
||||
Mul6x6Adx:
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
push %r15
|
||||
push %r14
|
||||
push %r13
|
||||
push %r12
|
||||
push %rbx
|
||||
sub $64,%rsp
|
||||
mov %r15,-8(%rbp)
|
||||
mov %r14,-16(%rbp)
|
||||
mov %r13,-24(%rbp)
|
||||
mov %r12,-32(%rbp)
|
||||
mov %rbx,-40(%rbp)
|
||||
mov %rdx,%rbx
|
||||
sub $24,%rsp
|
||||
mov (%rdx),%rdx
|
||||
xor %r8d,%r8d
|
||||
mulx (%rsi),%rcx,%rax
|
||||
mulx 8(%rsi),%rdx,%r12
|
||||
mov %rcx,-48(%rbp)
|
||||
adox %rdx,%rax
|
||||
add %rdx,%rax
|
||||
mov (%rbx),%rdx
|
||||
mulx 16(%rsi),%rdx,%r15
|
||||
adox %rdx,%r12
|
||||
adc %rdx,%r12
|
||||
mov (%rbx),%rdx
|
||||
mulx 24(%rsi),%rdx,%r10
|
||||
adox %rdx,%r15
|
||||
adc %rdx,%r15
|
||||
mov (%rbx),%rdx
|
||||
mulx 32(%rsi),%rdx,%r9
|
||||
adox %rdx,%r10
|
||||
adc %rdx,%r10
|
||||
mov (%rbx),%rdx
|
||||
mulx 40(%rsi),%rdx,%rcx
|
||||
adox %rdx,%r9
|
||||
adc %rdx,%r9
|
||||
mov 8(%rbx),%rdx
|
||||
adox %r8,%rcx
|
||||
adc $0,%rcx
|
||||
mulx (%rsi),%r13,%r11
|
||||
xor %r8d,%r8d
|
||||
adox %r13,%rax
|
||||
|
@ -171,12 +184,167 @@ Mul6x6Adx:
|
|||
mov %r8,64(%rdi)
|
||||
mov %r11,72(%rdi)
|
||||
mov %rdx,88(%rdi)
|
||||
add $24,%rsp
|
||||
pop %rbx
|
||||
pop %r12
|
||||
pop %r13
|
||||
pop %r14
|
||||
pop %r15
|
||||
pop %rbp
|
||||
mov -8(%rbp),%r15
|
||||
mov -16(%rbp),%r14
|
||||
mov -24(%rbp),%r13
|
||||
mov -32(%rbp),%r12
|
||||
mov -40(%rbp),%rbx
|
||||
leave
|
||||
ret
|
||||
.endfn Mul6x6Adx,globl
|
||||
|
||||
.end
|
||||
SIMULATION 0123456789 0123456789 0123456789 012
|
||||
Index 0123456789 0123456789 0123456789 0123456789
|
||||
[0,0] DeER . . . . . . . . . . . . . . . movq %r15, -8(%rbp)
|
||||
[0,1] D=eER. . . . . . . . . . . . . . . movq %r14, -16(%rbp)
|
||||
[0,2] D==eER . . . . . . . . . . . . . . movq %r13, -24(%rbp)
|
||||
[0,3] D===eER . . . . . . . . . . . . . . movq %r12, -32(%rbp)
|
||||
[0,4] D====eER . . . . . . . . . . . . . . movq %rbx, -40(%rbp)
|
||||
[0,5] DeE----R . . . . . . . . . . . . . . movq %rdx, %rbx
|
||||
[0,6] .DeeeeeER . . . . . . . . . . . . . . movq (%rdx), %rdx
|
||||
[0,7] .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq (%rsi), %rcx, %rax
|
||||
[0,8] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %r12
|
||||
[0,9] . D=======eE------R . . . . . . . . . . . . movq %rcx, -48(%rbp)
|
||||
[0,10] . D=============eER . . . . . . . . . . . . addq %rdx, %rax
|
||||
[0,11] . DeeeeeE--------R . . . . . . . . . . . . movq (%rbx), %rdx
|
||||
[0,12] . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 16(%rsi), %rdx, %r15
|
||||
[0,13] . D=============eER. . . . . . . . . . . . adcq %rdx, %r12
|
||||
[0,14] . DeeeeeE--------R. . . . . . . . . . . . movq (%rbx), %rdx
|
||||
[0,15] . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r10
|
||||
[0,16] . D=============eER . . . . . . . . . . . adcq %rdx, %r15
|
||||
[0,17] . DeeeeeE--------R . . . . . . . . . . . movq (%rbx), %rdx
|
||||
[0,18] . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r9
|
||||
[0,19] . D=============eER . . . . . . . . . . . adcq %rdx, %r10
|
||||
[0,20] . .DeeeeeE--------R . . . . . . . . . . . movq (%rbx), %rdx
|
||||
[0,21] . .D=====eeeeeeeeeER . . . . . . . . . . . mulxq 40(%rsi), %rdx, %rcx
|
||||
[0,22] . .D=============eER . . . . . . . . . . . adcq %rdx, %r9
|
||||
[0,23] . . DeeeeeE--------R . . . . . . . . . . . movq 8(%rbx), %rdx
|
||||
[0,24] . . D=============eER . . . . . . . . . . . adcq $0, %rcx
|
||||
[0,25] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r13, %r11
|
||||
[0,26] . . D--------------R . . . . . . . . . . . xorl %r8d, %r8d
|
||||
[0,27] . . D========eE----R . . . . . . . . . . . adoxq %r13, %rax
|
||||
[0,28] . . D=============eER. . . . . . . . . . . adcxq %r11, %r12
|
||||
[0,29] . . D=========eE----R. . . . . . . . . . . movq %rax, -56(%rbp)
|
||||
[0,30] . . D====eeeeeeeeeER. . . . . . . . . . . mulxq 8(%rsi), %r11, %rax
|
||||
[0,31] . . D=============eER . . . . . . . . . . adoxq %r11, %r12
|
||||
[0,32] . . D==============eER . . . . . . . . . . adcxq %rax, %r15
|
||||
[0,33] . . D=============eER . . . . . . . . . . movq %r12, %r14
|
||||
[0,34] . . D====eeeeeeeeeE-R . . . . . . . . . . mulxq 16(%rsi), %r11, %rax
|
||||
[0,35] . . D==============eER . . . . . . . . . . adoxq %r11, %r15
|
||||
[0,36] . . .D==============eER . . . . . . . . . . adcxq %rax, %r10
|
||||
[0,37] . . .D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r11, %rax
|
||||
[0,38] . . .D===============eER. . . . . . . . . . adoxq %r11, %r10
|
||||
[0,39] . . . D===============eER . . . . . . . . . adcxq %rax, %r9
|
||||
[0,40] . . . D====eeeeeeeeeE---R . . . . . . . . . mulxq 32(%rsi), %r11, %rax
|
||||
[0,41] . . . D================eER . . . . . . . . . adoxq %r11, %r9
|
||||
[0,42] . . . D================eER . . . . . . . . . adcxq %rax, %rcx
|
||||
[0,43] . . . D====eeeeeeeeeE----R . . . . . . . . . mulxq 40(%rsi), %rdx, %rax
|
||||
[0,44] . . . D=================eER . . . . . . . . . adoxq %rdx, %rcx
|
||||
[0,45] . . . D=================eER. . . . . . . . . adcxq %r8, %rax
|
||||
[0,46] . . . DeeeeeE-------------R. . . . . . . . . movq 16(%rbx), %rdx
|
||||
[0,47] . . . D==================eER . . . . . . . . adoxq %r8, %rax
|
||||
[0,48] . . . D====eeeeeeeeeE-----R . . . . . . . . mulxq (%rsi), %r13, %r8
|
||||
[0,49] . . . D====E--------------R . . . . . . . . xorl %r11d, %r11d
|
||||
[0,50] . . . D=========eE--------R . . . . . . . . adoxq %r13, %r14
|
||||
[0,51] . . . .D=========eE-------R . . . . . . . . movq %r14, -64(%rbp)
|
||||
[0,52] . . . .D============eE----R . . . . . . . . adcxq %r8, %r15
|
||||
[0,53] . . . .D====eeeeeeeeeE----R . . . . . . . . mulxq 8(%rsi), %r12, %r8
|
||||
[0,54] . . . . D============eE---R . . . . . . . . adoxq %r12, %r15
|
||||
[0,55] . . . . D=============eE--R . . . . . . . . adcxq %r8, %r10
|
||||
[0,56] . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 16(%rsi), %r12, %r8
|
||||
[0,57] . . . . D=============eE-R . . . . . . . . adoxq %r12, %r10
|
||||
[0,58] . . . . D==============eER . . . . . . . . adcxq %r8, %r9
|
||||
[0,59] . . . . D====eeeeeeeeeE--R . . . . . . . . mulxq 24(%rsi), %r12, %r8
|
||||
[0,60] . . . . D==============eER . . . . . . . . adoxq %r12, %r9
|
||||
[0,61] . . . . D===============eER . . . . . . . . adcxq %r8, %rcx
|
||||
[0,62] . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r12, %r8
|
||||
[0,63] . . . . D===============eER . . . . . . . . adoxq %r12, %rcx
|
||||
[0,64] . . . . D================eER. . . . . . . . adcxq %r8, %rax
|
||||
[0,65] . . . . D====eeeeeeeeeE----R. . . . . . . . mulxq 40(%rsi), %rdx, %r8
|
||||
[0,66] . . . . .D================eER . . . . . . . adoxq %rdx, %rax
|
||||
[0,67] . . . . .D=================eER . . . . . . . adcxq %r11, %r8
|
||||
[0,68] . . . . .DeeeeeE-------------R . . . . . . . movq 24(%rbx), %rdx
|
||||
[0,69] . . . . .D==================eER . . . . . . . adoxq %r11, %r8
|
||||
[0,70] . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq (%rsi), %r13, %r11
|
||||
[0,71] . . . . . D====E--------------R . . . . . . . xorl %r12d, %r12d
|
||||
[0,72] . . . . . D===========eE------R . . . . . . . adoxq %r13, %r15
|
||||
[0,73] . . . . . D============eE----R . . . . . . . adcxq %r11, %r10
|
||||
[0,74] . . . . . D====eeeeeeeeeE----R . . . . . . . mulxq 8(%rsi), %r13, %r11
|
||||
[0,75] . . . . . D=============eE---R . . . . . . . adoxq %r13, %r10
|
||||
[0,76] . . . . . D=============eE--R . . . . . . . adcxq %r11, %r9
|
||||
[0,77] . . . . . D====eeeeeeeeeE---R . . . . . . . mulxq 16(%rsi), %r13, %r11
|
||||
[0,78] . . . . . D==============eE-R . . . . . . . adoxq %r13, %r9
|
||||
[0,79] . . . . . D==============eER . . . . . . . adcxq %r11, %rcx
|
||||
[0,80] . . . . . D====eeeeeeeeeE--R . . . . . . . mulxq 24(%rsi), %r13, %r11
|
||||
[0,81] . . . . . D===============eER . . . . . . . adoxq %r13, %rcx
|
||||
[0,82] . . . . . .D===============eER. . . . . . . adcxq %r11, %rax
|
||||
[0,83] . . . . . .D====eeeeeeeeeE---R. . . . . . . mulxq 32(%rsi), %r13, %r11
|
||||
[0,84] . . . . . .D================eER . . . . . . adoxq %r13, %rax
|
||||
[0,85] . . . . . . D================eER . . . . . . adcxq %r11, %r8
|
||||
[0,86] . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 40(%rsi), %rdx, %r11
|
||||
[0,87] . . . . . . D=================eER . . . . . . adoxq %rdx, %r8
|
||||
[0,88] . . . . . . DeeeeeE------------R . . . . . . movq 32(%rbx), %rdx
|
||||
[0,89] . . . . . . D=================eER . . . . . . adcxq %r12, %r11
|
||||
[0,90] . . . . . . D=====eeeeeeeeeE----R . . . . . . mulxq (%rsi), %r14, %r13
|
||||
[0,91] . . . . . . D=================eER. . . . . . adoxq %r12, %r11
|
||||
[0,92] . . . . . . D-------------------R. . . . . . xorl %r12d, %r12d
|
||||
[0,93] . . . . . . D===========eE------R. . . . . . adoxq %r14, %r10
|
||||
[0,94] . . . . . . D=============eE----R. . . . . . adcxq %r13, %r9
|
||||
[0,95] . . . . . . D====eeeeeeeeeE----R. . . . . . mulxq 8(%rsi), %r14, %r13
|
||||
[0,96] . . . . . . D=============eE---R. . . . . . adoxq %r14, %r9
|
||||
[0,97] . . . . . . D==============eE--R. . . . . . adcxq %r13, %rcx
|
||||
[0,98] . . . . . . .D====eeeeeeeeeE---R. . . . . . mulxq 16(%rsi), %r14, %r13
|
||||
[0,99] . . . . . . .D==============eE-R. . . . . . adoxq %r14, %rcx
|
||||
[0,100] . . . . . . .D===============eER. . . . . . adcxq %r13, %rax
|
||||
[0,101] . . . . . . . D====eeeeeeeeeE--R. . . . . . mulxq 24(%rsi), %r14, %r13
|
||||
[0,102] . . . . . . . D===============eER . . . . . adoxq %r14, %rax
|
||||
[0,103] . . . . . . . D================eER . . . . . adcxq %r13, %r8
|
||||
[0,104] . . . . . . . D====eeeeeeeeeE---R . . . . . mulxq 32(%rsi), %r14, %r13
|
||||
[0,105] . . . . . . . D================eER . . . . . adoxq %r14, %r8
|
||||
[0,106] . . . . . . . D=================eER . . . . . adcxq %r13, %r11
|
||||
[0,107] . . . . . . . D====eeeeeeeeeE----R . . . . . mulxq 40(%rsi), %rdx, %r13
|
||||
[0,108] . . . . . . . D=================eER. . . . . adoxq %rdx, %r11
|
||||
[0,109] . . . . . . . D==================eER . . . . adcxq %r12, %r13
|
||||
[0,110] . . . . . . . DeeeeeE-------------R . . . . movq 40(%rbx), %rdx
|
||||
[0,111] . . . . . . . D==================eER . . . . adoxq %r12, %r13
|
||||
[0,112] . . . . . . . D=====eeeeeeeeeE-----R . . . . mulxq (%rsi), %r14, %rbx
|
||||
[0,113] . . . . . . . .D-------------------R . . . . xorl %r12d, %r12d
|
||||
[0,114] . . . . . . . .D===========eE------R . . . . adoxq %r14, %r9
|
||||
[0,115] . . . . . . . .D=============eE----R . . . . adcxq %rbx, %rcx
|
||||
[0,116] . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 8(%rsi), %r14, %rbx
|
||||
[0,117] . . . . . . . . D=============eE---R . . . . adoxq %r14, %rcx
|
||||
[0,118] . . . . . . . . D==============eE--R . . . . adcxq %rbx, %rax
|
||||
[0,119] . . . . . . . . D====eeeeeeeeeE---R . . . . mulxq 16(%rsi), %r14, %rbx
|
||||
[0,120] . . . . . . . . D==============eE-R . . . . adoxq %r14, %rax
|
||||
[0,121] . . . . . . . . D===============eER . . . . adcxq %rbx, %r8
|
||||
[0,122] . . . . . . . . D====eeeeeeeeeE--R . . . . mulxq 24(%rsi), %r14, %rbx
|
||||
[0,123] . . . . . . . . D===============eER . . . . adoxq %r14, %r8
|
||||
[0,124] . . . . . . . . D================eER . . . . adcxq %rbx, %r11
|
||||
[0,125] . . . . . . . . D====eeeeeeeeeE---R . . . . mulxq 32(%rsi), %r14, %rbx
|
||||
[0,126] . . . . . . . . .D====eeeeeeeeeE--R . . . . mulxq 40(%rsi), %rsi, %rdx
|
||||
[0,127] . . . . . . . . .D===============eER. . . . adoxq %r14, %r11
|
||||
[0,128] . . . . . . . . .D================eER . . . adcxq %rbx, %r13
|
||||
[0,129] . . . . . . . . . D================eER . . . adoxq %rsi, %r13
|
||||
[0,130] . . . . . . . . . D=================eER . . . adcxq %r12, %rdx
|
||||
[0,131] . . . . . . . . . D==================eER . . . adoxq %r12, %rdx
|
||||
[0,132] . . . . . . . . . DeeeeeE--------------R . . . movq -48(%rbp), %rsi
|
||||
[0,133] . . . . . . . . . D=eeeeeE-------------R . . . movq -56(%rbp), %rbx
|
||||
[0,134] . . . . . . . . . D===eE---------------R . . . movq %r15, 24(%rdi)
|
||||
[0,135] . . . . . . . . . D=eeeeeE------------R . . . movq -64(%rbp), %r14
|
||||
[0,136] . . . . . . . . . D================eE-R . . . movq %r13, 80(%rdi)
|
||||
[0,137] . . . . . . . . . D=================eER . . . movq %rbx, 8(%rdi)
|
||||
[0,138] . . . . . . . . . D==================eER. . . movq %r14, 16(%rdi)
|
||||
[0,139] . . . . . . . . . D===================eER . . movq %rsi, (%rdi)
|
||||
[0,140] . . . . . . . . . D====================eER . . movq %r10, 32(%rdi)
|
||||
[0,141] . . . . . . . . . D====================eER . . movq %r9, 40(%rdi)
|
||||
[0,142] . . . . . . . . . D=====================eER . . movq %rcx, 48(%rdi)
|
||||
[0,143] . . . . . . . . . D======================eER. . movq %rax, 56(%rdi)
|
||||
[0,144] . . . . . . . . . D=======================eER . movq %r8, 64(%rdi)
|
||||
[0,145] . . . . . . . . . D========================eER. movq %r11, 72(%rdi)
|
||||
[0,146] . . . . . . . . . D=========================eER movq %rdx, 88(%rdi)
|
||||
[0,147] . . . . . . . . . DeeeeeE--------------------R movq -8(%rbp), %r15
|
||||
[0,148] . . . . . . . . . D=eeeeeE-------------------R movq -16(%rbp), %r14
|
||||
[0,149] . . . . . . . . . D=eeeeeE-------------------R movq -24(%rbp), %r13
|
||||
[0,150] . . . . . . . . . D==eeeeeE------------------R movq -32(%rbp), %r12
|
||||
[0,151] . . . . . . . . . D==eeeeeE------------------R movq -40(%rbp), %rbx
|
||||
|
|
|
@ -1,483 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
/ Computes 1024-bit product of 512-bit and 512-bit numbers.
|
||||
/
|
||||
/ Instructions: 262
|
||||
/ Total Cycles: 114
|
||||
/ Total uOps: 469
|
||||
/ Dispatch Width: 6
|
||||
/ uOps Per Cycle: 4.11
|
||||
/ IPC: 2.30
|
||||
/ Block RThroughput: 78.2
|
||||
/
|
||||
/ @param rdi receives 16 quadword result
|
||||
/ @param rsi is left hand side which must have 8 quadwords
|
||||
/ @param rdx is right hand side which must have 8 quadwords
|
||||
/ @note words are host endian while array is little endian
|
||||
/ @mayalias
|
||||
Mul8x8Adx:
|
||||
// Schoolbook multiply: one "row" per word of the right hand side.
// Within a row, mulx yields a low/high pair for each left hand word;
// low halves are accumulated with adox (OF carry chain) and high
// halves with adcx (CF carry chain), so the two chains interleave.
// Set up the frame pointer and save callee-saved registers.
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
push %r15
|
||||
push %r14
|
||||
push %r13
|
||||
push %r12
|
||||
// r12 = right hand side pointer, because mulx takes its implicit
// multiplier from rdx.
mov %rdx,%r12
|
||||
push %rbx
|
||||
// Reserve the eight slots -48(%rbp) .. -104(%rbp) used below for
// the result pointer and finished low result words.
sub $64,%rsp
|
||||
// Row 0: left hand side times word 0 of the right hand side.
mov (%rdx),%rdx
|
||||
// r13 = 0; xor also clears CF and OF before the carry chain starts.
xor %r13d,%r13d
|
||||
mulx (%rsi),%rax,%rcx
|
||||
// Spill the result pointer so rdi can serve as an accumulator.
mov %rdi,-48(%rbp)
|
||||
// Result word 0 is final; park it on the stack.
mov %rax,-56(%rbp)
|
||||
mulx 8(%rsi),%rdx,%rax
|
||||
adox %rdx,%rcx
|
||||
// Reload the multiplier: the previous mulx wrote its low half to rdx.
mov (%r12),%rdx
|
||||
mulx 16(%rsi),%rdx,%rbx
|
||||
adox %rdx,%rax
|
||||
mov (%r12),%rdx
|
||||
mulx 24(%rsi),%rdx,%r11
|
||||
adox %rdx,%rbx
|
||||
mov (%r12),%rdx
|
||||
mulx 32(%rsi),%rdx,%r10
|
||||
adox %rdx,%r11
|
||||
mov (%r12),%rdx
|
||||
mulx 40(%rsi),%rdx,%r9
|
||||
adox %rdx,%r10
|
||||
mov (%r12),%rdx
|
||||
mulx 48(%rsi),%rdx,%r8
|
||||
adox %rdx,%r9
|
||||
mov (%r12),%rdx
|
||||
mulx 56(%rsi),%rdx,%rdi
|
||||
adox %rdx,%r8
|
||||
// Fold the last carry of row 0 into its top word (r13 is zero).
adox %r13,%rdi
|
||||
// Row 1: accumulate left hand side times word 1 of the right hand side.
xor %r13d,%r13d
|
||||
mov 8(%r12),%rdx
|
||||
mulx (%rsi),%r15,%r14
|
||||
adox %r15,%rcx
|
||||
adcx %r14,%rax
|
||||
// Result word 1 is final.
mov %rcx,-64(%rbp)
|
||||
mulx 8(%rsi),%r14,%rcx
|
||||
adox %r14,%rax
|
||||
adcx %rcx,%rbx
|
||||
mulx 16(%rsi),%r14,%rcx
|
||||
adox %r14,%rbx
|
||||
adcx %rcx,%r11
|
||||
mulx 24(%rsi),%r14,%rcx
|
||||
adox %r14,%r11
|
||||
adcx %rcx,%r10
|
||||
mulx 32(%rsi),%r14,%rcx
|
||||
adox %r14,%r10
|
||||
adcx %rcx,%r9
|
||||
mulx 40(%rsi),%r14,%rcx
|
||||
adox %r14,%r9
|
||||
adcx %rcx,%r8
|
||||
mulx 48(%rsi),%r14,%rcx
|
||||
adox %r14,%r8
|
||||
adcx %rcx,%rdi
|
||||
mulx 56(%rsi),%rdx,%rcx
|
||||
adox %rdx,%rdi
|
||||
// Drain both carry chains into the new top word (r13 is zero).
adcx %r13,%rcx
|
||||
// Row 2: multiplier is word 2 of the right hand side.
mov 16(%r12),%rdx
|
||||
adox %r13,%rcx
|
||||
mulx (%rsi),%r15,%r14
|
||||
xor %r13d,%r13d
|
||||
adox %r15,%rax
|
||||
adcx %r14,%rbx
|
||||
// Result word 2 is final.
mov %rax,-72(%rbp)
|
||||
mulx 8(%rsi),%r14,%rax
|
||||
adox %r14,%rbx
|
||||
adcx %rax,%r11
|
||||
mulx 16(%rsi),%r14,%rax
|
||||
adox %r14,%r11
|
||||
adcx %rax,%r10
|
||||
mulx 24(%rsi),%r14,%rax
|
||||
adox %r14,%r10
|
||||
adcx %rax,%r9
|
||||
mulx 32(%rsi),%r14,%rax
|
||||
adox %r14,%r9
|
||||
adcx %rax,%r8
|
||||
mulx 40(%rsi),%r14,%rax
|
||||
adox %r14,%r8
|
||||
adcx %rax,%rdi
|
||||
mulx 48(%rsi),%r14,%rax
|
||||
adox %r14,%rdi
|
||||
adcx %rax,%rcx
|
||||
mulx 56(%rsi),%rdx,%rax
|
||||
adox %rdx,%rcx
|
||||
adcx %r13,%rax
|
||||
adox %r13,%rax
|
||||
// Row 3: multiplier is word 3 of the right hand side.
xor %r13d,%r13d
|
||||
mov 24(%r12),%rdx
|
||||
mulx (%rsi),%r15,%r14
|
||||
adox %r15,%rbx
|
||||
adcx %r14,%r11
|
||||
// Result word 3 is final.
mov %rbx,-80(%rbp)
|
||||
// From here on r15 carries the lowest live accumulator word.
mov %r11,%r15
|
||||
mulx 8(%rsi),%r14,%rbx
|
||||
adox %r14,%r15
|
||||
adcx %rbx,%r10
|
||||
mulx 16(%rsi),%rbx,%r11
|
||||
adox %rbx,%r10
|
||||
adcx %r11,%r9
|
||||
mulx 24(%rsi),%rbx,%r11
|
||||
adox %rbx,%r9
|
||||
adcx %r11,%r8
|
||||
mulx 32(%rsi),%rbx,%r11
|
||||
adox %rbx,%r8
|
||||
adcx %r11,%rdi
|
||||
mulx 40(%rsi),%rbx,%r11
|
||||
adox %rbx,%rdi
|
||||
adcx %r11,%rcx
|
||||
mulx 48(%rsi),%rbx,%r11
|
||||
adox %rbx,%rcx
|
||||
adcx %r11,%rax
|
||||
mulx 56(%rsi),%rdx,%r11
|
||||
adox %rdx,%rax
|
||||
adcx %r13,%r11
|
||||
// Row 4: multiplier is word 4 of the right hand side.
mov 32(%r12),%rdx
|
||||
adox %r13,%r11
|
||||
// rbx takes over as the zero register; xor clears CF and OF again.
xor %ebx,%ebx
|
||||
mulx (%rsi),%r14,%r13
|
||||
adox %r14,%r15
|
||||
adcx %r13,%r10
|
||||
// Result word 4 is final.
mov %r15,-88(%rbp)
|
||||
mulx 8(%rsi),%r14,%r13
|
||||
mov %r10,%r15
|
||||
adcx %r13,%r9
|
||||
adox %r14,%r15
|
||||
mulx 16(%rsi),%r13,%r10
|
||||
adox %r13,%r9
|
||||
adcx %r10,%r8
|
||||
mulx 24(%rsi),%r13,%r10
|
||||
adcx %r10,%rdi
|
||||
adox %r13,%r8
|
||||
mulx 32(%rsi),%r13,%r10
|
||||
adox %r13,%rdi
|
||||
adcx %r10,%rcx
|
||||
mulx 40(%rsi),%r13,%r10
|
||||
adox %r13,%rcx
|
||||
adcx %r10,%rax
|
||||
mulx 48(%rsi),%r13,%r10
|
||||
adox %r13,%rax
|
||||
adcx %r10,%r11
|
||||
mulx 56(%rsi),%rdx,%r10
|
||||
adox %rdx,%r11
|
||||
adcx %rbx,%r10
|
||||
// Row 5: multiplier is word 5 of the right hand side.
mov 40(%r12),%rdx
|
||||
adox %rbx,%r10
|
||||
mulx (%rsi),%r14,%r13
|
||||
xor %ebx,%ebx
|
||||
adox %r14,%r15
|
||||
// Result word 5 is final.
mov %r15,-96(%rbp)
|
||||
adcx %r13,%r9
|
||||
mulx 8(%rsi),%r14,%r13
|
||||
mov %r9,%r15
|
||||
adox %r14,%r15
|
||||
adcx %r13,%r8
|
||||
mulx 16(%rsi),%r13,%r9
|
||||
adox %r13,%r8
|
||||
adcx %r9,%rdi
|
||||
mulx 24(%rsi),%r13,%r9
|
||||
adox %r13,%rdi
|
||||
adcx %r9,%rcx
|
||||
mulx 32(%rsi),%r13,%r9
|
||||
adox %r13,%rcx
|
||||
adcx %r9,%rax
|
||||
mulx 40(%rsi),%r13,%r9
|
||||
adox %r13,%rax
|
||||
adcx %r9,%r11
|
||||
mulx 48(%rsi),%r13,%r9
|
||||
adox %r13,%r11
|
||||
adcx %r9,%r10
|
||||
mulx 56(%rsi),%rdx,%r9
|
||||
adox %rdx,%r10
|
||||
adcx %rbx,%r9
|
||||
adox %rbx,%r9
|
||||
// Row 6: multiplier is word 6 of the right hand side.
xor %ebx,%ebx
|
||||
mov 48(%r12),%rdx
|
||||
mulx (%rsi),%r14,%r13
|
||||
adox %r14,%r15
|
||||
adcx %r13,%r8
|
||||
// Result word 6 is final.
mov %r15,-104(%rbp)
|
||||
mulx 8(%rsi),%r14,%r13
|
||||
mov %r8,%r15
|
||||
adcx %r13,%rdi
|
||||
adox %r14,%r15
|
||||
mulx 16(%rsi),%r13,%r8
|
||||
adox %r13,%rdi
|
||||
adcx %r8,%rcx
|
||||
mulx 24(%rsi),%r13,%r8
|
||||
adox %r13,%rcx
|
||||
adcx %r8,%rax
|
||||
mulx 32(%rsi),%r13,%r8
|
||||
adox %r13,%rax
|
||||
adcx %r8,%r11
|
||||
mulx 40(%rsi),%r13,%r8
|
||||
adox %r13,%r11
|
||||
adcx %r8,%r10
|
||||
mulx 48(%rsi),%r13,%r8
|
||||
adox %r13,%r10
|
||||
adcx %r8,%r9
|
||||
mulx 56(%rsi),%rdx,%r8
|
||||
adox %rdx,%r9
|
||||
// Row 7 (last): multiplier is word 7 of the right hand side.
mov 56(%r12),%rdx
|
||||
adcx %rbx,%r8
|
||||
// r12 is reused as a product register; this was its last use as a pointer.
mulx (%rsi),%r13,%r12
|
||||
adox %rbx,%r8
|
||||
xor %ebx,%ebx
|
||||
adox %r13,%r15
|
||||
adcx %r12,%rdi
|
||||
mulx 8(%rsi),%r13,%r12
|
||||
adox %r13,%rdi
|
||||
adcx %r12,%rcx
|
||||
mulx 16(%rsi),%r13,%r12
|
||||
adox %r13,%rcx
|
||||
adcx %r12,%rax
|
||||
mulx 24(%rsi),%r13,%r12
|
||||
adox %r13,%rax
|
||||
adcx %r12,%r11
|
||||
mulx 32(%rsi),%r13,%r12
|
||||
adox %r13,%r11
|
||||
adcx %r12,%r10
|
||||
mulx 40(%rsi),%r13,%r12
|
||||
adox %r13,%r10
|
||||
adcx %r12,%r9
|
||||
mulx 48(%rsi),%r13,%r12
|
||||
// Final product: overwrites rsi and rdx, the last operand read.
mulx 56(%rsi),%rsi,%rdx
|
||||
adox %r13,%r9
|
||||
adcx %r12,%r8
|
||||
adox %rsi,%r8
|
||||
adcx %rbx,%rdx
|
||||
mov -64(%rbp),%rsi
|
||||
adox %rbx,%rdx
|
||||
// Store phase: rbx = saved result pointer. Every store to the result
// comes after the last load from either operand.
mov -48(%rbp),%rbx
|
||||
mov -56(%rbp),%r14
|
||||
mov %rsi,8(%rbx)
|
||||
mov -72(%rbp),%rsi
|
||||
mov %r14,(%rbx)
|
||||
mov %rsi,16(%rbx)
|
||||
mov -80(%rbp),%rsi
|
||||
mov %rsi,24(%rbx)
|
||||
mov -88(%rbp),%rsi
|
||||
mov %rsi,32(%rbx)
|
||||
mov -96(%rbp),%rsi
|
||||
mov %rsi,40(%rbx)
|
||||
mov -104(%rbp),%rsi
|
||||
// Result word 7 never went to the stack; it is still in r15.
mov %r15,56(%rbx)
|
||||
mov %rsi,48(%rbx)
|
||||
// Result words 8..15 come straight from the accumulator registers.
mov %rdi,64(%rbx)
|
||||
mov %rcx,72(%rbx)
|
||||
mov %rax,80(%rbx)
|
||||
mov %r11,88(%rbx)
|
||||
mov %r10,96(%rbx)
|
||||
mov %r9,104(%rbx)
|
||||
mov %r8,112(%rbx)
|
||||
mov %rdx,120(%rbx)
|
||||
// Release the spill area and restore callee-saved registers in
// reverse push order.
add $64,%rsp
|
||||
pop %rbx
|
||||
pop %r12
|
||||
pop %r13
|
||||
pop %r14
|
||||
pop %r15
|
||||
pop %rbp
|
||||
ret
|
||||
.endfn Mul8x8Adx,globl
|
||||
|
||||
.end
|
||||
Timeline view: 0123456789 0123456789 0123456789 0123456789
|
||||
Index 0123456789 0123456789 0123456789 0123456789
|
||||
[0,0] DeeER. . . . . . . . . . . . . . . . pushq %r15
|
||||
[0,1] D==eeER . . . . . . . . . . . . . . . pushq %r14
|
||||
[0,2] .D===eeER . . . . . . . . . . . . . . . pushq %r13
|
||||
[0,3] .D=====eeER . . . . . . . . . . . . . . pushq %r12
|
||||
[0,4] . DeE-----R . . . . . . . . . . . . . . movq %rdx, %r12
|
||||
[0,5] . D======eeER . . . . . . . . . . . . . . pushq %rbx
|
||||
[0,6] . D========eER . . . . . . . . . . . . . . subq $64, %rsp
|
||||
[0,7] . DeeeeeE----R . . . . . . . . . . . . . . movq (%rdx), %rdx
|
||||
[0,8] . D---------R . . . . . . . . . . . . . . xorl %r13d, %r13d
|
||||
[0,9] . D====eeeeeeeeeER . . . . . . . . . . . . . mulxq (%rsi), %rax, %rcx
|
||||
[0,10] . D======eE------R . . . . . . . . . . . . . movq %rdi, -48(%rbp)
|
||||
[0,11] . D======eE-----R . . . . . . . . . . . . . movq %rax, -56(%rbp)
|
||||
[0,12] . D====eeeeeeeeeER. . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %rax
|
||||
[0,13] . D============eER. . . . . . . . . . . . . adoxq %rdx, %rcx
|
||||
[0,14] . DeeeeeE-------R. . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,15] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 16(%rsi), %rdx, %rbx
|
||||
[0,16] . D============eE-R . . . . . . . . . . . . adoxq %rdx, %rax
|
||||
[0,17] . .DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,18] . .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r11
|
||||
[0,19] . .D=============eER . . . . . . . . . . . . adoxq %rdx, %rbx
|
||||
[0,20] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,21] . . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r10
|
||||
[0,22] . . D=============eER . . . . . . . . . . . . adoxq %rdx, %r11
|
||||
[0,23] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,24] . . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 40(%rsi), %rdx, %r9
|
||||
[0,25] . . D=============eER. . . . . . . . . . . . adoxq %rdx, %r10
|
||||
[0,26] . . DeeeeeE--------R. . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,27] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 48(%rsi), %rdx, %r8
|
||||
[0,28] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r9
|
||||
[0,29] . . DeeeeeE--------R . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,30] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 56(%rsi), %rdx, %rdi
|
||||
[0,31] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r8
|
||||
[0,32] . . .D=============eER . . . . . . . . . . . adoxq %r13, %rdi
|
||||
[0,33] . . .D---------------R . . . . . . . . . . . xorl %r13d, %r13d
|
||||
[0,34] . . .DeeeeeE---------R . . . . . . . . . . . movq 8(%r12), %rdx
|
||||
[0,35] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r15, %r14
|
||||
[0,36] . . . D=======eE-----R . . . . . . . . . . . adoxq %r15, %rcx
|
||||
[0,37] . . . D=============eER . . . . . . . . . . . adcxq %r14, %rax
|
||||
[0,38] . . . D=======eE-----R . . . . . . . . . . . movq %rcx, -64(%rbp)
|
||||
[0,39] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq 8(%rsi), %r14, %rcx
|
||||
[0,40] . . . D=============eER. . . . . . . . . . . adoxq %r14, %rax
|
||||
[0,41] . . . D=============eER . . . . . . . . . . adcxq %rcx, %rbx
|
||||
[0,42] . . . D====eeeeeeeeeE-R . . . . . . . . . . mulxq 16(%rsi), %r14, %rcx
|
||||
[0,43] . . . D==============eER . . . . . . . . . . adoxq %r14, %rbx
|
||||
[0,44] . . . D==============eER . . . . . . . . . . adcxq %rcx, %r11
|
||||
[0,45] . . . D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r14, %rcx
|
||||
[0,46] . . . D===============eER . . . . . . . . . . adoxq %r14, %r11
|
||||
[0,47] . . . .D===============eER. . . . . . . . . . adcxq %rcx, %r10
|
||||
[0,48] . . . .D====eeeeeeeeeE---R. . . . . . . . . . mulxq 32(%rsi), %r14, %rcx
|
||||
[0,49] . . . .D================eER . . . . . . . . . adoxq %r14, %r10
|
||||
[0,50] . . . . D================eER . . . . . . . . . adcxq %rcx, %r9
|
||||
[0,51] . . . . D====eeeeeeeeeE----R . . . . . . . . . mulxq 40(%rsi), %r14, %rcx
|
||||
[0,52] . . . . D=================eER . . . . . . . . . adoxq %r14, %r9
|
||||
[0,53] . . . . D=================eER . . . . . . . . . adcxq %rcx, %r8
|
||||
[0,54] . . . . D====eeeeeeeeeE-----R . . . . . . . . . mulxq 48(%rsi), %r14, %rcx
|
||||
[0,55] . . . . D==================eER. . . . . . . . . adoxq %r14, %r8
|
||||
[0,56] . . . . D==================eER . . . . . . . . adcxq %rcx, %rdi
|
||||
[0,57] . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 56(%rsi), %rdx, %rcx
|
||||
[0,58] . . . . D===================eER . . . . . . . . adoxq %rdx, %rdi
|
||||
[0,59] . . . . D===================eER . . . . . . . . adcxq %r13, %rcx
|
||||
[0,60] . . . . DeeeeeE---------------R . . . . . . . . movq 16(%r12), %rdx
|
||||
[0,61] . . . . D====================eER . . . . . . . . adoxq %r13, %rcx
|
||||
[0,62] . . . . .D====eeeeeeeeeE-------R . . . . . . . . mulxq (%rsi), %r15, %r14
|
||||
[0,63] . . . . .D---------------------R . . . . . . . . xorl %r13d, %r13d
|
||||
[0,64] . . . . .D=======eE------------R . . . . . . . . adoxq %r15, %rax
|
||||
[0,65] . . . . . D============eE------R . . . . . . . . adcxq %r14, %rbx
|
||||
[0,66] . . . . . D=======eE-----------R . . . . . . . . movq %rax, -72(%rbp)
|
||||
[0,67] . . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 8(%rsi), %r14, %rax
|
||||
[0,68] . . . . . D============eE-----R . . . . . . . . adoxq %r14, %rbx
|
||||
[0,69] . . . . . D=============eE----R . . . . . . . . adcxq %rax, %r11
|
||||
[0,70] . . . . . D====eeeeeeeeeE-----R . . . . . . . . mulxq 16(%rsi), %r14, %rax
|
||||
[0,71] . . . . . D=============eE---R . . . . . . . . adoxq %r14, %r11
|
||||
[0,72] . . . . . D==============eE--R . . . . . . . . adcxq %rax, %r10
|
||||
[0,73] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 24(%rsi), %r14, %rax
|
||||
[0,74] . . . . . D==============eE-R . . . . . . . . adoxq %r14, %r10
|
||||
[0,75] . . . . . D===============eER . . . . . . . . adcxq %rax, %r9
|
||||
[0,76] . . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r14, %rax
|
||||
[0,77] . . . . . .D===============eER. . . . . . . . adoxq %r14, %r9
|
||||
[0,78] . . . . . .D================eER . . . . . . . adcxq %rax, %r8
|
||||
[0,79] . . . . . .D====eeeeeeeeeE----R . . . . . . . mulxq 40(%rsi), %r14, %rax
|
||||
[0,80] . . . . . . D================eER . . . . . . . adoxq %r14, %r8
|
||||
[0,81] . . . . . . D=================eER . . . . . . . adcxq %rax, %rdi
|
||||
[0,82] . . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq 48(%rsi), %r14, %rax
|
||||
[0,83] . . . . . . D=================eER . . . . . . . adoxq %r14, %rdi
|
||||
[0,84] . . . . . . D==================eER. . . . . . . adcxq %rax, %rcx
|
||||
[0,85] . . . . . . D====eeeeeeeeeE------R. . . . . . . mulxq 56(%rsi), %rdx, %rax
|
||||
[0,86] . . . . . . D==================eER . . . . . . adoxq %rdx, %rcx
|
||||
[0,87] . . . . . . D===================eER . . . . . . adcxq %r13, %rax
|
||||
[0,88] . . . . . . D====================eER . . . . . . adoxq %r13, %rax
|
||||
[0,89] . . . . . . D----------------------R . . . . . . xorl %r13d, %r13d
|
||||
[0,90] . . . . . . DeeeeeE----------------R . . . . . . movq 24(%r12), %rdx
|
||||
[0,91] . . . . . . D====eeeeeeeeeE-------R . . . . . . mulxq (%rsi), %r15, %r14
|
||||
[0,92] . . . . . . D===========eE--------R . . . . . . adoxq %r15, %rbx
|
||||
[0,93] . . . . . . D=============eE------R . . . . . . adcxq %r14, %r11
|
||||
[0,94] . . . . . . .D===========eE-------R . . . . . . movq %rbx, -80(%rbp)
|
||||
[0,95] . . . . . . .D=============eE-----R . . . . . . movq %r11, %r15
|
||||
[0,96] . . . . . . .D====eeeeeeeeeE------R . . . . . . mulxq 8(%rsi), %r14, %rbx
|
||||
[0,97] . . . . . . . D=============eE----R . . . . . . adoxq %r14, %r15
|
||||
[0,98] . . . . . . . D==============eE---R . . . . . . adcxq %rbx, %r10
|
||||
[0,99] . . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 16(%rsi), %rbx, %r11
|
||||
[0,100] . . . . . . . D==============eE--R . . . . . . adoxq %rbx, %r10
|
||||
[0,101] . . . . . . . D===============eE-R . . . . . . adcxq %r11, %r9
|
||||
[0,102] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 24(%rsi), %rbx, %r11
|
||||
[0,103] . . . . . . . D===============eER . . . . . . adoxq %rbx, %r9
|
||||
[0,104] . . . . . . . D================eER . . . . . . adcxq %r11, %r8
|
||||
[0,105] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 32(%rsi), %rbx, %r11
|
||||
[0,106] . . . . . . . D================eER. . . . . . adoxq %rbx, %r8
|
||||
[0,107] . . . . . . . D=================eER . . . . . adcxq %r11, %rdi
|
||||
[0,108] . . . . . . . D====eeeeeeeeeE-----R . . . . . mulxq 40(%rsi), %rbx, %r11
|
||||
[0,109] . . . . . . . .D=================eER . . . . . adoxq %rbx, %rdi
|
||||
[0,110] . . . . . . . .D==================eER . . . . . adcxq %r11, %rcx
|
||||
[0,111] . . . . . . . .D====eeeeeeeeeE------R . . . . . mulxq 48(%rsi), %rbx, %r11
|
||||
[0,112] . . . . . . . . D==================eER . . . . . adoxq %rbx, %rcx
|
||||
[0,113] . . . . . . . . D===================eER. . . . . adcxq %r11, %rax
|
||||
[0,114] . . . . . . . . D====eeeeeeeeeE-------R. . . . . mulxq 56(%rsi), %rdx, %r11
|
||||
[0,115] . . . . . . . . D===================eER . . . . adoxq %rdx, %rax
|
||||
[0,116] . . . . . . . . D====================eER . . . . adcxq %r13, %r11
|
||||
[0,117] . . . . . . . . DeeeeeE----------------R . . . . movq 32(%r12), %rdx
|
||||
[0,118] . . . . . . . . D=====================eER . . . . adoxq %r13, %r11
|
||||
[0,119] . . . . . . . . D=====E-----------------R . . . . xorl %ebx, %ebx
|
||||
[0,120] . . . . . . . . D====eeeeeeeeeE--------R . . . . mulxq (%rsi), %r14, %r13
|
||||
[0,121] . . . . . . . . D===========eE---------R . . . . adoxq %r14, %r15
|
||||
[0,122] . . . . . . . . D=============eE-------R . . . . adcxq %r13, %r10
|
||||
[0,123] . . . . . . . . D===========eE--------R . . . . movq %r15, -88(%rbp)
|
||||
[0,124] . . . . . . . . D====eeeeeeeeeE-------R . . . . mulxq 8(%rsi), %r14, %r13
|
||||
[0,125] . . . . . . . . D=============eE------R . . . . movq %r10, %r15
|
||||
[0,126] . . . . . . . . .D============eE------R . . . . adcxq %r13, %r9
|
||||
[0,127] . . . . . . . . .D=============eE-----R . . . . adoxq %r14, %r15
|
||||
[0,128] . . . . . . . . .D====eeeeeeeeeE------R . . . . mulxq 16(%rsi), %r13, %r10
|
||||
[0,129] . . . . . . . . . D=============eE----R . . . . adoxq %r13, %r9
|
||||
[0,130] . . . . . . . . . D==============eE---R . . . . adcxq %r10, %r8
|
||||
[0,131] . . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 24(%rsi), %r13, %r10
|
||||
[0,132] . . . . . . . . . D==============eE--R . . . . adcxq %r10, %rdi
|
||||
[0,133] . . . . . . . . . D===============eE-R . . . . adoxq %r13, %r8
|
||||
[0,134] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 32(%rsi), %r13, %r10
|
||||
[0,135] . . . . . . . . . D===============eER . . . . adoxq %r13, %rdi
|
||||
[0,136] . . . . . . . . . D================eER . . . . adcxq %r10, %rcx
|
||||
[0,137] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 40(%rsi), %r13, %r10
|
||||
[0,138] . . . . . . . . . D================eER. . . . adoxq %r13, %rcx
|
||||
[0,139] . . . . . . . . . D=================eER . . . adcxq %r10, %rax
|
||||
[0,140] . . . . . . . . . D====eeeeeeeeeE-----R . . . mulxq 48(%rsi), %r13, %r10
|
||||
[0,141] . . . . . . . . . .D=================eER . . . adoxq %r13, %rax
|
||||
[0,142] . . . . . . . . . .D==================eER . . . adcxq %r10, %r11
|
||||
[0,143] . . . . . . . . . .D====eeeeeeeeeE------R . . . mulxq 56(%rsi), %rdx, %r10
|
||||
[0,144] . . . . . . . . . . D==================eER . . . adoxq %rdx, %r11
|
||||
[0,145] . . . . . . . . . . D===================eER. . . adcxq %rbx, %r10
|
||||
[0,146] . . . . . . . . . . DeeeeeE---------------R. . . movq 40(%r12), %rdx
|
||||
[0,147] . . . . . . . . . . D====================eER . . adoxq %rbx, %r10
|
||||
[0,148] . . . . . . . . . . D====eeeeeeeeeE-------R . . mulxq (%rsi), %r14, %r13
|
||||
[0,149] . . . . . . . . . . D---------------------R . . xorl %ebx, %ebx
|
||||
[0,150] . . . . . . . . . . D============eE-------R . . adoxq %r14, %r15
|
||||
[0,151] . . . . . . . . . . D============eE------R . . movq %r15, -96(%rbp)
|
||||
[0,152] . . . . . . . . . . D============eE------R . . adcxq %r13, %r9
|
||||
[0,153] . . . . . . . . . . D=====eeeeeeeeeE-----R . . mulxq 8(%rsi), %r14, %r13
|
||||
[0,154] . . . . . . . . . . D============eE-----R . . movq %r9, %r15
|
||||
[0,155] . . . . . . . . . . D=============eE----R . . adoxq %r14, %r15
|
||||
[0,156] . . . . . . . . . . D==============eE---R . . adcxq %r13, %r8
|
||||
[0,157] . . . . . . . . . . .D====eeeeeeeeeE----R . . mulxq 16(%rsi), %r13, %r9
|
||||
[0,158] . . . . . . . . . . .D==============eE--R . . adoxq %r13, %r8
|
||||
[0,159] . . . . . . . . . . .D===============eE-R . . adcxq %r9, %rdi
|
||||
[0,160] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 24(%rsi), %r13, %r9
|
||||
[0,161] . . . . . . . . . . . D===============eER . . adoxq %r13, %rdi
|
||||
[0,162] . . . . . . . . . . . D================eER . . adcxq %r9, %rcx
|
||||
[0,163] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 32(%rsi), %r13, %r9
|
||||
[0,164] . . . . . . . . . . . D================eER . . adoxq %r13, %rcx
|
||||
[0,165] . . . . . . . . . . . D=================eER . . adcxq %r9, %rax
|
||||
[0,166] . . . . . . . . . . . D====eeeeeeeeeE----R . . mulxq 40(%rsi), %r13, %r9
|
||||
[0,167] . . . . . . . . . . . D=================eER. . adoxq %r13, %rax
|
||||
[0,168] . . . . . . . . . . . D==================eER . adcxq %r9, %r11
|
||||
[0,169] . . . . . . . . . . . D====eeeeeeeeeE-----R . mulxq 48(%rsi), %r13, %r9
|
||||
[0,170] . . . . . . . . . . . D==================eER . adoxq %r13, %r11
|
||||
[0,171] . . . . . . . . . . . D===================eER . adcxq %r9, %r10
|
||||
[0,172] . . . . . . . . . . . .D====eeeeeeeeeE------R . mulxq 56(%rsi), %rdx, %r9
|
||||
[0,173] . . . . . . . . . . . .D===================eER. adoxq %rdx, %r10
|
||||
[0,174] . . . . . . . . . . . .D====================eER adcxq %rbx, %r9
|
495
libc/nexgen32e/mul8x8adx.S
Normal file
495
libc/nexgen32e/mul8x8adx.S
Normal file
|
@ -0,0 +1,495 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Computes 1024-bit product of 512-bit and 512-bit numbers.
|
||||
//
|
||||
// Instructions: 260
|
||||
// Total Cycles: 98
|
||||
// Total uOps: 452
|
||||
// uOps Per Cycle: 4.61
|
||||
// IPC: 2.65
|
||||
// Block RThroughput: 75.3
|
||||
//
|
||||
// @param rdi receives 16 quadword result
|
||||
// @param rsi is left hand side which must have 8 quadwords
|
||||
// @param rdx is right hand side which must have 8 quadwords
|
||||
// @note words are host endian while array is little endian
|
||||
// @mayalias
|
||||
Mul8x8Adx:
|
||||
// Frame layout relative to %rbp (104 bytes reserved below it):
//   -8 .. -40    saved %r15, %r14, %r13, %r12, %rbx
//   -48          result pointer (incoming %rdi)
//   -56 .. -104  result words 0..6, parked here until the final store
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
sub $104,%rsp
|
||||
mov %r15,-8(%rbp)
|
||||
mov %r14,-16(%rbp)
|
||||
mov %r13,-24(%rbp)
|
||||
mov %r12,-32(%rbp)
|
||||
mov %rbx,-40(%rbp)
|
||||
// mulx takes one multiplicand implicitly from %rdx, so the right hand
// side pointer is kept in %r12 and %rdx is reloaded with each rhs word.
mov %rdx,%r12
|
||||
// Row 0: rhs word 0 times lhs words 0..7.
// Every mulx in this row (after the first) writes its low half into
// %rdx, which is why rhs word 0 is reloaded from (%r12) before each
// multiply. Low halves are folded into the previous high half with a
// plain add/adc chain; mulx and mov do not modify the flags, so the
// carry survives across them.
mov (%rdx),%rdx
|
||||
mulx (%rsi),%rax,%rcx
|
||||
// %rdi is reused as an accumulator below, so park the result pointer.
mov %rdi,-48(%rbp)
|
||||
// result word 0 is final
mov %rax,-56(%rbp)
|
||||
mulx 8(%rsi),%rdx,%rax
|
||||
add %rdx,%rcx
|
||||
mov (%r12),%rdx
|
||||
mulx 16(%rsi),%rdx,%rbx
|
||||
adc %rdx,%rax
|
||||
mov (%r12),%rdx
|
||||
mulx 24(%rsi),%rdx,%r11
|
||||
adc %rdx,%rbx
|
||||
mov (%r12),%rdx
|
||||
mulx 32(%rsi),%rdx,%r10
|
||||
adc %rdx,%r11
|
||||
mov (%r12),%rdx
|
||||
mulx 40(%rsi),%rdx,%r9
|
||||
adc %rdx,%r10
|
||||
mov (%r12),%rdx
|
||||
mulx 48(%rsi),%rdx,%r8
|
||||
adc %rdx,%r9
|
||||
mov (%r12),%rdx
|
||||
mulx 56(%rsi),%rdx,%rdi
|
||||
adc %rdx,%r8
|
||||
// fold the last carry of row 0 into the top word
adc $0,%rdi
|
||||
// Accumulators now hold partial result words 1..8 in
// %rcx, %rax, %rbx, %r11, %r10, %r9, %r8, %rdi.
//
// Rows 1..7 each run two independent carry chains: adox (carry in OF)
// adds the low half of each product, adcx (carry in CF) adds the high
// half one word higher. The xor at the start of a row zeroes a
// register and clears both CF and OF so the chains start clean.
//
// Row 1: rhs word 1. Result word 1 (%rcx) is complete after the first
// adox and is spilled to -64(%rbp); %rcx then serves as the high-half
// scratch register and finishes the row as the new top word.
xor %r13d,%r13d
|
||||
mov 8(%r12),%rdx
|
||||
mulx (%rsi),%r15,%r14
|
||||
adox %r15,%rcx
|
||||
adcx %r14,%rax
|
||||
mov %rcx,-64(%rbp)
|
||||
mulx 8(%rsi),%r14,%rcx
|
||||
adox %r14,%rax
|
||||
adcx %rcx,%rbx
|
||||
mulx 16(%rsi),%r14,%rcx
|
||||
adox %r14,%rbx
|
||||
adcx %rcx,%r11
|
||||
mulx 24(%rsi),%r14,%rcx
|
||||
adox %r14,%r11
|
||||
adcx %rcx,%r10
|
||||
mulx 32(%rsi),%r14,%rcx
|
||||
adox %r14,%r10
|
||||
adcx %rcx,%r9
|
||||
mulx 40(%rsi),%r14,%rcx
|
||||
adox %r14,%r9
|
||||
adcx %rcx,%r8
|
||||
mulx 48(%rsi),%r14,%rcx
|
||||
adox %r14,%r8
|
||||
adcx %rcx,%rdi
|
||||
mulx 56(%rsi),%rdx,%rcx
|
||||
adox %rdx,%rdi
|
||||
// Close row 1: %r13 is zero, so the adcx/adox pair only absorbs the
// outstanding CF and OF into the top word (%rcx). The load and first
// multiply of row 2 are interleaved with it (neither touches flags).
adcx %r13,%rcx
|
||||
mov 16(%r12),%rdx
|
||||
adox %r13,%rcx
|
||||
mulx (%rsi),%r15,%r14
|
||||
// Row 2: rhs word 2. Result word 2 (%rax) is spilled to -72(%rbp);
// %rax becomes the high-half scratch and ends as the top word.
xor %r13d,%r13d
|
||||
adox %r15,%rax
|
||||
adcx %r14,%rbx
|
||||
mov %rax,-72(%rbp)
|
||||
mulx 8(%rsi),%r14,%rax
|
||||
adox %r14,%rbx
|
||||
adcx %rax,%r11
|
||||
mulx 16(%rsi),%r14,%rax
|
||||
adox %r14,%r11
|
||||
adcx %rax,%r10
|
||||
mulx 24(%rsi),%r14,%rax
|
||||
adox %r14,%r10
|
||||
adcx %rax,%r9
|
||||
mulx 32(%rsi),%r14,%rax
|
||||
adox %r14,%r9
|
||||
adcx %rax,%r8
|
||||
mulx 40(%rsi),%r14,%rax
|
||||
adox %r14,%r8
|
||||
adcx %rax,%rdi
|
||||
mulx 48(%rsi),%r14,%rax
|
||||
adox %r14,%rdi
|
||||
adcx %rax,%rcx
|
||||
mulx 56(%rsi),%rdx,%rax
|
||||
adox %rdx,%rcx
|
||||
// close row 2: absorb remaining CF and OF into the top word (%rax)
adcx %r13,%rax
|
||||
adox %r13,%rax
|
||||
// Row 3: rhs word 3. Result word 3 (%rbx) is spilled to -80(%rbp).
// The next word (%r11) is moved into %r15 so that %rbx and %r11 can be
// reused as the low/high scratch pair; %r11 ends as the top word.
xor %r13d,%r13d
|
||||
mov 24(%r12),%rdx
|
||||
mulx (%rsi),%r15,%r14
|
||||
adox %r15,%rbx
|
||||
adcx %r14,%r11
|
||||
mov %rbx,-80(%rbp)
|
||||
mov %r11,%r15
|
||||
mulx 8(%rsi),%r14,%rbx
|
||||
adox %r14,%r15
|
||||
adcx %rbx,%r10
|
||||
mulx 16(%rsi),%rbx,%r11
|
||||
adox %rbx,%r10
|
||||
adcx %r11,%r9
|
||||
mulx 24(%rsi),%rbx,%r11
|
||||
adox %rbx,%r9
|
||||
adcx %r11,%r8
|
||||
mulx 32(%rsi),%rbx,%r11
|
||||
adox %rbx,%r8
|
||||
adcx %r11,%rdi
|
||||
mulx 40(%rsi),%rbx,%r11
|
||||
adox %rbx,%rdi
|
||||
adcx %r11,%rcx
|
||||
mulx 48(%rsi),%rbx,%r11
|
||||
adox %rbx,%rcx
|
||||
adcx %r11,%rax
|
||||
mulx 56(%rsi),%rdx,%r11
|
||||
adox %rdx,%rax
|
||||
// close row 3 (top word %r11); the load of rhs word 4 is interleaved
adcx %r13,%r11
|
||||
mov 32(%r12),%rdx
|
||||
adox %r13,%r11
|
||||
// Row 4: rhs word 4. From here on %rbx is the zero register and
// %r13/%r14 are product scratch. Result word 4 (%r15) is spilled to
// -88(%rbp); %r10 is moved into %r15, then reused as the high-half
// scratch and ends as the top word. adcx and adox use different
// flags, so their relative order within a step does not matter.
xor %ebx,%ebx
|
||||
mulx (%rsi),%r14,%r13
|
||||
adox %r14,%r15
|
||||
adcx %r13,%r10
|
||||
mov %r15,-88(%rbp)
|
||||
mulx 8(%rsi),%r14,%r13
|
||||
mov %r10,%r15
|
||||
adcx %r13,%r9
|
||||
adox %r14,%r15
|
||||
mulx 16(%rsi),%r13,%r10
|
||||
adox %r13,%r9
|
||||
adcx %r10,%r8
|
||||
mulx 24(%rsi),%r13,%r10
|
||||
adcx %r10,%rdi
|
||||
adox %r13,%r8
|
||||
mulx 32(%rsi),%r13,%r10
|
||||
adox %r13,%rdi
|
||||
adcx %r10,%rcx
|
||||
mulx 40(%rsi),%r13,%r10
|
||||
adox %r13,%rcx
|
||||
adcx %r10,%rax
|
||||
mulx 48(%rsi),%r13,%r10
|
||||
adox %r13,%rax
|
||||
adcx %r10,%r11
|
||||
mulx 56(%rsi),%rdx,%r10
|
||||
adox %rdx,%r11
|
||||
// close row 4 (top word %r10); row 5's load and first multiply are
// interleaved with it
adcx %rbx,%r10
|
||||
mov 40(%r12),%rdx
|
||||
adox %rbx,%r10
|
||||
mulx (%rsi),%r14,%r13
|
||||
// Row 5: rhs word 5. Result word 5 (%r15) is spilled to -96(%rbp);
// %r9 is moved into %r15, then reused as scratch and ends as top word.
xor %ebx,%ebx
|
||||
adox %r14,%r15
|
||||
mov %r15,-96(%rbp)
|
||||
adcx %r13,%r9
|
||||
mulx 8(%rsi),%r14,%r13
|
||||
mov %r9,%r15
|
||||
adox %r14,%r15
|
||||
adcx %r13,%r8
|
||||
mulx 16(%rsi),%r13,%r9
|
||||
adox %r13,%r8
|
||||
adcx %r9,%rdi
|
||||
mulx 24(%rsi),%r13,%r9
|
||||
adox %r13,%rdi
|
||||
adcx %r9,%rcx
|
||||
mulx 32(%rsi),%r13,%r9
|
||||
adox %r13,%rcx
|
||||
adcx %r9,%rax
|
||||
mulx 40(%rsi),%r13,%r9
|
||||
adox %r13,%rax
|
||||
adcx %r9,%r11
|
||||
mulx 48(%rsi),%r13,%r9
|
||||
adox %r13,%r11
|
||||
adcx %r9,%r10
|
||||
mulx 56(%rsi),%rdx,%r9
|
||||
adox %rdx,%r10
|
||||
// close row 5: absorb remaining CF and OF into the top word (%r9)
adcx %rbx,%r9
|
||||
adox %rbx,%r9
|
||||
// Row 6: rhs word 6. Result word 6 (%r15) is spilled to -104(%rbp);
// %r8 is moved into %r15, then reused as scratch and ends as top word.
xor %ebx,%ebx
|
||||
mov 48(%r12),%rdx
|
||||
mulx (%rsi),%r14,%r13
|
||||
adox %r14,%r15
|
||||
adcx %r13,%r8
|
||||
mov %r15,-104(%rbp)
|
||||
mulx 8(%rsi),%r14,%r13
|
||||
mov %r8,%r15
|
||||
adcx %r13,%rdi
|
||||
adox %r14,%r15
|
||||
mulx 16(%rsi),%r13,%r8
|
||||
adox %r13,%rdi
|
||||
adcx %r8,%rcx
|
||||
mulx 24(%rsi),%r13,%r8
|
||||
adox %r13,%rcx
|
||||
adcx %r8,%rax
|
||||
mulx 32(%rsi),%r13,%r8
|
||||
adox %r13,%rax
|
||||
adcx %r8,%r11
|
||||
mulx 40(%rsi),%r13,%r8
|
||||
adox %r13,%r11
|
||||
adcx %r8,%r10
|
||||
mulx 48(%rsi),%r13,%r8
|
||||
adox %r13,%r10
|
||||
adcx %r8,%r9
|
||||
mulx 56(%rsi),%rdx,%r8
|
||||
adox %rdx,%r9
|
||||
// Close row 6 (top word %r8), interleaved with the start of row 7.
// This is the last load through %r12, so the first multiply of row 7
// is free to overwrite %r12 and use it as the high-half scratch.
mov 56(%r12),%rdx
|
||||
adcx %rbx,%r8
|
||||
mulx (%rsi),%r13,%r12
|
||||
adox %rbx,%r8
|
||||
// Row 7: rhs word 7. Result word 7 stays in %r15; nothing further is
// spilled because every remaining word fits in a register.
xor %ebx,%ebx
|
||||
adox %r13,%r15
|
||||
adcx %r12,%rdi
|
||||
mulx 8(%rsi),%r13,%r12
|
||||
adox %r13,%rdi
|
||||
adcx %r12,%rcx
|
||||
mulx 16(%rsi),%r13,%r12
|
||||
adox %r13,%rcx
|
||||
adcx %r12,%rax
|
||||
mulx 24(%rsi),%r13,%r12
|
||||
adox %r13,%rax
|
||||
adcx %r12,%r11
|
||||
mulx 32(%rsi),%r13,%r12
|
||||
adox %r13,%r11
|
||||
adcx %r12,%r10
|
||||
mulx 40(%rsi),%r13,%r12
|
||||
adox %r13,%r10
|
||||
adcx %r12,%r9
|
||||
mulx 48(%rsi),%r13,%r12
|
||||
// The final product overwrites both %rsi (low half) and %rdx (high
// half); neither the lhs pointer nor the rhs word is needed afterwards.
// %rdx becomes result word 15.
mulx 56(%rsi),%rsi,%rdx
|
||||
adox %r13,%r9
|
||||
adcx %r12,%r8
|
||||
adox %rsi,%r8
|
||||
// close row 7: absorb remaining CF and OF into word 15 (%rbx is still
// zero here); the reload of result word 1 is interleaved
adcx %rbx,%rdx
|
||||
mov -64(%rbp),%rsi
|
||||
adox %rbx,%rdx
|
||||
// Store phase: %rbx = result pointer. Every load from the two input
// arrays has already been performed above, so the destination may
// overlap either input. Words 0..6 come back from the frame via
// %r14/%rsi; words 7..15 are stored straight from
// %r15, %rdi, %rcx, %rax, %r11, %r10, %r9, %r8, %rdx.
mov -48(%rbp),%rbx
|
||||
mov -56(%rbp),%r14
|
||||
mov %rsi,8(%rbx)
|
||||
mov -72(%rbp),%rsi
|
||||
mov %r14,(%rbx)
|
||||
mov %rsi,16(%rbx)
|
||||
mov -80(%rbp),%rsi
|
||||
mov %rsi,24(%rbx)
|
||||
mov -88(%rbp),%rsi
|
||||
mov %rsi,32(%rbx)
|
||||
mov -96(%rbp),%rsi
|
||||
mov %rsi,40(%rbx)
|
||||
mov -104(%rbp),%rsi
|
||||
mov %r15,56(%rbx)
|
||||
mov %rsi,48(%rbx)
|
||||
mov %rdi,64(%rbx)
|
||||
mov %rcx,72(%rbx)
|
||||
mov %rax,80(%rbx)
|
||||
mov %r11,88(%rbx)
|
||||
mov %r10,96(%rbx)
|
||||
mov %r9,104(%rbx)
|
||||
mov %r8,112(%rbx)
|
||||
mov %rdx,120(%rbx)
|
||||
// Restore the registers saved in the prologue, then tear down the
// frame (leave = mov %rbp,%rsp; pop %rbp).
mov -8(%rbp),%r15
|
||||
mov -16(%rbp),%r14
|
||||
mov -24(%rbp),%r13
|
||||
mov -32(%rbp),%r12
|
||||
mov -40(%rbp),%rbx
|
||||
leave
|
||||
ret
|
||||
.endfn Mul8x8Adx,globl
|
||||
|
||||
.end
|
||||
TIMELINE VIEW 0123456789 0123456789 0123456789 0123456789
|
||||
Index 0123456789 0123456789 0123456789 0123456789
|
||||
[0,0] DeER . . . . . . . . . . . . . . . . subq $104, %rsp
|
||||
[0,1] DeER . . . . . . . . . . . . . . . . movq %r15, -8(%rbp)
|
||||
[0,2] D=eER. . . . . . . . . . . . . . . . movq %r14, -16(%rbp)
|
||||
[0,3] D==eER . . . . . . . . . . . . . . . movq %r13, -24(%rbp)
|
||||
[0,4] D===eER . . . . . . . . . . . . . . . movq %r12, -32(%rbp)
|
||||
[0,5] D====eER . . . . . . . . . . . . . . . movq %rbx, -40(%rbp)
|
||||
[0,6] .DeE---R . . . . . . . . . . . . . . . movq %rdx, %r12
|
||||
[0,7] .DeeeeeER . . . . . . . . . . . . . . . movq (%rdx), %rdx
|
||||
[0,8] .D=====eeeeeeeeeER . . . . . . . . . . . . . mulxq (%rsi), %rax, %rcx
|
||||
[0,9] . D====eE--------R . . . . . . . . . . . . . movq %rdi, -48(%rbp)
|
||||
[0,10] . D=======eE-----R . . . . . . . . . . . . . movq %rax, -56(%rbp)
|
||||
[0,11] . D=====eeeeeeeeeER . . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %rax
|
||||
[0,12] . D============eER . . . . . . . . . . . . . addq %rdx, %rcx
|
||||
[0,13] . DeeeeeE--------R . . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,14] . D=====eeeeeeeeeER. . . . . . . . . . . . . mulxq 16(%rsi), %rdx, %rbx
|
||||
[0,15] . D============eER. . . . . . . . . . . . . adcq %rdx, %rax
|
||||
[0,16] . DeeeeeE--------R. . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,17] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r11
|
||||
[0,18] . D============eER . . . . . . . . . . . . adcq %rdx, %rbx
|
||||
[0,19] . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,20] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r10
|
||||
[0,21] . .D============eER . . . . . . . . . . . . adcq %rdx, %r11
|
||||
[0,22] . .DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,23] . .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 40(%rsi), %rdx, %r9
|
||||
[0,24] . . D============eER . . . . . . . . . . . . adcq %rdx, %r10
|
||||
[0,25] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,26] . . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 48(%rsi), %rdx, %r8
|
||||
[0,27] . . D============eER . . . . . . . . . . . . adcq %rdx, %r9
|
||||
[0,28] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx
|
||||
[0,29] . . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 56(%rsi), %rdx, %rdi
|
||||
[0,30] . . D============eER. . . . . . . . . . . . adcq %rdx, %r8
|
||||
[0,31] . . D=============eER . . . . . . . . . . . adcq $0, %rdi
|
||||
[0,32] . . D---------------R . . . . . . . . . . . xorl %r13d, %r13d
|
||||
[0,33] . . DeeeeeE---------R . . . . . . . . . . . movq 8(%r12), %rdx
|
||||
[0,34] . . D====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r15, %r14
|
||||
[0,35] . . D=======eE-----R . . . . . . . . . . . adoxq %r15, %rcx
|
||||
[0,36] . . D=============eER . . . . . . . . . . . adcxq %r14, %rax
|
||||
[0,37] . . .D=======eE-----R . . . . . . . . . . . movq %rcx, -64(%rbp)
|
||||
[0,38] . . .D====eeeeeeeeeER . . . . . . . . . . . mulxq 8(%rsi), %r14, %rcx
|
||||
[0,39] . . .D=============eER . . . . . . . . . . . adoxq %r14, %rax
|
||||
[0,40] . . . D=============eER . . . . . . . . . . . adcxq %rcx, %rbx
|
||||
[0,41] . . . D====eeeeeeeeeE-R . . . . . . . . . . . mulxq 16(%rsi), %r14, %rcx
|
||||
[0,42] . . . D==============eER. . . . . . . . . . . adoxq %r14, %rbx
|
||||
[0,43] . . . D==============eER . . . . . . . . . . adcxq %rcx, %r11
|
||||
[0,44] . . . D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r14, %rcx
|
||||
[0,45] . . . D===============eER . . . . . . . . . . adoxq %r14, %r11
|
||||
[0,46] . . . D===============eER . . . . . . . . . . adcxq %rcx, %r10
|
||||
[0,47] . . . D====eeeeeeeeeE---R . . . . . . . . . . mulxq 32(%rsi), %r14, %rcx
|
||||
[0,48] . . . D================eER . . . . . . . . . . adoxq %r14, %r10
|
||||
[0,49] . . . D================eER. . . . . . . . . . adcxq %rcx, %r9
|
||||
[0,50] . . . D====eeeeeeeeeE----R. . . . . . . . . . mulxq 40(%rsi), %r14, %rcx
|
||||
[0,51] . . . D=================eER . . . . . . . . . adoxq %r14, %r9
|
||||
[0,52] . . . .D=================eER . . . . . . . . . adcxq %rcx, %r8
|
||||
[0,53] . . . .D====eeeeeeeeeE-----R . . . . . . . . . mulxq 48(%rsi), %r14, %rcx
|
||||
[0,54] . . . .D==================eER . . . . . . . . . adoxq %r14, %r8
|
||||
[0,55] . . . . D==================eER . . . . . . . . . adcxq %rcx, %rdi
|
||||
[0,56] . . . . D====eeeeeeeeeE------R . . . . . . . . . mulxq 56(%rsi), %rdx, %rcx
|
||||
[0,57] . . . . D===================eER. . . . . . . . . adoxq %rdx, %rdi
|
||||
[0,58] . . . . D===================eER . . . . . . . . adcxq %r13, %rcx
|
||||
[0,59] . . . . DeeeeeE---------------R . . . . . . . . movq 16(%r12), %rdx
|
||||
[0,60] . . . . D====================eER . . . . . . . . adoxq %r13, %rcx
|
||||
[0,61] . . . . D====eeeeeeeeeE-------R . . . . . . . . mulxq (%rsi), %r15, %r14
|
||||
[0,62] . . . . D---------------------R . . . . . . . . xorl %r13d, %r13d
|
||||
[0,63] . . . . D=======eE------------R . . . . . . . . adoxq %r15, %rax
|
||||
[0,64] . . . . D============eE------R . . . . . . . . adcxq %r14, %rbx
|
||||
[0,65] . . . . D=======eE-----------R . . . . . . . . movq %rax, -72(%rbp)
|
||||
[0,66] . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 8(%rsi), %r14, %rax
|
||||
[0,67] . . . . .D============eE-----R . . . . . . . . adoxq %r14, %rbx
|
||||
[0,68] . . . . .D=============eE----R . . . . . . . . adcxq %rax, %r11
|
||||
[0,69] . . . . .D====eeeeeeeeeE-----R . . . . . . . . mulxq 16(%rsi), %r14, %rax
|
||||
[0,70] . . . . . D=============eE---R . . . . . . . . adoxq %r14, %r11
|
||||
[0,71] . . . . . D==============eE--R . . . . . . . . adcxq %rax, %r10
|
||||
[0,72] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 24(%rsi), %r14, %rax
|
||||
[0,73] . . . . . D==============eE-R . . . . . . . . adoxq %r14, %r10
|
||||
[0,74] . . . . . D===============eER . . . . . . . . adcxq %rax, %r9
|
||||
[0,75] . . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r14, %rax
|
||||
[0,76] . . . . . D===============eER . . . . . . . . adoxq %r14, %r9
|
||||
[0,77] . . . . . D================eER . . . . . . . . adcxq %rax, %r8
|
||||
[0,78] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 40(%rsi), %r14, %rax
|
||||
[0,79] . . . . . D================eER. . . . . . . . adoxq %r14, %r8
|
||||
[0,80] . . . . . D=================eER . . . . . . . adcxq %rax, %rdi
|
||||
[0,81] . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq 48(%rsi), %r14, %rax
|
||||
[0,82] . . . . . .D=================eER . . . . . . . adoxq %r14, %rdi
|
||||
[0,83] . . . . . .D==================eER . . . . . . . adcxq %rax, %rcx
|
||||
[0,84] . . . . . .D====eeeeeeeeeE------R . . . . . . . mulxq 56(%rsi), %rdx, %rax
|
||||
[0,85] . . . . . . D==================eER . . . . . . . adoxq %rdx, %rcx
|
||||
[0,86] . . . . . . D===================eER. . . . . . . adcxq %r13, %rax
|
||||
[0,87] . . . . . . D====================eER . . . . . . adoxq %r13, %rax
|
||||
[0,88] . . . . . . D----------------------R . . . . . . xorl %r13d, %r13d
|
||||
[0,89] . . . . . . DeeeeeE----------------R . . . . . . movq 24(%r12), %rdx
|
||||
[0,90] . . . . . . D====eeeeeeeeeE-------R . . . . . . mulxq (%rsi), %r15, %r14
|
||||
[0,91] . . . . . . D===========eE--------R . . . . . . adoxq %r15, %rbx
|
||||
[0,92] . . . . . . D=============eE------R . . . . . . adcxq %r14, %r11
|
||||
[0,93] . . . . . . D===========eE-------R . . . . . . movq %rbx, -80(%rbp)
|
||||
[0,94] . . . . . . D=============eE-----R . . . . . . movq %r11, %r15
|
||||
[0,95] . . . . . . D====eeeeeeeeeE------R . . . . . . mulxq 8(%rsi), %r14, %rbx
|
||||
[0,96] . . . . . . D=============eE----R . . . . . . adoxq %r14, %r15
|
||||
[0,97] . . . . . . D==============eE---R . . . . . . adcxq %rbx, %r10
|
||||
[0,98] . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 16(%rsi), %rbx, %r11
|
||||
[0,99] . . . . . . .D==============eE--R . . . . . . adoxq %rbx, %r10
|
||||
[0,100] . . . . . . .D===============eE-R . . . . . . adcxq %r11, %r9
|
||||
[0,101] . . . . . . .D====eeeeeeeeeE----R . . . . . . mulxq 24(%rsi), %rbx, %r11
|
||||
[0,102] . . . . . . . D===============eER . . . . . . adoxq %rbx, %r9
|
||||
[0,103] . . . . . . . D================eER . . . . . . adcxq %r11, %r8
|
||||
[0,104] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 32(%rsi), %rbx, %r11
|
||||
[0,105] . . . . . . . D================eER . . . . . . adoxq %rbx, %r8
|
||||
[0,106] . . . . . . . D=================eER . . . . . . adcxq %r11, %rdi
|
||||
[0,107] . . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 40(%rsi), %rbx, %r11
|
||||
[0,108] . . . . . . . D=================eER. . . . . . adoxq %rbx, %rdi
|
||||
[0,109] . . . . . . . D==================eER . . . . . adcxq %r11, %rcx
|
||||
[0,110] . . . . . . . D====eeeeeeeeeE------R . . . . . mulxq 48(%rsi), %rbx, %r11
|
||||
[0,111] . . . . . . . D==================eER . . . . . adoxq %rbx, %rcx
|
||||
[0,112] . . . . . . . D===================eER . . . . . adcxq %r11, %rax
|
||||
[0,113] . . . . . . . D====eeeeeeeeeE-------R . . . . . mulxq 56(%rsi), %rdx, %r11
|
||||
[0,114] . . . . . . . .D===================eER . . . . . adoxq %rdx, %rax
|
||||
[0,115] . . . . . . . .D====================eER. . . . . adcxq %r13, %r11
|
||||
[0,116] . . . . . . . .DeeeeeE----------------R. . . . . movq 32(%r12), %rdx
|
||||
[0,117] . . . . . . . .D=====================eER . . . . adoxq %r13, %r11
|
||||
[0,118] . . . . . . . .D=====E-----------------R . . . . xorl %ebx, %ebx
|
||||
[0,119] . . . . . . . . D====eeeeeeeeeE--------R . . . . mulxq (%rsi), %r14, %r13
|
||||
[0,120] . . . . . . . . D===========eE---------R . . . . adoxq %r14, %r15
|
||||
[0,121] . . . . . . . . D=============eE-------R . . . . adcxq %r13, %r10
|
||||
[0,122] . . . . . . . . D===========eE--------R . . . . movq %r15, -88(%rbp)
|
||||
[0,123] . . . . . . . . D====eeeeeeeeeE-------R . . . . mulxq 8(%rsi), %r14, %r13
|
||||
[0,124] . . . . . . . . D=============eE------R . . . . movq %r10, %r15
|
||||
[0,125] . . . . . . . . D============eE------R . . . . adcxq %r13, %r9
|
||||
[0,126] . . . . . . . . D=============eE-----R . . . . adoxq %r14, %r15
|
||||
[0,127] . . . . . . . . D====eeeeeeeeeE------R . . . . mulxq 16(%rsi), %r13, %r10
|
||||
[0,128] . . . . . . . . D=============eE----R . . . . adoxq %r13, %r9
|
||||
[0,129] . . . . . . . . D==============eE---R . . . . adcxq %r10, %r8
|
||||
[0,130] . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 24(%rsi), %r13, %r10
|
||||
[0,131] . . . . . . . . .D==============eE--R . . . . adcxq %r10, %rdi
|
||||
[0,132] . . . . . . . . .D===============eE-R . . . . adoxq %r13, %r8
|
||||
[0,133] . . . . . . . . .D====eeeeeeeeeE----R . . . . mulxq 32(%rsi), %r13, %r10
|
||||
[0,134] . . . . . . . . . D===============eER . . . . adoxq %r13, %rdi
|
||||
[0,135] . . . . . . . . . D================eER . . . . adcxq %r10, %rcx
|
||||
[0,136] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 40(%rsi), %r13, %r10
|
||||
[0,137] . . . . . . . . . D================eER . . . . adoxq %r13, %rcx
|
||||
[0,138] . . . . . . . . . D=================eER . . . . adcxq %r10, %rax
|
||||
[0,139] . . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 48(%rsi), %r13, %r10
|
||||
[0,140] . . . . . . . . . D=================eER. . . . adoxq %r13, %rax
|
||||
[0,141] . . . . . . . . . D==================eER . . . adcxq %r10, %r11
|
||||
[0,142] . . . . . . . . . D====eeeeeeeeeE------R . . . mulxq 56(%rsi), %rdx, %r10
|
||||
[0,143] . . . . . . . . . D==================eER . . . adoxq %rdx, %r11
|
||||
[0,144] . . . . . . . . . D===================eER . . . adcxq %rbx, %r10
|
||||
[0,145] . . . . . . . . . DeeeeeE---------------R . . . movq 40(%r12), %rdx
|
||||
[0,146] . . . . . . . . . D====================eER . . . adoxq %rbx, %r10
|
||||
[0,147] . . . . . . . . . .D====eeeeeeeeeE-------R . . . mulxq (%rsi), %r14, %r13
|
||||
[0,148] . . . . . . . . . .D---------------------R . . . xorl %ebx, %ebx
|
||||
[0,149] . . . . . . . . . .D============eE-------R . . . adoxq %r14, %r15
|
||||
[0,150] . . . . . . . . . . D============eE------R . . . movq %r15, -96(%rbp)
|
||||
[0,151] . . . . . . . . . . D============eE------R . . . adcxq %r13, %r9
|
||||
[0,152] . . . . . . . . . . D=====eeeeeeeeeE-----R . . . mulxq 8(%rsi), %r14, %r13
|
||||
[0,153] . . . . . . . . . . D============eE-----R . . . movq %r9, %r15
|
||||
[0,154] . . . . . . . . . . D=============eE----R . . . adoxq %r14, %r15
|
||||
[0,155] . . . . . . . . . . D==============eE---R . . . adcxq %r13, %r8
|
||||
[0,156] . . . . . . . . . . D====eeeeeeeeeE----R . . . mulxq 16(%rsi), %r13, %r9
|
||||
[0,157] . . . . . . . . . . D==============eE--R . . . adoxq %r13, %r8
|
||||
[0,158] . . . . . . . . . . D===============eE-R . . . adcxq %r9, %rdi
|
||||
[0,159] . . . . . . . . . . D====eeeeeeeeeE---R . . . mulxq 24(%rsi), %r13, %r9
|
||||
[0,160] . . . . . . . . . . D===============eER . . . adoxq %r13, %rdi
|
||||
[0,161] . . . . . . . . . . D================eER. . . adcxq %r9, %rcx
|
||||
[0,162] . . . . . . . . . . .D====eeeeeeeeeE---R. . . mulxq 32(%rsi), %r13, %r9
|
||||
[0,163] . . . . . . . . . . .D================eER . . adoxq %r13, %rcx
|
||||
[0,164] . . . . . . . . . . .D=================eER . . adcxq %r9, %rax
|
||||
[0,165] . . . . . . . . . . . D====eeeeeeeeeE----R . . mulxq 40(%rsi), %r13, %r9
|
||||
[0,166] . . . . . . . . . . . D=================eER . . adoxq %r13, %rax
|
||||
[0,167] . . . . . . . . . . . D==================eER . . adcxq %r9, %r11
|
||||
[0,168] . . . . . . . . . . . D====eeeeeeeeeE-----R . . mulxq 48(%rsi), %r13, %r9
|
||||
[0,169] . . . . . . . . . . . D==================eER. . adoxq %r13, %r11
|
||||
[0,170] . . . . . . . . . . . D===================eER . adcxq %r9, %r10
|
||||
[0,171] . . . . . . . . . . . D====eeeeeeeeeE------R . mulxq 56(%rsi), %rdx, %r9
|
||||
[0,172] . . . . . . . . . . . D===================eER . adoxq %rdx, %r10
|
||||
[0,173] . . . . . . . . . . . D====================eER . adcxq %rbx, %r9
|
||||
[0,174] . . . . . . . . . . . D====================eER. adoxq %rbx, %r9
|
||||
[0,175] . . . . . . . . . . . D----------------------R. xorl %ebx, %ebx
|
||||
[0,176] . . . . . . . . . . . DeeeeeE----------------R. movq 48(%r12), %rdx
|
||||
[0,177] . . . . . . . . . . . .D=====eeeeeeeeeE------R. mulxq (%rsi), %r14, %r13
|
||||
[0,178] . . . . . . . . . . . .D==========eE---------R. adoxq %r14, %r15
|
||||
[0,179] . . . . . . . . . . . .D==============eE-----R. adcxq %r13, %r8
|
||||
[0,180] . . . . . . . . . . . . D==========eE--------R. movq %r15, -104(%rbp)
|
||||
[0,181] . . . . . . . . . . . . D=====eeeeeeeeeE-----R. mulxq 8(%rsi), %r14, %r13
|
||||
[0,182] . . . . . . . . . . . . D==============eE----R. movq %r8, %r15
|
||||
[0,183] . . . . . . . . . . . . D==============eE---R. adcxq %r13, %rdi
|
||||
[0,184] . . . . . . . . . . . . D===============eE--R. adoxq %r14, %r15
|
||||
[0,185] . . . . . . . . . . . . D=====eeeeeeeeeE----R. mulxq 16(%rsi), %r13, %r8
|
||||
[0,186] . . . . . . . . . . . . D===============eE-R. adoxq %r13, %rdi
|
||||
[0,187] . . . . . . . . . . . . D================eER. adcxq %r8, %rcx
|
||||
[0,188] . . . . . . . . . . . . D=====eeeeeeeeeE---R. mulxq 24(%rsi), %r13, %r8
|
||||
[0,189] . . . . . . . . . . . . D================eER adoxq %r13, %rcx
|
|
@ -1,41 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Computes C = A - B
|
||||
//
|
||||
// Aliasing such as sbb(A,A,B) or sbb(B,A,B) is OK.
|
||||
//
|
||||
// @param rdi is C
|
||||
// @param rsi is A
|
||||
// @param rdx is B
|
||||
// @param rcx is number of subtracts
|
||||
// @return al is carry
|
||||
sbb: .leafprologue
|
||||
// NOTE(review): the guard below tests only %ecx while `loop` counts on the
// full %rcx; confirm callers pass a count that fits in 32 bits.
test %ecx,%ecx // nothing to do for a zero count; test also clears CF so 1: yields 0
|
||||
jz 1f
|
||||
xor %r9d,%r9d // r9 = limb index 0; xor also clears CF ahead of the first sbb
|
||||
0: mov (%rsi,%r9,8),%rax // rax = A[i]
|
||||
sbb (%rdx,%r9,8),%rax // rax = A[i] - B[i] - CF
|
||||
mov %rax,(%rdi,%r9,8) // C[i] = rax
|
||||
inc %r9d // inc leaves CF untouched, so the borrow reaches the next sbb
|
||||
loop 0b // decrements rcx and branches without modifying flags
|
||||
1: setb %al // al = 1 if the last subtract borrowed (CF set), else 0
|
||||
.leafepilogue
|
||||
.endfn sbb,globl
|
|
@ -39,6 +39,7 @@
|
|||
#include "third_party/mbedtls/des.h"
|
||||
#include "third_party/mbedtls/dhm.h"
|
||||
#include "third_party/mbedtls/ecp.h"
|
||||
#include "third_party/mbedtls/ecp_internal.h"
|
||||
#include "third_party/mbedtls/entropy.h"
|
||||
#include "third_party/mbedtls/error.h"
|
||||
#include "third_party/mbedtls/gcm.h"
|
||||
|
@ -148,17 +149,17 @@ static void P256_MPI(mbedtls_mpi *N) {
|
|||
|
||||
static void P256_JUSTINE(mbedtls_mpi *N) {
|
||||
memcpy(N->p, rng, 8 * 8);
|
||||
ecp_mod_p256(N);
|
||||
secp256r1(N->p);
|
||||
}
|
||||
|
||||
static void P384_MPI(mbedtls_mpi *N) {
|
||||
memcpy(N->p, rng, 8 * 8);
|
||||
memcpy(N->p, rng, 12 * 8);
|
||||
ASSERT_EQ(0, mbedtls_mpi_mod_mpi(N, N, &grp.P));
|
||||
}
|
||||
|
||||
static void P384_JUSTINE(mbedtls_mpi *N) {
|
||||
memcpy(N->p, rng, 8 * 8);
|
||||
ecp_mod_p384(N);
|
||||
memcpy(N->p, rng, 12 * 8);
|
||||
secp384r1(N->p);
|
||||
}
|
||||
|
||||
BENCH(p256, bench) {
|
||||
|
@ -166,6 +167,7 @@ BENCH(p256, bench) {
|
|||
mbedtls_ecp_group_init(&grp);
|
||||
mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP256R1);
|
||||
mbedtls_mpi x = {1, 8, gc(calloc(8, 8))};
|
||||
rngset(x.p, 8 * 8, rand64, -1);
|
||||
EZBENCH2("P-256 modulus MbedTLS MPI lib", donothing, P256_MPI(&x));
|
||||
EZBENCH2("P-256 modulus Justine rewrite", donothing, P256_JUSTINE(&x));
|
||||
mbedtls_ecp_group_free(&grp);
|
||||
|
@ -176,10 +178,10 @@ BENCH(p384, bench) {
|
|||
#ifdef MBEDTLS_ECP_C
|
||||
mbedtls_ecp_group_init(&grp);
|
||||
mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP384R1);
|
||||
uint64_t y[12];
|
||||
mbedtls_mpi x = {1, 12, gc(calloc(12, 8))};
|
||||
EZBENCH2("P-384 modulus MbedTLS MPI lib", donothing, P384_MPI(&x));
|
||||
EZBENCH2("P-384 modulus Justine rewrite", donothing, P384_JUSTINE(&x));
|
||||
rngset(x.p, 12 * 8, rand64, -1);
|
||||
mbedtls_ecp_group_free(&grp);
|
||||
#endif
|
||||
}
|
||||
|
@ -1112,3 +1114,49 @@ BENCH(cmpint, bench) {
|
|||
EZBENCH2("cmpint 3.1", donothing, mbedtls_mpi_cmp_int(&z, 0));
|
||||
EZBENCH2("cmpint 3.2", donothing, mbedtls_mpi_cmp_int(&z, 1));
|
||||
}
|
||||
|
||||
/**
 * Reference limb-wise subtraction used to cross-check F2().
 *
 * Applies the SBB() macro (defined elsewhere; presumably subtract with
 * borrow) to each of the n limbs, feeding the carry variable `c` back in
 * as both carry-in and carry-out.
 *
 * @param d receives n result limbs
 * @param a first operand, n limbs
 * @param b second operand, n limbs
 * @param n number of limbs to process
 * @return value left in the carry variable after the last limb (0 if n is 0)
 */
mbedtls_mpi_uint F1(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a,
                    const mbedtls_mpi_uint *b, size_t n) {
  size_t i;
  mbedtls_mpi_uint c;
  /* the unused locals `cf` and `x` from the original were dropped */
  for (c = 0, i = 0; i < n; ++i) SBB(d[i], a[i], b[i], c, c);
  return c;
}
|
||||
|
||||
/**
 * Inline-assembly limb-wise subtraction: d[i] = a[i] - b[i] - borrow.
 *
 * The leading xor clears CF; inside the loop only `sbb` changes CF
 * (`lea` and `dec` leave it alone), so the borrow chains across limbs
 * and is returned through the "=@ccb" flag output.
 *
 * @param d receives n result limbs
 * @param a first operand, n limbs
 * @param b second operand, n limbs
 * @param n number of limbs to process
 * @return 1 if the final limb borrowed, otherwise 0 (0 when n is 0)
 */
mbedtls_mpi_uint F2(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a,
                    const mbedtls_mpi_uint *b, size_t n) {
  size_t i;
  unsigned char cf;
  mbedtls_mpi_uint x;
  /* The loop is bottom-tested with dec/jnz, so entering it with n == 0
     would wrap the counter and run off the end of the arrays. Return
     "no borrow" instead, which is what F1() yields for n == 0. */
  if (!n) return 0;
  asm volatile("xor\t%1,%1\n\t"
               ".align\t16\n1:\t"
               "mov\t(%5,%3,8),%1\n\t"
               "sbb\t(%6,%3,8),%1\n\t"
               "mov\t%1,(%4,%3,8)\n\t"
               "lea\t1(%3),%3\n\t"
               "dec\t%2\n\t"
               "jnz\t1b"
               : "=@ccb"(cf), "=&r"(x), "+c"(n), "=r"(i)
               : "r"(d), "r"(a), "r"(b), "3"(0)
               : "cc", "memory");
  return cf;
}
|
||||
|
||||
TEST(wut, wut) {
|
||||
uint64_t A[8];
|
||||
uint64_t B[8];
|
||||
uint64_t C[8];
|
||||
uint64_t D[8];
|
||||
int i;
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
rngset(A, sizeof(A), rand64, -1);
|
||||
rngset(B, sizeof(B), rand64, -1);
|
||||
int x = F1(C, A, B, 8);
|
||||
int y = F2(D, A, B, 8);
|
||||
ASSERT_EQ(x, y);
|
||||
ASSERT_EQ(0, memcmp(C, D, sizeof(C)));
|
||||
}
|
||||
}
|
||||
|
|
2039
third_party/mbedtls/bignum.c
vendored
2039
third_party/mbedtls/bignum.c
vendored
File diff suppressed because it is too large
Load diff
4
third_party/mbedtls/config.h
vendored
4
third_party/mbedtls/config.h
vendored
|
@ -80,17 +80,17 @@
|
|||
#ifndef TINY
|
||||
#define MBEDTLS_ECP_DP_SECP256R1_ENABLED
|
||||
#define MBEDTLS_ECP_DP_SECP384R1_ENABLED
|
||||
#define MBEDTLS_ECP_DP_SECP521R1_ENABLED
|
||||
#define MBEDTLS_ECP_DP_CURVE25519_ENABLED
|
||||
#define MBEDTLS_ECP_DP_CURVE448_ENABLED
|
||||
/*#define MBEDTLS_ECP_DP_SECP192R1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_SECP224R1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_SECP521R1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_SECP192K1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_SECP224K1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_SECP256K1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_BP256R1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_BP384R1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_BP512R1_ENABLED*/
|
||||
/*#define MBEDTLS_ECP_DP_CURVE448_ENABLED*/
|
||||
#endif
|
||||
|
||||
#define MBEDTLS_X509_CHECK_KEY_USAGE
|
||||
|
|
2
third_party/mbedtls/ecdh.h
vendored
2
third_party/mbedtls/ecdh.h
vendored
|
@ -1,8 +1,8 @@
|
|||
#ifndef MBEDTLS_ECDH_H
|
||||
#define MBEDTLS_ECDH_H
|
||||
#include "third_party/mbedtls/config.h"
|
||||
#include "third_party/mbedtls/ecdh_everest.h"
|
||||
#include "third_party/mbedtls/ecp.h"
|
||||
#include "third_party/mbedtls/everest.h"
|
||||
/* clang-format off */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
279
third_party/mbedtls/ecdh_everest.c
vendored
Normal file
279
third_party/mbedtls/ecdh_everest.c
vendored
Normal file
|
@ -0,0 +1,279 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright The Mbed TLS Contributors │
|
||||
│ │
|
||||
│ Licensed under the Apache License, Version 2.0 (the "License"); │
|
||||
│ you may not use this file except in compliance with the License. │
|
||||
│ You may obtain a copy of the License at │
|
||||
│ │
|
||||
│ http://www.apache.org/licenses/LICENSE-2.0 │
|
||||
│ │
|
||||
│ Unless required by applicable law or agreed to in writing, software │
|
||||
│ distributed under the License is distributed on an "AS IS" BASIS, │
|
||||
│ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. │
|
||||
│ See the License for the specific language governing permissions and │
|
||||
│ limitations under the License. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "third_party/mbedtls/ecdh_everest.h"
|
||||
#include "third_party/mbedtls/everest.h"
|
||||
#if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
|
||||
#define KEYSIZE 32
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Mbed TLS (Apache 2.0)\\n\
|
||||
Copyright ARM Limited\\n\
|
||||
Copyright Mbed TLS Contributors\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
/* clang-format off */
|
||||
|
||||
/**
|
||||
* \brief This function sets up the ECDH context with the information
|
||||
* given.
|
||||
*
|
||||
* This function should be called after mbedtls_ecdh_init() but
|
||||
* before mbedtls_ecdh_make_params(). There is no need to call
|
||||
* this function before mbedtls_ecdh_read_params().
|
||||
*
|
||||
* This is the first function used by a TLS server for
|
||||
* ECDHE ciphersuites.
|
||||
*
|
||||
* \param ctx The ECDH context to set up.
|
||||
* \param grp_id The group id of the group to set up the context for.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
*/
|
||||
int mbedtls_everest_setup(mbedtls_ecdh_context_everest *ctx, int grp_id)
{
    /* this backend only accepts the Curve25519 group id */
    if (grp_id == MBEDTLS_ECP_DP_CURVE25519) {
        /* start from an all-zero context (secret and peer point cleared) */
        mbedtls_platform_zeroize(ctx, sizeof(*ctx));
        return 0;
    }
    return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
}
|
||||
|
||||
/**
|
||||
* \brief This function frees a context.
|
||||
*
|
||||
* \param ctx The context to free.
|
||||
*/
|
||||
void mbedtls_everest_free(mbedtls_ecdh_context_everest *ctx)
{
    /* a NULL context is accepted and ignored; otherwise wipe the key material */
    if (ctx)
        mbedtls_platform_zeroize(ctx, sizeof(*ctx));
}
|
||||
|
||||
/**
|
||||
* \brief This function generates a public key and a TLS
|
||||
* ServerKeyExchange payload.
|
||||
*
|
||||
* This is the second function used by a TLS server for ECDHE
|
||||
* ciphersuites. (It is called after mbedtls_ecdh_setup().)
|
||||
*
|
||||
* \note This function assumes that the ECP group (grp) of the
|
||||
* \p ctx context has already been properly set,
|
||||
* for example, using mbedtls_ecp_group_load().
|
||||
*
|
||||
* \see ecp.h
|
||||
*
|
||||
* \param ctx The ECDH context.
|
||||
* \param olen The number of characters written.
|
||||
* \param buf The destination buffer.
|
||||
* \param blen The length of the destination buffer.
|
||||
* \param f_rng The RNG function.
|
||||
* \param p_rng The RNG context.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
|
||||
*/
|
||||
int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *ctx, size_t *olen,
                                unsigned char *buf, size_t blen,
                                int (*f_rng)(void *, unsigned char *, size_t),
                                void *p_rng)
{
    int rc;
    uint8_t point[KEYSIZE] = {9}; /* first byte 9, remaining bytes zero */

    /* draw a fresh private scalar straight into the context */
    rc = f_rng(p_rng, ctx->our_secret, KEYSIZE);
    if (rc != 0)
        return rc;

    /* 4 header bytes followed by the KEYSIZE-byte public value */
    *olen = KEYSIZE + 4;
    if (blen < *olen)
        return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;

    buf[0] = MBEDTLS_ECP_TLS_NAMED_CURVE;
    buf[1] = MBEDTLS_ECP_TLS_CURVE25519 >> 8;
    buf[2] = MBEDTLS_ECP_TLS_CURVE25519 & 0xFF;
    buf[3] = KEYSIZE;

    /* public value = curve25519(our_secret, point) written after the header */
    curve25519(buf + 4, ctx->our_secret, point);

    /* reuse `point` as an all-zero reference and reject an all-zero output */
    point[0] = 0;
    if (timingsafe_memcmp(buf + 4, point, KEYSIZE) == 0)
        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
    return 0;
}
|
||||
|
||||
/**
|
||||
* \brief This function parses and processes a TLS ServerKeyExchange
|
||||
* payload.
|
||||
*
|
||||
* This is the first function used by a TLS client for ECDHE
|
||||
* ciphersuites.
|
||||
*
|
||||
* \see ecp.h
|
||||
*
|
||||
* \param ctx The ECDH context.
|
||||
* \param buf The pointer to the start of the input buffer.
|
||||
* \param end The address for one Byte past the end of the buffer.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
|
||||
*/
|
||||
int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *ctx,
                                const unsigned char **buf,
                                const unsigned char *end)
{
    const unsigned char *p = *buf;

    /* need one length byte plus KEYSIZE bytes of peer point */
    if (end - p < KEYSIZE + 1)
        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;

    /* the length byte is consumed even when it is then rejected */
    *buf = p + 1;
    if (p[0] != KEYSIZE)
        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;

    memcpy(ctx->peer_point, p + 1, KEYSIZE);
    *buf = p + 1 + KEYSIZE; /* leave the cursor just past the point */
    return 0;
}
|
||||
|
||||
/**
|
||||
* \brief This function sets up an ECDH context from an EC key.
|
||||
*
|
||||
* It is used by clients and servers in place of the
|
||||
* ServerKeyExchange for static ECDH, and imports ECDH
|
||||
* parameters from the EC key information of a certificate.
|
||||
*
|
||||
* \see ecp.h
|
||||
*
|
||||
* \param ctx The ECDH context to set up.
|
||||
* \param key The EC key to use.
|
||||
* \param side Defines the source of the key: 1: Our key, or
|
||||
* 0: The key of the peer.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
|
||||
*/
|
||||
int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *ctx,
|
||||
const mbedtls_ecp_keypair *key,
|
||||
mbedtls_everest_ecdh_side side)
|
||||
{
|
||||
size_t olen = 0;
|
||||
mbedtls_everest_ecdh_side s;
|
||||
switch (side)
|
||||
{
|
||||
case MBEDTLS_EVEREST_ECDH_THEIRS:
|
||||
return mbedtls_ecp_point_write_binary(&key->grp, &key->Q,
|
||||
MBEDTLS_ECP_PF_COMPRESSED, &olen,
|
||||
ctx->peer_point, KEYSIZE);
|
||||
case MBEDTLS_EVEREST_ECDH_OURS:
|
||||
return mbedtls_mpi_write_binary_le(&key->d, ctx->our_secret, KEYSIZE);
|
||||
default:
|
||||
return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief This function generates a public key and a TLS
|
||||
* ClientKeyExchange payload.
|
||||
*
|
||||
* This is the second function used by a TLS client for ECDH(E)
|
||||
* ciphersuites.
|
||||
*
|
||||
* \see ecp.h
|
||||
*
|
||||
* \param ctx The ECDH context.
|
||||
* \param olen The number of Bytes written.
|
||||
* \param buf The destination buffer.
|
||||
* \param blen The size of the destination buffer.
|
||||
* \param f_rng The RNG function.
|
||||
* \param p_rng The RNG context.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
|
||||
*/
|
||||
int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *ctx, size_t *olen,
                                unsigned char *buf, size_t blen,
                                int (*f_rng)(void *, unsigned char *, size_t),
                                void *p_rng)
{
    int rc;
    unsigned char point[KEYSIZE] = {9}; /* first byte 9, remaining bytes zero */

    if (!ctx)
        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;

    /* draw a fresh private scalar straight into the context */
    rc = f_rng(p_rng, ctx->our_secret, KEYSIZE);
    if (rc != 0)
        return rc;

    /* one length byte followed by the KEYSIZE-byte public value */
    *olen = KEYSIZE + 1;
    if (blen < *olen)
        return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;

    buf[0] = KEYSIZE;
    curve25519(buf + 1, ctx->our_secret, point);

    /* reuse `point` as an all-zero reference and reject an all-zero output */
    point[0] = 0;
    if (timingsafe_memcmp(buf + 1, point, KEYSIZE) == 0)
        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
    return 0;
}
|
||||
|
||||
/**
|
||||
* \brief This function parses and processes a TLS ClientKeyExchange
|
||||
* payload.
|
||||
*
|
||||
* This is the third function used by a TLS server for ECDH(E)
|
||||
* ciphersuites. (It is called after mbedtls_ecdh_setup() and
|
||||
* mbedtls_ecdh_make_params().)
|
||||
*
|
||||
* \see ecp.h
|
||||
*
|
||||
* \param ctx The ECDH context.
|
||||
* \param buf The start of the input buffer.
|
||||
* \param blen The length of the input buffer.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
|
||||
*/
|
||||
int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *ctx,
                                const unsigned char *buf, size_t blen)
{
    /* need one length byte plus KEYSIZE bytes of peer point */
    if (blen < KEYSIZE + 1)
        return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;

    /* the leading length byte must announce exactly KEYSIZE bytes */
    if (buf[0] != KEYSIZE)
        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;

    memcpy(ctx->peer_point, buf + 1, KEYSIZE);
    return 0;
}
|
||||
|
||||
/**
|
||||
* \brief This function derives and exports the shared secret.
|
||||
*
|
||||
* This is the last function used by both TLS client
|
||||
* and servers.
|
||||
*
|
||||
* \note If \p f_rng is not NULL, it is used to implement
|
||||
* countermeasures against side-channel attacks.
|
||||
* For more information, see mbedtls_ecp_mul().
|
||||
*
|
||||
* \see ecp.h
|
||||
*
|
||||
* \param ctx The ECDH context.
|
||||
* \param olen The number of Bytes written.
|
||||
* \param buf The destination buffer.
|
||||
* \param blen The length of the destination buffer.
|
||||
* \param f_rng The RNG function.
|
||||
* \param p_rng The RNG context.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return An \c MBEDTLS_ERR_ECP_XXX error code on failure.
|
||||
*/
|
||||
int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *ctx, size_t *olen,
                                unsigned char *buf, size_t blen,
                                int (*f_rng)(void *, unsigned char *, size_t),
                                void *p_rng)
{
    /* f_rng and p_rng are not used here because this implementation does not
       need blinding since it has constant trace. (todo(jart): wut?) */
    int rejected;

    *olen = KEYSIZE;
    if (blen < *olen)
        return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;

    curve25519(buf, ctx->our_secret, ctx->peer_point);

    /* first check: the result must not compare equal to our own secret */
    rejected = !timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE);
    if (!rejected) {
        /* Wipe the DH secret and don't let the peer choose a small subgroup
           point: our_secret is all zeros once wiped, so this second compare
           rejects an all-zero result. */
        mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE);
        rejected = !timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE);
    }

    if (rejected) {
        /* on failure neither the output nor the secret is left behind */
        mbedtls_platform_zeroize(buf, KEYSIZE);
        mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE);
        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
    }
    return 0;
}
|
||||
|
||||
#endif
|
43
third_party/mbedtls/ecdh_everest.h
vendored
Normal file
43
third_party/mbedtls/ecdh_everest.h
vendored
Normal file
|
@ -0,0 +1,43 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
|
||||
#include "third_party/mbedtls/config.h"
|
||||
#include "third_party/mbedtls/ecp.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define MBEDTLS_ECP_TLS_CURVE25519 0x1d
|
||||
#define MBEDTLS_X25519_KEY_SIZE_BYTES 32
|
||||
|
||||
/* Selects which half of the context mbedtls_everest_get_params() fills. */
typedef enum {
|
||||
MBEDTLS_EVEREST_ECDH_OURS, /* write the key's private value into our_secret */
|
||||
MBEDTLS_EVEREST_ECDH_THEIRS, /* write the key's public point into peer_point */
|
||||
} mbedtls_everest_ecdh_side;
|
||||
|
||||
/* Key-exchange state: two fixed-size byte buffers, wiped by setup and free. */
typedef struct {
|
||||
unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES]; /* private scalar, filled by f_rng or get_params */
|
||||
unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES]; /* peer's public value, filled by read_params/read_public/get_params */
|
||||
} mbedtls_ecdh_context_everest;
|
||||
|
||||
int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int);
|
||||
void mbedtls_everest_free(mbedtls_ecdh_context_everest *);
|
||||
int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *,
|
||||
unsigned char *, size_t,
|
||||
int (*)(void *, unsigned char *, size_t),
|
||||
void *);
|
||||
int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *,
|
||||
const unsigned char **, const unsigned char *);
|
||||
int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *,
|
||||
const mbedtls_ecp_keypair *,
|
||||
mbedtls_everest_ecdh_side);
|
||||
int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *,
|
||||
unsigned char *, size_t,
|
||||
int (*)(void *, unsigned char *, size_t),
|
||||
void *);
|
||||
int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *,
|
||||
const unsigned char *, size_t);
|
||||
int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *,
|
||||
unsigned char *, size_t,
|
||||
int (*)(void *, unsigned char *, size_t),
|
||||
void *);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */
|
25
third_party/mbedtls/ecdsa.c
vendored
25
third_party/mbedtls/ecdsa.c
vendored
|
@ -28,31 +28,12 @@ Mbed TLS (Apache 2.0)\\n\
|
|||
Copyright ARM Limited\\n\
|
||||
Copyright Mbed TLS Contributors\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
/* clang-format off */
|
||||
/*
|
||||
* Elliptic curve DSA
|
||||
*
|
||||
* Copyright The Mbed TLS Contributors
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* References:
|
||||
/**
|
||||
* @fileoverview Elliptic curve Digital Signature Algorithm
|
||||
*
|
||||
* SEC1 http://www.secg.org/index.php?action=secg,docs_secg
|
||||
* @see SEC1 http://www.secg.org/index.php?action=secg,docs_secg
|
||||
*/
|
||||
|
||||
#if defined(MBEDTLS_ECDSA_C)
|
||||
|
|
12
third_party/mbedtls/ecp.c
vendored
12
third_party/mbedtls/ecp.c
vendored
|
@ -511,12 +511,15 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] =
|
|||
#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_CURVE25519, 29, 256, "x25519" },
|
||||
#endif
|
||||
#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_SECP256R1, 23, 256, "secp256r1" },
|
||||
#endif
|
||||
#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_SECP384R1, 24, 384, "secp384r1" },
|
||||
#endif
|
||||
#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_CURVE448, 30, 448, "x448" },
|
||||
#endif
|
||||
#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_SECP256R1, 23, 256, "secp256r1" },
|
||||
#endif
|
||||
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_SECP521R1, 25, 521, "secp521r1" },
|
||||
#endif
|
||||
|
@ -543,9 +546,6 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] =
|
|||
#endif
|
||||
#if defined(MBEDTLS_ECP_DP_SECP192K1_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_SECP192K1, 18, 192, "secp192k1" },
|
||||
#endif
|
||||
#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
|
||||
{ MBEDTLS_ECP_DP_CURVE448, 30, 448, "x448" },
|
||||
#endif
|
||||
{ MBEDTLS_ECP_DP_NONE, 0, 0, NULL },
|
||||
};
|
||||
|
|
139
third_party/mbedtls/ecp256.c
vendored
139
third_party/mbedtls/ecp256.c
vendored
|
@ -38,32 +38,15 @@ mbedtls_p256_isz( uint64_t p[4] )
|
|||
static inline bool
|
||||
mbedtls_p256_gte( uint64_t p[5] )
|
||||
{
|
||||
return( (p[4] ||
|
||||
p[3] > 0xffffffff00000001 ||
|
||||
return( ((int64_t)p[4] > 0 ||
|
||||
(p[3] > 0xffffffff00000001 ||
|
||||
(p[3] == 0xffffffff00000001 &&
|
||||
p[2] > 0x0000000000000000 ||
|
||||
(p[2] > 0x0000000000000000 ||
|
||||
(p[2] == 0x0000000000000000 &&
|
||||
p[1] > 0x00000000ffffffff ||
|
||||
(p[1] > 0x00000000ffffffff ||
|
||||
(p[1] == 0x00000000ffffffff &&
|
||||
p[0] > 0xffffffffffffffff ||
|
||||
(p[0] == 0xffffffffffffffff))))) );
|
||||
}
|
||||
|
||||
static int
|
||||
mbedtls_p256_cmp( const uint64_t a[5],
|
||||
const uint64_t b[5] )
|
||||
{
|
||||
if( a[4] < b[4] ) return -1;
|
||||
if( a[4] > b[4] ) return 1;
|
||||
if( a[3] < b[3] ) return -1;
|
||||
if( a[3] > b[3] ) return 1;
|
||||
if( a[2] < b[2] ) return -1;
|
||||
if( a[2] > b[2] ) return 1;
|
||||
if( a[1] < b[1] ) return -1;
|
||||
if( a[1] > b[1] ) return 1;
|
||||
if( a[0] < b[0] ) return -1;
|
||||
if( a[0] > b[0] ) return 1;
|
||||
return 0;
|
||||
(p[0] > 0xffffffffffffffff ||
|
||||
(p[0] == 0xffffffffffffffff))))))))) );
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -119,125 +102,49 @@ mbedtls_p256_rum( uint64_t p[5] )
|
|||
mbedtls_p256_red( p );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p256_mod(uint64_t X[8])
|
||||
{
|
||||
secp256r1(X);
|
||||
if ((int64_t)X[4] < 0) {
|
||||
do {
|
||||
mbedtls_p256_gro(X);
|
||||
} while ((int64_t)X[4] < 0);
|
||||
} else {
|
||||
while (mbedtls_p256_gte(X)) {
|
||||
mbedtls_p256_red(X);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
mbedtls_p256_sar( uint64_t p[5] )
|
||||
{
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
asm("sarq\t32+%0\n\t"
|
||||
"rcrq\t24+%0\n\t"
|
||||
"rcrq\t16+%0\n\t"
|
||||
"rcrq\t8+%0\n\t"
|
||||
"rcrq\t%0\n\t"
|
||||
: "+o"(*p)
|
||||
: /* no inputs */
|
||||
: "memory", "cc");
|
||||
#else
|
||||
p[0] = p[0] >> 1 | p[1] << 63;
|
||||
p[1] = p[1] >> 1 | p[2] << 63;
|
||||
p[2] = p[2] >> 1 | p[3] << 63;
|
||||
p[3] = p[3] >> 1 | p[4] << 63;
|
||||
p[4] = (int64_t)p[4] >> 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void
|
||||
mbedtls_p256_shl( uint64_t p[5] )
|
||||
{
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
asm("shlq\t%0\n\t"
|
||||
"rclq\t8+%0\n\t"
|
||||
"rclq\t16+%0\n\t"
|
||||
"rclq\t24+%0\n\t"
|
||||
"rclq\t32+%0\n\t"
|
||||
: "+o"(*p)
|
||||
: /* no inputs */
|
||||
: "memory", "cc");
|
||||
#else
|
||||
p[4] = p[3] >> 63;
|
||||
p[3] = p[3] << 1 | p[2] >> 63;
|
||||
p[2] = p[2] << 1 | p[1] >> 63;
|
||||
p[1] = p[1] << 1 | p[0] >> 63;
|
||||
p[0] = p[0] << 1;
|
||||
#endif
|
||||
mbedtls_p256_rum( p );
|
||||
}
|
||||
|
||||
static inline void
|
||||
mbedtls_p256_jam( uint64_t p[5] )
|
||||
{
|
||||
secp256r1( p );
|
||||
if( (int64_t)p[4] < 0 )
|
||||
do
|
||||
mbedtls_p256_gro( p );
|
||||
while( (int64_t)p[4] < 0 );
|
||||
else
|
||||
mbedtls_p256_rum( p );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p256_mul_1x1( uint64_t X[8],
|
||||
const uint64_t A[4], size_t n,
|
||||
const uint64_t B[4], size_t m )
|
||||
{
|
||||
uint128_t t;
|
||||
t = A[0];
|
||||
t *= B[0];
|
||||
X[ 0] = t;
|
||||
X[ 1] = t >> 64;
|
||||
X[ 2] = 0;
|
||||
X[ 3] = 0;
|
||||
X[ 4] = 0;
|
||||
X[ 5] = 0;
|
||||
X[ 6] = 0;
|
||||
X[ 7] = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p256_mul_nx1( uint64_t X[8],
|
||||
const uint64_t A[4], size_t n,
|
||||
const uint64_t B[4], size_t m )
|
||||
{
|
||||
mbedtls_mpi_mul_hlp1(n, A, X, B[0]);
|
||||
mbedtls_platform_zeroize( X + n + m, ( 8 - n - m ) * 8 );
|
||||
if ( n + m >= 4 )
|
||||
mbedtls_p256_jam( X );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p256_mul_4x4( uint64_t X[8],
|
||||
const uint64_t A[4], size_t n,
|
||||
const uint64_t B[4], size_t m )
|
||||
{
|
||||
Mul4x4( X, A, B );
|
||||
mbedtls_p256_jam( X );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p256_mul_nxm( uint64_t X[8],
|
||||
const uint64_t A[4], size_t n,
|
||||
const uint64_t B[4], size_t m )
|
||||
{
|
||||
if (A == X) A = gc(memcpy(malloc(4 * 8), A, 4 * 8));
|
||||
if (B == X) B = gc(memcpy(malloc(4 * 8), B, 4 * 8));
|
||||
Mul( X, A, n, B, m );
|
||||
mbedtls_platform_zeroize( X + n + m, (8 - n - m) * 8 );
|
||||
if ( n + m >= 4 )
|
||||
mbedtls_p256_jam( X );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p256_mul( uint64_t X[8],
|
||||
const uint64_t A[4], size_t n,
|
||||
const uint64_t B[4], size_t m )
|
||||
{
|
||||
if( n == 4 && m == 4 )
|
||||
mbedtls_p256_mul_4x4( X, A, n, B, m );
|
||||
else if( m == 1 && n == 1 )
|
||||
mbedtls_p256_mul_1x1( X, A, n, B, m );
|
||||
else if( m == 1 )
|
||||
mbedtls_p256_mul_nx1( X, A, n, B, m );
|
||||
else
|
||||
mbedtls_p256_mul_nxm( X, A, n, B, m );
|
||||
Mul4x4( X, A, B );
|
||||
mbedtls_p256_mod( X );
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
168
third_party/mbedtls/ecp384.c
vendored
168
third_party/mbedtls/ecp384.c
vendored
|
@ -36,42 +36,20 @@ mbedtls_p384_isz( uint64_t p[6] )
|
|||
}
|
||||
|
||||
static inline bool
|
||||
mbedtls_p384_gte( uint64_t p[7] )
|
||||
{
|
||||
return( (p[6] ||
|
||||
p[5] > 0xffffffffffffffff ||
|
||||
mbedtls_p384_gte( uint64_t p[7] ) {
|
||||
return( ((int64_t)p[6] > 0 ||
|
||||
(p[5] > 0xffffffffffffffff ||
|
||||
(p[5] == 0xffffffffffffffff &&
|
||||
p[4] > 0xffffffffffffffff ||
|
||||
(p[4] > 0xffffffffffffffff ||
|
||||
(p[4] == 0xffffffffffffffff &&
|
||||
p[3] > 0xffffffffffffffff ||
|
||||
(p[3] > 0xffffffffffffffff ||
|
||||
(p[3] == 0xffffffffffffffff &&
|
||||
p[2] > 0xfffffffffffffffe ||
|
||||
(p[2] > 0xfffffffffffffffe ||
|
||||
(p[2] == 0xfffffffffffffffe &&
|
||||
p[1] > 0xffffffff00000000 ||
|
||||
(p[1] > 0xffffffff00000000 ||
|
||||
(p[1] == 0xffffffff00000000 &&
|
||||
p[0] > 0x00000000ffffffff ||
|
||||
(p[0] == 0x00000000ffffffff))))))) );
|
||||
}
|
||||
|
||||
static int
|
||||
mbedtls_p384_cmp( const uint64_t a[7],
|
||||
const uint64_t b[7] )
|
||||
{
|
||||
if( a[6] < b[6] ) return -1;
|
||||
if( a[6] > b[6] ) return 1;
|
||||
if( a[5] < b[5] ) return -1;
|
||||
if( a[5] > b[5] ) return 1;
|
||||
if( a[4] < b[4] ) return -1;
|
||||
if( a[4] > b[4] ) return 1;
|
||||
if( a[3] < b[3] ) return -1;
|
||||
if( a[3] > b[3] ) return 1;
|
||||
if( a[2] < b[2] ) return -1;
|
||||
if( a[2] > b[2] ) return 1;
|
||||
if( a[1] < b[1] ) return -1;
|
||||
if( a[1] > b[1] ) return 1;
|
||||
if( a[0] < b[0] ) return -1;
|
||||
if( a[0] > b[0] ) return 1;
|
||||
return 0;
|
||||
(p[0] > 0x00000000ffffffff ||
|
||||
(p[0] == 0x00000000ffffffff))))))))))))) );
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -101,7 +79,7 @@ mbedtls_p384_red( uint64_t p[7] )
|
|||
#endif
|
||||
}
|
||||
|
||||
static noinline void
|
||||
static inline void
|
||||
mbedtls_p384_gro( uint64_t p[7] )
|
||||
{
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
|
@ -128,28 +106,31 @@ mbedtls_p384_gro( uint64_t p[7] )
|
|||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
static inline void
|
||||
mbedtls_p384_rum( uint64_t p[7] )
|
||||
{
|
||||
while( mbedtls_p384_gte( p ) )
|
||||
mbedtls_p384_red( p );
|
||||
}
|
||||
|
||||
/*
 * Modular reduction step run on the 12-limb buffer X (mbedtls_p384_mul()
 * calls this after the multiplication has filled X).
 *
 * First applies the secp384r1() quasi-reduction to X in place; that routine
 * stores its result in limbs 0..6 and clears limbs 7..11. Limb X[6] is then
 * read as a signed word to decide which correction loop to run.
 */
static inline void
|
||||
mbedtls_p384_mod(uint64_t X[12])
|
||||
{
|
||||
secp384r1(X);
|
||||
/* X[6] negative when read as int64_t: keep calling gro until it is not.
   NOTE(review): presumably gro adds the modulus back -- confirm against
   mbedtls_p384_gro(). */
if ((int64_t)X[6] < 0) {
|
||||
do {
|
||||
mbedtls_p384_gro(X);
|
||||
} while ((int64_t)X[6] < 0);
|
||||
} else {
|
||||
/* Otherwise call red for as long as gte reports true.
   NOTE(review): presumably "subtract p while X >= p" -- confirm against
   mbedtls_p384_gte() / mbedtls_p384_red(). */
while (mbedtls_p384_gte(X)) {
|
||||
mbedtls_p384_red(X);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
mbedtls_p384_sar( uint64_t p[7] )
|
||||
{
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
asm("sarq\t48+%0\n\t"
|
||||
"rcrq\t40+%0\n\t"
|
||||
"rcrq\t32+%0\n\t"
|
||||
"rcrq\t24+%0\n\t"
|
||||
"rcrq\t16+%0\n\t"
|
||||
"rcrq\t8+%0\n\t"
|
||||
"rcrq\t%0\n\t"
|
||||
: "+o"(*p)
|
||||
: /* no inputs */
|
||||
: "memory", "cc");
|
||||
#else
|
||||
p[0] = p[0] >> 1 | p[1] << 63;
|
||||
p[1] = p[1] >> 1 | p[2] << 63;
|
||||
p[2] = p[2] >> 1 | p[3] << 63;
|
||||
|
@ -157,24 +138,11 @@ mbedtls_p384_sar( uint64_t p[7] )
|
|||
p[4] = p[4] >> 1 | p[5] << 63;
|
||||
p[5] = p[5] >> 1 | p[6] << 63;
|
||||
p[6] = (int64_t)p[6] >> 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void
|
||||
mbedtls_p384_shl( uint64_t p[7] )
|
||||
{
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
asm("shlq\t%0\n\t"
|
||||
"rclq\t8+%0\n\t"
|
||||
"rclq\t16+%0\n\t"
|
||||
"rclq\t24+%0\n\t"
|
||||
"rclq\t32+%0\n\t"
|
||||
"rclq\t40+%0\n\t"
|
||||
"rclq\t48+%0\n\t"
|
||||
: "+o"(*p)
|
||||
: /* no inputs */
|
||||
: "memory", "cc");
|
||||
#else
|
||||
p[6] = p[5] >> 63;
|
||||
p[5] = p[5] << 1 | p[4] >> 63;
|
||||
p[4] = p[4] << 1 | p[3] >> 63;
|
||||
|
@ -182,90 +150,24 @@ mbedtls_p384_shl( uint64_t p[7] )
|
|||
p[2] = p[2] << 1 | p[1] >> 63;
|
||||
p[1] = p[1] << 1 | p[0] >> 63;
|
||||
p[0] = p[0] << 1;
|
||||
#endif
|
||||
mbedtls_p384_rum( p );
|
||||
}
|
||||
|
||||
static inline void
|
||||
mbedtls_p384_jam( uint64_t p[7] )
|
||||
{
|
||||
secp384r1( p );
|
||||
if( (int64_t)p[6] < 0 )
|
||||
do
|
||||
mbedtls_p384_gro( p );
|
||||
while( (int64_t)p[6] < 0 );
|
||||
else
|
||||
mbedtls_p384_rum( p );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p384_mul_1x1( uint64_t X[12],
|
||||
const uint64_t A[6], size_t n,
|
||||
const uint64_t B[6], size_t m )
|
||||
{
|
||||
uint128_t t;
|
||||
t = A[0];
|
||||
t *= B[0];
|
||||
X[ 0] = t;
|
||||
X[ 1] = t >> 64;
|
||||
X[ 2] = 0;
|
||||
X[ 3] = 0;
|
||||
X[ 4] = 0;
|
||||
X[ 5] = 0;
|
||||
X[ 6] = 0;
|
||||
X[ 7] = 0;
|
||||
X[ 8] = 0;
|
||||
X[ 9] = 0;
|
||||
X[10] = 0;
|
||||
X[11] = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p384_mul_nx1( uint64_t X[12],
|
||||
const uint64_t A[6], size_t n,
|
||||
const uint64_t B[6], size_t m )
|
||||
{
|
||||
mbedtls_mpi_mul_hlp1(n, A, X, B[0]);
|
||||
mbedtls_platform_zeroize( X + n + m, ( 12 - n - m ) * 8 );
|
||||
if ( n + m >= 6 )
|
||||
mbedtls_p384_jam( X );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p384_mul_6x6( uint64_t X[12],
|
||||
const uint64_t A[6], size_t n,
|
||||
const uint64_t B[6], size_t m )
|
||||
{
|
||||
Mul6x6Adx( X, A, B );
|
||||
mbedtls_p384_jam( X );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p384_mul_nxm( uint64_t X[12],
|
||||
const uint64_t A[6], size_t n,
|
||||
const uint64_t B[6], size_t m )
|
||||
{
|
||||
if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8));
|
||||
if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8));
|
||||
Mul( X, A, n, B, m );
|
||||
mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 );
|
||||
if ( n + m >= 6 )
|
||||
mbedtls_p384_jam( X );
|
||||
}
|
||||
|
||||
static void
|
||||
mbedtls_p384_mul( uint64_t X[12],
|
||||
const uint64_t A[6], size_t n,
|
||||
const uint64_t B[6], size_t m )
|
||||
{
|
||||
if( n == 6 && m == 6 && X86_HAVE(ADX) && X86_HAVE(BMI2) )
|
||||
mbedtls_p384_mul_6x6( X, A, n, B, m );
|
||||
else if( m == 1 && n == 1 )
|
||||
mbedtls_p384_mul_1x1( X, A, n, B, m );
|
||||
else if( m == 1 )
|
||||
mbedtls_p384_mul_nx1( X, A, n, B, m );
|
||||
if( X86_HAVE(ADX) && X86_HAVE(BMI2) )
|
||||
Mul6x6Adx( X, A, B );
|
||||
else
|
||||
mbedtls_p384_mul_nxm( X, A, n, B, m );
|
||||
{
|
||||
if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8));
|
||||
if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8));
|
||||
Mul( X, A, n, B, m );
|
||||
mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 );
|
||||
}
|
||||
mbedtls_p384_mod( X );
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
142
third_party/mbedtls/ecp_curves.c
vendored
142
third_party/mbedtls/ecp_curves.c
vendored
|
@ -46,7 +46,7 @@ asm(".include \"libc/disclaimer.inc\"");
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* #if defined(MBEDTLS_ECP_C) */
|
||||
#if defined(MBEDTLS_ECP_C)
|
||||
|
||||
#if !defined(MBEDTLS_ECP_ALT)
|
||||
|
||||
|
@ -635,12 +635,7 @@ static int ecp_group_load( mbedtls_ecp_group *grp,
|
|||
#endif /* ECP_LOAD_GROUP */
|
||||
|
||||
#if defined(MBEDTLS_ECP_NIST_OPTIM)
|
||||
#define NIST_MODP( P ) grp->modp = ecp_mod_ ## P;
|
||||
#else
|
||||
#define NIST_MODP( P )
|
||||
#endif
|
||||
|
||||
#if defined(MBEDTLS_ECP_NIST_OPTIM)
|
||||
/* Forward declarations */
|
||||
#if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED)
|
||||
static int ecp_mod_p192( mbedtls_mpi * );
|
||||
#endif
|
||||
|
@ -650,8 +645,13 @@ static int ecp_mod_p224( mbedtls_mpi * );
|
|||
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
|
||||
static int ecp_mod_p521( mbedtls_mpi * );
|
||||
#endif
|
||||
|
||||
#define NIST_MODP( P ) grp->modp = ecp_mod_ ## P;
|
||||
#else
|
||||
#define NIST_MODP( P )
|
||||
#endif /* MBEDTLS_ECP_NIST_OPTIM */
|
||||
|
||||
/* Additional forward declarations */
|
||||
#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
|
||||
static int ecp_mod_p255( mbedtls_mpi * );
|
||||
#endif
|
||||
|
@ -771,8 +771,6 @@ cleanup:
|
|||
}
|
||||
#endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */
|
||||
|
||||
|
||||
#if defined(MBEDTLS_ECP_C)
|
||||
/**
|
||||
* \brief This function sets up an ECP group context
|
||||
* from a standardized set of domain parameters.
|
||||
|
@ -879,7 +877,6 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id )
|
|||
return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
|
||||
}
|
||||
}
|
||||
#endif /* MBEDTLS_ECP_C */
|
||||
|
||||
#if defined(MBEDTLS_ECP_NIST_OPTIM)
|
||||
/*
|
||||
|
@ -892,6 +889,7 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id )
|
|||
* MPI remains loose, since these functions can be deactivated at will.
|
||||
*/
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED)
|
||||
/*
|
||||
* Compared to the way things are presented in FIPS 186-3 D.2,
|
||||
* we proceed in columns, from right (least significant chunk) to left,
|
||||
|
@ -942,13 +940,17 @@ static int ecp_mod_p192( mbedtls_mpi *N )
|
|||
int ret = MBEDTLS_ERR_THIS_CORRUPTION;
|
||||
mbedtls_mpi_uint c = 0;
|
||||
mbedtls_mpi_uint *p, *end;
|
||||
|
||||
/* Make sure we have enough blocks so that A(5) is legal */
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( N, 6 * WIDTH ) );
|
||||
|
||||
p = N->p;
|
||||
end = p + N->n;
|
||||
|
||||
ADD( 3 ); ADD( 5 ); NEXT; // A0 += A3 + A5
|
||||
ADD( 3 ); ADD( 4 ); ADD( 5 ); NEXT; // A1 += A3 + A4 + A5
|
||||
ADD( 4 ); ADD( 5 ); LAST; // A2 += A4 + A5
|
||||
|
||||
cleanup:
|
||||
return( ret );
|
||||
}
|
||||
|
@ -958,7 +960,11 @@ cleanup:
|
|||
#undef ADD
|
||||
#undef NEXT
|
||||
#undef LAST
|
||||
#endif /* MBEDTLS_ECP_DP_SECP192R1_ENABLED */
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) || \
|
||||
defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) || \
|
||||
defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
|
||||
/*
|
||||
* The reader is advised to first understand ecp_mod_p192() since the same
|
||||
* general structure is used here, but with additional complications:
|
||||
|
@ -1059,6 +1065,7 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
|
|||
static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, size_t bits )
|
||||
{
|
||||
int ret = MBEDTLS_ERR_THIS_CORRUPTION;
|
||||
|
||||
/* C = - c * 2^(bits + 32) */
|
||||
#if !defined(MBEDTLS_HAVE_INT64)
|
||||
((void) bits);
|
||||
|
@ -1068,19 +1075,24 @@ static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, s
|
|||
else
|
||||
#endif
|
||||
C->p[ C->n - 1 ] = (mbedtls_mpi_uint) -c;
|
||||
|
||||
/* N = - ( C - N ) */
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( N, C, N ) );
|
||||
N->s = -1;
|
||||
|
||||
cleanup:
|
||||
|
||||
return( ret );
|
||||
}
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
|
||||
/*
|
||||
* Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2)
|
||||
*/
|
||||
static int ecp_mod_p224( mbedtls_mpi *N )
|
||||
{
|
||||
INIT( 224 );
|
||||
|
||||
SUB( 7 ); SUB( 11 ); NEXT; // A0 += -A7 - A11
|
||||
SUB( 8 ); SUB( 12 ); NEXT; // A1 += -A8 - A12
|
||||
SUB( 9 ); SUB( 13 ); NEXT; // A2 += -A9 - A13
|
||||
|
@ -1088,9 +1100,97 @@ static int ecp_mod_p224( mbedtls_mpi *N )
|
|||
SUB( 11 ); ADD( 8 ); ADD( 12 ); NEXT; // A4 += -A11 + A8 + A12
|
||||
SUB( 12 ); ADD( 9 ); ADD( 13 ); NEXT; // A5 += -A12 + A9 + A13
|
||||
SUB( 13 ); ADD( 10 ); LAST; // A6 += -A13 + A10
|
||||
|
||||
cleanup:
|
||||
return( ret );
|
||||
}
|
||||
#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
|
||||
/*
|
||||
* Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3)
|
||||
*/
|
||||
int ecp_mod_p256_old( mbedtls_mpi *N )
|
||||
{
|
||||
INIT( 256 );
|
||||
|
||||
ADD( 8 ); ADD( 9 );
|
||||
SUB( 11 ); SUB( 12 ); SUB( 13 ); SUB( 14 ); NEXT; // A0
|
||||
|
||||
ADD( 9 ); ADD( 10 );
|
||||
SUB( 12 ); SUB( 13 ); SUB( 14 ); SUB( 15 ); NEXT; // A1
|
||||
|
||||
ADD( 10 ); ADD( 11 );
|
||||
SUB( 13 ); SUB( 14 ); SUB( 15 ); NEXT; // A2
|
||||
|
||||
ADD( 11 ); ADD( 11 ); ADD( 12 ); ADD( 12 ); ADD( 13 );
|
||||
SUB( 15 ); SUB( 8 ); SUB( 9 ); NEXT; // A3
|
||||
|
||||
ADD( 12 ); ADD( 12 ); ADD( 13 ); ADD( 13 ); ADD( 14 );
|
||||
SUB( 9 ); SUB( 10 ); NEXT; // A4
|
||||
|
||||
ADD( 13 ); ADD( 13 ); ADD( 14 ); ADD( 14 ); ADD( 15 );
|
||||
SUB( 10 ); SUB( 11 ); NEXT; // A5
|
||||
|
||||
ADD( 14 ); ADD( 14 ); ADD( 15 ); ADD( 15 ); ADD( 14 ); ADD( 13 );
|
||||
SUB( 8 ); SUB( 9 ); NEXT; // A6
|
||||
|
||||
ADD( 15 ); ADD( 15 ); ADD( 15 ); ADD( 8 );
|
||||
SUB( 10 ); SUB( 11 ); SUB( 12 ); SUB( 13 ); LAST; // A7
|
||||
|
||||
cleanup:
|
||||
return( ret );
|
||||
}
|
||||
#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED */
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
|
||||
/*
|
||||
* Fast quasi-reduction modulo p384 (FIPS 186-3 D.2.4)
|
||||
*/
|
||||
int ecp_mod_p384_old( mbedtls_mpi *N )
|
||||
{
|
||||
INIT( 384 );
|
||||
|
||||
/* One group of ADD/SUB terms per output chunk, A0 first and A11 last;
   the trailing comment on each group names the chunk it completes. */
ADD( 12 ); ADD( 21 ); ADD( 20 );
|
||||
SUB( 23 ); NEXT; // A0
|
||||
|
||||
ADD( 13 ); ADD( 22 ); ADD( 23 );
|
||||
SUB( 12 ); SUB( 20 ); NEXT; // A1
|
||||
|
||||
ADD( 14 ); ADD( 23 );
|
||||
SUB( 13 ); SUB( 21 ); NEXT; // A2
|
||||
|
||||
ADD( 15 ); ADD( 12 ); ADD( 20 ); ADD( 21 );
|
||||
SUB( 14 ); SUB( 22 ); SUB( 23 ); NEXT; // A3
|
||||
|
||||
ADD( 21 ); ADD( 21 ); ADD( 16 ); ADD( 13 ); ADD( 12 ); ADD( 20 ); ADD( 22 );
|
||||
SUB( 15 ); SUB( 23 ); SUB( 23 ); NEXT; // A4
|
||||
|
||||
ADD( 22 ); ADD( 22 ); ADD( 17 ); ADD( 14 ); ADD( 13 ); ADD( 21 ); ADD( 23 );
|
||||
SUB( 16 ); NEXT; // A5
|
||||
|
||||
ADD( 23 ); ADD( 23 ); ADD( 18 ); ADD( 15 ); ADD( 14 ); ADD( 22 );
|
||||
SUB( 17 ); NEXT; // A6
|
||||
|
||||
ADD( 19 ); ADD( 16 ); ADD( 15 ); ADD( 23 );
|
||||
SUB( 18 ); NEXT; // A7
|
||||
|
||||
ADD( 20 ); ADD( 17 ); ADD( 16 );
|
||||
SUB( 19 ); NEXT; // A8
|
||||
|
||||
ADD( 21 ); ADD( 18 ); ADD( 17 );
|
||||
SUB( 20 ); NEXT; // A9
|
||||
|
||||
ADD( 22 ); ADD( 19 ); ADD( 18 );
|
||||
SUB( 21 ); NEXT; // A10
|
||||
|
||||
ADD( 23 ); ADD( 20 ); ADD( 19 );
|
||||
SUB( 22 ); LAST; // A11
|
||||
|
||||
cleanup:
|
||||
return( ret );
|
||||
}
|
||||
#endif /* MBEDTLS_ECP_DP_SECP384R1_ENABLED */
|
||||
|
||||
#undef A
|
||||
#undef LOAD32
|
||||
|
@ -1100,6 +1200,10 @@ cleanup:
|
|||
#undef NEXT
|
||||
#undef LAST
|
||||
|
||||
#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED ||
|
||||
MBEDTLS_ECP_DP_SECP256R1_ENABLED ||
|
||||
MBEDTLS_ECP_DP_SECP384R1_ENABLED */
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
|
||||
/*
|
||||
* Here we have an actual Mersenne prime, so things are more straightforward.
|
||||
|
@ -1156,6 +1260,8 @@ cleanup:
|
|||
|
||||
#endif /* MBEDTLS_ECP_NIST_OPTIM */
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
|
||||
|
||||
/* Size of p255 in terms of mbedtls_mpi_uint */
|
||||
#define P255_WIDTH ( 255 / 8 / sizeof( mbedtls_mpi_uint ) + 1 )
|
||||
|
||||
|
@ -1169,28 +1275,34 @@ static int ecp_mod_p255( mbedtls_mpi *N )
|
|||
size_t i;
|
||||
mbedtls_mpi M;
|
||||
mbedtls_mpi_uint Mp[P255_WIDTH + 2];
|
||||
|
||||
if( N->n < P255_WIDTH )
|
||||
return( 0 );
|
||||
|
||||
/* M = A1 */
|
||||
M.s = 1;
|
||||
M.n = N->n - ( P255_WIDTH - 1 );
|
||||
if( M.n > P255_WIDTH + 1 )
|
||||
return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
|
||||
M.p = Mp;
|
||||
mbedtls_platform_zeroize( Mp, sizeof Mp );
|
||||
memset( Mp, 0, sizeof Mp );
|
||||
memcpy( Mp, N->p + P255_WIDTH - 1, M.n * sizeof( mbedtls_mpi_uint ) );
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, 255 % ( 8 * sizeof( mbedtls_mpi_uint ) ) ) );
|
||||
M.n++; /* Make room for multiplication by 19 */
|
||||
|
||||
/* N = A0 */
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( N, 255, 0 ) );
|
||||
for( i = P255_WIDTH; i < N->n; i++ )
|
||||
N->p[i] = 0;
|
||||
|
||||
/* N = A0 + 19 * A1 */
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M, &M, 19 ) );
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( N, N, &M ) );
|
||||
|
||||
cleanup:
|
||||
return( ret );
|
||||
}
|
||||
#endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */
|
||||
|
||||
#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
|
||||
|
||||
|
@ -1231,7 +1343,7 @@ static int ecp_mod_p448( mbedtls_mpi *N )
|
|||
/* Shouldn't be called with N larger than 2^896! */
|
||||
return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
|
||||
M.p = Mp;
|
||||
mbedtls_platform_zeroize( Mp, sizeof( Mp ) );
|
||||
memset( Mp, 0, sizeof( Mp ) );
|
||||
memcpy( Mp, N->p + P448_WIDTH, M.n * sizeof( mbedtls_mpi_uint ) );
|
||||
|
||||
/* N = A0 */
|
||||
|
@ -1299,7 +1411,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t
|
|||
M.n = N->n - ( p_limbs - adjust );
|
||||
if( M.n > p_limbs + adjust )
|
||||
M.n = p_limbs + adjust;
|
||||
mbedtls_platform_zeroize( Mp, sizeof Mp );
|
||||
memset( Mp, 0, sizeof Mp );
|
||||
memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) );
|
||||
if( shift != 0 )
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) );
|
||||
|
@ -1321,7 +1433,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t
|
|||
M.n = N->n - ( p_limbs - adjust );
|
||||
if( M.n > p_limbs + adjust )
|
||||
M.n = p_limbs + adjust;
|
||||
mbedtls_platform_zeroize( Mp, sizeof Mp );
|
||||
memset( Mp, 0, sizeof Mp );
|
||||
memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) );
|
||||
if( shift != 0 )
|
||||
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) );
|
||||
|
@ -1392,4 +1504,4 @@ static int ecp_mod_p256k1( mbedtls_mpi *N )
|
|||
|
||||
#endif /* !MBEDTLS_ECP_ALT */
|
||||
|
||||
/* #endif /\* MBEDTLS_ECP_C *\/ */
|
||||
#endif /* MBEDTLS_ECP_C */
|
||||
|
|
1337
third_party/mbedtls/everest.c
vendored
1337
third_party/mbedtls/everest.c
vendored
File diff suppressed because it is too large
Load diff
54
third_party/mbedtls/everest.h
vendored
54
third_party/mbedtls/everest.h
vendored
|
@ -1,52 +1,10 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
|
||||
#include "third_party/mbedtls/config.h"
|
||||
#include "third_party/mbedtls/ecp.h"
|
||||
#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define MBEDTLS_ECP_TLS_CURVE25519 0x1d
|
||||
#define MBEDTLS_X25519_KEY_SIZE_BYTES 32
|
||||
|
||||
typedef enum {
|
||||
MBEDTLS_X25519_ECDH_OURS,
|
||||
MBEDTLS_X25519_ECDH_THEIRS,
|
||||
} mbedtls_x25519_ecdh_side;
|
||||
|
||||
typedef struct {
|
||||
unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES];
|
||||
unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES];
|
||||
} mbedtls_x25519_context;
|
||||
|
||||
typedef enum {
|
||||
MBEDTLS_EVEREST_ECDH_OURS,
|
||||
MBEDTLS_EVEREST_ECDH_THEIRS,
|
||||
} mbedtls_everest_ecdh_side;
|
||||
|
||||
typedef struct {
|
||||
mbedtls_x25519_context ctx;
|
||||
} mbedtls_ecdh_context_everest;
|
||||
|
||||
int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int);
|
||||
void mbedtls_everest_free(mbedtls_ecdh_context_everest *);
|
||||
int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *,
|
||||
unsigned char *, size_t,
|
||||
int (*)(void *, unsigned char *, size_t),
|
||||
void *);
|
||||
int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *,
|
||||
const unsigned char **, const unsigned char *);
|
||||
int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *,
|
||||
const mbedtls_ecp_keypair *,
|
||||
mbedtls_everest_ecdh_side);
|
||||
int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *,
|
||||
unsigned char *, size_t,
|
||||
int (*)(void *, unsigned char *, size_t),
|
||||
void *);
|
||||
int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *,
|
||||
const unsigned char *, size_t);
|
||||
int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *,
|
||||
unsigned char *, size_t,
|
||||
int (*)(void *, unsigned char *, size_t),
|
||||
void *);
|
||||
void curve25519(uint8_t[32], const uint8_t[32], const uint8_t[32]);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_ */
|
||||
|
|
7
third_party/mbedtls/mbedtls.mk
vendored
7
third_party/mbedtls/mbedtls.mk
vendored
|
@ -55,7 +55,7 @@ $(THIRD_PARTY_MBEDTLS_A_OBJS): \
|
|||
|
||||
o/$(MODE)/third_party/mbedtls/everest.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-Os
|
||||
-O3
|
||||
|
||||
o/$(MODE)/third_party/mbedtls/bigmul4.o \
|
||||
o/$(MODE)/third_party/mbedtls/bigmul6.o: \
|
||||
|
@ -70,11 +70,6 @@ o/$(MODE)/third_party/mbedtls/shiftright2-avx.o: \
|
|||
OVERRIDE_CFLAGS += \
|
||||
-O3 -mavx
|
||||
|
||||
# tail recursion is so important because everest was written in f*
|
||||
o/$(MODE)/third_party/mbedtls/everest.o: \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-foptimize-sibling-calls
|
||||
|
||||
THIRD_PARTY_MBEDTLS_LIBS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)))
|
||||
THIRD_PARTY_MBEDTLS_SRCS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_SRCS))
|
||||
THIRD_PARTY_MBEDTLS_HDRS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_HDRS))
|
||||
|
|
2
third_party/mbedtls/secp256r1.c
vendored
2
third_party/mbedtls/secp256r1.c
vendored
|
@ -26,7 +26,7 @@
|
|||
#define H(w) (w & 0xffffffff00000000)
|
||||
|
||||
/**
|
||||
* Fastest quasi-reduction modulo NIST P-256.
|
||||
* Fastest quasi-reduction modulo ℘256.
|
||||
*
|
||||
* p = 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1
|
||||
* B = T + 2×S₁ + 2×S₂ + S₃ + S₄ – D₁ – D₂ – D₃ – D₄ mod p
|
||||
|
|
269
third_party/mbedtls/secp384r1.c
vendored
269
third_party/mbedtls/secp384r1.c
vendored
|
@ -24,7 +24,7 @@
|
|||
#define Q(i) p[i >> 1]
|
||||
|
||||
/**
|
||||
* Fastest quasi-reduction modulo Prime 384.
|
||||
* Fastest quasi-reduction modulo ℘384.
|
||||
*
|
||||
* p = 2³⁸⁴ – 2¹²⁸ – 2⁹⁶ + 2³² – 1
|
||||
* B = T + 2×S₁ + S₂ + S₃ + S₄ + S₅ + S₆ – D₁ – D₂ – D₃ mod p
|
||||
|
@ -44,8 +44,7 @@
|
|||
void secp384r1(uint64_t p[12]) {
|
||||
int r;
|
||||
char o;
|
||||
signed char G;
|
||||
uint64_t A, B, C, D, E, F, a, b, c;
|
||||
uint64_t A, B, C, D, E, F, G, a, b, c;
|
||||
A = Q(0);
|
||||
B = Q(2);
|
||||
C = Q(4);
|
||||
|
@ -57,8 +56,8 @@ void secp384r1(uint64_t p[12]) {
|
|||
a = Q(22) << 32 | Q(21) >> 32;
|
||||
b = Q(23) >> 32;
|
||||
ADC(C, C, a << 1, 0, o);
|
||||
ADC(D, D, (b << 1 | a >> 63), o, o);
|
||||
ADC(E, E, (b >> 63), o, o);
|
||||
ADC(D, D, b << 1 | a >> 63, o, o);
|
||||
ADC(E, E, b >> 63, o, o);
|
||||
ADC(F, F, o, o, o);
|
||||
G += o;
|
||||
ADC(A, A, Q(12), 0, o);
|
||||
|
@ -118,91 +117,105 @@ void secp384r1(uint64_t p[12]) {
|
|||
asm volatile(/* S₁ = (0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖0 ‖0 ‖0 ‖0 ) */
|
||||
"mov\t21*4(%9),%7\n\t"
|
||||
"mov\t23*4(%9),%k8\n\t"
|
||||
"mov\t%7,%%r12\n\t"
|
||||
"shr\t$63,%%r12\n\t"
|
||||
"shl\t%7\n\t"
|
||||
"rcl\t%8\n\t"
|
||||
"shl\t%8\n\t"
|
||||
"or\t%%r12,%8\n\t"
|
||||
"mov\t13*4(%9),%%r12\n\t"
|
||||
"add\t%7,%2\n\t"
|
||||
"mov\t23*4(%9),%k7\n\t"
|
||||
"adc\t%8,%3\n\t"
|
||||
"mov\t15*4(%9),%%r13\n\t"
|
||||
"adc\t$0,%4\n\t"
|
||||
"mov\t12*4(%9),%k8\n\t"
|
||||
"adc\t$0,%5\n\t"
|
||||
"adc\t$0,%b6\n\t"
|
||||
/* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */
|
||||
"add\t12*4(%9),%0\n\t"
|
||||
"adc\t14*4(%9),%1\n\t"
|
||||
"adc\t16*4(%9),%2\n\t"
|
||||
"adc\t18*4(%9),%3\n\t"
|
||||
"adc\t20*4(%9),%4\n\t"
|
||||
"adc\t22*4(%9),%5\n\t"
|
||||
"adc\t$0,%b6\n\t"
|
||||
/* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */
|
||||
"mov\t12*4(%9),%k7\n\t"
|
||||
"mov\t17*4(%9),%%r14\n\t"
|
||||
"adc\t$0,%6\n\t"
|
||||
"mov\t19*4(%9),%%r15\n\t"
|
||||
/* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */
|
||||
"shl\t$32,%8\n\t"
|
||||
"or\t%8,%7\n\t"
|
||||
"mov\t23*4(%9),%k8\n\t"
|
||||
"sub\t%7,%0\n\t"
|
||||
"mov\t21*4(%9),%7\n\t"
|
||||
"sbb\t%%r12,%1\n\t"
|
||||
"sbb\t%%r13,%2\n\t"
|
||||
"sbb\t%%r14,%3\n\t"
|
||||
"sbb\t%%r15,%4\n\t"
|
||||
"sbb\t%7,%5\n\t"
|
||||
"mov\t12*4(%9),%k7\n\t"
|
||||
"sbb\t$0,%6\n\t"
|
||||
/* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */
|
||||
"shl\t$32,%7\n\t"
|
||||
"or\t%7,%8\n\t"
|
||||
"add\t21*4(%9),%0\n\t"
|
||||
"adc\t%8,%1\n\t"
|
||||
"adc\t13*4(%9),%2\n\t"
|
||||
"adc\t15*4(%9),%3\n\t"
|
||||
"adc\t17*4(%9),%4\n\t"
|
||||
"adc\t19*4(%9),%5\n\t"
|
||||
"adc\t$0,%b6\n\t"
|
||||
/* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0 ‖A₂₃‖0 ) */
|
||||
"mov\t23*4(%9),%k7\n\t"
|
||||
"adc\t%8,%1\n\t"
|
||||
"mov\t20*4(%9),%k8\n\t"
|
||||
"adc\t%%r12,%2\n\t"
|
||||
"mov\t12*4(%9),%%r12\n\t"
|
||||
"adc\t%%r13,%3\n\t"
|
||||
"mov\t14*4(%9),%%r13\n\t"
|
||||
"adc\t%%r14,%4\n\t"
|
||||
"mov\t16*4(%9),%%r14\n\t"
|
||||
"adc\t%%r15,%5\n\t"
|
||||
"mov\t18*4(%9),%%r15\n\t"
|
||||
"adc\t$0,%6\n\t"
|
||||
/* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0 ‖A₂₃‖0 ) */
|
||||
"shl\t$32,%7\n\t"
|
||||
"shl\t$32,%8\n\t"
|
||||
"add\t%7,%0\n\t"
|
||||
"adc\t%8,%1\n\t"
|
||||
"adc\t12*4(%9),%2\n\t"
|
||||
"adc\t14*4(%9),%3\n\t"
|
||||
"adc\t16*4(%9),%4\n\t"
|
||||
"adc\t18*4(%9),%5\n\t"
|
||||
"adc\t$0,%b6\n\t"
|
||||
"adc\t%%r12,%2\n\t"
|
||||
"adc\t%%r13,%3\n\t"
|
||||
"adc\t%%r14,%4\n\t"
|
||||
"adc\t%%r15,%5\n\t"
|
||||
"adc\t$0,%6\n\t"
|
||||
/* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */
|
||||
"add\t%%r12,%0\n\t"
|
||||
"mov\t20*4(%9),%%r12\n\t"
|
||||
"adc\t%%r13,%1\n\t"
|
||||
"mov\t22*4(%9),%%r13\n\t"
|
||||
"adc\t%%r14,%2\n\t"
|
||||
"adc\t%%r15,%3\n\t"
|
||||
"adc\t%%r12,%4\n\t"
|
||||
"adc\t%%r13,%5\n\t"
|
||||
"adc\t$0,%6\n\t"
|
||||
/* S₅ = (0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0 ‖0 ‖0 ‖0 ) */
|
||||
"mov\t23*4(%9),%k7\n\t"
|
||||
"mov\t20*4(%9),%k8\n\t"
|
||||
"shl\t$32,%7\n\t"
|
||||
"shl\t$32,%8\n\t"
|
||||
"add\t20*4(%9),%2\n\t"
|
||||
"adc\t22*4(%9),%3\n\t"
|
||||
"add\t%%r12,%2\n\t"
|
||||
"adc\t%%r13,%3\n\t"
|
||||
"adc\t$0,%4\n\t"
|
||||
"adc\t$0,%5\n\t"
|
||||
"adc\t$0,%b6\n\t"
|
||||
"adc\t$0,%6\n\t"
|
||||
/* S₆ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖0 ‖0 ‖A₂₀) */
|
||||
"mov\t20*4(%9),%k7\n\t"
|
||||
"mov\t21*4(%9),%k8\n\t"
|
||||
"mov\t%%r12d,%k7\n\t"
|
||||
"mov\t%%r12,%8\n\t"
|
||||
"shr\t$32,%8\n\t"
|
||||
"shl\t$32,%8\n\t"
|
||||
"add\t%7,%0\n\t"
|
||||
"adc\t%8,%1\n\t"
|
||||
"adc\t22*4(%9),%2\n\t"
|
||||
"adc\t%%r13,%2\n\t"
|
||||
"adc\t$0,%3\n\t"
|
||||
"adc\t$0,%4\n\t"
|
||||
"adc\t$0,%5\n\t"
|
||||
"adc\t$0,%b6\n\t"
|
||||
/* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */
|
||||
"mov\t23*4(%9),%k7\n\t"
|
||||
"mov\t12*4(%9),%k8\n\t"
|
||||
"shl\t$32,%8\n\t"
|
||||
"or\t%8,%7\n\t"
|
||||
"sub\t%7,%0\n\t"
|
||||
"sbb\t13*4(%9),%1\n\t"
|
||||
"sbb\t15*4(%9),%2\n\t"
|
||||
"sbb\t17*4(%9),%3\n\t"
|
||||
"sbb\t19*4(%9),%4\n\t"
|
||||
"sbb\t21*4(%9),%5\n\t"
|
||||
"sbb\t$0,%b6\n\t"
|
||||
"adc\t$0,%6\n\t"
|
||||
/* D₂ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0 ) */
|
||||
"mov\t20*4(%9),%k7\n\t"
|
||||
"mov\t23*4(%9),%k8\n\t"
|
||||
"mov\t%%r12d,%k7\n\t"
|
||||
"mov\t21*4(%9),%%r12\n\t"
|
||||
"mov\t%%r13,%8\n\t"
|
||||
"shr\t$32,%8\n\t"
|
||||
"shl\t$32,%7\n\t"
|
||||
"sub\t%7,%0\n\t"
|
||||
"sbb\t21*4(%9),%1\n\t"
|
||||
"sbb\t%%r12,%1\n\t"
|
||||
"sbb\t%8,%2\n\t"
|
||||
"sbb\t$0,%3\n\t"
|
||||
"sbb\t$0,%4\n\t"
|
||||
"sbb\t$0,%5\n\t"
|
||||
"sbb\t$0,%b6\n\t"
|
||||
"sbb\t$0,%6\n\t"
|
||||
/* D₃ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₃‖0 ‖0 ‖0 ) */
|
||||
"mov\t23*4(%9),%k7\n\t"
|
||||
"mov\t%%r13,%7\n\t"
|
||||
"shr\t$32,%7\n\t"
|
||||
"mov\t%k7,%k8\n\t"
|
||||
"shl\t$32,%7\n\t"
|
||||
"sub\t%7,%1\n\t"
|
||||
|
@ -210,11 +223,11 @@ void secp384r1(uint64_t p[12]) {
|
|||
"sbb\t$0,%3\n\t"
|
||||
"sbb\t$0,%4\n\t"
|
||||
"sbb\t$0,%5\n\t"
|
||||
"sbb\t$0,%b6\n\t"
|
||||
"sbb\t$0,%6"
|
||||
: "+r"(A), "+r"(B), "+r"(C), "+r"(D), "+r"(E), "+r"(F), "+q"(G),
|
||||
"=&r"(a), "=&r"(b)
|
||||
: "r"(p)
|
||||
: "memory");
|
||||
: "memory", "r12", "r13", "r14", "r15");
|
||||
#endif
|
||||
p[0] = A;
|
||||
p[1] = B;
|
||||
|
@ -223,11 +236,12 @@ void secp384r1(uint64_t p[12]) {
|
|||
p[4] = E;
|
||||
p[5] = F;
|
||||
p[6] = G;
|
||||
p[7] = 0;
|
||||
p[8] = 0;
|
||||
p[9] = 0;
|
||||
p[10] = 0;
|
||||
p[11] = 0;
|
||||
G = CONCEAL("r", 0L);
|
||||
p[7] = G;
|
||||
p[8] = G;
|
||||
p[9] = G;
|
||||
p[10] = G;
|
||||
p[11] = G;
|
||||
}
|
||||
|
||||
int ecp_mod_p384(mbedtls_mpi *N) {
|
||||
|
@ -249,3 +263,130 @@ int ecp_mod_p384(mbedtls_mpi *N) {
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Instructions: 115
|
||||
Total Cycles: 46
|
||||
Total uOps: 116
|
||||
uOps Per Cycle: 2.52
|
||||
IPC: 2.50
|
||||
Block RThroughput: 31.0
|
||||
|
||||
SIMULATION 0123456789 0123456789
|
||||
Index 0123456789 0123456789 012345
|
||||
[0,0] DR . . . . . . . . . xorl %r10d, %r10d
|
||||
[0,1] DeeeeeER . . . . . . . . movq (%rdi), %r9
|
||||
[0,2] DeeeeeER . . . . . . . . movq 8(%rdi), %r8
|
||||
[0,3] D=eeeeeER . . . . . . . . movq 16(%rdi), %rsi
|
||||
[0,4] D=eeeeeER . . . . . . . . movq 24(%rdi), %rcx
|
||||
[0,5] D==eeeeeER. . . . . . . . movq 32(%rdi), %rdx
|
||||
[0,6] .D==eeeeeER . . . . . . . movq 40(%rdi), %rax
|
||||
[0,7] .D=eeeeeE-R . . . . . . . movq 84(%rdi), %r11
|
||||
[0,8] .D==eeeeeER . . . . . . . movl 92(%rdi), %ebx
|
||||
[0,9] .D======eER . . . . . . . movq %r11, %r12
|
||||
[0,10] .D=======eER . . . . . . . shrq $63, %r12
|
||||
[0,11] .D======eE-R . . . . . . . shlq %r11
|
||||
[0,12] . D======eER . . . . . . . shlq %rbx
|
||||
[0,13] . D=======eER . . . . . . . orq %r12, %rbx
|
||||
[0,14] . D==eeeeeE-R . . . . . . . movq 52(%rdi), %r12
|
||||
[0,15] . D======eE-R . . . . . . . addq %r11, %rsi
|
||||
[0,16] . D==eeeeeE-R . . . . . . . movl 92(%rdi), %r11d
|
||||
[0,17] . D========eER . . . . . . . adcq %rbx, %rcx
|
||||
[0,18] . D==eeeeeE-R . . . . . . . movq 60(%rdi), %r13
|
||||
[0,19] . D========eER. . . . . . . adcq $0, %rdx
|
||||
[0,20] . D==eeeeeE--R. . . . . . . movl 48(%rdi), %ebx
|
||||
[0,21] . D=========eER . . . . . . adcq $0, %rax
|
||||
[0,22] . D===eeeeeE--R . . . . . . movq 68(%rdi), %r14
|
||||
[0,23] . D==========eER . . . . . . adcq $0, %r10
|
||||
[0,24] . D==eeeeeE---R . . . . . . movq 76(%rdi), %r15
|
||||
[0,25] . D======eE---R . . . . . . shlq $32, %rbx
|
||||
[0,26] . D=======eE--R . . . . . . orq %rbx, %r11
|
||||
[0,27] . D===eeeeeE--R . . . . . . movl 92(%rdi), %ebx
|
||||
[0,28] . D========eE-R . . . . . . subq %r11, %r9
|
||||
[0,29] . D===eeeeeE--R . . . . . . movq 84(%rdi), %r11
|
||||
[0,30] . D========eER . . . . . . sbbq %r12, %r8
|
||||
[0,31] . D=========eER . . . . . . sbbq %r13, %rsi
|
||||
[0,32] . D==========eER . . . . . . sbbq %r14, %rcx
|
||||
[0,33] . D===========eER. . . . . . sbbq %r15, %rdx
|
||||
[0,34] . D============eER . . . . . sbbq %r11, %rax
|
||||
[0,35] . D===eeeeeE-----R . . . . . movl 48(%rdi), %r11d
|
||||
[0,36] . .D============eER . . . . . sbbq $0, %r10
|
||||
[0,37] . .D========eE----R . . . . . shlq $32, %r11
|
||||
[0,38] . .D=========eE---R . . . . . orq %r11, %rbx
|
||||
[0,39] . .D==eeeeeE------R . . . . . movl 92(%rdi), %r11d
|
||||
[0,40] . .D======eeeeeeE-R . . . . . addq 84(%rdi), %r9
|
||||
[0,41] . . D===========eER . . . . . adcq %rbx, %r8
|
||||
[0,42] . . D==eeeeeE-----R . . . . . movl 80(%rdi), %ebx
|
||||
[0,43] . . D============eER . . . . . adcq %r12, %rsi
|
||||
[0,44] . . D==eeeeeE------R . . . . . movq 48(%rdi), %r12
|
||||
[0,45] . . D=============eER . . . . . adcq %r13, %rcx
|
||||
[0,46] . . D===eeeeeE------R . . . . . movq 56(%rdi), %r13
|
||||
[0,47] . . D=============eER. . . . . adcq %r14, %rdx
|
||||
[0,48] . . D==eeeeeE-------R. . . . . movq 64(%rdi), %r14
|
||||
[0,49] . . D==============eER . . . . adcq %r15, %rax
|
||||
[0,50] . . D===eeeeeE-------R . . . . movq 72(%rdi), %r15
|
||||
[0,51] . . D===============eER . . . . adcq $0, %r10
|
||||
[0,52] . . D=======eE--------R . . . . shlq $32, %r11
|
||||
[0,53] . . D=======eE-------R . . . . shlq $32, %rbx
|
||||
[0,54] . . D=========eE-----R . . . . addq %r11, %r9
|
||||
[0,55] . . D==========eE----R . . . . adcq %rbx, %r8
|
||||
[0,56] . . D===========eE---R . . . . adcq %r12, %rsi
|
||||
[0,57] . . D============eE--R . . . . adcq %r13, %rcx
|
||||
[0,58] . . D=============eE-R . . . . adcq %r14, %rdx
|
||||
[0,59] . . D=============eER . . . . adcq %r15, %rax
|
||||
[0,60] . . D==============eER . . . . adcq $0, %r10
|
||||
[0,61] . . D=========eE-----R . . . . addq %r12, %r9
|
||||
[0,62] . . D=eeeeeE---------R . . . . movq 80(%rdi), %r12
|
||||
[0,63] . . D==============eER . . . . adcq %r13, %r8
|
||||
[0,64] . . D==eeeeeE--------R . . . . movq 88(%rdi), %r13
|
||||
[0,65] . . .D==============eER . . . . adcq %r14, %rsi
|
||||
[0,66] . . .D===============eER. . . . adcq %r15, %rcx
|
||||
[0,67] . . .D================eER . . . adcq %r12, %rdx
|
||||
[0,68] . . .D=================eER . . . adcq %r13, %rax
|
||||
[0,69] . . .D==================eER . . . adcq $0, %r10
|
||||
[0,70] . . .D===============eE---R . . . addq %r12, %rsi
|
||||
[0,71] . . . D===============eE--R . . . adcq %r13, %rcx
|
||||
[0,72] . . . D================eE-R . . . adcq $0, %rdx
|
||||
[0,73] . . . D=================eER . . . adcq $0, %rax
|
||||
[0,74] . . . D==================eER . . . adcq $0, %r10
|
||||
[0,75] . . . D====eE--------------R . . . movl %r12d, %r11d
|
||||
[0,76] . . . D====eE--------------R . . . movq %r12, %rbx
|
||||
[0,77] . . . D====eE-------------R . . . shrq $32, %rbx
|
||||
[0,78] . . . D============eE-----R . . . shlq $32, %rbx
|
||||
[0,79] . . . D=======eE----------R . . . addq %r11, %r9
|
||||
[0,80] . . . D=============eE----R . . . adcq %rbx, %r8
|
||||
[0,81] . . . D=================eER . . . adcq %r13, %rsi
|
||||
[0,82] . . . D==================eER. . . adcq $0, %rcx
|
||||
[0,83] . . . D==================eER . . adcq $0, %rdx
|
||||
[0,84] . . . D===================eER . . adcq $0, %rax
|
||||
[0,85] . . . D====================eER . . adcq $0, %r10
|
||||
[0,86] . . . D===eE-----------------R . . movl %r12d, %r11d
|
||||
[0,87] . . . DeeeeeE----------------R . . movq 84(%rdi), %r12
|
||||
[0,88] . . . D===eE-----------------R . . movq %r13, %rbx
|
||||
[0,89] . . . D================eE---R . . shrq $32, %rbx
|
||||
[0,90] . . . D=================eE--R . . shlq $32, %r11
|
||||
[0,91] . . . D==================eE-R . . subq %r11, %r9
|
||||
[0,92] . . . D===================eER . . sbbq %r12, %r8
|
||||
[0,93] . . . D====================eER . . sbbq %rbx, %rsi
|
||||
[0,94] . . . D=====================eER. . sbbq $0, %rcx
|
||||
[0,95] . . . .D=====================eER . sbbq $0, %rdx
|
||||
[0,96] . . . .D======================eER . sbbq $0, %rax
|
||||
[0,97] . . . .D=======================eER . sbbq $0, %r10
|
||||
[0,98] . . . .D==eE---------------------R . movq %r13, %r11
|
||||
[0,99] . . . .D=================eE------R . shrq $32, %r11
|
||||
[0,100] . . . .D==================eE-----R . movl %r11d, %ebx
|
||||
[0,101] . . . . D==================eE----R . shlq $32, %r11
|
||||
[0,102] . . . . D===================eE---R . subq %r11, %r8
|
||||
[0,103] . . . . D====================eE--R . sbbq %rbx, %rsi
|
||||
[0,104] . . . . D=====================eE-R . sbbq $0, %rcx
|
||||
[0,105] . . . . D======================eER . sbbq $0, %rdx
|
||||
[0,106] . . . . D=======================eER . sbbq $0, %rax
|
||||
[0,107] . . . . D=======================eER. sbbq $0, %r10
|
||||
[0,108] . . . . D================eE-------R. movq %r9, (%rdi)
|
||||
[0,109] . . . . D===================eE----R. movq %r8, 8(%rdi)
|
||||
[0,110] . . . . D====================eE---R. movq %rsi, 16(%rdi)
|
||||
[0,111] . . . . D=====================eE--R. movq %rcx, 24(%rdi)
|
||||
[0,112] . . . . D======================eE-R. movq %rdx, 32(%rdi)
|
||||
[0,113] . . . . D======================eER. movq %rax, 40(%rdi)
|
||||
[0,114] . . . . D=======================eER movq %r10, 48(%rdi)
|
||||
*/
|
||||
|
|
1
third_party/mbedtls/ssl_ciphersuites.c
vendored
1
third_party/mbedtls/ssl_ciphersuites.c
vendored
|
@ -61,7 +61,6 @@ static const uint16_t ciphersuite_preference[] =
|
|||
MBEDTLS_TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
|
||||
MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CCM,
|
||||
MBEDTLS_TLS_DHE_RSA_WITH_AES_256_CCM,
|
||||
/* weakened perfect forward secrecy */
|
||||
MBEDTLS_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
|
||||
MBEDTLS_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
|
||||
MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256,
|
||||
|
|
77
third_party/mbedtls/test/everest_test.c
vendored
Normal file
77
third_party/mbedtls/test/everest_test.c
vendored
Normal file
|
@ -0,0 +1,77 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/rand/rand.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "third_party/mbedtls/config.h"
|
||||
#include "third_party/mbedtls/endian.h"
|
||||
|
||||
void Hacl_Curve25519_crypto_scalarmult(uint8_t *, uint8_t *, uint8_t *);
|
||||
void curve25519(uint8_t[32], uint8_t[32], uint8_t[32]);
|
||||
|
||||
/* Limb patterns used by the structured half of the test below: zero, one,
   a single set bit at positions 12, 25, 38, 51 and 63, the all-ones mask
   just below each of the first four of those bits, all 64 bits set, and
   the top 13 bits set. Entries are selected by index, so order matters. */
const uint64_t kNumbers[] = {
    0ull,                    /* 0x0000000000000000 */
    1ull,                    /* 0x0000000000000001 */
    1ull << 12,              /* 0x0000000000001000 */
    1ull << 25,              /* 0x0000000002000000 */
    1ull << 38,              /* 0x0000004000000000 */
    1ull << 51,              /* 0x0008000000000000 */
    1ull << 63,              /* 0x8000000000000000 */
    (1ull << 51) - 1,        /* 0x0007ffffffffffff */
    (1ull << 38) - 1,        /* 0x0000003fffffffff */
    (1ull << 25) - 1,        /* 0x0000000001ffffff */
    (1ull << 12) - 1,        /* 0x0000000000000fff */
    ~0ull,                   /* 0xffffffffffffffff */
    ~((1ull << 51) - 1),     /* 0xfff8000000000000 */
};
|
||||
|
||||
TEST(everest, tinierVersionBehavesTheSame) {
|
||||
size_t i;
|
||||
uint8_t secret[32], bpoint[32], public[2][32];
|
||||
for (i = 0; i < 500; ++i) {
|
||||
rngset(secret, sizeof(secret), rand64, -1);
|
||||
rngset(bpoint, sizeof(bpoint), rand64, -1);
|
||||
Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint);
|
||||
curve25519(public[1], secret, bpoint);
|
||||
ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0])));
|
||||
}
|
||||
for (i = 0; i < 500; ++i) {
|
||||
Write64le(secret + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Write64le(secret + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Write64le(secret + 020, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Write64le(secret + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Write64le(bpoint + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Write64le(bpoint + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Write64le(bpoint + 020, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Write64le(bpoint + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]);
|
||||
Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint);
|
||||
curve25519(public[1], secret, bpoint);
|
||||
ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0])));
|
||||
}
|
||||
}
|
||||
|
||||
BENCH(everest, bench) {
|
||||
uint8_t secret[32], bpoint[32], public[32];
|
||||
rngset(secret, sizeof(secret), rand64, -1);
|
||||
rngset(bpoint, sizeof(bpoint), rand64, -1);
|
||||
EZBENCH2("everest", donothing,
|
||||
Hacl_Curve25519_crypto_scalarmult(public, secret, bpoint));
|
||||
EZBENCH2("mariana", donothing, curve25519(public, secret, bpoint));
|
||||
}
|
899
third_party/mbedtls/test/everest_unravaged.c
vendored
Normal file
899
third_party/mbedtls/test/everest_unravaged.c
vendored
Normal file
|
@ -0,0 +1,899 @@
|
|||
#include "libc/bits/bits.h"
|
||||
#include "libc/limits.h"
|
||||
#include "third_party/mbedtls/asn1.h"
|
||||
#include "third_party/mbedtls/bignum.h"
|
||||
#include "third_party/mbedtls/common.h"
|
||||
#include "third_party/mbedtls/error.h"
|
||||
#include "third_party/mbedtls/platform.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
Everest (Apache 2.0)\\n\
|
||||
Copyright 2016-2018 INRIA and Microsoft Corporation\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
/* clang-format off */
|
||||
/*
|
||||
* ECDH with curve-optimized implementation multiplexing
|
||||
*
|
||||
* Copyright 2016-2018 INRIA and Microsoft Corporation
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* This file is part of mbed TLS (https://tls.mbed.org)
|
||||
*/
|
||||
|
||||
#ifdef memcpy
|
||||
#undef memcpy
|
||||
#endif
|
||||
#define memcpy(x,y,z) __builtin_memcpy(x,y,z)
|
||||
|
||||
#define load64_le(b) READ64LE(b)
|
||||
#define store64_le(b, i) WRITE64LE(b, i)
|
||||
|
||||
#define KRML_HOST_EXIT exit
|
||||
#define KRML_HOST_PRINTF printf
|
||||
|
||||
#define KRML_EXIT \
|
||||
do { \
|
||||
KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \
|
||||
KRML_HOST_EXIT(254); \
|
||||
} while (0)
|
||||
|
||||
#define _KRML_CHECK_SIZE_PRAGMA \
|
||||
_Pragma("GCC diagnostic ignored \"-Wtype-limits\"")
|
||||
|
||||
#define KRML_CHECK_SIZE(size_elt, sz) \
|
||||
do { \
|
||||
_KRML_CHECK_SIZE_PRAGMA \
|
||||
if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \
|
||||
KRML_HOST_PRINTF( \
|
||||
"Maximum allocatable size exceeded, aborting before overflow at " \
|
||||
"%s:%d\n", \
|
||||
__FILE__, __LINE__); \
|
||||
KRML_HOST_EXIT(253); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
typedef const char *Prims_string;
|
||||
|
||||
typedef struct {
|
||||
uint32_t length;
|
||||
const char *data;
|
||||
} FStar_Bytes_bytes;
|
||||
|
||||
typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int,
|
||||
krml_checked_int_t;
|
||||
|
||||
/* Prims_nat not yet in scope */
|
||||
/* Returns the current calendar time as reported by time(NULL), narrowed
   to 32 bits.
   The parameter list is spelled (void) so that this is a real prototype
   and calls with arguments are rejected by the compiler.
   NOTE(review): the int32_t cast discards the upper bits of time_t where
   time_t is wider than 32 bits; the return type is kept as-is for source
   compatibility with the generated code. */
inline static int32_t krml_time(void) {
  return (int32_t)time(NULL);
}
|
||||
|
||||
/* Branch-free equality mask: returns 0xffffffffffffffff when a == b and
   0 otherwise. */
static uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b)
{
  uint64_t diff = a ^ b;
  /* diff | -diff has its top bit set exactly when diff is non-zero */
  uint64_t differs = (diff | ((uint64_t)0U - diff)) >> (uint32_t)63U;
  return differs - (uint64_t)1U;
}
|
||||
|
||||
/* Branch-free unsigned comparison mask: returns 0xffffffffffffffff when
   a >= b and 0 otherwise. */
static uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b)
{
  uint64_t delta = a - b;
  uint64_t q = (a ^ b) | (delta ^ b);
  /* the top bit of a ^ q is the "a < b" flag */
  uint64_t less = (a ^ q) >> (uint32_t)63U;
  return less - (uint64_t)1U;
}
|
||||
|
||||
/* Branch-free equality mask: returns 0xffffffff when a == b and 0
   otherwise. */
static uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b)
{
  uint32_t diff = a ^ b;
  /* diff | -diff has its top bit set exactly when diff is non-zero */
  uint32_t differs = (diff | ((uint32_t)0U - diff)) >> (uint32_t)31U;
  return differs - (uint32_t)1U;
}
|
||||
|
||||
/* Branch-free unsigned comparison mask: returns 0xffffffff when a >= b
   and 0 otherwise. */
static uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b)
{
  uint32_t delta = a - b;
  uint32_t q = (a ^ b) | (delta ^ b);
  /* the top bit of a ^ q is the "a < b" flag */
  uint32_t less = (a ^ q) >> (uint32_t)31U;
  return less - (uint32_t)1U;
}
|
||||
|
||||
/* Branch-free equality mask: returns 0xffff when a == b and 0 otherwise.
   Intermediate results are narrowed back to 16 bits explicitly because
   the operands are promoted to int by the arithmetic operators. */
static uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b)
{
  uint16_t diff = (uint16_t)(a ^ b);
  uint16_t neg = (uint16_t)(0U - diff);
  /* diff | -diff has bit 15 set exactly when diff is non-zero */
  uint16_t differs = (uint16_t)((diff | neg) >> (uint32_t)15U);
  return (uint16_t)(differs - 1U);
}
|
||||
|
||||
/* Branch-free unsigned comparison mask: returns 0xffff when a >= b and 0
   otherwise. The subtraction is reduced modulo 2^16 before use. */
static uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b)
{
  uint16_t delta = (uint16_t)(a - b);
  uint16_t q = (uint16_t)((a ^ b) | (delta ^ b));
  /* bit 15 of a ^ q is the "a < b" flag */
  uint16_t less = (uint16_t)((a ^ q) >> (uint32_t)15U);
  return (uint16_t)(less - 1U);
}
|
||||
|
||||
/* Branch-free equality mask: returns 0xff when a == b and 0 otherwise.
   Intermediate results are narrowed back to 8 bits explicitly because
   the operands are promoted to int by the arithmetic operators. */
static uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b)
{
  uint8_t diff = (uint8_t)(a ^ b);
  uint8_t neg = (uint8_t)(0U - diff);
  /* diff | -diff has bit 7 set exactly when diff is non-zero */
  uint8_t differs = (uint8_t)((diff | neg) >> (uint32_t)7U);
  return (uint8_t)(differs - 1U);
}
|
||||
|
||||
/* Branch-free unsigned comparison mask: returns 0xff when a >= b and 0
   otherwise. The subtraction is reduced modulo 2^8 before use. */
static uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b)
{
  uint8_t delta = (uint8_t)(a - b);
  uint8_t q = (uint8_t)((a ^ b) | (delta ^ b));
  /* bit 7 of a ^ q is the "a < b" flag */
  uint8_t less = (uint8_t)((a ^ q) >> (uint32_t)7U);
  return (uint8_t)(less - 1U);
}
|
||||
|
||||
/* Folds the overflow of the top limb back into the bottom limb:
   bits 51 and above of b[4] are removed from b[4], multiplied by 19 and
   added to b[0]. Limbs b[1]..b[3] are not touched. */
static void Hacl_Bignum_Modulo_carry_top(uint64_t *b)
{
  const uint64_t mask51 = (uint64_t)0x7ffffffffffffU;
  uint64_t top = b[4U];
  uint64_t low = b[0U];
  uint64_t overflow = top >> (uint32_t)51U;
  b[4U] = top & mask51;
  b[0U] = low + (uint64_t)19U * overflow;
}
|
||||
|
||||
/* Narrows five 128-bit limbs to 64 bits each: output[k] receives the low
   64 bits of input[k] for k = 0..4. */
inline static void Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, uint128_t *input)
{
  uint32_t k = (uint32_t)0U;
  while (k < (uint32_t)5U)
  {
    output[k] = (uint64_t)input[k];
    ++k;
  }
}
|
||||
|
||||
inline static void
|
||||
Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint128_t *output, uint64_t *input, uint64_t s)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
|
||||
{
|
||||
uint128_t xi = output[i];
|
||||
uint64_t yi = input[i];
|
||||
output[i] = xi + (uint128_t)yi * s;
|
||||
}
|
||||
}
|
||||
|
||||
/* Carry propagation over the wide limbs: for k = 0..3, everything above
   bit 50 of tmp[k] is moved into tmp[k + 1] and tmp[k] keeps only its low
   51 bits. tmp[4] only receives a carry; it is not reduced here. */
inline static void Hacl_Bignum_Fproduct_carry_wide_(uint128_t *tmp)
{
  uint32_t k;
  for (k = (uint32_t)0U; k < (uint32_t)4U; ++k)
  {
    uint128_t limb = tmp[k];
    uint128_t next = tmp[k + (uint32_t)1U];
    uint64_t kept = (uint64_t)limb & (uint64_t)0x7ffffffffffffU;
    tmp[k] = (uint128_t)kept;
    tmp[k + (uint32_t)1U] = next + (limb >> (uint32_t)51U);
  }
}
|
||||
|
||||
/* Rotates the five limbs up by one position and scales the wrapped limb:
   on return output[k] holds the old output[k - 1] for k = 1..4 and
   output[0] holds 19 times the old output[4]. */
inline static void Hacl_Bignum_Fmul_shift_reduce(uint64_t *output)
{
  uint64_t wrapped = output[4U];
  uint32_t k;
  for (k = (uint32_t)4U; k > (uint32_t)0U; --k)
  {
    output[k] = output[k - (uint32_t)1U];
  }
  output[0U] = (uint64_t)19U * wrapped;
}
|
||||
|
||||
static void
|
||||
Hacl_Bignum_Fmul_mul_shift_reduce_(uint128_t *output, uint64_t *input, uint64_t *input2)
|
||||
{
|
||||
uint32_t i;
|
||||
uint64_t input2i;
|
||||
{
|
||||
uint32_t i0;
|
||||
for (i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0 = i0 + (uint32_t)1U)
|
||||
{
|
||||
uint64_t input2i0 = input2[i0];
|
||||
Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i0);
|
||||
Hacl_Bignum_Fmul_shift_reduce(input);
|
||||
}
|
||||
}
|
||||
i = (uint32_t)4U;
|
||||
input2i = input2[i];
|
||||
Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
|
||||
}
|
||||
|
||||
inline static void Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2)
|
||||
{
|
||||
uint64_t tmp[5U] = { 0U };
|
||||
memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]);
|
||||
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
|
||||
{
|
||||
uint128_t t[5U];
|
||||
{
|
||||
uint32_t _i;
|
||||
for (_i = 0U; _i < (uint32_t)5U; ++_i)
|
||||
t[_i] = (uint128_t)(uint64_t)0U;
|
||||
}
|
||||
{
|
||||
uint128_t b4;
|
||||
uint128_t b0;
|
||||
uint128_t b4_;
|
||||
uint128_t b0_;
|
||||
uint64_t i0;
|
||||
uint64_t i1;
|
||||
uint64_t i0_;
|
||||
uint64_t i1_;
|
||||
Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2);
|
||||
Hacl_Bignum_Fproduct_carry_wide_(t);
|
||||
b4 = t[4U];
|
||||
b0 = t[0U];
|
||||
b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
|
||||
b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
|
||||
t[4U] = b4_;
|
||||
t[0U] = b0_;
|
||||
Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
|
||||
i0 = output[0U];
|
||||
i1 = output[1U];
|
||||
i0_ = i0 & (uint64_t)0x7ffffffffffffU;
|
||||
i1_ = i1 + (i0 >> (uint32_t)51U);
|
||||
output[0U] = i0_;
|
||||
output[1U] = i1_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Writes the five unreduced 128-bit column sums of output squared into
   tmp. Cross terms are formed from pre-doubled limbs, and terms that wrap
   past limb 4 carry a factor of 19 (applied to the 64-bit limb before the
   128-bit multiply). output is only read. */
inline static void Hacl_Bignum_Fsquare_fsquare__(uint128_t *tmp, uint64_t *output)
{
  uint64_t a0 = output[0U];
  uint64_t a1 = output[1U];
  uint64_t a2 = output[2U];
  uint64_t a3 = output[3U];
  uint64_t a4 = output[4U];
  uint64_t a0_x2 = a0 * (uint64_t)2U;
  uint64_t a1_x2 = a1 * (uint64_t)2U;
  uint64_t a2_x38 = a2 * (uint64_t)2U * (uint64_t)19U;
  uint64_t a4_x19 = a4 * (uint64_t)19U;
  uint64_t a4_x38 = a4_x19 * (uint64_t)2U;

  tmp[0U] = (uint128_t)a0 * a0
          + (uint128_t)a4_x38 * a1
          + (uint128_t)a2_x38 * a3;
  tmp[1U] = (uint128_t)a0_x2 * a1
          + (uint128_t)a4_x38 * a2
          + (uint128_t)(a3 * (uint64_t)19U) * a3;
  tmp[2U] = (uint128_t)a0_x2 * a2
          + (uint128_t)a1 * a1
          + (uint128_t)a4_x38 * a3;
  tmp[3U] = (uint128_t)a0_x2 * a3
          + (uint128_t)a1_x2 * a2
          + (uint128_t)a4 * a4_x19;
  tmp[4U] = (uint128_t)a0_x2 * a4
          + (uint128_t)a1_x2 * a3
          + (uint128_t)a2 * a2;
}
|
||||
|
||||
/* Squares output in place, using tmp as 5-limb wide scratch space.
   The wide square is carried, the overflow of limb 4 above bit 50 is
   multiplied by 19 and added to limb 0, the result is narrowed back into
   output, and one final carry is pushed from limb 0 into limb 1. */
inline static void Hacl_Bignum_Fsquare_fsquare_(uint128_t *tmp, uint64_t *output)
{
  uint128_t top;
  uint64_t low;

  Hacl_Bignum_Fsquare_fsquare__(tmp, output);
  Hacl_Bignum_Fproduct_carry_wide_(tmp);

  /* fold the overflow of the top limb into the bottom limb */
  top = tmp[4U];
  tmp[0U] = tmp[0U] + (uint128_t)(uint64_t)19U * (uint64_t)(top >> (uint32_t)51U);
  tmp[4U] = top & (uint128_t)(uint64_t)0x7ffffffffffffU;
  Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);

  /* carry once from limb 0 into limb 1 */
  low = output[0U];
  output[1U] = output[1U] + (low >> (uint32_t)51U);
  output[0U] = low & (uint64_t)0x7ffffffffffffU;
}
|
||||
|
||||
static void
|
||||
Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, uint128_t *tmp, uint32_t count1)
|
||||
{
|
||||
uint32_t i;
|
||||
Hacl_Bignum_Fsquare_fsquare_(tmp, input);
|
||||
for (i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U)
|
||||
Hacl_Bignum_Fsquare_fsquare_(tmp, input);
|
||||
}
|
||||
|
||||
inline static void
|
||||
Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1)
|
||||
{
|
||||
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
|
||||
{
|
||||
uint128_t t[5U];
|
||||
{
|
||||
uint32_t _i;
|
||||
for (_i = 0U; _i < (uint32_t)5U; ++_i)
|
||||
t[_i] = (uint128_t)(uint64_t)0U;
|
||||
}
|
||||
memcpy(output, input, (uint32_t)5U * sizeof input[0U]);
|
||||
Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
|
||||
}
|
||||
}
|
||||
|
||||
inline static void Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1)
|
||||
{
|
||||
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
|
||||
{
|
||||
uint128_t t[5U];
|
||||
{
|
||||
uint32_t _i;
|
||||
for (_i = 0U; _i < (uint32_t)5U; ++_i)
|
||||
t[_i] = (uint128_t)(uint64_t)0U;
|
||||
}
|
||||
Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Raises z to a fixed power by a chain of squarings and multiplications
   and stores the result in out.
   The exponent annotations below follow from the call sequence, assuming
   fsquare_times(x, y, n) yields y^(2^n) and fmul yields a product; the
   final exponent is 2^255 - 21.
   NOTE(review): for the modulus 2^255 - 19 that exponent is p - 2, i.e.
   this would be the modular inverse - confirm against the caller.
   Scratch layout (5 limbs each): a = [0,5), t = [5,10), b = [10,15),
   c = [15,20). */
inline static void Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z)
{
  uint64_t scratch[20U] = { 0U };
  uint64_t *a = scratch;
  uint64_t *t = scratch + (uint32_t)5U;
  uint64_t *b = scratch + (uint32_t)10U;
  uint64_t *c = scratch + (uint32_t)15U;

  Hacl_Bignum_Fsquare_fsquare_times(a, z, (uint32_t)1U);          /* a = z^2          */
  Hacl_Bignum_Fsquare_fsquare_times(t, a, (uint32_t)2U);          /* t = z^8          */
  Hacl_Bignum_Fmul_fmul(b, t, z);                                 /* b = z^9          */
  Hacl_Bignum_Fmul_fmul(a, b, a);                                 /* a = z^11         */
  Hacl_Bignum_Fsquare_fsquare_times(t, a, (uint32_t)1U);          /* t = z^22         */
  Hacl_Bignum_Fmul_fmul(b, t, b);                                 /* b = z^(2^5-1)    */
  Hacl_Bignum_Fsquare_fsquare_times(t, b, (uint32_t)5U);
  Hacl_Bignum_Fmul_fmul(b, t, b);                                 /* b = z^(2^10-1)   */
  Hacl_Bignum_Fsquare_fsquare_times(t, b, (uint32_t)10U);
  Hacl_Bignum_Fmul_fmul(c, t, b);                                 /* c = z^(2^20-1)   */
  Hacl_Bignum_Fsquare_fsquare_times(t, c, (uint32_t)20U);
  Hacl_Bignum_Fmul_fmul(t, t, c);                                 /* t = z^(2^40-1)   */
  Hacl_Bignum_Fsquare_fsquare_times_inplace(t, (uint32_t)10U);
  Hacl_Bignum_Fmul_fmul(b, t, b);                                 /* b = z^(2^50-1)   */
  Hacl_Bignum_Fsquare_fsquare_times(t, b, (uint32_t)50U);
  Hacl_Bignum_Fmul_fmul(c, t, b);                                 /* c = z^(2^100-1)  */
  Hacl_Bignum_Fsquare_fsquare_times(t, c, (uint32_t)100U);
  Hacl_Bignum_Fmul_fmul(t, t, c);                                 /* t = z^(2^200-1)  */
  Hacl_Bignum_Fsquare_fsquare_times_inplace(t, (uint32_t)50U);
  Hacl_Bignum_Fmul_fmul(t, t, b);                                 /* t = z^(2^250-1)  */
  Hacl_Bignum_Fsquare_fsquare_times_inplace(t, (uint32_t)5U);     /* t = z^(2^255-32) */
  Hacl_Bignum_Fmul_fmul(out, t, a);                               /* z^(2^255-21)     */
}
|
||||
|
||||
/* Limb-wise addition over five limbs: a[k] = a[k] + b[k] (64-bit
   wrap-around, no carry between limbs). */
inline static void Hacl_Bignum_fsum(uint64_t *a, uint64_t *b)
{
  uint32_t k = (uint32_t)0U;
  while (k < (uint32_t)5U)
  {
    uint64_t lhs = a[k];
    uint64_t rhs = b[k];
    a[k] = lhs + rhs;
    ++k;
  }
}
|
||||
|
||||
/* Limb-wise a[k] = (b[k] + bias[k]) - a[k] over five limbs; note the
   result is "b minus a", stored in a. b is copied to a local buffer
   before a is written.
   The bias is 0x3fffffffffff68 for limb 0 and 0x3ffffffffffff8 for limbs
   1..4, i.e. 8 * (2^51 - 19) and 8 * (2^51 - 1).
   NOTE(review): presumably a multiple of the field modulus added to keep
   each limb difference non-negative - confirm. */
inline static void Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b)
{
  uint64_t biased[5U] = { 0U };
  uint32_t k;

  memcpy(biased, b, (uint32_t)5U * sizeof b[0U]);
  biased[0U] = biased[0U] + (uint64_t)0x3fffffffffff68U;
  for (k = (uint32_t)1U; k < (uint32_t)5U; ++k)
  {
    biased[k] = biased[k] + (uint64_t)0x3ffffffffffff8U;
  }
  for (k = (uint32_t)0U; k < (uint32_t)5U; ++k)
  {
    a[k] = biased[k] - a[k];
  }
}
|
||||
|
||||
inline static void Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s)
|
||||
{
|
||||
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
|
||||
{
|
||||
uint128_t tmp[5U];
|
||||
{
|
||||
uint32_t _i;
|
||||
for (_i = 0U; _i < (uint32_t)5U; ++_i)
|
||||
tmp[_i] = (uint128_t)(uint64_t)0U;
|
||||
}
|
||||
{
|
||||
uint128_t b4;
|
||||
uint128_t b0;
|
||||
uint128_t b4_;
|
||||
uint128_t b0_;
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
|
||||
{
|
||||
uint64_t xi = b[i];
|
||||
tmp[i] = (uint128_t)xi * s;
|
||||
}
|
||||
}
|
||||
Hacl_Bignum_Fproduct_carry_wide_(tmp);
|
||||
b4 = tmp[4U];
|
||||
b0 = tmp[0U];
|
||||
b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
|
||||
b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
|
||||
tmp[4U] = b4_;
|
||||
tmp[0U] = b0_;
|
||||
Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Forwarding wrapper: output = a * b via Hacl_Bignum_Fmul_fmul. */
inline static void Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b) {
  Hacl_Bignum_Fmul_fmul(output, a, b);
}
|
||||
|
||||
/* Forwarding wrapper around Hacl_Bignum_Crecip_crecip(output, input). */
inline static void Hacl_Bignum_crecip(uint64_t *output, uint64_t *input) {
  Hacl_Bignum_Crecip_crecip(output, input);
}
|
||||
|
||||
/* Masked swap of a single limb: exchanges a[ctr - 1] and b[ctr - 1] in
   the bit positions where swap1 is set. With swap1 all-ones the two limbs
   are swapped; with swap1 zero both are left unchanged. No branch depends
   on swap1. */
static void
Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
{
  uint32_t idx = ctr - (uint32_t)1U;
  uint64_t from_a = a[idx];
  uint64_t from_b = b[idx];
  uint64_t toggle = (from_a ^ from_b) & swap1;
  a[idx] = from_a ^ toggle;
  b[idx] = from_b ^ toggle;
}
|
||||
|
||||
/* Applies the masked single-limb swap to limbs ctr-1 down to 0 of a and
   b. Does nothing when ctr is 0. */
static void
Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
{
  uint32_t remaining;
  for (remaining = ctr; remaining != (uint32_t)0U; remaining = remaining - (uint32_t)1U)
  {
    Hacl_EC_Point_swap_conditional_step(a, b, swap1, remaining);
  }
}
|
||||
|
||||
/* Conditionally swaps two 10-limb points. The mask passed down is
   0 - iswap, so iswap == 1 yields an all-ones mask (swap) and iswap == 0
   yields zero (no change). The two 5-limb halves of each point are
   processed separately. */
static void Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap)
{
  uint64_t mask = (uint64_t)0U - iswap;
  uint64_t *a_hi = a + (uint32_t)5U;
  uint64_t *b_hi = b + (uint32_t)5U;
  Hacl_EC_Point_swap_conditional_(a, b, mask, (uint32_t)5U);
  Hacl_EC_Point_swap_conditional_(a_hi, b_hi, mask, (uint32_t)5U);
}
|
||||
|
||||
/* Copies a 10-limb point (two consecutive 5-limb halves) from input to
   output. The buffers must not overlap, as with memcpy. */
static void Hacl_EC_Point_copy(uint64_t *output, uint64_t *input)
{
  /* both halves are contiguous, so one 10-limb copy covers them */
  memcpy(output, input, (uint32_t)10U * sizeof input[0U]);
}
|
||||
|
||||
/* Unpacks a byte string into five 51-bit limbs.
   Five 64-bit little-endian words are loaded at byte offsets 0, 6, 12, 19
   and 24 (so bytes 0..31 of input are read), shifted right by 0, 3, 6, 1
   and 12 bits respectively, and masked to 51 bits. All loads happen
   before output is written. */
static void Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input)
{
  const uint64_t mask51 = (uint64_t)0x7ffffffffffffU;
  uint8_t *p1 = input + (uint32_t)6U;
  uint8_t *p2 = input + (uint32_t)12U;
  uint8_t *p3 = input + (uint32_t)19U;
  uint8_t *p4 = input + (uint32_t)24U;
  uint64_t w0 = load64_le(input);
  uint64_t w1 = load64_le(p1);
  uint64_t w2 = load64_le(p2);
  uint64_t w3 = load64_le(p3);
  uint64_t w4 = load64_le(p4);

  output[0U] = w0 & mask51;
  output[1U] = (w1 >> (uint32_t)3U) & mask51;
  output[2U] = (w2 >> (uint32_t)6U) & mask51;
  output[3U] = (w3 >> (uint32_t)1U) & mask51;
  output[4U] = (w4 >> (uint32_t)12U) & mask51;
}
|
||||
|
||||
/* One carry sweep over limbs 0..3: for each k, the bits of input[k]
   above bit 50 are added to input[k + 1] and input[k] is cut to 51 bits.
   input[4] receives the last carry and is not itself reduced. */
static void Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input)
{
  const uint64_t mask51 = (uint64_t)0x7ffffffffffffU;
  uint32_t k;
  for (k = (uint32_t)0U; k < (uint32_t)4U; ++k)
  {
    uint64_t limb = input[k];
    input[k + (uint32_t)1U] = input[k + (uint32_t)1U] + (limb >> (uint32_t)51U);
    input[k] = limb & mask51;
  }
}
|
||||
|
||||
/*
 * First full carry round: ripples carries through limbs 0..4 of `input`,
 * then hands the element to Hacl_Bignum_Modulo_carry_top (defined elsewhere;
 * presumably folds the overflow of limb 4 back into limb 0 -- verify there).
 */
static void Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input)
|
||||
{
|
||||
Hacl_EC_Format_fcontract_first_carry_pass(input);
|
||||
Hacl_Bignum_Modulo_carry_top(input);
|
||||
}
|
||||
|
||||
/*
 * Second carry sweep over the five limbs in `input`.
 *
 * Each of limbs 0..3 passes its bits above bit 50 to its upper neighbour and
 * is then reduced to 51 bits. Limb 4 only accumulates and is left unmasked.
 */
static void Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input)
{
  uint64_t *limb = input;
  uint64_t *const top = input + (uint32_t)4U;

  while (limb != top)
  {
    limb[1U] += limb[0U] >> (uint32_t)51U;
    limb[0U] &= (uint64_t)0x7ffffffffffffU;
    ++limb;
  }
}
|
||||
|
||||
/*
 * Second full carry round.
 *
 * Runs the limb-to-limb carry sweep, calls Hacl_Bignum_Modulo_carry_top
 * (defined elsewhere), then performs one extra carry from limb 0 into
 * limb 1 so that limb 0 ends up with at most 51 bits.
 */
static void Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input)
{
  Hacl_EC_Format_fcontract_second_carry_pass(input);
  Hacl_Bignum_Modulo_carry_top(input);

  /* Add the overflow of limb 0 to limb 1 before masking limb 0. */
  input[1U] += input[0U] >> (uint32_t)51U;
  input[0U] &= (uint64_t)0x7ffffffffffffU;
}
|
||||
|
||||
/*
 * Branch-free final subtraction of p = 2^255 - 19.
 *
 * In 51-bit limbs p is { 0x7ffffffffffed, 0x7ffffffffffff x 4 }. A mask is
 * built from limb 0 >= 0x7ffffffffffed and limbs 1..4 == 0x7ffffffffffff,
 * and p AND mask is subtracted limb by limb.
 * FStar_UInt64_gte_mask / FStar_UInt64_eq_mask are defined elsewhere;
 * presumably they yield all-ones when the relation holds and zero otherwise.
 */
static void Hacl_EC_Format_fcontract_trim(uint64_t *input)
{
  const uint64_t p_low = (uint64_t)0x7ffffffffffedU;
  const uint64_t p_high = (uint64_t)0x7ffffffffffffU;
  uint64_t ge_p = FStar_UInt64_gte_mask(input[0U], p_low);
  uint32_t k;

  for (k = 1U; k < 5U; ++k)
  {
    ge_p &= FStar_UInt64_eq_mask(input[k], p_high);
  }

  input[0U] -= p_low & ge_p;
  for (k = 1U; k < 5U; ++k)
  {
    input[k] -= p_high & ge_p;
  }
}
|
||||
|
||||
/*
 * Packs five 51-bit limbs into four 64-bit words and writes them to
 * output[0..31] with store64_le (defined elsewhere; presumably a
 * little-endian 64-bit store).
 *
 * Word j takes the low bits from the upper part of limb j and the rest from
 * limb j+1, using the shift pairs (51,0), (38,13), (25,26), (12,39).
 * The limbs are OR-ed without masking, so they are presumably expected to be
 * fully reduced (< 2^51) already.
 */
static void Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input)
{
  uint64_t l0 = input[0U];
  uint64_t l1 = input[1U];
  uint64_t l2 = input[2U];
  uint64_t l3 = input[3U];
  uint64_t l4 = input[4U];
  uint64_t word[4U];
  uint8_t *dst = output;
  uint32_t j;

  word[0U] = l1 << (uint32_t)51U | l0;
  word[1U] = l2 << (uint32_t)38U | l1 >> (uint32_t)13U;
  word[2U] = l3 << (uint32_t)25U | l2 >> (uint32_t)26U;
  word[3U] = l4 << (uint32_t)12U | l3 >> (uint32_t)39U;

  for (j = 0U; j < 4U; ++j)
  {
    uint64_t w = word[j];
    store64_le(dst, w);
    dst += (uint32_t)8U;
  }
}
|
||||
|
||||
/*
 * Serialises the five-limb field element `input` into the 32 bytes at
 * `output`. `input` is modified in place by the carry rounds and the trim.
 */
static void Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input)
|
||||
{
|
||||
/* Two carry rounds, then the masked subtraction of 2^255 - 19. */
Hacl_EC_Format_fcontract_first_carry_full(input);
|
||||
Hacl_EC_Format_fcontract_second_carry_full(input);
|
||||
Hacl_EC_Format_fcontract_trim(input);
|
||||
/* Pack the limbs into four 64-bit words and write them out. */
Hacl_EC_Format_fcontract_store(output, input);
|
||||
}
|
||||
|
||||
/*
 * Converts a projective point (X in point[0..4], Z in point[5..9]) into the
 * 32-byte encoding written to `scalar`.
 *
 * Calls Hacl_Bignum_crecip on Z, multiplies X by the result and serialises
 * the product. crecip is defined elsewhere; presumably the field inverse.
 */
static void Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point)
{
  uint64_t scratch[10U] = { 0U };
  uint64_t *z_recip = scratch;
  uint64_t *x_affine = scratch + (uint32_t)5U;

  Hacl_Bignum_crecip(z_recip, point + (uint32_t)5U);
  Hacl_Bignum_fmul(x_affine, point, z_recip);
  Hacl_EC_Format_fcontract(scalar, x_affine);
}
|
||||
|
||||
/*
 * One combined add-and-double step on points stored as X (limbs 0..4) and
 * Z (limbs 5..9).
 *
 *   pp   receives the result derived from p alone (by the naming, 2*p)
 *   ppq  receives the result derived from p and pq (by the naming, p + pq)
 *   qmqp supplies the X limbs multiplied into the Z of ppq
 *
 * p and pq are passed as destinations to Hacl_Bignum_fsum/fdifference, so
 * they are presumably overwritten; treat them as clobbered.
 * All Hacl_Bignum_* helpers are defined elsewhere.
 */
static void
Hacl_EC_AddAndDouble_fmonty(
  uint64_t *pp,
  uint64_t *ppq,
  uint64_t *p,
  uint64_t *pq,
  uint64_t *qmqp
)
{
  /* Scratch area: eight 5-limb slots. */
  uint64_t tmp[40U] = { 0U };
  uint64_t *saved_x = tmp;                    /* copy of p's X          */
  uint64_t *saved = tmp + (uint32_t)5U;       /* copy slot, used twice  */
  uint64_t *t_zzz = tmp + (uint32_t)10U;
  uint64_t *t_xx = tmp + (uint32_t)15U;
  uint64_t *t_zz = tmp + (uint32_t)20U;
  uint64_t *t_xxp = tmp + (uint32_t)25U;
  uint64_t *t_zzp = tmp + (uint32_t)30U;
  uint64_t *t_zzzp = tmp + (uint32_t)35U;

  uint64_t *p_x = p;
  uint64_t *p_z = p + (uint32_t)5U;
  uint64_t *pq_x = pq;
  uint64_t *pq_z = pq + (uint32_t)5U;
  uint64_t *out2_x = pp;
  uint64_t *out2_z = pp + (uint32_t)5U;
  uint64_t *out3_x = ppq;
  uint64_t *out3_z = ppq + (uint32_t)5U;

  /* Sum and difference of the coordinates of p. */
  memcpy(saved_x, p_x, (uint32_t)5U * sizeof p_x[0U]);
  Hacl_Bignum_fsum(p_x, p_z);
  Hacl_Bignum_fdifference(p_z, saved_x);

  /* Sum and difference of the coordinates of pq. */
  memcpy(saved, pq_x, (uint32_t)5U * sizeof pq_x[0U]);
  Hacl_Bignum_fsum(pq_x, pq_z);
  Hacl_Bignum_fdifference(pq_z, saved);

  /* Cross products. */
  Hacl_Bignum_fmul(t_xxp, pq_x, p_z);
  Hacl_Bignum_fmul(t_zzp, p_x, pq_z);

  /* Addition branch: results go to ppq. */
  memcpy(saved, t_xxp, (uint32_t)5U * sizeof t_xxp[0U]);
  Hacl_Bignum_fsum(t_xxp, t_zzp);
  Hacl_Bignum_fdifference(t_zzp, saved);
  Hacl_Bignum_Fsquare_fsquare_times(out3_x, t_xxp, (uint32_t)1U);
  Hacl_Bignum_Fsquare_fsquare_times(t_zzzp, t_zzp, (uint32_t)1U);
  Hacl_Bignum_fmul(out3_z, t_zzzp, qmqp);

  /* Doubling branch: results go to pp. */
  Hacl_Bignum_Fsquare_fsquare_times(t_xx, p_x, (uint32_t)1U);
  Hacl_Bignum_Fsquare_fsquare_times(t_zz, p_z, (uint32_t)1U);
  Hacl_Bignum_fmul(out2_x, t_xx, t_zz);
  Hacl_Bignum_fdifference(t_zz, t_xx);
  Hacl_Bignum_fscalar(t_zzz, t_zz, (uint64_t)121665U);
  Hacl_Bignum_fsum(t_zzz, t_xx);
  Hacl_Bignum_fmul(out2_z, t_zzz, t_zz);
}
|
||||
|
||||
/*
 * Processes one scalar bit, taken from the most significant bit of `byt`.
 *
 * The input pair (nq, nqpq) is conditionally swapped on that bit, the
 * add-and-double step writes into (nq2, nqpq2), and the output pair is
 * conditionally swapped on the same bit.
 */
static void
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
  uint64_t *nq,
  uint64_t *nqpq,
  uint64_t *nq2,
  uint64_t *nqpq2,
  uint64_t *q,
  uint8_t byt
)
{
  const uint64_t top_bit = (uint64_t)(byt >> (uint32_t)7U);

  Hacl_EC_Point_swap_conditional(nq, nqpq, top_bit);
  Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q);
  Hacl_EC_Point_swap_conditional(nq2, nqpq2, top_bit);
}
|
||||
|
||||
/*
 * Processes the two most significant bits of `byt`.
 *
 * The first step writes into (nq2, nqpq2). The second step runs with the
 * buffer roles exchanged and the byte shifted left by one, so the running
 * result ends up back in (nq, nqpq).
 */
static void
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(
  uint64_t *nq,
  uint64_t *nqpq,
  uint64_t *nq2,
  uint64_t *nqpq2,
  uint64_t *q,
  uint8_t byt
)
{
  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
    nq2, nqpq2, nq, nqpq, q, (uint8_t)(byt << (uint32_t)1U));
}
|
||||
|
||||
/*
 * Runs `i` double steps over `byt`, consuming two bits (most significant
 * first) per iteration. Iterative form of the original tail recursion.
 */
static void
Hacl_EC_Ladder_SmallLoop_cmult_small_loop(
  uint64_t *nq,
  uint64_t *nqpq,
  uint64_t *nq2,
  uint64_t *nqpq2,
  uint64_t *q,
  uint8_t byt,
  uint32_t i
)
{
  while (i != (uint32_t)0U)
  {
    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt);
    byt = (uint8_t)(byt << (uint32_t)2U);
    i -= (uint32_t)1U;
  }
}
|
||||
|
||||
/*
 * Walks the scalar bytes n1[i-1] down to n1[0], running four double steps
 * (eight bits) per byte. Iterative form of the original tail recursion.
 */
static void
Hacl_EC_Ladder_BigLoop_cmult_big_loop(
  uint8_t *n1,
  uint64_t *nq,
  uint64_t *nqpq,
  uint64_t *nq2,
  uint64_t *nqpq2,
  uint64_t *q,
  uint32_t i
)
{
  while (i != (uint32_t)0U)
  {
    uint8_t cur;
    i -= (uint32_t)1U;
    cur = n1[i];
    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, cur, (uint32_t)4U);
  }
}
|
||||
|
||||
/*
 * Ladder driver: processes the 32 bytes of scalar `n1` against point `q`
 * and copies the resulting point to `result`.
 *
 * Four 10-limb points live in one zeroed work area. The accumulator starts
 * with limb 0 set to 1 and everything else zero; its companion starts as a
 * copy of q.
 */
static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q)
{
  uint64_t work[40U] = { 0U };
  uint64_t *acc = work;
  uint64_t *acc_plus_q = work + (uint32_t)10U;
  uint64_t *acc_next = work + (uint32_t)20U;
  uint64_t *acc_plus_q_next = work + (uint32_t)30U;

  acc[0U] = (uint64_t)1U;
  Hacl_EC_Point_copy(acc_plus_q, q);
  Hacl_EC_Ladder_BigLoop_cmult_big_loop(
    n1, acc, acc_plus_q, acc_next, acc_plus_q_next, q, (uint32_t)32U);
  Hacl_EC_Point_copy(result, acc);
}
|
||||
|
||||
/*
 * Curve25519 scalar multiplication.
 *
 *   mypublic   out: 32-byte result
 *   secret     in:  32-byte scalar; copied and clamped locally, the caller's
 *                   buffer is not modified
 *   basepoint  in:  32-byte encoded point
 *
 * Clamping clears the three low bits of byte 0, clears bit 7 of byte 31 and
 * sets bit 6 of byte 31.
 */
void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
{
  uint64_t base[10U] = { 0U };   /* X in limbs 0..4, Z in limbs 5..9 */
  uint8_t clamped[32U] = { 0U };
  uint64_t acc[15U] = { 0U };

  Hacl_EC_Format_fexpand(base, basepoint);
  base[5U] = (uint64_t)1U;       /* Z = 1 */

  memcpy(clamped, secret, (uint32_t)32U * sizeof secret[0U]);
  clamped[0U] = clamped[0U] & (uint8_t)248U;
  clamped[31U] = (clamped[31U] & (uint8_t)127U) | (uint8_t)64U;

  acc[0U] = (uint64_t)1U;
  Hacl_EC_Ladder_cmult(acc, clamped, base);
  Hacl_EC_Format_scalar_of_point(mypublic, acc);
}
|
294
third_party/mbedtls/test/secp384r1_test.c
vendored
Normal file
294
third_party/mbedtls/test/secp384r1_test.c
vendored
Normal file
|
@ -0,0 +1,294 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/rand/rand.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "third_party/mbedtls/bignum.h"
|
||||
#include "third_party/mbedtls/ecp.h"
|
||||
#include "third_party/mbedtls/ecp_internal.h"
|
||||
#include "third_party/mbedtls/math.h"
|
||||
#ifdef MBEDTLS_ECP_C
|
||||
|
||||
int ecp_mod_p384_old(mbedtls_mpi *);
|
||||
|
||||
/* Entropy callback handed to mbedtls_mpi_fill_random() by the tests below.
   Passes the n-byte buffer p to rngset() with rand64 as the generator and
   always returns 0. The context pointer c is unused. */
int GetEntropy(void *c, unsigned char *p, size_t n) {
|
||||
rngset(p, n, rand64, -1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
TEST(secp384r1, testIsTheSame) {
|
||||
int i;
|
||||
mbedtls_mpi A, B;
|
||||
mbedtls_mpi_init(&A);
|
||||
mbedtls_mpi_init(&B);
|
||||
mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0);
|
||||
mbedtls_mpi_copy(&B, &A);
|
||||
ecp_mod_p384(&A);
|
||||
ecp_mod_p384_old(&B);
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
if (memcmp(A.p, B.p, 12 * 8)) {
|
||||
for (i = 0; i < 12; ++i) {
|
||||
printf("0x%016lx vs. 0x%016lx %d\n", A.p[i], B.p[i], A.p[i] == B.p[i]);
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
mbedtls_mpi_free(&B);
|
||||
mbedtls_mpi_free(&A);
|
||||
}
|
||||
|
||||
// Returns true when the 7-limb value p (least significant limb first,
// p[6] read as signed) should be reduced: either p[6] > 0, or limbs
// p[0..5] compare greater than or equal to the secp384r1 prime
// 2^384 - 2^128 - 2^96 + 2^32 - 1.
static inline bool mbedtls_p384_gte(uint64_t p[7]) {
  static const uint64_t kPrime[6] = {
      0x00000000ffffffff,  //
      0xffffffff00000000,  //
      0xfffffffffffffffe,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
  };
  int i;
  if ((int64_t)p[6] > 0) return true;
  // Compare from the most significant limb down; first difference decides.
  for (i = 5; i >= 0; --i) {
    if (p[i] > kPrime[i]) return true;
    if (p[i] < kPrime[i]) return false;
  }
  return true;  // all six limbs equal the prime
}
|
||||
|
||||
// Adds the secp384r1 prime 2^384 - 2^128 - 2^96 + 2^32 - 1 to the 7-limb
// value at p (least significant limb first), in place. The carry out of
// limb 5 is added into p[6].
static inline void mbedtls_p384_gro(uint64_t p[7]) {
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
// One addq followed by an adcq chain; %0 is p[0] and the other limbs are
// addressed as byte offsets from it. %4 (all ones) serves limbs 3, 4 and 5.
asm("addq\t%1,%0\n\t"
|
||||
"adcq\t%2,8+%0\n\t"
|
||||
"adcq\t%3,16+%0\n\t"
|
||||
"adcq\t%4,24+%0\n\t"
|
||||
"adcq\t%4,32+%0\n\t"
|
||||
"adcq\t%4,40+%0\n\t"
|
||||
"adcq\t$0,48+%0"
|
||||
// Only p[0] is a declared operand; the "memory" clobber covers p[1..6].
: "+o"(*p)
|
||||
: "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
|
||||
"i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
|
||||
: "memory", "cc");
|
||||
#else
|
||||
// Portable path. ADC is a macro defined elsewhere; presumably
// ADC(R, A, B, CI, CO) computes R = A + B + CI with carry-out CO -- verify.
uint64_t c;
|
||||
ADC(p[0], p[0], 0x00000000ffffffff, 0, c);
|
||||
ADC(p[1], p[1], 0xffffffff00000000, c, c);
|
||||
ADC(p[2], p[2], 0xfffffffffffffffe, c, c);
|
||||
ADC(p[3], p[3], 0xffffffffffffffff, c, c);
|
||||
ADC(p[4], p[4], 0xffffffffffffffff, c, c);
|
||||
ADC(p[5], p[5], 0xffffffffffffffff, c, c);
|
||||
ADC(p[6], p[6], 0, c, c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Subtracts the secp384r1 prime 2^384 - 2^128 - 2^96 + 2^32 - 1 from the
// 7-limb value at p (least significant limb first), in place. The borrow out
// of limb 5 is subtracted from p[6].
static inline void mbedtls_p384_red(uint64_t p[7]) {
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
// One subq followed by an sbbq chain; %0 is p[0] and the other limbs are
// addressed as byte offsets from it. %4 (all ones) serves limbs 3, 4 and 5.
asm("subq\t%1,%0\n\t"
|
||||
"sbbq\t%2,8+%0\n\t"
|
||||
"sbbq\t%3,16+%0\n\t"
|
||||
"sbbq\t%4,24+%0\n\t"
|
||||
"sbbq\t%4,32+%0\n\t"
|
||||
"sbbq\t%4,40+%0\n\t"
|
||||
"sbbq\t$0,48+%0"
|
||||
// Only p[0] is a declared operand; the "memory" clobber covers p[1..6].
: "+o"(*p)
|
||||
: "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
|
||||
"i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
|
||||
: "memory", "cc");
|
||||
#else
|
||||
// Portable path. SBB is a macro defined elsewhere; presumably
// SBB(R, A, B, CI, CO) computes R = A - B - CI with borrow-out CO -- verify.
uint64_t c;
|
||||
SBB(p[0], p[0], 0x00000000ffffffff, 0, c);
|
||||
SBB(p[1], p[1], 0xffffffff00000000, c, c);
|
||||
SBB(p[2], p[2], 0xfffffffffffffffe, c, c);
|
||||
SBB(p[3], p[3], 0xffffffffffffffff, c, c);
|
||||
SBB(p[4], p[4], 0xffffffffffffffff, c, c);
|
||||
SBB(p[5], p[5], 0xffffffffffffffff, c, c);
|
||||
SBB(p[6], p[6], 0, c, c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Subtracts the prime from p repeatedly until mbedtls_p384_gte() reports
// that no further reduction is needed.
static inline void mbedtls_p384_rum(uint64_t p[7]) {
  for (;;) {
    if (!mbedtls_p384_gte(p)) break;
    mbedtls_p384_red(p);
  }
}
|
||||
|
||||
// Runs secp384r1() on X and then corrects the result using X[6] as a signed
// top limb: if it is negative the prime is added until it is not, otherwise
// the prime is subtracted while mbedtls_p384_gte() holds.
static inline void mbedtls_p384_mod(uint64_t X[12]) {
  secp384r1(X);
  if ((int64_t)X[6] >= 0) {
    // Non-negative: correct downward.
    while (mbedtls_p384_gte(X)) mbedtls_p384_red(X);
    return;
  }
  // Negative: correct upward, at least once.
  do {
    mbedtls_p384_gro(X);
  } while ((int64_t)X[6] < 0);
}
|
||||
|
||||
// Reduces X = 2^768 - 1 (all twelve limbs set) and compares against the
// expected residue W.
//
// The modulus is P = 2^384 - 2^128 - 2^96 + 2^32 - 1, i.e. the limbs
// (least significant first) 0x00000000ffffffff, 0xffffffff00000000,
// 0xfffffffffffffffe, 0xffffffffffffffff x 3. It was previously spelled out
// as a local array that nothing read; it is kept here as a comment instead.
TEST(secp384r1, needsDownwardCorrection) {
  int i;
  uint64_t X[12] = {
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
      0xffffffffffffffff,  //
  };
  uint64_t W[12] /* == X mod P */ = {
      0xfffffffe00000000,  //
      0x0000000200000000,  //
      0xfffffffe00000000,  //
      0x0000000200000000,  //
      0x0000000000000001,  //
  };
  mbedtls_p384_mod(X);
  // All twelve limbs are compared, so the upper limbs of X must come back
  // zero to match the zero-filled tail of W.
  if (memcmp(W, X, 12 * 8)) {
    for (i = 0; i < 12; ++i) {
      printf("0x%016lx vs. 0x%016lx %d\n", W[i], X[i], W[i] == X[i]);
    }
    exit(1);
  }
}
|
||||
|
||||
// Reduces X = (2^32 - 1) * 2^704 (only the low half of the top limb set)
// and compares against the expected residue W.
//
// The modulus is P = 2^384 - 2^128 - 2^96 + 2^32 - 1, i.e. the limbs
// (least significant first) 0x00000000ffffffff, 0xffffffff00000000,
// 0xfffffffffffffffe, 0xffffffffffffffff x 3. It was previously spelled out
// as a local array that nothing read; it is kept here as a comment instead.
TEST(secp384r1, needsUpwardCorrection) {
  int i;
  uint64_t X[12] = {
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x0000000000000000,  //
      0x00000000ffffffff,  //
  };
  uint64_t W[12] /* == X mod P */ = {
      0xffffffffffffffff,  //
      0x0000000000000000,  //
      0xfffffffefffffffd,  //
      0x0000000100000000,  //
      0x0000000000000000,  //
      0x00000001ffffffff,  //
  };
  mbedtls_p384_mod(X);
  // All twelve limbs are compared, so the upper limbs of X must come back
  // zero to match the zero-filled tail of W.
  if (memcmp(W, X, 12 * 8)) {
    for (i = 0; i < 12; ++i) {
      printf("0x%016lx vs. 0x%016lx %d\n", W[i], X[i], W[i] == X[i]);
    }
    exit(1);
  }
}
|
||||
|
||||
BENCH(secp384r1, bench) {
|
||||
mbedtls_mpi A;
|
||||
mbedtls_mpi_init(&A);
|
||||
mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0);
|
||||
EZBENCH2("secp384r1", donothing, secp384r1(A.p));
|
||||
EZBENCH2("ecp_mod_p384", donothing, ecp_mod_p384(&A));
|
||||
EZBENCH2("ecp_mod_p384_old", donothing, ecp_mod_p384_old(&A));
|
||||
mbedtls_mpi_free(&A);
|
||||
}
|
||||
|
||||
// Assembly variant of the benchmarked shift: shifts the 7-limb value at p
// left by one bit (limb 0 with shlq, limbs 1..6 with rclq so each takes the
// bit shifted out of the limb below), then reduces with mbedtls_p384_rum().
// x86-64 only; there is no portable fallback in this function.
void mbedtls_p384_shl_a(uint64_t p[7]) {
|
||||
asm("shlq\t%0\n\t"
|
||||
"rclq\t8+%0\n\t"
|
||||
"rclq\t16+%0\n\t"
|
||||
"rclq\t24+%0\n\t"
|
||||
"rclq\t32+%0\n\t"
|
||||
"rclq\t40+%0\n\t"
|
||||
"rclq\t48+%0\n\t"
|
||||
// Only p[0] is a declared operand; the "memory" clobber covers p[1..6].
: "+o"(*p)
|
||||
: /* no inputs */
|
||||
: "memory", "cc");
|
||||
mbedtls_p384_rum(p);
|
||||
}
|
||||
|
||||
// Portable variant of the benchmarked shift: shifts limbs p[0..5] left by
// one bit, then reduces with mbedtls_p384_rum(). p[6] is overwritten with
// the bit shifted out of p[5]; its previous contents are discarded.
void mbedtls_p384_shl_b(uint64_t p[7]) {
  int i;
  p[6] = p[5] >> 63;
  // Work downward so each limb still sees its lower neighbour unshifted.
  for (i = 5; i >= 1; --i) {
    p[i] = p[i] << 1 | p[i - 1] >> 63;
  }
  p[0] = p[0] << 1;
  mbedtls_p384_rum(p);
}
|
||||
|
||||
BENCH(shl, bench) {
|
||||
uint64_t A[7] = {0};
|
||||
EZBENCH2("mbedtls_p384_shl_a", donothing, mbedtls_p384_shl_a(A));
|
||||
EZBENCH2("mbedtls_p384_shl_b", donothing, mbedtls_p384_shl_b(A));
|
||||
}
|
||||
|
||||
// Assembly variant of the benchmarked subtraction: subtracts the secp384r1
// prime 2^384 - 2^128 - 2^96 + 2^32 - 1 from the 7-limb value at p, in place.
// x86-64 only; there is no portable fallback in this function.
void mbedtls_p384_red_a(uint64_t p[7]) {
|
||||
// subq on limb 0, then an sbbq chain; %4 (all ones) serves limbs 3, 4, 5
// and the final borrow is subtracted from p[6].
asm("subq\t%1,%0\n\t"
|
||||
"sbbq\t%2,8+%0\n\t"
|
||||
"sbbq\t%3,16+%0\n\t"
|
||||
"sbbq\t%4,24+%0\n\t"
|
||||
"sbbq\t%4,32+%0\n\t"
|
||||
"sbbq\t%4,40+%0\n\t"
|
||||
"sbbq\t$0,48+%0"
|
||||
// Only p[0] is a declared operand; the "memory" clobber covers p[1..6].
: "+o"(*p)
|
||||
: "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
|
||||
"i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
|
||||
: "memory", "cc");
|
||||
}
|
||||
|
||||
// Macro-based variant of the benchmarked subtraction: subtracts the
// secp384r1 prime limb by limb from the 7-limb value at p, in place.
// SBB is a macro defined elsewhere; presumably SBB(R, A, B, CI, CO)
// computes R = A - B - CI with borrow-out CO -- verify.
void mbedtls_p384_red_b(uint64_t p[7]) {
|
||||
uint64_t c;
|
||||
SBB(p[0], p[0], 0x00000000ffffffff, 0, c);
|
||||
SBB(p[1], p[1], 0xffffffff00000000, c, c);
|
||||
SBB(p[2], p[2], 0xfffffffffffffffe, c, c);
|
||||
SBB(p[3], p[3], 0xffffffffffffffff, c, c);
|
||||
SBB(p[4], p[4], 0xffffffffffffffff, c, c);
|
||||
SBB(p[5], p[5], 0xffffffffffffffff, c, c);
|
||||
// The last borrow is taken from the seventh limb.
SBB(p[6], p[6], 0, c, c);
|
||||
}
|
||||
|
||||
BENCH(red, bench) {
|
||||
uint64_t A[7] = {0};
|
||||
EZBENCH2("mbedtls_p384_red_a", donothing, mbedtls_p384_red_a(A));
|
||||
EZBENCH2("mbedtls_p384_red_b", donothing, mbedtls_p384_red_b(A));
|
||||
}
|
||||
|
||||
#endif /* MBEDTLS_ECP_C */
|
23
third_party/mbedtls/test/test.mk
vendored
23
third_party/mbedtls/test/test.mk
vendored
|
@ -78,7 +78,9 @@ THIRD_PARTY_MBEDTLS_TEST_COMS = \
|
|||
o/$(MODE)/third_party/mbedtls/test/test_suite_timing.com \
|
||||
o/$(MODE)/third_party/mbedtls/test/test_suite_version.com \
|
||||
o/$(MODE)/third_party/mbedtls/test/test_suite_x509parse.com \
|
||||
o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com
|
||||
o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com \
|
||||
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com \
|
||||
o/$(MODE)/third_party/mbedtls/test/everest_test.com
|
||||
|
||||
THIRD_PARTY_MBEDTLS_TEST_TESTS = \
|
||||
$(THIRD_PARTY_MBEDTLS_TEST_COMS:%=%.ok)
|
||||
|
@ -1340,3 +1342,22 @@ o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com.dbg: \
|
|||
$(CRT) \
|
||||
$(APE)
|
||||
@$(APELINK)
|
||||
|
||||
o/$(MODE)/third_party/mbedtls/test/everest_test.com: o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg
|
||||
o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg: \
|
||||
$(THIRD_PARTY_MBEDTLS_TEST_DEPS) \
|
||||
o/$(MODE)/third_party/mbedtls/test/everest_test.o \
|
||||
o/$(MODE)/third_party/mbedtls/test/everest_unravaged.o \
|
||||
$(LIBC_TESTMAIN) \
|
||||
$(CRT) \
|
||||
$(APE)
|
||||
@$(APELINK)
|
||||
|
||||
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com: o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg
|
||||
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg: \
|
||||
$(THIRD_PARTY_MBEDTLS_TEST_DEPS) \
|
||||
o/$(MODE)/third_party/mbedtls/test/secp384r1_test.o \
|
||||
$(LIBC_TESTMAIN) \
|
||||
$(CRT) \
|
||||
$(APE)
|
||||
@$(APELINK)
|
||||
|
|
Loading…
Reference in a new issue