diff --git a/libc/nexgen32e/adc.S b/libc/nexgen32e/adc.S deleted file mode 100644 index d58f7089b..000000000 --- a/libc/nexgen32e/adc.S +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Computes C = A + B -// -// @param rdi is C -// @param rsi is A -// @param rdx is B -// @param rcx is number of additions -// @return al has carry -adc: .leafprologue - test %ecx,%ecx - jz 1f - xor %r9d,%r9d -0: mov (%rsi,%r9,8),%rax - adc (%rdx,%r9,8),%rax - mov %rax,(%rdi,%r9,8) - inc %r9d - loop 0b -1: setb %al - .leafepilogue - .endfn adc,globl diff --git a/libc/nexgen32e/mul4x4adx.S b/libc/nexgen32e/mul4x4adx.S index 268d91668..86a02797f 100644 --- a/libc/nexgen32e/mul4x4adx.S +++ b/libc/nexgen32e/mul4x4adx.S @@ -18,34 +18,47 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" +// Computes 512-bit product of 256-bit and 256-bit numbers. +// +// Instructions: 88 +// Total Cycles: 36 +// Total uOps: 120 +// uOps Per Cycle: 3.33 +// IPC: 2.44 +// Block RThroughput: 20.0 +// +// @param rdi receives 8 quadword result +// @param rsi is left hand side which must have 4 quadwords +// @param rdx is right hand side which must have 4 quadwords +// @note words are host endian while array is little endian +// @mayalias Mul4x4Adx: push %rbp mov %rsp,%rbp .profilable - push %r15 - push %r14 - push %r13 - push %r12 + sub $56,%rsp + mov %r15,-8(%rbp) + mov %r14,-16(%rbp) + mov %r13,-24(%rbp) + mov %r12,-32(%rbp) + mov %rbx,-40(%rbp) mov %rdx,%r12 - push %rbx - sub $16,%rsp mov (%rdx),%rdx mov (%rsi),%rax mov 16(%rsi),%r11 mov 24(%rsi),%r10 - xor %r13d,%r13d mulx %rax,%rbx,%rax mov %rbx,-48(%rbp) mov 8(%rsi),%rbx mulx %rbx,%rdx,%rcx - adox %rdx,%rax + add %rdx,%rax mov (%r12),%rdx mulx %r11,%rdx,%r9 - adox %rdx,%rcx + adc %rdx,%rcx mov (%r12),%rdx mulx %r10,%rdx,%r8 - adox %rdx,%r9 - adox %r13,%r8 + adc %rdx,%r9 + adc $0,%r8 xor %r13d,%r13d mov (%rsi),%r14 mov 8(%r12),%rdx @@ -105,12 +118,103 @@ Mul4x4Adx: adox %r14,%r10 mov %rsi,(%rdi) mov %r10,56(%rdi) - add $16,%rsp - pop %rbx - pop %r12 
- pop %r13 - pop %r14 - pop %r15 - pop %rbp + mov -8(%rbp),%r15 + mov -16(%rbp),%r14 + mov -24(%rbp),%r13 + mov -32(%rbp),%r12 + mov -40(%rbp),%rbx + leave ret .endfn Mul4x4Adx,globl + + .end +TIMELINE VIEW 0123456789 012345 +Index 0123456789 0123456789 +[0,0] DeER . . . . . . . subq $56, %rsp +[0,1] DeER . . . . . . . movq %r15, -8(%rbp) +[0,2] D=eER. . . . . . . movq %r14, -16(%rbp) +[0,3] D==eER . . . . . . movq %r13, -24(%rbp) +[0,4] D===eER . . . . . . movq %r12, -32(%rbp) +[0,5] D====eER . . . . . . movq %rbx, -40(%rbp) +[0,6] .DeE---R . . . . . . movq %rdx, %r12 +[0,7] .DeeeeeER . . . . . . movq (%rdx), %rdx +[0,8] .D=eeeeeER. . . . . . movq (%rsi), %rax +[0,9] .D=eeeeeER. . . . . . movq 16(%rsi), %r11 +[0,10] .D==eeeeeER . . . . . movq 24(%rsi), %r10 +[0,11] . D=====eeeeER . . . . . mulxq %rax, %rbx, %rax +[0,12] . D========eER . . . . . movq %rbx, -48(%rbp) +[0,13] . D=eeeeeE---R . . . . . movq 8(%rsi), %rbx +[0,14] . D=====eeeeER. . . . . mulxq %rbx, %rdx, %rcx +[0,15] . D========eER. . . . . addq %rdx, %rax +[0,16] . D=eeeeeE---R. . . . . movq (%r12), %rdx +[0,17] . D=====eeeeER . . . . mulxq %r11, %rdx, %r9 +[0,18] . D========eER . . . . adcq %rdx, %rcx +[0,19] . DeeeeeE----R . . . . movq (%r12), %rdx +[0,20] . D=====eeeeER . . . . mulxq %r10, %rdx, %r8 +[0,21] . D========eER . . . . adcq %rdx, %r9 +[0,22] . D=========eER . . . . adcq $0, %r8 +[0,23] . D-----------R . . . . xorl %r13d, %r13d +[0,24] . .DeeeeeE----R . . . . movq (%rsi), %r14 +[0,25] . .DeeeeeE----R . . . . movq 8(%r12), %rdx +[0,26] . .D=====eeeeER . . . . mulxq %r14, %r14, %r15 +[0,27] . .D========eER . . . . adoxq %r14, %rax +[0,28] . . D========eER . . . . adcxq %r15, %rcx +[0,29] . . D========eER . . . . movq %rax, -56(%rbp) +[0,30] . . D=====eeeeER . . . . mulxq %rbx, %r14, %rax +[0,31] . . D=========eER. . . . adoxq %r14, %rcx +[0,32] . . D=========eER . . . adcxq %rax, %r9 +[0,33] . . D=====eeeeE-R . . . mulxq %r11, %r14, %rax +[0,34] . . D==========eER . . . 
adoxq %r14, %r9 +[0,35] . . D===========eER . . . adcxq %rax, %r8 +[0,36] . . D=====eeeeE--R . . . mulxq %r10, %rdx, %rax +[0,37] . . D===========eER . . . adoxq %rdx, %r8 +[0,38] . . DeeeeeE-------R . . . movq 16(%r12), %rdx +[0,39] . . D============eER. . . adcxq %r13, %rax +[0,40] . . D============eER . . adoxq %r13, %rax +[0,41] . . DeeeeeE--------R . . movq (%rsi), %r13 +[0,42] . . D=====E--------R . . xorl %r15d, %r15d +[0,43] . . D=====eeeeE----R . . mulxq %r13, %r13, %r14 +[0,44] . . .D=======eE----R . . adoxq %r13, %rcx +[0,45] . . .D========eE---R . . adcxq %r14, %r9 +[0,46] . . .D=====eeeeE---R . . mulxq %rbx, %r14, %r13 +[0,47] . . .D=========eE--R . . adoxq %r14, %r9 +[0,48] . . . D=========eE-R . . adcxq %r13, %r8 +[0,49] . . . D=====eeeeE--R . . mulxq %r11, %r14, %r13 +[0,50] . . . D==========eER . . adoxq %r14, %r8 +[0,51] . . . D===========eER . . adcxq %r13, %rax +[0,52] . . . DeeeeeE------R . . movq (%rsi), %rsi +[0,53] . . . D=====eeeeE--R . . mulxq %r10, %rdx, %r13 +[0,54] . . . D===========eER . . adoxq %rdx, %rax +[0,55] . . . D============eER . . adcxq %r15, %r13 +[0,56] . . . DeeeeeE-------R . . movq 24(%r12), %rdx +[0,57] . . . D============eER. . adoxq %r15, %r13 +[0,58] . . . D=====eeeeE----R. . mulxq %rsi, %r12, %rsi +[0,59] . . . D======E-------R. . xorl %r14d, %r14d +[0,60] . . . D========eE---R. . adoxq %r12, %r9 +[0,61] . . . D=========eE--R. . adcxq %rsi, %r8 +[0,62] . . . D=====eeeeE---R. . mulxq %rbx, %rsi, %rbx +[0,63] . . . D==========eE-R. . adoxq %rsi, %r8 +[0,64] . . . .D==========eER. . adcxq %rbx, %rax +[0,65] . . . .D=====eeeeE--R. . mulxq %r11, %r11, %rsi +[0,66] . . . .DeeeeeE------R. . movq -56(%rbp), %rbx +[0,67] . . . .D===eE-------R. . movq %rcx, 16(%rdi) +[0,68] . . . . D==========eER . adcxq %rsi, %r13 +[0,69] . . . . DeeeeeE------R . movq -48(%rbp), %rsi +[0,70] . . . . D====eE------R . movq %rbx, 8(%rdi) +[0,71] . . . . D===========eER . adoxq %r11, %rax +[0,72] . . . . D=======eE----R . 
movq %r9, 24(%rdi) +[0,73] . . . . D=========eE--R . movq %r8, 32(%rdi) +[0,74] . . . . D===========eER . movq %rax, 40(%rdi) +[0,75] . . . . D====eeeeE----R . mulxq %r10, %rdx, %r10 +[0,76] . . . . D===========eER . adoxq %rdx, %r13 +[0,77] . . . . D============eER . adcxq %r14, %r10 +[0,78] . . . . D===========eER . movq %r13, 48(%rdi) +[0,79] . . . . D============eER. adoxq %r14, %r10 +[0,80] . . . . D============eER. movq %rsi, (%rdi) +[0,81] . . . . D=============eER movq %r10, 56(%rdi) +[0,82] . . . . DeeeeeE---------R movq -8(%rbp), %r15 +[0,83] . . . . DeeeeeE---------R movq -16(%rbp), %r14 +[0,84] . . . . DeeeeeE--------R movq -24(%rbp), %r13 +[0,85] . . . . DeeeeeE--------R movq -32(%rbp), %r12 +[0,86] . . . . D=eeeeeE-------R movq -40(%rbp), %rbx +[0,87] . . . . D===eE---------R addq $56, %rsp diff --git a/libc/nexgen32e/mul6x6adx.S b/libc/nexgen32e/mul6x6adx.S index b90906014..313658bec 100644 --- a/libc/nexgen32e/mul6x6adx.S +++ b/libc/nexgen32e/mul6x6adx.S @@ -18,37 +18,50 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" +// Computes 768-bit product of 384-bit and 384-bit numbers. 
+// +// Instructions: 153 +// Total Cycles: 73 +// Total uOps: 261 +// uOps Per Cycle: 3.58 +// IPC: 2.10 +// Block RThroughput: 43.5 +// +// @param rdi receives 12 quadword result +// @param rsi is left hand side which must have 6 quadwords +// @param rdx is right hand side which must have 6 quadwords +// @note words are host endian while array is little endian +// @mayalias Mul6x6Adx: push %rbp mov %rsp,%rbp .profilable - push %r15 - push %r14 - push %r13 - push %r12 - push %rbx + sub $64,%rsp + mov %r15,-8(%rbp) + mov %r14,-16(%rbp) + mov %r13,-24(%rbp) + mov %r12,-32(%rbp) + mov %rbx,-40(%rbp) mov %rdx,%rbx - sub $24,%rsp mov (%rdx),%rdx - xor %r8d,%r8d mulx (%rsi),%rcx,%rax mulx 8(%rsi),%rdx,%r12 mov %rcx,-48(%rbp) - adox %rdx,%rax + add %rdx,%rax mov (%rbx),%rdx mulx 16(%rsi),%rdx,%r15 - adox %rdx,%r12 + adc %rdx,%r12 mov (%rbx),%rdx mulx 24(%rsi),%rdx,%r10 - adox %rdx,%r15 + adc %rdx,%r15 mov (%rbx),%rdx mulx 32(%rsi),%rdx,%r9 - adox %rdx,%r10 + adc %rdx,%r10 mov (%rbx),%rdx mulx 40(%rsi),%rdx,%rcx - adox %rdx,%r9 + adc %rdx,%r9 mov 8(%rbx),%rdx - adox %r8,%rcx + adc $0,%rcx mulx (%rsi),%r13,%r11 xor %r8d,%r8d adox %r13,%rax @@ -171,12 +184,167 @@ Mul6x6Adx: mov %r8,64(%rdi) mov %r11,72(%rdi) mov %rdx,88(%rdi) - add $24,%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp + mov -8(%rbp),%r15 + mov -16(%rbp),%r14 + mov -24(%rbp),%r13 + mov -32(%rbp),%r12 + mov -40(%rbp),%rbx + leave ret .endfn Mul6x6Adx,globl + + .end +SIMULATION 0123456789 0123456789 0123456789 012 +Index 0123456789 0123456789 0123456789 0123456789 +[0,0] DeER . . . . . . . . . . . . . . . movq %r15, -8(%rbp) +[0,1] D=eER. . . . . . . . . . . . . . . movq %r14, -16(%rbp) +[0,2] D==eER . . . . . . . . . . . . . . movq %r13, -24(%rbp) +[0,3] D===eER . . . . . . . . . . . . . . movq %r12, -32(%rbp) +[0,4] D====eER . . . . . . . . . . . . . . movq %rbx, -40(%rbp) +[0,5] DeE----R . . . . . . . . . . . . . . movq %rdx, %rbx +[0,6] .DeeeeeER . . . . . . . . . . . . . . .
movq (%rdx), %rdx +[0,7] .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq (%rsi), %rcx, %rax +[0,8] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %r12 +[0,9] . D=======eE------R . . . . . . . . . . . . movq %rcx, -48(%rbp) +[0,10] . D=============eER . . . . . . . . . . . . addq %rdx, %rax +[0,11] . DeeeeeE--------R . . . . . . . . . . . . movq (%rbx), %rdx +[0,12] . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 16(%rsi), %rdx, %r15 +[0,13] . D=============eER. . . . . . . . . . . . adcq %rdx, %r12 +[0,14] . DeeeeeE--------R. . . . . . . . . . . . movq (%rbx), %rdx +[0,15] . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r10 +[0,16] . D=============eER . . . . . . . . . . . adcq %rdx, %r15 +[0,17] . DeeeeeE--------R . . . . . . . . . . . movq (%rbx), %rdx +[0,18] . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r9 +[0,19] . D=============eER . . . . . . . . . . . adcq %rdx, %r10 +[0,20] . .DeeeeeE--------R . . . . . . . . . . . movq (%rbx), %rdx +[0,21] . .D=====eeeeeeeeeER . . . . . . . . . . . mulxq 40(%rsi), %rdx, %rcx +[0,22] . .D=============eER . . . . . . . . . . . adcq %rdx, %r9 +[0,23] . . DeeeeeE--------R . . . . . . . . . . . movq 8(%rbx), %rdx +[0,24] . . D=============eER . . . . . . . . . . . adcq $0, %rcx +[0,25] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r13, %r11 +[0,26] . . D--------------R . . . . . . . . . . . xorl %r8d, %r8d +[0,27] . . D========eE----R . . . . . . . . . . . adoxq %r13, %rax +[0,28] . . D=============eER. . . . . . . . . . . adcxq %r11, %r12 +[0,29] . . D=========eE----R. . . . . . . . . . . movq %rax, -56(%rbp) +[0,30] . . D====eeeeeeeeeER. . . . . . . . . . . mulxq 8(%rsi), %r11, %rax +[0,31] . . D=============eER . . . . . . . . . . adoxq %r11, %r12 +[0,32] . . D==============eER . . . . . . . . . . adcxq %rax, %r15 +[0,33] . . D=============eER . . . . . . . . . . movq %r12, %r14 +[0,34] . . D====eeeeeeeeeE-R . . . . . . . . . . 
mulxq 16(%rsi), %r11, %rax +[0,35] . . D==============eER . . . . . . . . . . adoxq %r11, %r15 +[0,36] . . .D==============eER . . . . . . . . . . adcxq %rax, %r10 +[0,37] . . .D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r11, %rax +[0,38] . . .D===============eER. . . . . . . . . . adoxq %r11, %r10 +[0,39] . . . D===============eER . . . . . . . . . adcxq %rax, %r9 +[0,40] . . . D====eeeeeeeeeE---R . . . . . . . . . mulxq 32(%rsi), %r11, %rax +[0,41] . . . D================eER . . . . . . . . . adoxq %r11, %r9 +[0,42] . . . D================eER . . . . . . . . . adcxq %rax, %rcx +[0,43] . . . D====eeeeeeeeeE----R . . . . . . . . . mulxq 40(%rsi), %rdx, %rax +[0,44] . . . D=================eER . . . . . . . . . adoxq %rdx, %rcx +[0,45] . . . D=================eER. . . . . . . . . adcxq %r8, %rax +[0,46] . . . DeeeeeE-------------R. . . . . . . . . movq 16(%rbx), %rdx +[0,47] . . . D==================eER . . . . . . . . adoxq %r8, %rax +[0,48] . . . D====eeeeeeeeeE-----R . . . . . . . . mulxq (%rsi), %r13, %r8 +[0,49] . . . D====E--------------R . . . . . . . . xorl %r11d, %r11d +[0,50] . . . D=========eE--------R . . . . . . . . adoxq %r13, %r14 +[0,51] . . . .D=========eE-------R . . . . . . . . movq %r14, -64(%rbp) +[0,52] . . . .D============eE----R . . . . . . . . adcxq %r8, %r15 +[0,53] . . . .D====eeeeeeeeeE----R . . . . . . . . mulxq 8(%rsi), %r12, %r8 +[0,54] . . . . D============eE---R . . . . . . . . adoxq %r12, %r15 +[0,55] . . . . D=============eE--R . . . . . . . . adcxq %r8, %r10 +[0,56] . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 16(%rsi), %r12, %r8 +[0,57] . . . . D=============eE-R . . . . . . . . adoxq %r12, %r10 +[0,58] . . . . D==============eER . . . . . . . . adcxq %r8, %r9 +[0,59] . . . . D====eeeeeeeeeE--R . . . . . . . . mulxq 24(%rsi), %r12, %r8 +[0,60] . . . . D==============eER . . . . . . . . adoxq %r12, %r9 +[0,61] . . . . D===============eER . . . . . . . . adcxq %r8, %rcx +[0,62] . . . . D====eeeeeeeeeE---R . . . . . 
. . . mulxq 32(%rsi), %r12, %r8 +[0,63] . . . . D===============eER . . . . . . . . adoxq %r12, %rcx +[0,64] . . . . D================eER. . . . . . . . adcxq %r8, %rax +[0,65] . . . . D====eeeeeeeeeE----R. . . . . . . . mulxq 40(%rsi), %rdx, %r8 +[0,66] . . . . .D================eER . . . . . . . adoxq %rdx, %rax +[0,67] . . . . .D=================eER . . . . . . . adcxq %r11, %r8 +[0,68] . . . . .DeeeeeE-------------R . . . . . . . movq 24(%rbx), %rdx +[0,69] . . . . .D==================eER . . . . . . . adoxq %r11, %r8 +[0,70] . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq (%rsi), %r13, %r11 +[0,71] . . . . . D====E--------------R . . . . . . . xorl %r12d, %r12d +[0,72] . . . . . D===========eE------R . . . . . . . adoxq %r13, %r15 +[0,73] . . . . . D============eE----R . . . . . . . adcxq %r11, %r10 +[0,74] . . . . . D====eeeeeeeeeE----R . . . . . . . mulxq 8(%rsi), %r13, %r11 +[0,75] . . . . . D=============eE---R . . . . . . . adoxq %r13, %r10 +[0,76] . . . . . D=============eE--R . . . . . . . adcxq %r11, %r9 +[0,77] . . . . . D====eeeeeeeeeE---R . . . . . . . mulxq 16(%rsi), %r13, %r11 +[0,78] . . . . . D==============eE-R . . . . . . . adoxq %r13, %r9 +[0,79] . . . . . D==============eER . . . . . . . adcxq %r11, %rcx +[0,80] . . . . . D====eeeeeeeeeE--R . . . . . . . mulxq 24(%rsi), %r13, %r11 +[0,81] . . . . . D===============eER . . . . . . . adoxq %r13, %rcx +[0,82] . . . . . .D===============eER. . . . . . . adcxq %r11, %rax +[0,83] . . . . . .D====eeeeeeeeeE---R. . . . . . . mulxq 32(%rsi), %r13, %r11 +[0,84] . . . . . .D================eER . . . . . . adoxq %r13, %rax +[0,85] . . . . . . D================eER . . . . . . adcxq %r11, %r8 +[0,86] . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 40(%rsi), %rdx, %r11 +[0,87] . . . . . . D=================eER . . . . . . adoxq %rdx, %r8 +[0,88] . . . . . . DeeeeeE------------R . . . . . . movq 32(%rbx), %rdx +[0,89] . . . . . . D=================eER . . . . . . adcxq %r12, %r11 +[0,90] . . . . . . 
D=====eeeeeeeeeE----R . . . . . . mulxq (%rsi), %r14, %r13 +[0,91] . . . . . . D=================eER. . . . . . adoxq %r12, %r11 +[0,92] . . . . . . D-------------------R. . . . . . xorl %r12d, %r12d +[0,93] . . . . . . D===========eE------R. . . . . . adoxq %r14, %r10 +[0,94] . . . . . . D=============eE----R. . . . . . adcxq %r13, %r9 +[0,95] . . . . . . D====eeeeeeeeeE----R. . . . . . mulxq 8(%rsi), %r14, %r13 +[0,96] . . . . . . D=============eE---R. . . . . . adoxq %r14, %r9 +[0,97] . . . . . . D==============eE--R. . . . . . adcxq %r13, %rcx +[0,98] . . . . . . .D====eeeeeeeeeE---R. . . . . . mulxq 16(%rsi), %r14, %r13 +[0,99] . . . . . . .D==============eE-R. . . . . . adoxq %r14, %rcx +[0,100] . . . . . . .D===============eER. . . . . . adcxq %r13, %rax +[0,101] . . . . . . . D====eeeeeeeeeE--R. . . . . . mulxq 24(%rsi), %r14, %r13 +[0,102] . . . . . . . D===============eER . . . . . adoxq %r14, %rax +[0,103] . . . . . . . D================eER . . . . . adcxq %r13, %r8 +[0,104] . . . . . . . D====eeeeeeeeeE---R . . . . . mulxq 32(%rsi), %r14, %r13 +[0,105] . . . . . . . D================eER . . . . . adoxq %r14, %r8 +[0,106] . . . . . . . D=================eER . . . . . adcxq %r13, %r11 +[0,107] . . . . . . . D====eeeeeeeeeE----R . . . . . mulxq 40(%rsi), %rdx, %r13 +[0,108] . . . . . . . D=================eER. . . . . adoxq %rdx, %r11 +[0,109] . . . . . . . D==================eER . . . . adcxq %r12, %r13 +[0,110] . . . . . . . DeeeeeE-------------R . . . . movq 40(%rbx), %rdx +[0,111] . . . . . . . D==================eER . . . . adoxq %r12, %r13 +[0,112] . . . . . . . D=====eeeeeeeeeE-----R . . . . mulxq (%rsi), %r14, %rbx +[0,113] . . . . . . . .D-------------------R . . . . xorl %r12d, %r12d +[0,114] . . . . . . . .D===========eE------R . . . . adoxq %r14, %r9 +[0,115] . . . . . . . .D=============eE----R . . . . adcxq %rbx, %rcx +[0,116] . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 8(%rsi), %r14, %rbx +[0,117] . . . . . . . . 
D=============eE---R . . . . adoxq %r14, %rcx +[0,118] . . . . . . . . D==============eE--R . . . . adcxq %rbx, %rax +[0,119] . . . . . . . . D====eeeeeeeeeE---R . . . . mulxq 16(%rsi), %r14, %rbx +[0,120] . . . . . . . . D==============eE-R . . . . adoxq %r14, %rax +[0,121] . . . . . . . . D===============eER . . . . adcxq %rbx, %r8 +[0,122] . . . . . . . . D====eeeeeeeeeE--R . . . . mulxq 24(%rsi), %r14, %rbx +[0,123] . . . . . . . . D===============eER . . . . adoxq %r14, %r8 +[0,124] . . . . . . . . D================eER . . . . adcxq %rbx, %r11 +[0,125] . . . . . . . . D====eeeeeeeeeE---R . . . . mulxq 32(%rsi), %r14, %rbx +[0,126] . . . . . . . . .D====eeeeeeeeeE--R . . . . mulxq 40(%rsi), %rsi, %rdx +[0,127] . . . . . . . . .D===============eER. . . . adoxq %r14, %r11 +[0,128] . . . . . . . . .D================eER . . . adcxq %rbx, %r13 +[0,129] . . . . . . . . . D================eER . . . adoxq %rsi, %r13 +[0,130] . . . . . . . . . D=================eER . . . adcxq %r12, %rdx +[0,131] . . . . . . . . . D==================eER . . . adoxq %r12, %rdx +[0,132] . . . . . . . . . DeeeeeE--------------R . . . movq -48(%rbp), %rsi +[0,133] . . . . . . . . . D=eeeeeE-------------R . . . movq -56(%rbp), %rbx +[0,134] . . . . . . . . . D===eE---------------R . . . movq %r15, 24(%rdi) +[0,135] . . . . . . . . . D=eeeeeE------------R . . . movq -64(%rbp), %r14 +[0,136] . . . . . . . . . D================eE-R . . . movq %r13, 80(%rdi) +[0,137] . . . . . . . . . D=================eER . . . movq %rbx, 8(%rdi) +[0,138] . . . . . . . . . D==================eER. . . movq %r14, 16(%rdi) +[0,139] . . . . . . . . . D===================eER . . movq %rsi, (%rdi) +[0,140] . . . . . . . . . D====================eER . . movq %r10, 32(%rdi) +[0,141] . . . . . . . . . D====================eER . . movq %r9, 40(%rdi) +[0,142] . . . . . . . . . D=====================eER . . movq %rcx, 48(%rdi) +[0,143] . . . . . . . . . D======================eER. . movq %rax, 56(%rdi) +[0,144] . . . . . . 
. . . D=======================eER . movq %r8, 64(%rdi) +[0,145] . . . . . . . . . D========================eER. movq %r11, 72(%rdi) +[0,146] . . . . . . . . . D=========================eER movq %rdx, 88(%rdi) +[0,147] . . . . . . . . . DeeeeeE--------------------R movq -8(%rbp), %r15 +[0,148] . . . . . . . . . D=eeeeeE-------------------R movq -16(%rbp), %r14 +[0,149] . . . . . . . . . D=eeeeeE-------------------R movq -24(%rbp), %r13 +[0,150] . . . . . . . . . D==eeeeeE------------------R movq -32(%rbp), %r12 +[0,151] . . . . . . . . . D==eeeeeE------------------R movq -40(%rbp), %rbx diff --git a/libc/nexgen32e/mul8x8.S b/libc/nexgen32e/mul8x8.S deleted file mode 100644 index 3ad62baf5..000000000 --- a/libc/nexgen32e/mul8x8.S +++ /dev/null @@ -1,483 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -/ Computes 1024-bit product of 512-bit and 512-bit numbers. 
-/ -/ Instructions: 262 -/ Total Cycles: 114 -/ Total uOps: 469 -/ Dispatch Width: 6 -/ uOps Per Cycle: 4.11 -/ IPC: 2.30 -/ Block RThroughput: 78.2 -/ -/ @param rdi receives 16 quadword result -/ @param rsi is left hand side which must have 8 quadwords -/ @param rdx is right hand side which must have 8 quadwords -/ @note words are host endian while array is little endian -/ @mayalias -Mul8x8Adx: - push %rbp - mov %rsp,%rbp - .profilable - push %r15 - push %r14 - push %r13 - push %r12 - mov %rdx,%r12 - push %rbx - sub $64,%rsp - mov (%rdx),%rdx - xor %r13d,%r13d - mulx (%rsi),%rax,%rcx - mov %rdi,-48(%rbp) - mov %rax,-56(%rbp) - mulx 8(%rsi),%rdx,%rax - adox %rdx,%rcx - mov (%r12),%rdx - mulx 16(%rsi),%rdx,%rbx - adox %rdx,%rax - mov (%r12),%rdx - mulx 24(%rsi),%rdx,%r11 - adox %rdx,%rbx - mov (%r12),%rdx - mulx 32(%rsi),%rdx,%r10 - adox %rdx,%r11 - mov (%r12),%rdx - mulx 40(%rsi),%rdx,%r9 - adox %rdx,%r10 - mov (%r12),%rdx - mulx 48(%rsi),%rdx,%r8 - adox %rdx,%r9 - mov (%r12),%rdx - mulx 56(%rsi),%rdx,%rdi - adox %rdx,%r8 - adox %r13,%rdi - xor %r13d,%r13d - mov 8(%r12),%rdx - mulx (%rsi),%r15,%r14 - adox %r15,%rcx - adcx %r14,%rax - mov %rcx,-64(%rbp) - mulx 8(%rsi),%r14,%rcx - adox %r14,%rax - adcx %rcx,%rbx - mulx 16(%rsi),%r14,%rcx - adox %r14,%rbx - adcx %rcx,%r11 - mulx 24(%rsi),%r14,%rcx - adox %r14,%r11 - adcx %rcx,%r10 - mulx 32(%rsi),%r14,%rcx - adox %r14,%r10 - adcx %rcx,%r9 - mulx 40(%rsi),%r14,%rcx - adox %r14,%r9 - adcx %rcx,%r8 - mulx 48(%rsi),%r14,%rcx - adox %r14,%r8 - adcx %rcx,%rdi - mulx 56(%rsi),%rdx,%rcx - adox %rdx,%rdi - adcx %r13,%rcx - mov 16(%r12),%rdx - adox %r13,%rcx - mulx (%rsi),%r15,%r14 - xor %r13d,%r13d - adox %r15,%rax - adcx %r14,%rbx - mov %rax,-72(%rbp) - mulx 8(%rsi),%r14,%rax - adox %r14,%rbx - adcx %rax,%r11 - mulx 16(%rsi),%r14,%rax - adox %r14,%r11 - adcx %rax,%r10 - mulx 24(%rsi),%r14,%rax - adox %r14,%r10 - adcx %rax,%r9 - mulx 32(%rsi),%r14,%rax - adox %r14,%r9 - adcx %rax,%r8 - mulx 40(%rsi),%r14,%rax - adox %r14,%r8 
- adcx %rax,%rdi - mulx 48(%rsi),%r14,%rax - adox %r14,%rdi - adcx %rax,%rcx - mulx 56(%rsi),%rdx,%rax - adox %rdx,%rcx - adcx %r13,%rax - adox %r13,%rax - xor %r13d,%r13d - mov 24(%r12),%rdx - mulx (%rsi),%r15,%r14 - adox %r15,%rbx - adcx %r14,%r11 - mov %rbx,-80(%rbp) - mov %r11,%r15 - mulx 8(%rsi),%r14,%rbx - adox %r14,%r15 - adcx %rbx,%r10 - mulx 16(%rsi),%rbx,%r11 - adox %rbx,%r10 - adcx %r11,%r9 - mulx 24(%rsi),%rbx,%r11 - adox %rbx,%r9 - adcx %r11,%r8 - mulx 32(%rsi),%rbx,%r11 - adox %rbx,%r8 - adcx %r11,%rdi - mulx 40(%rsi),%rbx,%r11 - adox %rbx,%rdi - adcx %r11,%rcx - mulx 48(%rsi),%rbx,%r11 - adox %rbx,%rcx - adcx %r11,%rax - mulx 56(%rsi),%rdx,%r11 - adox %rdx,%rax - adcx %r13,%r11 - mov 32(%r12),%rdx - adox %r13,%r11 - xor %ebx,%ebx - mulx (%rsi),%r14,%r13 - adox %r14,%r15 - adcx %r13,%r10 - mov %r15,-88(%rbp) - mulx 8(%rsi),%r14,%r13 - mov %r10,%r15 - adcx %r13,%r9 - adox %r14,%r15 - mulx 16(%rsi),%r13,%r10 - adox %r13,%r9 - adcx %r10,%r8 - mulx 24(%rsi),%r13,%r10 - adcx %r10,%rdi - adox %r13,%r8 - mulx 32(%rsi),%r13,%r10 - adox %r13,%rdi - adcx %r10,%rcx - mulx 40(%rsi),%r13,%r10 - adox %r13,%rcx - adcx %r10,%rax - mulx 48(%rsi),%r13,%r10 - adox %r13,%rax - adcx %r10,%r11 - mulx 56(%rsi),%rdx,%r10 - adox %rdx,%r11 - adcx %rbx,%r10 - mov 40(%r12),%rdx - adox %rbx,%r10 - mulx (%rsi),%r14,%r13 - xor %ebx,%ebx - adox %r14,%r15 - mov %r15,-96(%rbp) - adcx %r13,%r9 - mulx 8(%rsi),%r14,%r13 - mov %r9,%r15 - adox %r14,%r15 - adcx %r13,%r8 - mulx 16(%rsi),%r13,%r9 - adox %r13,%r8 - adcx %r9,%rdi - mulx 24(%rsi),%r13,%r9 - adox %r13,%rdi - adcx %r9,%rcx - mulx 32(%rsi),%r13,%r9 - adox %r13,%rcx - adcx %r9,%rax - mulx 40(%rsi),%r13,%r9 - adox %r13,%rax - adcx %r9,%r11 - mulx 48(%rsi),%r13,%r9 - adox %r13,%r11 - adcx %r9,%r10 - mulx 56(%rsi),%rdx,%r9 - adox %rdx,%r10 - adcx %rbx,%r9 - adox %rbx,%r9 - xor %ebx,%ebx - mov 48(%r12),%rdx - mulx (%rsi),%r14,%r13 - adox %r14,%r15 - adcx %r13,%r8 - mov %r15,-104(%rbp) - mulx 8(%rsi),%r14,%r13 - mov %r8,%r15 - adcx 
%r13,%rdi - adox %r14,%r15 - mulx 16(%rsi),%r13,%r8 - adox %r13,%rdi - adcx %r8,%rcx - mulx 24(%rsi),%r13,%r8 - adox %r13,%rcx - adcx %r8,%rax - mulx 32(%rsi),%r13,%r8 - adox %r13,%rax - adcx %r8,%r11 - mulx 40(%rsi),%r13,%r8 - adox %r13,%r11 - adcx %r8,%r10 - mulx 48(%rsi),%r13,%r8 - adox %r13,%r10 - adcx %r8,%r9 - mulx 56(%rsi),%rdx,%r8 - adox %rdx,%r9 - mov 56(%r12),%rdx - adcx %rbx,%r8 - mulx (%rsi),%r13,%r12 - adox %rbx,%r8 - xor %ebx,%ebx - adox %r13,%r15 - adcx %r12,%rdi - mulx 8(%rsi),%r13,%r12 - adox %r13,%rdi - adcx %r12,%rcx - mulx 16(%rsi),%r13,%r12 - adox %r13,%rcx - adcx %r12,%rax - mulx 24(%rsi),%r13,%r12 - adox %r13,%rax - adcx %r12,%r11 - mulx 32(%rsi),%r13,%r12 - adox %r13,%r11 - adcx %r12,%r10 - mulx 40(%rsi),%r13,%r12 - adox %r13,%r10 - adcx %r12,%r9 - mulx 48(%rsi),%r13,%r12 - mulx 56(%rsi),%rsi,%rdx - adox %r13,%r9 - adcx %r12,%r8 - adox %rsi,%r8 - adcx %rbx,%rdx - mov -64(%rbp),%rsi - adox %rbx,%rdx - mov -48(%rbp),%rbx - mov -56(%rbp),%r14 - mov %rsi,8(%rbx) - mov -72(%rbp),%rsi - mov %r14,(%rbx) - mov %rsi,16(%rbx) - mov -80(%rbp),%rsi - mov %rsi,24(%rbx) - mov -88(%rbp),%rsi - mov %rsi,32(%rbx) - mov -96(%rbp),%rsi - mov %rsi,40(%rbx) - mov -104(%rbp),%rsi - mov %r15,56(%rbx) - mov %rsi,48(%rbx) - mov %rdi,64(%rbx) - mov %rcx,72(%rbx) - mov %rax,80(%rbx) - mov %r11,88(%rbx) - mov %r10,96(%rbx) - mov %r9,104(%rbx) - mov %r8,112(%rbx) - mov %rdx,120(%rbx) - add $64,%rsp - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rbp - ret - .endfn Mul8x8Adx,globl - - .end -Timeline view: 0123456789 0123456789 0123456789 0123456789 -Index 0123456789 0123456789 0123456789 0123456789 -[0,0] DeeER. . . . . . . . . . . . . . . . pushq %r15 -[0,1] D==eeER . . . . . . . . . . . . . . . pushq %r14 -[0,2] .D===eeER . . . . . . . . . . . . . . . pushq %r13 -[0,3] .D=====eeER . . . . . . . . . . . . . . pushq %r12 -[0,4] . DeE-----R . . . . . . . . . . . . . . movq %rdx, %r12 -[0,5] . D======eeER . . . . . . . . . . . . . . pushq %rbx -[0,6] . 
D========eER . . . . . . . . . . . . . . subq $64, %rsp -[0,7] . DeeeeeE----R . . . . . . . . . . . . . . movq (%rdx), %rdx -[0,8] . D---------R . . . . . . . . . . . . . . xorl %r13d, %r13d -[0,9] . D====eeeeeeeeeER . . . . . . . . . . . . . mulxq (%rsi), %rax, %rcx -[0,10] . D======eE------R . . . . . . . . . . . . . movq %rdi, -48(%rbp) -[0,11] . D======eE-----R . . . . . . . . . . . . . movq %rax, -56(%rbp) -[0,12] . D====eeeeeeeeeER. . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %rax -[0,13] . D============eER. . . . . . . . . . . . . adoxq %rdx, %rcx -[0,14] . DeeeeeE-------R. . . . . . . . . . . . . movq (%r12), %rdx -[0,15] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 16(%rsi), %rdx, %rbx -[0,16] . D============eE-R . . . . . . . . . . . . adoxq %rdx, %rax -[0,17] . .DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx -[0,18] . .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r11 -[0,19] . .D=============eER . . . . . . . . . . . . adoxq %rdx, %rbx -[0,20] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx -[0,21] . . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r10 -[0,22] . . D=============eER . . . . . . . . . . . . adoxq %rdx, %r11 -[0,23] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx -[0,24] . . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 40(%rsi), %rdx, %r9 -[0,25] . . D=============eER. . . . . . . . . . . . adoxq %rdx, %r10 -[0,26] . . DeeeeeE--------R. . . . . . . . . . . . movq (%r12), %rdx -[0,27] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 48(%rsi), %rdx, %r8 -[0,28] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r9 -[0,29] . . DeeeeeE--------R . . . . . . . . . . . movq (%r12), %rdx -[0,30] . . D=====eeeeeeeeeER . . . . . . . . . . . mulxq 56(%rsi), %rdx, %rdi -[0,31] . . D=============eER . . . . . . . . . . . adoxq %rdx, %r8 -[0,32] . . .D=============eER . . . . . . . . . . . adoxq %r13, %rdi -[0,33] . . .D---------------R . . . . . . . . . . . 
xorl %r13d, %r13d -[0,34] . . .DeeeeeE---------R . . . . . . . . . . . movq 8(%r12), %rdx -[0,35] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r15, %r14 -[0,36] . . . D=======eE-----R . . . . . . . . . . . adoxq %r15, %rcx -[0,37] . . . D=============eER . . . . . . . . . . . adcxq %r14, %rax -[0,38] . . . D=======eE-----R . . . . . . . . . . . movq %rcx, -64(%rbp) -[0,39] . . . D====eeeeeeeeeER . . . . . . . . . . . mulxq 8(%rsi), %r14, %rcx -[0,40] . . . D=============eER. . . . . . . . . . . adoxq %r14, %rax -[0,41] . . . D=============eER . . . . . . . . . . adcxq %rcx, %rbx -[0,42] . . . D====eeeeeeeeeE-R . . . . . . . . . . mulxq 16(%rsi), %r14, %rcx -[0,43] . . . D==============eER . . . . . . . . . . adoxq %r14, %rbx -[0,44] . . . D==============eER . . . . . . . . . . adcxq %rcx, %r11 -[0,45] . . . D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r14, %rcx -[0,46] . . . D===============eER . . . . . . . . . . adoxq %r14, %r11 -[0,47] . . . .D===============eER. . . . . . . . . . adcxq %rcx, %r10 -[0,48] . . . .D====eeeeeeeeeE---R. . . . . . . . . . mulxq 32(%rsi), %r14, %rcx -[0,49] . . . .D================eER . . . . . . . . . adoxq %r14, %r10 -[0,50] . . . . D================eER . . . . . . . . . adcxq %rcx, %r9 -[0,51] . . . . D====eeeeeeeeeE----R . . . . . . . . . mulxq 40(%rsi), %r14, %rcx -[0,52] . . . . D=================eER . . . . . . . . . adoxq %r14, %r9 -[0,53] . . . . D=================eER . . . . . . . . . adcxq %rcx, %r8 -[0,54] . . . . D====eeeeeeeeeE-----R . . . . . . . . . mulxq 48(%rsi), %r14, %rcx -[0,55] . . . . D==================eER. . . . . . . . . adoxq %r14, %r8 -[0,56] . . . . D==================eER . . . . . . . . adcxq %rcx, %rdi -[0,57] . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 56(%rsi), %rdx, %rcx -[0,58] . . . . D===================eER . . . . . . . . adoxq %rdx, %rdi -[0,59] . . . . D===================eER . . . . . . . . adcxq %r13, %rcx -[0,60] . . . . DeeeeeE---------------R . . . . . . . 
. movq 16(%r12), %rdx -[0,61] . . . . D====================eER . . . . . . . . adoxq %r13, %rcx -[0,62] . . . . .D====eeeeeeeeeE-------R . . . . . . . . mulxq (%rsi), %r15, %r14 -[0,63] . . . . .D---------------------R . . . . . . . . xorl %r13d, %r13d -[0,64] . . . . .D=======eE------------R . . . . . . . . adoxq %r15, %rax -[0,65] . . . . . D============eE------R . . . . . . . . adcxq %r14, %rbx -[0,66] . . . . . D=======eE-----------R . . . . . . . . movq %rax, -72(%rbp) -[0,67] . . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 8(%rsi), %r14, %rax -[0,68] . . . . . D============eE-----R . . . . . . . . adoxq %r14, %rbx -[0,69] . . . . . D=============eE----R . . . . . . . . adcxq %rax, %r11 -[0,70] . . . . . D====eeeeeeeeeE-----R . . . . . . . . mulxq 16(%rsi), %r14, %rax -[0,71] . . . . . D=============eE---R . . . . . . . . adoxq %r14, %r11 -[0,72] . . . . . D==============eE--R . . . . . . . . adcxq %rax, %r10 -[0,73] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 24(%rsi), %r14, %rax -[0,74] . . . . . D==============eE-R . . . . . . . . adoxq %r14, %r10 -[0,75] . . . . . D===============eER . . . . . . . . adcxq %rax, %r9 -[0,76] . . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r14, %rax -[0,77] . . . . . .D===============eER. . . . . . . . adoxq %r14, %r9 -[0,78] . . . . . .D================eER . . . . . . . adcxq %rax, %r8 -[0,79] . . . . . .D====eeeeeeeeeE----R . . . . . . . mulxq 40(%rsi), %r14, %rax -[0,80] . . . . . . D================eER . . . . . . . adoxq %r14, %r8 -[0,81] . . . . . . D=================eER . . . . . . . adcxq %rax, %rdi -[0,82] . . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq 48(%rsi), %r14, %rax -[0,83] . . . . . . D=================eER . . . . . . . adoxq %r14, %rdi -[0,84] . . . . . . D==================eER. . . . . . . adcxq %rax, %rcx -[0,85] . . . . . . D====eeeeeeeeeE------R. . . . . . . mulxq 56(%rsi), %rdx, %rax -[0,86] . . . . . . D==================eER . . . . . . adoxq %rdx, %rcx -[0,87] . . 
. . . . D===================eER . . . . . . adcxq %r13, %rax -[0,88] . . . . . . D====================eER . . . . . . adoxq %r13, %rax -[0,89] . . . . . . D----------------------R . . . . . . xorl %r13d, %r13d -[0,90] . . . . . . DeeeeeE----------------R . . . . . . movq 24(%r12), %rdx -[0,91] . . . . . . D====eeeeeeeeeE-------R . . . . . . mulxq (%rsi), %r15, %r14 -[0,92] . . . . . . D===========eE--------R . . . . . . adoxq %r15, %rbx -[0,93] . . . . . . D=============eE------R . . . . . . adcxq %r14, %r11 -[0,94] . . . . . . .D===========eE-------R . . . . . . movq %rbx, -80(%rbp) -[0,95] . . . . . . .D=============eE-----R . . . . . . movq %r11, %r15 -[0,96] . . . . . . .D====eeeeeeeeeE------R . . . . . . mulxq 8(%rsi), %r14, %rbx -[0,97] . . . . . . . D=============eE----R . . . . . . adoxq %r14, %r15 -[0,98] . . . . . . . D==============eE---R . . . . . . adcxq %rbx, %r10 -[0,99] . . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 16(%rsi), %rbx, %r11 -[0,100] . . . . . . . D==============eE--R . . . . . . adoxq %rbx, %r10 -[0,101] . . . . . . . D===============eE-R . . . . . . adcxq %r11, %r9 -[0,102] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 24(%rsi), %rbx, %r11 -[0,103] . . . . . . . D===============eER . . . . . . adoxq %rbx, %r9 -[0,104] . . . . . . . D================eER . . . . . . adcxq %r11, %r8 -[0,105] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 32(%rsi), %rbx, %r11 -[0,106] . . . . . . . D================eER. . . . . . adoxq %rbx, %r8 -[0,107] . . . . . . . D=================eER . . . . . adcxq %r11, %rdi -[0,108] . . . . . . . D====eeeeeeeeeE-----R . . . . . mulxq 40(%rsi), %rbx, %r11 -[0,109] . . . . . . . .D=================eER . . . . . adoxq %rbx, %rdi -[0,110] . . . . . . . .D==================eER . . . . . adcxq %r11, %rcx -[0,111] . . . . . . . .D====eeeeeeeeeE------R . . . . . mulxq 48(%rsi), %rbx, %r11 -[0,112] . . . . . . . . D==================eER . . . . . adoxq %rbx, %rcx -[0,113] . . . . . . . . 
D===================eER. . . . . adcxq %r11, %rax -[0,114] . . . . . . . . D====eeeeeeeeeE-------R. . . . . mulxq 56(%rsi), %rdx, %r11 -[0,115] . . . . . . . . D===================eER . . . . adoxq %rdx, %rax -[0,116] . . . . . . . . D====================eER . . . . adcxq %r13, %r11 -[0,117] . . . . . . . . DeeeeeE----------------R . . . . movq 32(%r12), %rdx -[0,118] . . . . . . . . D=====================eER . . . . adoxq %r13, %r11 -[0,119] . . . . . . . . D=====E-----------------R . . . . xorl %ebx, %ebx -[0,120] . . . . . . . . D====eeeeeeeeeE--------R . . . . mulxq (%rsi), %r14, %r13 -[0,121] . . . . . . . . D===========eE---------R . . . . adoxq %r14, %r15 -[0,122] . . . . . . . . D=============eE-------R . . . . adcxq %r13, %r10 -[0,123] . . . . . . . . D===========eE--------R . . . . movq %r15, -88(%rbp) -[0,124] . . . . . . . . D====eeeeeeeeeE-------R . . . . mulxq 8(%rsi), %r14, %r13 -[0,125] . . . . . . . . D=============eE------R . . . . movq %r10, %r15 -[0,126] . . . . . . . . .D============eE------R . . . . adcxq %r13, %r9 -[0,127] . . . . . . . . .D=============eE-----R . . . . adoxq %r14, %r15 -[0,128] . . . . . . . . .D====eeeeeeeeeE------R . . . . mulxq 16(%rsi), %r13, %r10 -[0,129] . . . . . . . . . D=============eE----R . . . . adoxq %r13, %r9 -[0,130] . . . . . . . . . D==============eE---R . . . . adcxq %r10, %r8 -[0,131] . . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 24(%rsi), %r13, %r10 -[0,132] . . . . . . . . . D==============eE--R . . . . adcxq %r10, %rdi -[0,133] . . . . . . . . . D===============eE-R . . . . adoxq %r13, %r8 -[0,134] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 32(%rsi), %r13, %r10 -[0,135] . . . . . . . . . D===============eER . . . . adoxq %r13, %rdi -[0,136] . . . . . . . . . D================eER . . . . adcxq %r10, %rcx -[0,137] . . . . . . . . . D====eeeeeeeeeE----R . . . . mulxq 40(%rsi), %r13, %r10 -[0,138] . . . . . . . . . D================eER. . . . adoxq %r13, %rcx -[0,139] . . . . . . . . . 
D=================eER . . . adcxq %r10, %rax -[0,140] . . . . . . . . . D====eeeeeeeeeE-----R . . . mulxq 48(%rsi), %r13, %r10 -[0,141] . . . . . . . . . .D=================eER . . . adoxq %r13, %rax -[0,142] . . . . . . . . . .D==================eER . . . adcxq %r10, %r11 -[0,143] . . . . . . . . . .D====eeeeeeeeeE------R . . . mulxq 56(%rsi), %rdx, %r10 -[0,144] . . . . . . . . . . D==================eER . . . adoxq %rdx, %r11 -[0,145] . . . . . . . . . . D===================eER. . . adcxq %rbx, %r10 -[0,146] . . . . . . . . . . DeeeeeE---------------R. . . movq 40(%r12), %rdx -[0,147] . . . . . . . . . . D====================eER . . adoxq %rbx, %r10 -[0,148] . . . . . . . . . . D====eeeeeeeeeE-------R . . mulxq (%rsi), %r14, %r13 -[0,149] . . . . . . . . . . D---------------------R . . xorl %ebx, %ebx -[0,150] . . . . . . . . . . D============eE-------R . . adoxq %r14, %r15 -[0,151] . . . . . . . . . . D============eE------R . . movq %r15, -96(%rbp) -[0,152] . . . . . . . . . . D============eE------R . . adcxq %r13, %r9 -[0,153] . . . . . . . . . . D=====eeeeeeeeeE-----R . . mulxq 8(%rsi), %r14, %r13 -[0,154] . . . . . . . . . . D============eE-----R . . movq %r9, %r15 -[0,155] . . . . . . . . . . D=============eE----R . . adoxq %r14, %r15 -[0,156] . . . . . . . . . . D==============eE---R . . adcxq %r13, %r8 -[0,157] . . . . . . . . . . .D====eeeeeeeeeE----R . . mulxq 16(%rsi), %r13, %r9 -[0,158] . . . . . . . . . . .D==============eE--R . . adoxq %r13, %r8 -[0,159] . . . . . . . . . . .D===============eE-R . . adcxq %r9, %rdi -[0,160] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 24(%rsi), %r13, %r9 -[0,161] . . . . . . . . . . . D===============eER . . adoxq %r13, %rdi -[0,162] . . . . . . . . . . . D================eER . . adcxq %r9, %rcx -[0,163] . . . . . . . . . . . D====eeeeeeeeeE---R . . mulxq 32(%rsi), %r13, %r9 -[0,164] . . . . . . . . . . . D================eER . . adoxq %r13, %rcx -[0,165] . . . . . . . . . . . D=================eER . . 
adcxq %r9, %rax -[0,166] . . . . . . . . . . . D====eeeeeeeeeE----R . . mulxq 40(%rsi), %r13, %r9 -[0,167] . . . . . . . . . . . D=================eER. . adoxq %r13, %rax -[0,168] . . . . . . . . . . . D==================eER . adcxq %r9, %r11 -[0,169] . . . . . . . . . . . D====eeeeeeeeeE-----R . mulxq 48(%rsi), %r13, %r9 -[0,170] . . . . . . . . . . . D==================eER . adoxq %r13, %r11 -[0,171] . . . . . . . . . . . D===================eER . adcxq %r9, %r10 -[0,172] . . . . . . . . . . . .D====eeeeeeeeeE------R . mulxq 56(%rsi), %rdx, %r9 -[0,173] . . . . . . . . . . . .D===================eER. adoxq %rdx, %r10 -[0,174] . . . . . . . . . . . .D====================eER adcxq %rbx, %r9 diff --git a/libc/nexgen32e/mul8x8adx.S b/libc/nexgen32e/mul8x8adx.S new file mode 100644 index 000000000..12d9f98df --- /dev/null +++ b/libc/nexgen32e/mul8x8adx.S @@ -0,0 +1,495 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" + +// Computes 1024-bit product of 512-bit and 512-bit numbers. +// +// Instructions: 260 +// Total Cycles: 98 +// Total uOps: 452 +// uOps Per Cycle: 4.61 +// IPC: 2.65 +// Block RThroughput: 75.3 +// +// @param rdi receives 16 quadword result +// @param rsi is left hand side which must have 8 quadwords +// @param rdx is right hand side which must have 8 quadwords +// @note words are host endian while array is little endian +// @mayalias +Mul8x8Adx: + push %rbp + mov %rsp,%rbp + .profilable + sub $104,%rsp + mov %r15,-8(%rbp) + mov %r14,-16(%rbp) + mov %r13,-24(%rbp) + mov %r12,-32(%rbp) + mov %rbx,-40(%rbp) + mov %rdx,%r12 + mov (%rdx),%rdx + mulx (%rsi),%rax,%rcx + mov %rdi,-48(%rbp) + mov %rax,-56(%rbp) + mulx 8(%rsi),%rdx,%rax + add %rdx,%rcx + mov (%r12),%rdx + mulx 16(%rsi),%rdx,%rbx + adc %rdx,%rax + mov (%r12),%rdx + mulx 24(%rsi),%rdx,%r11 + adc %rdx,%rbx + mov (%r12),%rdx + mulx 32(%rsi),%rdx,%r10 + adc %rdx,%r11 + mov (%r12),%rdx + mulx 40(%rsi),%rdx,%r9 + adc %rdx,%r10 + mov (%r12),%rdx + mulx 48(%rsi),%rdx,%r8 + adc %rdx,%r9 + mov (%r12),%rdx + mulx 56(%rsi),%rdx,%rdi + adc %rdx,%r8 + adc $0,%rdi + xor %r13d,%r13d + mov 8(%r12),%rdx + mulx (%rsi),%r15,%r14 + adox %r15,%rcx + adcx %r14,%rax + mov %rcx,-64(%rbp) + mulx 8(%rsi),%r14,%rcx + adox %r14,%rax + adcx %rcx,%rbx + mulx 16(%rsi),%r14,%rcx + adox %r14,%rbx + adcx %rcx,%r11 + mulx 24(%rsi),%r14,%rcx + adox %r14,%r11 + adcx %rcx,%r10 + mulx 32(%rsi),%r14,%rcx + adox %r14,%r10 + adcx %rcx,%r9 + mulx 40(%rsi),%r14,%rcx + adox %r14,%r9 + adcx %rcx,%r8 + mulx 48(%rsi),%r14,%rcx + adox %r14,%r8 + adcx %rcx,%rdi + mulx 56(%rsi),%rdx,%rcx + adox %rdx,%rdi + adcx %r13,%rcx + mov 16(%r12),%rdx + adox %r13,%rcx + mulx (%rsi),%r15,%r14 + xor %r13d,%r13d + adox %r15,%rax + adcx %r14,%rbx + mov %rax,-72(%rbp) + mulx 8(%rsi),%r14,%rax + adox %r14,%rbx + adcx %rax,%r11 + mulx 16(%rsi),%r14,%rax + 
adox %r14,%r11 + adcx %rax,%r10 + mulx 24(%rsi),%r14,%rax + adox %r14,%r10 + adcx %rax,%r9 + mulx 32(%rsi),%r14,%rax + adox %r14,%r9 + adcx %rax,%r8 + mulx 40(%rsi),%r14,%rax + adox %r14,%r8 + adcx %rax,%rdi + mulx 48(%rsi),%r14,%rax + adox %r14,%rdi + adcx %rax,%rcx + mulx 56(%rsi),%rdx,%rax + adox %rdx,%rcx + adcx %r13,%rax + adox %r13,%rax + xor %r13d,%r13d + mov 24(%r12),%rdx + mulx (%rsi),%r15,%r14 + adox %r15,%rbx + adcx %r14,%r11 + mov %rbx,-80(%rbp) + mov %r11,%r15 + mulx 8(%rsi),%r14,%rbx + adox %r14,%r15 + adcx %rbx,%r10 + mulx 16(%rsi),%rbx,%r11 + adox %rbx,%r10 + adcx %r11,%r9 + mulx 24(%rsi),%rbx,%r11 + adox %rbx,%r9 + adcx %r11,%r8 + mulx 32(%rsi),%rbx,%r11 + adox %rbx,%r8 + adcx %r11,%rdi + mulx 40(%rsi),%rbx,%r11 + adox %rbx,%rdi + adcx %r11,%rcx + mulx 48(%rsi),%rbx,%r11 + adox %rbx,%rcx + adcx %r11,%rax + mulx 56(%rsi),%rdx,%r11 + adox %rdx,%rax + adcx %r13,%r11 + mov 32(%r12),%rdx + adox %r13,%r11 + xor %ebx,%ebx + mulx (%rsi),%r14,%r13 + adox %r14,%r15 + adcx %r13,%r10 + mov %r15,-88(%rbp) + mulx 8(%rsi),%r14,%r13 + mov %r10,%r15 + adcx %r13,%r9 + adox %r14,%r15 + mulx 16(%rsi),%r13,%r10 + adox %r13,%r9 + adcx %r10,%r8 + mulx 24(%rsi),%r13,%r10 + adcx %r10,%rdi + adox %r13,%r8 + mulx 32(%rsi),%r13,%r10 + adox %r13,%rdi + adcx %r10,%rcx + mulx 40(%rsi),%r13,%r10 + adox %r13,%rcx + adcx %r10,%rax + mulx 48(%rsi),%r13,%r10 + adox %r13,%rax + adcx %r10,%r11 + mulx 56(%rsi),%rdx,%r10 + adox %rdx,%r11 + adcx %rbx,%r10 + mov 40(%r12),%rdx + adox %rbx,%r10 + mulx (%rsi),%r14,%r13 + xor %ebx,%ebx + adox %r14,%r15 + mov %r15,-96(%rbp) + adcx %r13,%r9 + mulx 8(%rsi),%r14,%r13 + mov %r9,%r15 + adox %r14,%r15 + adcx %r13,%r8 + mulx 16(%rsi),%r13,%r9 + adox %r13,%r8 + adcx %r9,%rdi + mulx 24(%rsi),%r13,%r9 + adox %r13,%rdi + adcx %r9,%rcx + mulx 32(%rsi),%r13,%r9 + adox %r13,%rcx + adcx %r9,%rax + mulx 40(%rsi),%r13,%r9 + adox %r13,%rax + adcx %r9,%r11 + mulx 48(%rsi),%r13,%r9 + adox %r13,%r11 + adcx %r9,%r10 + mulx 56(%rsi),%rdx,%r9 + adox %rdx,%r10 + adcx 
%rbx,%r9 + adox %rbx,%r9 + xor %ebx,%ebx + mov 48(%r12),%rdx + mulx (%rsi),%r14,%r13 + adox %r14,%r15 + adcx %r13,%r8 + mov %r15,-104(%rbp) + mulx 8(%rsi),%r14,%r13 + mov %r8,%r15 + adcx %r13,%rdi + adox %r14,%r15 + mulx 16(%rsi),%r13,%r8 + adox %r13,%rdi + adcx %r8,%rcx + mulx 24(%rsi),%r13,%r8 + adox %r13,%rcx + adcx %r8,%rax + mulx 32(%rsi),%r13,%r8 + adox %r13,%rax + adcx %r8,%r11 + mulx 40(%rsi),%r13,%r8 + adox %r13,%r11 + adcx %r8,%r10 + mulx 48(%rsi),%r13,%r8 + adox %r13,%r10 + adcx %r8,%r9 + mulx 56(%rsi),%rdx,%r8 + adox %rdx,%r9 + mov 56(%r12),%rdx + adcx %rbx,%r8 + mulx (%rsi),%r13,%r12 + adox %rbx,%r8 + xor %ebx,%ebx + adox %r13,%r15 + adcx %r12,%rdi + mulx 8(%rsi),%r13,%r12 + adox %r13,%rdi + adcx %r12,%rcx + mulx 16(%rsi),%r13,%r12 + adox %r13,%rcx + adcx %r12,%rax + mulx 24(%rsi),%r13,%r12 + adox %r13,%rax + adcx %r12,%r11 + mulx 32(%rsi),%r13,%r12 + adox %r13,%r11 + adcx %r12,%r10 + mulx 40(%rsi),%r13,%r12 + adox %r13,%r10 + adcx %r12,%r9 + mulx 48(%rsi),%r13,%r12 + mulx 56(%rsi),%rsi,%rdx + adox %r13,%r9 + adcx %r12,%r8 + adox %rsi,%r8 + adcx %rbx,%rdx + mov -64(%rbp),%rsi + adox %rbx,%rdx + mov -48(%rbp),%rbx + mov -56(%rbp),%r14 + mov %rsi,8(%rbx) + mov -72(%rbp),%rsi + mov %r14,(%rbx) + mov %rsi,16(%rbx) + mov -80(%rbp),%rsi + mov %rsi,24(%rbx) + mov -88(%rbp),%rsi + mov %rsi,32(%rbx) + mov -96(%rbp),%rsi + mov %rsi,40(%rbx) + mov -104(%rbp),%rsi + mov %r15,56(%rbx) + mov %rsi,48(%rbx) + mov %rdi,64(%rbx) + mov %rcx,72(%rbx) + mov %rax,80(%rbx) + mov %r11,88(%rbx) + mov %r10,96(%rbx) + mov %r9,104(%rbx) + mov %r8,112(%rbx) + mov %rdx,120(%rbx) + mov -8(%rbp),%r15 + mov -16(%rbp),%r14 + mov -24(%rbp),%r13 + mov -32(%rbp),%r12 + mov -40(%rbp),%rbx + leave + ret + .endfn Mul8x8Adx,globl + + .end +TIMELINE VIEW 0123456789 0123456789 0123456789 0123456789 +Index 0123456789 0123456789 0123456789 0123456789 +[0,0] DeER . . . . . . . . . . . . . . . . subq $104, %rsp +[0,1] DeER . . . . . . . . . . . . . . . . movq %r15, -8(%rbp) +[0,2] D=eER. . . . . . 
. . . . . . . . . . movq %r14, -16(%rbp) +[0,3] D==eER . . . . . . . . . . . . . . . movq %r13, -24(%rbp) +[0,4] D===eER . . . . . . . . . . . . . . . movq %r12, -32(%rbp) +[0,5] D====eER . . . . . . . . . . . . . . . movq %rbx, -40(%rbp) +[0,6] .DeE---R . . . . . . . . . . . . . . . movq %rdx, %r12 +[0,7] .DeeeeeER . . . . . . . . . . . . . . . movq (%rdx), %rdx +[0,8] .D=====eeeeeeeeeER . . . . . . . . . . . . . mulxq (%rsi), %rax, %rcx +[0,9] . D====eE--------R . . . . . . . . . . . . . movq %rdi, -48(%rbp) +[0,10] . D=======eE-----R . . . . . . . . . . . . . movq %rax, -56(%rbp) +[0,11] . D=====eeeeeeeeeER . . . . . . . . . . . . . mulxq 8(%rsi), %rdx, %rax +[0,12] . D============eER . . . . . . . . . . . . . addq %rdx, %rcx +[0,13] . DeeeeeE--------R . . . . . . . . . . . . . movq (%r12), %rdx +[0,14] . D=====eeeeeeeeeER. . . . . . . . . . . . . mulxq 16(%rsi), %rdx, %rbx +[0,15] . D============eER. . . . . . . . . . . . . adcq %rdx, %rax +[0,16] . DeeeeeE--------R. . . . . . . . . . . . . movq (%r12), %rdx +[0,17] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 24(%rsi), %rdx, %r11 +[0,18] . D============eER . . . . . . . . . . . . adcq %rdx, %rbx +[0,19] . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx +[0,20] . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 32(%rsi), %rdx, %r10 +[0,21] . .D============eER . . . . . . . . . . . . adcq %rdx, %r11 +[0,22] . .DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx +[0,23] . .D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 40(%rsi), %rdx, %r9 +[0,24] . . D============eER . . . . . . . . . . . . adcq %rdx, %r10 +[0,25] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx +[0,26] . . D=====eeeeeeeeeER . . . . . . . . . . . . mulxq 48(%rsi), %rdx, %r8 +[0,27] . . D============eER . . . . . . . . . . . . adcq %rdx, %r9 +[0,28] . . DeeeeeE--------R . . . . . . . . . . . . movq (%r12), %rdx +[0,29] . . D=====eeeeeeeeeER. . . . . . . . . . . . mulxq 56(%rsi), %rdx, %rdi +[0,30] . . 
D============eER. . . . . . . . . . . . adcq %rdx, %r8 +[0,31] . . D=============eER . . . . . . . . . . . adcq $0, %rdi +[0,32] . . D---------------R . . . . . . . . . . . xorl %r13d, %r13d +[0,33] . . DeeeeeE---------R . . . . . . . . . . . movq 8(%r12), %rdx +[0,34] . . D====eeeeeeeeeER . . . . . . . . . . . mulxq (%rsi), %r15, %r14 +[0,35] . . D=======eE-----R . . . . . . . . . . . adoxq %r15, %rcx +[0,36] . . D=============eER . . . . . . . . . . . adcxq %r14, %rax +[0,37] . . .D=======eE-----R . . . . . . . . . . . movq %rcx, -64(%rbp) +[0,38] . . .D====eeeeeeeeeER . . . . . . . . . . . mulxq 8(%rsi), %r14, %rcx +[0,39] . . .D=============eER . . . . . . . . . . . adoxq %r14, %rax +[0,40] . . . D=============eER . . . . . . . . . . . adcxq %rcx, %rbx +[0,41] . . . D====eeeeeeeeeE-R . . . . . . . . . . . mulxq 16(%rsi), %r14, %rcx +[0,42] . . . D==============eER. . . . . . . . . . . adoxq %r14, %rbx +[0,43] . . . D==============eER . . . . . . . . . . adcxq %rcx, %r11 +[0,44] . . . D====eeeeeeeeeE--R . . . . . . . . . . mulxq 24(%rsi), %r14, %rcx +[0,45] . . . D===============eER . . . . . . . . . . adoxq %r14, %r11 +[0,46] . . . D===============eER . . . . . . . . . . adcxq %rcx, %r10 +[0,47] . . . D====eeeeeeeeeE---R . . . . . . . . . . mulxq 32(%rsi), %r14, %rcx +[0,48] . . . D================eER . . . . . . . . . . adoxq %r14, %r10 +[0,49] . . . D================eER. . . . . . . . . . adcxq %rcx, %r9 +[0,50] . . . D====eeeeeeeeeE----R. . . . . . . . . . mulxq 40(%rsi), %r14, %rcx +[0,51] . . . D=================eER . . . . . . . . . adoxq %r14, %r9 +[0,52] . . . .D=================eER . . . . . . . . . adcxq %rcx, %r8 +[0,53] . . . .D====eeeeeeeeeE-----R . . . . . . . . . mulxq 48(%rsi), %r14, %rcx +[0,54] . . . .D==================eER . . . . . . . . . adoxq %r14, %r8 +[0,55] . . . . D==================eER . . . . . . . . . adcxq %rcx, %rdi +[0,56] . . . . D====eeeeeeeeeE------R . . . . . . . . . mulxq 56(%rsi), %rdx, %rcx +[0,57] . . . . 
D===================eER. . . . . . . . . adoxq %rdx, %rdi +[0,58] . . . . D===================eER . . . . . . . . adcxq %r13, %rcx +[0,59] . . . . DeeeeeE---------------R . . . . . . . . movq 16(%r12), %rdx +[0,60] . . . . D====================eER . . . . . . . . adoxq %r13, %rcx +[0,61] . . . . D====eeeeeeeeeE-------R . . . . . . . . mulxq (%rsi), %r15, %r14 +[0,62] . . . . D---------------------R . . . . . . . . xorl %r13d, %r13d +[0,63] . . . . D=======eE------------R . . . . . . . . adoxq %r15, %rax +[0,64] . . . . D============eE------R . . . . . . . . adcxq %r14, %rbx +[0,65] . . . . D=======eE-----------R . . . . . . . . movq %rax, -72(%rbp) +[0,66] . . . . D====eeeeeeeeeE------R . . . . . . . . mulxq 8(%rsi), %r14, %rax +[0,67] . . . . .D============eE-----R . . . . . . . . adoxq %r14, %rbx +[0,68] . . . . .D=============eE----R . . . . . . . . adcxq %rax, %r11 +[0,69] . . . . .D====eeeeeeeeeE-----R . . . . . . . . mulxq 16(%rsi), %r14, %rax +[0,70] . . . . . D=============eE---R . . . . . . . . adoxq %r14, %r11 +[0,71] . . . . . D==============eE--R . . . . . . . . adcxq %rax, %r10 +[0,72] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 24(%rsi), %r14, %rax +[0,73] . . . . . D==============eE-R . . . . . . . . adoxq %r14, %r10 +[0,74] . . . . . D===============eER . . . . . . . . adcxq %rax, %r9 +[0,75] . . . . . D====eeeeeeeeeE---R . . . . . . . . mulxq 32(%rsi), %r14, %rax +[0,76] . . . . . D===============eER . . . . . . . . adoxq %r14, %r9 +[0,77] . . . . . D================eER . . . . . . . . adcxq %rax, %r8 +[0,78] . . . . . D====eeeeeeeeeE----R . . . . . . . . mulxq 40(%rsi), %r14, %rax +[0,79] . . . . . D================eER. . . . . . . . adoxq %r14, %r8 +[0,80] . . . . . D=================eER . . . . . . . adcxq %rax, %rdi +[0,81] . . . . . D====eeeeeeeeeE-----R . . . . . . . mulxq 48(%rsi), %r14, %rax +[0,82] . . . . . .D=================eER . . . . . . . adoxq %r14, %rdi +[0,83] . . . . . .D==================eER . . . . . . . 
adcxq %rax, %rcx +[0,84] . . . . . .D====eeeeeeeeeE------R . . . . . . . mulxq 56(%rsi), %rdx, %rax +[0,85] . . . . . . D==================eER . . . . . . . adoxq %rdx, %rcx +[0,86] . . . . . . D===================eER. . . . . . . adcxq %r13, %rax +[0,87] . . . . . . D====================eER . . . . . . adoxq %r13, %rax +[0,88] . . . . . . D----------------------R . . . . . . xorl %r13d, %r13d +[0,89] . . . . . . DeeeeeE----------------R . . . . . . movq 24(%r12), %rdx +[0,90] . . . . . . D====eeeeeeeeeE-------R . . . . . . mulxq (%rsi), %r15, %r14 +[0,91] . . . . . . D===========eE--------R . . . . . . adoxq %r15, %rbx +[0,92] . . . . . . D=============eE------R . . . . . . adcxq %r14, %r11 +[0,93] . . . . . . D===========eE-------R . . . . . . movq %rbx, -80(%rbp) +[0,94] . . . . . . D=============eE-----R . . . . . . movq %r11, %r15 +[0,95] . . . . . . D====eeeeeeeeeE------R . . . . . . mulxq 8(%rsi), %r14, %rbx +[0,96] . . . . . . D=============eE----R . . . . . . adoxq %r14, %r15 +[0,97] . . . . . . D==============eE---R . . . . . . adcxq %rbx, %r10 +[0,98] . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 16(%rsi), %rbx, %r11 +[0,99] . . . . . . .D==============eE--R . . . . . . adoxq %rbx, %r10 +[0,100] . . . . . . .D===============eE-R . . . . . . adcxq %r11, %r9 +[0,101] . . . . . . .D====eeeeeeeeeE----R . . . . . . mulxq 24(%rsi), %rbx, %r11 +[0,102] . . . . . . . D===============eER . . . . . . adoxq %rbx, %r9 +[0,103] . . . . . . . D================eER . . . . . . adcxq %r11, %r8 +[0,104] . . . . . . . D====eeeeeeeeeE----R . . . . . . mulxq 32(%rsi), %rbx, %r11 +[0,105] . . . . . . . D================eER . . . . . . adoxq %rbx, %r8 +[0,106] . . . . . . . D=================eER . . . . . . adcxq %r11, %rdi +[0,107] . . . . . . . D====eeeeeeeeeE-----R . . . . . . mulxq 40(%rsi), %rbx, %r11 +[0,108] . . . . . . . D=================eER. . . . . . adoxq %rbx, %rdi +[0,109] . . . . . . . D==================eER . . . . . adcxq %r11, %rcx +[0,110] . . . . . . 
. D====eeeeeeeeeE------R . . . . . mulxq 48(%rsi), %rbx, %r11 +[0,111] . . . . . . . D==================eER . . . . . adoxq %rbx, %rcx +[0,112] . . . . . . . D===================eER . . . . . adcxq %r11, %rax +[0,113] . . . . . . . D====eeeeeeeeeE-------R . . . . . mulxq 56(%rsi), %rdx, %r11 +[0,114] . . . . . . . .D===================eER . . . . . adoxq %rdx, %rax +[0,115] . . . . . . . .D====================eER. . . . . adcxq %r13, %r11 +[0,116] . . . . . . . .DeeeeeE----------------R. . . . . movq 32(%r12), %rdx +[0,117] . . . . . . . .D=====================eER . . . . adoxq %r13, %r11 +[0,118] . . . . . . . .D=====E-----------------R . . . . xorl %ebx, %ebx +[0,119] . . . . . . . . D====eeeeeeeeeE--------R . . . . mulxq (%rsi), %r14, %r13 +[0,120] . . . . . . . . D===========eE---------R . . . . adoxq %r14, %r15 +[0,121] . . . . . . . . D=============eE-------R . . . . adcxq %r13, %r10 +[0,122] . . . . . . . . D===========eE--------R . . . . movq %r15, -88(%rbp) +[0,123] . . . . . . . . D====eeeeeeeeeE-------R . . . . mulxq 8(%rsi), %r14, %r13 +[0,124] . . . . . . . . D=============eE------R . . . . movq %r10, %r15 +[0,125] . . . . . . . . D============eE------R . . . . adcxq %r13, %r9 +[0,126] . . . . . . . . D=============eE-----R . . . . adoxq %r14, %r15 +[0,127] . . . . . . . . D====eeeeeeeeeE------R . . . . mulxq 16(%rsi), %r13, %r10 +[0,128] . . . . . . . . D=============eE----R . . . . adoxq %r13, %r9 +[0,129] . . . . . . . . D==============eE---R . . . . adcxq %r10, %r8 +[0,130] . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 24(%rsi), %r13, %r10 +[0,131] . . . . . . . . .D==============eE--R . . . . adcxq %r10, %rdi +[0,132] . . . . . . . . .D===============eE-R . . . . adoxq %r13, %r8 +[0,133] . . . . . . . . .D====eeeeeeeeeE----R . . . . mulxq 32(%rsi), %r13, %r10 +[0,134] . . . . . . . . . D===============eER . . . . adoxq %r13, %rdi +[0,135] . . . . . . . . . D================eER . . . . adcxq %r10, %rcx +[0,136] . . . . . . . . . 
D====eeeeeeeeeE----R . . . . mulxq 40(%rsi), %r13, %r10 +[0,137] . . . . . . . . . D================eER . . . . adoxq %r13, %rcx +[0,138] . . . . . . . . . D=================eER . . . . adcxq %r10, %rax +[0,139] . . . . . . . . . D====eeeeeeeeeE-----R . . . . mulxq 48(%rsi), %r13, %r10 +[0,140] . . . . . . . . . D=================eER. . . . adoxq %r13, %rax +[0,141] . . . . . . . . . D==================eER . . . adcxq %r10, %r11 +[0,142] . . . . . . . . . D====eeeeeeeeeE------R . . . mulxq 56(%rsi), %rdx, %r10 +[0,143] . . . . . . . . . D==================eER . . . adoxq %rdx, %r11 +[0,144] . . . . . . . . . D===================eER . . . adcxq %rbx, %r10 +[0,145] . . . . . . . . . DeeeeeE---------------R . . . movq 40(%r12), %rdx +[0,146] . . . . . . . . . D====================eER . . . adoxq %rbx, %r10 +[0,147] . . . . . . . . . .D====eeeeeeeeeE-------R . . . mulxq (%rsi), %r14, %r13 +[0,148] . . . . . . . . . .D---------------------R . . . xorl %ebx, %ebx +[0,149] . . . . . . . . . .D============eE-------R . . . adoxq %r14, %r15 +[0,150] . . . . . . . . . . D============eE------R . . . movq %r15, -96(%rbp) +[0,151] . . . . . . . . . . D============eE------R . . . adcxq %r13, %r9 +[0,152] . . . . . . . . . . D=====eeeeeeeeeE-----R . . . mulxq 8(%rsi), %r14, %r13 +[0,153] . . . . . . . . . . D============eE-----R . . . movq %r9, %r15 +[0,154] . . . . . . . . . . D=============eE----R . . . adoxq %r14, %r15 +[0,155] . . . . . . . . . . D==============eE---R . . . adcxq %r13, %r8 +[0,156] . . . . . . . . . . D====eeeeeeeeeE----R . . . mulxq 16(%rsi), %r13, %r9 +[0,157] . . . . . . . . . . D==============eE--R . . . adoxq %r13, %r8 +[0,158] . . . . . . . . . . D===============eE-R . . . adcxq %r9, %rdi +[0,159] . . . . . . . . . . D====eeeeeeeeeE---R . . . mulxq 24(%rsi), %r13, %r9 +[0,160] . . . . . . . . . . D===============eER . . . adoxq %r13, %rdi +[0,161] . . . . . . . . . . D================eER. . . adcxq %r9, %rcx +[0,162] . . . . . . . . . . 
.D====eeeeeeeeeE---R. . . mulxq 32(%rsi), %r13, %r9 +[0,163] . . . . . . . . . . .D================eER . . adoxq %r13, %rcx +[0,164] . . . . . . . . . . .D=================eER . . adcxq %r9, %rax +[0,165] . . . . . . . . . . . D====eeeeeeeeeE----R . . mulxq 40(%rsi), %r13, %r9 +[0,166] . . . . . . . . . . . D=================eER . . adoxq %r13, %rax +[0,167] . . . . . . . . . . . D==================eER . . adcxq %r9, %r11 +[0,168] . . . . . . . . . . . D====eeeeeeeeeE-----R . . mulxq 48(%rsi), %r13, %r9 +[0,169] . . . . . . . . . . . D==================eER. . adoxq %r13, %r11 +[0,170] . . . . . . . . . . . D===================eER . adcxq %r9, %r10 +[0,171] . . . . . . . . . . . D====eeeeeeeeeE------R . mulxq 56(%rsi), %rdx, %r9 +[0,172] . . . . . . . . . . . D===================eER . adoxq %rdx, %r10 +[0,173] . . . . . . . . . . . D====================eER . adcxq %rbx, %r9 +[0,174] . . . . . . . . . . . D====================eER. adoxq %rbx, %r9 +[0,175] . . . . . . . . . . . D----------------------R. xorl %ebx, %ebx +[0,176] . . . . . . . . . . . DeeeeeE----------------R. movq 48(%r12), %rdx +[0,177] . . . . . . . . . . . .D=====eeeeeeeeeE------R. mulxq (%rsi), %r14, %r13 +[0,178] . . . . . . . . . . . .D==========eE---------R. adoxq %r14, %r15 +[0,179] . . . . . . . . . . . .D==============eE-----R. adcxq %r13, %r8 +[0,180] . . . . . . . . . . . . D==========eE--------R. movq %r15, -104(%rbp) +[0,181] . . . . . . . . . . . . D=====eeeeeeeeeE-----R. mulxq 8(%rsi), %r14, %r13 +[0,182] . . . . . . . . . . . . D==============eE----R. movq %r8, %r15 +[0,183] . . . . . . . . . . . . D==============eE---R. adcxq %r13, %rdi +[0,184] . . . . . . . . . . . . D===============eE--R. adoxq %r14, %r15 +[0,185] . . . . . . . . . . . . D=====eeeeeeeeeE----R. mulxq 16(%rsi), %r13, %r8 +[0,186] . . . . . . . . . . . . D===============eE-R. adoxq %r13, %rdi +[0,187] . . . . . . . . . . . . D================eER. adcxq %r8, %rcx +[0,188] . . . . . . . . . . . . D=====eeeeeeeeeE---R. 
mulxq 24(%rsi), %r13, %r8 +[0,189] . . . . . . . . . . . . D================eER adoxq %r13, %rcx diff --git a/libc/nexgen32e/sub.S b/libc/nexgen32e/sub.S deleted file mode 100644 index b065b90ff..000000000 --- a/libc/nexgen32e/sub.S +++ /dev/null @@ -1,41 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Computes C = A - B -// -// Aliasing such as sbb(A,A,B) or sbb(B,A,B) is OK. 
-// -// @param rdi is C -// @param rsi is A -// @param rdx is B -// @param rcx is number of subtracts -// @return al is carry -sbb: .leafprologue - test %ecx,%ecx - jz 1f - xor %r9d,%r9d -0: mov (%rsi,%r9,8),%rax - sbb (%rdx,%r9,8),%rax - mov %rax,(%rdi,%r9,8) - inc %r9d - loop 0b -1: setb %al - .leafepilogue - .endfn sbb,globl diff --git a/test/net/https/mbedtls_test.c b/test/net/https/mbedtls_test.c index 3cb664029..b8a52b6af 100644 --- a/test/net/https/mbedtls_test.c +++ b/test/net/https/mbedtls_test.c @@ -39,6 +39,7 @@ #include "third_party/mbedtls/des.h" #include "third_party/mbedtls/dhm.h" #include "third_party/mbedtls/ecp.h" +#include "third_party/mbedtls/ecp_internal.h" #include "third_party/mbedtls/entropy.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/gcm.h" @@ -148,17 +149,17 @@ static void P256_MPI(mbedtls_mpi *N) { static void P256_JUSTINE(mbedtls_mpi *N) { memcpy(N->p, rng, 8 * 8); - ecp_mod_p256(N); + secp256r1(N->p); } static void P384_MPI(mbedtls_mpi *N) { - memcpy(N->p, rng, 8 * 8); + memcpy(N->p, rng, 12 * 8); ASSERT_EQ(0, mbedtls_mpi_mod_mpi(N, N, &grp.P)); } static void P384_JUSTINE(mbedtls_mpi *N) { - memcpy(N->p, rng, 8 * 8); - ecp_mod_p384(N); + memcpy(N->p, rng, 12 * 8); + secp384r1(N->p); } BENCH(p256, bench) { @@ -166,6 +167,7 @@ BENCH(p256, bench) { mbedtls_ecp_group_init(&grp); mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP256R1); mbedtls_mpi x = {1, 8, gc(calloc(8, 8))}; + rngset(x.p, 8 * 8, rand64, -1); EZBENCH2("P-256 modulus MbedTLS MPI lib", donothing, P256_MPI(&x)); EZBENCH2("P-256 modulus Justine rewrite", donothing, P256_JUSTINE(&x)); mbedtls_ecp_group_free(&grp); @@ -176,10 +178,10 @@ BENCH(p384, bench) { #ifdef MBEDTLS_ECP_C mbedtls_ecp_group_init(&grp); mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP384R1); + uint64_t y[12]; mbedtls_mpi x = {1, 12, gc(calloc(12, 8))}; EZBENCH2("P-384 modulus MbedTLS MPI lib", donothing, P384_MPI(&x)); EZBENCH2("P-384 modulus Justine rewrite", donothing, 
P384_JUSTINE(&x)); - rngset(x.p, 12 * 8, rand64, -1); mbedtls_ecp_group_free(&grp); #endif } @@ -1112,3 +1114,49 @@ BENCH(cmpint, bench) { EZBENCH2("cmpint 3.1", donothing, mbedtls_mpi_cmp_int(&z, 0)); EZBENCH2("cmpint 3.2", donothing, mbedtls_mpi_cmp_int(&z, 1)); } + +mbedtls_mpi_uint F1(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a, + const mbedtls_mpi_uint *b, size_t n) { + size_t i; + unsigned char cf; + mbedtls_mpi_uint c, x; + cf = c = i = 0; + for (; i < n; ++i) SBB(d[i], a[i], b[i], c, c); + return c; +} + +mbedtls_mpi_uint F2(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a, + const mbedtls_mpi_uint *b, size_t n) { + size_t i; + unsigned char cf; + mbedtls_mpi_uint c, x; + cf = c = i = 0; + asm volatile("xor\t%1,%1\n\t" + ".align\t16\n1:\t" + "mov\t(%5,%3,8),%1\n\t" + "sbb\t(%6,%3,8),%1\n\t" + "mov\t%1,(%4,%3,8)\n\t" + "lea\t1(%3),%3\n\t" + "dec\t%2\n\t" + "jnz\t1b" + : "=@ccb"(cf), "=&r"(x), "+c"(n), "=r"(i) + : "r"(d), "r"(a), "r"(b), "3"(0) + : "cc", "memory"); + return cf; +} + +TEST(wut, wut) { + uint64_t A[8]; + uint64_t B[8]; + uint64_t C[8]; + uint64_t D[8]; + int i; + for (i = 0; i < 1000; ++i) { + rngset(A, sizeof(A), rand64, -1); + rngset(B, sizeof(B), rand64, -1); + int x = F1(C, A, B, 8); + int y = F2(D, A, B, 8); + ASSERT_EQ(x, y); + ASSERT_EQ(0, memcmp(C, D, sizeof(C))); + } +} diff --git a/third_party/mbedtls/bignum.c b/third_party/mbedtls/bignum.c index 6dc7879fa..b8b1df68a 100644 --- a/third_party/mbedtls/bignum.c +++ b/third_party/mbedtls/bignum.c @@ -26,7 +26,6 @@ #include "libc/nexgen32e/nexgen32e.h" #include "libc/nexgen32e/x86feature.h" #include "libc/runtime/runtime.h" -#include "libc/stdio/stdio.h" #include "third_party/mbedtls/bignum.h" #include "third_party/mbedtls/bignum_internal.h" #include "third_party/mbedtls/chk.h" @@ -65,20 +64,10 @@ asm(".include \"libc/disclaimer.inc\""); #if defined(MBEDTLS_BIGNUM_C) -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define mpi_uint_bigendian_to_host(x) (x) -#elif __SIZEOF_LONG__ == 8 -#define 
mpi_uint_bigendian_to_host(x) __builtin_bswap64(x) -#elif __SIZEOF_LONG__ == 4 -#define mpi_uint_bigendian_to_host(x) __builtin_bswap32(x) -#endif - -/* Get a specific byte, without range checks. */ -#define GET_BYTE(X, i) (((X)->p[(i) / ciL] >> (((i) % ciL) * 8)) & 0xff) - -static inline void mbedtls_mpi_zeroize(mbedtls_mpi_uint *v, size_t n) +/* Implementation that should never be optimized out by the compiler */ +static void mbedtls_mpi_zeroize( mbedtls_mpi_uint *v, size_t n ) { - mbedtls_platform_zeroize(v, ciL * n); + mbedtls_platform_zeroize( v, ciL * n ); } /** @@ -88,15 +77,18 @@ static inline void mbedtls_mpi_zeroize(mbedtls_mpi_uint *v, size_t n) * in which case this function is a no-op. If it is * not \c NULL, it must point to an initialized MPI. */ -void mbedtls_mpi_free(mbedtls_mpi *X) +void mbedtls_mpi_free( mbedtls_mpi *X ) { - if (!X) return; - if (X->p) + if( !X ) + return; + if( X->p ) { - mbedtls_mpi_zeroize(X->p, X->n); - mbedtls_free(X->p); + mbedtls_mpi_zeroize( X->p, X->n ); + mbedtls_free( X->p ); } - mbedtls_mpi_init(X); + X->s = 1; + X->n = 0; + X->p = NULL; } /** @@ -216,28 +208,35 @@ int mbedtls_mpi_shrink(mbedtls_mpi *X, size_t nblimbs) * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed. * \return Another negative error code on other kinds of failure. 
*/ -int mbedtls_mpi_copy(mbedtls_mpi *X, const mbedtls_mpi *Y) +int mbedtls_mpi_copy( mbedtls_mpi *X, const mbedtls_mpi *Y ) { int ret = 0; size_t i; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(Y); - if (X == Y) - return 0; - if (!Y->n) + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( Y ); + if( X == Y ) + return( 0 ); + if( Y->n == 0 ) { - mbedtls_mpi_free(X); - return 0; + mbedtls_mpi_free( X ); + return( 0 ); } - i = MAX(1, mbedtls_mpi_limbs(Y)); + for( i = Y->n - 1; i > 0; i-- ) + if( Y->p[i] != 0 ) + break; + i++; X->s = Y->s; - if (X->n < i) - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, i)); + if( X->n < i ) + { + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i ) ); + } else - mbedtls_mpi_zeroize(X->p + i, X->n - i); - memcpy(X->p, Y->p, i * ciL); + { + mbedtls_platform_zeroize( X->p + i, ( X->n - i ) * ciL ); + } + memcpy( X->p, Y->p, i * ciL ); cleanup: - return ret; + return( ret ); } /** @@ -246,14 +245,14 @@ cleanup: * \param X The first MPI. It must be initialized. * \param Y The second MPI. It must be initialized. 
*/ -void mbedtls_mpi_swap(mbedtls_mpi *X, mbedtls_mpi *Y) +void mbedtls_mpi_swap( mbedtls_mpi *X, mbedtls_mpi *Y ) { mbedtls_mpi T; - MPI_VALIDATE(X); - MPI_VALIDATE(Y); - memcpy(&T, X, sizeof(mbedtls_mpi)); - memcpy(X, Y, sizeof(mbedtls_mpi)); - memcpy(Y, &T, sizeof(mbedtls_mpi)); + MPI_VALIDATE( X ); + MPI_VALIDATE( Y ); + memcpy( &T, X, sizeof( mbedtls_mpi ) ); + memcpy( X, Y, sizeof( mbedtls_mpi ) ); + memcpy( Y, &T, sizeof( mbedtls_mpi ) ); } /** @@ -289,7 +288,8 @@ int mbedtls_mpi_safe_cond_assign(mbedtls_mpi *X, MPI_VALIDATE_RET(X); MPI_VALIDATE_RET(Y); /* make sure assign is 0 or 1 in a time-constant manner */ - if (Y->n > X->n) MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, Y->n)); + if (Y->n > X->n) + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, Y->n ) ); assign = (assign | (unsigned char)-assign) >> 7; X->s = Select(Y->s, X->s, -assign); for (i = 0; i < Y->n; i++) @@ -297,7 +297,7 @@ int mbedtls_mpi_safe_cond_assign(mbedtls_mpi *X, for (i = Y->n; i < X->n; i++) X->p[i] &= CONCEAL("r", assign - 1); cleanup: - return ret; + return( ret ); } /** @@ -323,31 +323,30 @@ cleanup: * \return Another negative error code on other kinds of failure. 
* */ -int mbedtls_mpi_safe_cond_swap(mbedtls_mpi *X, - mbedtls_mpi *Y, - unsigned char swap) +int mbedtls_mpi_safe_cond_swap( mbedtls_mpi *X, mbedtls_mpi *Y, unsigned char swap ) { int ret, s; size_t i; mbedtls_mpi_uint tmp; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(Y); - if (X == Y) return (0); + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( Y ); + if( X == Y ) + return( 0 ); /* make sure swap is 0 or 1 in a time-constant manner */ swap = (swap | (unsigned char)-swap) >> 7; - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, Y->n)); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(Y, X->n)); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, Y->n ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( Y, X->n ) ); s = X->s; - X->s = X->s * (1 - swap) + Y->s * swap; - Y->s = Y->s * (1 - swap) + s * swap; - for (i = 0; i < X->n; i++) + X->s = X->s * ( 1 - swap ) + Y->s * swap; + Y->s = Y->s * ( 1 - swap ) + s * swap; + for( i = 0; i < X->n; i++ ) { tmp = X->p[i]; - X->p[i] = X->p[i] * (1 - swap) + Y->p[i] * swap; - Y->p[i] = Y->p[i] * (1 - swap) + tmp * swap; + X->p[i] = X->p[i] * ( 1 - swap ) + Y->p[i] * swap; + Y->p[i] = Y->p[i] * ( 1 - swap ) + tmp * swap; } cleanup: - return ret; + return( ret ); } /** @@ -360,16 +359,16 @@ cleanup: * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed. * \return Another negative error code on other kinds of failure. */ -int mbedtls_mpi_lset(mbedtls_mpi *X, mbedtls_mpi_sint z) +int mbedtls_mpi_lset( mbedtls_mpi *X, mbedtls_mpi_sint z ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; - MPI_VALIDATE_RET(X); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, 1)); - mbedtls_mpi_zeroize(X->p, X->n); - X->p[0] = (z < 0) ? -z : z; - X->s = (z < 0) ? -1 : 1; + MPI_VALIDATE_RET( X ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, 1 ) ); + mbedtls_platform_zeroize( X->p, X->n * ciL ); + X->p[0] = ( z < 0 ) ? -z : z; + X->s = ( z < 0 ) ? -1 : 1; cleanup: - return ret; + return( ret ); } /** @@ -382,13 +381,18 @@ cleanup: * of \c X is unset or set. * \return A negative error code on failure. 
*/ -int mbedtls_mpi_get_bit(const mbedtls_mpi *X, size_t pos) +int mbedtls_mpi_get_bit( const mbedtls_mpi *X, size_t pos ) { - MPI_VALIDATE_RET(X); - if (X->n * biL <= pos) return 0; - return ((X->p[pos / biL] >> (pos % biL)) & 0x01); + MPI_VALIDATE_RET( X ); + if( X->n * biL <= pos ) + return( 0 ); + return( ( X->p[pos / biL] >> ( pos % biL ) ) & 0x01 ); } +/* Get a specific byte, without range checks. */ +#define GET_BYTE( X, i ) \ + ( ( ( X )->p[( i ) / ciL] >> ( ( ( i ) % ciL ) * 8 ) ) & 0xff ) + /** * \brief Modify a specific bit in an MPI. * @@ -404,23 +408,24 @@ int mbedtls_mpi_get_bit(const mbedtls_mpi *X, size_t pos) * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed. * \return Another negative error code on other kinds of failure. */ -int mbedtls_mpi_set_bit(mbedtls_mpi *X, size_t pos, unsigned char val) +int mbedtls_mpi_set_bit( mbedtls_mpi *X, size_t pos, unsigned char val ) { int ret = 0; size_t off = pos / biL; size_t idx = pos % biL; - MPI_VALIDATE_RET(X); - if (val && val != 1) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - if (X->n * biL <= pos) + MPI_VALIDATE_RET( X ); + if( val != 0 && val != 1 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + if( X->n * biL <= pos ) { - if (!val) return 0; - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, off + 1)); + if( !val ) + return( 0 ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, off + 1 ) ); } - X->p[off] &= ~((mbedtls_mpi_uint)0x01 << idx); - X->p[off] |= (mbedtls_mpi_uint)val << idx; + X->p[off] &= ~( (mbedtls_mpi_uint) 0x01 << idx ); + X->p[off] |= (mbedtls_mpi_uint) val << idx; cleanup: - return ret; + return( ret ); } /** @@ -435,13 +440,13 @@ cleanup: * \return The number of bits of value \c 0 before the least significant * bit of value \c 1 in \p X. 
*/ -size_t mbedtls_mpi_lsb(const mbedtls_mpi *X) +size_t mbedtls_mpi_lsb( const mbedtls_mpi *X ) { size_t i, j, count = 0; MBEDTLS_INTERNAL_VALIDATE_RET(X, 0); - for (i = 0; i < X->n; i++) + for( i = 0; i < X->n; i++ ) { - if (X->p[i]) + if ( X->p[i] ) return count + __builtin_ctzll(X->p[i]); else count += biL; @@ -452,7 +457,7 @@ size_t mbedtls_mpi_lsb(const mbedtls_mpi *X) /* * Count leading zero bits in a given integer */ -static inline size_t mbedtls_clz(const mbedtls_mpi_uint x) +static inline size_t mbedtls_clz( const mbedtls_mpi_uint x ) { return x ? __builtin_clzll(x) : biL; } @@ -490,23 +495,23 @@ size_t mbedtls_mpi_bitlen(const mbedtls_mpi *X) * \return The least number of bytes capable of storing * the absolute value of \p X. */ -size_t mbedtls_mpi_size(const mbedtls_mpi *X) +size_t mbedtls_mpi_size( const mbedtls_mpi *X ) { - return (mbedtls_mpi_bitlen(X) + 7) >> 3; + return( ( mbedtls_mpi_bitlen( X ) + 7 ) >> 3 ); } /* * Convert an ASCII character to digit value */ -static int mpi_get_digit(mbedtls_mpi_uint *d, int radix, char c) +static int mpi_get_digit( mbedtls_mpi_uint *d, int radix, char c ) { *d = 255; - if (c >= 0x30 && c <= 0x39) *d = c - 0x30; - if (c >= 0x41 && c <= 0x46) *d = c - 0x37; - if (c >= 0x61 && c <= 0x66) *d = c - 0x57; - if (*d >= (mbedtls_mpi_uint)radix) - return MBEDTLS_ERR_MPI_INVALID_CHARACTER; - return 0; + if( c >= 0x30 && c <= 0x39 ) *d = c - 0x30; + if( c >= 0x41 && c <= 0x46 ) *d = c - 0x37; + if( c >= 0x61 && c <= 0x66 ) *d = c - 0x57; + if( *d >= (mbedtls_mpi_uint) radix ) + return( MBEDTLS_ERR_MPI_INVALID_CHARACTER ); + return( 0 ); } /** @@ -519,87 +524,94 @@ static int mpi_get_digit(mbedtls_mpi_uint *d, int radix, char c) * \return \c 0 if successful. * \return A negative error code on failure. 
*/ -int mbedtls_mpi_read_string(mbedtls_mpi *X, int radix, const char *s) +int mbedtls_mpi_read_string( mbedtls_mpi *X, int radix, const char *s ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; size_t i, j, slen, n; mbedtls_mpi_uint d; mbedtls_mpi T; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(s); - if (radix < 2 || radix > 16) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - mbedtls_mpi_init(&T); - slen = strlen(s); - if (radix == 16) + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( s ); + if( radix < 2 || radix > 16 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + mbedtls_mpi_init( &T ); + slen = strlen( s ); + if( radix == 16 ) { - if (slen > MPI_SIZE_T_MAX >> 2) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - n = BITS_TO_LIMBS(slen << 2); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, n)); - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0)); - for (i = slen, j = 0; i > 0; i--, j++) + if( slen > MPI_SIZE_T_MAX >> 2 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + n = BITS_TO_LIMBS( slen << 2 ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); + for( i = slen, j = 0; i > 0; i--, j++ ) { - if (i == 1 && s[i - 1] == '-') + if( i == 1 && s[i - 1] == '-' ) { X->s = -1; break; } - MBEDTLS_MPI_CHK(mpi_get_digit(&d, radix, s[i - 1])); - X->p[j / (2 * ciL)] |= d << ((j % (2 * ciL)) << 2); + MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i - 1] ) ); + X->p[j / ( 2 * ciL )] |= d << ( ( j % ( 2 * ciL ) ) << 2 ); } } else { - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0)); - for (i = 0; i < slen; i++) + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); + for( i = 0; i < slen; i++ ) { - if (!i && s[i] == '-') + if( i == 0 && s[i] == '-' ) { X->s = -1; continue; } - MBEDTLS_MPI_CHK(mpi_get_digit(&d, radix, s[i])); - MBEDTLS_MPI_CHK(mbedtls_mpi_mul_int(&T, X, radix)); - if (X->s == 1) - MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, &T, d)); + MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i] ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T, X, radix ) ); + if( X->s == 1 ) + { + MBEDTLS_MPI_CHK( 
mbedtls_mpi_add_int( X, &T, d ) ); + } else - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_int(X, &T, d)); + { + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( X, &T, d ) ); + } } } cleanup: - mbedtls_mpi_free(&T); - return ret; + mbedtls_mpi_free( &T ); + return( ret ); } /* * Helper to write the digits high-order first. */ -static int mpi_write_hlp(mbedtls_mpi *X, int radix, char **p, - const size_t buflen) +static int mpi_write_hlp( mbedtls_mpi *X, int radix, + char **p, const size_t buflen ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; mbedtls_mpi_uint r; size_t length = 0; char *p_end = *p + buflen; - do { - if (length >= buflen) - return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_int(&r, X, radix)); - MBEDTLS_MPI_CHK(mbedtls_mpi_div_int(X, NULL, X, radix)); + do + { + if( length >= buflen ) + { + return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL ); + } + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, radix ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_div_int( X, NULL, X, radix ) ); /* * Write the residue in the current position, as an ASCII character. */ - if (r < 0xA) - *(--p_end) = (char)('0' + r); + if( r < 0xA ) + *(--p_end) = (char)( '0' + r ); else - *(--p_end) = (char)('A' + (r - 0xA)); + *(--p_end) = (char)( 'A' + ( r - 0xA ) ); length++; - } while (!mbedtls_mpi_is_zero(X)); - memmove(*p, p_end, length); + } while( mbedtls_mpi_cmp_int( X, 0 ) != 0 ); + memmove( *p, p_end, length ); *p += length; cleanup: - return ret; + return( ret ); } /** @@ -624,74 +636,75 @@ cleanup: * size of \p buf required for a successful call. * \return Another negative error code on different kinds of failure. 
*/ -int mbedtls_mpi_write_string(const mbedtls_mpi *X, int radix, char *buf, - size_t buflen, size_t *olen) +int mbedtls_mpi_write_string( const mbedtls_mpi *X, int radix, + char *buf, size_t buflen, size_t *olen ) { int ret = 0; size_t n; char *p; mbedtls_mpi T; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(olen); - MPI_VALIDATE_RET(!buflen || buf); - if (radix < 2 || radix > 16) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - n = mbedtls_mpi_bitlen(X); /* Number of bits necessary to present `n`. */ - if (radix >= 4) - n >>= 1; /* Number of 4-adic digits necessary to present - * `n`. If radix > 4, this might be a strict - * overapproximation of the number of - * radix-adic digits needed to present `n`. */ - if (radix >= 16) - n >>= 1; /* Number of hexadecimal digits necessary to - * present `n`. */ - n += 1; /* Terminating null byte */ - n += 1; /* Compensate for the divisions above, which round down `n` - * in case it's not even. */ - n += 1; /* Potential '-'-sign. */ - n += (n & 1); /* Make n even to have enough space for hexadecimal writing, - * which always uses an even number of hex-digits. */ - if (buflen < n) + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( olen ); + MPI_VALIDATE_RET( buflen == 0 || buf ); + if( radix < 2 || radix > 16 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + n = mbedtls_mpi_bitlen( X ); /* Number of bits necessary to present `n`. */ + if( radix >= 4 ) n >>= 1; /* Number of 4-adic digits necessary to present + * `n`. If radix > 4, this might be a strict + * overapproximation of the number of + * radix-adic digits needed to present `n`. */ + if( radix >= 16 ) n >>= 1; /* Number of hexadecimal digits necessary to + * present `n`. */ + n += 1; /* Terminating null byte */ + n += 1; /* Compensate for the divisions above, which round down `n` + * in case it's not even. */ + n += 1; /* Potential '-'-sign. */ + n += ( n & 1 ); /* Make n even to have enough space for hexadecimal writing, + * which always uses an even number of hex-digits. 
*/ + if( buflen < n ) { *olen = n; - return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; + return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL ); } p = buf; - mbedtls_mpi_init(&T); - if (X->s == -1) + mbedtls_mpi_init( &T ); + if( X->s == -1 ) { *p++ = '-'; buflen--; } - if (radix == 16) + if( radix == 16 ) { int c; size_t i, j, k; - for (i = X->n, k = 0; i > 0; i--) + for( i = X->n, k = 0; i > 0; i-- ) { - for (j = ciL; j > 0; j--) + for( j = ciL; j > 0; j-- ) { - c = (X->p[i - 1] >> ((j - 1) << 3)) & 0xFF; - if (!c && !k && (i + j) != 2) continue; - *(p++) = "0123456789ABCDEF"[c / 16]; - *(p++) = "0123456789ABCDEF"[c % 16]; + c = ( X->p[i - 1] >> ( ( j - 1 ) << 3) ) & 0xFF; + if( c == 0 && k == 0 && ( i + j ) != 2 ) + continue; + *(p++) = "0123456789ABCDEF" [c / 16]; + *(p++) = "0123456789ABCDEF" [c % 16]; k = 1; } } } else { - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&T, X)); - if (T.s == -1) T.s = 1; - MBEDTLS_MPI_CHK(mpi_write_hlp(&T, radix, &p, buflen)); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T, X ) ); + if( T.s == -1 ) + T.s = 1; + MBEDTLS_MPI_CHK( mpi_write_hlp( &T, radix, &p, buflen ) ); } *p++ = '\0'; *olen = p - buf; cleanup: - mbedtls_mpi_free(&T); - return ret; + mbedtls_mpi_free( &T ); + return( ret ); } +#if defined(MBEDTLS_FS_IO) /** * \brief Read an MPI from a line in an opened file. * @@ -713,7 +726,7 @@ cleanup: * is too small. * \return Another negative error code on failure. 
*/ -int mbedtls_mpi_read_file(mbedtls_mpi *X, int radix, FILE *fin) +int mbedtls_mpi_read_file( mbedtls_mpi *X, int radix, FILE *fin ) { mbedtls_mpi_uint d; size_t slen; @@ -722,32 +735,24 @@ int mbedtls_mpi_read_file(mbedtls_mpi *X, int radix, FILE *fin) * Buffer should have space for (short) label and decimal formatted MPI, * newline characters and '\0' */ - char s[MBEDTLS_MPI_RW_BUFFER_SIZE]; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(fin); - if (radix < 2 || radix > 16) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - mbedtls_platform_zeroize(s, sizeof(s)); - if (!fgets(s, sizeof(s) - 1, fin)) - return MBEDTLS_ERR_MPI_FILE_IO_ERROR; - slen = strlen(s); - if (slen == sizeof(s) - 2) - return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; - if (slen > 0 && s[slen - 1] == '\n') - { - slen--; - s[slen] = '\0'; - } - if (slen > 0 && s[slen - 1] == '\r') - { - slen--; - s[slen] = '\0'; - } + char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ]; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( fin ); + if( radix < 2 || radix > 16 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + mbedtls_platform_zeroize( s, sizeof( s ) ); + if( fgets( s, sizeof( s ) - 1, fin ) == NULL ) + return( MBEDTLS_ERR_MPI_FILE_IO_ERROR ); + slen = strlen( s ); + if( slen == sizeof( s ) - 2 ) + return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL ); + if( slen > 0 && s[slen - 1] == '\n' ) { slen--; s[slen] = '\0'; } + if( slen > 0 && s[slen - 1] == '\r' ) { slen--; s[slen] = '\0'; } p = s + slen; - while (p-- > s) - if (mpi_get_digit(&d, radix, *p)) + while( p-- > s ) + if( mpi_get_digit( &d, radix, *p ) != 0 ) break; - return mbedtls_mpi_read_string(X, radix, p + 1); + return( mbedtls_mpi_read_string( X, radix, p + 1 ) ); } /** @@ -765,8 +770,7 @@ int mbedtls_mpi_read_file(mbedtls_mpi *X, int radix, FILE *fin) * \return \c 0 if successful. * \return A negative error code on failure. 
*/ -int mbedtls_mpi_write_file(const char *p, const mbedtls_mpi *X, int radix, - FILE *fout) +int mbedtls_mpi_write_file( const char *p, const mbedtls_mpi *X, int radix, FILE *fout ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; size_t n, slen, plen; @@ -774,35 +778,43 @@ int mbedtls_mpi_write_file(const char *p, const mbedtls_mpi *X, int radix, * Buffer should have space for (short) label and decimal formatted MPI, * newline characters and '\0' */ - char s[MBEDTLS_MPI_RW_BUFFER_SIZE]; - MPI_VALIDATE_RET(X); - if (radix < 2 || radix > 16) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - mbedtls_platform_zeroize(s, sizeof(s)); - MBEDTLS_MPI_CHK(mbedtls_mpi_write_string(X, radix, s, sizeof(s) - 2, &n)); - if (!p) p = ""; - plen = strlen(p); - slen = strlen(s); + char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ]; + MPI_VALIDATE_RET( X ); + if( radix < 2 || radix > 16 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + mbedtls_platform_zeroize( s, sizeof( s ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_write_string( X, radix, s, sizeof( s ) - 2, &n ) ); + if( p == NULL ) p = ""; + plen = strlen( p ); + slen = strlen( s ); s[slen++] = '\r'; s[slen++] = '\n'; - if (fout) + if( fout ) { - if (fwrite(p, 1, plen, fout) != plen || fwrite(s, 1, slen, fout) != slen) - return MBEDTLS_ERR_MPI_FILE_IO_ERROR; + if( fwrite( p, 1, plen, fout ) != plen || + fwrite( s, 1, slen, fout ) != slen ) + return( MBEDTLS_ERR_MPI_FILE_IO_ERROR ); } else - { - mbedtls_printf("%s%s", p, s); - } + mbedtls_printf( "%s%s", p, s ); cleanup: - return ret; + return( ret ); } +#endif /* MBEDTLS_FS_IO */ -static void mpi_bigendian_to_host(mbedtls_mpi_uint *const p, size_t limbs) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define mpi_uint_bigendian_to_host(x) (x) +#elif __SIZEOF_LONG__ == 8 +#define mpi_uint_bigendian_to_host(x) __builtin_bswap64(x) +#elif __SIZEOF_LONG__ == 4 +#define mpi_uint_bigendian_to_host(x) __builtin_bswap32(x) +#endif + +static void mpi_bigendian_to_host( mbedtls_mpi_uint * const p, size_t limbs ) { 
mbedtls_mpi_uint *cur_limb_left; mbedtls_mpi_uint *cur_limb_right; - if (!limbs) + if( !limbs ) return; /* * Traverse limbs and @@ -813,14 +825,15 @@ static void mpi_bigendian_to_host(mbedtls_mpi_uint *const p, size_t limbs) * than the right index (it's not a problem if limbs is odd and the * indices coincide in the last iteration). */ - for (cur_limb_left = p, cur_limb_right = p + (limbs - 1); - cur_limb_left <= cur_limb_right; cur_limb_left++, cur_limb_right--) + for( cur_limb_left = p, cur_limb_right = p + ( limbs - 1 ); + cur_limb_left <= cur_limb_right; + cur_limb_left++, cur_limb_right-- ) { mbedtls_mpi_uint tmp; /* Note that if cur_limb_left == cur_limb_right, * this code effectively swaps the bytes only once. */ - tmp = mpi_uint_bigendian_to_host(*cur_limb_left); - *cur_limb_left = mpi_uint_bigendian_to_host(*cur_limb_right); + tmp = mpi_uint_bigendian_to_host( *cur_limb_left ); + *cur_limb_left = mpi_uint_bigendian_to_host( *cur_limb_right ); *cur_limb_right = tmp; } } @@ -923,13 +936,13 @@ int mbedtls_mpi_read_binary(mbedtls_mpi *X, const unsigned char *p, size_t n) * large enough to hold the value of \p X. * \return Another negative error code on different kinds of failure. */ -int mbedtls_mpi_write_binary_le(const mbedtls_mpi *X, unsigned char *buf, - size_t buflen) +int mbedtls_mpi_write_binary_le( const mbedtls_mpi *X, + unsigned char *buf, size_t buflen ) { size_t stored_bytes = X->n * ciL; size_t bytes_to_copy; size_t i; - if (stored_bytes < buflen) + if( stored_bytes < buflen ) { bytes_to_copy = stored_bytes; } @@ -938,19 +951,20 @@ int mbedtls_mpi_write_binary_le(const mbedtls_mpi *X, unsigned char *buf, bytes_to_copy = buflen; /* The output buffer is smaller than the allocated size of X. * However X may fit if its leading bytes are zero. 
*/ - for (i = bytes_to_copy; i < stored_bytes; i++) + for( i = bytes_to_copy; i < stored_bytes; i++ ) { - if (GET_BYTE(X, i)) - return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; + if( GET_BYTE( X, i ) != 0 ) + return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL ); } } - for (i = 0; i < bytes_to_copy; i++) buf[i] = GET_BYTE(X, i); - if (stored_bytes < buflen) + for( i = 0; i < bytes_to_copy; i++ ) + buf[i] = GET_BYTE( X, i ); + if( stored_bytes < buflen ) { /* Write trailing 0 bytes */ - mbedtls_platform_zeroize(buf + stored_bytes, buflen - stored_bytes); + mbedtls_platform_zeroize( buf + stored_bytes, buflen - stored_bytes ); } - return 0; + return( 0 ); } /** @@ -968,17 +982,17 @@ int mbedtls_mpi_write_binary_le(const mbedtls_mpi *X, unsigned char *buf, * large enough to hold the value of \p X. * \return Another negative error code on different kinds of failure. */ -int mbedtls_mpi_write_binary(const mbedtls_mpi *X, unsigned char *buf, - size_t buflen) +int mbedtls_mpi_write_binary( const mbedtls_mpi *X, + unsigned char *buf, size_t buflen ) { size_t stored_bytes; size_t bytes_to_copy; unsigned char *p; size_t i; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(!buflen || buf); + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( buflen == 0 || buf ); stored_bytes = X->n * ciL; - if (stored_bytes < buflen) + if( stored_bytes < buflen ) { /* There is enough space in the output buffer. Write initial * null bytes and record the position at which to start @@ -987,7 +1001,7 @@ int mbedtls_mpi_write_binary(const mbedtls_mpi *X, unsigned char *buf, * number. */ bytes_to_copy = stored_bytes; p = buf + buflen - stored_bytes; - mbedtls_platform_zeroize(buf, buflen - stored_bytes); + mbedtls_platform_zeroize( buf, buflen - stored_bytes ); } else { @@ -995,14 +1009,352 @@ int mbedtls_mpi_write_binary(const mbedtls_mpi *X, unsigned char *buf, * However X may fit if its leading bytes are zero. 
*/ bytes_to_copy = buflen; p = buf; - for (i = bytes_to_copy; i < stored_bytes; i++) + for( i = bytes_to_copy; i < stored_bytes; i++ ) { - if (GET_BYTE(X, i)) - return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; + if( GET_BYTE( X, i ) != 0 ) + return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL ); } } - for (i = 0; i < bytes_to_copy; i++) p[bytes_to_copy - i - 1] = GET_BYTE(X, i); - return 0; + for( i = 0; i < bytes_to_copy; i++ ) + p[bytes_to_copy - i - 1] = GET_BYTE( X, i ); + return( 0 ); +} + +/** + * \brief Compare the absolute values of two MPIs. + * + * \param X The left-hand MPI. This must point to an initialized MPI. + * \param Y The right-hand MPI. This must point to an initialized MPI. + * + * \return \c 1 if `|X|` is greater than `|Y|`. + * \return \c -1 if `|X|` is lesser than `|Y|`. + * \return \c 0 if `|X|` is equal to `|Y|`. + */ +int mbedtls_mpi_cmp_abs( const mbedtls_mpi *X, const mbedtls_mpi *Y ) +{ + size_t i, j; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( Y ); + i = mbedtls_mpi_limbs(X); + j = mbedtls_mpi_limbs(Y); + if( !i && !j ) + return( 0 ); + if( i > j ) return( 1 ); + if( j > i ) return( -1 ); + for( ; i > 0; i-- ) + { + if( X->p[i - 1] > Y->p[i - 1] ) return( 1 ); + if( X->p[i - 1] < Y->p[i - 1] ) return( -1 ); + } + return( 0 ); +} + +/** + * \brief Compare two MPIs. + * + * \param X The left-hand MPI. This must point to an initialized MPI. + * \param Y The right-hand MPI. This must point to an initialized MPI. + * + * \return \c 1 if \p X is greater than \p Y. + * \return \c -1 if \p X is lesser than \p Y. + * \return \c 0 if \p X is equal to \p Y. 
+ */ +int mbedtls_mpi_cmp_mpi( const mbedtls_mpi *X, const mbedtls_mpi *Y ) +{ + size_t i, j; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( Y ); + i = mbedtls_mpi_limbs(X); + j = mbedtls_mpi_limbs(Y); + if( !i && !j ) + return( 0 ); + if( i > j ) return( X->s ); + if( j > i ) return( -Y->s ); + if( X->s > 0 && Y->s < 0 ) return( 1 ); + if( Y->s > 0 && X->s < 0 ) return( -1 ); + for( ; i > 0; i-- ) + { + if( X->p[i - 1] > Y->p[i - 1] ) return( X->s ); + if( X->p[i - 1] < Y->p[i - 1] ) return( -X->s ); + } + return( 0 ); +} + +/** + * Decide if an integer is less than the other, without branches. + * + * \param x First integer. + * \param y Second integer. + * + * \return 1 if \p x is less than \p y, 0 otherwise + */ +static unsigned ct_lt_mpi_uint( const mbedtls_mpi_uint x, + const mbedtls_mpi_uint y ) +{ + mbedtls_mpi_uint ret; + mbedtls_mpi_uint cond; + /* + * Check if the most significant bits (MSB) of the operands are different. + */ + cond = ( x ^ y ); + /* + * If the MSB are the same then the difference x-y will be negative (and + * have its MSB set to 1 during conversion to unsigned) if and only if x<y. + */ + ret = ( x - y ) & ~cond; + /* + * If the MSB are different, then the operand with the MSB of 1 is the + * bigger. (That is if y has MSB of 1, then x<y is true and it is false if + * the MSB of y is 0.) + */ + ret |= y & cond; + ret = ret >> ( biL - 1 ); + return (unsigned) ret; +} + +/** + * \brief Check if an MPI is less than the other in constant time. + * + * \param X The left-hand MPI. This must point to an initialized MPI + * with the same allocated length as Y. + * \param Y The right-hand MPI. This must point to an initialized MPI + * with the same allocated length as X. + * \param ret The result of the comparison: + * \c 1 if \p X is less than \p Y. + * \c 0 if \p X is greater than or equal to \p Y. + * + * \return 0 on success. + * \return MBEDTLS_ERR_MPI_BAD_INPUT_DATA if the allocated length of + * the two input MPIs is not the same. + */ +int mbedtls_mpi_lt_mpi_ct( const mbedtls_mpi *X, const mbedtls_mpi *Y, + unsigned *ret ) +{ + size_t i; + /* The value of any of these variables is either 0 or 1 at all times.
*/ + unsigned cond, done, X_is_negative, Y_is_negative; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( Y ); + MPI_VALIDATE_RET( ret ); + if( X->n != Y->n ) + return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; + /* + * Set N_is_negative to 1 if N < 0, 0 if N >= 0. + * We know that N->s == 1 if N >= 0 and N->s == -1 if N < 0. + */ + X_is_negative = ( X->s & 2 ) >> 1; + Y_is_negative = ( Y->s & 2 ) >> 1; + /* + * If the signs are different, then the positive operand is the bigger. + * That is if X is negative (X_is_negative == 1), then X < Y is true and it + * is false if X is positive (X_is_negative == 0). + */ + cond = ( X_is_negative ^ Y_is_negative ); + *ret = cond & X_is_negative; + /* + * This is a constant-time function. We might have the result, but we still + * need to go through the loop. Record if we have the result already. + */ + done = cond; + for( i = X->n; i > 0; i-- ) + { + /* + * If Y->p[i - 1] < X->p[i - 1] then X < Y is true if and only if both + * X and Y are negative. + * + * Again even if we can make a decision, we just mark the result and + * the fact that we are done and continue looping. + */ + cond = ct_lt_mpi_uint( Y->p[i - 1], X->p[i - 1] ); + *ret |= cond & ( 1 - done ) & X_is_negative; + done |= cond; + /* + * If X->p[i - 1] < Y->p[i - 1] then X < Y is true if and only if both + * X and Y are positive. + * + * Again even if we can make a decision, we just mark the result and + * the fact that we are done and continue looping. + */ + cond = ct_lt_mpi_uint( X->p[i - 1], Y->p[i - 1] ); + *ret |= cond & ( 1 - done ) & ( 1 - X_is_negative ); + done |= cond; + } + return( 0 ); +} + +/** + * \brief Compare an MPI with an integer. + * + * \param X The left-hand MPI. This must point to an initialized MPI. + * \param z The integer value to compare \p X to. + * + * \return \c 1 if \p X is greater than \p z. + * \return \c -1 if \p X is lesser than \p z. + * \return \c 0 if \p X is equal to \p z.
+ */ +int mbedtls_mpi_cmp_int( const mbedtls_mpi *X, mbedtls_mpi_sint z ) +{ + mbedtls_mpi Y; + mbedtls_mpi_uint p[1]; + MPI_VALIDATE_RET( X ); + *p = ( z < 0 ) ? -z : z; + Y.s = ( z < 0 ) ? -1 : 1; + Y.n = 1; + Y.p = p; + return( mbedtls_mpi_cmp_mpi( X, &Y ) ); +} + +/** + * \brief Perform an unsigned addition of MPIs: X = |A| + |B| + * + * \param X The destination MPI. This must point to an initialized MPI. + * \param A The first summand. This must point to an initialized MPI. + * \param B The second summand. This must point to an initialized MPI. + * + * \return \c 0 if successful. + * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. + * \return Another negative error code on different kinds of failure. + */ +int mbedtls_mpi_add_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B ) +{ + int ret = MBEDTLS_ERR_THIS_CORRUPTION; + size_t i, j; + mbedtls_mpi_uint *o, *p, c, tmp; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( B ); + if( X == B ) + { + const mbedtls_mpi *T = A; A = X; B = T; + } + if( X != A ) + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) ); + /* + * X should always be positive as a result of unsigned additions. + */ + X->s = 1; + for( j = B->n; j > 0; j-- ) + if( B->p[j - 1] != 0 ) + break; + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) ); + o = B->p; p = X->p; c = 0; + /* + * tmp is used because it might happen that p == o + */ + for( i = 0; i < j; i++, o++, p++ ) + { + tmp= *o; + *p += c; c = ( *p < c ); + *p += tmp; c += ( *p < tmp ); + } + while( c != 0 ) + { + if( i >= X->n ) + { + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + 1 ) ); + p = X->p + i; + } + *p += c; c = ( *p < c ); i++; p++; + } +cleanup: + return( ret ); +} + +/** + * Helper for mbedtls_mpi subtraction. + * + * Calculate d = a - b where d, a, and b have the same size. + * This function operates modulo (2^ciL)^n and returns the carry + * (1 if there was a wraparound, i.e. if `a < b`, and 0 otherwise). 
+ * + * \param[out] d Result of subtraction. + * \param[in] a Left operand. + * \param[in] b Right operand. + * \param n Number of limbs of \p a and \p b. + * \return 1 if `d < s`. + * 0 if `d >= s`. + */ +forceinline mbedtls_mpi_uint mpi_sub_hlp(mbedtls_mpi_uint *d, + const mbedtls_mpi_uint *a, + const mbedtls_mpi_uint *b, + size_t n) +{ + size_t i; + unsigned char cf; + mbedtls_mpi_uint c, x; + cf = c = i = 0; +#ifdef __x86_64__ + if (!n) return 0; + asm volatile("xor\t%1,%1\n\t" + ".align\t16\n1:\t" + "mov\t(%5,%3,8),%1\n\t" + "sbb\t(%6,%3,8),%1\n\t" + "mov\t%1,(%4,%3,8)\n\t" + "lea\t1(%3),%3\n\t" + "dec\t%2\n\t" + "jnz\t1b" + : "=@ccb"(cf), "=&r"(x), "+&c"(n), "=&r"(i) + : "r"(d), "r"(a), "r"(b), "3"(0) + : "cc", "memory"); + return cf; +#else + for (; i < n; ++i) + SBB(d[i], a[i], b[i], c, c); + return c; +#endif +} + +/** + * \brief Perform an unsigned subtraction of MPIs: X = |A| - |B| + * + * \param X The destination MPI. This must point to an initialized MPI. + * \param A The minuend. This must point to an initialized MPI. + * \param B The subtrahend. This must point to an initialized MPI. + * + * \return \c 0 if successful. + * \return #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p B is greater than \p A. + * \return Another negative error code on different kinds of failure. 
+ */ +int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B ) +{ + size_t n, m, r; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( B ); + if( X != A && !B->n ) + return mbedtls_mpi_copy( X, A ); /* wut */ + for( n = B->n; n > 0; n-- ) + if( B->p[n - 1] != 0 ) + break; + if( n > A->n ) + return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; /* B >= (2^ciL)^n > A */ + if (X != A) + { + if (X->n < A->n) { + if ((r = mbedtls_mpi_grow(X, A->n))) return r; + } else if (X->n > A->n) { + mbedtls_mpi_zeroize(X->p + A->n, X->n - A->n); + } + if ((m = A->n - n)) + memcpy(X->p + n, A->p + n, m * ciL); + } + /* + * X should always be positive as a result of unsigned subtractions. + */ + X->s = 1; + if( mpi_sub_hlp( X->p, A->p, B->p, n ) ){ + /* Propagate the carry to the first nonzero limb of X. */ + for( ; n < A->n && A->p[n] == 0; n++ ) + /* --X->p[n]; */ + X->p[n] = A->p[n] - 1; + /* If we ran out of space for the carry, it means that the result + * is negative. */ + if( n == X->n ) + return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; + --X->p[n]; + } + return( 0 ); } static int mpi_cmp_abs(const mbedtls_mpi *X, @@ -1026,310 +1378,17 @@ static int mpi_cmp_abs(const mbedtls_mpi *X, return 0; } -/** - * \brief Compare the absolute values of two MPIs. - * - * \param X The left-hand MPI. This must point to an initialized MPI. - * \param Y The right-hand MPI. This must point to an initialized MPI. - * - * \return \c 1 if `|X|` is greater than `|Y|`. - * \return \c -1 if `|X|` is lesser than `|Y|`. - * \return \c 0 if `|X|` is equal to `|Y|`. 
- */ -int mbedtls_mpi_cmp_abs(const mbedtls_mpi *X, const mbedtls_mpi *Y) +static int mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B, size_t n ) { - size_t i, j; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(Y); - return mpi_cmp_abs(X, Y, &i, &j); -} - -static int mpi_cmp_mpi(const mbedtls_mpi *X, const mbedtls_mpi *Y, - size_t *Xn, size_t *Yn) { - size_t i, j; - i = mbedtls_mpi_limbs(X); - j = mbedtls_mpi_limbs(Y); - *Xn = i; - *Yn = j; - if (!i && !j) return 0; - if (i > j) return X->s; - if (j > i) return -Y->s; - if (X->s > 0 && Y->s < 0) return 1; - if (Y->s > 0 && X->s < 0) return -1; - for (; i > 0; i--) { - if (X->p[i - 1] > Y->p[i - 1]) return X->s; - if (X->p[i - 1] < Y->p[i - 1]) return -X->s; - } - return 0; -} - -/** - * \brief Compare two MPIs. - * - * \param X The left-hand MPI. This must point to an initialized MPI. - * \param Y The right-hand MPI. This must point to an initialized MPI. - * - * \return \c 1 if \p X is greater than \p Y. - * \return \c -1 if \p X is lesser than \p Y. - * \return \c 0 if \p X is equal to \p Y. - */ -int mbedtls_mpi_cmp_mpi(const mbedtls_mpi *X, const mbedtls_mpi *Y) { - size_t i, j; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(Y); - return mpi_cmp_mpi(X, Y, &i, &j); -} - -/** - * Decide if an integer is less than the other, without branches. - * - * \param x First integer. - * \param y Second integer. - * - * \return 1 if \p x is less than \p y, 0 otherwise - */ -static unsigned ct_lt_mpi_uint(const mbedtls_mpi_uint x, - const mbedtls_mpi_uint y) { - mbedtls_mpi_uint ret; - mbedtls_mpi_uint cond; - /* - * Check if the most significant bits (MSB) of the operands are different. - */ - cond = (x ^ y); - /* - * If the MSB are the same then the difference x-y will be negative (and - * have its MSB set to 1 during conversion to unsigned) if and only if x> (biL - 1); - return (unsigned)ret; -} - -/** - * \brief Check if an MPI is less than the other in constant time. - * - * \param X The left-hand MPI. 
This must point to an initialized MPI - * with the same allocated length as Y. - * \param Y The right-hand MPI. This must point to an initialized MPI - * with the same allocated length as X. - * \param ret The result of the comparison: - * \c 1 if \p X is less than \p Y. - * \c 0 if \p X is greater than or equal to \p Y. - * - * \return 0 on success. - * \return MBEDTLS_ERR_MPI_BAD_INPUT_DATA if the allocated length of - * the two input MPIs is not the same. - */ -int mbedtls_mpi_lt_mpi_ct(const mbedtls_mpi *X, const mbedtls_mpi *Y, - unsigned *ret) -{ - size_t i; - /* The value of any of these variables is either 0 or 1 at all times. */ - unsigned cond, done, X_is_negative, Y_is_negative; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(Y); - MPI_VALIDATE_RET(ret); - if (X->n != Y->n) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - /* - * Set sign_N to 1 if N >= 0, 0 if N < 0. - * We know that N->s == 1 if N >= 0 and N->s == -1 if N < 0. - */ - X_is_negative = (X->s & 2) >> 1; - Y_is_negative = (Y->s & 2) >> 1; - /* - * If the signs are different, then the positive operand is the bigger. - * That is if X is negative (X_is_negative == 1), then X < Y is true and it - * is false if X is positive (X_is_negative == 0). - */ - cond = (X_is_negative ^ Y_is_negative); - *ret = cond & X_is_negative; - /* - * This is a constant-time function. We might have the result, but we still - * need to go through the loop. Record if we have the result already. - */ - done = cond; - for (i = X->n; i > 0; i--) - { - /* - * If Y->p[i - 1] < X->p[i - 1] then X < Y is true if and only if both - * X and Y are negative. - * - * Again even if we can make a decision, we just mark the result and - * the fact that we are done and continue looping. - */ - cond = ct_lt_mpi_uint(Y->p[i - 1], X->p[i - 1]); - *ret |= cond & (1 - done) & X_is_negative; - done |= cond; - /* - * If X->p[i - 1] < Y->p[i - 1] then X < Y is true if and only if both - * X and Y are positive. 
- * - * Again even if we can make a decision, we just mark the result and - * the fact that we are done and continue looping. - */ - cond = ct_lt_mpi_uint(X->p[i - 1], Y->p[i - 1]); - *ret |= cond & (1 - done) & (1 - X_is_negative); - done |= cond; - } - return 0; -} - -/** - * \brief Compare an MPI with an integer. - * - * \param X The left-hand MPI. This must point to an initialized MPI. - * \param z The integer value to compare \p X to. - * - * \return \c 1 if \p X is greater than \p z. - * \return \c -1 if \p X is lesser than \p z. - * \return \c 0 if \p X is equal to \p z. - */ -int mbedtls_mpi_cmp_int(const mbedtls_mpi *X, mbedtls_mpi_sint z) -{ - mbedtls_mpi Y; - mbedtls_mpi_uint p[1]; - MPI_VALIDATE_RET(X); - *p = (z < 0) ? -z : z; - Y.s = (z < 0) ? -1 : 1; - Y.n = 1; - Y.p = p; - return mbedtls_mpi_cmp_mpi(X, &Y); -} - -forceinline mbedtls_mpi_uint mpi_add_hlp(mbedtls_mpi_uint *d, - const mbedtls_mpi_uint *b, - size_t n) -{ - size_t i; - unsigned char cf; - mbedtls_mpi_uint c, t, *e; - e = d + n; - c = i = 0; -#ifdef __x86_64__ - for (; d + 4 <= e; d += 4, b += 4, c = cf) - { - asm("add\t%5,%1\n\t" - "adc\t%6,%2\n\t" - "adc\t%7,%3\n\t" - "adc\t%8,%4" - : "=@ccc"(cf), "+m"(d[0]), "+m"(d[1]), "+m"(d[2]), "+m"(d[3]) - : "r"(b[0] + c), "r"(b[1]), "r"(b[2]), "r"(b[3]) - : "cc"); - } -#endif - for (; d < e; ++d, ++b) - ADC(*d, *d, *b, c, c); - return c; -} - -/** - * Helper for mbedtls_mpi subtraction. - * - * Calculate d = a - b where d, a, and b have the same size. - * This function operates modulo (2^ciL)^n and returns the carry - * (1 if there was a wraparound, i.e. if `a < b`, and 0 otherwise). - * - * \param[out] d Result of subtraction. - * \param[in] a Left operand. - * \param[in] b Right operand. - * \param n Number of limbs of \p a and \p b. - * \return 1 if `d < s`. - * 0 if `d >= s`. 
- */ -forceinline mbedtls_mpi_uint mpi_sub_hlp(mbedtls_mpi_uint *d, - const mbedtls_mpi_uint *a, - const mbedtls_mpi_uint *b, - size_t n) -{ - size_t i; - unsigned char cf; - uint64_t q, r, s, t; - mbedtls_mpi_uint c, z, x, y; - cf = c = i = 0; -#ifdef __x86_64__ - for (; i + 4 <= n; i += 4, c = cf) - { - q = a[i + 0]; - r = a[i + 1]; - s = a[i + 2]; - t = a[i + 3]; - asm volatile("sub\t%5,%1\n\t" - "sbb\t1*8(%6),%2\n\t" - "sbb\t2*8(%6),%3\n\t" - "sbb\t3*8(%6),%4" - : "=@ccc"(cf), "+r"(q), "+r"(r), "+r"(s), "+r"(t) - : "r"(b[i] + c), "r"(b + i) - : "memory", "cc"); - d[i + 0] = q; - d[i + 1] = r; - d[i + 2] = s; - d[i + 3] = t; - } -#endif - for (; i < n; ++i) - SBB(d[i], a[i], b[i], c, c); - return c; -} - -/** - * \brief Perform an unsigned addition of MPIs: X = |A| + |B| - * - * \param X The destination MPI. This must point to an initialized MPI. - * \param A The first summand. This must point to an initialized MPI. - * \param B The second summand. This must point to an initialized MPI. - * - * \return \c 0 if successful. - * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. - * \return Another negative error code on different kinds of failure. 
- */ -int mbedtls_mpi_add_abs(mbedtls_mpi *X, const mbedtls_mpi *A, - const mbedtls_mpi *B) -{ - int ret = MBEDTLS_ERR_THIS_CORRUPTION; - size_t i, j; - unsigned char cf; - const mbedtls_mpi *T; - mbedtls_mpi_uint c, tmp; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(B); - if (X == B) T = A, A = X, B = T; - if (X != A) MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, A)); - X->s = 1; /* always positive b/c unsigned addition */ - j = mbedtls_mpi_limbs(B); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j)); - c = mpi_add_hlp(X->p, B->p, j); - for (; c; ++j) - { - if (j >= X->n) - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j + 1)); - X->p[j] += c; - c = X->p[j] < c; - } -cleanup: - return ret; -} - -static int mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A, - const mbedtls_mpi *B, size_t Bn) -{ - int ret; - size_t n, m; - unsigned char cf; - n = Bn; - if (n > A->n) + size_t m, r; + if( X != A && !B->n ) + return mbedtls_mpi_copy( X, A ); /* wut */ + if( n > A->n ) return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; /* B >= (2^ciL)^n > A */ if (X != A) { if (X->n < A->n) { - if ((ret = mbedtls_mpi_grow(X, A->n))) return ret; + if ((r = mbedtls_mpi_grow(X, A->n))) return r; } else if (X->n > A->n) { mbedtls_mpi_zeroize(X->p + A->n, X->n - A->n); } @@ -1340,43 +1399,18 @@ static int mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A, * X should always be positive as a result of unsigned subtractions. */ X->s = 1; - cf = mpi_sub_hlp(X->p, A->p, B->p, n); - if (cf) - { + if( mpi_sub_hlp( X->p, A->p, B->p, n ) ){ /* Propagate the carry to the first nonzero limb of X. */ - for (; n < A->n && !A->p[n]; n++) { /* --X->p[n]; */ + for( ; n < A->n && A->p[n] == 0; n++ ) + /* --X->p[n]; */ X->p[n] = A->p[n] - 1; - } /* If we ran out of space for the carry, it means that the result * is negative. 
*/ - if (n == X->n) + if( n == X->n ) return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; --X->p[n]; } - return 0; -} - -/** - * \brief Perform an unsigned subtraction of MPIs: X = |A| - |B| - * - * \param X The destination MPI. This must point to an initialized MPI. - * \param A The minuend. This must point to an initialized MPI. - * \param B The subtrahend. This must point to an initialized MPI. - * - * \return \c 0 if successful. - * \return #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p B is greater than \p A. - * \return Another negative error code on different kinds of failure. - */ -int mbedtls_mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A, - const mbedtls_mpi *B) -{ - size_t n, m; - unsigned char cf; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(B); - if (X != A && !B->n) return mbedtls_mpi_copy(X, A); /* wut */ - return mpi_sub_abs(X, A, B, mbedtls_mpi_limbs(B)); + return( 0 ); } /** @@ -1390,35 +1424,34 @@ int mbedtls_mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A, * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. * \return Another negative error code on different kinds of failure. 
*/ -int mbedtls_mpi_add_mpi(mbedtls_mpi *X, const mbedtls_mpi *A, - const mbedtls_mpi *B) +int mbedtls_mpi_add_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B ) { int ret, s; size_t i, j; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(B); + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( B ); s = A->s; - if (A->s * B->s < 0) + if( A->s * B->s < 0 ) { - if (mpi_cmp_abs(A, B, &i, &j) >= 0) + if( mpi_cmp_abs( A, B, &i, &j ) >= 0 ) { - MBEDTLS_MPI_CHK(mpi_sub_abs(X, A, B, j)); - X->s = s; + MBEDTLS_MPI_CHK( mpi_sub_abs( X, A, B, j ) ); + X->s = s; } else { - MBEDTLS_MPI_CHK(mpi_sub_abs(X, B, A, i)); + MBEDTLS_MPI_CHK( mpi_sub_abs( X, B, A, i ) ); X->s = -s; } } else { - MBEDTLS_MPI_CHK(mbedtls_mpi_add_abs(X, A, B)); + MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) ); X->s = s; } cleanup: - return ret; + return( ret ); } /** @@ -1432,60 +1465,58 @@ cleanup: * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. * \return Another negative error code on different kinds of failure. 
*/ -int mbedtls_mpi_sub_mpi(mbedtls_mpi *X, const mbedtls_mpi *A, - const mbedtls_mpi *B) +int mbedtls_mpi_sub_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B ) { int ret, s; size_t i, j; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(B); + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( B ); s = A->s; - if (A->s * B->s > 0) + if( A->s * B->s > 0 ) { - if (mpi_cmp_abs(A, B, &i, &j) >= 0) + if( mpi_cmp_abs( A, B, &i, &j ) >= 0 ) { - MBEDTLS_MPI_CHK(mpi_sub_abs(X, A, B, j)); - X->s = s; + MBEDTLS_MPI_CHK( mpi_sub_abs( X, A, B, j ) ); + X->s = s; } else { - MBEDTLS_MPI_CHK(mpi_sub_abs(X, B, A, i)); + MBEDTLS_MPI_CHK( mpi_sub_abs( X, B, A, i ) ); X->s = -s; } } else { - MBEDTLS_MPI_CHK(mbedtls_mpi_add_abs(X, A, B)); + MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) ); X->s = s; } cleanup: - return ret; + return( ret ); } /** - * \brief Performs signed addition of MPI and integer: X = A + b + * \brief Perform a signed addition of an MPI and an integer: X = A + b * * \param X The destination MPI. This must point to an initialized MPI. * \param A The first summand. This must point to an initialized MPI. * \param b The second summand. * * \return \c 0 if successful. - * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a allocation failed. - * \return Another negative error code on different kinds of - * failure. + * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. + * \return Another negative error code on different kinds of failure. */ -int mbedtls_mpi_add_int(mbedtls_mpi *X, const mbedtls_mpi *A, - mbedtls_mpi_sint b) { +int mbedtls_mpi_add_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b ) +{ mbedtls_mpi _B; mbedtls_mpi_uint p[1]; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - p[0] = (b < 0) ? -b : b; - _B.s = (b < 0) ? -1 : 1; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + p[0] = ( b < 0 ) ? -b : b; + _B.s = ( b < 0 ) ? 
-1 : 1; _B.n = 1; _B.p = p; - return mbedtls_mpi_add_mpi(X, A, &_B); + return( mbedtls_mpi_add_mpi( X, A, &_B ) ); } /** @@ -1500,50 +1531,69 @@ int mbedtls_mpi_add_int(mbedtls_mpi *X, const mbedtls_mpi *A, * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. * \return Another negative error code on different kinds of failure. */ -int mbedtls_mpi_sub_int(mbedtls_mpi *X, const mbedtls_mpi *A, - mbedtls_mpi_sint b) { +int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b ) +{ mbedtls_mpi _B; mbedtls_mpi_uint p[1]; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - p[0] = (b < 0) ? -b : b; - _B.s = (b < 0) ? -1 : 1; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + p[0] = ( b < 0 ) ? -b : b; + _B.s = ( b < 0 ) ? -1 : 1; _B.n = 1; _B.p = p; - return mbedtls_mpi_sub_mpi(X, A, &_B); + return( mbedtls_mpi_sub_mpi( X, A, &_B ) ); } /* * Unsigned integer divide - double mbedtls_mpi_uint dividend, u1/u0, and * mbedtls_mpi_uint divisor, d */ -static inline mbedtls_mpi_uint mbedtls_int_div_int(mbedtls_mpi_uint u1, - mbedtls_mpi_uint u0, - mbedtls_mpi_uint d, - mbedtls_mpi_uint *r) +static mbedtls_mpi_uint mbedtls_int_div_int( mbedtls_mpi_uint u1, + mbedtls_mpi_uint u0, + mbedtls_mpi_uint d, + mbedtls_mpi_uint *r ) { - if (d && u1 < d) - { #ifdef __x86_64__ - mbedtls_mpi_uint quo, rem; - asm("div\t%2" : "=a"(quo), "=d"(rem) : "r"(d), "0"(u0), "1"(u1) : "cc"); - if (r) *r = rem; - return quo; -#elif defined(MBEDTLS_HAVE_UDBL) - mbedtls_t_udbl dividend, quotient; - dividend = (mbedtls_t_udbl)u1 << biL; - dividend |= (mbedtls_t_udbl)u0; - quotient = dividend / d; - if (quotient > ((mbedtls_t_udbl)1 << biL) - 1) - quotient = ((mbedtls_t_udbl)1 << biL) - 1; - if (r) *r = (mbedtls_mpi_uint)(dividend - (quotient * d)); - return (mbedtls_mpi_uint)quotient; + if (d && u1 < d) + { + mbedtls_mpi_uint quo, rem; + asm("div\t%2" : "=a"(quo), "=d"(rem) : "r"(d), "0"(u0), "1"(u1) : "cc"); + if (r) *r = rem; + return quo; + } + else + { + if (r) *r = 
~0; + return ~0; + } #else - size_t s; - mbedtls_mpi_uint radix = (mbedtls_mpi_uint)1 << biH; - mbedtls_mpi_uint uint_halfword_mask = ((mbedtls_mpi_uint)1 << biH) - 1; +#if defined(MBEDTLS_HAVE_UDBL) + mbedtls_t_udbl dividend, quotient; +#else + const mbedtls_mpi_uint radix = (mbedtls_mpi_uint) 1 << biH; + const mbedtls_mpi_uint uint_halfword_mask = ( (mbedtls_mpi_uint) 1 << biH ) - 1; mbedtls_mpi_uint d0, d1, q0, q1, rAX, r0, quotient; mbedtls_mpi_uint u0_msw, u0_lsw; + size_t s; +#endif + /* + * Check for overflow + */ + if( 0 == d || u1 >= d ) + { + if (r) *r = ~0; + return ( ~0 ); + } +#if defined(MBEDTLS_HAVE_UDBL) + dividend = (mbedtls_t_udbl) u1 << biL; + dividend |= (mbedtls_t_udbl) u0; + quotient = dividend / d; + if( quotient > ( (mbedtls_t_udbl) 1 << biL ) - 1 ) + quotient = ( (mbedtls_t_udbl) 1 << biL ) - 1; + if( r ) + *r = (mbedtls_mpi_uint)( dividend - (quotient * d ) ); + return (mbedtls_mpi_uint) quotient; +#else /* * Algorithm D, Section 4.3.1 - The Art of Computer Programming * Vol. 
2 - Seminumerical Algorithms, Knuth @@ -1551,11 +1601,11 @@ static inline mbedtls_mpi_uint mbedtls_int_div_int(mbedtls_mpi_uint u1, /* * Normalize the divisor, d, and dividend, u0, u1 */ - s = mbedtls_clz(d); + s = mbedtls_clz( d ); d = d << s; u1 = u1 << s; - u1 |= (u0 >> (biL - s)) & (-(mbedtls_mpi_sint)s >> (biL - 1)); - u0 = u0 << s; + u1 |= ( u0 >> ( biL - s ) ) & ( -(mbedtls_mpi_sint)s >> ( biL - 1 ) ); + u0 = u0 << s; d1 = d >> biH; d0 = d & uint_halfword_mask; u0_msw = u0 >> biH; @@ -1565,33 +1615,27 @@ static inline mbedtls_mpi_uint mbedtls_int_div_int(mbedtls_mpi_uint u1, */ q1 = u1 / d1; r0 = u1 - d1 * q1; - while (q1 >= radix || (q1 * d0 > radix * r0 + u0_msw)) + while( q1 >= radix || ( q1 * d0 > radix * r0 + u0_msw ) ) { - q1 -= 1; - r0 += d1; - if (r0 >= radix) - break; + q1 -= 1; + r0 += d1; + if ( r0 >= radix ) break; } - rAX = (u1 * radix) + (u0_msw - q1 * d); + rAX = ( u1 * radix ) + ( u0_msw - q1 * d ); q0 = rAX / d1; r0 = rAX - q0 * d1; - while (q0 >= radix || (q0 * d0 > radix * r0 + u0_lsw)) + while( q0 >= radix || ( q0 * d0 > radix * r0 + u0_lsw ) ) { - q0 -= 1; - r0 += d1; - if (r0 >= radix) - break; + q0 -= 1; + r0 += d1; + if ( r0 >= radix ) break; } - if (r) *r = (rAX * radix + u0_lsw - q0 * d) >> s; + if (r) + *r = ( rAX * radix + u0_lsw - q0 * d ) >> s; quotient = q1 * radix + q0; return quotient; #endif - } - else - { - if (r) *r = ~0; - return ~0; - } +#endif } static inline void Multiply2x1(uint64_t a[3], uint64_t b) { @@ -1686,10 +1730,10 @@ int mbedtls_mpi_div_mpi(mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, n = X.n - 1; t = Y.n - 1; MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&Y, biL * (n - t))); - while (mpi_cmp_abs(&X, &Y, &Xn, &Yn) >= 0) + while (mbedtls_mpi_cmp_abs(&X, &Y) >= 0) { Z.p[n - t]++; - MBEDTLS_MPI_CHK(mpi_sub_abs(&X, &X, &Y, Yn)); + MBEDTLS_MPI_CHK(mbedtls_mpi_sub_abs(&X, &X, &Y)); } mbedtls_mpi_shift_r(&Y, biL * (n - t)); for (i = n; i > t; i--) @@ -1758,17 +1802,18 @@ cleanup: * \return 
#MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if \p b equals zero. * \return Another negative error code on different kinds of failure. */ -int mbedtls_mpi_div_int(mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, - mbedtls_mpi_sint b) +int mbedtls_mpi_div_int( mbedtls_mpi *Q, mbedtls_mpi *R, + const mbedtls_mpi *A, + mbedtls_mpi_sint b ) { mbedtls_mpi _B; mbedtls_mpi_uint p[1]; - MPI_VALIDATE_RET(A); - p[0] = (b < 0) ? -b : b; - _B.s = (b < 0) ? -1 : 1; + MPI_VALIDATE_RET( A ); + p[0] = ( b < 0 ) ? -b : b; + _B.s = ( b < 0 ) ? -1 : 1; _B.n = 1; _B.p = p; - return mbedtls_mpi_div_mpi(Q, R, A, &_B); + return( mbedtls_mpi_div_mpi( Q, R, A, &_B ) ); } /** @@ -1786,22 +1831,23 @@ int mbedtls_mpi_div_int(mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A, * \return #MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if \p B equals zero. * \return #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p B is negative. * \return Another negative error code on different kinds of failure. + * */ -int mbedtls_mpi_mod_mpi(mbedtls_mpi *R, const mbedtls_mpi *A, - const mbedtls_mpi *B) +int mbedtls_mpi_mod_mpi( mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B ) { - size_t i, j; int ret = MBEDTLS_ERR_THIS_CORRUPTION; - MPI_VALIDATE_RET(R); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(B); - if (B->s < 0) return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; - MBEDTLS_MPI_CHK(mbedtls_mpi_div_mpi(NULL, R, A, B)); - while (R->s < 0) MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(R, R, B)); - while (mbedtls_mpi_cmp_mpi(R, B) >= 0) - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(R, R, B)); + MPI_VALIDATE_RET( R ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( B ); + if( mbedtls_mpi_cmp_int( B, 0 ) < 0 ) + return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE ); + MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( NULL, R, A, B ) ); + while( mbedtls_mpi_cmp_int( R, 0 ) < 0 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( R, R, B ) ); + while( mbedtls_mpi_cmp_mpi( R, B ) >= 0 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( R, R, B ) ); cleanup: - return ret; + return( ret ); } /** @@ -1820,63 
+1866,64 @@ cleanup: * \return #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p b is negative. * \return Another negative error code on different kinds of failure. */ -int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A, - mbedtls_mpi_sint b ) +int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_sint b ) { size_t i; mbedtls_mpi_uint x, y, z; - MPI_VALIDATE_RET(r); - MPI_VALIDATE_RET(A); - if (!b) - return MBEDTLS_ERR_MPI_DIVISION_BY_ZERO; - if (b < 0) - return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; + MPI_VALIDATE_RET( r ); + MPI_VALIDATE_RET( A ); + if( b == 0 ) + return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO ); + if( b < 0 ) + return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE ); /* * handle trivial cases */ - if (b == 1) + if( b == 1 ) { *r = 0; - return 0; + return( 0 ); } - if (b == 2) + if( b == 2 ) { *r = A->p[0] & 1; - return 0; + return( 0 ); } /* * general case */ - for (i = A->n, y = 0; i > 0; i--) + for( i = A->n, y = 0; i > 0; i-- ) { - x = A->p[i - 1]; - y = (y << biH) | (x >> biH); - z = y / b; + x = A->p[i - 1]; + y = ( y << biH ) | ( x >> biH ); + z = y / b; y -= z * b; x <<= biH; - y = (y << biH) | (x >> biH); - z = y / b; + y = ( y << biH ) | ( x >> biH ); + z = y / b; y -= z * b; } /* * If A is negative, then the current y represents a negative value. * Flipping it to the positive side. 
*/ - if (A->s < 0 && y) y = b - y; + if( A->s < 0 && y != 0 ) + y = b - y; *r = y; - return 0; + return( 0 ); } /* * Fast Montgomery initialization (thanks to Tom St Denis) */ -static void mpi_montg_init(mbedtls_mpi_uint *mm, const mbedtls_mpi *N) +static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N ) { mbedtls_mpi_uint x, m0 = N->p[0]; unsigned int i; - x = m0; - x += ((m0 + 2) & 4) << 1; - for (i = biL; i >= 8; i /= 2) x *= 2 - m0 * x; + x = m0; + x += ( ( m0 + 2 ) & 4 ) << 1; + for( i = biL; i >= 8; i /= 2 ) + x *= ( 2 - ( m0 * x ) ); *mm = -x; } @@ -1903,42 +1950,40 @@ static void mpi_montg_init(mbedtls_mpi_uint *mm, const mbedtls_mpi *N) * Note that unlike the usual convention in the library * for `const mbedtls_mpi*`, the content of T can change. */ -static void mpi_montmul(mbedtls_mpi *A, const mbedtls_mpi *B, - const mbedtls_mpi *N, mbedtls_mpi_uint mm, - const mbedtls_mpi *T) +static void mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm, + const mbedtls_mpi *T ) { size_t i, n, m; mbedtls_mpi_uint u0, u1, *d, *Ap, *Bp, *Np; - mbedtls_mpi_zeroize(T->p, T->n); + mbedtls_platform_zeroize( T->p, T->n * ciL ); d = T->p; n = N->n; - m = (B->n < n) ? B->n : n; + m = ( B->n < n ) ? B->n : n; Ap = A->p; Bp = B->p; Np = N->p; - for (i = 0; i < n; i++) + for( i = 0; i < n; i++ ) { /* * T = (T + u0*B + u1*N) / 2^biL */ u0 = Ap[i]; - u1 = (d[0] + u0 * Bp[0]) * mm; - mbedtls_mpi_mul_hlp(m, Bp, d, u0); - mbedtls_mpi_mul_hlp(n, Np, d, u1); - *d++ = u0; - d[n + 1] = 0; + u1 = ( d[0] + u0 * Bp[0] ) * mm; + mbedtls_mpi_mul_hlp( m, Bp, d, u0 ); + mbedtls_mpi_mul_hlp( n, Np, d, u1 ); + *d++ = u0; d[n + 1] = 0; } /* At this point, d is either the desired result or the desired result * plus N. We now potentially subtract N, avoiding leaking whether the * subtraction is performed through side channels. */ /* Copy the n least significant limbs of d to A, so that * A = d if d < N (recall that N has n limbs). 
*/ - memcpy(Ap, d, n * ciL); + memcpy( Ap, d, n * ciL ); /* If d >= N then we want to set A to d - N. To prevent timing attacks, * do the calculation without using conditional tests. */ /* Set d to d0 + (2^biL)^n - N where d0 is the current value of d. */ d[n] += 1; - d[n] -= mpi_sub_hlp(d, d, Np, n); + d[n] -= mpi_sub_hlp( d, d, Np, n ); /* If d0 < N then d < (2^biL)^n * so d[n] == 0 and we want to keep A as it is. * If d0 >= N then d >= (2^biL)^n, and d <= (2^biL)^n + N < 2 * (2^biL)^n @@ -1955,14 +2000,14 @@ static void mpi_montmul(mbedtls_mpi *A, const mbedtls_mpi *B, * * See mpi_montmul() regarding constraints and guarantees on the parameters. */ -static void mpi_montred(mbedtls_mpi *A, const mbedtls_mpi *N, - mbedtls_mpi_uint mm, const mbedtls_mpi *T) +static void mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N, + mbedtls_mpi_uint mm, const mbedtls_mpi *T ) { - mbedtls_mpi U; mbedtls_mpi_uint z = 1; - U.n = U.s = (int)z; + mbedtls_mpi U; + U.n = U.s = (int) z; U.p = &z; - mpi_montmul(A, &U, N, mm, T); + mpi_montmul( A, &U, N, mm, T ); } /** @@ -1991,127 +2036,129 @@ static void mpi_montred(mbedtls_mpi *A, const mbedtls_mpi *N, * \return Another negative error code on different kinds of failures. 
* */ -int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A, - const mbedtls_mpi *E, const mbedtls_mpi *N, - mbedtls_mpi *_RR) +int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A, + const mbedtls_mpi *E, const mbedtls_mpi *N, + mbedtls_mpi *_RR ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; size_t wbits, wsize, one = 1; size_t i, j, nblimbs; size_t bufsize, nbits; mbedtls_mpi_uint ei, mm, state; - mbedtls_mpi RR, T, W[1 << MBEDTLS_MPI_WINDOW_SIZE], Apos; + mbedtls_mpi RR, T, W[ 1 << MBEDTLS_MPI_WINDOW_SIZE ], Apos; int neg; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(E); - MPI_VALIDATE_RET(N); - if (mbedtls_mpi_cmp_int(N, 0) <= 0 || !(N->p[0] & 1)) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - if (E->s < 0) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - if (mbedtls_mpi_bitlen(E) > MBEDTLS_MPI_MAX_BITS || - mbedtls_mpi_bitlen(N) > MBEDTLS_MPI_MAX_BITS) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( E ); + MPI_VALIDATE_RET( N ); + if( mbedtls_mpi_cmp_int( N, 0 ) <= 0 || ( N->p[0] & 1 ) == 0 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + if( mbedtls_mpi_cmp_int( E, 0 ) < 0 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + if( mbedtls_mpi_bitlen( E ) > MBEDTLS_MPI_MAX_BITS || + mbedtls_mpi_bitlen( N ) > MBEDTLS_MPI_MAX_BITS ) + return ( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); /* * Init temps and window size */ - mpi_montg_init(&mm, N); - mbedtls_mpi_init(&RR); - mbedtls_mpi_init(&T); - mbedtls_mpi_init(&Apos); - mbedtls_platform_zeroize(W, sizeof(W)); - i = mbedtls_mpi_bitlen(E); - wsize = (i > 671) ? 6 : (i > 239) ? 5 : (i > 79) ? 4 : (i > 23) ? 3 : 1; -#if (MBEDTLS_MPI_WINDOW_SIZE < 6) - if (wsize > MBEDTLS_MPI_WINDOW_SIZE) wsize = MBEDTLS_MPI_WINDOW_SIZE; + mpi_montg_init( &mm, N ); + mbedtls_mpi_init( &RR ); mbedtls_mpi_init( &T ); + mbedtls_mpi_init( &Apos ); + mbedtls_platform_zeroize( W, sizeof( W ) ); + i = mbedtls_mpi_bitlen( E ); + wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 
5 : + ( i > 79 ) ? 4 : ( i > 23 ) ? 3 : 1; +#if( MBEDTLS_MPI_WINDOW_SIZE < 6 ) + if( wsize > MBEDTLS_MPI_WINDOW_SIZE ) + wsize = MBEDTLS_MPI_WINDOW_SIZE; #endif j = N->n + 1; - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j)); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1], j)); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&T, j * 2)); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1], j ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) ); /* * Compensate for negative A (and correct at the end) */ - neg = (A->s == -1); - if (neg) + neg = ( A->s == -1 ); + if( neg ) { - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Apos, A)); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Apos, A ) ); Apos.s = 1; A = &Apos; } /* * If 1st call, pre-compute R^2 mod N */ - if (!_RR || !_RR->p) + if( _RR == NULL || _RR->p == NULL ) { - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&RR, 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&RR, N->n * 2 * biL)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&RR, &RR, N)); - if (_RR) memcpy(_RR, &RR, sizeof(mbedtls_mpi)); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &RR, 1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &RR, N->n * 2 * biL ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &RR, &RR, N ) ); + if( _RR ) + memcpy( _RR, &RR, sizeof( mbedtls_mpi ) ); } else - { - memcpy(&RR, _RR, sizeof(mbedtls_mpi)); - } + memcpy( &RR, _RR, sizeof( mbedtls_mpi ) ); /* * W[1] = A * R^2 * R^-1 mod N = A * R mod N */ - if (mbedtls_mpi_cmp_mpi(A, N) >= 0) - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&W[1], A, N)); + if( mbedtls_mpi_cmp_mpi( A, N ) >= 0 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &W[1], A, N ) ); else - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[1], A)); - mpi_montmul(&W[1], &RR, N, mm, &T); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[1], A ) ); + mpi_montmul( &W[1], &RR, N, mm, &T ); /* * X = R^2 * R^-1 mod N = R mod N */ - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, &RR)); - mpi_montred(X, N, mm, &T); - if (wsize > 1) + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) ); + mpi_montred( X, N, mm, &T ); + if( 
wsize > 1 ) { /* * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1) */ - j = one << (wsize - 1); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[j], N->n + 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[j], &W[1])); - for (i = 0; i < wsize - 1; i++) - mpi_montmul(&W[j], &W[j], N, mm, &T); + j = one << ( wsize - 1 ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1] ) ); + for( i = 0; i < wsize - 1; i++ ) + mpi_montmul( &W[j], &W[j], N, mm, &T ); /* * W[i] = W[i - 1] * W[1] */ - for (i = j + 1; i < (one << wsize); i++) + for( i = j + 1; i < ( one << wsize ); i++ ) { - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[i], N->n + 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[i], &W[i - 1])); - mpi_montmul(&W[i], &W[1], N, mm, &T); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) ); + mpi_montmul( &W[i], &W[1], N, mm, &T ); } } nblimbs = E->n; bufsize = 0; - nbits = 0; - wbits = 0; - state = 0; - while (1) + nbits = 0; + wbits = 0; + state = 0; + while( 1 ) { - if (!bufsize) + if( bufsize == 0 ) { - if (!nblimbs) break; + if( nblimbs == 0 ) + break; nblimbs--; - bufsize = sizeof(mbedtls_mpi_uint) << 3; + bufsize = sizeof( mbedtls_mpi_uint ) << 3; } bufsize--; ei = (E->p[nblimbs] >> bufsize) & 1; /* * skip leading 0s */ - if (ei == 0 && state == 0) continue; - if (ei == 0 && state == 1) + if( ei == 0 && state == 0 ) + continue; + if( ei == 0 && state == 1 ) { /* * out of window, square X */ - mpi_montmul(X, X, N, mm, &T); + mpi_montmul( X, X, N, mm, &T ); continue; } /* @@ -2119,18 +2166,18 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A, */ state = 2; nbits++; - wbits |= (ei << (wsize - nbits)); - if (nbits == wsize) + wbits |= ( ei << ( wsize - nbits ) ); + if( nbits == wsize ) { /* * X = X^wsize R^-1 mod N */ - for (i = 0; i < wsize; i++) - mpi_montmul(X, X, N, mm, &T); + for( i = 0; i < wsize; i++ ) + mpi_montmul( X, X, N, mm, &T ); /* * X = X * W[wbits] R^-1 mod N */ 
- mpi_montmul(X, &W[wbits], N, mm, &T); + mpi_montmul( X, &W[wbits], N, mm, &T ); state--; nbits = 0; wbits = 0; @@ -2139,47 +2186,29 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A, /* * process the remaining bits */ - for (i = 0; i < nbits; i++) + for( i = 0; i < nbits; i++ ) { - mpi_montmul(X, X, N, mm, &T); + mpi_montmul( X, X, N, mm, &T ); wbits <<= 1; - if ((wbits & (one << wsize))) - mpi_montmul(X, &W[1], N, mm, &T); + if( ( wbits & ( one << wsize ) ) != 0 ) + mpi_montmul( X, &W[1], N, mm, &T ); } /* * X = A^E * R * R^-1 mod N = A^E mod N */ - mpi_montred(X, N, mm, &T); - if (neg && E->n && (E->p[0] & 1)) + mpi_montred( X, N, mm, &T ); + if( neg && E->n != 0 && ( E->p[0] & 1 ) != 0 ) { X->s = -1; - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(X, N, X)); + MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) ); } cleanup: - for (i = (one << (wsize - 1)); i < (one << wsize); i++) - mbedtls_mpi_free(&W[i]); - mbedtls_mpi_free(&W[1]); - mbedtls_mpi_free(&T); - mbedtls_mpi_free(&Apos); - if (!_RR || !_RR->p) - mbedtls_mpi_free(&RR); - return ret; -} - -static inline int Compare(const mbedtls_mpi *X, - const mbedtls_mpi *Y, - size_t i, - size_t j) -{ - if (!i && !j) return 0; - if (i > j) return 1; - if (j > i) return -1; - for (; i > 0; i--) - { - if (X->p[i - 1] > Y->p[i - 1]) return 1; - if (X->p[i - 1] < Y->p[i - 1]) return -1; - } - return 0; + for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ ) + mbedtls_mpi_free( &W[i] ); + mbedtls_mpi_free( &W[1] ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos ); + if( _RR == NULL || _RR->p == NULL ) + mbedtls_mpi_free( &RR ); + return( ret ); } /** @@ -2193,53 +2222,53 @@ static inline int Compare(const mbedtls_mpi *X, * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. * \return Another negative error code on different kinds of failure. 
*/ -int mbedtls_mpi_gcd(mbedtls_mpi *G, const mbedtls_mpi *A, - const mbedtls_mpi *B) +int mbedtls_mpi_gcd( mbedtls_mpi *G, const mbedtls_mpi *A, const mbedtls_mpi *B ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; mbedtls_mpi TA, TB; size_t lz, lzt, i, j; - MPI_VALIDATE_RET(G); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(B); - mbedtls_mpi_init(&TA); - mbedtls_mpi_init(&TB); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TA, A)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TB, B)); - lz = mbedtls_mpi_lsb(&TA); - lzt = mbedtls_mpi_lsb(&TB); - if (lzt < lz) lz = lzt; - mbedtls_mpi_shift_r(&TA, lz); - mbedtls_mpi_shift_r(&TB, lz); + MPI_VALIDATE_RET( G ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( B ); + mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) ); + lz = mbedtls_mpi_lsb( &TA ); + lzt = mbedtls_mpi_lsb( &TB ); + if( lzt < lz ) + lz = lzt; + MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, lz ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, lz ) ); TA.s = TB.s = 1; - i = mbedtls_mpi_bitlen(&TA); - j = mbedtls_mpi_bitlen(&TB); - while (!mbedtls_mpi_is_zero(&TA)) + while( !mbedtls_mpi_is_zero( &TA ) ) { - mbedtls_mpi_shift_r(&TA, mbedtls_mpi_lsb(&TA)); - mbedtls_mpi_shift_r(&TB, mbedtls_mpi_lsb(&TB)); - if (mpi_cmp_abs(&TA, &TB, &i, &j) >= 0) + MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, mbedtls_mpi_lsb( &TA ) ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, mbedtls_mpi_lsb( &TB ) ) ); + if( mpi_cmp_abs( &TA, &TB, &i, &j ) >= 0 ) { - MBEDTLS_MPI_CHK(mpi_sub_abs(&TA, &TA, &TB, j)); - mbedtls_mpi_shift_r(&TA, 1); + MBEDTLS_MPI_CHK( mpi_sub_abs( &TA, &TA, &TB, j ) ); + ShiftRight( TA.p, TA.n, 1 ); } else { - MBEDTLS_MPI_CHK(mpi_sub_abs(&TB, &TB, &TA, i)); - mbedtls_mpi_shift_r(&TB, 1); + MBEDTLS_MPI_CHK( mpi_sub_abs( &TB, &TB, &TA, i ) ); + ShiftRight( TB.p, TB.n, 1 ); } } - MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&TB, lz)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(G, &TB)); + MBEDTLS_MPI_CHK( 
mbedtls_mpi_shift_l( &TB, lz ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( G, &TB ) ); cleanup: - mbedtls_mpi_free(&TA); - mbedtls_mpi_free(&TB); - return ret; + mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TB ); + return( ret ); } /** * \brief Fill an MPI with a number of random bytes. * + * Use a temporary bytes representation to make sure the result is the + * same regardless of the platform endianness (useful when f_rng is + * actually deterministic, eg for tests). + * * \param X The destination MPI. This must point to an initialized MPI. * \param size The number of random bytes to generate. * \param f_rng The RNG function to use. This must not be \c NULL. @@ -2254,23 +2283,23 @@ cleanup: * as a big-endian representation of an MPI; this can * be relevant in applications like deterministic ECDSA. */ -int mbedtls_mpi_fill_random(mbedtls_mpi *X, size_t size, - int (*f_rng)(void *, unsigned char *, size_t), - void *p_rng) +int mbedtls_mpi_fill_random( mbedtls_mpi *X, size_t size, + int (*f_rng)(void *, unsigned char *, size_t), + void *p_rng ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; - size_t const limbs = CHARS_TO_LIMBS(size); - size_t const overhead = (limbs * ciL) - size; + size_t const limbs = CHARS_TO_LIMBS( size ); + size_t const overhead = ( limbs * ciL ) - size; unsigned char *Xp; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(f_rng); - MBEDTLS_MPI_CHK(mbedtls_mpi_resize(X, limbs)); - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0)); - Xp = (unsigned char *)X->p; - MBEDTLS_MPI_CHK(f_rng(p_rng, Xp + overhead, size)); - mpi_bigendian_to_host(X->p, limbs); + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( f_rng ); + MBEDTLS_MPI_CHK(mbedtls_mpi_resize( X, limbs )); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); + Xp = (unsigned char*) X->p; + MBEDTLS_MPI_CHK( f_rng( p_rng, Xp + overhead, size ) ); + mpi_bigendian_to_host( X->p, limbs ); cleanup: - return ret; + return( ret ); } /** @@ -2289,136 +2318,108 @@ cleanup: * \return #MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if \p has no modular 
inverse * with respect to \p N. */ -int mbedtls_mpi_inv_mod(mbedtls_mpi *X, const mbedtls_mpi *A, - const mbedtls_mpi *N) +int mbedtls_mpi_inv_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *N ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; mbedtls_mpi G, TA, TU, U1, U2, TB, TV, V1, V2; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(A); - MPI_VALIDATE_RET(N); - if (mbedtls_mpi_cmp_int(N, 1) <= 0) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - mbedtls_mpi_init(&TA); - mbedtls_mpi_init(&TU); - mbedtls_mpi_init(&U1); - mbedtls_mpi_init(&U2); - mbedtls_mpi_init(&G); - mbedtls_mpi_init(&TB); - mbedtls_mpi_init(&TV); - mbedtls_mpi_init(&V1); - mbedtls_mpi_init(&V2); - MBEDTLS_MPI_CHK(mbedtls_mpi_gcd(&G, A, N)); - if (!mbedtls_mpi_is_one(&G)) + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( A ); + MPI_VALIDATE_RET( N ); + if( mbedtls_mpi_cmp_int( N, 1 ) <= 0 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TU ); mbedtls_mpi_init( &U1 ); mbedtls_mpi_init( &U2 ); + mbedtls_mpi_init( &G ); mbedtls_mpi_init( &TB ); mbedtls_mpi_init( &TV ); + mbedtls_mpi_init( &V1 ); mbedtls_mpi_init( &V2 ); + MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &G, A, N ) ); + if( mbedtls_mpi_cmp_int( &G, 1 ) != 0 ) { ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; goto cleanup; } - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&TA, A, N)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TU, &TA)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TB, N)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TV, N)); - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&U1, 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&U2, 0)); - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&V1, 0)); - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&V2, 1)); + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &TA, A, N ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TU, &TA ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, N ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TV, N ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U1, 1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U2, 0 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V1, 0 ) 
); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V2, 1 ) ); do { - while (!(TU.p[0] & 1)) + while( ( TU.p[0] & 1 ) == 0 ) { - mbedtls_mpi_shift_r(&TU, 1); - if ((U1.p[0] & 1) || (U2.p[0] & 1)) + ShiftRight( TU.p, TU.n, 1 ); + if( ( U1.p[0] & 1 ) != 0 || ( U2.p[0] & 1 ) != 0 ) { - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&U1, &U1, &TB)); - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U2, &U2, &TA)); + MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &U1, &U1, &TB ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &TA ) ); } - mbedtls_mpi_shift_r(&U1, 1); - mbedtls_mpi_shift_r(&U2, 1); + ShiftRight( U1.p, U1.n, 1 ); + ShiftRight( U2.p, U2.n, 1 ); } - while (!(TV.p[0] & 1)) + while( ( TV.p[0] & 1 ) == 0 ) { - mbedtls_mpi_shift_r(&TV, 1); - if ((V1.p[0] & 1) || (V2.p[0] & 1)) + ShiftRight( TV.p, TV.n, 1 ); + if( ( V1.p[0] & 1 ) != 0 || ( V2.p[0] & 1 ) != 0 ) { - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&V1, &V1, &TB)); - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V2, &V2, &TA)); + MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, &TB ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &TA ) ); } - mbedtls_mpi_shift_r(&V1, 1); - mbedtls_mpi_shift_r(&V2, 1); + ShiftRight( V1.p, V1.n, 1 ); + ShiftRight( V2.p, V2.n, 1 ); } - if (mbedtls_mpi_cmp_mpi(&TU, &TV) >= 0) + if( mbedtls_mpi_cmp_mpi( &TU, &TV ) >= 0 ) { - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&TU, &TU, &TV)); - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U1, &U1, &V1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U2, &U2, &V2)); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TU, &TU, &TV ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U1, &U1, &V1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &V2 ) ); } else { - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&TV, &TV, &TU)); - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V1, &V1, &U1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V2, &V2, &U2)); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TV, &TV, &TU ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, &U1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &U2 ) ); } - } while 
(!mbedtls_mpi_is_zero(&TU)); - while (V1.s < 0) - { - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&V1, &V1, N)); } - while (mbedtls_mpi_cmp_mpi(&V1, N) >= 0) - { - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V1, &V1, N)); - } - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, &V1)); + while( !mbedtls_mpi_is_zero(&TU) ); + while( mbedtls_mpi_cmp_int( &V1, 0 ) < 0 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, N ) ); + while( mbedtls_mpi_cmp_mpi( &V1, N ) >= 0 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, N ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &V1 ) ); cleanup: - mbedtls_mpi_free(&TA); - mbedtls_mpi_free(&TU); - mbedtls_mpi_free(&U1); - mbedtls_mpi_free(&U2); - mbedtls_mpi_free(&G); - mbedtls_mpi_free(&TB); - mbedtls_mpi_free(&TV); - mbedtls_mpi_free(&V1); - mbedtls_mpi_free(&V2); - return ret; + mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TU ); mbedtls_mpi_free( &U1 ); mbedtls_mpi_free( &U2 ); + mbedtls_mpi_free( &G ); mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TV ); + mbedtls_mpi_free( &V1 ); mbedtls_mpi_free( &V2 ); + return( ret ); } #if defined(MBEDTLS_GENPRIME) -static const short kSmallPrime[] = { - 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, - 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, - 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, - 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, - 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, - 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, - 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, - 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, - 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, - 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, - 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, - 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, -}; - -static struct Divisor 
kSmallDivisor[ARRAYLEN(kSmallPrime)]; - -static bool IsDivisible( const mbedtls_mpi_uint *Ap, size_t An, - mbedtls_mpi_sint b, struct Divisor d ) +static const short small_prime[] = { - size_t i; - mbedtls_mpi_uint x, y, z; - MBEDTLS_ASSERT(b >= 3); - for (i = An, y = 0; i > 0; i--) - { - x = Ap[i - 1]; - y = (y << biH) | (x >> biH); - z = Divide(y, d); - y -= z * b; - x <<= biH; - y = (y << biH) | (x >> biH); - z = Divide(y, d); - y -= z * b; - } - return !y; -} + 3, 5, 7, 11, 13, 17, 19, 23, + 29, 31, 37, 41, 43, 47, 53, 59, + 61, 67, 71, 73, 79, 83, 89, 97, + 101, 103, 107, 109, 113, 127, 131, 137, + 139, 149, 151, 157, 163, 167, 173, 179, + 181, 191, 193, 197, 199, 211, 223, 227, + 229, 233, 239, 241, 251, 257, 263, 269, + 271, 277, 281, 283, 293, 307, 311, 313, + 317, 331, 337, 347, 349, 353, 359, 367, + 373, 379, 383, 389, 397, 401, 409, 419, + 421, 431, 433, 439, 443, 449, 457, 461, + 463, 467, 479, 487, 491, 499, 503, 509, + 521, 523, 541, 547, 557, 563, 569, 571, + 577, 587, 593, 599, 601, 607, 613, 617, + 619, 631, 641, 643, 647, 653, 659, 661, + 673, 677, 683, 691, 701, 709, 719, 727, + 733, 739, 743, 751, 757, 761, 769, 773, + 787, 797, 809, 811, 821, 823, 827, 829, + 839, 853, 857, 859, 863, 877, 881, 883, + 887, 907, 911, 919, 929, 937, 941, 947, + 953, 967, 971, 977, 983, 991, 997, -103 +}; /* * Small divisors test (X must be positive) @@ -2429,126 +2430,114 @@ static bool IsDivisible( const mbedtls_mpi_uint *Ap, size_t An, * MBEDTLS_ERR_MPI_NOT_ACCEPTABLE: certain non-prime * other negative: error */ -static int mpi_check_small_factors(const mbedtls_mpi *X) +static int mpi_check_small_factors( const mbedtls_mpi *X ) { int ret = 0; - size_t i, n; - static bool once; - if (!(X->p[0] & 1)) - return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; - n = mbedtls_mpi_limbs(X); - if (!once) { - for (i = 0; i < ARRAYLEN(kSmallPrime); ++i) - kSmallDivisor[i] = GetDivisor(kSmallPrime[i]); - once = true; + size_t i; + mbedtls_mpi_uint r; + if( ( X->p[0] & 1 ) == 0 ) + return( 
MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ); + for( i = 0; small_prime[i] > 0; i++ ) + { + if( mbedtls_mpi_cmp_int( X, small_prime[i] ) <= 0 ) + return( 1 ); + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, small_prime[i] ) ); + if( r == 0 ) + return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ); } - for (i = 0; i < ARRAYLEN(kSmallPrime); i++) { - if (n == 1 && mbedtls_mpi_cmp_int(X, kSmallPrime[i]) <= 0) - return 1; - if (IsDivisible(X->p, X->n, kSmallPrime[i], kSmallDivisor[i])) - return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; - } - return ret; +cleanup: + return( ret ); } /* * Miller-Rabin pseudo-primality test (HAC 4.24) */ -static int mpi_miller_rabin(const mbedtls_mpi *X, size_t rounds, - int (*f_rng)(void *, unsigned char *, size_t), - void *p_rng) +static int mpi_miller_rabin( const mbedtls_mpi *X, size_t rounds, + int (*f_rng)(void *, unsigned char *, size_t), + void *p_rng ) { int ret, count; size_t i, j, k, s; mbedtls_mpi W, R, T, A, RR; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(f_rng); - mbedtls_mpi_init(&W); - mbedtls_mpi_init(&R); - mbedtls_mpi_init(&T); - mbedtls_mpi_init(&A); - mbedtls_mpi_init(&RR); + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( f_rng ); + mbedtls_mpi_init( &W ); mbedtls_mpi_init( &R ); + mbedtls_mpi_init( &T ); mbedtls_mpi_init( &A ); + mbedtls_mpi_init( &RR ); /* * W = |X| - 1 * R = W >> lsb( W ) */ - MBEDTLS_MPI_CHK(mbedtls_mpi_sub_int(&W, X, 1)); - s = mbedtls_mpi_lsb(&W); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&R, &W)); - mbedtls_mpi_shift_r(&R, s); - for (i = 0; i < rounds; i++) + MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &W, X, 1 ) ); + s = mbedtls_mpi_lsb( &W ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R, &W ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &R, s ) ); + for( i = 0; i < rounds; i++ ) { /* * pick a random A, 1 < A < |X| - 1 */ count = 0; - do - { - MBEDTLS_MPI_CHK(mbedtls_mpi_fill_random(&A, X->n * ciL, f_rng, p_rng)); - j = mbedtls_mpi_bitlen(&A); - k = mbedtls_mpi_bitlen(&W); - if (j > k) - { - A.p[A.n - 1] &= ((mbedtls_mpi_uint)1 << (k - (A.n - 1) * biL - 
1)) - 1; + do { + MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( &A, X->n * ciL, f_rng, p_rng ) ); + j = mbedtls_mpi_bitlen( &A ); + k = mbedtls_mpi_bitlen( &W ); + if (j > k) { + A.p[A.n - 1] &= ( (mbedtls_mpi_uint) 1 << ( k - ( A.n - 1 ) * biL - 1 ) ) - 1; } - if (count++ > 30) - { + if (count++ > 30) { ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; goto cleanup; } - } while (mbedtls_mpi_cmp_mpi(&A, &W) >= 0 || - mbedtls_mpi_cmp_int(&A, 1) <= 0); - + } while ( mbedtls_mpi_cmp_mpi( &A, &W ) >= 0 || + mbedtls_mpi_cmp_int( &A, 1 ) <= 0 ); /* * A = A^R mod |X| */ - MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&A, &A, &R, X, &RR)); - if (!mbedtls_mpi_cmp_mpi(&A, &W) || mbedtls_mpi_is_one(&A)) + MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &A, &A, &R, X, &RR ) ); + if( mbedtls_mpi_cmp_mpi( &A, &W ) == 0 || + mbedtls_mpi_cmp_int( &A, 1 ) == 0 ) continue; j = 1; - - while (j < s && mbedtls_mpi_cmp_mpi(&A, &W)) + while( j < s && mbedtls_mpi_cmp_mpi( &A, &W ) != 0 ) { /* * A = A * A mod |X| */ - MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&T, &A, &A)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&A, &T, X)); - if (mbedtls_mpi_is_one(&A)) break; + MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &T, &A, &A ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &A, &T, X ) ); + if( mbedtls_mpi_cmp_int( &A, 1 ) == 0 ) + break; j++; } - /* * not prime if A != |X| - 1 or A == 1 */ - if (mbedtls_mpi_cmp_mpi(&A, &W) || mbedtls_mpi_is_one(&A)) + if( mbedtls_mpi_cmp_mpi( &A, &W ) != 0 || + mbedtls_mpi_cmp_int( &A, 1 ) == 0 ) { ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; break; } } - cleanup: - mbedtls_mpi_free(&W); - mbedtls_mpi_free(&R); - mbedtls_mpi_free(&T); - mbedtls_mpi_free(&A); - mbedtls_mpi_free(&RR); - return ret; + mbedtls_mpi_free( &W ); mbedtls_mpi_free( &R ); + mbedtls_mpi_free( &T ); mbedtls_mpi_free( &A ); + mbedtls_mpi_free( &RR ); + return( ret ); } /** * \brief Miller-Rabin primality test. 
* - * \warning If \p X is potentially generated by an adversary, for - * example when validating cryptographic parameters that - * you didn't generate yourself and that are supposed to - * be prime, then \p rounds should be at least the half - * of the security strength of the cryptographic - * algorithm. On the other hand, if \p X is chosen - * uniformly or non-adversially (as is the case when - * mbedtls_mpi_gen_prime calls this function), then \p - * rounds can be much lower. + * \warning If \p X is potentially generated by an adversary, for example + * when validating cryptographic parameters that you didn't + * generate yourself and that are supposed to be prime, then + * \p rounds should be at least the half of the security + * strength of the cryptographic algorithm. On the other hand, + * if \p X is chosen uniformly or non-adversially (as is the + * case when mbedtls_mpi_gen_prime calls this function), then + * \p rounds can be much lower. * * \param X The MPI to check for primality. * This must point to an initialized MPI. @@ -2561,32 +2550,33 @@ cleanup: * a context parameter. * * \return \c 0 if successful, i.e. \p X is probably prime. - * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a allocation failed. + * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. * \return #MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if \p X is not prime. - * \return Another negative error code on other failures. + * \return Another negative error code on other kinds of failure. 
*/ -int mbedtls_mpi_is_prime_ext(const mbedtls_mpi *X, int rounds, - int (*f_rng)(void *, unsigned char *, size_t), - void *p_rng) +int mbedtls_mpi_is_prime_ext( const mbedtls_mpi *X, int rounds, + int (*f_rng)(void *, unsigned char *, size_t), + void *p_rng ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; mbedtls_mpi XX; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(f_rng); + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( f_rng ); XX.s = 1; XX.n = X->n; XX.p = X->p; - if (mbedtls_mpi_is_zero(&XX) || mbedtls_mpi_is_one(&XX)) - return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; - if (!mbedtls_mpi_cmp_int(&XX, 2)) - return 0; - if ((ret = mpi_check_small_factors(&XX))) + if( mbedtls_mpi_cmp_int( &XX, 0 ) == 0 || + mbedtls_mpi_cmp_int( &XX, 1 ) == 0 ) + return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ); + if( mbedtls_mpi_cmp_int( &XX, 2 ) == 0 ) + return( 0 ); + if( ( ret = mpi_check_small_factors( &XX ) ) != 0 ) { - if (ret == 1) - return 0; - return ret; + if( ret == 1 ) + return( 0 ); + return( ret ); } - return mpi_miller_rabin(&XX, rounds, f_rng, p_rng); + return( mpi_miller_rabin( &XX, rounds, f_rng, p_rng ) ); } /** @@ -2609,37 +2599,33 @@ int mbedtls_mpi_is_prime_ext(const mbedtls_mpi *X, int rounds, * * \return \c 0 if successful, in which case \p X holds a * probably prime number. - * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a allocation failed. - * \return #MBEDTLS_ERR_MPI_BAD_INPUT_DATA if `nbits` is not - * between \c 3 and #MBEDTLS_MPI_MAX_BITS. + * \return #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed. + * \return #MBEDTLS_ERR_MPI_BAD_INPUT_DATA if `nbits` is not between + * \c 3 and #MBEDTLS_MPI_MAX_BITS. 
*/ -int mbedtls_mpi_gen_prime(mbedtls_mpi *X, size_t nbits, int flags, - int (*f_rng)(void *, unsigned char *, size_t), - void *p_rng) +int mbedtls_mpi_gen_prime( mbedtls_mpi *X, size_t nbits, int flags, + int (*f_rng)(void *, unsigned char *, size_t), + void *p_rng ) { int ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; size_t k, n; int rounds; mbedtls_mpi_uint r; mbedtls_mpi Y; - MPI_VALIDATE_RET(X); - MPI_VALIDATE_RET(f_rng); - if (nbits < 3 || nbits > MBEDTLS_MPI_MAX_BITS) - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - mbedtls_mpi_init(&Y); - n = BITS_TO_LIMBS(nbits); - if (!(flags & MBEDTLS_MPI_GEN_PRIME_FLAG_LOW_ERR)) + MPI_VALIDATE_RET( X ); + MPI_VALIDATE_RET( f_rng ); + if( nbits < 3 || nbits > MBEDTLS_MPI_MAX_BITS ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + mbedtls_mpi_init( &Y ); + n = BITS_TO_LIMBS( nbits ); + if( ( flags & MBEDTLS_MPI_GEN_PRIME_FLAG_LOW_ERR ) == 0 ) { /* * 2^-80 error probability, number of rounds chosen per HAC, table 4.4 */ - rounds = ((nbits >= 1300) ? 2 - : (nbits >= 850) ? 3 - : (nbits >= 650) ? 4 - : (nbits >= 350) ? 8 - : (nbits >= 250) ? 12 - : (nbits >= 150) ? 18 - : 27); + rounds = ( ( nbits >= 1300 ) ? 2 : ( nbits >= 850 ) ? 3 : + ( nbits >= 650 ) ? 4 : ( nbits >= 350 ) ? 8 : + ( nbits >= 250 ) ? 12 : ( nbits >= 150 ) ? 18 : 27 ); } else { @@ -2647,29 +2633,24 @@ int mbedtls_mpi_gen_prime(mbedtls_mpi *X, size_t nbits, int flags, * 2^-100 error probability, number of rounds computed based on HAC, * fact 4.48 */ - rounds = ((nbits >= 1450) ? 4 - : (nbits >= 1150) ? 5 - : (nbits >= 1000) ? 6 - : (nbits >= 850) ? 7 - : (nbits >= 750) ? 8 - : (nbits >= 500) ? 13 - : (nbits >= 250) ? 28 - : (nbits >= 150) ? 40 - : 51); + rounds = ( ( nbits >= 1450 ) ? 4 : ( nbits >= 1150 ) ? 5 : + ( nbits >= 1000 ) ? 6 : ( nbits >= 850 ) ? 7 : + ( nbits >= 750 ) ? 8 : ( nbits >= 500 ) ? 13 : + ( nbits >= 250 ) ? 28 : ( nbits >= 150 ) ? 
40 : 51 ); } - while (1) + while( 1 ) { - MBEDTLS_MPI_CHK(mbedtls_mpi_fill_random(X, n * ciL, f_rng, p_rng)); - /* make sure generated number is at least (nbits-1)+0.5 bits (FIPS 186-4 - * §B.3.3 steps 4.4, 5.5) */ - if (X->p[n - 1] < 0xb504f333f9de6485ULL /* ceil(2^63.5) */) continue; + MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( X, n * ciL, f_rng, p_rng ) ); + /* make sure generated number is at least (nbits-1)+0.5 bits (FIPS 186-4 §B.3.3 steps 4.4, 5.5) */ + if( X->p[n-1] < 0xb504f333f9de6485ULL /* ceil(2^63.5) */ ) continue; k = n * biL; - if (k > nbits) mbedtls_mpi_shift_r(X, k - nbits); + if( k > nbits ) MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( X, k - nbits ) ); X->p[0] |= 1; - if (!(flags & MBEDTLS_MPI_GEN_PRIME_FLAG_DH)) + if( ( flags & MBEDTLS_MPI_GEN_PRIME_FLAG_DH ) == 0 ) { - ret = mbedtls_mpi_is_prime_ext(X, rounds, f_rng, p_rng); - if (ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE) goto cleanup; + ret = mbedtls_mpi_is_prime_ext( X, rounds, f_rng, p_rng ); + if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ) + goto cleanup; } else { @@ -2679,158 +2660,178 @@ int mbedtls_mpi_gen_prime(mbedtls_mpi *X, size_t nbits, int flags, * Make sure it is satisfied, while keeping X = 3 mod 4 */ X->p[0] |= 2; - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_int(&r, X, 3)); - if (r == 0) - MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 8)); - else if (r == 1) - MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 4)); + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, 3 ) ); + if( r == 0 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 8 ) ); + else if( r == 1 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 4 ) ); /* Set Y = (X-1) / 2, which is X / 2 because X is odd */ - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Y, X)); - mbedtls_mpi_shift_r( &Y, 1 ); - while (1) + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Y, X ) ); + ShiftRight( Y.p, Y.n, 1 ); + while( 1 ) { /* * First, check small factors for X and Y * before doing Miller-Rabin on any of them */ - if (!(ret = mpi_check_small_factors(X)) && - !(ret = mpi_check_small_factors(&Y)) && - 
!(ret = mpi_miller_rabin(X, rounds, f_rng, p_rng)) && - !(ret = mpi_miller_rabin(&Y, rounds, f_rng, p_rng))) + if( ( ret = mpi_check_small_factors( X ) ) == 0 && + ( ret = mpi_check_small_factors( &Y ) ) == 0 && + ( ret = mpi_miller_rabin( X, rounds, f_rng, p_rng ) ) + == 0 && + ( ret = mpi_miller_rabin( &Y, rounds, f_rng, p_rng ) ) + == 0 ) + goto cleanup; + if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ) goto cleanup; - if (ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE) goto cleanup; /* * Next candidates. We want to preserve Y = (X-1) / 2 and * Y = 1 mod 2 and Y = 2 mod 3 (eq X = 3 mod 4 and X = 2 mod 3) * so up Y by 6 and X by 12. */ - MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 12)); - MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(&Y, &Y, 6)); + MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 12 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( &Y, &Y, 6 ) ); } } } cleanup: - mbedtls_mpi_free(&Y); - return ret; + mbedtls_mpi_free( &Y ); + return( ret ); } #endif /* MBEDTLS_GENPRIME */ #if defined(MBEDTLS_SELF_TEST) -#define GCD_PAIR_COUNT 3 +#define GCD_PAIR_COUNT 3 -static const int gcd_pairs[GCD_PAIR_COUNT][3] = { - {693, 609, 21}, {1764, 868, 28}, {768454923, 542167814, 1}}; +static const int gcd_pairs[GCD_PAIR_COUNT][3] = +{ + { 693, 609, 21 }, + { 1764, 868, 28 }, + { 768454923, 542167814, 1 } +}; /** * \brief Checkup routine * * \return 0 if successful, or 1 if the test failed */ -int mbedtls_mpi_self_test(int verbose) +int mbedtls_mpi_self_test( int verbose ) { int ret, i; mbedtls_mpi A, E, N, X, Y, U, V; - mbedtls_mpi_init(&A); - mbedtls_mpi_init(&E); - mbedtls_mpi_init(&N); - mbedtls_mpi_init(&X); - mbedtls_mpi_init(&Y); - mbedtls_mpi_init(&U); - mbedtls_mpi_init(&V); - MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&A, 16, - "EFE021C2645FD1DC586E69184AF4A31E" - "D5F53E93B5F123FA41680867BA110131" - "944FE7952E2517337780CB0DB80E61AA" - "E7C8DDC6C5C6AADEB34EB38A2F40D5E6")); - MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&E, 16, - "B2E7EFD37075B9F03FF989C7C5051C20" - 
"34D2A323810251127E7BF8625A4F49A5" - "F3E27F4DA8BD59C47D6DAABA4C8127BD" - "5B5C25763222FEFCCFC38B832366C29E")); - MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&N, 16, - "0066A198186C18C10B2F5ED9B522752A" - "9830B69916E535C8F047518A889A43A5" - "94B6BED27A168D31D4A52F88925AA8F5")); - MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&X, &A, &N)); - MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16, - "602AB7ECA597A3D6B56FF9829A5E8B85" - "9E857EA95A03512E2BAE7391688D264A" - "A5663B0341DB9CCFD2C4C5F421FEC814" - "8001B72E848A38CAE1C65F78E56ABDEF" - "E12D3C039B8A02D6BE593F0BBBDA56F1" - "ECF677152EF804370C1A305CAF3B5BF1" - "30879B56C61DE584A0F53A2447A51E")); - if (verbose) mbedtls_printf(" MPI test #1 (mul_mpi): "); - if (mbedtls_mpi_cmp_mpi(&X, &U)) { - if (verbose) mbedtls_printf("failed\n"); + mbedtls_mpi_init( &A ); mbedtls_mpi_init( &E ); mbedtls_mpi_init( &N ); mbedtls_mpi_init( &X ); + mbedtls_mpi_init( &Y ); mbedtls_mpi_init( &U ); mbedtls_mpi_init( &V ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &A, 16, + "EFE021C2645FD1DC586E69184AF4A31E" \ + "D5F53E93B5F123FA41680867BA110131" \ + "944FE7952E2517337780CB0DB80E61AA" \ + "E7C8DDC6C5C6AADEB34EB38A2F40D5E6" ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &E, 16, + "B2E7EFD37075B9F03FF989C7C5051C20" \ + "34D2A323810251127E7BF8625A4F49A5" \ + "F3E27F4DA8BD59C47D6DAABA4C8127BD" \ + "5B5C25763222FEFCCFC38B832366C29E" ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &N, 16, + "0066A198186C18C10B2F5ED9B522752A" \ + "9830B69916E535C8F047518A889A43A5" \ + "94B6BED27A168D31D4A52F88925AA8F5" ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &X, &A, &N ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16, + "602AB7ECA597A3D6B56FF9829A5E8B85" \ + "9E857EA95A03512E2BAE7391688D264A" \ + "A5663B0341DB9CCFD2C4C5F421FEC814" \ + "8001B72E848A38CAE1C65F78E56ABDEF" \ + "E12D3C039B8A02D6BE593F0BBBDA56F1" \ + "ECF677152EF804370C1A305CAF3B5BF1" \ + "30879B56C61DE584A0F53A2447A51E" ) ); + if( verbose != 0 ) + mbedtls_printf( " MPI test #1 (mul_mpi): " ); + if( 
mbedtls_mpi_cmp_mpi( &X, &U ) != 0 ) + { + if( verbose != 0 ) + mbedtls_printf( "failed\n" ); ret = 1; goto cleanup; } - if (verbose) mbedtls_printf("passed\n"); - MBEDTLS_MPI_CHK(mbedtls_mpi_div_mpi(&X, &Y, &A, &N)); - MBEDTLS_MPI_CHK( - mbedtls_mpi_read_string(&U, 16, "256567336059E52CAE22925474705F39A94")); - MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&V, 16, - "6613F26162223DF488E9CD48CC132C7A" - "0AC93C701B001B092E4E5B9F73BCD27B" - "9EE50D0657C77F374E903CDFA4C642")); - if (verbose) mbedtls_printf(" MPI test #2 (div_mpi): "); - if (mbedtls_mpi_cmp_mpi(&X, &U) || mbedtls_mpi_cmp_mpi(&Y, &V)) { - if (verbose) mbedtls_printf("failed\n"); + if( verbose != 0 ) + mbedtls_printf( "passed\n" ); + MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( &X, &Y, &A, &N ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16, + "256567336059E52CAE22925474705F39A94" ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &V, 16, + "6613F26162223DF488E9CD48CC132C7A" \ + "0AC93C701B001B092E4E5B9F73BCD27B" \ + "9EE50D0657C77F374E903CDFA4C642" ) ); + if( verbose != 0 ) + mbedtls_printf( " MPI test #2 (div_mpi): " ); + if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 || + mbedtls_mpi_cmp_mpi( &Y, &V ) != 0 ) + { + if( verbose != 0 ) + mbedtls_printf( "failed\n" ); ret = 1; goto cleanup; } - if (verbose) mbedtls_printf("passed\n"); - MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&X, &A, &E, &N, NULL)); - MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16, - "36E139AEA55215609D2816998ED020BB" - "BD96C37890F65171D948E9BC7CBAA4D9" - "325D24D6A3C12710F10A09FA08AB87")); - if (verbose) mbedtls_printf(" MPI test #3 (exp_mod): "); - if (mbedtls_mpi_cmp_mpi(&X, &U)) { - if (verbose) mbedtls_printf("failed\n"); + if( verbose != 0 ) + mbedtls_printf( "passed\n" ); + MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &X, &A, &E, &N, NULL ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16, + "36E139AEA55215609D2816998ED020BB" \ + "BD96C37890F65171D948E9BC7CBAA4D9" \ + "325D24D6A3C12710F10A09FA08AB87" ) ); + if( verbose != 0 ) + mbedtls_printf( " MPI 
test #3 (exp_mod): " ); + if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 ) + { + if( verbose != 0 ) + mbedtls_printf( "failed\n" ); ret = 1; goto cleanup; } - if (verbose) mbedtls_printf("passed\n"); - MBEDTLS_MPI_CHK(mbedtls_mpi_inv_mod(&X, &A, &N)); - MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16, - "003A0AAEDD7E784FC07D8F9EC6E3BFD5" - "C3DBA76456363A10869622EAC2DD84EC" - "C5B8A74DAC4D09E03B5E0BE779F2DF61")); - if (verbose) mbedtls_printf(" MPI test #4 (inv_mod): "); - if (mbedtls_mpi_cmp_mpi(&X, &U)) { - if (verbose) mbedtls_printf("failed\n"); + if( verbose != 0 ) + mbedtls_printf( "passed\n" ); + MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &X, &A, &N ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16, + "003A0AAEDD7E784FC07D8F9EC6E3BFD5" \ + "C3DBA76456363A10869622EAC2DD84EC" \ + "C5B8A74DAC4D09E03B5E0BE779F2DF61" ) ); + if( verbose != 0 ) + mbedtls_printf( " MPI test #4 (inv_mod): " ); + if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 ) + { + if( verbose != 0 ) + mbedtls_printf( "failed\n" ); ret = 1; goto cleanup; } - if (verbose) mbedtls_printf("passed\n"); - if (verbose) mbedtls_printf(" MPI test #5 (simple gcd): "); - for (i = 0; i < GCD_PAIR_COUNT; i++) { - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&X, gcd_pairs[i][0])); - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&Y, gcd_pairs[i][1])); - MBEDTLS_MPI_CHK(mbedtls_mpi_gcd(&A, &X, &Y)); - if (mbedtls_mpi_cmp_int(&A, gcd_pairs[i][2])) { - if (verbose) mbedtls_printf("failed at %d\n", i); + if( verbose != 0 ) + mbedtls_printf( "passed\n" ); + if( verbose != 0 ) + mbedtls_printf( " MPI test #5 (simple gcd): " ); + for( i = 0; i < GCD_PAIR_COUNT; i++ ) + { + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &X, gcd_pairs[i][0] ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Y, gcd_pairs[i][1] ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &A, &X, &Y ) ); + if( mbedtls_mpi_cmp_int( &A, gcd_pairs[i][2] ) != 0 ) + { + if( verbose != 0 ) + mbedtls_printf( "failed at %d\n", i ); ret = 1; goto cleanup; } } - if (verbose) mbedtls_printf("passed\n"); + if( verbose != 0 ) + 
mbedtls_printf( "passed\n" ); cleanup: - if (ret && verbose) - mbedtls_printf("Unexpected error, return code = %08X\n", (unsigned int)ret); - mbedtls_mpi_free(&A); - mbedtls_mpi_free(&E); - mbedtls_mpi_free(&N); - mbedtls_mpi_free(&X); - mbedtls_mpi_free(&Y); - mbedtls_mpi_free(&U); - mbedtls_mpi_free(&V); - if (verbose) mbedtls_printf("\n"); - return ret; + if( ret != 0 && verbose != 0 ) + mbedtls_printf( "Unexpected error, return code = %08X\n", (unsigned int) ret ); + mbedtls_mpi_free( &A ); mbedtls_mpi_free( &E ); mbedtls_mpi_free( &N ); mbedtls_mpi_free( &X ); + mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &U ); mbedtls_mpi_free( &V ); + if( verbose != 0 ) + mbedtls_printf( "\n" ); + return( ret ); } #endif /* MBEDTLS_SELF_TEST */ diff --git a/third_party/mbedtls/config.h b/third_party/mbedtls/config.h index fd3a085cf..c94cab0c7 100644 --- a/third_party/mbedtls/config.h +++ b/third_party/mbedtls/config.h @@ -80,17 +80,17 @@ #ifndef TINY #define MBEDTLS_ECP_DP_SECP256R1_ENABLED #define MBEDTLS_ECP_DP_SECP384R1_ENABLED +#define MBEDTLS_ECP_DP_SECP521R1_ENABLED #define MBEDTLS_ECP_DP_CURVE25519_ENABLED +#define MBEDTLS_ECP_DP_CURVE448_ENABLED /*#define MBEDTLS_ECP_DP_SECP192R1_ENABLED*/ /*#define MBEDTLS_ECP_DP_SECP224R1_ENABLED*/ -/*#define MBEDTLS_ECP_DP_SECP521R1_ENABLED*/ /*#define MBEDTLS_ECP_DP_SECP192K1_ENABLED*/ /*#define MBEDTLS_ECP_DP_SECP224K1_ENABLED*/ /*#define MBEDTLS_ECP_DP_SECP256K1_ENABLED*/ /*#define MBEDTLS_ECP_DP_BP256R1_ENABLED*/ /*#define MBEDTLS_ECP_DP_BP384R1_ENABLED*/ /*#define MBEDTLS_ECP_DP_BP512R1_ENABLED*/ -/*#define MBEDTLS_ECP_DP_CURVE448_ENABLED*/ #endif #define MBEDTLS_X509_CHECK_KEY_USAGE diff --git a/third_party/mbedtls/ecdh.h b/third_party/mbedtls/ecdh.h index 93215bae6..16010d7ca 100644 --- a/third_party/mbedtls/ecdh.h +++ b/third_party/mbedtls/ecdh.h @@ -1,8 +1,8 @@ #ifndef MBEDTLS_ECDH_H #define MBEDTLS_ECDH_H #include "third_party/mbedtls/config.h" +#include "third_party/mbedtls/ecdh_everest.h" #include 
"third_party/mbedtls/ecp.h" -#include "third_party/mbedtls/everest.h" /* clang-format off */ #ifdef __cplusplus diff --git a/third_party/mbedtls/ecdh_everest.c b/third_party/mbedtls/ecdh_everest.c new file mode 100644 index 000000000..d29996de6 --- /dev/null +++ b/third_party/mbedtls/ecdh_everest.c @@ -0,0 +1,279 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright The Mbed TLS Contributors │ +│ │ +│ Licensed under the Apache License, Version 2.0 (the "License"); │ +│ you may not use this file except in compliance with the License. │ +│ You may obtain a copy of the License at │ +│ │ +│ http://www.apache.org/licenses/LICENSE-2.0 │ +│ │ +│ Unless required by applicable law or agreed to in writing, software │ +│ distributed under the License is distributed on an "AS IS" BASIS, │ +│ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. │ +│ See the License for the specific language governing permissions and │ +│ limitations under the License. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "third_party/mbedtls/ecdh_everest.h" +#include "third_party/mbedtls/everest.h" +#if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED) +#define KEYSIZE 32 + +asm(".ident\t\"\\n\\n\ +Mbed TLS (Apache 2.0)\\n\ +Copyright ARM Limited\\n\ +Copyright Mbed TLS Contributors\""); +asm(".include \"libc/disclaimer.inc\""); +/* clang-format off */ + +/** + * \brief This function sets up the ECDH context with the information + * given. + * + * This function should be called after mbedtls_ecdh_init() but + * before mbedtls_ecdh_make_params(). There is no need to call + * this function before mbedtls_ecdh_read_params(). + * + * This is the first function used by a TLS server for + * ECDHE ciphersuites. 
+ * + * \param ctx The ECDH context to set up. + * \param grp_id The group id of the group to set up the context for. + * + * \return \c 0 on success. + */ +int mbedtls_everest_setup(mbedtls_ecdh_context_everest *ctx, int grp_id) +{ + if (grp_id != MBEDTLS_ECP_DP_CURVE25519) + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + mbedtls_platform_zeroize(ctx, sizeof(*ctx)); + return 0; +} + +/** + * \brief This function frees a context. + * + * \param ctx The context to free. + */ +void mbedtls_everest_free(mbedtls_ecdh_context_everest *ctx) +{ + if (!ctx) return; + mbedtls_platform_zeroize(ctx, sizeof(*ctx)); +} + +/** + * \brief This function generates a public key and a TLS + * ServerKeyExchange payload. + * + * This is the second function used by a TLS server for ECDHE + * ciphersuites. (It is called after mbedtls_ecdh_setup().) + * + * \note This function assumes that the ECP group (grp) of the + * \p ctx context has already been properly set, + * for example, using mbedtls_ecp_group_load(). + * + * \see ecp.h + * + * \param ctx The ECDH context. + * \param olen The number of characters written. + * \param buf The destination buffer. + * \param blen The length of the destination buffer. + * \param f_rng The RNG function. + * \param p_rng The RNG context. + * + * \return \c 0 on success. + * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. 
+ */ +int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *ctx, size_t *olen, + unsigned char *buf, size_t blen, + int (*f_rng)(void *, unsigned char *, size_t), + void *p_rng) +{ + int ret = 0; + uint8_t base[KEYSIZE] = {9}; + if ((ret = f_rng(p_rng, ctx->our_secret, KEYSIZE)) != 0) return ret; + *olen = KEYSIZE + 4; + if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL; + *buf++ = MBEDTLS_ECP_TLS_NAMED_CURVE; + *buf++ = MBEDTLS_ECP_TLS_CURVE25519 >> 8; + *buf++ = MBEDTLS_ECP_TLS_CURVE25519 & 0xFF; + *buf++ = KEYSIZE; + curve25519(buf, ctx->our_secret, base); + base[0] = 0; + if (!timingsafe_memcmp(buf, base, KEYSIZE)) + return MBEDTLS_ERR_ECP_RANDOM_FAILED; + return 0; +} + +/** + * \brief This function parses and processes a TLS ServerKeyExchange + * payload. + * + * This is the first function used by a TLS client for ECDHE + * ciphersuites. + * + * \see ecp.h + * + * \param ctx The ECDH context. + * \param buf The pointer to the start of the input buffer. + * \param end The address for one Byte past the end of the buffer. + * + * \return \c 0 on success. + * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. + */ +int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *ctx, + const unsigned char **buf, + const unsigned char *end) +{ + if (end - *buf < KEYSIZE + 1) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + if ((*(*buf)++ != KEYSIZE)) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + memcpy(ctx->peer_point, *buf, KEYSIZE); + *buf += KEYSIZE; + return 0; +} + +/** + * \brief This function sets up an ECDH context from an EC key. + * + * It is used by clients and servers in place of the + * ServerKeyExchange for static ECDH, and imports ECDH + * parameters from the EC key information of a certificate. + * + * \see ecp.h + * + * \param ctx The ECDH context to set up. + * \param key The EC key to use. + * \param side Defines the source of the key: MBEDTLS_EVEREST_ECDH_OURS for + * our key, or MBEDTLS_EVEREST_ECDH_THEIRS for the key of the peer. + * + * \return \c 0 on success.
+ * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. + */ +int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *ctx, + const mbedtls_ecp_keypair *key, + mbedtls_everest_ecdh_side side) +{ + size_t olen = 0; + mbedtls_everest_ecdh_side s; + switch (side) + { + case MBEDTLS_EVEREST_ECDH_THEIRS: + return mbedtls_ecp_point_write_binary(&key->grp, &key->Q, + MBEDTLS_ECP_PF_COMPRESSED, &olen, + ctx->peer_point, KEYSIZE); + case MBEDTLS_EVEREST_ECDH_OURS: + return mbedtls_mpi_write_binary_le(&key->d, ctx->our_secret, KEYSIZE); + default: + return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + } +} + +/** + * \brief This function generates a public key and a TLS + * ClientKeyExchange payload. + * + * This is the second function used by a TLS client for ECDH(E) + * ciphersuites. + * + * \see ecp.h + * + * \param ctx The ECDH context. + * \param olen The number of Bytes written. + * \param buf The destination buffer. + * \param blen The size of the destination buffer. + * \param f_rng The RNG function. + * \param p_rng The RNG context. + * + * \return \c 0 on success. + * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. + */ +int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *ctx, size_t *olen, + unsigned char *buf, size_t blen, + int (*f_rng)(void *, unsigned char *, size_t), + void *p_rng) +{ + int ret = 0; + unsigned char base[KEYSIZE] = {9}; + if (!ctx) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + if ((ret = f_rng(p_rng, ctx->our_secret, KEYSIZE))) return ret; + *olen = KEYSIZE + 1; + if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL; + *buf++ = KEYSIZE; + curve25519(buf, ctx->our_secret, base); + base[0] = 0; + if (!timingsafe_memcmp(buf, base, KEYSIZE)) + return MBEDTLS_ERR_ECP_RANDOM_FAILED; + return ret; +} + +/** + * \brief This function parses and processes a TLS ClientKeyExchange + * payload. + * + * This is the third function used by a TLS server for ECDH(E) + * ciphersuites. 
(It is called after mbedtls_ecdh_setup() and + * mbedtls_ecdh_make_params().) + * + * \see ecp.h + * + * \param ctx The ECDH context. + * \param buf The start of the input buffer. + * \param blen The length of the input buffer. + * + * \return \c 0 on success. + * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. + */ +int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *ctx, + const unsigned char *buf, size_t blen) +{ + if (blen < KEYSIZE + 1) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL; + if ((*buf++ != KEYSIZE)) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + memcpy(ctx->peer_point, buf, KEYSIZE); + return 0; +} + +/** + * \brief This function derives and exports the shared secret. + * + * This is the last function used by both TLS client + * and servers. + * + * \note If \p f_rng is not NULL, it is used to implement + * countermeasures against side-channel attacks. + * For more information, see mbedtls_ecp_mul(). + * + * \see ecp.h + * + * \param ctx The ECDH context. + * \param olen The number of Bytes written. + * \param buf The destination buffer. + * \param blen The length of the destination buffer. + * \param f_rng The RNG function. + * \param p_rng The RNG context. + * + * \return \c 0 on success. + * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. + */ +int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *ctx, size_t *olen, + unsigned char *buf, size_t blen, + int (*f_rng)(void *, unsigned char *, size_t), + void *p_rng) +{ + /* f_rng and p_rng are not used here because this implementation does not + need blinding since it has constant trace. (todo(jart): wut?) 
*/ + *olen = KEYSIZE; + if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL; + curve25519(buf, ctx->our_secret, ctx->peer_point); + if (!timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE)) goto wut; + /* Wipe the DH secret and don't let the peer choose a small subgroup point */ + mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE); + if (!timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE)) goto wut; + return 0; +wut: + mbedtls_platform_zeroize(buf, KEYSIZE); + mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE); + return MBEDTLS_ERR_ECP_RANDOM_FAILED; +} + +#endif diff --git a/third_party/mbedtls/ecdh_everest.h b/third_party/mbedtls/ecdh_everest.h new file mode 100644 index 000000000..6f756d5ac --- /dev/null +++ b/third_party/mbedtls/ecdh_everest.h @@ -0,0 +1,43 @@ +#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ +#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ +#include "third_party/mbedtls/config.h" +#include "third_party/mbedtls/ecp.h" +COSMOPOLITAN_C_START_ + +#define MBEDTLS_ECP_TLS_CURVE25519 0x1d +#define MBEDTLS_X25519_KEY_SIZE_BYTES 32 + +typedef enum { + MBEDTLS_EVEREST_ECDH_OURS, + MBEDTLS_EVEREST_ECDH_THEIRS, +} mbedtls_everest_ecdh_side; + +typedef struct { + unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES]; + unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES]; +} mbedtls_ecdh_context_everest; + +int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int); +void mbedtls_everest_free(mbedtls_ecdh_context_everest *); +int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *, + unsigned char *, size_t, + int (*)(void *, unsigned char *, size_t), + void *); +int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *, + const unsigned char **, const unsigned char *); +int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *, + const mbedtls_ecp_keypair *, + mbedtls_everest_ecdh_side); +int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *, + unsigned char *, size_t, + int (*)(void *, unsigned
char *, size_t), + void *); +int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *, + const unsigned char *, size_t); +int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *, + unsigned char *, size_t, + int (*)(void *, unsigned char *, size_t), + void *); + +COSMOPOLITAN_C_END_ +#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */ diff --git a/third_party/mbedtls/ecdsa.c b/third_party/mbedtls/ecdsa.c index 04beab389..0ac74dcd1 100644 --- a/third_party/mbedtls/ecdsa.c +++ b/third_party/mbedtls/ecdsa.c @@ -28,31 +28,12 @@ Mbed TLS (Apache 2.0)\\n\ Copyright ARM Limited\\n\ Copyright Mbed TLS Contributors\""); asm(".include \"libc/disclaimer.inc\""); - /* clang-format off */ -/* - * Elliptic curve DSA - * - * Copyright The Mbed TLS Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * References: +/** + * @fileoverview Elliptic curve Digital Signature Algorithm * - * SEC1 http://www.secg.org/index.php?action=secg,docs_secg + * @see SEC1 http://www.secg.org/index.php?action=secg,docs_secg */ #if defined(MBEDTLS_ECDSA_C) diff --git a/third_party/mbedtls/ecp.c b/third_party/mbedtls/ecp.c index 69e3cce88..d30fd906c 100644 --- a/third_party/mbedtls/ecp.c +++ b/third_party/mbedtls/ecp.c @@ -511,12 +511,15 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] = #if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED) { MBEDTLS_ECP_DP_CURVE25519, 29, 256, "x25519" }, #endif -#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) - { MBEDTLS_ECP_DP_SECP256R1, 23, 256, "secp256r1" }, -#endif #if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED) { MBEDTLS_ECP_DP_SECP384R1, 24, 384, "secp384r1" }, #endif +#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED) + { MBEDTLS_ECP_DP_CURVE448, 30, 448, "x448" }, +#endif +#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) + { MBEDTLS_ECP_DP_SECP256R1, 23, 256, "secp256r1" }, +#endif #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED) { MBEDTLS_ECP_DP_SECP521R1, 25, 521, "secp521r1" }, #endif @@ -543,9 +546,6 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] = #endif #if defined(MBEDTLS_ECP_DP_SECP192K1_ENABLED) { MBEDTLS_ECP_DP_SECP192K1, 18, 192, "secp192k1" }, -#endif -#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED) - { MBEDTLS_ECP_DP_CURVE448, 30, 448, "x448" }, #endif { MBEDTLS_ECP_DP_NONE, 0, 0, NULL }, }; diff --git a/third_party/mbedtls/ecp256.c b/third_party/mbedtls/ecp256.c index d2e4c1f65..8e5157669 100644 --- a/third_party/mbedtls/ecp256.c +++ b/third_party/mbedtls/ecp256.c @@ -38,32 +38,15 @@ mbedtls_p256_isz( uint64_t p[4] ) static inline bool mbedtls_p256_gte( uint64_t p[5] ) { - return( (p[4] || - p[3] > 0xffffffff00000001 || - (p[3] == 0xffffffff00000001 && - p[2] > 0x0000000000000000 || - (p[2] == 0x0000000000000000 && - p[1] > 0x00000000ffffffff || - (p[1] == 0x00000000ffffffff && - p[0] > 0xffffffffffffffff || - 
(p[0] == 0xffffffffffffffff))))) ); -} - -static int -mbedtls_p256_cmp( const uint64_t a[5], - const uint64_t b[5] ) -{ - if( a[4] < b[4] ) return -1; - if( a[4] > b[4] ) return 1; - if( a[3] < b[3] ) return -1; - if( a[3] > b[3] ) return 1; - if( a[2] < b[2] ) return -1; - if( a[2] > b[2] ) return 1; - if( a[1] < b[1] ) return -1; - if( a[1] > b[1] ) return 1; - if( a[0] < b[0] ) return -1; - if( a[0] > b[0] ) return 1; - return 0; + return( ((int64_t)p[4] > 0 || + (p[3] > 0xffffffff00000001 || + (p[3] == 0xffffffff00000001 && + (p[2] > 0x0000000000000000 || + (p[2] == 0x0000000000000000 && + (p[1] > 0x00000000ffffffff || + (p[1] == 0x00000000ffffffff && + (p[0] > 0xffffffffffffffff || + (p[0] == 0xffffffffffffffff))))))))) ); } static inline void @@ -119,125 +102,49 @@ mbedtls_p256_rum( uint64_t p[5] ) mbedtls_p256_red( p ); } +static void +mbedtls_p256_mod(uint64_t X[8]) +{ + secp256r1(X); + if ((int64_t)X[4] < 0) { + do { + mbedtls_p256_gro(X); + } while ((int64_t)X[4] < 0); + } else { + while (mbedtls_p256_gte(X)) { + mbedtls_p256_red(X); + } + } +} + static inline void mbedtls_p256_sar( uint64_t p[5] ) { -#if defined(__x86_64__) && !defined(__STRICT_ANSI__) - asm("sarq\t32+%0\n\t" - "rcrq\t24+%0\n\t" - "rcrq\t16+%0\n\t" - "rcrq\t8+%0\n\t" - "rcrq\t%0\n\t" - : "+o"(*p) - : /* no inputs */ - : "memory", "cc"); -#else p[0] = p[0] >> 1 | p[1] << 63; p[1] = p[1] >> 1 | p[2] << 63; p[2] = p[2] >> 1 | p[3] << 63; p[3] = p[3] >> 1 | p[4] << 63; p[4] = (int64_t)p[4] >> 1; -#endif } static inline void mbedtls_p256_shl( uint64_t p[5] ) { -#if defined(__x86_64__) && !defined(__STRICT_ANSI__) - asm("shlq\t%0\n\t" - "rclq\t8+%0\n\t" - "rclq\t16+%0\n\t" - "rclq\t24+%0\n\t" - "rclq\t32+%0\n\t" - : "+o"(*p) - : /* no inputs */ - : "memory", "cc"); -#else p[4] = p[3] >> 63; p[3] = p[3] << 1 | p[2] >> 63; p[2] = p[2] << 1 | p[1] >> 63; p[1] = p[1] << 1 | p[0] >> 63; p[0] = p[0] << 1; -#endif mbedtls_p256_rum( p ); } static inline void -mbedtls_p256_jam( uint64_t p[5] ) -{ - 
secp256r1( p ); - if( (int64_t)p[4] < 0 ) - do - mbedtls_p256_gro( p ); - while( (int64_t)p[4] < 0 ); - else - mbedtls_p256_rum( p ); -} - -static void -mbedtls_p256_mul_1x1( uint64_t X[8], - const uint64_t A[4], size_t n, - const uint64_t B[4], size_t m ) -{ - uint128_t t; - t = A[0]; - t *= B[0]; - X[ 0] = t; - X[ 1] = t >> 64; - X[ 2] = 0; - X[ 3] = 0; - X[ 4] = 0; - X[ 5] = 0; - X[ 6] = 0; - X[ 7] = 0; -} - -static void -mbedtls_p256_mul_nx1( uint64_t X[8], - const uint64_t A[4], size_t n, - const uint64_t B[4], size_t m ) -{ - mbedtls_mpi_mul_hlp1(n, A, X, B[0]); - mbedtls_platform_zeroize( X + n + m, ( 8 - n - m ) * 8 ); - if ( n + m >= 4 ) - mbedtls_p256_jam( X ); -} - -static void -mbedtls_p256_mul_4x4( uint64_t X[8], - const uint64_t A[4], size_t n, - const uint64_t B[4], size_t m ) -{ - Mul4x4( X, A, B ); - mbedtls_p256_jam( X ); -} - -static void -mbedtls_p256_mul_nxm( uint64_t X[8], - const uint64_t A[4], size_t n, - const uint64_t B[4], size_t m ) -{ - if (A == X) A = gc(memcpy(malloc(4 * 8), A, 4 * 8)); - if (B == X) B = gc(memcpy(malloc(4 * 8), B, 4 * 8)); - Mul( X, A, n, B, m ); - mbedtls_platform_zeroize( X + n + m, (8 - n - m) * 8 ); - if ( n + m >= 4 ) - mbedtls_p256_jam( X ); -} - -static void mbedtls_p256_mul( uint64_t X[8], const uint64_t A[4], size_t n, const uint64_t B[4], size_t m ) { - if( n == 4 && m == 4 ) - mbedtls_p256_mul_4x4( X, A, n, B, m ); - else if( m == 1 && n == 1 ) - mbedtls_p256_mul_1x1( X, A, n, B, m ); - else if( m == 1 ) - mbedtls_p256_mul_nx1( X, A, n, B, m ); - else - mbedtls_p256_mul_nxm( X, A, n, B, m ); + Mul4x4( X, A, B ); + mbedtls_p256_mod( X ); } static void diff --git a/third_party/mbedtls/ecp384.c b/third_party/mbedtls/ecp384.c index d4421f22f..a213f8b87 100644 --- a/third_party/mbedtls/ecp384.c +++ b/third_party/mbedtls/ecp384.c @@ -36,42 +36,20 @@ mbedtls_p384_isz( uint64_t p[6] ) } static inline bool -mbedtls_p384_gte( uint64_t p[7] ) -{ - return( (p[6] || - p[5] > 0xffffffffffffffff || - (p[5] == 
0xffffffffffffffff && - p[4] > 0xffffffffffffffff || - (p[4] == 0xffffffffffffffff && - p[3] > 0xffffffffffffffff || - (p[3] == 0xffffffffffffffff && - p[2] > 0xfffffffffffffffe || - (p[2] == 0xfffffffffffffffe && - p[1] > 0xffffffff00000000 || - (p[1] == 0xffffffff00000000 && - p[0] > 0x00000000ffffffff || - (p[0] == 0x00000000ffffffff))))))) ); -} - -static int -mbedtls_p384_cmp( const uint64_t a[7], - const uint64_t b[7] ) -{ - if( a[6] < b[6] ) return -1; - if( a[6] > b[6] ) return 1; - if( a[5] < b[5] ) return -1; - if( a[5] > b[5] ) return 1; - if( a[4] < b[4] ) return -1; - if( a[4] > b[4] ) return 1; - if( a[3] < b[3] ) return -1; - if( a[3] > b[3] ) return 1; - if( a[2] < b[2] ) return -1; - if( a[2] > b[2] ) return 1; - if( a[1] < b[1] ) return -1; - if( a[1] > b[1] ) return 1; - if( a[0] < b[0] ) return -1; - if( a[0] > b[0] ) return 1; - return 0; +mbedtls_p384_gte( uint64_t p[7] ) { + return( ((int64_t)p[6] > 0 || + (p[5] > 0xffffffffffffffff || + (p[5] == 0xffffffffffffffff && + (p[4] > 0xffffffffffffffff || + (p[4] == 0xffffffffffffffff && + (p[3] > 0xffffffffffffffff || + (p[3] == 0xffffffffffffffff && + (p[2] > 0xfffffffffffffffe || + (p[2] == 0xfffffffffffffffe && + (p[1] > 0xffffffff00000000 || + (p[1] == 0xffffffff00000000 && + (p[0] > 0x00000000ffffffff || + (p[0] == 0x00000000ffffffff))))))))))))) ); } static inline void @@ -97,11 +75,11 @@ mbedtls_p384_red( uint64_t p[7] ) SBB( p[3], p[3], 0xffffffffffffffff, c, c ); SBB( p[4], p[4], 0xffffffffffffffff, c, c ); SBB( p[5], p[5], 0xffffffffffffffff, c, c ); - SBB( p[6], p[6], 0, c, c ); + SBB( p[6], p[6], 0, c, c ); #endif } -static noinline void +static inline void mbedtls_p384_gro( uint64_t p[7] ) { #if defined(__x86_64__) && !defined(__STRICT_ANSI__) @@ -128,28 +106,31 @@ mbedtls_p384_gro( uint64_t p[7] ) #endif } -static void +static inline void mbedtls_p384_rum( uint64_t p[7] ) { while( mbedtls_p384_gte( p ) ) mbedtls_p384_red( p ); } +static inline void +mbedtls_p384_mod(uint64_t X[12]) 
+{ + secp384r1(X); + if ((int64_t)X[6] < 0) { + do { + mbedtls_p384_gro(X); + } while ((int64_t)X[6] < 0); + } else { + while (mbedtls_p384_gte(X)) { + mbedtls_p384_red(X); + } + } +} + static inline void mbedtls_p384_sar( uint64_t p[7] ) { -#if defined(__x86_64__) && !defined(__STRICT_ANSI__) - asm("sarq\t48+%0\n\t" - "rcrq\t40+%0\n\t" - "rcrq\t32+%0\n\t" - "rcrq\t24+%0\n\t" - "rcrq\t16+%0\n\t" - "rcrq\t8+%0\n\t" - "rcrq\t%0\n\t" - : "+o"(*p) - : /* no inputs */ - : "memory", "cc"); -#else p[0] = p[0] >> 1 | p[1] << 63; p[1] = p[1] >> 1 | p[2] << 63; p[2] = p[2] >> 1 | p[3] << 63; @@ -157,24 +138,11 @@ mbedtls_p384_sar( uint64_t p[7] ) p[4] = p[4] >> 1 | p[5] << 63; p[5] = p[5] >> 1 | p[6] << 63; p[6] = (int64_t)p[6] >> 1; -#endif } static inline void mbedtls_p384_shl( uint64_t p[7] ) { -#if defined(__x86_64__) && !defined(__STRICT_ANSI__) - asm("shlq\t%0\n\t" - "rclq\t8+%0\n\t" - "rclq\t16+%0\n\t" - "rclq\t24+%0\n\t" - "rclq\t32+%0\n\t" - "rclq\t40+%0\n\t" - "rclq\t48+%0\n\t" - : "+o"(*p) - : /* no inputs */ - : "memory", "cc"); -#else p[6] = p[5] >> 63; p[5] = p[5] << 1 | p[4] >> 63; p[4] = p[4] << 1 | p[3] >> 63; @@ -182,90 +150,24 @@ mbedtls_p384_shl( uint64_t p[7] ) p[2] = p[2] << 1 | p[1] >> 63; p[1] = p[1] << 1 | p[0] >> 63; p[0] = p[0] << 1; -#endif mbedtls_p384_rum( p ); } -static inline void -mbedtls_p384_jam( uint64_t p[7] ) -{ - secp384r1( p ); - if( (int64_t)p[6] < 0 ) - do - mbedtls_p384_gro( p ); - while( (int64_t)p[6] < 0 ); - else - mbedtls_p384_rum( p ); -} - -static void -mbedtls_p384_mul_1x1( uint64_t X[12], - const uint64_t A[6], size_t n, - const uint64_t B[6], size_t m ) -{ - uint128_t t; - t = A[0]; - t *= B[0]; - X[ 0] = t; - X[ 1] = t >> 64; - X[ 2] = 0; - X[ 3] = 0; - X[ 4] = 0; - X[ 5] = 0; - X[ 6] = 0; - X[ 7] = 0; - X[ 8] = 0; - X[ 9] = 0; - X[10] = 0; - X[11] = 0; -} - -static void -mbedtls_p384_mul_nx1( uint64_t X[12], - const uint64_t A[6], size_t n, - const uint64_t B[6], size_t m ) -{ - mbedtls_mpi_mul_hlp1(n, A, X, B[0]); - 
mbedtls_platform_zeroize( X + n + m, ( 12 - n - m ) * 8 ); - if ( n + m >= 6 ) - mbedtls_p384_jam( X ); -} - -static void -mbedtls_p384_mul_6x6( uint64_t X[12], - const uint64_t A[6], size_t n, - const uint64_t B[6], size_t m ) -{ - Mul6x6Adx( X, A, B ); - mbedtls_p384_jam( X ); -} - -static void -mbedtls_p384_mul_nxm( uint64_t X[12], - const uint64_t A[6], size_t n, - const uint64_t B[6], size_t m ) -{ - if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8)); - if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8)); - Mul( X, A, n, B, m ); - mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 ); - if ( n + m >= 6 ) - mbedtls_p384_jam( X ); -} - static void mbedtls_p384_mul( uint64_t X[12], const uint64_t A[6], size_t n, const uint64_t B[6], size_t m ) { - if( n == 6 && m == 6 && X86_HAVE(ADX) && X86_HAVE(BMI2) ) - mbedtls_p384_mul_6x6( X, A, n, B, m ); - else if( m == 1 && n == 1 ) - mbedtls_p384_mul_1x1( X, A, n, B, m ); - else if( m == 1 ) - mbedtls_p384_mul_nx1( X, A, n, B, m ); + if( X86_HAVE(ADX) && X86_HAVE(BMI2) ) + Mul6x6Adx( X, A, B ); else - mbedtls_p384_mul_nxm( X, A, n, B, m ); + { + if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8)); + if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8)); + Mul( X, A, n, B, m ); + mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 ); + } + mbedtls_p384_mod( X ); } static void diff --git a/third_party/mbedtls/ecp_curves.c b/third_party/mbedtls/ecp_curves.c index 2dc189e8f..2b13aed58 100644 --- a/third_party/mbedtls/ecp_curves.c +++ b/third_party/mbedtls/ecp_curves.c @@ -46,7 +46,7 @@ asm(".include \"libc/disclaimer.inc\""); * limitations under the License. 
*/ -/* #if defined(MBEDTLS_ECP_C) */ +#if defined(MBEDTLS_ECP_C) #if !defined(MBEDTLS_ECP_ALT) @@ -635,12 +635,7 @@ static int ecp_group_load( mbedtls_ecp_group *grp, #endif /* ECP_LOAD_GROUP */ #if defined(MBEDTLS_ECP_NIST_OPTIM) -#define NIST_MODP( P ) grp->modp = ecp_mod_ ## P; -#else -#define NIST_MODP( P ) -#endif - -#if defined(MBEDTLS_ECP_NIST_OPTIM) +/* Forward declarations */ #if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED) static int ecp_mod_p192( mbedtls_mpi * ); #endif @@ -650,8 +645,13 @@ static int ecp_mod_p224( mbedtls_mpi * ); #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED) static int ecp_mod_p521( mbedtls_mpi * ); #endif + +#define NIST_MODP( P ) grp->modp = ecp_mod_ ## P; +#else +#define NIST_MODP( P ) #endif /* MBEDTLS_ECP_NIST_OPTIM */ +/* Additional forward declarations */ #if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED) static int ecp_mod_p255( mbedtls_mpi * ); #endif @@ -771,8 +771,6 @@ cleanup: } #endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */ - -#if defined(MBEDTLS_ECP_C) /** * \brief This function sets up an ECP group context * from a standardized set of domain parameters. @@ -879,7 +877,6 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id ) return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE ); } } -#endif /* MBEDTLS_ECP_C */ #if defined(MBEDTLS_ECP_NIST_OPTIM) /* @@ -892,6 +889,7 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id ) * MPI remains loose, since these functions can be deactivated at will. 
*/ +#if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED) /* * Compared to the way things are presented in FIPS 186-3 D.2, * we proceed in columns, from right (least significant chunk) to left, @@ -942,13 +940,17 @@ static int ecp_mod_p192( mbedtls_mpi *N ) int ret = MBEDTLS_ERR_THIS_CORRUPTION; mbedtls_mpi_uint c = 0; mbedtls_mpi_uint *p, *end; + /* Make sure we have enough blocks so that A(5) is legal */ MBEDTLS_MPI_CHK( mbedtls_mpi_grow( N, 6 * WIDTH ) ); + p = N->p; end = p + N->n; + ADD( 3 ); ADD( 5 ); NEXT; // A0 += A3 + A5 ADD( 3 ); ADD( 4 ); ADD( 5 ); NEXT; // A1 += A3 + A4 + A5 ADD( 4 ); ADD( 5 ); LAST; // A2 += A4 + A5 + cleanup: return( ret ); } @@ -958,7 +960,11 @@ cleanup: #undef ADD #undef NEXT #undef LAST +#endif /* MBEDTLS_ECP_DP_SECP192R1_ENABLED */ +#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) || \ + defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) || \ + defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED) /* * The reader is advised to first understand ecp_mod_p192() since the same * general structure is used here, but with additional complications: @@ -1059,6 +1065,7 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry ) static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, size_t bits ) { int ret = MBEDTLS_ERR_THIS_CORRUPTION; + /* C = - c * 2^(bits + 32) */ #if !defined(MBEDTLS_HAVE_INT64) ((void) bits); @@ -1068,19 +1075,24 @@ static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, s else #endif C->p[ C->n - 1 ] = (mbedtls_mpi_uint) -c; + /* N = - ( C - N ) */ MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( N, C, N ) ); N->s = -1; + cleanup: + return( ret ); } +#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) /* * Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2) */ static int ecp_mod_p224( mbedtls_mpi *N ) { INIT( 224 ); + SUB( 7 ); SUB( 11 ); NEXT; // A0 += -A7 - A11 SUB( 8 ); SUB( 12 ); NEXT; // A1 += -A8 - A12 SUB( 9 ); SUB( 13 ); NEXT; // A2 += -A9 - A13 @@ -1088,9 +1100,97 @@ static int ecp_mod_p224( 
mbedtls_mpi *N ) SUB( 11 ); ADD( 8 ); ADD( 12 ); NEXT; // A4 += -A11 + A8 + A12 SUB( 12 ); ADD( 9 ); ADD( 13 ); NEXT; // A5 += -A12 + A9 + A13 SUB( 13 ); ADD( 10 ); LAST; // A6 += -A13 + A10 + cleanup: return( ret ); } +#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */ + +#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) +/* + * Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3) + */ +int ecp_mod_p256_old( mbedtls_mpi *N ) +{ + INIT( 256 ); + + ADD( 8 ); ADD( 9 ); + SUB( 11 ); SUB( 12 ); SUB( 13 ); SUB( 14 ); NEXT; // A0 + + ADD( 9 ); ADD( 10 ); + SUB( 12 ); SUB( 13 ); SUB( 14 ); SUB( 15 ); NEXT; // A1 + + ADD( 10 ); ADD( 11 ); + SUB( 13 ); SUB( 14 ); SUB( 15 ); NEXT; // A2 + + ADD( 11 ); ADD( 11 ); ADD( 12 ); ADD( 12 ); ADD( 13 ); + SUB( 15 ); SUB( 8 ); SUB( 9 ); NEXT; // A3 + + ADD( 12 ); ADD( 12 ); ADD( 13 ); ADD( 13 ); ADD( 14 ); + SUB( 9 ); SUB( 10 ); NEXT; // A4 + + ADD( 13 ); ADD( 13 ); ADD( 14 ); ADD( 14 ); ADD( 15 ); + SUB( 10 ); SUB( 11 ); NEXT; // A5 + + ADD( 14 ); ADD( 14 ); ADD( 15 ); ADD( 15 ); ADD( 14 ); ADD( 13 ); + SUB( 8 ); SUB( 9 ); NEXT; // A6 + + ADD( 15 ); ADD( 15 ); ADD( 15 ); ADD( 8 ); + SUB( 10 ); SUB( 11 ); SUB( 12 ); SUB( 13 ); LAST; // A7 + +cleanup: + return( ret ); +} +#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED */ + +#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED) +/* + * Fast quasi-reduction modulo p384 (FIPS 186-3 D.2.4) + */ +int ecp_mod_p384_old( mbedtls_mpi *N ) +{ + INIT( 384 ); + + ADD( 12 ); ADD( 21 ); ADD( 20 ); + SUB( 23 ); NEXT; // A0 + + ADD( 13 ); ADD( 22 ); ADD( 23 ); + SUB( 12 ); SUB( 20 ); NEXT; // A1 + + ADD( 14 ); ADD( 23 ); + SUB( 13 ); SUB( 21 ); NEXT; // A2 + + ADD( 15 ); ADD( 12 ); ADD( 20 ); ADD( 21 ); + SUB( 14 ); SUB( 22 ); SUB( 23 ); NEXT; // A3 + + ADD( 21 ); ADD( 21 ); ADD( 16 ); ADD( 13 ); ADD( 12 ); ADD( 20 ); ADD( 22 ); + SUB( 15 ); SUB( 23 ); SUB( 23 ); NEXT; // A4 + + ADD( 22 ); ADD( 22 ); ADD( 17 ); ADD( 14 ); ADD( 13 ); ADD( 21 ); ADD( 23 ); + SUB( 16 ); NEXT; // A5 + + ADD( 23 ); ADD( 23 ); ADD( 18 ); ADD( 15
); ADD( 14 ); ADD( 22 ); + SUB( 17 ); NEXT; // A6 + + ADD( 19 ); ADD( 16 ); ADD( 15 ); ADD( 23 ); + SUB( 18 ); NEXT; // A7 + + ADD( 20 ); ADD( 17 ); ADD( 16 ); + SUB( 19 ); NEXT; // A8 + + ADD( 21 ); ADD( 18 ); ADD( 17 ); + SUB( 20 ); NEXT; // A9 + + ADD( 22 ); ADD( 19 ); ADD( 18 ); + SUB( 21 ); NEXT; // A10 + + ADD( 23 ); ADD( 20 ); ADD( 19 ); + SUB( 22 ); LAST; // A11 + +cleanup: + return( ret ); +} +#endif /* MBEDTLS_ECP_DP_SECP384R1_ENABLED */ #undef A #undef LOAD32 @@ -1100,6 +1200,10 @@ cleanup: #undef NEXT #undef LAST +#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED || + MBEDTLS_ECP_DP_SECP256R1_ENABLED || + MBEDTLS_ECP_DP_SECP384R1_ENABLED */ + #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED) /* * Here we have an actual Mersenne prime, so things are more straightforward. @@ -1156,6 +1260,8 @@ cleanup: #endif /* MBEDTLS_ECP_NIST_OPTIM */ +#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED) + /* Size of p255 in terms of mbedtls_mpi_uint */ #define P255_WIDTH ( 255 / 8 / sizeof( mbedtls_mpi_uint ) + 1 ) @@ -1169,28 +1275,34 @@ static int ecp_mod_p255( mbedtls_mpi *N ) size_t i; mbedtls_mpi M; mbedtls_mpi_uint Mp[P255_WIDTH + 2]; + if( N->n < P255_WIDTH ) return( 0 ); + /* M = A1 */ M.s = 1; M.n = N->n - ( P255_WIDTH - 1 ); if( M.n > P255_WIDTH + 1 ) return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); M.p = Mp; - mbedtls_platform_zeroize( Mp, sizeof Mp ); + memset( Mp, 0, sizeof Mp ); memcpy( Mp, N->p + P255_WIDTH - 1, M.n * sizeof( mbedtls_mpi_uint ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, 255 % ( 8 * sizeof( mbedtls_mpi_uint ) ) ) ); M.n++; /* Make room for multiplication by 19 */ + /* N = A0 */ MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( N, 255, 0 ) ); for( i = P255_WIDTH; i < N->n; i++ ) N->p[i] = 0; + /* N = A0 + 19 * A1 */ MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M, &M, 19 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( N, N, &M ) ); + cleanup: return( ret ); } +#endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */ #if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED) @@ -1231,7 +1343,7 @@ static int 
ecp_mod_p448( mbedtls_mpi *N ) /* Shouldn't be called with N larger than 2^896! */ return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); M.p = Mp; - mbedtls_platform_zeroize( Mp, sizeof( Mp ) ); + memset( Mp, 0, sizeof( Mp ) ); memcpy( Mp, N->p + P448_WIDTH, M.n * sizeof( mbedtls_mpi_uint ) ); /* N = A0 */ @@ -1299,7 +1411,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t M.n = N->n - ( p_limbs - adjust ); if( M.n > p_limbs + adjust ) M.n = p_limbs + adjust; - mbedtls_platform_zeroize( Mp, sizeof Mp ); + memset( Mp, 0, sizeof Mp ); memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) ); if( shift != 0 ) MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) ); @@ -1321,7 +1433,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t M.n = N->n - ( p_limbs - adjust ); if( M.n > p_limbs + adjust ) M.n = p_limbs + adjust; - mbedtls_platform_zeroize( Mp, sizeof Mp ); + memset( Mp, 0, sizeof Mp ); memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) ); if( shift != 0 ) MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) ); @@ -1392,4 +1504,4 @@ static int ecp_mod_p256k1( mbedtls_mpi *N ) #endif /* !MBEDTLS_ECP_ALT */ -/* #endif /\* MBEDTLS_ECP_C *\/ */ +#endif /* MBEDTLS_ECP_C */ diff --git a/third_party/mbedtls/everest.c b/third_party/mbedtls/everest.c index fe9ff9d00..437efd6a9 100644 --- a/third_party/mbedtls/everest.c +++ b/third_party/mbedtls/everest.c @@ -16,1186 +16,255 @@ │ limitations under the License. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" -#include "libc/limits.h" -#include "third_party/mbedtls/asn1.h" -#include "third_party/mbedtls/bignum.h" -#include "third_party/mbedtls/common.h" -#include "third_party/mbedtls/error.h" -#include "third_party/mbedtls/everest.h" -#include "third_party/mbedtls/platform.h" -#include "third_party/mbedtls/profile.h" +#include "third_party/mbedtls/endian.h" asm(".ident\t\"\\n\\n\ Everest (Apache 2.0)\\n\ Copyright 2016-2018 INRIA and Microsoft Corporation\""); asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ -#if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED) +#define DW(x) (uint128_t)(x) +#define EQ(x, y) ((((x ^ y) | (~(x ^ y) + 1)) >> 63) - 1) +#define GTE(x, y) (((x ^ ((x ^ y) | ((x - y) ^ y))) >> 63) - 1) -#define load64_le(b) READ64LE(b) -#define store64_le(b, i) WRITE64LE(b, i) - -static uint64_t -FStar_UInt64_eq_mask(uint64_t a, uint64_t b) -{ - uint64_t x = a ^ b; - uint64_t minus_x = ~x + 1; - uint64_t x_or_minus_x = x | minus_x; - uint64_t xnx = x_or_minus_x >> 63; - return xnx - 1; -} - -static uint64_t -FStar_UInt64_gte_mask(uint64_t a, uint64_t b) -{ - uint64_t x = a; - uint64_t y = b; - uint64_t x_xor_y = x ^ y; - uint64_t x_sub_y = x - y; - uint64_t x_sub_y_xor_y = x_sub_y ^ y; - uint64_t q = x_xor_y | x_sub_y_xor_y; - uint64_t x_xor_q = x ^ q; - uint64_t x_xor_q_ = x_xor_q >> 63; - return x_xor_q_ - 1; -} - -static uint32_t -FStar_UInt32_eq_mask(uint32_t a, uint32_t b) -{ - uint32_t x = a ^ b; - uint32_t minus_x = ~x + 1; - uint32_t x_or_minus_x = x | minus_x; - uint32_t xnx = x_or_minus_x >> 31; - return xnx - 1; -} - -static uint32_t -FStar_UInt32_gte_mask(uint32_t a, uint32_t b) -{ - uint32_t x = a; - uint32_t y = b; - uint32_t x_xor_y = x ^ y; - uint32_t x_sub_y = x - y; - uint32_t x_sub_y_xor_y = x_sub_y ^ y; - uint32_t q = x_xor_y | x_sub_y_xor_y; - uint32_t x_xor_q = x ^ q; - uint32_t x_xor_q_ = 
x_xor_q >> 31; - return x_xor_q_ - 1; -} - -static uint16_t -FStar_UInt16_eq_mask(uint16_t a, uint16_t b) -{ - uint16_t x = a ^ b; - uint16_t minus_x = ~x + 1; - uint16_t x_or_minus_x = x | minus_x; - uint16_t xnx = x_or_minus_x >> 15; - return xnx - 1; -} - -static uint16_t -FStar_UInt16_gte_mask(uint16_t a, uint16_t b) -{ - uint16_t x = a; - uint16_t y = b; - uint16_t x_xor_y = x ^ y; - uint16_t x_sub_y = x - y; - uint16_t x_sub_y_xor_y = x_sub_y ^ y; - uint16_t q = x_xor_y | x_sub_y_xor_y; - uint16_t x_xor_q = x ^ q; - uint16_t x_xor_q_ = x_xor_q >> 15; - return x_xor_q_ - 1; -} - -static uint8_t -FStar_UInt8_eq_mask(uint8_t a, uint8_t b) -{ - uint8_t x = a ^ b; - uint8_t minus_x = ~x + 1; - uint8_t x_or_minus_x = x | minus_x; - uint8_t xnx = x_or_minus_x >> 7; - return xnx - 1; -} - -static uint8_t -FStar_UInt8_gte_mask(uint8_t a, uint8_t b) -{ - uint8_t x = a; - uint8_t y = b; - uint8_t x_xor_y = x ^ y; - uint8_t x_sub_y = x - y; - uint8_t x_sub_y_xor_y = x_sub_y ^ y; - uint8_t q = x_xor_y | x_sub_y_xor_y; - uint8_t x_xor_q = x ^ q; - uint8_t x_xor_q_ = x_xor_q >> 7; - return x_xor_q_ - 1; -} - -static void -Hacl_Bignum_Modulo_carry_top(uint64_t *b) -{ - uint64_t b4 = b[4]; - uint64_t b0 = b[0]; - uint64_t b4_ = b4 & 0x7ffffffffffff; - uint64_t b0_ = b0 + 19 * (b4 >> 51); - b[4] = b4_; - b[0] = b0_; -} - -forceinline void -Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, uint128_t *input) -{ - uint32_t i; - for (i = 0; i < 5; ++i) - { - uint128_t xi = input[i]; - output[i] = xi; +forceinline void HaclBignumCopy(uint64_t o[5], uint64_t p[5]) { + for (unsigned i = 0; i < 5; ++i) { + o[i] = p[i]; } } -forceinline void -Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint128_t *output, uint64_t *input, uint64_t s) -{ - uint32_t i; - for (i = 0; i < 5; ++i) - { - uint128_t xi = output[i]; - uint64_t yi = input[i]; - output[i] = xi + (uint128_t)yi * s; +forceinline void HaclBignumFsum(uint64_t o[5], uint64_t p[5]) { + for (unsigned i = 0; i < 5; ++i) { + o[i] 
+= p[i]; } } -forceinline void -Hacl_Bignum_Fproduct_carry_wide_(uint128_t *tmp) -{ - uint32_t i; - for (i = 0; i < 4; ++i) - { - uint32_t ctr = i; - uint128_t tctr = tmp[ctr]; - uint128_t tctrp1 = tmp[ctr + 1]; - uint64_t r0 = (uint64_t)tctr & 0x7ffffffffffff; - uint128_t c = tctr >> 51; - tmp[ctr] = (uint128_t)r0; - tmp[ctr + 1] = tctrp1 + c; +forceinline void HaclBignumTrunc(uint64_t o[5], uint128_t p[5]) { + for (unsigned i = 0; i < 5; ++i) { + o[i] = p[i]; } } -forceinline void -Hacl_Bignum_Fmul_shift_reduce(uint64_t *output) -{ - uint64_t tmp = output[4]; - uint32_t i; - for (i = 0; i < 4; ++i) - { - uint32_t ctr = 5 - i - 1; - uint64_t z = output[ctr - 1]; - output[ctr] = z; - } - output[0] = tmp * 19; -} - -static inline void -Hacl_Bignum_Fmul_mul_shift_reduce_(uint128_t *output, uint64_t *input, uint64_t *input2) -{ - uint32_t i; - for (i = 0; i < 4; ++i) - { - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2[i]); - Hacl_Bignum_Fmul_shift_reduce(input); - } - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2[4]); -} - -static inline void -Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2) -{ - uint64_t i0; - uint64_t i1; - uint64_t i0_; - uint64_t i1_; - uint128_t b4; - uint128_t b0; - uint128_t b4_; - uint128_t b0_; - uint128_t t[5]; - uint64_t tmp[5]; - t[0] = 0; - t[1] = 0; - t[2] = 0; - t[3] = 0; - t[4] = 0; - tmp[0] = input[0]; - tmp[1] = input[1]; - tmp[2] = input[2]; - tmp[3] = input[3]; - tmp[4] = input[4]; - Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2); - Hacl_Bignum_Fproduct_carry_wide_(t); - b4 = t[4]; - b0 = t[0]; - b4_ = b4 & 0x7ffffffffffff; - b0_ = b0 + (uint128_t)19 * (uint64_t)(b4 >> 51); - t[4] = b4_; - t[0] = b0_; - Hacl_Bignum_Fproduct_copy_from_wide_(output, t); - i0 = output[0]; - i1 = output[1]; - i0_ = i0 & 0x7ffffffffffff; - i1_ = i1 + (i0 >> 51); - output[0] = i0_; - output[1] = i1_; -} - -forceinline void -Hacl_Bignum_Fsquare_fsquare__(uint128_t *tmp, uint64_t 
*output) -{ - uint64_t r0 = output[0]; - uint64_t r1 = output[1]; - uint64_t r2 = output[2]; - uint64_t r3 = output[3]; - uint64_t r4 = output[4]; - uint64_t d0 = r0 * 2; - uint64_t d1 = r1 * 2; - uint64_t d2 = r2 * 2 * 19; - uint64_t d419 = r4 * 19; - uint64_t d4 = d419 * 2; - uint128_t s0 = (uint128_t)r0 * r0 + (uint128_t)d4 * r1 + (uint128_t)d2 * r3; - uint128_t s1 = (uint128_t)d0 * r1 + (uint128_t)d4 * r2 + (uint128_t)(r3 * 19) * r3; - uint128_t s2 = (uint128_t)d0 * r2 + (uint128_t)r1 * r1 + (uint128_t)d4 * r3; - uint128_t s3 = (uint128_t)d0 * r3 + (uint128_t)d1 * r2 + (uint128_t)r4 * d419; - uint128_t s4 = (uint128_t)d0 * r4 + (uint128_t)d1 * r3 + (uint128_t)r2 * r2; - tmp[0] = s0; - tmp[1] = s1; - tmp[2] = s2; - tmp[3] = s3; - tmp[4] = s4; -} - -forceinline void -Hacl_Bignum_Fsquare_fsquare_(uint128_t *tmp, uint64_t *output) -{ - uint128_t b4; - uint128_t b0; - uint128_t b4_; - uint128_t b0_; - uint64_t i0; - uint64_t i1; - uint64_t i0_; - uint64_t i1_; - Hacl_Bignum_Fsquare_fsquare__(tmp, output); - Hacl_Bignum_Fproduct_carry_wide_(tmp); - b4 = tmp[4]; - b0 = tmp[0]; - b4_ = b4 & 0x7ffffffffffff; - b0_ = b0 + (uint128_t)19 * (b4 >> 51); - tmp[4] = b4_; - tmp[0] = b0_; - Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); - i0 = output[0]; - i1 = output[1]; - i0_ = i0 & 0x7ffffffffffff; - i1_ = i1 + (i0 >> 51); - output[0] = i0_; - output[1] = i1_; -} - -static void -Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, uint128_t *tmp, uint32_t count1) -{ - uint32_t i; - Hacl_Bignum_Fsquare_fsquare_(tmp, input); - for (i = 1; i < count1; ++i) - Hacl_Bignum_Fsquare_fsquare_(tmp, input); -} - -forceinline void -Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1) -{ - uint128_t t[5]; - t[0] = 0; - t[1] = 0; - t[2] = 0; - t[3] = 0; - t[4] = 0; - output[0] = input[0]; - output[1] = input[1]; - output[2] = input[2]; - output[3] = input[3]; - output[4] = input[4]; - Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); -} - 
-forceinline void -Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1) -{ - uint128_t t[5]; - t[0] = 0; - t[1] = 0; - t[2] = 0; - t[3] = 0; - t[4] = 0; - Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); -} - -forceinline void -Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z) -{ - uint32_t i; - uint64_t buf[20]; - uint64_t *a0 = buf; - uint64_t *t00 = buf + 5; - uint64_t *b0 = buf + 10; - uint64_t *t01; - uint64_t *b1; - uint64_t *c0; - uint64_t *a; - uint64_t *t0; - uint64_t *b; - uint64_t *c; - for (i = 0; i < 20; ++i) buf[i] = 0; - Hacl_Bignum_Fsquare_fsquare_times(a0, z, 1); - Hacl_Bignum_Fsquare_fsquare_times(t00, a0, 2); - Hacl_Bignum_Fmul_fmul(b0, t00, z); - Hacl_Bignum_Fmul_fmul(a0, b0, a0); - Hacl_Bignum_Fsquare_fsquare_times(t00, a0, 1); - Hacl_Bignum_Fmul_fmul(b0, t00, b0); - Hacl_Bignum_Fsquare_fsquare_times(t00, b0, 5); - t01 = buf + 5; - b1 = buf + 10; - c0 = buf + 15; - Hacl_Bignum_Fmul_fmul(b1, t01, b1); - Hacl_Bignum_Fsquare_fsquare_times(t01, b1, 10); - Hacl_Bignum_Fmul_fmul(c0, t01, b1); - Hacl_Bignum_Fsquare_fsquare_times(t01, c0, 20); - Hacl_Bignum_Fmul_fmul(t01, t01, c0); - Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, 10); - Hacl_Bignum_Fmul_fmul(b1, t01, b1); - Hacl_Bignum_Fsquare_fsquare_times(t01, b1, 50); - a = buf; - t0 = buf + 5; - b = buf + 10; - c = buf + 15; - Hacl_Bignum_Fmul_fmul(c, t0, b); - Hacl_Bignum_Fsquare_fsquare_times(t0, c, 100); - Hacl_Bignum_Fmul_fmul(t0, t0, c); - Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, 50); - Hacl_Bignum_Fmul_fmul(t0, t0, b); - Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, 5); - Hacl_Bignum_Fmul_fmul(out, t0, a); -} - -forceinline void -Hacl_Bignum_fsum(uint64_t *a, uint64_t *b) -{ - uint32_t i; - for (i = 0; i < 5; ++i) - { - uint64_t xi = a[i]; - uint64_t yi = b[i]; - a[i] = xi + yi; +forceinline void HaclBignumCarry(uint64_t p[5]) { + for (unsigned i = 0; i < 4; ++i) { + p[i + 1] += p[i] >> 51; + p[i] &= 0x7ffffffffffff; } } -forceinline void 
-Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b) -{ - uint32_t i; - uint64_t tmp[5]; - tmp[0] = b[0] + 0x3fffffffffff68; - tmp[1] = b[1] + 0x3ffffffffffff8; - tmp[2] = b[2] + 0x3ffffffffffff8; - tmp[3] = b[3] + 0x3ffffffffffff8; - tmp[4] = b[4] + 0x3ffffffffffff8; - for (i = 0; i < 5; ++i) - { - uint64_t xi = a[i]; - uint64_t yi = tmp[i]; - a[i] = yi - xi; +forceinline void HaclBignumCarryWide(uint128_t p[5]) { + for (unsigned i = 0; i < 4; ++i) { + p[i + 1] += p[i] >> 51; + p[i] &= 0x7ffffffffffff; } } -forceinline void -Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s) -{ - uint32_t i; - uint128_t b4; - uint128_t b0; - uint128_t b4_; - uint128_t b0_; - uint128_t tmp[5]; - for (i = 0; i < 5; ++i) - { - tmp[i] = (uint128_t)b[i] * s; - } - Hacl_Bignum_Fproduct_carry_wide_(tmp); - b4 = tmp[4]; - b0 = tmp[0]; - b4_ = b4 & 0x7ffffffffffff; - b0_ = b0 + (uint128_t)19 * (uint64_t)(b4 >> 51); - tmp[4] = b4_; - tmp[0] = b0_; - Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); -} - -forceinline void -Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b) -{ - Hacl_Bignum_Fmul_fmul(output, a, b); -} - -forceinline void -Hacl_Bignum_crecip(uint64_t *output, uint64_t *input) -{ - Hacl_Bignum_Crecip_crecip(output, input); -} - -static void -Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) -{ - uint32_t i = ctr - 1; - uint64_t ai = a[i]; - uint64_t bi = b[i]; - uint64_t x = swap1 & (ai ^ bi); - uint64_t ai1 = ai ^ x; - uint64_t bi1 = bi ^ x; - a[i] = ai1; - b[i] = bi1; -} - -static void -Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) -{ - if (ctr) - { - Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr); - Hacl_EC_Point_swap_conditional_(a, b, swap1, ctr - 1); - } -} - -static void -Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap) -{ - uint64_t swap1 = 0 - iswap; - Hacl_EC_Point_swap_conditional_(a, b, swap1, 5); - Hacl_EC_Point_swap_conditional_(a 
+ 5, b + 5, swap1, 5); -} - -static void -Hacl_EC_Point_copy(uint64_t *output, uint64_t *input) -{ - output[0] = input[0]; - output[1] = input[1]; - output[2] = input[2]; - output[3] = input[3]; - output[4] = input[4]; - output[5] = input[5]; - output[6] = input[6]; - output[7] = input[7]; - output[8] = input[8]; - output[9] = input[9]; -} - -static void -Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input) -{ - uint64_t i0 = load64_le(input); - uint8_t *x00 = input + 6; - uint64_t i1 = load64_le(x00); - uint8_t *x01 = input + 12; - uint64_t i2 = load64_le(x01); - uint8_t *x02 = input + 19; - uint64_t i3 = load64_le(x02); - uint8_t *x0 = input + 24; - uint64_t i4 = load64_le(x0); - uint64_t output0 = i0 & 0x7ffffffffffff; - uint64_t output1 = i1 >> 3 & 0x7ffffffffffff; - uint64_t output2 = i2 >> 6 & 0x7ffffffffffff; - uint64_t output3 = i3 >> 1 & 0x7ffffffffffff; - uint64_t output4 = i4 >> 12 & 0x7ffffffffffff; - output[0] = output0; - output[1] = output1; - output[2] = output2; - output[3] = output3; - output[4] = output4; -} - -static void -Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input) -{ - uint64_t t0 = input[0]; - uint64_t t1 = input[1]; - uint64_t t2 = input[2]; - uint64_t t3 = input[3]; - uint64_t t4 = input[4]; - uint64_t t1_ = t1 + (t0 >> 51); - uint64_t t0_ = t0 & 0x7ffffffffffff; - uint64_t t2_ = t2 + (t1_ >> 51); - uint64_t t1__ = t1_ & 0x7ffffffffffff; - uint64_t t3_ = t3 + (t2_ >> 51); - uint64_t t2__ = t2_ & 0x7ffffffffffff; - uint64_t t4_ = t4 + (t3_ >> 51); - uint64_t t3__ = t3_ & 0x7ffffffffffff; - input[0] = t0_; - input[1] = t1__; - input[2] = t2__; - input[3] = t3__; - input[4] = t4_; -} - -static void -Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input) -{ - Hacl_EC_Format_fcontract_first_carry_pass(input); - Hacl_Bignum_Modulo_carry_top(input); -} - -static void -Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input) -{ - uint64_t t0 = input[0]; - uint64_t t1 = input[1]; - uint64_t t2 = input[2]; - uint64_t t3 = 
input[3]; - uint64_t t4 = input[4]; - uint64_t t1_ = t1 + (t0 >> 51); - uint64_t t0_ = t0 & 0x7ffffffffffff; - uint64_t t2_ = t2 + (t1_ >> 51); - uint64_t t1__ = t1_ & 0x7ffffffffffff; - uint64_t t3_ = t3 + (t2_ >> 51); - uint64_t t2__ = t2_ & 0x7ffffffffffff; - uint64_t t4_ = t4 + (t3_ >> 51); - uint64_t t3__ = t3_ & 0x7ffffffffffff; - input[0] = t0_; - input[1] = t1__; - input[2] = t2__; - input[3] = t3__; - input[4] = t4_; -} - -static void -Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input) -{ - uint64_t i0; - uint64_t i1; - uint64_t i0_; - uint64_t i1_; - Hacl_EC_Format_fcontract_second_carry_pass(input); - Hacl_Bignum_Modulo_carry_top(input); - i0 = input[0]; - i1 = input[1]; - i0_ = i0 & 0x7ffffffffffff; - i1_ = i1 + (i0 >> 51); - input[0] = i0_; - input[1] = i1_; -} - -static void -Hacl_EC_Format_fcontract_trim(uint64_t *input) -{ - uint64_t a0 = input[0]; - uint64_t a1 = input[1]; - uint64_t a2 = input[2]; - uint64_t a3 = input[3]; - uint64_t a4 = input[4]; - uint64_t mask0 = FStar_UInt64_gte_mask(a0, 0x7ffffffffffed); - uint64_t mask1 = FStar_UInt64_eq_mask( a1, 0x7ffffffffffff); - uint64_t mask2 = FStar_UInt64_eq_mask( a2, 0x7ffffffffffff); - uint64_t mask3 = FStar_UInt64_eq_mask( a3, 0x7ffffffffffff); - uint64_t mask4 = FStar_UInt64_eq_mask( a4, 0x7ffffffffffff); - uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; - uint64_t a0_ = a0 - (0x7ffffffffffed & mask); - uint64_t a1_ = a1 - (0x7ffffffffffff & mask); - uint64_t a2_ = a2 - (0x7ffffffffffff & mask); - uint64_t a3_ = a3 - (0x7ffffffffffff & mask); - uint64_t a4_ = a4 - (0x7ffffffffffff & mask); - input[0] = a0_; - input[1] = a1_; - input[2] = a2_; - input[3] = a3_; - input[4] = a4_; -} - -static void -Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input) -{ - uint64_t t0 = input[0]; - uint64_t t1 = input[1]; - uint64_t t2 = input[2]; - uint64_t t3 = input[3]; - uint64_t t4 = input[4]; - uint64_t o0 = t1 << 51 | t0; - uint64_t o1 = t2 << 38 | t1 >> 13; - uint64_t o2 = 
t3 << 25 | t2 >> 26; - uint64_t o3 = t4 << 12 | t3 >> 39; - uint8_t *b0 = output; - uint8_t *b1 = output + 8; - uint8_t *b2 = output + 16; - uint8_t *b3 = output + 24; - store64_le(b0, o0); - store64_le(b1, o1); - store64_le(b2, o2); - store64_le(b3, o3); -} - -static void -Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input) -{ - Hacl_EC_Format_fcontract_first_carry_full(input); - Hacl_EC_Format_fcontract_second_carry_full(input); - Hacl_EC_Format_fcontract_trim(input); - Hacl_EC_Format_fcontract_store(output, input); -} - -static void -Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point) -{ - uint32_t i; - uint64_t *x = point; - uint64_t *z = point + 5; - uint64_t buf[10]; - uint64_t *zmone = buf; - uint64_t *sc = buf + 5; - for (i = 0; i < 10; ++i) buf[i] = 0; - Hacl_Bignum_crecip(zmone, z); - Hacl_Bignum_fmul(sc, x, zmone); - Hacl_EC_Format_fcontract(scalar, sc); -} - -static void -Hacl_EC_AddAndDouble_fmonty( - uint64_t *pp, - uint64_t *ppq, - uint64_t *p, - uint64_t *pq, - uint64_t *qmqp -) -{ - uint32_t i; - uint64_t *qx = qmqp; - uint64_t *x2 = pp; - uint64_t *z2 = pp + 5; - uint64_t *x3 = ppq; - uint64_t *z3 = ppq + 5; - uint64_t *x = p; - uint64_t *z = p + 5; - uint64_t *xprime = pq; - uint64_t *zprime = pq + 5; - uint64_t buf[40]; - uint64_t *origx = buf; - uint64_t *origxprime0 = buf + 5; - uint64_t *xxprime0 = buf + 25; - uint64_t *zzprime0 = buf + 30; - uint64_t *origxprime; - uint64_t *xx0; - uint64_t *zz0; - uint64_t *xxprime; - uint64_t *zzprime; - uint64_t *zzzprime; - uint64_t *zzz; - uint64_t *xx; - uint64_t *zz; - uint64_t scalar; - for (i = 0; i < 40; ++i) buf[i] = 0; - origx[0] = x[0]; - origx[1] = x[1]; - origx[2] = x[2]; - origx[3] = x[3]; - origx[4] = x[4]; - Hacl_Bignum_fsum(x, z); - Hacl_Bignum_fdifference(z, origx); - origxprime0[0] = xprime[0]; - origxprime0[1] = xprime[1]; - origxprime0[2] = xprime[2]; - origxprime0[3] = xprime[3]; - origxprime0[4] = xprime[4]; - Hacl_Bignum_fsum(xprime, zprime); - 
Hacl_Bignum_fdifference(zprime, origxprime0); - Hacl_Bignum_fmul(xxprime0, xprime, z); - Hacl_Bignum_fmul(zzprime0, x, zprime); - origxprime = buf + 5; - xx0 = buf + 15; - zz0 = buf + 20; - xxprime = buf + 25; - zzprime = buf + 30; - zzzprime = buf + 35; - origxprime[0] = xxprime[0]; - origxprime[1] = xxprime[1]; - origxprime[2] = xxprime[2]; - origxprime[3] = xxprime[3]; - origxprime[4] = xxprime[4]; - Hacl_Bignum_fsum(xxprime, zzprime); - Hacl_Bignum_fdifference(zzprime, origxprime); - Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, 1); - Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, 1); - Hacl_Bignum_fmul(z3, zzzprime, qx); - Hacl_Bignum_Fsquare_fsquare_times(xx0, x, 1); - Hacl_Bignum_Fsquare_fsquare_times(zz0, z, 1); - zzz = buf + 10; - xx = buf + 15; - zz = buf + 20; - Hacl_Bignum_fmul(x2, xx, zz); - Hacl_Bignum_fdifference(zz, xx); - scalar = 121665; - Hacl_Bignum_fscalar(zzz, zz, scalar); - Hacl_Bignum_fsum(zzz, xx); - Hacl_Bignum_fmul(z2, zzz, zz); -} - -static void -Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step( - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint8_t byt -) -{ - uint64_t bit = byt >> 7; - Hacl_EC_Point_swap_conditional(nq, nqpq, bit); - Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q); - Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit); -} - -static void -Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step( - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint8_t byt -) -{ - Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); - Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt<<1); -} - -static void -Hacl_EC_Ladder_SmallLoop_cmult_small_loop( - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint8_t byt, - uint32_t i -) -{ - if (i) - { - uint32_t i_ = i - 1; - Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt); - 
Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt << 2, i_); - } -} - -static void -Hacl_EC_Ladder_BigLoop_cmult_big_loop( - uint8_t *n1, - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint32_t i -) -{ - if (i) - { - uint32_t i1 = i - 1; - uint8_t byte = n1[i1]; - Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, 4); - Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1); - } -} - -static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q) -{ - uint32_t i; - uint64_t point_buf[40]; - uint64_t *nq = point_buf; - uint64_t *nqpq = point_buf + 10; - uint64_t *nq2 = point_buf + 20; - uint64_t *nqpq2 = point_buf + 30; - for (i = 0; i < 40; ++i) point_buf[i] = 0; - Hacl_EC_Point_copy(nqpq, q); - nq[0] = 1; - Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); - Hacl_EC_Point_copy(result, nq); -} - -static void -Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint) -{ - uint32_t i; - uint64_t buf0[10]; - uint64_t *x0 = buf0; - uint64_t *z = buf0 + 5; - for (i = 0; i < 10; ++i) buf0[i] = 0; - Hacl_EC_Format_fexpand(x0, basepoint); - z[0] = 1; - { - uint8_t e[32]; - uint8_t e0; - uint8_t e31; - uint8_t e01; - uint8_t e311; - uint8_t e312; - uint8_t *scalar; - for (i = 0; i < 32; ++i) { - e[i] = secret[i]; +static void HaclBignumFmulReduce(uint128_t o[5], uint64_t p[5], uint64_t q[5]) { + uint64_t t; + unsigned i, j; + for (i = 0;; ++i) { + for (j = 0; j < 5; ++j) { + o[j] += DW(p[j]) * q[i]; } - e0 = e[0]; - e31 = e[31]; - e01 = e0 & 248; - e311 = e31 & 127; - e312 = e311 | 64; - e[0] = e01; - e[31] = e312; - scalar = e; - { - uint64_t buf[15]; - buf[0] = 1; - for (i = 1; i < 15; ++i) buf[i] = 0; - Hacl_EC_Ladder_cmult(buf, scalar, buf0); - Hacl_EC_Format_scalar_of_point(mypublic, buf); + if (i == 4) break; + t = p[4] * 19; + p[4] = p[3]; + p[3] = p[2]; + p[2] = p[1]; + p[1] = p[0]; + p[0] = t; + } +} 
+ +static void HaclBignumFmul(uint64_t o[5], uint64_t p[5], uint64_t q[5]) { + uint128_t t[5] = {0}; + uint64_t u[5] = {p[0], p[1], p[2], p[3], p[4]}; + HaclBignumFmulReduce(t, u, q); + HaclBignumCarryWide(t); + t[0] += DW(19) * (uint64_t)(t[4] >> 51); + HaclBignumTrunc(o, t); + o[1] += o[0] >> 51; + o[4] &= 0x7ffffffffffff; + o[0] &= 0x7ffffffffffff; +} + +static void HaclBignumFsquare(uint128_t t[5], uint64_t p[5]) { + t[0] = DW(p[0] * 1) * p[0] + DW(p[4] * 38) * p[1] + DW(p[2] * 38) * p[3]; + t[1] = DW(p[0] * 2) * p[1] + DW(p[4] * 38) * p[2] + DW(p[3] * 19) * p[3]; + t[2] = DW(p[0] * 2) * p[2] + DW(p[1] * 01) * p[1] + DW(p[4] * 38) * p[3]; + t[3] = DW(p[0] * 2) * p[3] + DW(p[1] * 02) * p[2] + DW(p[4]) * (p[4] * 19); + t[4] = DW(p[0] * 2) * p[4] + DW(p[1] * 02) * p[3] + DW(p[2]) * p[2]; +} + +static void HaclBignumFsqa(uint64_t o[5], uint32_t n) { + uint128_t t[5]; + for (unsigned i = 0; i < n; ++i) { + HaclBignumFsquare(t, o); + HaclBignumCarryWide(t); + t[0] += DW(19) * (uint64_t)(t[4] >> 51); + HaclBignumTrunc(o, t); + o[1] += o[0] >> 51; + o[4] &= 0x7ffffffffffff; + o[0] &= 0x7ffffffffffff; + } +} + +static void HaclBignumFsqr(uint64_t o[5], uint64_t p[5], uint32_t n) { + HaclBignumCopy(o, p); + HaclBignumFsqa(o, n); +} + +static void HaclBignumCrecip(uint64_t o[5], uint64_t z[5]) { + uint64_t b[4][5]; + HaclBignumFsqr(b[0], z, 1); + HaclBignumFsqr(b[1], b[0], 2); + HaclBignumFmul(b[2], b[1], z); + HaclBignumFmul(b[0], b[2], b[0]); + HaclBignumFsqr(b[1], b[0], 1); + HaclBignumFmul(b[2], b[1], b[2]); + HaclBignumFsqr(b[1], b[2], 5); + HaclBignumFmul(b[2], b[1], b[2]); + HaclBignumFsqr(b[1], b[2], 10); + HaclBignumFmul(b[3], b[1], b[2]); + HaclBignumFsqr(b[1], b[3], 20); + HaclBignumFmul(b[1], b[1], b[3]); + HaclBignumFsqa(b[1], 10); + HaclBignumFmul(b[2], b[1], b[2]); + HaclBignumFsqr(b[1], b[2], 50); + HaclBignumFmul(b[3], b[1], b[2]); + HaclBignumFsqr(b[1], b[3], 100); + HaclBignumFmul(b[1], b[1], b[3]); + HaclBignumFsqa(b[1], 50); + HaclBignumFmul(b[1], 
b[1], b[2]); + HaclBignumFsqa(b[1], 5); + HaclBignumFmul(o, b[1], b[0]); +} + +static void HaclBignumFdif(uint64_t a[5], uint64_t b[5]) { + a[0] = b[0] + 0x3fffffffffff68 - a[0]; + a[1] = b[1] + 0x3ffffffffffff8 - a[1]; + a[2] = b[2] + 0x3ffffffffffff8 - a[2]; + a[3] = b[3] + 0x3ffffffffffff8 - a[3]; + a[4] = b[4] + 0x3ffffffffffff8 - a[4]; +} + +static void HaclBignumFscalar(uint64_t o[5], uint64_t p[5], uint64_t s) { + unsigned i; + uint128_t t[5]; + for (i = 0; i < 5; ++i) t[i] = DW(p[i]) * s; + HaclBignumCarryWide(t); + t[0] += DW(19) * (uint64_t)(t[4] >> 51); + t[4] &= 0x7ffffffffffff; + HaclBignumTrunc(o, t); +} + +static void HaclEcPointSwap(uint64_t a[2][5], uint64_t b[2][5], uint64_t m) { + unsigned i, j; + uint64_t x, y; + for (i = 0; i < 2; ++i) { + for (j = 0; j < 5; ++j) { + x = a[i][j] ^ (-m & (a[i][j] ^ b[i][j])); + y = b[i][j] ^ (-m & (a[i][j] ^ b[i][j])); + a[i][j] = x; + b[i][j] = y; } } } -static void -mbedtls_x25519_init( mbedtls_x25519_context *ctx ) -{ - mbedtls_platform_zeroize( ctx, sizeof( mbedtls_x25519_context ) ); +static void HaclEcFormatFexpand(uint64_t o[5], uint8_t p[32]) { + o[0] = READ64LE(p + 000) >> 00 & 0x7ffffffffffff; + o[1] = READ64LE(p + 006) >> 03 & 0x7ffffffffffff; + o[2] = READ64LE(p + 014) >> 06 & 0x7ffffffffffff; + o[3] = READ64LE(p + 023) >> 01 & 0x7ffffffffffff; + o[4] = READ64LE(p + 030) >> 12 & 0x7ffffffffffff; } -static void -mbedtls_x25519_free( mbedtls_x25519_context *ctx ) -{ - if( !ctx ) - return; - mbedtls_platform_zeroize( ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ); - mbedtls_platform_zeroize( ctx->peer_point, MBEDTLS_X25519_KEY_SIZE_BYTES ); +static void HaclEcFormatFcontract(uint8_t o[32], uint64_t p[5]) { + uint64_t m; + HaclBignumCarry(p); + p[0] += 19 * (p[4] >> 51); + p[4] &= 0x7ffffffffffff; + HaclBignumCarry(p); + p[0] += 19 * (p[4] >> 51); + p[1] += p[0] >> 51; + p[0] &= 0x7ffffffffffff; + p[1] &= 0x7ffffffffffff; + p[4] &= 0x7ffffffffffff; + m = GTE(p[0], 0x7ffffffffffed); + m &= EQ(p[1], 
0x7ffffffffffff); + m &= EQ(p[2], 0x7ffffffffffff); + m &= EQ(p[3], 0x7ffffffffffff); + m &= EQ(p[4], 0x7ffffffffffff); + p[0] -= 0x7ffffffffffed & m; + p[1] -= 0x7ffffffffffff & m; + p[2] -= 0x7ffffffffffff & m; + p[3] -= 0x7ffffffffffff & m; + p[4] -= 0x7ffffffffffff & m; + Write64le(o + 000, p[1] << 51 | p[0] >> 00); + Write64le(o + 010, p[2] << 38 | p[1] >> 13); + Write64le(o + 020, p[3] << 25 | p[2] >> 26); + Write64le(o + 030, p[4] << 12 | p[3] >> 39); } -static int -mbedtls_x25519_make_params( mbedtls_x25519_context *ctx, size_t *olen, - unsigned char *buf, size_t blen, - int( *f_rng )(void *, unsigned char *, size_t), - void *p_rng ) -{ - int ret = 0; - uint8_t base[MBEDTLS_X25519_KEY_SIZE_BYTES] = {0}; - if( ( ret = f_rng( p_rng, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) ) != 0 ) - return ret; - *olen = MBEDTLS_X25519_KEY_SIZE_BYTES + 4; - if( blen < *olen ) - return( MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL ); - *buf++ = MBEDTLS_ECP_TLS_NAMED_CURVE; - *buf++ = MBEDTLS_ECP_TLS_CURVE25519 >> 8; - *buf++ = MBEDTLS_ECP_TLS_CURVE25519 & 0xFF; - *buf++ = MBEDTLS_X25519_KEY_SIZE_BYTES; - base[0] = 9; - Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, base ); - base[0] = 0; - if( timingsafe_memcmp( buf, base, MBEDTLS_X25519_KEY_SIZE_BYTES) == 0 ) - return MBEDTLS_ERR_ECP_RANDOM_FAILED; - return( 0 ); +static void HaclEcFormatScalarOfPoint(uint8_t o[32], uint64_t p[2][5]) { + uint64_t t[2][5]; + HaclBignumCrecip(t[0], p[1]); + HaclBignumFmul(t[1], p[0], t[0]); + HaclEcFormatFcontract(o, t[1]); } -static int -mbedtls_x25519_read_params( mbedtls_x25519_context *ctx, - const unsigned char **buf, const unsigned char *end ) -{ - if( end - *buf < MBEDTLS_X25519_KEY_SIZE_BYTES + 1 ) - return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); - if( ( *(*buf)++ != MBEDTLS_X25519_KEY_SIZE_BYTES ) ) - return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); - memcpy( ctx->peer_point, *buf, MBEDTLS_X25519_KEY_SIZE_BYTES ); - *buf += MBEDTLS_X25519_KEY_SIZE_BYTES; - return( 0 ); +static void 
HaclEcAddAndDoubleFmonty(uint64_t xz2[2][5], uint64_t xz3[2][5], + uint64_t xz[2][5], uint64_t xzprime[2][5], + uint64_t qx[5]) { + uint64_t b[7][5]; + HaclBignumCopy(b[0], xz[0]); + HaclBignumFsum(xz[0], xz[1]); + HaclBignumFdif(xz[1], b[0]); + HaclBignumCopy(b[0], xzprime[0]); + HaclBignumFsum(xzprime[0], xzprime[1]); + HaclBignumFdif(xzprime[1], b[0]); + HaclBignumFmul(b[4], xzprime[0], xz[1]); + HaclBignumFmul(b[5], xz[0], xzprime[1]); + HaclBignumCopy(b[0], b[4]); + HaclBignumFsum(b[4], b[5]); + HaclBignumFdif(b[5], b[0]); + HaclBignumFsqr(xz3[0], b[4], 1); + HaclBignumFsqr(b[6], b[5], 1); + HaclBignumFmul(xz3[1], b[6], qx); + HaclBignumFsqr(b[2], xz[0], 1); + HaclBignumFsqr(b[3], xz[1], 1); + HaclBignumFmul(xz2[0], b[2], b[3]); + HaclBignumFdif(b[3], b[2]); + HaclBignumFscalar(b[1], b[3], 121665); + HaclBignumFsum(b[1], b[2]); + HaclBignumFmul(xz2[1], b[1], b[3]); } -static int -mbedtls_x25519_get_params( mbedtls_x25519_context *ctx, const mbedtls_ecp_keypair *key, - mbedtls_x25519_ecdh_side side ) -{ - size_t olen = 0; - switch( side ) { - case MBEDTLS_X25519_ECDH_THEIRS: - return mbedtls_ecp_point_write_binary( &key->grp, &key->Q, - MBEDTLS_ECP_PF_COMPRESSED, - &olen, ctx->peer_point, - MBEDTLS_X25519_KEY_SIZE_BYTES ); - case MBEDTLS_X25519_ECDH_OURS: - return mbedtls_mpi_write_binary_le( &key->d, ctx->our_secret, - MBEDTLS_X25519_KEY_SIZE_BYTES ); - default: - return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); +/** + * Computes elliptic curve 25519. 
+ * @note it has 126 bits of security + */ +void curve25519(uint8_t mypublic[32], const uint8_t secret[32], + const uint8_t basepoint[32]) { + uint32_t i, j; + uint8_t e[32], s; + uint64_t q[5], t[4][2][5] = {{{1}}, {{0}, {1}}}; + HaclEcFormatFexpand(q, basepoint); + for (j = 0; j < 32; ++j) e[j] = secret[j]; + e[0] &= 248; + e[31] = (e[31] & 127) | 64; + HaclBignumCopy(t[1][0], q); + for (i = 32; i--;) { + for (s = e[i], j = 4; j--;) { + HaclEcPointSwap(t[0], t[1], s >> 7); + HaclEcAddAndDoubleFmonty(t[2], t[3], t[0], t[1], q); + HaclEcPointSwap(t[2], t[3], s >> 7); + s <<= 1; + HaclEcPointSwap(t[2], t[3], s >> 7); + HaclEcAddAndDoubleFmonty(t[0], t[1], t[2], t[3], q); + HaclEcPointSwap(t[0], t[1], s >> 7); + s <<= 1; } + } + HaclEcFormatScalarOfPoint(mypublic, t[0]); } - -static int -mbedtls_x25519_calc_secret( mbedtls_x25519_context *ctx, size_t *olen, - unsigned char *buf, size_t blen, - int( *f_rng )(void *, unsigned char *, size_t), - void *p_rng ) -{ - /* f_rng and p_rng are not used here because this implementation does not - need blinding since it has constant trace. (todo(jart): wut?) 
*/ - (( void )f_rng); - (( void )p_rng); - *olen = MBEDTLS_X25519_KEY_SIZE_BYTES; - if( blen < *olen ) - return( MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL ); - Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, ctx->peer_point); - /* Wipe the DH secret and don't let the peer chose a small subgroup point */ - mbedtls_platform_zeroize( ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ); - if( timingsafe_memcmp( buf, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) == 0 ) - return MBEDTLS_ERR_ECP_RANDOM_FAILED; - return( 0 ); -} - -static int -mbedtls_x25519_make_public( mbedtls_x25519_context *ctx, size_t *olen, - unsigned char *buf, size_t blen, - int( *f_rng )(void *, unsigned char *, size_t), - void *p_rng ) -{ - int ret = 0; - unsigned char base[MBEDTLS_X25519_KEY_SIZE_BYTES] = { 0 }; - if( ctx == NULL ) - return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA ); - if( ( ret = f_rng( p_rng, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) ) != 0 ) - return ret; - *olen = MBEDTLS_X25519_KEY_SIZE_BYTES + 1; - if( blen < *olen ) - return(MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL); - *buf++ = MBEDTLS_X25519_KEY_SIZE_BYTES; - base[0] = 9; - Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, base ); - base[0] = 0; - if( memcmp( buf, base, MBEDTLS_X25519_KEY_SIZE_BYTES ) == 0 ) - return MBEDTLS_ERR_ECP_RANDOM_FAILED; - return( ret ); -} - -static int -mbedtls_x25519_read_public( mbedtls_x25519_context *ctx, - const unsigned char *buf, size_t blen ) -{ - if( blen < MBEDTLS_X25519_KEY_SIZE_BYTES + 1 ) - return(MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL); - if( (*buf++ != MBEDTLS_X25519_KEY_SIZE_BYTES) ) - return(MBEDTLS_ERR_ECP_BAD_INPUT_DATA); - memcpy( ctx->peer_point, buf, MBEDTLS_X25519_KEY_SIZE_BYTES ); - return( 0 ); -} - -/** - * \brief This function sets up the ECDH context with the information - * given. - * - * This function should be called after mbedtls_ecdh_init() but - * before mbedtls_ecdh_make_params(). There is no need to call - * this function before mbedtls_ecdh_read_params(). 
- * - * This is the first function used by a TLS server for ECDHE - * ciphersuites. - * - * \param ctx The ECDH context to set up. - * \param grp_id The group id of the group to set up the context for. - * - * \return \c 0 on success. - */ -int mbedtls_everest_setup( mbedtls_ecdh_context_everest *ctx, int grp_id ) -{ - if( grp_id != MBEDTLS_ECP_DP_CURVE25519 ) - return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; - mbedtls_x25519_init( &ctx->ctx ); - return 0; -} - -/** - * \brief This function frees a context. - * - * \param ctx The context to free. - */ -void mbedtls_everest_free( mbedtls_ecdh_context_everest *ctx ) -{ - mbedtls_x25519_free( &ctx->ctx ); -} - -/** - * \brief This function generates a public key and a TLS - * ServerKeyExchange payload. - * - * This is the second function used by a TLS server for ECDHE - * ciphersuites. (It is called after mbedtls_ecdh_setup().) - * - * \note This function assumes that the ECP group (grp) of the - * \p ctx context has already been properly set, - * for example, using mbedtls_ecp_group_load(). - * - * \see ecp.h - * - * \param ctx The ECDH context. - * \param olen The number of characters written. - * \param buf The destination buffer. - * \param blen The length of the destination buffer. - * \param f_rng The RNG function. - * \param p_rng The RNG context. - * - * \return \c 0 on success. - * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. - */ -int mbedtls_everest_make_params( mbedtls_ecdh_context_everest *ctx, size_t *olen, - unsigned char *buf, size_t blen, - int( *f_rng )( void *, unsigned char *, size_t ), - void *p_rng ) -{ - mbedtls_x25519_context *x25519_ctx = &ctx->ctx; - return mbedtls_x25519_make_params( x25519_ctx, olen, buf, blen, f_rng, p_rng ); -} - -/** - * \brief This function parses and processes a TLS ServerKeyExhange - * payload. - * - * This is the first function used by a TLS client for ECDHE - * ciphersuites. - * - * \see ecp.h - * - * \param ctx The ECDH context. 
- * \param buf The pointer to the start of the input buffer. - * \param end The address for one Byte past the end of the buffer. - * - * \return \c 0 on success. - * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. - * - */ -int mbedtls_everest_read_params( mbedtls_ecdh_context_everest *ctx, - const unsigned char **buf, - const unsigned char *end ) -{ - mbedtls_x25519_context *x25519_ctx = &ctx->ctx; - return mbedtls_x25519_read_params( x25519_ctx, buf, end ); -} - -/** - * \brief This function sets up an ECDH context from an EC key. - * - * It is used by clients and servers in place of the - * ServerKeyEchange for static ECDH, and imports ECDH - * parameters from the EC key information of a certificate. - * - * \see ecp.h - * - * \param ctx The ECDH context to set up. - * \param key The EC key to use. - * \param side Defines the source of the key: 1: Our key, or - * 0: The key of the peer. - * - * \return \c 0 on success. - * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. - * - */ -int mbedtls_everest_get_params( mbedtls_ecdh_context_everest *ctx, - const mbedtls_ecp_keypair *key, - mbedtls_everest_ecdh_side side ) -{ - mbedtls_x25519_context *x25519_ctx = &ctx->ctx; - mbedtls_x25519_ecdh_side s = side == MBEDTLS_EVEREST_ECDH_OURS ? - MBEDTLS_X25519_ECDH_OURS : - MBEDTLS_X25519_ECDH_THEIRS; - return mbedtls_x25519_get_params( x25519_ctx, key, s ); -} - -/** - * \brief This function generates a public key and a TLS - * ClientKeyExchange payload. - * - * This is the second function used by a TLS client for ECDH(E) - * ciphersuites. - * - * \see ecp.h - * - * \param ctx The ECDH context. - * \param olen The number of Bytes written. - * \param buf The destination buffer. - * \param blen The size of the destination buffer. - * \param f_rng The RNG function. - * \param p_rng The RNG context. - * - * \return \c 0 on success. - * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. 
- */ -int mbedtls_everest_make_public( mbedtls_ecdh_context_everest *ctx, size_t *olen, - unsigned char *buf, size_t blen, - int( *f_rng )( void *, unsigned char *, size_t ), - void *p_rng ) -{ - mbedtls_x25519_context *x25519_ctx = &ctx->ctx; - return mbedtls_x25519_make_public( x25519_ctx, olen, buf, blen, f_rng, p_rng ); -} - -/** - * \brief This function parses and processes a TLS ClientKeyExchange - * payload. - * - * This is the third function used by a TLS server for ECDH(E) - * ciphersuites. (It is called after mbedtls_ecdh_setup() and - * mbedtls_ecdh_make_params().) - * - * \see ecp.h - * - * \param ctx The ECDH context. - * \param buf The start of the input buffer. - * \param blen The length of the input buffer. - * - * \return \c 0 on success. - * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. - */ -int mbedtls_everest_read_public( mbedtls_ecdh_context_everest *ctx, - const unsigned char *buf, size_t blen ) -{ - mbedtls_x25519_context *x25519_ctx = &ctx->ctx; - return mbedtls_x25519_read_public( x25519_ctx, buf, blen ); -} - -/** - * \brief This function derives and exports the shared secret. - * - * This is the last function used by both TLS client - * and servers. - * - * \note If \p f_rng is not NULL, it is used to implement - * countermeasures against side-channel attacks. - * For more information, see mbedtls_ecp_mul(). - * - * \see ecp.h - * - * \param ctx The ECDH context. - * \param olen The number of Bytes written. - * \param buf The destination buffer. - * \param blen The length of the destination buffer. - * \param f_rng The RNG function. - * \param p_rng The RNG context. - * - * \return \c 0 on success. - * \return An \c MBEDTLS_ERR_ECP_XXX error code on failure. 
- */ -int mbedtls_everest_calc_secret( mbedtls_ecdh_context_everest *ctx, size_t *olen, - unsigned char *buf, size_t blen, - int( *f_rng )( void *, unsigned char *, size_t ), - void *p_rng ) -{ - mbedtls_x25519_context *x25519_ctx = &ctx->ctx; - return mbedtls_x25519_calc_secret( x25519_ctx, olen, buf, blen, f_rng, p_rng ); -} - -#endif /* MBEDTLS_ECDH_C && MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED */ diff --git a/third_party/mbedtls/everest.h b/third_party/mbedtls/everest.h index be4c43f16..592aff1ea 100644 --- a/third_party/mbedtls/everest.h +++ b/third_party/mbedtls/everest.h @@ -1,52 +1,10 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ -#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ -#include "third_party/mbedtls/config.h" -#include "third_party/mbedtls/ecp.h" +#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_ +#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -#define MBEDTLS_ECP_TLS_CURVE25519 0x1d -#define MBEDTLS_X25519_KEY_SIZE_BYTES 32 - -typedef enum { - MBEDTLS_X25519_ECDH_OURS, - MBEDTLS_X25519_ECDH_THEIRS, -} mbedtls_x25519_ecdh_side; - -typedef struct { - unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES]; - unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES]; -} mbedtls_x25519_context; - -typedef enum { - MBEDTLS_EVEREST_ECDH_OURS, - MBEDTLS_EVEREST_ECDH_THEIRS, -} mbedtls_everest_ecdh_side; - -typedef struct { - mbedtls_x25519_context ctx; -} mbedtls_ecdh_context_everest; - -int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int); -void mbedtls_everest_free(mbedtls_ecdh_context_everest *); -int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *, - unsigned char *, size_t, - int (*)(void *, unsigned char *, size_t), - void *); -int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *, - const unsigned char **, const unsigned char *); -int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *, - const mbedtls_ecp_keypair *, - 
mbedtls_everest_ecdh_side); -int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *, - unsigned char *, size_t, - int (*)(void *, unsigned char *, size_t), - void *); -int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *, - const unsigned char *, size_t); -int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *, - unsigned char *, size_t, - int (*)(void *, unsigned char *, size_t), - void *); +void curve25519(uint8_t[32], const uint8_t[32], const uint8_t[32]); COSMOPOLITAN_C_END_ -#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_ */ diff --git a/third_party/mbedtls/mbedtls.mk b/third_party/mbedtls/mbedtls.mk index 6f3ea1b68..8bbea0621 100644 --- a/third_party/mbedtls/mbedtls.mk +++ b/third_party/mbedtls/mbedtls.mk @@ -55,7 +55,7 @@ $(THIRD_PARTY_MBEDTLS_A_OBJS): \ o/$(MODE)/third_party/mbedtls/everest.o: \ OVERRIDE_CFLAGS += \ - -Os + -O3 o/$(MODE)/third_party/mbedtls/bigmul4.o \ o/$(MODE)/third_party/mbedtls/bigmul6.o: \ @@ -70,11 +70,6 @@ o/$(MODE)/third_party/mbedtls/shiftright2-avx.o: \ OVERRIDE_CFLAGS += \ -O3 -mavx -# tail recursion is so important because everest was written in f* -o/$(MODE)/third_party/mbedtls/everest.o: \ - OVERRIDE_CFLAGS += \ - -foptimize-sibling-calls - THIRD_PARTY_MBEDTLS_LIBS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x))) THIRD_PARTY_MBEDTLS_SRCS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_SRCS)) THIRD_PARTY_MBEDTLS_HDRS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_HDRS)) diff --git a/third_party/mbedtls/secp256r1.c b/third_party/mbedtls/secp256r1.c index 53ad1f62d..7df7f9ac8 100644 --- a/third_party/mbedtls/secp256r1.c +++ b/third_party/mbedtls/secp256r1.c @@ -26,7 +26,7 @@ #define H(w) (w & 0xffffffff00000000) /** - * Fastest quasi-reduction modulo NIST P-256. + * Fastest quasi-reduction modulo ℘256. 
* * p = 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1 * B = T + 2×S₁ + 2×S₂ + S₃ + S₄ – D₁ – D₂ – D₃ – D₄ mod p diff --git a/third_party/mbedtls/secp384r1.c b/third_party/mbedtls/secp384r1.c index 96652c43e..307b72164 100644 --- a/third_party/mbedtls/secp384r1.c +++ b/third_party/mbedtls/secp384r1.c @@ -24,7 +24,7 @@ #define Q(i) p[i >> 1] /** - * Fastest quasi-reduction modulo Prime 384. + * Fastest quasi-reduction modulo ℘384. * * p = 2³⁸⁴ – 2¹²⁸ – 2⁶ + 2³² – 1 * B = T + 2×S₁ + S₂ + S₃ + S₄ + S₅ + S₆ – D₁ – D₂ – D₃ mod p @@ -44,8 +44,7 @@ void secp384r1(uint64_t p[12]) { int r; char o; - signed char G; - uint64_t A, B, C, D, E, F, a, b, c; + uint64_t A, B, C, D, E, F, G, a, b, c; A = Q(0); B = Q(2); C = Q(4); @@ -57,8 +56,8 @@ void secp384r1(uint64_t p[12]) { a = Q(22) << 32 | Q(21) >> 32; b = Q(23) >> 32; ADC(C, C, a << 1, 0, o); - ADC(D, D, (b << 1 | a >> 63), o, o); - ADC(E, E, (b >> 63), o, o); + ADC(D, D, b << 1 | a >> 63, o, o); + ADC(E, E, b >> 63, o, o); ADC(F, F, o, o, o); G += o; ADC(A, A, Q(12), 0, o); @@ -118,91 +117,105 @@ void secp384r1(uint64_t p[12]) { asm volatile(/* S₁ = (0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖0 ‖0 ‖0 ‖0 ) */ "mov\t21*4(%9),%7\n\t" "mov\t23*4(%9),%k8\n\t" + "mov\t%7,%%r12\n\t" + "shr\t$63,%%r12\n\t" "shl\t%7\n\t" - "rcl\t%8\n\t" + "shl\t%8\n\t" + "or\t%%r12,%8\n\t" + "mov\t13*4(%9),%%r12\n\t" "add\t%7,%2\n\t" + "mov\t23*4(%9),%k7\n\t" "adc\t%8,%3\n\t" + "mov\t15*4(%9),%%r13\n\t" "adc\t$0,%4\n\t" + "mov\t12*4(%9),%k8\n\t" "adc\t$0,%5\n\t" - "adc\t$0,%b6\n\t" - /* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */ - "add\t12*4(%9),%0\n\t" - "adc\t14*4(%9),%1\n\t" - "adc\t16*4(%9),%2\n\t" - "adc\t18*4(%9),%3\n\t" - "adc\t20*4(%9),%4\n\t" - "adc\t22*4(%9),%5\n\t" - "adc\t$0,%b6\n\t" - /* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */ - "mov\t12*4(%9),%k7\n\t" + "mov\t17*4(%9),%%r14\n\t" + "adc\t$0,%6\n\t" + "mov\t19*4(%9),%%r15\n\t" + /* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */ + "shl\t$32,%8\n\t" + "or\t%8,%7\n\t" 
"mov\t23*4(%9),%k8\n\t" + "sub\t%7,%0\n\t" + "mov\t21*4(%9),%7\n\t" + "sbb\t%%r12,%1\n\t" + "sbb\t%%r13,%2\n\t" + "sbb\t%%r14,%3\n\t" + "sbb\t%%r15,%4\n\t" + "sbb\t%7,%5\n\t" + "mov\t12*4(%9),%k7\n\t" + "sbb\t$0,%6\n\t" + /* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */ "shl\t$32,%7\n\t" "or\t%7,%8\n\t" "add\t21*4(%9),%0\n\t" - "adc\t%8,%1\n\t" - "adc\t13*4(%9),%2\n\t" - "adc\t15*4(%9),%3\n\t" - "adc\t17*4(%9),%4\n\t" - "adc\t19*4(%9),%5\n\t" - "adc\t$0,%b6\n\t" - /* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0 ‖A₂₃‖0 ) */ "mov\t23*4(%9),%k7\n\t" + "adc\t%8,%1\n\t" "mov\t20*4(%9),%k8\n\t" + "adc\t%%r12,%2\n\t" + "mov\t12*4(%9),%%r12\n\t" + "adc\t%%r13,%3\n\t" + "mov\t14*4(%9),%%r13\n\t" + "adc\t%%r14,%4\n\t" + "mov\t16*4(%9),%%r14\n\t" + "adc\t%%r15,%5\n\t" + "mov\t18*4(%9),%%r15\n\t" + "adc\t$0,%6\n\t" + /* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0 ‖A₂₃‖0 ) */ "shl\t$32,%7\n\t" "shl\t$32,%8\n\t" "add\t%7,%0\n\t" "adc\t%8,%1\n\t" - "adc\t12*4(%9),%2\n\t" - "adc\t14*4(%9),%3\n\t" - "adc\t16*4(%9),%4\n\t" - "adc\t18*4(%9),%5\n\t" - "adc\t$0,%b6\n\t" + "adc\t%%r12,%2\n\t" + "adc\t%%r13,%3\n\t" + "adc\t%%r14,%4\n\t" + "adc\t%%r15,%5\n\t" + "adc\t$0,%6\n\t" + /* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */ + "add\t%%r12,%0\n\t" + "mov\t20*4(%9),%%r12\n\t" + "adc\t%%r13,%1\n\t" + "mov\t22*4(%9),%%r13\n\t" + "adc\t%%r14,%2\n\t" + "adc\t%%r15,%3\n\t" + "adc\t%%r12,%4\n\t" + "adc\t%%r13,%5\n\t" + "adc\t$0,%6\n\t" /* S₅ = (0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0 ‖0 ‖0 ‖0 ) */ - "mov\t23*4(%9),%k7\n\t" - "mov\t20*4(%9),%k8\n\t" - "shl\t$32,%7\n\t" - "shl\t$32,%8\n\t" - "add\t20*4(%9),%2\n\t" - "adc\t22*4(%9),%3\n\t" + "add\t%%r12,%2\n\t" + "adc\t%%r13,%3\n\t" "adc\t$0,%4\n\t" "adc\t$0,%5\n\t" - "adc\t$0,%b6\n\t" + "adc\t$0,%6\n\t" /* S₆ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖0 ‖0 ‖A₂₀) */ - "mov\t20*4(%9),%k7\n\t" - "mov\t21*4(%9),%k8\n\t" + "mov\t%%r12d,%k7\n\t" + "mov\t%%r12,%8\n\t" + "shr\t$32,%8\n\t" "shl\t$32,%8\n\t" "add\t%7,%0\n\t" "adc\t%8,%1\n\t" - 
"adc\t22*4(%9),%2\n\t" + "adc\t%%r13,%2\n\t" "adc\t$0,%3\n\t" "adc\t$0,%4\n\t" "adc\t$0,%5\n\t" - "adc\t$0,%b6\n\t" - /* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */ - "mov\t23*4(%9),%k7\n\t" - "mov\t12*4(%9),%k8\n\t" - "shl\t$32,%8\n\t" - "or\t%8,%7\n\t" - "sub\t%7,%0\n\t" - "sbb\t13*4(%9),%1\n\t" - "sbb\t15*4(%9),%2\n\t" - "sbb\t17*4(%9),%3\n\t" - "sbb\t19*4(%9),%4\n\t" - "sbb\t21*4(%9),%5\n\t" - "sbb\t$0,%b6\n\t" + "adc\t$0,%6\n\t" /* D₂ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0 ) */ - "mov\t20*4(%9),%k7\n\t" - "mov\t23*4(%9),%k8\n\t" + "mov\t%%r12d,%k7\n\t" + "mov\t21*4(%9),%%r12\n\t" + "mov\t%%r13,%8\n\t" + "shr\t$32,%8\n\t" "shl\t$32,%7\n\t" "sub\t%7,%0\n\t" - "sbb\t21*4(%9),%1\n\t" + "sbb\t%%r12,%1\n\t" "sbb\t%8,%2\n\t" "sbb\t$0,%3\n\t" "sbb\t$0,%4\n\t" "sbb\t$0,%5\n\t" - "sbb\t$0,%b6\n\t" + "sbb\t$0,%6\n\t" /* D₃ = (0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖0 ‖A₂₃‖A₂₃‖0 ‖0 ‖0 ) */ - "mov\t23*4(%9),%k7\n\t" + "mov\t%%r13,%7\n\t" + "shr\t$32,%7\n\t" "mov\t%k7,%k8\n\t" "shl\t$32,%7\n\t" "sub\t%7,%1\n\t" @@ -210,11 +223,11 @@ void secp384r1(uint64_t p[12]) { "sbb\t$0,%3\n\t" "sbb\t$0,%4\n\t" "sbb\t$0,%5\n\t" - "sbb\t$0,%b6\n\t" + "sbb\t$0,%6" : "+r"(A), "+r"(B), "+r"(C), "+r"(D), "+r"(E), "+r"(F), "+q"(G), "=&r"(a), "=&r"(b) : "r"(p) - : "memory"); + : "memory", "r12", "r13", "r14", "r15"); #endif p[0] = A; p[1] = B; @@ -223,11 +236,12 @@ void secp384r1(uint64_t p[12]) { p[4] = E; p[5] = F; p[6] = G; - p[7] = 0; - p[8] = 0; - p[9] = 0; - p[10] = 0; - p[11] = 0; + G = CONCEAL("r", 0L); + p[7] = G; + p[8] = G; + p[9] = G; + p[10] = G; + p[11] = G; } int ecp_mod_p384(mbedtls_mpi *N) { @@ -249,3 +263,130 @@ int ecp_mod_p384(mbedtls_mpi *N) { } return 0; } + +/* +Instructions: 115 +Total Cycles: 46 +Total uOps: 116 +uOps Per Cycle: 2.52 +IPC: 2.50 +Block RThroughput: 31.0 + +SIMULATION 0123456789 0123456789 +Index 0123456789 0123456789 012345 +[0,0] DR . . . . . . . . . xorl %r10d, %r10d +[0,1] DeeeeeER . . . . . . . . movq (%rdi), %r9 +[0,2] DeeeeeER . . . . . . . . 
movq 8(%rdi), %r8 +[0,3] D=eeeeeER . . . . . . . . movq 16(%rdi), %rsi +[0,4] D=eeeeeER . . . . . . . . movq 24(%rdi), %rcx +[0,5] D==eeeeeER. . . . . . . . movq 32(%rdi), %rdx +[0,6] .D==eeeeeER . . . . . . . movq 40(%rdi), %rax +[0,7] .D=eeeeeE-R . . . . . . . movq 84(%rdi), %r11 +[0,8] .D==eeeeeER . . . . . . . movl 92(%rdi), %ebx +[0,9] .D======eER . . . . . . . movq %r11, %r12 +[0,10] .D=======eER . . . . . . . shrq $63, %r12 +[0,11] .D======eE-R . . . . . . . shlq %r11 +[0,12] . D======eER . . . . . . . shlq %rbx +[0,13] . D=======eER . . . . . . . orq %r12, %rbx +[0,14] . D==eeeeeE-R . . . . . . . movq 52(%rdi), %r12 +[0,15] . D======eE-R . . . . . . . addq %r11, %rsi +[0,16] . D==eeeeeE-R . . . . . . . movl 92(%rdi), %r11d +[0,17] . D========eER . . . . . . . adcq %rbx, %rcx +[0,18] . D==eeeeeE-R . . . . . . . movq 60(%rdi), %r13 +[0,19] . D========eER. . . . . . . adcq $0, %rdx +[0,20] . D==eeeeeE--R. . . . . . . movl 48(%rdi), %ebx +[0,21] . D=========eER . . . . . . adcq $0, %rax +[0,22] . D===eeeeeE--R . . . . . . movq 68(%rdi), %r14 +[0,23] . D==========eER . . . . . . adcq $0, %r10 +[0,24] . D==eeeeeE---R . . . . . . movq 76(%rdi), %r15 +[0,25] . D======eE---R . . . . . . shlq $32, %rbx +[0,26] . D=======eE--R . . . . . . orq %rbx, %r11 +[0,27] . D===eeeeeE--R . . . . . . movl 92(%rdi), %ebx +[0,28] . D========eE-R . . . . . . subq %r11, %r9 +[0,29] . D===eeeeeE--R . . . . . . movq 84(%rdi), %r11 +[0,30] . D========eER . . . . . . sbbq %r12, %r8 +[0,31] . D=========eER . . . . . . sbbq %r13, %rsi +[0,32] . D==========eER . . . . . . sbbq %r14, %rcx +[0,33] . D===========eER. . . . . . sbbq %r15, %rdx +[0,34] . D============eER . . . . . sbbq %r11, %rax +[0,35] . D===eeeeeE-----R . . . . . movl 48(%rdi), %r11d +[0,36] . .D============eER . . . . . sbbq $0, %r10 +[0,37] . .D========eE----R . . . . . shlq $32, %r11 +[0,38] . .D=========eE---R . . . . . orq %r11, %rbx +[0,39] . .D==eeeeeE------R . . . . . movl 92(%rdi), %r11d +[0,40] . .D======eeeeeeE-R . 
. . . . addq 84(%rdi), %r9 +[0,41] . . D===========eER . . . . . adcq %rbx, %r8 +[0,42] . . D==eeeeeE-----R . . . . . movl 80(%rdi), %ebx +[0,43] . . D============eER . . . . . adcq %r12, %rsi +[0,44] . . D==eeeeeE------R . . . . . movq 48(%rdi), %r12 +[0,45] . . D=============eER . . . . . adcq %r13, %rcx +[0,46] . . D===eeeeeE------R . . . . . movq 56(%rdi), %r13 +[0,47] . . D=============eER. . . . . adcq %r14, %rdx +[0,48] . . D==eeeeeE-------R. . . . . movq 64(%rdi), %r14 +[0,49] . . D==============eER . . . . adcq %r15, %rax +[0,50] . . D===eeeeeE-------R . . . . movq 72(%rdi), %r15 +[0,51] . . D===============eER . . . . adcq $0, %r10 +[0,52] . . D=======eE--------R . . . . shlq $32, %r11 +[0,53] . . D=======eE-------R . . . . shlq $32, %rbx +[0,54] . . D=========eE-----R . . . . addq %r11, %r9 +[0,55] . . D==========eE----R . . . . adcq %rbx, %r8 +[0,56] . . D===========eE---R . . . . adcq %r12, %rsi +[0,57] . . D============eE--R . . . . adcq %r13, %rcx +[0,58] . . D=============eE-R . . . . adcq %r14, %rdx +[0,59] . . D=============eER . . . . adcq %r15, %rax +[0,60] . . D==============eER . . . . adcq $0, %r10 +[0,61] . . D=========eE-----R . . . . addq %r12, %r9 +[0,62] . . D=eeeeeE---------R . . . . movq 80(%rdi), %r12 +[0,63] . . D==============eER . . . . adcq %r13, %r8 +[0,64] . . D==eeeeeE--------R . . . . movq 88(%rdi), %r13 +[0,65] . . .D==============eER . . . . adcq %r14, %rsi +[0,66] . . .D===============eER. . . . adcq %r15, %rcx +[0,67] . . .D================eER . . . adcq %r12, %rdx +[0,68] . . .D=================eER . . . adcq %r13, %rax +[0,69] . . .D==================eER . . . adcq $0, %r10 +[0,70] . . .D===============eE---R . . . addq %r12, %rsi +[0,71] . . . D===============eE--R . . . adcq %r13, %rcx +[0,72] . . . D================eE-R . . . adcq $0, %rdx +[0,73] . . . D=================eER . . . adcq $0, %rax +[0,74] . . . D==================eER . . . adcq $0, %r10 +[0,75] . . . D====eE--------------R . . . 
movl %r12d, %r11d +[0,76] . . . D====eE--------------R . . . movq %r12, %rbx +[0,77] . . . D====eE-------------R . . . shrq $32, %rbx +[0,78] . . . D============eE-----R . . . shlq $32, %rbx +[0,79] . . . D=======eE----------R . . . addq %r11, %r9 +[0,80] . . . D=============eE----R . . . adcq %rbx, %r8 +[0,81] . . . D=================eER . . . adcq %r13, %rsi +[0,82] . . . D==================eER. . . adcq $0, %rcx +[0,83] . . . D==================eER . . adcq $0, %rdx +[0,84] . . . D===================eER . . adcq $0, %rax +[0,85] . . . D====================eER . . adcq $0, %r10 +[0,86] . . . D===eE-----------------R . . movl %r12d, %r11d +[0,87] . . . DeeeeeE----------------R . . movq 84(%rdi), %r12 +[0,88] . . . D===eE-----------------R . . movq %r13, %rbx +[0,89] . . . D================eE---R . . shrq $32, %rbx +[0,90] . . . D=================eE--R . . shlq $32, %r11 +[0,91] . . . D==================eE-R . . subq %r11, %r9 +[0,92] . . . D===================eER . . sbbq %r12, %r8 +[0,93] . . . D====================eER . . sbbq %rbx, %rsi +[0,94] . . . D=====================eER. . sbbq $0, %rcx +[0,95] . . . .D=====================eER . sbbq $0, %rdx +[0,96] . . . .D======================eER . sbbq $0, %rax +[0,97] . . . .D=======================eER . sbbq $0, %r10 +[0,98] . . . .D==eE---------------------R . movq %r13, %r11 +[0,99] . . . .D=================eE------R . shrq $32, %r11 +[0,100] . . . .D==================eE-----R . movl %r11d, %ebx +[0,101] . . . . D==================eE----R . shlq $32, %r11 +[0,102] . . . . D===================eE---R . subq %r11, %r8 +[0,103] . . . . D====================eE--R . sbbq %rbx, %rsi +[0,104] . . . . D=====================eE-R . sbbq $0, %rcx +[0,105] . . . . D======================eER . sbbq $0, %rdx +[0,106] . . . . D=======================eER . sbbq $0, %rax +[0,107] . . . . D=======================eER. sbbq $0, %r10 +[0,108] . . . . D================eE-------R. movq %r9, (%rdi) +[0,109] . . . . 
D===================eE----R. movq %r8, 8(%rdi) +[0,110] . . . . D====================eE---R. movq %rsi, 16(%rdi) +[0,111] . . . . D=====================eE--R. movq %rcx, 24(%rdi) +[0,112] . . . . D======================eE-R. movq %rdx, 32(%rdi) +[0,113] . . . . D======================eER. movq %rax, 40(%rdi) +[0,114] . . . . D=======================eER movq %r10, 48(%rdi) +*/ diff --git a/third_party/mbedtls/ssl_ciphersuites.c b/third_party/mbedtls/ssl_ciphersuites.c index b465480f8..1329ec6e9 100644 --- a/third_party/mbedtls/ssl_ciphersuites.c +++ b/third_party/mbedtls/ssl_ciphersuites.c @@ -61,7 +61,6 @@ static const uint16_t ciphersuite_preference[] = MBEDTLS_TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256, MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CCM, MBEDTLS_TLS_DHE_RSA_WITH_AES_256_CCM, - /* weakened perfect forward secrecy */ MBEDTLS_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, MBEDTLS_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256, diff --git a/third_party/mbedtls/test/everest_test.c b/third_party/mbedtls/test/everest_test.c new file mode 100644 index 000000000..e201fa88a --- /dev/null +++ b/third_party/mbedtls/test/everest_test.c @@ -0,0 +1,77 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/rand/rand.h" +#include "libc/stdio/stdio.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/testlib.h" +#include "third_party/mbedtls/config.h" +#include "third_party/mbedtls/endian.h" + +void Hacl_Curve25519_crypto_scalarmult(uint8_t *, uint8_t *, uint8_t *); +void curve25519(uint8_t[32], uint8_t[32], uint8_t[32]); + +const uint64_t kNumbers[] = { + 0x0000000000000000, // + 0x0000000000000001, // + 0x0000000000001000, // + 0x0000000002000000, // + 0x0000004000000000, // + 0x0008000000000000, // + 0x8000000000000000, // + 0x0007ffffffffffff, // + 0x0000003fffffffff, // + 0x0000000001ffffff, // + 0x0000000000000fff, // + 0xffffffffffffffff, // + 0xfff8000000000000, // +}; + +TEST(everest, tinierVersionBehavesTheSame) { + size_t i; + uint8_t secret[32], bpoint[32], public[2][32]; + for (i = 0; i < 500; ++i) { + rngset(secret, sizeof(secret), rand64, -1); + rngset(bpoint, sizeof(bpoint), rand64, -1); + Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint); + curve25519(public[1], secret, bpoint); + ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0]))); + } + for (i = 0; i < 500; ++i) { + Write64le(secret + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]); + Write64le(secret + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]); + Write64le(secret + 020, kNumbers[rand() % ARRAYLEN(kNumbers)]); + Write64le(secret + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]); + Write64le(bpoint + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]); + Write64le(bpoint + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]); + Write64le(bpoint + 020, 
kNumbers[rand() % ARRAYLEN(kNumbers)]); + Write64le(bpoint + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]); + Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint); + curve25519(public[1], secret, bpoint); + ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0]))); + } +} + +BENCH(everest, bench) { + uint8_t secret[32], bpoint[32], public[32]; + rngset(secret, sizeof(secret), rand64, -1); + rngset(bpoint, sizeof(bpoint), rand64, -1); + EZBENCH2("everest", donothing, + Hacl_Curve25519_crypto_scalarmult(public, secret, bpoint)); + EZBENCH2("mariana", donothing, curve25519(public, secret, bpoint)); +} diff --git a/third_party/mbedtls/test/everest_unravaged.c b/third_party/mbedtls/test/everest_unravaged.c new file mode 100644 index 000000000..3ad6cb66f --- /dev/null +++ b/third_party/mbedtls/test/everest_unravaged.c @@ -0,0 +1,899 @@ +#include "libc/bits/bits.h" +#include "libc/limits.h" +#include "third_party/mbedtls/asn1.h" +#include "third_party/mbedtls/bignum.h" +#include "third_party/mbedtls/common.h" +#include "third_party/mbedtls/error.h" +#include "third_party/mbedtls/platform.h" + +asm(".ident\t\"\\n\\n\ +Everest (Apache 2.0)\\n\ +Copyright 2016-2018 INRIA and Microsoft Corporation\""); +asm(".include \"libc/disclaimer.inc\""); + +/* clang-format off */ +/* + * ECDH with curve-optimized implementation multiplexing + * + * Copyright 2016-2018 INRIA and Microsoft Corporation + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of mbed TLS (https://tls.mbed.org) + */ + +#ifdef memcpy +#undef memcpy +#endif +#define memcpy(x,y,z) __builtin_memcpy(x,y,z) + +#define load64_le(b) READ64LE(b) +#define store64_le(b, i) WRITE64LE(b, i) + +#define KRML_HOST_EXIT exit +#define KRML_HOST_PRINTF printf + +#define KRML_EXIT \ + do { \ + KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ + KRML_HOST_EXIT(254); \ + } while (0) + +#define _KRML_CHECK_SIZE_PRAGMA \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") + +#define KRML_CHECK_SIZE(size_elt, sz) \ + do { \ + _KRML_CHECK_SIZE_PRAGMA \ + if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } \ + } while (0) + +typedef const char *Prims_string; + +typedef struct { + uint32_t length; + const char *data; +} FStar_Bytes_bytes; + +typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int, + krml_checked_int_t; + +/* Prims_nat not yet in scope */ +inline static int32_t krml_time() { + return (int32_t)time(NULL); +} + +static uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a ^ b; + uint64_t minus_x = ~x + (uint64_t)1U; + uint64_t x_or_minus_x = x | minus_x; + uint64_t xnx = x_or_minus_x >> (uint32_t)63U; + return xnx - (uint64_t)1U; +} + +static uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a; + uint64_t y = b; + uint64_t x_xor_y = x ^ y; + uint64_t x_sub_y = x - y; + uint64_t x_sub_y_xor_y = x_sub_y ^ y; + uint64_t q = x_xor_y | x_sub_y_xor_y; + uint64_t x_xor_q = x ^ q; + uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; + return x_xor_q_ - (uint64_t)1U; +} + +static uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a ^ b; + uint32_t minus_x = ~x + 
(uint32_t)1U; + uint32_t x_or_minus_x = x | minus_x; + uint32_t xnx = x_or_minus_x >> (uint32_t)31U; + return xnx - (uint32_t)1U; +} + +static uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a; + uint32_t y = b; + uint32_t x_xor_y = x ^ y; + uint32_t x_sub_y = x - y; + uint32_t x_sub_y_xor_y = x_sub_y ^ y; + uint32_t q = x_xor_y | x_sub_y_xor_y; + uint32_t x_xor_q = x ^ q; + uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; + return x_xor_q_ - (uint32_t)1U; +} + +static uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a ^ b; + uint16_t minus_x = ~x + (uint16_t)1U; + uint16_t x_or_minus_x = x | minus_x; + uint16_t xnx = x_or_minus_x >> (uint32_t)15U; + return xnx - (uint16_t)1U; +} + +static uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a; + uint16_t y = b; + uint16_t x_xor_y = x ^ y; + uint16_t x_sub_y = x - y; + uint16_t x_sub_y_xor_y = x_sub_y ^ y; + uint16_t q = x_xor_y | x_sub_y_xor_y; + uint16_t x_xor_q = x ^ q; + uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; + return x_xor_q_ - (uint16_t)1U; +} + +static uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a ^ b; + uint8_t minus_x = ~x + (uint8_t)1U; + uint8_t x_or_minus_x = x | minus_x; + uint8_t xnx = x_or_minus_x >> (uint32_t)7U; + return xnx - (uint8_t)1U; +} + +static uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a; + uint8_t y = b; + uint8_t x_xor_y = x ^ y; + uint8_t x_sub_y = x - y; + uint8_t x_sub_y_xor_y = x_sub_y ^ y; + uint8_t q = x_xor_y | x_sub_y_xor_y; + uint8_t x_xor_q = x ^ q; + uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; + return x_xor_q_ - (uint8_t)1U; +} + +static void Hacl_Bignum_Modulo_carry_top(uint64_t *b) +{ + uint64_t b4 = b[4U]; + uint64_t b0 = b[0U]; + uint64_t b4_ = b4 & (uint64_t)0x7ffffffffffffU; + uint64_t b0_ = b0 + (uint64_t)19U * (b4 >> (uint32_t)51U); + b[4U] = b4_; + b[0U] = b0_; +} + +inline static void Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, uint128_t 
*input) +{ + uint32_t i; + for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) + { + uint128_t xi = input[i]; + output[i] = (uint64_t)xi; + } +} + /* Adds input[i] * s, computed as a 128-bit product, to output[i] for each of the five limbs. */+inline static void +Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint128_t *output, uint64_t *input, uint64_t s) +{ + uint32_t i; + for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) + { + uint128_t xi = output[i]; + uint64_t yi = input[i]; + output[i] = xi + (uint128_t)yi * s; + } +} + /* For limbs 0 through 3: keeps the low 51 bits in place and adds the bits above them to the next limb. Limb 4 is not reduced here. */+inline static void Hacl_Bignum_Fproduct_carry_wide_(uint128_t *tmp) +{ + uint32_t i; + for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) + { + uint32_t ctr = i; + uint128_t tctr = tmp[ctr]; + uint128_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint64_t r0 = (uint64_t)tctr & (uint64_t)0x7ffffffffffffU; + uint128_t c = tctr >> (uint32_t)51U; + tmp[ctr] = (uint128_t)r0; + tmp[ctr + (uint32_t)1U] = tctrp1 + c; + } +} + /* Moves limbs 0..3 up one position, places the old limb 4 in position 0 and multiplies it by 19. */+inline static void Hacl_Bignum_Fmul_shift_reduce(uint64_t *output) +{ + uint64_t tmp = output[4U]; + uint64_t b0; + { + uint32_t i; + for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) + { + uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U; + uint64_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + } + output[0U] = tmp; + b0 = output[0U]; + output[0U] = (uint64_t)19U * b0; +} + /* Accumulates input times each limb of input2 into output, calling Hacl_Bignum_Fmul_shift_reduce on input between limbs. input is overwritten as a side effect. */+static void +Hacl_Bignum_Fmul_mul_shift_reduce_(uint128_t *output, uint64_t *input, uint64_t *input2) +{ + uint32_t i; + uint64_t input2i; + { + uint32_t i0; + for (i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0 = i0 + (uint32_t)1U) + { + uint64_t input2i0 = input2[i0]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i0); + Hacl_Bignum_Fmul_shift_reduce(input); + } + } + i = (uint32_t)4U; + input2i = input2[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); +} + /* Multiplies input by input2 into output. input is copied to a local buffer first, so only the copy is modified by the shift-reduce steps. After the wide carry, the bits of limb 4 above 51 are folded into limb 0 times 19, and a final carry moves the overflow of limb 0 into limb 1. */+inline static void Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2) +{ + uint64_t tmp[5U] = { 0U }; + memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]); + 
KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U); + { + uint128_t t[5U]; + { + uint32_t _i; + for (_i = 0U; _i < (uint32_t)5U; ++_i) + t[_i] = (uint128_t)(uint64_t)0U; + } + { + uint128_t b4; + uint128_t b0; + uint128_t b4_; + uint128_t b0_; + uint64_t i0; + uint64_t i1; + uint64_t i0_; + uint64_t i1_; + Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2); + Hacl_Bignum_Fproduct_carry_wide_(t); + b4 = t[4U]; + b0 = t[0U]; + b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU; + b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U); + t[4U] = b4_; + t[0U] = b0_; + Hacl_Bignum_Fproduct_copy_from_wide_(output, t); + i0 = output[0U]; + i1 = output[1U]; + i0_ = i0 & (uint64_t)0x7ffffffffffffU; + i1_ = i1 + (i0 >> (uint32_t)51U); + output[0U] = i0_; + output[1U] = i1_; + } + } +} + /* Writes the five 128-bit column sums of output squared into tmp. Cross terms use the doubled limbs d0 and d1; terms that wrap past limb 4 use factors premultiplied by 19 (d2, d4, d419 and r3 * 19). output is only read here. */+inline static void Hacl_Bignum_Fsquare_fsquare__(uint128_t *tmp, uint64_t *output) +{ + uint64_t r0 = output[0U]; + uint64_t r1 = output[1U]; + uint64_t r2 = output[2U]; + uint64_t r3 = output[3U]; + uint64_t r4 = output[4U]; + uint64_t d0 = r0 * (uint64_t)2U; + uint64_t d1 = r1 * (uint64_t)2U; + uint64_t d2 = r2 * (uint64_t)2U * (uint64_t)19U; + uint64_t d419 = r4 * (uint64_t)19U; + uint64_t d4 = d419 * (uint64_t)2U; + uint128_t s0 = (uint128_t)r0 * r0 + (uint128_t)d4 * r1 + (uint128_t)d2 * r3; + uint128_t s1 = (uint128_t)d0 * r1 + (uint128_t)d4 * r2 + (uint128_t)(r3 * (uint64_t)19U) * r3; + uint128_t s2 = (uint128_t)d0 * r2 + (uint128_t)r1 * r1 + (uint128_t)d4 * r3; + uint128_t s3 = (uint128_t)d0 * r3 + (uint128_t)d1 * r2 + (uint128_t)r4 * d419; + uint128_t s4 = (uint128_t)d0 * r4 + (uint128_t)d1 * r3 + (uint128_t)r2 * r2; + tmp[0U] = s0; + tmp[1U] = s1; + tmp[2U] = s2; + tmp[3U] = s3; + tmp[4U] = s4; +} + /* Squares output in place using tmp as scratch: computes the wide square, carries limbs 0..3, folds the bits of limb 4 above 51 into limb 0 times 19, narrows to 64 bits and carries limb 0 into limb 1. */+inline static void Hacl_Bignum_Fsquare_fsquare_(uint128_t *tmp, uint64_t *output) +{ + uint128_t b4; + uint128_t b0; + uint128_t b4_; + uint128_t b0_; + uint64_t i0; + uint64_t i1; + uint64_t i0_; + uint64_t i1_; + Hacl_Bignum_Fsquare_fsquare__(tmp, output); + 
Hacl_Bignum_Fproduct_carry_wide_(tmp); + b4 = tmp[4U]; + b0 = tmp[0U]; + b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU; + b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U); + tmp[4U] = b4_; + tmp[0U] = b0_; + Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); + i0 = output[0U]; + i1 = output[1U]; + i0_ = i0 & (uint64_t)0x7ffffffffffffU; + i1_ = i1 + (i0 >> (uint32_t)51U); + output[0U] = i0_; + output[1U] = i1_; +} + /* Squares input in place count1 times using tmp as scratch. The first squaring is unconditional, so count1 == 0 still squares once. */+static void +Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, uint128_t *tmp, uint32_t count1) +{ + uint32_t i; + Hacl_Bignum_Fsquare_fsquare_(tmp, input); + for (i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U) + Hacl_Bignum_Fsquare_fsquare_(tmp, input); +} + /* Copies input to output, then squares output in place count1 times with a zeroed local scratch array. */+inline static void +Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1) +{ + KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U); + { + uint128_t t[5U]; + { + uint32_t _i; + for (_i = 0U; _i < (uint32_t)5U; ++_i) + t[_i] = (uint128_t)(uint64_t)0U; + } + memcpy(output, input, (uint32_t)5U * sizeof input[0U]); + Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); + } +} + /* Squares output in place count1 times with a zeroed local scratch array. */+inline static void Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1) +{ + KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U); + { + uint128_t t[5U]; + { + uint32_t _i; + for (_i = 0U; _i < (uint32_t)5U; ++_i) + t[_i] = (uint128_t)(uint64_t)0U; + } + Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); + } +} + /* Runs a fixed sequence of squarings and multiplications on z, using four 5-limb slots of buf (offsets 0, 5, 10, 15), and writes the result to out. NOTE(review): presumably the field inverse of z, judging by the name; confirm against the upstream HACL sources. */+inline static void Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z) +{ + uint64_t buf[20U] = { 0U }; + uint64_t *a0 = buf; + uint64_t *t00 = buf + (uint32_t)5U; + uint64_t *b0 = buf + (uint32_t)10U; + uint64_t *t01; + uint64_t *b1; + uint64_t *c0; + uint64_t *a; + uint64_t *t0; + uint64_t *b; + uint64_t *c; + Hacl_Bignum_Fsquare_fsquare_times(a0, z, (uint32_t)1U); + Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)2U); + Hacl_Bignum_Fmul_fmul(b0, t00, z); + Hacl_Bignum_Fmul_fmul(a0, b0, a0); + 
Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)1U); + Hacl_Bignum_Fmul_fmul(b0, t00, b0); + Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5U); + t01 = buf + (uint32_t)5U; + b1 = buf + (uint32_t)10U; + c0 = buf + (uint32_t)15U; + Hacl_Bignum_Fmul_fmul(b1, t01, b1); + Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10U); + Hacl_Bignum_Fmul_fmul(c0, t01, b1); + Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20U); + Hacl_Bignum_Fmul_fmul(t01, t01, c0); + Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10U); + Hacl_Bignum_Fmul_fmul(b1, t01, b1); + Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50U); + a = buf; + t0 = buf + (uint32_t)5U; + b = buf + (uint32_t)10U; + c = buf + (uint32_t)15U; + Hacl_Bignum_Fmul_fmul(c, t0, b); + Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100U); + Hacl_Bignum_Fmul_fmul(t0, t0, c); + Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50U); + Hacl_Bignum_Fmul_fmul(t0, t0, b); + Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5U); + Hacl_Bignum_Fmul_fmul(out, t0, a); +} + /* Adds b to a limb by limb, with no carry propagation between limbs. */+inline static void Hacl_Bignum_fsum(uint64_t *a, uint64_t *b) +{ + uint32_t i; + for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) + { + uint64_t xi = a[i]; + uint64_t yi = b[i]; + a[i] = xi + yi; + } +} + /* Replaces a with (b + k) - a limb by limb. k is 0x3fffffffffff68 for limb 0 and 0x3ffffffffffff8 for limbs 1..4, which are 8 * 0x7ffffffffffed and 8 * 0x7ffffffffffff. b itself is not modified; the offsets are added to a local copy. NOTE(review): presumably the offset keeps the unsigned subtraction from wrapping; confirm the bounds callers guarantee. */+inline static void Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b) +{ + uint64_t tmp[5U] = { 0U }; + uint64_t b0; + uint64_t b1; + uint64_t b2; + uint64_t b3; + uint64_t b4; + memcpy(tmp, b, (uint32_t)5U * sizeof b[0U]); + b0 = tmp[0U]; + b1 = tmp[1U]; + b2 = tmp[2U]; + b3 = tmp[3U]; + b4 = tmp[4U]; + tmp[0U] = b0 + (uint64_t)0x3fffffffffff68U; + tmp[1U] = b1 + (uint64_t)0x3ffffffffffff8U; + tmp[2U] = b2 + (uint64_t)0x3ffffffffffff8U; + tmp[3U] = b3 + (uint64_t)0x3ffffffffffff8U; + tmp[4U] = b4 + (uint64_t)0x3ffffffffffff8U; + { + uint32_t i; + for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) + { + uint64_t xi = a[i]; + uint64_t yi = tmp[i]; + a[i] = yi - xi; + } + } +} + /* Multiplies the five limbs of b by s in 128-bit precision, carries limbs 0..3, folds the bits of limb 4 above 51 into limb 0 times 19 and stores the narrowed limbs in output. */+inline 
static void Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s) +{ + KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U); + { + uint128_t tmp[5U]; + { + uint32_t _i; + for (_i = 0U; _i < (uint32_t)5U; ++_i) + tmp[_i] = (uint128_t)(uint64_t)0U; + } + { + uint128_t b4; + uint128_t b0; + uint128_t b4_; + uint128_t b0_; + { + uint32_t i; + for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) + { + uint64_t xi = b[i]; + tmp[i] = (uint128_t)xi * s; + } + } + Hacl_Bignum_Fproduct_carry_wide_(tmp); + b4 = tmp[4U]; + b0 = tmp[0U]; + b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU; + b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U); + tmp[4U] = b4_; + tmp[0U] = b0_; + Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); + } + } +} + /* Thin wrapper around Hacl_Bignum_Fmul_fmul. */+inline static void Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b) +{ + Hacl_Bignum_Fmul_fmul(output, a, b); +} + /* Thin wrapper around Hacl_Bignum_Crecip_crecip. */+inline static void Hacl_Bignum_crecip(uint64_t *output, uint64_t *input) +{ + Hacl_Bignum_Crecip_crecip(output, input); +} + /* Exchanges the bits of a[ctr - 1] and b[ctr - 1] that are selected by swap1; with swap1 == 0 both limbs are left unchanged. */+static void +Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) +{ + uint32_t i = ctr - (uint32_t)1U; + uint64_t ai = a[i]; + uint64_t bi = b[i]; + uint64_t x = swap1 & (ai ^ bi); + uint64_t ai1 = ai ^ x; + uint64_t bi1 = bi ^ x; + a[i] = ai1; + b[i] = bi1; +} + /* Applies the masked swap to limbs ctr - 1 down to 0 by recursion; does nothing when ctr is 0. */+static void +Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) +{ + if (!(ctr == (uint32_t)0U)) + { + uint32_t i; + Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr); + i = ctr - (uint32_t)1U; + Hacl_EC_Point_swap_conditional_(a, b, swap1, i); + } +} + /* Expands iswap into the mask 0 - iswap (all ones for iswap == 1, zero for iswap == 0) and applies the masked swap to both 5-limb halves of a and b. */+static void Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap) +{ + uint64_t swap1 = (uint64_t)0U - iswap; + Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5U); + Hacl_EC_Point_swap_conditional_(a + (uint32_t)5U, b + (uint32_t)5U, swap1, (uint32_t)5U); +} + /* Copies the ten limbs of input to output as two 5-limb memcpy calls. */+static void Hacl_EC_Point_copy(uint64_t *output, uint64_t *input) +{ + 
memcpy(output, input, (uint32_t)5U * sizeof input[0U]); + memcpy(output + (uint32_t)5U, + input + (uint32_t)5U, + (uint32_t)5U * sizeof (input + (uint32_t)5U)[0U]); +} + /* Unpacks 32 little-endian bytes into five 51-bit limbs: loads 64-bit words at byte offsets 0, 6, 12, 19 and 24, shifts them right by 0, 3, 6, 1 and 12 bits respectively and masks each to 51 bits. The last load covers bytes 24..31. */+static void Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input) +{ + uint64_t i0 = load64_le(input); + uint8_t *x00 = input + (uint32_t)6U; + uint64_t i1 = load64_le(x00); + uint8_t *x01 = input + (uint32_t)12U; + uint64_t i2 = load64_le(x01); + uint8_t *x02 = input + (uint32_t)19U; + uint64_t i3 = load64_le(x02); + uint8_t *x0 = input + (uint32_t)24U; + uint64_t i4 = load64_le(x0); + uint64_t output0 = i0 & (uint64_t)0x7ffffffffffffU; + uint64_t output1 = i1 >> (uint32_t)3U & (uint64_t)0x7ffffffffffffU; + uint64_t output2 = i2 >> (uint32_t)6U & (uint64_t)0x7ffffffffffffU; + uint64_t output3 = i3 >> (uint32_t)1U & (uint64_t)0x7ffffffffffffU; + uint64_t output4 = i4 >> (uint32_t)12U & (uint64_t)0x7ffffffffffffU; + output[0U] = output0; + output[1U] = output1; + output[2U] = output2; + output[3U] = output3; + output[4U] = output4; +} + /* Propagates carries upward in place: each of limbs 0..3 is cut to 51 bits and its excess is added to the next limb. Limb 4 receives the last carry and is left unmasked. */+static void Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input) +{ + uint64_t t0 = input[0U]; + uint64_t t1 = input[1U]; + uint64_t t2 = input[2U]; + uint64_t t3 = input[3U]; + uint64_t t4 = input[4U]; + uint64_t t1_ = t1 + (t0 >> (uint32_t)51U); + uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU; + uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U); + uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU; + uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U); + uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU; + uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U); + uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU; + input[0U] = t0_; + input[1U] = t1__; + input[2U] = t2__; + input[3U] = t3__; + input[4U] = t4_; +} + /* One carry pass followed by Hacl_Bignum_Modulo_carry_top on the same buffer. */+static void Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input) +{ + Hacl_EC_Format_fcontract_first_carry_pass(input); + Hacl_Bignum_Modulo_carry_top(input); +} + /* Performs the same limb-by-limb carry chain as Hacl_EC_Format_fcontract_first_carry_pass. */+static void Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input) +{ + uint64_t t0 = input[0U]; + 
uint64_t t1 = input[1U]; + uint64_t t2 = input[2U]; + uint64_t t3 = input[3U]; + uint64_t t4 = input[4U]; + uint64_t t1_ = t1 + (t0 >> (uint32_t)51U); + uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU; + uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U); + uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU; + uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U); + uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU; + uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U); + uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU; + input[0U] = t0_; + input[1U] = t1__; + input[2U] = t2__; + input[3U] = t3__; + input[4U] = t4_; +} + /* Second carry pass and carry_top, followed by one more carry from limb 0 into limb 1. */+static void Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input) +{ + uint64_t i0; + uint64_t i1; + uint64_t i0_; + uint64_t i1_; + Hacl_EC_Format_fcontract_second_carry_pass(input); + Hacl_Bignum_Modulo_carry_top(input); + i0 = input[0U]; + i1 = input[1U]; + i0_ = i0 & (uint64_t)0x7ffffffffffffU; + i1_ = i1 + (i0 >> (uint32_t)51U); + input[0U] = i0_; + input[1U] = i1_; +} + /* Branch-free final subtraction. mask combines a gte test of limb 0 against 0x7ffffffffffed with eq tests of limbs 1..4 against 0x7ffffffffffff; those constants ANDed with mask are then subtracted from the limbs. NOTE(review): assumes the 64-bit mask helpers return all ones or zero like the narrower helpers in this file; they are defined outside this view. */+static void Hacl_EC_Format_fcontract_trim(uint64_t *input) +{ + uint64_t a0 = input[0U]; + uint64_t a1 = input[1U]; + uint64_t a2 = input[2U]; + uint64_t a3 = input[3U]; + uint64_t a4 = input[4U]; + uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffedU); + uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffffU); + uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffffU); + uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffffU); + uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffffU); + uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; + uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffedU & mask); + uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffffU & mask); + uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffffU & mask); + uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffffU & mask); + uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffffU & mask); + input[0U] = a0_; + input[1U] = a1_; + input[2U] = a2_; + input[3U] = a3_; + input[4U] = 
a4_; +} + /* Packs the five limbs into four 64-bit words, with limb k placed at bit 51 * k, and stores them little-endian at output bytes 0..31. The pieces are combined with OR, so any limb bits above 51 would overlap the next limb; NOTE(review): presumably callers pass fully reduced limbs, as Hacl_EC_Format_fcontract does. */+static void Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input) +{ + uint64_t t0 = input[0U]; + uint64_t t1 = input[1U]; + uint64_t t2 = input[2U]; + uint64_t t3 = input[3U]; + uint64_t t4 = input[4U]; + uint64_t o0 = t1 << (uint32_t)51U | t0; + uint64_t o1 = t2 << (uint32_t)38U | t1 >> (uint32_t)13U; + uint64_t o2 = t3 << (uint32_t)25U | t2 >> (uint32_t)26U; + uint64_t o3 = t4 << (uint32_t)12U | t3 >> (uint32_t)39U; + uint8_t *b0 = output; + uint8_t *b1 = output + (uint32_t)8U; + uint8_t *b2 = output + (uint32_t)16U; + uint8_t *b3 = output + (uint32_t)24U; + store64_le(b0, o0); + store64_le(b1, o1); + store64_le(b2, o2); + store64_le(b3, o3); +} + /* Reduces input in place (two carry rounds and the conditional trim) and writes its 32-byte encoding to output. */+static void Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input) +{ + Hacl_EC_Format_fcontract_first_carry_full(input); + Hacl_EC_Format_fcontract_second_carry_full(input); + Hacl_EC_Format_fcontract_trim(input); + Hacl_EC_Format_fcontract_store(output, input); +} + /* Treats point as x in limbs 0..4 and z in limbs 5..9, multiplies x by Hacl_Bignum_crecip(z) and writes the contracted 32-byte result to scalar. */+static void Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point) +{ + uint64_t *x = point; + uint64_t *z = point + (uint32_t)5U; + uint64_t buf[10U] = { 0U }; + uint64_t *zmone = buf; + uint64_t *sc = buf + (uint32_t)5U; + Hacl_Bignum_crecip(zmone, z); + Hacl_Bignum_fmul(sc, x, zmone); + Hacl_EC_Format_fcontract(scalar, sc); +} + /* One combined step on the points p and pq (each with x in limbs 0..4 and z in limbs 5..9). Results are written to pp (x2, z2) and ppq (x3, z3); only the first five limbs of qmqp are read, in the multiplication that produces z3. p and pq are overwritten as scratch by the in-place sums and differences. NOTE(review): presumably the Montgomery ladder differential add-and-double; confirm against the upstream HACL sources. */+static void +Hacl_EC_AddAndDouble_fmonty( + uint64_t *pp, + uint64_t *ppq, + uint64_t *p, + uint64_t *pq, + uint64_t *qmqp +) +{ + uint64_t *qx = qmqp; + uint64_t *x2 = pp; + uint64_t *z2 = pp + (uint32_t)5U; + uint64_t *x3 = ppq; + uint64_t *z3 = ppq + (uint32_t)5U; + uint64_t *x = p; + uint64_t *z = p + (uint32_t)5U; + uint64_t *xprime = pq; + uint64_t *zprime = pq + (uint32_t)5U; + uint64_t buf[40U] = { 0U }; + uint64_t *origx = buf; + uint64_t *origxprime0 = buf + (uint32_t)5U; + uint64_t *xxprime0 = buf + (uint32_t)25U; + uint64_t *zzprime0 = buf + (uint32_t)30U; + uint64_t *origxprime; + uint64_t *xx0; + uint64_t *zz0; + uint64_t *xxprime; + uint64_t *zzprime; + uint64_t *zzzprime; + 
uint64_t *zzz; + uint64_t *xx; + uint64_t *zz; + uint64_t scalar; + memcpy(origx, x, (uint32_t)5U * sizeof x[0U]); + Hacl_Bignum_fsum(x, z); + Hacl_Bignum_fdifference(z, origx); + memcpy(origxprime0, xprime, (uint32_t)5U * sizeof xprime[0U]); + Hacl_Bignum_fsum(xprime, zprime); + Hacl_Bignum_fdifference(zprime, origxprime0); + Hacl_Bignum_fmul(xxprime0, xprime, z); + Hacl_Bignum_fmul(zzprime0, x, zprime); + origxprime = buf + (uint32_t)5U; + xx0 = buf + (uint32_t)15U; + zz0 = buf + (uint32_t)20U; + xxprime = buf + (uint32_t)25U; + zzprime = buf + (uint32_t)30U; + zzzprime = buf + (uint32_t)35U; + memcpy(origxprime, xxprime, (uint32_t)5U * sizeof xxprime[0U]); + Hacl_Bignum_fsum(xxprime, zzprime); + Hacl_Bignum_fdifference(zzprime, origxprime); + Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1U); + Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1U); + Hacl_Bignum_fmul(z3, zzzprime, qx); + Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1U); + Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1U); + zzz = buf + (uint32_t)10U; + xx = buf + (uint32_t)15U; + zz = buf + (uint32_t)20U; + Hacl_Bignum_fmul(x2, xx, zz); + Hacl_Bignum_fdifference(zz, xx); + scalar = (uint64_t)121665U; + Hacl_Bignum_fscalar(zzz, zz, scalar); + Hacl_Bignum_fsum(zzz, xx); + Hacl_Bignum_fmul(z2, zzz, zz); +} + /* One ladder step for the top bit of byt: conditionally swaps nq and nqpq on that bit, runs Hacl_EC_AddAndDouble_fmonty into nq2 and nqpq2, then conditionally swaps the two outputs on the same bit. */+static void +Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step( + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint8_t byt +) +{ + uint64_t bit0 = (uint64_t)(byt >> (uint32_t)7U); + uint64_t bit; + Hacl_EC_Point_swap_conditional(nq, nqpq, bit0); + Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q); + bit = (uint64_t)(byt >> (uint32_t)7U); + Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit); +} + /* Runs two ladder steps, for the top bit of byt and then the next bit. The second step swaps the roles of (nq, nqpq) and (nq2, nqpq2), so its result lands back in nq and nqpq. */+static void +Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step( + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint8_t byt +) +{ + uint8_t byt1; + 
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); + byt1 = byt << (uint32_t)1U; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); +} + /* Recursively performs i double steps, shifting byt left by two bits after each one. */+static void +Hacl_EC_Ladder_SmallLoop_cmult_small_loop( + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint8_t byt, + uint32_t i +) +{ + if (!(i == (uint32_t)0U)) + { + uint32_t i_ = i - (uint32_t)1U; + uint8_t byt_; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt); + byt_ = byt << (uint32_t)2U; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt_, i_); + } +} + /* Recursively processes bytes n1[i - 1] down to n1[0], running four double steps (eight bits) per byte. */+static void +Hacl_EC_Ladder_BigLoop_cmult_big_loop( + uint8_t *n1, + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint32_t i +) +{ + if (!(i == (uint32_t)0U)) + { + uint32_t i1 = i - (uint32_t)1U; + uint8_t byte = n1[i1]; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, (uint32_t)4U); + Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1); + } +} + /* Sets up a zeroed 40-limb work buffer holding four 10-limb points, copies q into nqpq, sets the first limb of nq to 1, runs the ladder over the 32 bytes of n1 and copies nq to result. */+static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q) +{ + uint64_t point_buf[40U] = { 0U }; + uint64_t *nq = point_buf; + uint64_t *nqpq = point_buf + (uint32_t)10U; + uint64_t *nq2 = point_buf + (uint32_t)20U; + uint64_t *nqpq2 = point_buf + (uint32_t)30U; + Hacl_EC_Point_copy(nqpq, q); + nq[0U] = (uint64_t)1U; + Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, (uint32_t)32U); + Hacl_EC_Point_copy(result, nq); +} + /* Public entry point. Expands the 32-byte basepoint into limbs with z set to 1, copies the 32-byte secret and masks bytes 0 and 31 of the copy, runs Hacl_EC_Ladder_cmult and writes the 32-byte result to mypublic. secret and basepoint are only read. */+void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint) +{ + uint64_t buf0[10U] = { 0U }; + uint64_t *x0 = buf0; + uint64_t *z = buf0 + (uint32_t)5U; + uint64_t *q; + Hacl_EC_Format_fexpand(x0, basepoint); + z[0U] = (uint64_t)1U; + q = buf0; + { + uint8_t e[32U] = { 0U }; + uint8_t e0; + uint8_t e31; + uint8_t e01; + uint8_t e311; + uint8_t e312; + uint8_t *scalar; + memcpy(e, secret, (uint32_t)32U 
* sizeof secret[0U]); + e0 = e[0U]; + e31 = e[31U]; + e01 = e0 & (uint8_t)248U; /* clear the low three bits of byte 0 */+ e311 = e31 & (uint8_t)127U; /* clear bit 7 of byte 31 */+ e312 = e311 | (uint8_t)64U; /* set bit 6 of byte 31 */+ e[0U] = e01; + e[31U] = e312; + scalar = e; + { + uint64_t buf[15U] = { 0U }; + uint64_t *nq = buf; + uint64_t *x = nq; + x[0U] = (uint64_t)1U; + Hacl_EC_Ladder_cmult(nq, scalar, q); /* overwrites the first ten limbs of buf with the ladder result */+ Hacl_EC_Format_scalar_of_point(mypublic, nq); + } + } +} diff --git a/third_party/mbedtls/test/secp384r1_test.c b/third_party/mbedtls/test/secp384r1_test.c new file mode 100644 index 000000000..68de75ce7 --- /dev/null +++ b/third_party/mbedtls/test/secp384r1_test.c @@ -0,0 +1,294 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/rand/rand.h" +#include "libc/stdio/stdio.h" +#include "libc/str/str.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/testlib.h" +#include "third_party/mbedtls/bignum.h" +#include "third_party/mbedtls/ecp.h" +#include "third_party/mbedtls/ecp_internal.h" +#include "third_party/mbedtls/math.h" +#ifdef MBEDTLS_ECP_C + /* Declared here and defined elsewhere; used below as the baseline that ecp_mod_p384 is compared with. */+int ecp_mod_p384_old(mbedtls_mpi *); + /* Entropy callback passed to mbedtls_mpi_fill_random: hands p and n to rngset with rand64 and always returns 0. The context argument c is unused. */+int GetEntropy(void *c, unsigned char *p, size_t n) { + rngset(p, n, rand64, -1); + return 0; +} + /* Compares the first 96 bytes of the limbs produced by ecp_mod_p384 and ecp_mod_p384_old for one random 96-byte value, dumping all twelve limb pairs and exiting with status 1 on a mismatch. NOTE(review): the random fill and both reductions run once, before the loop, so the 1000 iterations re-compare the same buffers; the dump loop also reuses i. Confirm whether the setup was meant to be inside the loop. */+TEST(secp384r1, testIsTheSame) { + int i; + mbedtls_mpi A, B; + mbedtls_mpi_init(&A); + mbedtls_mpi_init(&B); + mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0); + mbedtls_mpi_copy(&B, &A); + ecp_mod_p384(&A); + ecp_mod_p384_old(&B); + for (i = 0; i < 1000; ++i) { + if (memcmp(A.p, B.p, 12 * 8)) { + for (i = 0; i < 12; ++i) { + printf("0x%016lx vs. 0x%016lx %d\n", A.p[i], B.p[i], A.p[i] == B.p[i]); + } + exit(1); + } + } + mbedtls_mpi_free(&B); + mbedtls_mpi_free(&A); +} + /* Returns true when limb 6 is positive as a signed value, or when limbs 0..5 (least significant first) are >= the constant with limbs 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe and three limbs of 0xffffffffffffffff. The greater-than tests against 0xffffffffffffffff can never be true and only keep the chain uniform. */+static inline bool mbedtls_p384_gte(uint64_t p[7]) { + return (((int64_t)p[6] > 0 || + (p[5] > 0xffffffffffffffff || + (p[5] == 0xffffffffffffffff && + (p[4] > 0xffffffffffffffff || + (p[4] == 0xffffffffffffffff && + (p[3] > 0xffffffffffffffff || + (p[3] == 0xffffffffffffffff && + (p[2] > 0xfffffffffffffffe || + (p[2] == 0xfffffffffffffffe && + (p[1] > 0xffffffff00000000 || + (p[1] == 0xffffffff00000000 && + (p[0] > 0x00000000ffffffff || + (p[0] == 0x00000000ffffffff)))))))))))))); +} + /* Adds the 6-limb constant 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe, 0xffffffffffffffff (three times) to p and propagates the final carry into limb 6. Uses an add and adc chain in inline asm on x86-64 and the ADC macro otherwise. */+static inline void mbedtls_p384_gro(uint64_t p[7]) { +#if defined(__x86_64__) && !defined(__STRICT_ANSI__) + asm("addq\t%1,%0\n\t" + "adcq\t%2,8+%0\n\t" + "adcq\t%3,16+%0\n\t" + "adcq\t%4,24+%0\n\t" + "adcq\t%4,32+%0\n\t" + "adcq\t%4,40+%0\n\t" + "adcq\t$0,48+%0" + : "+o"(*p) + : "r"(0x00000000ffffffffl), "r"(0xffffffff00000000), + "i"(0xfffffffffffffffel), "i"(0xffffffffffffffff) + : "memory", "cc"); +#else + uint64_t c; + ADC(p[0], p[0], 
0x00000000ffffffff, 0, c); + ADC(p[1], p[1], 0xffffffff00000000, c, c); + ADC(p[2], p[2], 0xfffffffffffffffe, c, c); + ADC(p[3], p[3], 0xffffffffffffffff, c, c); + ADC(p[4], p[4], 0xffffffffffffffff, c, c); + ADC(p[5], p[5], 0xffffffffffffffff, c, c); + ADC(p[6], p[6], 0, c, c); +#endif +} + /* Subtracts the 6-limb constant 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe, 0xffffffffffffffff (three times) from p and propagates the final borrow into limb 6. Uses a sub and sbb chain in inline asm on x86-64 and the SBB macro otherwise. */+static inline void mbedtls_p384_red(uint64_t p[7]) { +#if defined(__x86_64__) && !defined(__STRICT_ANSI__) + asm("subq\t%1,%0\n\t" + "sbbq\t%2,8+%0\n\t" + "sbbq\t%3,16+%0\n\t" + "sbbq\t%4,24+%0\n\t" + "sbbq\t%4,32+%0\n\t" + "sbbq\t%4,40+%0\n\t" + "sbbq\t$0,48+%0" + : "+o"(*p) + : "r"(0x00000000ffffffffl), "r"(0xffffffff00000000), + "i"(0xfffffffffffffffel), "i"(0xffffffffffffffff) + : "memory", "cc"); +#else + uint64_t c; + SBB(p[0], p[0], 0x00000000ffffffff, 0, c); + SBB(p[1], p[1], 0xffffffff00000000, c, c); + SBB(p[2], p[2], 0xfffffffffffffffe, c, c); + SBB(p[3], p[3], 0xffffffffffffffff, c, c); + SBB(p[4], p[4], 0xffffffffffffffff, c, c); + SBB(p[5], p[5], 0xffffffffffffffff, c, c); + SBB(p[6], p[6], 0, c, c); +#endif +} + /* Calls mbedtls_p384_red repeatedly for as long as mbedtls_p384_gte(p) holds. */+static inline void mbedtls_p384_rum(uint64_t p[7]) { + while (mbedtls_p384_gte(p)) mbedtls_p384_red(p); +} + /* Calls secp384r1(X) and then corrects limbs 0..6: while limb 6 is negative as a signed value the constant is added back with mbedtls_p384_gro; otherwise it is subtracted with mbedtls_p384_red while mbedtls_p384_gte(X) holds. */+static inline void mbedtls_p384_mod(uint64_t X[12]) { + secp384r1(X); + if ((int64_t)X[6] < 0) { + do { + mbedtls_p384_gro(X); + } while ((int64_t)X[6] < 0); + } else { + while (mbedtls_p384_gte(X)) { + mbedtls_p384_red(X); + } + } +} + /* Runs mbedtls_p384_mod on twelve limbs of 0xffffffffffffffff and compares all twelve limbs with W, whose limbs after the fifth are implicitly zero; dumps both arrays and exits with status 1 on a mismatch. P is declared but not used. */+TEST(secp384r1, needsDownwardCorrection) { + int i; + uint64_t P[6] = { + 0x00000000ffffffff, // + 0xffffffff00000000, // + 0xfffffffffffffffe, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + }; + uint64_t X[12] = { + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + }; + uint64_t W[12] /* == X mod P */ = { + 0xfffffffe00000000, // + 
0x0000000200000000, // + 0xfffffffe00000000, // + 0x0000000200000000, // + 0x0000000000000001, // + }; + mbedtls_p384_mod(X); + if (memcmp(W, X, 12 * 8)) { + for (i = 0; i < 12; ++i) { + printf("0x%016lx vs. 0x%016lx %d\n", W[i], X[i], W[i] == X[i]); + } + exit(1); + } +} + /* Runs mbedtls_p384_mod on an input whose only nonzero limb is X[11] = 0x00000000ffffffff and compares all twelve limbs with W, whose limbs after the sixth are implicitly zero; dumps both arrays and exits with status 1 on a mismatch. P is declared but not used. */+TEST(secp384r1, needsUpwardCorrection) { + int i; + uint64_t P[6] = { + 0x00000000ffffffff, // + 0xffffffff00000000, // + 0xfffffffffffffffe, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + 0xffffffffffffffff, // + }; + uint64_t X[12] = { + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x0000000000000000, // + 0x00000000ffffffff, // + }; + uint64_t W[12] /* == X mod P */ = { + 0xffffffffffffffff, // + 0x0000000000000000, // + 0xfffffffefffffffd, // + 0x0000000100000000, // + 0x0000000000000000, // + 0x00000001ffffffff, // + }; + mbedtls_p384_mod(X); + if (memcmp(W, X, 12 * 8)) { + for (i = 0; i < 12; ++i) { + printf("0x%016lx vs. 
0x%016lx %d\n", W[i], X[i], W[i] == X[i]); + } + exit(1); + } +} + /* Benchmarks secp384r1, ecp_mod_p384 and ecp_mod_p384_old on one random 96-byte value. The same A is passed to every run, so later runs operate on the output of earlier ones. */+BENCH(secp384r1, bench) { + mbedtls_mpi A; + mbedtls_mpi_init(&A); + mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0); + EZBENCH2("secp384r1", donothing, secp384r1(A.p)); + EZBENCH2("ecp_mod_p384", donothing, ecp_mod_p384(&A)); + EZBENCH2("ecp_mod_p384_old", donothing, ecp_mod_p384_old(&A)); + mbedtls_mpi_free(&A); +} + /* Inline-asm variant: shifts the seven limbs left by one bit in memory with shl on limb 0 and rcl on limbs 1..6, then reduces with mbedtls_p384_rum. There is no architecture guard around the asm here. */+void mbedtls_p384_shl_a(uint64_t p[7]) { + asm("shlq\t%0\n\t" + "rclq\t8+%0\n\t" + "rclq\t16+%0\n\t" + "rclq\t24+%0\n\t" + "rclq\t32+%0\n\t" + "rclq\t40+%0\n\t" + "rclq\t48+%0\n\t" + : "+o"(*p) + : /* no inputs */ + : "memory", "cc"); + mbedtls_p384_rum(p); +} + /* Plain C variant of the one-bit left shift, followed by mbedtls_p384_rum. NOTE(review): limb 6 is overwritten with the bit shifted out of limb 5, whereas the asm variant rotates the old limb 6 left as well; the two agree only when limb 6 is zero on entry. */+void mbedtls_p384_shl_b(uint64_t p[7]) { + p[6] = p[5] >> 63; + p[5] = p[5] << 1 | p[4] >> 63; + p[4] = p[4] << 1 | p[3] >> 63; + p[3] = p[3] << 1 | p[2] >> 63; + p[2] = p[2] << 1 | p[1] >> 63; + p[1] = p[1] << 1 | p[0] >> 63; + p[0] = p[0] << 1; + mbedtls_p384_rum(p); +} + /* Benchmarks both shift variants on the same zero-initialised 7-limb array. */+BENCH(shl, bench) { + uint64_t A[7] = {0}; + EZBENCH2("mbedtls_p384_shl_a", donothing, mbedtls_p384_shl_a(A)); + EZBENCH2("mbedtls_p384_shl_b", donothing, mbedtls_p384_shl_b(A)); +} + /* Inline-asm subtraction of the 6-limb constant with borrow into limb 6, without the architecture guard used in mbedtls_p384_red. */+void mbedtls_p384_red_a(uint64_t p[7]) { + asm("subq\t%1,%0\n\t" + "sbbq\t%2,8+%0\n\t" + "sbbq\t%3,16+%0\n\t" + "sbbq\t%4,24+%0\n\t" + "sbbq\t%4,32+%0\n\t" + "sbbq\t%4,40+%0\n\t" + "sbbq\t$0,48+%0" + : "+o"(*p) + : "r"(0x00000000ffffffffl), "r"(0xffffffff00000000), + "i"(0xfffffffffffffffel), "i"(0xffffffffffffffff) + : "memory", "cc"); +} + /* The same subtraction written with the SBB macro, threading the borrow through c. */+void mbedtls_p384_red_b(uint64_t p[7]) { + uint64_t c; + SBB(p[0], p[0], 0x00000000ffffffff, 0, c); + SBB(p[1], p[1], 0xffffffff00000000, c, c); + SBB(p[2], p[2], 0xfffffffffffffffe, c, c); + SBB(p[3], p[3], 0xffffffffffffffff, c, c); + SBB(p[4], p[4], 0xffffffffffffffff, c, c); + SBB(p[5], p[5], 0xffffffffffffffff, c, c); + SBB(p[6], p[6], 0, c, c); +} + /* Benchmarks both subtraction variants on the same 7-limb array, which starts as all zeros. */+BENCH(red, bench) { + uint64_t A[7] = {0}; + EZBENCH2("mbedtls_p384_red_a", donothing, mbedtls_p384_red_a(A)); + EZBENCH2("mbedtls_p384_red_b", donothing, 
mbedtls_p384_red_b(A)); +} + +#endif /* MBEDTLS_ECP_C */ diff --git a/third_party/mbedtls/test/test.mk b/third_party/mbedtls/test/test.mk index 51b64adc3..4e492e171 100644 --- a/third_party/mbedtls/test/test.mk +++ b/third_party/mbedtls/test/test.mk @@ -78,7 +78,9 @@ THIRD_PARTY_MBEDTLS_TEST_COMS = \ o/$(MODE)/third_party/mbedtls/test/test_suite_timing.com \ o/$(MODE)/third_party/mbedtls/test/test_suite_version.com \ o/$(MODE)/third_party/mbedtls/test/test_suite_x509parse.com \ - o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com + o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com \ + o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com \ + o/$(MODE)/third_party/mbedtls/test/everest_test.com THIRD_PARTY_MBEDTLS_TEST_TESTS = \ $(THIRD_PARTY_MBEDTLS_TEST_COMS:%=%.ok) @@ -1340,3 +1342,22 @@ o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com.dbg: \ $(CRT) \ $(APE) @$(APELINK) + +o/$(MODE)/third_party/mbedtls/test/everest_test.com: o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg +o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg: \ + $(THIRD_PARTY_MBEDTLS_TEST_DEPS) \ + o/$(MODE)/third_party/mbedtls/test/everest_test.o \ + o/$(MODE)/third_party/mbedtls/test/everest_unravaged.o \ + $(LIBC_TESTMAIN) \ + $(CRT) \ + $(APE) + @$(APELINK) + +o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com: o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg +o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg: \ + $(THIRD_PARTY_MBEDTLS_TEST_DEPS) \ + o/$(MODE)/third_party/mbedtls/test/secp384r1_test.o \ + $(LIBC_TESTMAIN) \ + $(CRT) \ + $(APE) + @$(APELINK)