diff --git a/libc/nexgen32e/adc.S b/libc/nexgen32e/adc.S
deleted file mode 100644
index d58f7089b..000000000
--- a/libc/nexgen32e/adc.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8     -*-│
-│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                     :vi│
-╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
-│                                                                              │
-│ Permission to use, copy, modify, and/or distribute this software for         │
-│ any purpose with or without fee is hereby granted, provided that the         │
-│ above copyright notice and this permission notice appear in all copies.      │
-│                                                                              │
-│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
-│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
-│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
-│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
-│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
-│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
-│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
-│ PERFORMANCE OF THIS SOFTWARE.                                                │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/macros.internal.h"
-
-//	Computes C = A + B
-//
-//	@param	rdi is C
-//	@param	rsi is A
-//	@param	rdx is B
-//	@param	rcx is number of additions
-//	@return	al has carry
-adc:	.leafprologue
-	test	%ecx,%ecx
-	jz	1f
-	xor	%r9d,%r9d
-0:	mov	(%rsi,%r9,8),%rax
-	adc	(%rdx,%r9,8),%rax
-	mov	%rax,(%rdi,%r9,8)
-	inc	%r9d
-	loop	0b
-1:	setb	%al
-	.leafepilogue
-	.endfn	adc,globl
diff --git a/libc/nexgen32e/mul4x4adx.S b/libc/nexgen32e/mul4x4adx.S
index 268d91668..86a02797f 100644
--- a/libc/nexgen32e/mul4x4adx.S
+++ b/libc/nexgen32e/mul4x4adx.S
@@ -18,34 +18,47 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
 
+//	Computes 512-bit product of 256-bit and 256-bit numbers.
+//
+//		Instructions:        88
+//		Total Cycles:        36
+//		Total uOps:         120
+//		uOps Per Cycle:    3.33
+//		IPC:               2.44
+//		Block RThroughput: 20.0
+//
+//	@param	rdi receives 8 quadword result
+//	@param	rsi is left hand side which must have 4 quadwords
+//	@param	rdx is right hand side which must have 4 quadwords
+//	@note	words are host endian while array is little endian
+//	@mayalias
 Mul4x4Adx:
 	push	%rbp
 	mov	%rsp,%rbp
 	.profilable
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
+	sub	$56,%rsp
+	mov	%r15,-8(%rbp)
+	mov	%r14,-16(%rbp)
+	mov	%r13,-24(%rbp)
+	mov	%r12,-32(%rbp)
+	mov	%rbx,-40(%rbp)
 	mov	%rdx,%r12
-	push	%rbx
-	sub	$16,%rsp
 	mov	(%rdx),%rdx
 	mov	(%rsi),%rax
 	mov	16(%rsi),%r11
 	mov	24(%rsi),%r10
-	xor	%r13d,%r13d
 	mulx	%rax,%rbx,%rax
 	mov	%rbx,-48(%rbp)
 	mov	8(%rsi),%rbx
 	mulx	%rbx,%rdx,%rcx
-	adox	%rdx,%rax
+	add	%rdx,%rax
 	mov	(%r12),%rdx
 	mulx	%r11,%rdx,%r9
-	adox	%rdx,%rcx
+	adc	%rdx,%rcx
 	mov	(%r12),%rdx
 	mulx	%r10,%rdx,%r8
-	adox	%rdx,%r9
-	adox	%r13,%r8
+	adc	%rdx,%r9
+	adc	$0,%r8
 	xor	%r13d,%r13d
 	mov	(%rsi),%r14
 	mov	8(%r12),%rdx
@@ -105,12 +118,103 @@ Mul4x4Adx:
 	adox	%r14,%r10
 	mov	%rsi,(%rdi)
 	mov	%r10,56(%rdi)
-	add	$16,%rsp
-	pop	%rbx
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	pop	%rbp
+	mov	-8(%rbp),%r15
+	mov	-16(%rbp),%r14
+	mov	-24(%rbp),%r13
+	mov	-32(%rbp),%r12
+	mov	-40(%rbp),%rbx
+	leave
 	ret
 	.endfn	Mul4x4Adx,globl
+
+	.end
+TIMELINE VIEW       0123456789          012345
+Index     0123456789          0123456789
+[0,0]     DeER .    .    .    .    .    .    .   subq	$56, %rsp
+[0,1]     DeER .    .    .    .    .    .    .   movq	%r15, -8(%rbp)
+[0,2]     D=eER.    .    .    .    .    .    .   movq	%r14, -16(%rbp)
+[0,3]     D==eER    .    .    .    .    .    .   movq	%r13, -24(%rbp)
+[0,4]     D===eER   .    .    .    .    .    .   movq	%r12, -32(%rbp)
+[0,5]     D====eER  .    .    .    .    .    .   movq	%rbx, -40(%rbp)
+[0,6]     .DeE---R  .    .    .    .    .    .   movq	%rdx, %r12
+[0,7]     .DeeeeeER .    .    .    .    .    .   movq	(%rdx), %rdx
+[0,8]     .D=eeeeeER.    .    .    .    .    .   movq	(%rsi), %rax
+[0,9]     .D=eeeeeER.    .    .    .    .    .   movq	16(%rsi), %r11
+[0,10]    .D==eeeeeER    .    .    .    .    .   movq	24(%rsi), %r10
+[0,11]    . D=====eeeeER .    .    .    .    .   mulxq	%rax, %rbx, %rax
+[0,12]    . D========eER .    .    .    .    .   movq	%rbx, -48(%rbp)
+[0,13]    . D=eeeeeE---R .    .    .    .    .   movq	8(%rsi), %rbx
+[0,14]    .  D=====eeeeER.    .    .    .    .   mulxq	%rbx, %rdx, %rcx
+[0,15]    .  D========eER.    .    .    .    .   addq	%rdx, %rax
+[0,16]    .  D=eeeeeE---R.    .    .    .    .   movq	(%r12), %rdx
+[0,17]    .   D=====eeeeER    .    .    .    .   mulxq	%r11, %rdx, %r9
+[0,18]    .   D========eER    .    .    .    .   adcq	%rdx, %rcx
+[0,19]    .   DeeeeeE----R    .    .    .    .   movq	(%r12), %rdx
+[0,20]    .    D=====eeeeER   .    .    .    .   mulxq	%r10, %rdx, %r8
+[0,21]    .    D========eER   .    .    .    .   adcq	%rdx, %r9
+[0,22]    .    D=========eER  .    .    .    .   adcq	$0, %r8
+[0,23]    .    D-----------R  .    .    .    .   xorl	%r13d, %r13d
+[0,24]    .    .DeeeeeE----R  .    .    .    .   movq	(%rsi), %r14
+[0,25]    .    .DeeeeeE----R  .    .    .    .   movq	8(%r12), %rdx
+[0,26]    .    .D=====eeeeER  .    .    .    .   mulxq	%r14, %r14, %r15
+[0,27]    .    .D========eER  .    .    .    .   adoxq	%r14, %rax
+[0,28]    .    . D========eER .    .    .    .   adcxq	%r15, %rcx
+[0,29]    .    . D========eER .    .    .    .   movq	%rax, -56(%rbp)
+[0,30]    .    . D=====eeeeER .    .    .    .   mulxq	%rbx, %r14, %rax
+[0,31]    .    . D=========eER.    .    .    .   adoxq	%r14, %rcx
+[0,32]    .    .  D=========eER    .    .    .   adcxq	%rax, %r9
+[0,33]    .    .  D=====eeeeE-R    .    .    .   mulxq	%r11, %r14, %rax
+[0,34]    .    .  D==========eER   .    .    .   adoxq	%r14, %r9
+[0,35]    .    .  D===========eER  .    .    .   adcxq	%rax, %r8
+[0,36]    .    .   D=====eeeeE--R  .    .    .   mulxq	%r10, %rdx, %rax
+[0,37]    .    .   D===========eER .    .    .   adoxq	%rdx, %r8
+[0,38]    .    .   DeeeeeE-------R .    .    .   movq	16(%r12), %rdx
+[0,39]    .    .   D============eER.    .    .   adcxq	%r13, %rax
+[0,40]    .    .    D============eER    .    .   adoxq	%r13, %rax
+[0,41]    .    .    DeeeeeE--------R    .    .   movq	(%rsi), %r13
+[0,42]    .    .    D=====E--------R    .    .   xorl	%r15d, %r15d
+[0,43]    .    .    D=====eeeeE----R    .    .   mulxq	%r13, %r13, %r14
+[0,44]    .    .    .D=======eE----R    .    .   adoxq	%r13, %rcx
+[0,45]    .    .    .D========eE---R    .    .   adcxq	%r14, %r9
+[0,46]    .    .    .D=====eeeeE---R    .    .   mulxq	%rbx, %r14, %r13
+[0,47]    .    .    .D=========eE--R    .    .   adoxq	%r14, %r9
+[0,48]    .    .    . D=========eE-R    .    .   adcxq	%r13, %r8
+[0,49]    .    .    . D=====eeeeE--R    .    .   mulxq	%r11, %r14, %r13
+[0,50]    .    .    . D==========eER    .    .   adoxq	%r14, %r8
+[0,51]    .    .    . D===========eER   .    .   adcxq	%r13, %rax
+[0,52]    .    .    .  DeeeeeE------R   .    .   movq	(%rsi), %rsi
+[0,53]    .    .    .  D=====eeeeE--R   .    .   mulxq	%r10, %rdx, %r13
+[0,54]    .    .    .  D===========eER  .    .   adoxq	%rdx, %rax
+[0,55]    .    .    .  D============eER .    .   adcxq	%r15, %r13
+[0,56]    .    .    .   DeeeeeE-------R .    .   movq	24(%r12), %rdx
+[0,57]    .    .    .   D============eER.    .   adoxq	%r15, %r13
+[0,58]    .    .    .   D=====eeeeE----R.    .   mulxq	%rsi, %r12, %rsi
+[0,59]    .    .    .   D======E-------R.    .   xorl	%r14d, %r14d
+[0,60]    .    .    .    D========eE---R.    .   adoxq	%r12, %r9
+[0,61]    .    .    .    D=========eE--R.    .   adcxq	%rsi, %r8
+[0,62]    .    .    .    D=====eeeeE---R.    .   mulxq	%rbx, %rsi, %rbx
+[0,63]    .    .    .    D==========eE-R.    .   adoxq	%rsi, %r8
+[0,64]    .    .    .    .D==========eER.    .   adcxq	%rbx, %rax
+[0,65]    .    .    .    .D=====eeeeE--R.    .   mulxq	%r11, %r11, %rsi
+[0,66]    .    .    .    .DeeeeeE------R.    .   movq	-56(%rbp), %rbx
+[0,67]    .    .    .    .D===eE-------R.    .   movq	%rcx, 16(%rdi)
+[0,68]    .    .    .    . D==========eER    .   adcxq	%rsi, %r13
+[0,69]    .    .    .    . DeeeeeE------R    .   movq	-48(%rbp), %rsi
+[0,70]    .    .    .    . D====eE------R    .   movq	%rbx, 8(%rdi)
+[0,71]    .    .    .    . D===========eER   .   adoxq	%r11, %rax
+[0,72]    .    .    .    . D=======eE----R   .   movq	%r9, 24(%rdi)
+[0,73]    .    .    .    . D=========eE--R   .   movq	%r8, 32(%rdi)
+[0,74]    .    .    .    .  D===========eER  .   movq	%rax, 40(%rdi)
+[0,75]    .    .    .    .  D====eeeeE----R  .   mulxq	%r10, %rdx, %r10
+[0,76]    .    .    .    .  D===========eER  .   adoxq	%rdx, %r13
+[0,77]    .    .    .    .  D============eER .   adcxq	%r14, %r10
+[0,78]    .    .    .    .   D===========eER .   movq	%r13, 48(%rdi)
+[0,79]    .    .    .    .   D============eER.   adoxq	%r14, %r10
+[0,80]    .    .    .    .   D============eER.   movq	%rsi, (%rdi)
+[0,81]    .    .    .    .   D=============eER   movq	%r10, 56(%rdi)
+[0,82]    .    .    .    .   DeeeeeE---------R   movq	-8(%rbp), %r15
+[0,83]    .    .    .    .   DeeeeeE---------R   movq	-16(%rbp), %r14
+[0,84]    .    .    .    .    DeeeeeE--------R   movq	-24(%rbp), %r13
+[0,85]    .    .    .    .    DeeeeeE--------R   movq	-32(%rbp), %r12
+[0,86]    .    .    .    .    D=eeeeeE-------R   movq	-40(%rbp), %rbx
+[0,87]    .    .    .    .    D===eE---------R   addq	$56, %rsp
diff --git a/libc/nexgen32e/mul6x6adx.S b/libc/nexgen32e/mul6x6adx.S
index b90906014..313658bec 100644
--- a/libc/nexgen32e/mul6x6adx.S
+++ b/libc/nexgen32e/mul6x6adx.S
@@ -18,37 +18,50 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
 
+//	Computes 768-bit product of 384-bit and 384-bit numbers.
+//
+//		Instructions:       153
+//		Total Cycles:        73
+//		Total uOps:         261
+//		uOps Per Cycle:    3.58
+//		IPC:               2.10
+//		Block RThroughput: 43.5
+//
+//	@param	rdi receives 12 quadword result
+//	@param	rsi is left hand side which must have 6 quadwords
+//	@param	rdx is right hand side which must have 6 quadwords
+//	@note	words are host endian while array is little endian
+//	@mayalias
 Mul6x6Adx:
 	push	%rbp
 	mov	%rsp,%rbp
 	.profilable
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
-	push	%rbx
+	sub	$64,%rsp
+	mov	%r15,-8(%rbp)
+	mov	%r14,-16(%rbp)
+	mov	%r13,-24(%rbp)
+	mov	%r12,-32(%rbp)
+	mov	%rbx,-40(%rbp)
 	mov	%rdx,%rbx
-	sub	$24,%rsp
 	mov	(%rdx),%rdx
-	xor	%r8d,%r8d
 	mulx	(%rsi),%rcx,%rax
 	mulx	8(%rsi),%rdx,%r12
 	mov	%rcx,-48(%rbp)
-	adox	%rdx,%rax
+	add	%rdx,%rax
 	mov	(%rbx),%rdx
 	mulx	16(%rsi),%rdx,%r15
-	adox	%rdx,%r12
+	adc	%rdx,%r12
 	mov	(%rbx),%rdx
 	mulx	24(%rsi),%rdx,%r10
-	adox	%rdx,%r15
+	adc	%rdx,%r15
 	mov	(%rbx),%rdx
 	mulx	32(%rsi),%rdx,%r9
-	adox	%rdx,%r10
+	adc	%rdx,%r10
 	mov	(%rbx),%rdx
 	mulx	40(%rsi),%rdx,%rcx
-	adox	%rdx,%r9
+	adc	%rdx,%r9
 	mov	8(%rbx),%rdx
-	adox	%r8,%rcx
+	adc	$0,%rcx
 	mulx	(%rsi),%r13,%r11
 	xor	%r8d,%r8d
 	adox	%r13,%rax
@@ -171,12 +184,167 @@ Mul6x6Adx:
 	mov	%r8,64(%rdi)
 	mov	%r11,72(%rdi)
 	mov	%rdx,88(%rdi)
-	add	$24,%rsp
-	pop	%rbx
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	pop	%rbp
+	mov	-8(%rbp),%r15
+	mov	-16(%rbp),%r14
+	mov	-24(%rbp),%r13
+	mov	-32(%rbp),%r12
+	mov	-40(%rbp),%rbx
+	leave
 	ret
 	.endfn	Mul6x6Adx,globl
+
+	.end
+SIMULATION          0123456789          0123456789          0123456789          012
+Index     0123456789          0123456789          0123456789          0123456789
+[0,0]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    . .   movq	%r15, -8(%rbp)
+[0,1]     D=eER.    .    .    .    .    .    .    .    .    .    .    .    .    . .   movq	%r14, -16(%rbp)
+[0,2]     D==eER    .    .    .    .    .    .    .    .    .    .    .    .    . .   movq	%r13, -24(%rbp)
+[0,3]     D===eER   .    .    .    .    .    .    .    .    .    .    .    .    . .   movq	%r12, -32(%rbp)
+[0,4]     D====eER  .    .    .    .    .    .    .    .    .    .    .    .    . .   movq	%rbx, -40(%rbp)
+[0,5]     DeE----R  .    .    .    .    .    .    .    .    .    .    .    .    . .   movq	%rdx, %rbx
+[0,6]     .DeeeeeER .    .    .    .    .    .    .    .    .    .    .    .    . .   movq	(%rdx), %rdx
+[0,7]     .D=====eeeeeeeeeER  .    .    .    .    .    .    .    .    .    .    . .   mulxq	(%rsi), %rcx, %rax
+[0,8]     . D=====eeeeeeeeeER .    .    .    .    .    .    .    .    .    .    . .   mulxq	8(%rsi), %rdx, %r12
+[0,9]     . D=======eE------R .    .    .    .    .    .    .    .    .    .    . .   movq	%rcx, -48(%rbp)
+[0,10]    . D=============eER .    .    .    .    .    .    .    .    .    .    . .   addq	%rdx, %rax
+[0,11]    .  DeeeeeE--------R .    .    .    .    .    .    .    .    .    .    . .   movq	(%rbx), %rdx
+[0,12]    .  D=====eeeeeeeeeER.    .    .    .    .    .    .    .    .    .    . .   mulxq	16(%rsi), %rdx, %r15
+[0,13]    .  D=============eER.    .    .    .    .    .    .    .    .    .    . .   adcq	%rdx, %r12
+[0,14]    .   DeeeeeE--------R.    .    .    .    .    .    .    .    .    .    . .   movq	(%rbx), %rdx
+[0,15]    .   D=====eeeeeeeeeER    .    .    .    .    .    .    .    .    .    . .   mulxq	24(%rsi), %rdx, %r10
+[0,16]    .   D=============eER    .    .    .    .    .    .    .    .    .    . .   adcq	%rdx, %r15
+[0,17]    .    DeeeeeE--------R    .    .    .    .    .    .    .    .    .    . .   movq	(%rbx), %rdx
+[0,18]    .    D=====eeeeeeeeeER   .    .    .    .    .    .    .    .    .    . .   mulxq	32(%rsi), %rdx, %r9
+[0,19]    .    D=============eER   .    .    .    .    .    .    .    .    .    . .   adcq	%rdx, %r10
+[0,20]    .    .DeeeeeE--------R   .    .    .    .    .    .    .    .    .    . .   movq	(%rbx), %rdx
+[0,21]    .    .D=====eeeeeeeeeER  .    .    .    .    .    .    .    .    .    . .   mulxq	40(%rsi), %rdx, %rcx
+[0,22]    .    .D=============eER  .    .    .    .    .    .    .    .    .    . .   adcq	%rdx, %r9
+[0,23]    .    . DeeeeeE--------R  .    .    .    .    .    .    .    .    .    . .   movq	8(%rbx), %rdx
+[0,24]    .    . D=============eER .    .    .    .    .    .    .    .    .    . .   adcq	$0, %rcx
+[0,25]    .    . D=====eeeeeeeeeER .    .    .    .    .    .    .    .    .    . .   mulxq	(%rsi), %r13, %r11
+[0,26]    .    .  D--------------R .    .    .    .    .    .    .    .    .    . .   xorl	%r8d, %r8d
+[0,27]    .    .  D========eE----R .    .    .    .    .    .    .    .    .    . .   adoxq	%r13, %rax
+[0,28]    .    .  D=============eER.    .    .    .    .    .    .    .    .    . .   adcxq	%r11, %r12
+[0,29]    .    .  D=========eE----R.    .    .    .    .    .    .    .    .    . .   movq	%rax, -56(%rbp)
+[0,30]    .    .   D====eeeeeeeeeER.    .    .    .    .    .    .    .    .    . .   mulxq	8(%rsi), %r11, %rax
+[0,31]    .    .   D=============eER    .    .    .    .    .    .    .    .    . .   adoxq	%r11, %r12
+[0,32]    .    .   D==============eER   .    .    .    .    .    .    .    .    . .   adcxq	%rax, %r15
+[0,33]    .    .    D=============eER   .    .    .    .    .    .    .    .    . .   movq	%r12, %r14
+[0,34]    .    .    D====eeeeeeeeeE-R   .    .    .    .    .    .    .    .    . .   mulxq	16(%rsi), %r11, %rax
+[0,35]    .    .    D==============eER  .    .    .    .    .    .    .    .    . .   adoxq	%r11, %r15
+[0,36]    .    .    .D==============eER .    .    .    .    .    .    .    .    . .   adcxq	%rax, %r10
+[0,37]    .    .    .D====eeeeeeeeeE--R .    .    .    .    .    .    .    .    . .   mulxq	24(%rsi), %r11, %rax
+[0,38]    .    .    .D===============eER.    .    .    .    .    .    .    .    . .   adoxq	%r11, %r10
+[0,39]    .    .    . D===============eER    .    .    .    .    .    .    .    . .   adcxq	%rax, %r9
+[0,40]    .    .    . D====eeeeeeeeeE---R    .    .    .    .    .    .    .    . .   mulxq	32(%rsi), %r11, %rax
+[0,41]    .    .    . D================eER   .    .    .    .    .    .    .    . .   adoxq	%r11, %r9
+[0,42]    .    .    .  D================eER  .    .    .    .    .    .    .    . .   adcxq	%rax, %rcx
+[0,43]    .    .    .  D====eeeeeeeeeE----R  .    .    .    .    .    .    .    . .   mulxq	40(%rsi), %rdx, %rax
+[0,44]    .    .    .  D=================eER .    .    .    .    .    .    .    . .   adoxq	%rdx, %rcx
+[0,45]    .    .    .   D=================eER.    .    .    .    .    .    .    . .   adcxq	%r8, %rax
+[0,46]    .    .    .   DeeeeeE-------------R.    .    .    .    .    .    .    . .   movq	16(%rbx), %rdx
+[0,47]    .    .    .   D==================eER    .    .    .    .    .    .    . .   adoxq	%r8, %rax
+[0,48]    .    .    .    D====eeeeeeeeeE-----R    .    .    .    .    .    .    . .   mulxq	(%rsi), %r13, %r8
+[0,49]    .    .    .    D====E--------------R    .    .    .    .    .    .    . .   xorl	%r11d, %r11d
+[0,50]    .    .    .    D=========eE--------R    .    .    .    .    .    .    . .   adoxq	%r13, %r14
+[0,51]    .    .    .    .D=========eE-------R    .    .    .    .    .    .    . .   movq	%r14, -64(%rbp)
+[0,52]    .    .    .    .D============eE----R    .    .    .    .    .    .    . .   adcxq	%r8, %r15
+[0,53]    .    .    .    .D====eeeeeeeeeE----R    .    .    .    .    .    .    . .   mulxq	8(%rsi), %r12, %r8
+[0,54]    .    .    .    . D============eE---R    .    .    .    .    .    .    . .   adoxq	%r12, %r15
+[0,55]    .    .    .    . D=============eE--R    .    .    .    .    .    .    . .   adcxq	%r8, %r10
+[0,56]    .    .    .    . D====eeeeeeeeeE---R    .    .    .    .    .    .    . .   mulxq	16(%rsi), %r12, %r8
+[0,57]    .    .    .    .  D=============eE-R    .    .    .    .    .    .    . .   adoxq	%r12, %r10
+[0,58]    .    .    .    .  D==============eER    .    .    .    .    .    .    . .   adcxq	%r8, %r9
+[0,59]    .    .    .    .  D====eeeeeeeeeE--R    .    .    .    .    .    .    . .   mulxq	24(%rsi), %r12, %r8
+[0,60]    .    .    .    .   D==============eER   .    .    .    .    .    .    . .   adoxq	%r12, %r9
+[0,61]    .    .    .    .   D===============eER  .    .    .    .    .    .    . .   adcxq	%r8, %rcx
+[0,62]    .    .    .    .   D====eeeeeeeeeE---R  .    .    .    .    .    .    . .   mulxq	32(%rsi), %r12, %r8
+[0,63]    .    .    .    .    D===============eER .    .    .    .    .    .    . .   adoxq	%r12, %rcx
+[0,64]    .    .    .    .    D================eER.    .    .    .    .    .    . .   adcxq	%r8, %rax
+[0,65]    .    .    .    .    D====eeeeeeeeeE----R.    .    .    .    .    .    . .   mulxq	40(%rsi), %rdx, %r8
+[0,66]    .    .    .    .    .D================eER    .    .    .    .    .    . .   adoxq	%rdx, %rax
+[0,67]    .    .    .    .    .D=================eER   .    .    .    .    .    . .   adcxq	%r11, %r8
+[0,68]    .    .    .    .    .DeeeeeE-------------R   .    .    .    .    .    . .   movq	24(%rbx), %rdx
+[0,69]    .    .    .    .    .D==================eER  .    .    .    .    .    . .   adoxq	%r11, %r8
+[0,70]    .    .    .    .    . D====eeeeeeeeeE-----R  .    .    .    .    .    . .   mulxq	(%rsi), %r13, %r11
+[0,71]    .    .    .    .    . D====E--------------R  .    .    .    .    .    . .   xorl	%r12d, %r12d
+[0,72]    .    .    .    .    . D===========eE------R  .    .    .    .    .    . .   adoxq	%r13, %r15
+[0,73]    .    .    .    .    .  D============eE----R  .    .    .    .    .    . .   adcxq	%r11, %r10
+[0,74]    .    .    .    .    .  D====eeeeeeeeeE----R  .    .    .    .    .    . .   mulxq	8(%rsi), %r13, %r11
+[0,75]    .    .    .    .    .  D=============eE---R  .    .    .    .    .    . .   adoxq	%r13, %r10
+[0,76]    .    .    .    .    .   D=============eE--R  .    .    .    .    .    . .   adcxq	%r11, %r9
+[0,77]    .    .    .    .    .   D====eeeeeeeeeE---R  .    .    .    .    .    . .   mulxq	16(%rsi), %r13, %r11
+[0,78]    .    .    .    .    .   D==============eE-R  .    .    .    .    .    . .   adoxq	%r13, %r9
+[0,79]    .    .    .    .    .    D==============eER  .    .    .    .    .    . .   adcxq	%r11, %rcx
+[0,80]    .    .    .    .    .    D====eeeeeeeeeE--R  .    .    .    .    .    . .   mulxq	24(%rsi), %r13, %r11
+[0,81]    .    .    .    .    .    D===============eER .    .    .    .    .    . .   adoxq	%r13, %rcx
+[0,82]    .    .    .    .    .    .D===============eER.    .    .    .    .    . .   adcxq	%r11, %rax
+[0,83]    .    .    .    .    .    .D====eeeeeeeeeE---R.    .    .    .    .    . .   mulxq	32(%rsi), %r13, %r11
+[0,84]    .    .    .    .    .    .D================eER    .    .    .    .    . .   adoxq	%r13, %rax
+[0,85]    .    .    .    .    .    . D================eER   .    .    .    .    . .   adcxq	%r11, %r8
+[0,86]    .    .    .    .    .    . D====eeeeeeeeeE----R   .    .    .    .    . .   mulxq	40(%rsi), %rdx, %r11
+[0,87]    .    .    .    .    .    . D=================eER  .    .    .    .    . .   adoxq	%rdx, %r8
+[0,88]    .    .    .    .    .    .  DeeeeeE------------R  .    .    .    .    . .   movq	32(%rbx), %rdx
+[0,89]    .    .    .    .    .    .  D=================eER .    .    .    .    . .   adcxq	%r12, %r11
+[0,90]    .    .    .    .    .    .  D=====eeeeeeeeeE----R .    .    .    .    . .   mulxq	(%rsi), %r14, %r13
+[0,91]    .    .    .    .    .    .   D=================eER.    .    .    .    . .   adoxq	%r12, %r11
+[0,92]    .    .    .    .    .    .   D-------------------R.    .    .    .    . .   xorl	%r12d, %r12d
+[0,93]    .    .    .    .    .    .   D===========eE------R.    .    .    .    . .   adoxq	%r14, %r10
+[0,94]    .    .    .    .    .    .   D=============eE----R.    .    .    .    . .   adcxq	%r13, %r9
+[0,95]    .    .    .    .    .    .    D====eeeeeeeeeE----R.    .    .    .    . .   mulxq	8(%rsi), %r14, %r13
+[0,96]    .    .    .    .    .    .    D=============eE---R.    .    .    .    . .   adoxq	%r14, %r9
+[0,97]    .    .    .    .    .    .    D==============eE--R.    .    .    .    . .   adcxq	%r13, %rcx
+[0,98]    .    .    .    .    .    .    .D====eeeeeeeeeE---R.    .    .    .    . .   mulxq	16(%rsi), %r14, %r13
+[0,99]    .    .    .    .    .    .    .D==============eE-R.    .    .    .    . .   adoxq	%r14, %rcx
+[0,100]   .    .    .    .    .    .    .D===============eER.    .    .    .    . .   adcxq	%r13, %rax
+[0,101]   .    .    .    .    .    .    . D====eeeeeeeeeE--R.    .    .    .    . .   mulxq	24(%rsi), %r14, %r13
+[0,102]   .    .    .    .    .    .    . D===============eER    .    .    .    . .   adoxq	%r14, %rax
+[0,103]   .    .    .    .    .    .    . D================eER   .    .    .    . .   adcxq	%r13, %r8
+[0,104]   .    .    .    .    .    .    .  D====eeeeeeeeeE---R   .    .    .    . .   mulxq	32(%rsi), %r14, %r13
+[0,105]   .    .    .    .    .    .    .  D================eER  .    .    .    . .   adoxq	%r14, %r8
+[0,106]   .    .    .    .    .    .    .  D=================eER .    .    .    . .   adcxq	%r13, %r11
+[0,107]   .    .    .    .    .    .    .   D====eeeeeeeeeE----R .    .    .    . .   mulxq	40(%rsi), %rdx, %r13
+[0,108]   .    .    .    .    .    .    .   D=================eER.    .    .    . .   adoxq	%rdx, %r11
+[0,109]   .    .    .    .    .    .    .   D==================eER    .    .    . .   adcxq	%r12, %r13
+[0,110]   .    .    .    .    .    .    .    DeeeeeE-------------R    .    .    . .   movq	40(%rbx), %rdx
+[0,111]   .    .    .    .    .    .    .    D==================eER   .    .    . .   adoxq	%r12, %r13
+[0,112]   .    .    .    .    .    .    .    D=====eeeeeeeeeE-----R   .    .    . .   mulxq	(%rsi), %r14, %rbx
+[0,113]   .    .    .    .    .    .    .    .D-------------------R   .    .    . .   xorl	%r12d, %r12d
+[0,114]   .    .    .    .    .    .    .    .D===========eE------R   .    .    . .   adoxq	%r14, %r9
+[0,115]   .    .    .    .    .    .    .    .D=============eE----R   .    .    . .   adcxq	%rbx, %rcx
+[0,116]   .    .    .    .    .    .    .    . D====eeeeeeeeeE----R   .    .    . .   mulxq	8(%rsi), %r14, %rbx
+[0,117]   .    .    .    .    .    .    .    . D=============eE---R   .    .    . .   adoxq	%r14, %rcx
+[0,118]   .    .    .    .    .    .    .    . D==============eE--R   .    .    . .   adcxq	%rbx, %rax
+[0,119]   .    .    .    .    .    .    .    .  D====eeeeeeeeeE---R   .    .    . .   mulxq	16(%rsi), %r14, %rbx
+[0,120]   .    .    .    .    .    .    .    .  D==============eE-R   .    .    . .   adoxq	%r14, %rax
+[0,121]   .    .    .    .    .    .    .    .  D===============eER   .    .    . .   adcxq	%rbx, %r8
+[0,122]   .    .    .    .    .    .    .    .   D====eeeeeeeeeE--R   .    .    . .   mulxq	24(%rsi), %r14, %rbx
+[0,123]   .    .    .    .    .    .    .    .   D===============eER  .    .    . .   adoxq	%r14, %r8
+[0,124]   .    .    .    .    .    .    .    .   D================eER .    .    . .   adcxq	%rbx, %r11
+[0,125]   .    .    .    .    .    .    .    .    D====eeeeeeeeeE---R .    .    . .   mulxq	32(%rsi), %r14, %rbx
+[0,126]   .    .    .    .    .    .    .    .    .D====eeeeeeeeeE--R .    .    . .   mulxq	40(%rsi), %rsi, %rdx
+[0,127]   .    .    .    .    .    .    .    .    .D===============eER.    .    . .   adoxq	%r14, %r11
+[0,128]   .    .    .    .    .    .    .    .    .D================eER    .    . .   adcxq	%rbx, %r13
+[0,129]   .    .    .    .    .    .    .    .    . D================eER   .    . .   adoxq	%rsi, %r13
+[0,130]   .    .    .    .    .    .    .    .    . D=================eER  .    . .   adcxq	%r12, %rdx
+[0,131]   .    .    .    .    .    .    .    .    . D==================eER .    . .   adoxq	%r12, %rdx
+[0,132]   .    .    .    .    .    .    .    .    . DeeeeeE--------------R .    . .   movq	-48(%rbp), %rsi
+[0,133]   .    .    .    .    .    .    .    .    . D=eeeeeE-------------R .    . .   movq	-56(%rbp), %rbx
+[0,134]   .    .    .    .    .    .    .    .    . D===eE---------------R .    . .   movq	%r15, 24(%rdi)
+[0,135]   .    .    .    .    .    .    .    .    .  D=eeeeeE------------R .    . .   movq	-64(%rbp), %r14
+[0,136]   .    .    .    .    .    .    .    .    .  D================eE-R .    . .   movq	%r13, 80(%rdi)
+[0,137]   .    .    .    .    .    .    .    .    .  D=================eER .    . .   movq	%rbx, 8(%rdi)
+[0,138]   .    .    .    .    .    .    .    .    .  D==================eER.    . .   movq	%r14, 16(%rdi)
+[0,139]   .    .    .    .    .    .    .    .    .  D===================eER    . .   movq	%rsi, (%rdi)
+[0,140]   .    .    .    .    .    .    .    .    .  D====================eER   . .   movq	%r10, 32(%rdi)
+[0,141]   .    .    .    .    .    .    .    .    .   D====================eER  . .   movq	%r9, 40(%rdi)
+[0,142]   .    .    .    .    .    .    .    .    .   D=====================eER . .   movq	%rcx, 48(%rdi)
+[0,143]   .    .    .    .    .    .    .    .    .   D======================eER. .   movq	%rax, 56(%rdi)
+[0,144]   .    .    .    .    .    .    .    .    .   D=======================eER .   movq	%r8, 64(%rdi)
+[0,145]   .    .    .    .    .    .    .    .    .   D========================eER.   movq	%r11, 72(%rdi)
+[0,146]   .    .    .    .    .    .    .    .    .   D=========================eER   movq	%rdx, 88(%rdi)
+[0,147]   .    .    .    .    .    .    .    .    .    DeeeeeE--------------------R   movq	-8(%rbp), %r15
+[0,148]   .    .    .    .    .    .    .    .    .    D=eeeeeE-------------------R   movq	-16(%rbp), %r14
+[0,149]   .    .    .    .    .    .    .    .    .    D=eeeeeE-------------------R   movq	-24(%rbp), %r13
+[0,150]   .    .    .    .    .    .    .    .    .    D==eeeeeE------------------R   movq	-32(%rbp), %r12
+[0,151]   .    .    .    .    .    .    .    .    .    D==eeeeeE------------------R   movq	-40(%rbp), %rbx
diff --git a/libc/nexgen32e/mul8x8.S b/libc/nexgen32e/mul8x8.S
deleted file mode 100644
index 3ad62baf5..000000000
--- a/libc/nexgen32e/mul8x8.S
+++ /dev/null
@@ -1,483 +0,0 @@
-/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8     -*-│
-│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                     :vi│
-╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
-│                                                                              │
-│ Permission to use, copy, modify, and/or distribute this software for         │
-│ any purpose with or without fee is hereby granted, provided that the         │
-│ above copyright notice and this permission notice appear in all copies.      │
-│                                                                              │
-│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
-│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
-│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
-│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
-│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
-│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
-│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
-│ PERFORMANCE OF THIS SOFTWARE.                                                │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/macros.internal.h"
-
-/	Computes 1024-bit product of 512-bit and 512-bit numbers.
-/
-/		Instructions:      262
-/		Total Cycles:      114
-/		Total uOps:        469
-/		Dispatch Width:    6
-/		uOps Per Cycle:    4.11
-/		IPC:               2.30
-/		Block RThroughput: 78.2
-/
-/	@param	rdi receives 16 quadword result
-/	@param	rsi is left hand side which must have 8 quadwords
-/	@param	rdx is right hand side which must have 8 quadwords
-/	@note	words are host endian while array is little endian
-/	@mayalias
-Mul8x8Adx:
-	push	%rbp
-	mov	%rsp,%rbp
-	.profilable
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
-	mov	%rdx,%r12
-	push	%rbx
-	sub	$64,%rsp
-	mov	(%rdx),%rdx
-	xor	%r13d,%r13d
-	mulx	(%rsi),%rax,%rcx
-	mov	%rdi,-48(%rbp)
-	mov	%rax,-56(%rbp)
-	mulx	8(%rsi),%rdx,%rax
-	adox	%rdx,%rcx
-	mov	(%r12),%rdx
-	mulx	16(%rsi),%rdx,%rbx
-	adox	%rdx,%rax
-	mov	(%r12),%rdx
-	mulx	24(%rsi),%rdx,%r11
-	adox	%rdx,%rbx
-	mov	(%r12),%rdx
-	mulx	32(%rsi),%rdx,%r10
-	adox	%rdx,%r11
-	mov	(%r12),%rdx
-	mulx	40(%rsi),%rdx,%r9
-	adox	%rdx,%r10
-	mov	(%r12),%rdx
-	mulx	48(%rsi),%rdx,%r8
-	adox	%rdx,%r9
-	mov	(%r12),%rdx
-	mulx	56(%rsi),%rdx,%rdi
-	adox	%rdx,%r8
-	adox	%r13,%rdi
-	xor	%r13d,%r13d
-	mov	8(%r12),%rdx
-	mulx	(%rsi),%r15,%r14
-	adox	%r15,%rcx
-	adcx	%r14,%rax
-	mov	%rcx,-64(%rbp)
-	mulx	8(%rsi),%r14,%rcx
-	adox	%r14,%rax
-	adcx	%rcx,%rbx
-	mulx	16(%rsi),%r14,%rcx
-	adox	%r14,%rbx
-	adcx	%rcx,%r11
-	mulx	24(%rsi),%r14,%rcx
-	adox	%r14,%r11
-	adcx	%rcx,%r10
-	mulx	32(%rsi),%r14,%rcx
-	adox	%r14,%r10
-	adcx	%rcx,%r9
-	mulx	40(%rsi),%r14,%rcx
-	adox	%r14,%r9
-	adcx	%rcx,%r8
-	mulx	48(%rsi),%r14,%rcx
-	adox	%r14,%r8
-	adcx	%rcx,%rdi
-	mulx	56(%rsi),%rdx,%rcx
-	adox	%rdx,%rdi
-	adcx	%r13,%rcx
-	mov	16(%r12),%rdx
-	adox	%r13,%rcx
-	mulx	(%rsi),%r15,%r14
-	xor	%r13d,%r13d
-	adox	%r15,%rax
-	adcx	%r14,%rbx
-	mov	%rax,-72(%rbp)
-	mulx	8(%rsi),%r14,%rax
-	adox	%r14,%rbx
-	adcx	%rax,%r11
-	mulx	16(%rsi),%r14,%rax
-	adox	%r14,%r11
-	adcx	%rax,%r10
-	mulx	24(%rsi),%r14,%rax
-	adox	%r14,%r10
-	adcx	%rax,%r9
-	mulx	32(%rsi),%r14,%rax
-	adox	%r14,%r9
-	adcx	%rax,%r8
-	mulx	40(%rsi),%r14,%rax
-	adox	%r14,%r8
-	adcx	%rax,%rdi
-	mulx	48(%rsi),%r14,%rax
-	adox	%r14,%rdi
-	adcx	%rax,%rcx
-	mulx	56(%rsi),%rdx,%rax
-	adox	%rdx,%rcx
-	adcx	%r13,%rax
-	adox	%r13,%rax
-	xor	%r13d,%r13d
-	mov	24(%r12),%rdx
-	mulx	(%rsi),%r15,%r14
-	adox	%r15,%rbx
-	adcx	%r14,%r11
-	mov	%rbx,-80(%rbp)
-	mov	%r11,%r15
-	mulx	8(%rsi),%r14,%rbx
-	adox	%r14,%r15
-	adcx	%rbx,%r10
-	mulx	16(%rsi),%rbx,%r11
-	adox	%rbx,%r10
-	adcx	%r11,%r9
-	mulx	24(%rsi),%rbx,%r11
-	adox	%rbx,%r9
-	adcx	%r11,%r8
-	mulx	32(%rsi),%rbx,%r11
-	adox	%rbx,%r8
-	adcx	%r11,%rdi
-	mulx	40(%rsi),%rbx,%r11
-	adox	%rbx,%rdi
-	adcx	%r11,%rcx
-	mulx	48(%rsi),%rbx,%r11
-	adox	%rbx,%rcx
-	adcx	%r11,%rax
-	mulx	56(%rsi),%rdx,%r11
-	adox	%rdx,%rax
-	adcx	%r13,%r11
-	mov	32(%r12),%rdx
-	adox	%r13,%r11
-	xor	%ebx,%ebx
-	mulx	(%rsi),%r14,%r13
-	adox	%r14,%r15
-	adcx	%r13,%r10
-	mov	%r15,-88(%rbp)
-	mulx	8(%rsi),%r14,%r13
-	mov	%r10,%r15
-	adcx	%r13,%r9
-	adox	%r14,%r15
-	mulx	16(%rsi),%r13,%r10
-	adox	%r13,%r9
-	adcx	%r10,%r8
-	mulx	24(%rsi),%r13,%r10
-	adcx	%r10,%rdi
-	adox	%r13,%r8
-	mulx	32(%rsi),%r13,%r10
-	adox	%r13,%rdi
-	adcx	%r10,%rcx
-	mulx	40(%rsi),%r13,%r10
-	adox	%r13,%rcx
-	adcx	%r10,%rax
-	mulx	48(%rsi),%r13,%r10
-	adox	%r13,%rax
-	adcx	%r10,%r11
-	mulx	56(%rsi),%rdx,%r10
-	adox	%rdx,%r11
-	adcx	%rbx,%r10
-	mov	40(%r12),%rdx
-	adox	%rbx,%r10
-	mulx	(%rsi),%r14,%r13
-	xor	%ebx,%ebx
-	adox	%r14,%r15
-	mov	%r15,-96(%rbp)
-	adcx	%r13,%r9
-	mulx	8(%rsi),%r14,%r13
-	mov	%r9,%r15
-	adox	%r14,%r15
-	adcx	%r13,%r8
-	mulx	16(%rsi),%r13,%r9
-	adox	%r13,%r8
-	adcx	%r9,%rdi
-	mulx	24(%rsi),%r13,%r9
-	adox	%r13,%rdi
-	adcx	%r9,%rcx
-	mulx	32(%rsi),%r13,%r9
-	adox	%r13,%rcx
-	adcx	%r9,%rax
-	mulx	40(%rsi),%r13,%r9
-	adox	%r13,%rax
-	adcx	%r9,%r11
-	mulx	48(%rsi),%r13,%r9
-	adox	%r13,%r11
-	adcx	%r9,%r10
-	mulx	56(%rsi),%rdx,%r9
-	adox	%rdx,%r10
-	adcx	%rbx,%r9
-	adox	%rbx,%r9
-	xor	%ebx,%ebx
-	mov	48(%r12),%rdx
-	mulx	(%rsi),%r14,%r13
-	adox	%r14,%r15
-	adcx	%r13,%r8
-	mov	%r15,-104(%rbp)
-	mulx	8(%rsi),%r14,%r13
-	mov	%r8,%r15
-	adcx	%r13,%rdi
-	adox	%r14,%r15
-	mulx	16(%rsi),%r13,%r8
-	adox	%r13,%rdi
-	adcx	%r8,%rcx
-	mulx	24(%rsi),%r13,%r8
-	adox	%r13,%rcx
-	adcx	%r8,%rax
-	mulx	32(%rsi),%r13,%r8
-	adox	%r13,%rax
-	adcx	%r8,%r11
-	mulx	40(%rsi),%r13,%r8
-	adox	%r13,%r11
-	adcx	%r8,%r10
-	mulx	48(%rsi),%r13,%r8
-	adox	%r13,%r10
-	adcx	%r8,%r9
-	mulx	56(%rsi),%rdx,%r8
-	adox	%rdx,%r9
-	mov	56(%r12),%rdx
-	adcx	%rbx,%r8
-	mulx	(%rsi),%r13,%r12
-	adox	%rbx,%r8
-	xor	%ebx,%ebx
-	adox	%r13,%r15
-	adcx	%r12,%rdi
-	mulx	8(%rsi),%r13,%r12
-	adox	%r13,%rdi
-	adcx	%r12,%rcx
-	mulx	16(%rsi),%r13,%r12
-	adox	%r13,%rcx
-	adcx	%r12,%rax
-	mulx	24(%rsi),%r13,%r12
-	adox	%r13,%rax
-	adcx	%r12,%r11
-	mulx	32(%rsi),%r13,%r12
-	adox	%r13,%r11
-	adcx	%r12,%r10
-	mulx	40(%rsi),%r13,%r12
-	adox	%r13,%r10
-	adcx	%r12,%r9
-	mulx	48(%rsi),%r13,%r12
-	mulx	56(%rsi),%rsi,%rdx
-	adox	%r13,%r9
-	adcx	%r12,%r8
-	adox	%rsi,%r8
-	adcx	%rbx,%rdx
-	mov	-64(%rbp),%rsi
-	adox	%rbx,%rdx
-	mov	-48(%rbp),%rbx
-	mov	-56(%rbp),%r14
-	mov	%rsi,8(%rbx)
-	mov	-72(%rbp),%rsi
-	mov	%r14,(%rbx)
-	mov	%rsi,16(%rbx)
-	mov	-80(%rbp),%rsi
-	mov	%rsi,24(%rbx)
-	mov	-88(%rbp),%rsi
-	mov	%rsi,32(%rbx)
-	mov	-96(%rbp),%rsi
-	mov	%rsi,40(%rbx)
-	mov	-104(%rbp),%rsi
-	mov	%r15,56(%rbx)
-	mov	%rsi,48(%rbx)
-	mov	%rdi,64(%rbx)
-	mov	%rcx,72(%rbx)
-	mov	%rax,80(%rbx)
-	mov	%r11,88(%rbx)
-	mov	%r10,96(%rbx)
-	mov	%r9,104(%rbx)
-	mov	%r8,112(%rbx)
-	mov	%rdx,120(%rbx)
-	add	$64,%rsp
-	pop	%rbx
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	pop	%rbp
-	ret
-	.endfn	Mul8x8Adx,globl
-
-	.end
-Timeline view:      0123456789          0123456789          0123456789          0123456789
-Index     0123456789          0123456789          0123456789          0123456789
-[0,0]     DeeER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   pushq	%r15
-[0,1]     D==eeER   .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   pushq	%r14
-[0,2]     .D===eeER .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   pushq	%r13
-[0,3]     .D=====eeER    .    .    .    .    .    .    .    .    .    .    .    .    .   .   pushq	%r12
-[0,4]     . DeE-----R    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%rdx, %r12
-[0,5]     . D======eeER  .    .    .    .    .    .    .    .    .    .    .    .    .   .   pushq	%rbx
-[0,6]     . D========eER .    .    .    .    .    .    .    .    .    .    .    .    .   .   subq	$64, %rsp
-[0,7]     . DeeeeeE----R .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	(%rdx), %rdx
-[0,8]     .  D---------R .    .    .    .    .    .    .    .    .    .    .    .    .   .   xorl	%r13d, %r13d
-[0,9]     .  D====eeeeeeeeeER .    .    .    .    .    .    .    .    .    .    .    .   .   mulxq	(%rsi), %rax, %rcx
-[0,10]    .  D======eE------R .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%rdi, -48(%rbp)
-[0,11]    .   D======eE-----R .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%rax, -56(%rbp)
-[0,12]    .   D====eeeeeeeeeER.    .    .    .    .    .    .    .    .    .    .    .   .   mulxq	8(%rsi), %rdx, %rax
-[0,13]    .   D============eER.    .    .    .    .    .    .    .    .    .    .    .   .   adoxq	%rdx, %rcx
-[0,14]    .    DeeeeeE-------R.    .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
-[0,15]    .    D=====eeeeeeeeeER   .    .    .    .    .    .    .    .    .    .    .   .   mulxq	16(%rsi), %rdx, %rbx
-[0,16]    .    D============eE-R   .    .    .    .    .    .    .    .    .    .    .   .   adoxq	%rdx, %rax
-[0,17]    .    .DeeeeeE--------R   .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
-[0,18]    .    .D=====eeeeeeeeeER  .    .    .    .    .    .    .    .    .    .    .   .   mulxq	24(%rsi), %rdx, %r11
-[0,19]    .    .D=============eER  .    .    .    .    .    .    .    .    .    .    .   .   adoxq	%rdx, %rbx
-[0,20]    .    . DeeeeeE--------R  .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
-[0,21]    .    . D=====eeeeeeeeeER .    .    .    .    .    .    .    .    .    .    .   .   mulxq	32(%rsi), %rdx, %r10
-[0,22]    .    . D=============eER .    .    .    .    .    .    .    .    .    .    .   .   adoxq	%rdx, %r11
-[0,23]    .    .  DeeeeeE--------R .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
-[0,24]    .    .  D=====eeeeeeeeeER.    .    .    .    .    .    .    .    .    .    .   .   mulxq	40(%rsi), %rdx, %r9
-[0,25]    .    .  D=============eER.    .    .    .    .    .    .    .    .    .    .   .   adoxq	%rdx, %r10
-[0,26]    .    .   DeeeeeE--------R.    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
-[0,27]    .    .   D=====eeeeeeeeeER    .    .    .    .    .    .    .    .    .    .   .   mulxq	48(%rsi), %rdx, %r8
-[0,28]    .    .   D=============eER    .    .    .    .    .    .    .    .    .    .   .   adoxq	%rdx, %r9
-[0,29]    .    .    DeeeeeE--------R    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
-[0,30]    .    .    D=====eeeeeeeeeER   .    .    .    .    .    .    .    .    .    .   .   mulxq	56(%rsi), %rdx, %rdi
-[0,31]    .    .    D=============eER   .    .    .    .    .    .    .    .    .    .   .   adoxq	%rdx, %r8
-[0,32]    .    .    .D=============eER  .    .    .    .    .    .    .    .    .    .   .   adoxq	%r13, %rdi
-[0,33]    .    .    .D---------------R  .    .    .    .    .    .    .    .    .    .   .   xorl	%r13d, %r13d
-[0,34]    .    .    .DeeeeeE---------R  .    .    .    .    .    .    .    .    .    .   .   movq	8(%r12), %rdx
-[0,35]    .    .    . D====eeeeeeeeeER  .    .    .    .    .    .    .    .    .    .   .   mulxq	(%rsi), %r15, %r14
-[0,36]    .    .    . D=======eE-----R  .    .    .    .    .    .    .    .    .    .   .   adoxq	%r15, %rcx
-[0,37]    .    .    . D=============eER .    .    .    .    .    .    .    .    .    .   .   adcxq	%r14, %rax
-[0,38]    .    .    .  D=======eE-----R .    .    .    .    .    .    .    .    .    .   .   movq	%rcx, -64(%rbp)
-[0,39]    .    .    .  D====eeeeeeeeeER .    .    .    .    .    .    .    .    .    .   .   mulxq	8(%rsi), %r14, %rcx
-[0,40]    .    .    .  D=============eER.    .    .    .    .    .    .    .    .    .   .   adoxq	%r14, %rax
-[0,41]    .    .    .   D=============eER    .    .    .    .    .    .    .    .    .   .   adcxq	%rcx, %rbx
-[0,42]    .    .    .   D====eeeeeeeeeE-R    .    .    .    .    .    .    .    .    .   .   mulxq	16(%rsi), %r14, %rcx
-[0,43]    .    .    .   D==============eER   .    .    .    .    .    .    .    .    .   .   adoxq	%r14, %rbx
-[0,44]    .    .    .    D==============eER  .    .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r11
-[0,45]    .    .    .    D====eeeeeeeeeE--R  .    .    .    .    .    .    .    .    .   .   mulxq	24(%rsi), %r14, %rcx
-[0,46]    .    .    .    D===============eER .    .    .    .    .    .    .    .    .   .   adoxq	%r14, %r11
-[0,47]    .    .    .    .D===============eER.    .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r10
-[0,48]    .    .    .    .D====eeeeeeeeeE---R.    .    .    .    .    .    .    .    .   .   mulxq	32(%rsi), %r14, %rcx
-[0,49]    .    .    .    .D================eER    .    .    .    .    .    .    .    .   .   adoxq	%r14, %r10
-[0,50]    .    .    .    . D================eER   .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r9
-[0,51]    .    .    .    . D====eeeeeeeeeE----R   .    .    .    .    .    .    .    .   .   mulxq	40(%rsi), %r14, %rcx
-[0,52]    .    .    .    . D=================eER  .    .    .    .    .    .    .    .   .   adoxq	%r14, %r9
-[0,53]    .    .    .    .  D=================eER .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r8
-[0,54]    .    .    .    .  D====eeeeeeeeeE-----R .    .    .    .    .    .    .    .   .   mulxq	48(%rsi), %r14, %rcx
-[0,55]    .    .    .    .  D==================eER.    .    .    .    .    .    .    .   .   adoxq	%r14, %r8
-[0,56]    .    .    .    .   D==================eER    .    .    .    .    .    .    .   .   adcxq	%rcx, %rdi
-[0,57]    .    .    .    .   D====eeeeeeeeeE------R    .    .    .    .    .    .    .   .   mulxq	56(%rsi), %rdx, %rcx
-[0,58]    .    .    .    .   D===================eER   .    .    .    .    .    .    .   .   adoxq	%rdx, %rdi
-[0,59]    .    .    .    .    D===================eER  .    .    .    .    .    .    .   .   adcxq	%r13, %rcx
-[0,60]    .    .    .    .    DeeeeeE---------------R  .    .    .    .    .    .    .   .   movq	16(%r12), %rdx
-[0,61]    .    .    .    .    D====================eER .    .    .    .    .    .    .   .   adoxq	%r13, %rcx
-[0,62]    .    .    .    .    .D====eeeeeeeeeE-------R .    .    .    .    .    .    .   .   mulxq	(%rsi), %r15, %r14
-[0,63]    .    .    .    .    .D---------------------R .    .    .    .    .    .    .   .   xorl	%r13d, %r13d
-[0,64]    .    .    .    .    .D=======eE------------R .    .    .    .    .    .    .   .   adoxq	%r15, %rax
-[0,65]    .    .    .    .    . D============eE------R .    .    .    .    .    .    .   .   adcxq	%r14, %rbx
-[0,66]    .    .    .    .    . D=======eE-----------R .    .    .    .    .    .    .   .   movq	%rax, -72(%rbp)
-[0,67]    .    .    .    .    . D====eeeeeeeeeE------R .    .    .    .    .    .    .   .   mulxq	8(%rsi), %r14, %rax
-[0,68]    .    .    .    .    .  D============eE-----R .    .    .    .    .    .    .   .   adoxq	%r14, %rbx
-[0,69]    .    .    .    .    .  D=============eE----R .    .    .    .    .    .    .   .   adcxq	%rax, %r11
-[0,70]    .    .    .    .    .  D====eeeeeeeeeE-----R .    .    .    .    .    .    .   .   mulxq	16(%rsi), %r14, %rax
-[0,71]    .    .    .    .    .   D=============eE---R .    .    .    .    .    .    .   .   adoxq	%r14, %r11
-[0,72]    .    .    .    .    .   D==============eE--R .    .    .    .    .    .    .   .   adcxq	%rax, %r10
-[0,73]    .    .    .    .    .   D====eeeeeeeeeE----R .    .    .    .    .    .    .   .   mulxq	24(%rsi), %r14, %rax
-[0,74]    .    .    .    .    .    D==============eE-R .    .    .    .    .    .    .   .   adoxq	%r14, %r10
-[0,75]    .    .    .    .    .    D===============eER .    .    .    .    .    .    .   .   adcxq	%rax, %r9
-[0,76]    .    .    .    .    .    D====eeeeeeeeeE---R .    .    .    .    .    .    .   .   mulxq	32(%rsi), %r14, %rax
-[0,77]    .    .    .    .    .    .D===============eER.    .    .    .    .    .    .   .   adoxq	%r14, %r9
-[0,78]    .    .    .    .    .    .D================eER    .    .    .    .    .    .   .   adcxq	%rax, %r8
-[0,79]    .    .    .    .    .    .D====eeeeeeeeeE----R    .    .    .    .    .    .   .   mulxq	40(%rsi), %r14, %rax
-[0,80]    .    .    .    .    .    . D================eER   .    .    .    .    .    .   .   adoxq	%r14, %r8
-[0,81]    .    .    .    .    .    . D=================eER  .    .    .    .    .    .   .   adcxq	%rax, %rdi
-[0,82]    .    .    .    .    .    . D====eeeeeeeeeE-----R  .    .    .    .    .    .   .   mulxq	48(%rsi), %r14, %rax
-[0,83]    .    .    .    .    .    .  D=================eER .    .    .    .    .    .   .   adoxq	%r14, %rdi
-[0,84]    .    .    .    .    .    .  D==================eER.    .    .    .    .    .   .   adcxq	%rax, %rcx
-[0,85]    .    .    .    .    .    .  D====eeeeeeeeeE------R.    .    .    .    .    .   .   mulxq	56(%rsi), %rdx, %rax
-[0,86]    .    .    .    .    .    .   D==================eER    .    .    .    .    .   .   adoxq	%rdx, %rcx
-[0,87]    .    .    .    .    .    .   D===================eER   .    .    .    .    .   .   adcxq	%r13, %rax
-[0,88]    .    .    .    .    .    .   D====================eER  .    .    .    .    .   .   adoxq	%r13, %rax
-[0,89]    .    .    .    .    .    .   D----------------------R  .    .    .    .    .   .   xorl	%r13d, %r13d
-[0,90]    .    .    .    .    .    .   DeeeeeE----------------R  .    .    .    .    .   .   movq	24(%r12), %rdx
-[0,91]    .    .    .    .    .    .    D====eeeeeeeeeE-------R  .    .    .    .    .   .   mulxq	(%rsi), %r15, %r14
-[0,92]    .    .    .    .    .    .    D===========eE--------R  .    .    .    .    .   .   adoxq	%r15, %rbx
-[0,93]    .    .    .    .    .    .    D=============eE------R  .    .    .    .    .   .   adcxq	%r14, %r11
-[0,94]    .    .    .    .    .    .    .D===========eE-------R  .    .    .    .    .   .   movq	%rbx, -80(%rbp)
-[0,95]    .    .    .    .    .    .    .D=============eE-----R  .    .    .    .    .   .   movq	%r11, %r15
-[0,96]    .    .    .    .    .    .    .D====eeeeeeeeeE------R  .    .    .    .    .   .   mulxq	8(%rsi), %r14, %rbx
-[0,97]    .    .    .    .    .    .    . D=============eE----R  .    .    .    .    .   .   adoxq	%r14, %r15
-[0,98]    .    .    .    .    .    .    . D==============eE---R  .    .    .    .    .   .   adcxq	%rbx, %r10
-[0,99]    .    .    .    .    .    .    . D====eeeeeeeeeE-----R  .    .    .    .    .   .   mulxq	16(%rsi), %rbx, %r11
-[0,100]   .    .    .    .    .    .    .  D==============eE--R  .    .    .    .    .   .   adoxq	%rbx, %r10
-[0,101]   .    .    .    .    .    .    .  D===============eE-R  .    .    .    .    .   .   adcxq	%r11, %r9
-[0,102]   .    .    .    .    .    .    .  D====eeeeeeeeeE----R  .    .    .    .    .   .   mulxq	24(%rsi), %rbx, %r11
-[0,103]   .    .    .    .    .    .    .   D===============eER  .    .    .    .    .   .   adoxq	%rbx, %r9
-[0,104]   .    .    .    .    .    .    .   D================eER .    .    .    .    .   .   adcxq	%r11, %r8
-[0,105]   .    .    .    .    .    .    .   D====eeeeeeeeeE----R .    .    .    .    .   .   mulxq	32(%rsi), %rbx, %r11
-[0,106]   .    .    .    .    .    .    .    D================eER.    .    .    .    .   .   adoxq	%rbx, %r8
-[0,107]   .    .    .    .    .    .    .    D=================eER    .    .    .    .   .   adcxq	%r11, %rdi
-[0,108]   .    .    .    .    .    .    .    D====eeeeeeeeeE-----R    .    .    .    .   .   mulxq	40(%rsi), %rbx, %r11
-[0,109]   .    .    .    .    .    .    .    .D=================eER   .    .    .    .   .   adoxq	%rbx, %rdi
-[0,110]   .    .    .    .    .    .    .    .D==================eER  .    .    .    .   .   adcxq	%r11, %rcx
-[0,111]   .    .    .    .    .    .    .    .D====eeeeeeeeeE------R  .    .    .    .   .   mulxq	48(%rsi), %rbx, %r11
-[0,112]   .    .    .    .    .    .    .    . D==================eER .    .    .    .   .   adoxq	%rbx, %rcx
-[0,113]   .    .    .    .    .    .    .    . D===================eER.    .    .    .   .   adcxq	%r11, %rax
-[0,114]   .    .    .    .    .    .    .    . D====eeeeeeeeeE-------R.    .    .    .   .   mulxq	56(%rsi), %rdx, %r11
-[0,115]   .    .    .    .    .    .    .    .  D===================eER    .    .    .   .   adoxq	%rdx, %rax
-[0,116]   .    .    .    .    .    .    .    .  D====================eER   .    .    .   .   adcxq	%r13, %r11
-[0,117]   .    .    .    .    .    .    .    .  DeeeeeE----------------R   .    .    .   .   movq	32(%r12), %rdx
-[0,118]   .    .    .    .    .    .    .    .  D=====================eER  .    .    .   .   adoxq	%r13, %r11
-[0,119]   .    .    .    .    .    .    .    .  D=====E-----------------R  .    .    .   .   xorl	%ebx, %ebx
-[0,120]   .    .    .    .    .    .    .    .   D====eeeeeeeeeE--------R  .    .    .   .   mulxq	(%rsi), %r14, %r13
-[0,121]   .    .    .    .    .    .    .    .   D===========eE---------R  .    .    .   .   adoxq	%r14, %r15
-[0,122]   .    .    .    .    .    .    .    .   D=============eE-------R  .    .    .   .   adcxq	%r13, %r10
-[0,123]   .    .    .    .    .    .    .    .    D===========eE--------R  .    .    .   .   movq	%r15, -88(%rbp)
-[0,124]   .    .    .    .    .    .    .    .    D====eeeeeeeeeE-------R  .    .    .   .   mulxq	8(%rsi), %r14, %r13
-[0,125]   .    .    .    .    .    .    .    .    D=============eE------R  .    .    .   .   movq	%r10, %r15
-[0,126]   .    .    .    .    .    .    .    .    .D============eE------R  .    .    .   .   adcxq	%r13, %r9
-[0,127]   .    .    .    .    .    .    .    .    .D=============eE-----R  .    .    .   .   adoxq	%r14, %r15
-[0,128]   .    .    .    .    .    .    .    .    .D====eeeeeeeeeE------R  .    .    .   .   mulxq	16(%rsi), %r13, %r10
-[0,129]   .    .    .    .    .    .    .    .    . D=============eE----R  .    .    .   .   adoxq	%r13, %r9
-[0,130]   .    .    .    .    .    .    .    .    . D==============eE---R  .    .    .   .   adcxq	%r10, %r8
-[0,131]   .    .    .    .    .    .    .    .    . D====eeeeeeeeeE-----R  .    .    .   .   mulxq	24(%rsi), %r13, %r10
-[0,132]   .    .    .    .    .    .    .    .    .  D==============eE--R  .    .    .   .   adcxq	%r10, %rdi
-[0,133]   .    .    .    .    .    .    .    .    .  D===============eE-R  .    .    .   .   adoxq	%r13, %r8
-[0,134]   .    .    .    .    .    .    .    .    .  D====eeeeeeeeeE----R  .    .    .   .   mulxq	32(%rsi), %r13, %r10
-[0,135]   .    .    .    .    .    .    .    .    .   D===============eER  .    .    .   .   adoxq	%r13, %rdi
-[0,136]   .    .    .    .    .    .    .    .    .   D================eER .    .    .   .   adcxq	%r10, %rcx
-[0,137]   .    .    .    .    .    .    .    .    .   D====eeeeeeeeeE----R .    .    .   .   mulxq	40(%rsi), %r13, %r10
-[0,138]   .    .    .    .    .    .    .    .    .    D================eER.    .    .   .   adoxq	%r13, %rcx
-[0,139]   .    .    .    .    .    .    .    .    .    D=================eER    .    .   .   adcxq	%r10, %rax
-[0,140]   .    .    .    .    .    .    .    .    .    D====eeeeeeeeeE-----R    .    .   .   mulxq	48(%rsi), %r13, %r10
-[0,141]   .    .    .    .    .    .    .    .    .    .D=================eER   .    .   .   adoxq	%r13, %rax
-[0,142]   .    .    .    .    .    .    .    .    .    .D==================eER  .    .   .   adcxq	%r10, %r11
-[0,143]   .    .    .    .    .    .    .    .    .    .D====eeeeeeeeeE------R  .    .   .   mulxq	56(%rsi), %rdx, %r10
-[0,144]   .    .    .    .    .    .    .    .    .    . D==================eER .    .   .   adoxq	%rdx, %r11
-[0,145]   .    .    .    .    .    .    .    .    .    . D===================eER.    .   .   adcxq	%rbx, %r10
-[0,146]   .    .    .    .    .    .    .    .    .    . DeeeeeE---------------R.    .   .   movq	40(%r12), %rdx
-[0,147]   .    .    .    .    .    .    .    .    .    . D====================eER    .   .   adoxq	%rbx, %r10
-[0,148]   .    .    .    .    .    .    .    .    .    .  D====eeeeeeeeeE-------R    .   .   mulxq	(%rsi), %r14, %r13
-[0,149]   .    .    .    .    .    .    .    .    .    .  D---------------------R    .   .   xorl	%ebx, %ebx
-[0,150]   .    .    .    .    .    .    .    .    .    .  D============eE-------R    .   .   adoxq	%r14, %r15
-[0,151]   .    .    .    .    .    .    .    .    .    .   D============eE------R    .   .   movq	%r15, -96(%rbp)
-[0,152]   .    .    .    .    .    .    .    .    .    .   D============eE------R    .   .   adcxq	%r13, %r9
-[0,153]   .    .    .    .    .    .    .    .    .    .   D=====eeeeeeeeeE-----R    .   .   mulxq	8(%rsi), %r14, %r13
-[0,154]   .    .    .    .    .    .    .    .    .    .    D============eE-----R    .   .   movq	%r9, %r15
-[0,155]   .    .    .    .    .    .    .    .    .    .    D=============eE----R    .   .   adoxq	%r14, %r15
-[0,156]   .    .    .    .    .    .    .    .    .    .    D==============eE---R    .   .   adcxq	%r13, %r8
-[0,157]   .    .    .    .    .    .    .    .    .    .    .D====eeeeeeeeeE----R    .   .   mulxq	16(%rsi), %r13, %r9
-[0,158]   .    .    .    .    .    .    .    .    .    .    .D==============eE--R    .   .   adoxq	%r13, %r8
-[0,159]   .    .    .    .    .    .    .    .    .    .    .D===============eE-R    .   .   adcxq	%r9, %rdi
-[0,160]   .    .    .    .    .    .    .    .    .    .    . D====eeeeeeeeeE---R    .   .   mulxq	24(%rsi), %r13, %r9
-[0,161]   .    .    .    .    .    .    .    .    .    .    . D===============eER    .   .   adoxq	%r13, %rdi
-[0,162]   .    .    .    .    .    .    .    .    .    .    . D================eER   .   .   adcxq	%r9, %rcx
-[0,163]   .    .    .    .    .    .    .    .    .    .    .  D====eeeeeeeeeE---R   .   .   mulxq	32(%rsi), %r13, %r9
-[0,164]   .    .    .    .    .    .    .    .    .    .    .  D================eER  .   .   adoxq	%r13, %rcx
-[0,165]   .    .    .    .    .    .    .    .    .    .    .  D=================eER .   .   adcxq	%r9, %rax
-[0,166]   .    .    .    .    .    .    .    .    .    .    .   D====eeeeeeeeeE----R .   .   mulxq	40(%rsi), %r13, %r9
-[0,167]   .    .    .    .    .    .    .    .    .    .    .   D=================eER.   .   adoxq	%r13, %rax
-[0,168]   .    .    .    .    .    .    .    .    .    .    .   D==================eER   .   adcxq	%r9, %r11
-[0,169]   .    .    .    .    .    .    .    .    .    .    .    D====eeeeeeeeeE-----R   .   mulxq	48(%rsi), %r13, %r9
-[0,170]   .    .    .    .    .    .    .    .    .    .    .    D==================eER  .   adoxq	%r13, %r11
-[0,171]   .    .    .    .    .    .    .    .    .    .    .    D===================eER .   adcxq	%r9, %r10
-[0,172]   .    .    .    .    .    .    .    .    .    .    .    .D====eeeeeeeeeE------R .   mulxq	56(%rsi), %rdx, %r9
-[0,173]   .    .    .    .    .    .    .    .    .    .    .    .D===================eER.   adoxq	%rdx, %r10
-[0,174]   .    .    .    .    .    .    .    .    .    .    .    .D====================eER   adcxq	%rbx, %r9
diff --git a/libc/nexgen32e/mul8x8adx.S b/libc/nexgen32e/mul8x8adx.S
new file mode 100644
index 000000000..12d9f98df
--- /dev/null
+++ b/libc/nexgen32e/mul8x8adx.S
@@ -0,0 +1,495 @@
+/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8     -*-│
+│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                     :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/macros.internal.h"
+
+//	Computes 1024-bit product of 512-bit and 512-bit numbers.
+//
+//		Instructions:       260
+//		Total Cycles:        98
+//		Total uOps:         452
+//		uOps Per Cycle:    4.61
+//		IPC:               2.65
+//		Block RThroughput: 75.3
+//
+//	@param	rdi receives 16 quadword result
+//	@param	rsi is left hand side which must have 8 quadwords
+//	@param	rdx is right hand side which must have 8 quadwords
+//	@note	words are host endian while array is little endian
+//	@mayalias
+Mul8x8Adx:
+	push	%rbp
+	mov	%rsp,%rbp
+	.profilable
+	sub	$104,%rsp
+	mov	%r15,-8(%rbp)
+	mov	%r14,-16(%rbp)
+	mov	%r13,-24(%rbp)
+	mov	%r12,-32(%rbp)
+	mov	%rbx,-40(%rbp)
+	mov	%rdx,%r12
+	mov	(%rdx),%rdx
+	mulx	(%rsi),%rax,%rcx
+	mov	%rdi,-48(%rbp)
+	mov	%rax,-56(%rbp)
+	mulx	8(%rsi),%rdx,%rax
+	add	%rdx,%rcx
+	mov	(%r12),%rdx
+	mulx	16(%rsi),%rdx,%rbx
+	adc	%rdx,%rax
+	mov	(%r12),%rdx
+	mulx	24(%rsi),%rdx,%r11
+	adc	%rdx,%rbx
+	mov	(%r12),%rdx
+	mulx	32(%rsi),%rdx,%r10
+	adc	%rdx,%r11
+	mov	(%r12),%rdx
+	mulx	40(%rsi),%rdx,%r9
+	adc	%rdx,%r10
+	mov	(%r12),%rdx
+	mulx	48(%rsi),%rdx,%r8
+	adc	%rdx,%r9
+	mov	(%r12),%rdx
+	mulx	56(%rsi),%rdx,%rdi
+	adc	%rdx,%r8
+	adc	$0,%rdi
+	xor	%r13d,%r13d
+	mov	8(%r12),%rdx
+	mulx	(%rsi),%r15,%r14
+	adox	%r15,%rcx
+	adcx	%r14,%rax
+	mov	%rcx,-64(%rbp)
+	mulx	8(%rsi),%r14,%rcx
+	adox	%r14,%rax
+	adcx	%rcx,%rbx
+	mulx	16(%rsi),%r14,%rcx
+	adox	%r14,%rbx
+	adcx	%rcx,%r11
+	mulx	24(%rsi),%r14,%rcx
+	adox	%r14,%r11
+	adcx	%rcx,%r10
+	mulx	32(%rsi),%r14,%rcx
+	adox	%r14,%r10
+	adcx	%rcx,%r9
+	mulx	40(%rsi),%r14,%rcx
+	adox	%r14,%r9
+	adcx	%rcx,%r8
+	mulx	48(%rsi),%r14,%rcx
+	adox	%r14,%r8
+	adcx	%rcx,%rdi
+	mulx	56(%rsi),%rdx,%rcx
+	adox	%rdx,%rdi
+	adcx	%r13,%rcx
+	mov	16(%r12),%rdx
+	adox	%r13,%rcx
+	mulx	(%rsi),%r15,%r14
+	xor	%r13d,%r13d
+	adox	%r15,%rax
+	adcx	%r14,%rbx
+	mov	%rax,-72(%rbp)
+	mulx	8(%rsi),%r14,%rax
+	adox	%r14,%rbx
+	adcx	%rax,%r11
+	mulx	16(%rsi),%r14,%rax
+	adox	%r14,%r11
+	adcx	%rax,%r10
+	mulx	24(%rsi),%r14,%rax
+	adox	%r14,%r10
+	adcx	%rax,%r9
+	mulx	32(%rsi),%r14,%rax
+	adox	%r14,%r9
+	adcx	%rax,%r8
+	mulx	40(%rsi),%r14,%rax
+	adox	%r14,%r8
+	adcx	%rax,%rdi
+	mulx	48(%rsi),%r14,%rax
+	adox	%r14,%rdi
+	adcx	%rax,%rcx
+	mulx	56(%rsi),%rdx,%rax
+	adox	%rdx,%rcx
+	adcx	%r13,%rax
+	adox	%r13,%rax
+	xor	%r13d,%r13d
+	mov	24(%r12),%rdx
+	mulx	(%rsi),%r15,%r14
+	adox	%r15,%rbx
+	adcx	%r14,%r11
+	mov	%rbx,-80(%rbp)
+	mov	%r11,%r15
+	mulx	8(%rsi),%r14,%rbx
+	adox	%r14,%r15
+	adcx	%rbx,%r10
+	mulx	16(%rsi),%rbx,%r11
+	adox	%rbx,%r10
+	adcx	%r11,%r9
+	mulx	24(%rsi),%rbx,%r11
+	adox	%rbx,%r9
+	adcx	%r11,%r8
+	mulx	32(%rsi),%rbx,%r11
+	adox	%rbx,%r8
+	adcx	%r11,%rdi
+	mulx	40(%rsi),%rbx,%r11
+	adox	%rbx,%rdi
+	adcx	%r11,%rcx
+	mulx	48(%rsi),%rbx,%r11
+	adox	%rbx,%rcx
+	adcx	%r11,%rax
+	mulx	56(%rsi),%rdx,%r11
+	adox	%rdx,%rax
+	adcx	%r13,%r11
+	mov	32(%r12),%rdx
+	adox	%r13,%r11
+	xor	%ebx,%ebx
+	mulx	(%rsi),%r14,%r13
+	adox	%r14,%r15
+	adcx	%r13,%r10
+	mov	%r15,-88(%rbp)
+	mulx	8(%rsi),%r14,%r13
+	mov	%r10,%r15
+	adcx	%r13,%r9
+	adox	%r14,%r15
+	mulx	16(%rsi),%r13,%r10
+	adox	%r13,%r9
+	adcx	%r10,%r8
+	mulx	24(%rsi),%r13,%r10
+	adcx	%r10,%rdi
+	adox	%r13,%r8
+	mulx	32(%rsi),%r13,%r10
+	adox	%r13,%rdi
+	adcx	%r10,%rcx
+	mulx	40(%rsi),%r13,%r10
+	adox	%r13,%rcx
+	adcx	%r10,%rax
+	mulx	48(%rsi),%r13,%r10
+	adox	%r13,%rax
+	adcx	%r10,%r11
+	mulx	56(%rsi),%rdx,%r10
+	adox	%rdx,%r11
+	adcx	%rbx,%r10
+	mov	40(%r12),%rdx
+	adox	%rbx,%r10
+	mulx	(%rsi),%r14,%r13
+	xor	%ebx,%ebx
+	adox	%r14,%r15
+	mov	%r15,-96(%rbp)
+	adcx	%r13,%r9
+	mulx	8(%rsi),%r14,%r13
+	mov	%r9,%r15
+	adox	%r14,%r15
+	adcx	%r13,%r8
+	mulx	16(%rsi),%r13,%r9
+	adox	%r13,%r8
+	adcx	%r9,%rdi
+	mulx	24(%rsi),%r13,%r9
+	adox	%r13,%rdi
+	adcx	%r9,%rcx
+	mulx	32(%rsi),%r13,%r9
+	adox	%r13,%rcx
+	adcx	%r9,%rax
+	mulx	40(%rsi),%r13,%r9
+	adox	%r13,%rax
+	adcx	%r9,%r11
+	mulx	48(%rsi),%r13,%r9
+	adox	%r13,%r11
+	adcx	%r9,%r10
+	mulx	56(%rsi),%rdx,%r9
+	adox	%rdx,%r10
+	adcx	%rbx,%r9
+	adox	%rbx,%r9
+	xor	%ebx,%ebx
+	mov	48(%r12),%rdx
+	mulx	(%rsi),%r14,%r13
+	adox	%r14,%r15
+	adcx	%r13,%r8
+	mov	%r15,-104(%rbp)
+	mulx	8(%rsi),%r14,%r13
+	mov	%r8,%r15
+	adcx	%r13,%rdi
+	adox	%r14,%r15
+	mulx	16(%rsi),%r13,%r8
+	adox	%r13,%rdi
+	adcx	%r8,%rcx
+	mulx	24(%rsi),%r13,%r8
+	adox	%r13,%rcx
+	adcx	%r8,%rax
+	mulx	32(%rsi),%r13,%r8
+	adox	%r13,%rax
+	adcx	%r8,%r11
+	mulx	40(%rsi),%r13,%r8
+	adox	%r13,%r11
+	adcx	%r8,%r10
+	mulx	48(%rsi),%r13,%r8
+	adox	%r13,%r10
+	adcx	%r8,%r9
+	mulx	56(%rsi),%rdx,%r8
+	adox	%rdx,%r9
+	mov	56(%r12),%rdx
+	adcx	%rbx,%r8
+	mulx	(%rsi),%r13,%r12
+	adox	%rbx,%r8
+	xor	%ebx,%ebx
+	adox	%r13,%r15
+	adcx	%r12,%rdi
+	mulx	8(%rsi),%r13,%r12
+	adox	%r13,%rdi
+	adcx	%r12,%rcx
+	mulx	16(%rsi),%r13,%r12
+	adox	%r13,%rcx
+	adcx	%r12,%rax
+	mulx	24(%rsi),%r13,%r12
+	adox	%r13,%rax
+	adcx	%r12,%r11
+	mulx	32(%rsi),%r13,%r12
+	adox	%r13,%r11
+	adcx	%r12,%r10
+	mulx	40(%rsi),%r13,%r12
+	adox	%r13,%r10
+	adcx	%r12,%r9
+	mulx	48(%rsi),%r13,%r12
+	mulx	56(%rsi),%rsi,%rdx
+	adox	%r13,%r9
+	adcx	%r12,%r8
+	adox	%rsi,%r8
+	adcx	%rbx,%rdx
+	mov	-64(%rbp),%rsi
+	adox	%rbx,%rdx
+	mov	-48(%rbp),%rbx
+	mov	-56(%rbp),%r14
+	mov	%rsi,8(%rbx)
+	mov	-72(%rbp),%rsi
+	mov	%r14,(%rbx)
+	mov	%rsi,16(%rbx)
+	mov	-80(%rbp),%rsi
+	mov	%rsi,24(%rbx)
+	mov	-88(%rbp),%rsi
+	mov	%rsi,32(%rbx)
+	mov	-96(%rbp),%rsi
+	mov	%rsi,40(%rbx)
+	mov	-104(%rbp),%rsi
+	mov	%r15,56(%rbx)
+	mov	%rsi,48(%rbx)
+	mov	%rdi,64(%rbx)
+	mov	%rcx,72(%rbx)
+	mov	%rax,80(%rbx)
+	mov	%r11,88(%rbx)
+	mov	%r10,96(%rbx)
+	mov	%r9,104(%rbx)
+	mov	%r8,112(%rbx)
+	mov	%rdx,120(%rbx)
+	mov	-8(%rbp),%r15
+	mov	-16(%rbp),%r14
+	mov	-24(%rbp),%r13
+	mov	-32(%rbp),%r12
+	mov	-40(%rbp),%rbx
+	leave
+	ret
+	.endfn	Mul8x8Adx,globl
+
+	.end
+TIMELINE VIEW       0123456789          0123456789          0123456789          0123456789
+Index     0123456789          0123456789          0123456789          0123456789
+[0,0]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   subq	$104, %rsp
+[0,1]     DeER .    .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%r15, -8(%rbp)
+[0,2]     D=eER.    .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%r14, -16(%rbp)
+[0,3]     D==eER    .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%r13, -24(%rbp)
+[0,4]     D===eER   .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%r12, -32(%rbp)
+[0,5]     D====eER  .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%rbx, -40(%rbp)
+[0,6]     .DeE---R  .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%rdx, %r12
+[0,7]     .DeeeeeER .    .    .    .    .    .    .    .    .    .    .    .    .    .   .   movq	(%rdx), %rdx
+[0,8]     .D=====eeeeeeeeeER  .    .    .    .    .    .    .    .    .    .    .    .   .   mulxq	(%rsi), %rax, %rcx
+[0,9]     . D====eE--------R  .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%rdi, -48(%rbp)
+[0,10]    . D=======eE-----R  .    .    .    .    .    .    .    .    .    .    .    .   .   movq	%rax, -56(%rbp)
+[0,11]    . D=====eeeeeeeeeER .    .    .    .    .    .    .    .    .    .    .    .   .   mulxq	8(%rsi), %rdx, %rax
+[0,12]    .  D============eER .    .    .    .    .    .    .    .    .    .    .    .   .   addq	%rdx, %rcx
+[0,13]    .  DeeeeeE--------R .    .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
+[0,14]    .  D=====eeeeeeeeeER.    .    .    .    .    .    .    .    .    .    .    .   .   mulxq	16(%rsi), %rdx, %rbx
+[0,15]    .   D============eER.    .    .    .    .    .    .    .    .    .    .    .   .   adcq	%rdx, %rax
+[0,16]    .   DeeeeeE--------R.    .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
+[0,17]    .   D=====eeeeeeeeeER    .    .    .    .    .    .    .    .    .    .    .   .   mulxq	24(%rsi), %rdx, %r11
+[0,18]    .    D============eER    .    .    .    .    .    .    .    .    .    .    .   .   adcq	%rdx, %rbx
+[0,19]    .    DeeeeeE--------R    .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
+[0,20]    .    D=====eeeeeeeeeER   .    .    .    .    .    .    .    .    .    .    .   .   mulxq	32(%rsi), %rdx, %r10
+[0,21]    .    .D============eER   .    .    .    .    .    .    .    .    .    .    .   .   adcq	%rdx, %r11
+[0,22]    .    .DeeeeeE--------R   .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
+[0,23]    .    .D=====eeeeeeeeeER  .    .    .    .    .    .    .    .    .    .    .   .   mulxq	40(%rsi), %rdx, %r9
+[0,24]    .    . D============eER  .    .    .    .    .    .    .    .    .    .    .   .   adcq	%rdx, %r10
+[0,25]    .    . DeeeeeE--------R  .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
+[0,26]    .    . D=====eeeeeeeeeER .    .    .    .    .    .    .    .    .    .    .   .   mulxq	48(%rsi), %rdx, %r8
+[0,27]    .    .  D============eER .    .    .    .    .    .    .    .    .    .    .   .   adcq	%rdx, %r9
+[0,28]    .    .  DeeeeeE--------R .    .    .    .    .    .    .    .    .    .    .   .   movq	(%r12), %rdx
+[0,29]    .    .  D=====eeeeeeeeeER.    .    .    .    .    .    .    .    .    .    .   .   mulxq	56(%rsi), %rdx, %rdi
+[0,30]    .    .   D============eER.    .    .    .    .    .    .    .    .    .    .   .   adcq	%rdx, %r8
+[0,31]    .    .   D=============eER    .    .    .    .    .    .    .    .    .    .   .   adcq	$0, %rdi
+[0,32]    .    .   D---------------R    .    .    .    .    .    .    .    .    .    .   .   xorl	%r13d, %r13d
+[0,33]    .    .   DeeeeeE---------R    .    .    .    .    .    .    .    .    .    .   .   movq	8(%r12), %rdx
+[0,34]    .    .    D====eeeeeeeeeER    .    .    .    .    .    .    .    .    .    .   .   mulxq	(%rsi), %r15, %r14
+[0,35]    .    .    D=======eE-----R    .    .    .    .    .    .    .    .    .    .   .   adoxq	%r15, %rcx
+[0,36]    .    .    D=============eER   .    .    .    .    .    .    .    .    .    .   .   adcxq	%r14, %rax
+[0,37]    .    .    .D=======eE-----R   .    .    .    .    .    .    .    .    .    .   .   movq	%rcx, -64(%rbp)
+[0,38]    .    .    .D====eeeeeeeeeER   .    .    .    .    .    .    .    .    .    .   .   mulxq	8(%rsi), %r14, %rcx
+[0,39]    .    .    .D=============eER  .    .    .    .    .    .    .    .    .    .   .   adoxq	%r14, %rax
+[0,40]    .    .    . D=============eER .    .    .    .    .    .    .    .    .    .   .   adcxq	%rcx, %rbx
+[0,41]    .    .    . D====eeeeeeeeeE-R .    .    .    .    .    .    .    .    .    .   .   mulxq	16(%rsi), %r14, %rcx
+[0,42]    .    .    . D==============eER.    .    .    .    .    .    .    .    .    .   .   adoxq	%r14, %rbx
+[0,43]    .    .    .  D==============eER    .    .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r11
+[0,44]    .    .    .  D====eeeeeeeeeE--R    .    .    .    .    .    .    .    .    .   .   mulxq	24(%rsi), %r14, %rcx
+[0,45]    .    .    .  D===============eER   .    .    .    .    .    .    .    .    .   .   adoxq	%r14, %r11
+[0,46]    .    .    .   D===============eER  .    .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r10
+[0,47]    .    .    .   D====eeeeeeeeeE---R  .    .    .    .    .    .    .    .    .   .   mulxq	32(%rsi), %r14, %rcx
+[0,48]    .    .    .   D================eER .    .    .    .    .    .    .    .    .   .   adoxq	%r14, %r10
+[0,49]    .    .    .    D================eER.    .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r9
+[0,50]    .    .    .    D====eeeeeeeeeE----R.    .    .    .    .    .    .    .    .   .   mulxq	40(%rsi), %r14, %rcx
+[0,51]    .    .    .    D=================eER    .    .    .    .    .    .    .    .   .   adoxq	%r14, %r9
+[0,52]    .    .    .    .D=================eER   .    .    .    .    .    .    .    .   .   adcxq	%rcx, %r8
+[0,53]    .    .    .    .D====eeeeeeeeeE-----R   .    .    .    .    .    .    .    .   .   mulxq	48(%rsi), %r14, %rcx
+[0,54]    .    .    .    .D==================eER  .    .    .    .    .    .    .    .   .   adoxq	%r14, %r8
+[0,55]    .    .    .    . D==================eER .    .    .    .    .    .    .    .   .   adcxq	%rcx, %rdi
+[0,56]    .    .    .    . D====eeeeeeeeeE------R .    .    .    .    .    .    .    .   .   mulxq	56(%rsi), %rdx, %rcx
+[0,57]    .    .    .    . D===================eER.    .    .    .    .    .    .    .   .   adoxq	%rdx, %rdi
+[0,58]    .    .    .    .  D===================eER    .    .    .    .    .    .    .   .   adcxq	%r13, %rcx
+[0,59]    .    .    .    .  DeeeeeE---------------R    .    .    .    .    .    .    .   .   movq	16(%r12), %rdx
+[0,60]    .    .    .    .  D====================eER   .    .    .    .    .    .    .   .   adoxq	%r13, %rcx
+[0,61]    .    .    .    .   D====eeeeeeeeeE-------R   .    .    .    .    .    .    .   .   mulxq	(%rsi), %r15, %r14
+[0,62]    .    .    .    .   D---------------------R   .    .    .    .    .    .    .   .   xorl	%r13d, %r13d
+[0,63]    .    .    .    .   D=======eE------------R   .    .    .    .    .    .    .   .   adoxq	%r15, %rax
+[0,64]    .    .    .    .    D============eE------R   .    .    .    .    .    .    .   .   adcxq	%r14, %rbx
+[0,65]    .    .    .    .    D=======eE-----------R   .    .    .    .    .    .    .   .   movq	%rax, -72(%rbp)
+[0,66]    .    .    .    .    D====eeeeeeeeeE------R   .    .    .    .    .    .    .   .   mulxq	8(%rsi), %r14, %rax
+[0,67]    .    .    .    .    .D============eE-----R   .    .    .    .    .    .    .   .   adoxq	%r14, %rbx
+[0,68]    .    .    .    .    .D=============eE----R   .    .    .    .    .    .    .   .   adcxq	%rax, %r11
+[0,69]    .    .    .    .    .D====eeeeeeeeeE-----R   .    .    .    .    .    .    .   .   mulxq	16(%rsi), %r14, %rax
+[0,70]    .    .    .    .    . D=============eE---R   .    .    .    .    .    .    .   .   adoxq	%r14, %r11
+[0,71]    .    .    .    .    . D==============eE--R   .    .    .    .    .    .    .   .   adcxq	%rax, %r10
+[0,72]    .    .    .    .    . D====eeeeeeeeeE----R   .    .    .    .    .    .    .   .   mulxq	24(%rsi), %r14, %rax
+[0,73]    .    .    .    .    .  D==============eE-R   .    .    .    .    .    .    .   .   adoxq	%r14, %r10
+[0,74]    .    .    .    .    .  D===============eER   .    .    .    .    .    .    .   .   adcxq	%rax, %r9
+[0,75]    .    .    .    .    .  D====eeeeeeeeeE---R   .    .    .    .    .    .    .   .   mulxq	32(%rsi), %r14, %rax
+[0,76]    .    .    .    .    .   D===============eER  .    .    .    .    .    .    .   .   adoxq	%r14, %r9
+[0,77]    .    .    .    .    .   D================eER .    .    .    .    .    .    .   .   adcxq	%rax, %r8
+[0,78]    .    .    .    .    .   D====eeeeeeeeeE----R .    .    .    .    .    .    .   .   mulxq	40(%rsi), %r14, %rax
+[0,79]    .    .    .    .    .    D================eER.    .    .    .    .    .    .   .   adoxq	%r14, %r8
+[0,80]    .    .    .    .    .    D=================eER    .    .    .    .    .    .   .   adcxq	%rax, %rdi
+[0,81]    .    .    .    .    .    D====eeeeeeeeeE-----R    .    .    .    .    .    .   .   mulxq	48(%rsi), %r14, %rax
+[0,82]    .    .    .    .    .    .D=================eER   .    .    .    .    .    .   .   adoxq	%r14, %rdi
+[0,83]    .    .    .    .    .    .D==================eER  .    .    .    .    .    .   .   adcxq	%rax, %rcx
+[0,84]    .    .    .    .    .    .D====eeeeeeeeeE------R  .    .    .    .    .    .   .   mulxq	56(%rsi), %rdx, %rax
+[0,85]    .    .    .    .    .    . D==================eER .    .    .    .    .    .   .   adoxq	%rdx, %rcx
+[0,86]    .    .    .    .    .    . D===================eER.    .    .    .    .    .   .   adcxq	%r13, %rax
+[0,87]    .    .    .    .    .    . D====================eER    .    .    .    .    .   .   adoxq	%r13, %rax
+[0,88]    .    .    .    .    .    . D----------------------R    .    .    .    .    .   .   xorl	%r13d, %r13d
+[0,89]    .    .    .    .    .    . DeeeeeE----------------R    .    .    .    .    .   .   movq	24(%r12), %rdx
+[0,90]    .    .    .    .    .    .  D====eeeeeeeeeE-------R    .    .    .    .    .   .   mulxq	(%rsi), %r15, %r14
+[0,91]    .    .    .    .    .    .  D===========eE--------R    .    .    .    .    .   .   adoxq	%r15, %rbx
+[0,92]    .    .    .    .    .    .  D=============eE------R    .    .    .    .    .   .   adcxq	%r14, %r11
+[0,93]    .    .    .    .    .    .   D===========eE-------R    .    .    .    .    .   .   movq	%rbx, -80(%rbp)
+[0,94]    .    .    .    .    .    .   D=============eE-----R    .    .    .    .    .   .   movq	%r11, %r15
+[0,95]    .    .    .    .    .    .   D====eeeeeeeeeE------R    .    .    .    .    .   .   mulxq	8(%rsi), %r14, %rbx
+[0,96]    .    .    .    .    .    .    D=============eE----R    .    .    .    .    .   .   adoxq	%r14, %r15
+[0,97]    .    .    .    .    .    .    D==============eE---R    .    .    .    .    .   .   adcxq	%rbx, %r10
+[0,98]    .    .    .    .    .    .    D====eeeeeeeeeE-----R    .    .    .    .    .   .   mulxq	16(%rsi), %rbx, %r11
+[0,99]    .    .    .    .    .    .    .D==============eE--R    .    .    .    .    .   .   adoxq	%rbx, %r10
+[0,100]   .    .    .    .    .    .    .D===============eE-R    .    .    .    .    .   .   adcxq	%r11, %r9
+[0,101]   .    .    .    .    .    .    .D====eeeeeeeeeE----R    .    .    .    .    .   .   mulxq	24(%rsi), %rbx, %r11
+[0,102]   .    .    .    .    .    .    . D===============eER    .    .    .    .    .   .   adoxq	%rbx, %r9
+[0,103]   .    .    .    .    .    .    . D================eER   .    .    .    .    .   .   adcxq	%r11, %r8
+[0,104]   .    .    .    .    .    .    . D====eeeeeeeeeE----R   .    .    .    .    .   .   mulxq	32(%rsi), %rbx, %r11
+[0,105]   .    .    .    .    .    .    .  D================eER  .    .    .    .    .   .   adoxq	%rbx, %r8
+[0,106]   .    .    .    .    .    .    .  D=================eER .    .    .    .    .   .   adcxq	%r11, %rdi
+[0,107]   .    .    .    .    .    .    .  D====eeeeeeeeeE-----R .    .    .    .    .   .   mulxq	40(%rsi), %rbx, %r11
+[0,108]   .    .    .    .    .    .    .   D=================eER.    .    .    .    .   .   adoxq	%rbx, %rdi
+[0,109]   .    .    .    .    .    .    .   D==================eER    .    .    .    .   .   adcxq	%r11, %rcx
+[0,110]   .    .    .    .    .    .    .   D====eeeeeeeeeE------R    .    .    .    .   .   mulxq	48(%rsi), %rbx, %r11
+[0,111]   .    .    .    .    .    .    .    D==================eER   .    .    .    .   .   adoxq	%rbx, %rcx
+[0,112]   .    .    .    .    .    .    .    D===================eER  .    .    .    .   .   adcxq	%r11, %rax
+[0,113]   .    .    .    .    .    .    .    D====eeeeeeeeeE-------R  .    .    .    .   .   mulxq	56(%rsi), %rdx, %r11
+[0,114]   .    .    .    .    .    .    .    .D===================eER .    .    .    .   .   adoxq	%rdx, %rax
+[0,115]   .    .    .    .    .    .    .    .D====================eER.    .    .    .   .   adcxq	%r13, %r11
+[0,116]   .    .    .    .    .    .    .    .DeeeeeE----------------R.    .    .    .   .   movq	32(%r12), %rdx
+[0,117]   .    .    .    .    .    .    .    .D=====================eER    .    .    .   .   adoxq	%r13, %r11
+[0,118]   .    .    .    .    .    .    .    .D=====E-----------------R    .    .    .   .   xorl	%ebx, %ebx
+[0,119]   .    .    .    .    .    .    .    . D====eeeeeeeeeE--------R    .    .    .   .   mulxq	(%rsi), %r14, %r13
+[0,120]   .    .    .    .    .    .    .    . D===========eE---------R    .    .    .   .   adoxq	%r14, %r15
+[0,121]   .    .    .    .    .    .    .    . D=============eE-------R    .    .    .   .   adcxq	%r13, %r10
+[0,122]   .    .    .    .    .    .    .    .  D===========eE--------R    .    .    .   .   movq	%r15, -88(%rbp)
+[0,123]   .    .    .    .    .    .    .    .  D====eeeeeeeeeE-------R    .    .    .   .   mulxq	8(%rsi), %r14, %r13
+[0,124]   .    .    .    .    .    .    .    .  D=============eE------R    .    .    .   .   movq	%r10, %r15
+[0,125]   .    .    .    .    .    .    .    .   D============eE------R    .    .    .   .   adcxq	%r13, %r9
+[0,126]   .    .    .    .    .    .    .    .   D=============eE-----R    .    .    .   .   adoxq	%r14, %r15
+[0,127]   .    .    .    .    .    .    .    .   D====eeeeeeeeeE------R    .    .    .   .   mulxq	16(%rsi), %r13, %r10
+[0,128]   .    .    .    .    .    .    .    .    D=============eE----R    .    .    .   .   adoxq	%r13, %r9
+[0,129]   .    .    .    .    .    .    .    .    D==============eE---R    .    .    .   .   adcxq	%r10, %r8
+[0,130]   .    .    .    .    .    .    .    .    D====eeeeeeeeeE-----R    .    .    .   .   mulxq	24(%rsi), %r13, %r10
+[0,131]   .    .    .    .    .    .    .    .    .D==============eE--R    .    .    .   .   adcxq	%r10, %rdi
+[0,132]   .    .    .    .    .    .    .    .    .D===============eE-R    .    .    .   .   adoxq	%r13, %r8
+[0,133]   .    .    .    .    .    .    .    .    .D====eeeeeeeeeE----R    .    .    .   .   mulxq	32(%rsi), %r13, %r10
+[0,134]   .    .    .    .    .    .    .    .    . D===============eER    .    .    .   .   adoxq	%r13, %rdi
+[0,135]   .    .    .    .    .    .    .    .    . D================eER   .    .    .   .   adcxq	%r10, %rcx
+[0,136]   .    .    .    .    .    .    .    .    . D====eeeeeeeeeE----R   .    .    .   .   mulxq	40(%rsi), %r13, %r10
+[0,137]   .    .    .    .    .    .    .    .    .  D================eER  .    .    .   .   adoxq	%r13, %rcx
+[0,138]   .    .    .    .    .    .    .    .    .  D=================eER .    .    .   .   adcxq	%r10, %rax
+[0,139]   .    .    .    .    .    .    .    .    .  D====eeeeeeeeeE-----R .    .    .   .   mulxq	48(%rsi), %r13, %r10
+[0,140]   .    .    .    .    .    .    .    .    .   D=================eER.    .    .   .   adoxq	%r13, %rax
+[0,141]   .    .    .    .    .    .    .    .    .   D==================eER    .    .   .   adcxq	%r10, %r11
+[0,142]   .    .    .    .    .    .    .    .    .   D====eeeeeeeeeE------R    .    .   .   mulxq	56(%rsi), %rdx, %r10
+[0,143]   .    .    .    .    .    .    .    .    .    D==================eER   .    .   .   adoxq	%rdx, %r11
+[0,144]   .    .    .    .    .    .    .    .    .    D===================eER  .    .   .   adcxq	%rbx, %r10
+[0,145]   .    .    .    .    .    .    .    .    .    DeeeeeE---------------R  .    .   .   movq	40(%r12), %rdx
+[0,146]   .    .    .    .    .    .    .    .    .    D====================eER .    .   .   adoxq	%rbx, %r10
+[0,147]   .    .    .    .    .    .    .    .    .    .D====eeeeeeeeeE-------R .    .   .   mulxq	(%rsi), %r14, %r13
+[0,148]   .    .    .    .    .    .    .    .    .    .D---------------------R .    .   .   xorl	%ebx, %ebx
+[0,149]   .    .    .    .    .    .    .    .    .    .D============eE-------R .    .   .   adoxq	%r14, %r15
+[0,150]   .    .    .    .    .    .    .    .    .    . D============eE------R .    .   .   movq	%r15, -96(%rbp)
+[0,151]   .    .    .    .    .    .    .    .    .    . D============eE------R .    .   .   adcxq	%r13, %r9
+[0,152]   .    .    .    .    .    .    .    .    .    . D=====eeeeeeeeeE-----R .    .   .   mulxq	8(%rsi), %r14, %r13
+[0,153]   .    .    .    .    .    .    .    .    .    .  D============eE-----R .    .   .   movq	%r9, %r15
+[0,154]   .    .    .    .    .    .    .    .    .    .  D=============eE----R .    .   .   adoxq	%r14, %r15
+[0,155]   .    .    .    .    .    .    .    .    .    .  D==============eE---R .    .   .   adcxq	%r13, %r8
+[0,156]   .    .    .    .    .    .    .    .    .    .   D====eeeeeeeeeE----R .    .   .   mulxq	16(%rsi), %r13, %r9
+[0,157]   .    .    .    .    .    .    .    .    .    .   D==============eE--R .    .   .   adoxq	%r13, %r8
+[0,158]   .    .    .    .    .    .    .    .    .    .   D===============eE-R .    .   .   adcxq	%r9, %rdi
+[0,159]   .    .    .    .    .    .    .    .    .    .    D====eeeeeeeeeE---R .    .   .   mulxq	24(%rsi), %r13, %r9
+[0,160]   .    .    .    .    .    .    .    .    .    .    D===============eER .    .   .   adoxq	%r13, %rdi
+[0,161]   .    .    .    .    .    .    .    .    .    .    D================eER.    .   .   adcxq	%r9, %rcx
+[0,162]   .    .    .    .    .    .    .    .    .    .    .D====eeeeeeeeeE---R.    .   .   mulxq	32(%rsi), %r13, %r9
+[0,163]   .    .    .    .    .    .    .    .    .    .    .D================eER    .   .   adoxq	%r13, %rcx
+[0,164]   .    .    .    .    .    .    .    .    .    .    .D=================eER   .   .   adcxq	%r9, %rax
+[0,165]   .    .    .    .    .    .    .    .    .    .    . D====eeeeeeeeeE----R   .   .   mulxq	40(%rsi), %r13, %r9
+[0,166]   .    .    .    .    .    .    .    .    .    .    . D=================eER  .   .   adoxq	%r13, %rax
+[0,167]   .    .    .    .    .    .    .    .    .    .    . D==================eER .   .   adcxq	%r9, %r11
+[0,168]   .    .    .    .    .    .    .    .    .    .    .  D====eeeeeeeeeE-----R .   .   mulxq	48(%rsi), %r13, %r9
+[0,169]   .    .    .    .    .    .    .    .    .    .    .  D==================eER.   .   adoxq	%r13, %r11
+[0,170]   .    .    .    .    .    .    .    .    .    .    .  D===================eER   .   adcxq	%r9, %r10
+[0,171]   .    .    .    .    .    .    .    .    .    .    .   D====eeeeeeeeeE------R   .   mulxq	56(%rsi), %rdx, %r9
+[0,172]   .    .    .    .    .    .    .    .    .    .    .   D===================eER  .   adoxq	%rdx, %r10
+[0,173]   .    .    .    .    .    .    .    .    .    .    .   D====================eER .   adcxq	%rbx, %r9
+[0,174]   .    .    .    .    .    .    .    .    .    .    .    D====================eER.   adoxq	%rbx, %r9
+[0,175]   .    .    .    .    .    .    .    .    .    .    .    D----------------------R.   xorl	%ebx, %ebx
+[0,176]   .    .    .    .    .    .    .    .    .    .    .    DeeeeeE----------------R.   movq	48(%r12), %rdx
+[0,177]   .    .    .    .    .    .    .    .    .    .    .    .D=====eeeeeeeeeE------R.   mulxq	(%rsi), %r14, %r13
+[0,178]   .    .    .    .    .    .    .    .    .    .    .    .D==========eE---------R.   adoxq	%r14, %r15
+[0,179]   .    .    .    .    .    .    .    .    .    .    .    .D==============eE-----R.   adcxq	%r13, %r8
+[0,180]   .    .    .    .    .    .    .    .    .    .    .    . D==========eE--------R.   movq	%r15, -104(%rbp)
+[0,181]   .    .    .    .    .    .    .    .    .    .    .    . D=====eeeeeeeeeE-----R.   mulxq	8(%rsi), %r14, %r13
+[0,182]   .    .    .    .    .    .    .    .    .    .    .    . D==============eE----R.   movq	%r8, %r15
+[0,183]   .    .    .    .    .    .    .    .    .    .    .    .  D==============eE---R.   adcxq	%r13, %rdi
+[0,184]   .    .    .    .    .    .    .    .    .    .    .    .  D===============eE--R.   adoxq	%r14, %r15
+[0,185]   .    .    .    .    .    .    .    .    .    .    .    .  D=====eeeeeeeeeE----R.   mulxq	16(%rsi), %r13, %r8
+[0,186]   .    .    .    .    .    .    .    .    .    .    .    .   D===============eE-R.   adoxq	%r13, %rdi
+[0,187]   .    .    .    .    .    .    .    .    .    .    .    .   D================eER.   adcxq	%r8, %rcx
+[0,188]   .    .    .    .    .    .    .    .    .    .    .    .   D=====eeeeeeeeeE---R.   mulxq	24(%rsi), %r13, %r8
+[0,189]   .    .    .    .    .    .    .    .    .    .    .    .    D================eER   adoxq	%r13, %rcx
diff --git a/libc/nexgen32e/sub.S b/libc/nexgen32e/sub.S
deleted file mode 100644
index b065b90ff..000000000
--- a/libc/nexgen32e/sub.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8     -*-│
-│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                     :vi│
-╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
-│                                                                              │
-│ Permission to use, copy, modify, and/or distribute this software for         │
-│ any purpose with or without fee is hereby granted, provided that the         │
-│ above copyright notice and this permission notice appear in all copies.      │
-│                                                                              │
-│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
-│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
-│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
-│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
-│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
-│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
-│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
-│ PERFORMANCE OF THIS SOFTWARE.                                                │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/macros.internal.h"
-
-//	Computes C = A - B
-//
-//	Aliasing such as sbb(A,A,B) or sbb(B,A,B) is OK.
-//
-//	@param	rdi is C
-//	@param	rsi is A
-//	@param	rdx is B
-//	@param	rcx is number of subtracts
-//	@return	al  is carry
-sbb:	.leafprologue
-	test	%ecx,%ecx
-	jz	1f
-	xor	%r9d,%r9d
-0:	mov	(%rsi,%r9,8),%rax
-	sbb	(%rdx,%r9,8),%rax
-	mov	%rax,(%rdi,%r9,8)
-	inc	%r9d
-	loop	0b
-1:	setb	%al
-	.leafepilogue
-	.endfn	sbb,globl
diff --git a/test/net/https/mbedtls_test.c b/test/net/https/mbedtls_test.c
index 3cb664029..b8a52b6af 100644
--- a/test/net/https/mbedtls_test.c
+++ b/test/net/https/mbedtls_test.c
@@ -39,6 +39,7 @@
 #include "third_party/mbedtls/des.h"
 #include "third_party/mbedtls/dhm.h"
 #include "third_party/mbedtls/ecp.h"
+#include "third_party/mbedtls/ecp_internal.h"
 #include "third_party/mbedtls/entropy.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/gcm.h"
@@ -148,17 +149,17 @@ static void P256_MPI(mbedtls_mpi *N) {
 
 static void P256_JUSTINE(mbedtls_mpi *N) {
   memcpy(N->p, rng, 8 * 8);
-  ecp_mod_p256(N);
+  secp256r1(N->p); /* now handed the limb array itself, not the mbedtls_mpi */
 }
 
 static void P384_MPI(mbedtls_mpi *N) {
-  memcpy(N->p, rng, 8 * 8);
+  memcpy(N->p, rng, 12 * 8); /* 12 limbs x 8 bytes, as BENCH(p384) allocates */
   ASSERT_EQ(0, mbedtls_mpi_mod_mpi(N, N, &grp.P));
 }
 
 static void P384_JUSTINE(mbedtls_mpi *N) {
-  memcpy(N->p, rng, 8 * 8);
-  ecp_mod_p384(N);
+  memcpy(N->p, rng, 12 * 8); /* 12 limbs x 8 bytes, as BENCH(p384) allocates */
+  secp384r1(N->p); /* now handed the limb array itself, not the mbedtls_mpi */
 }
 
 BENCH(p256, bench) {
@@ -166,6 +167,7 @@ BENCH(p256, bench) {
   mbedtls_ecp_group_init(&grp);
   mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP256R1);
   mbedtls_mpi x = {1, 8, gc(calloc(8, 8))};
+  rngset(x.p, 8 * 8, rand64, -1);
   EZBENCH2("P-256 modulus MbedTLS MPI lib", donothing, P256_MPI(&x));
   EZBENCH2("P-256 modulus Justine rewrite", donothing, P256_JUSTINE(&x));
   mbedtls_ecp_group_free(&grp);
@@ -176,10 +178,10 @@ BENCH(p384, bench) {
 #ifdef MBEDTLS_ECP_C
   mbedtls_ecp_group_init(&grp);
   mbedtls_ecp_group_load(&grp, MBEDTLS_ECP_DP_SECP384R1);
+  uint64_t y[12];
   mbedtls_mpi x = {1, 12, gc(calloc(12, 8))};
   EZBENCH2("P-384 modulus MbedTLS MPI lib", donothing, P384_MPI(&x));
   EZBENCH2("P-384 modulus Justine rewrite", donothing, P384_JUSTINE(&x));
-  rngset(x.p, 12 * 8, rand64, -1);
   mbedtls_ecp_group_free(&grp);
 #endif
 }
@@ -1112,3 +1114,49 @@ BENCH(cmpint, bench) {
   EZBENCH2("cmpint 3.1", donothing, mbedtls_mpi_cmp_int(&z, 0));
   EZBENCH2("cmpint 3.2", donothing, mbedtls_mpi_cmp_int(&z, 1));
 }
+
+/* Runs SBB() over limbs 0..n-1, threading c (initially 0); returns final c. */
+mbedtls_mpi_uint F1(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a,
+                    const mbedtls_mpi_uint *b, size_t n) {
+  size_t i = 0;
+  unsigned char cf = 0;      /* kept from the original; not read here */
+  mbedtls_mpi_uint c = 0, x; /* c is both carry-in and carry-out of SBB() */
+  while (i < n) { SBB(d[i], a[i], b[i], c, c); ++i; }
+  return c;
+}
+
+/* d = a - b over n limbs via an sbb loop; returns final borrow (0 if !n). */
+mbedtls_mpi_uint F2(mbedtls_mpi_uint *d, const mbedtls_mpi_uint *a,
+                    const mbedtls_mpi_uint *b, size_t n) {
+  size_t i;           /* limb index; enters the asm as 0 through "3"(0) */
+  unsigned char cf;   /* carry flag as left by the last sbb */
+  mbedtls_mpi_uint x; /* scratch limb */
+  if (!n) return 0;   /* else dec/jnz would wrap the counter and overrun */
+  asm volatile("xor\t%1,%1\n\t" /* clears CF: first sbb sees no borrow */
+               ".align\t16\n1:\t"
+               "mov\t(%5,%3,8),%1\n\t"
+               "sbb\t(%6,%3,8),%1\n\t"
+               "mov\t%1,(%4,%3,8)\n\t"
+               "lea\t1(%3),%3\n\t" /* lea and dec both leave CF untouched */
+               "dec\t%2\n\tjnz\t1b"
+               : "=@ccb"(cf), "=&r"(x), "+c"(n), "=r"(i)
+               : "r"(d), "r"(a), "r"(b), "3"(0)
+               : "cc", "memory");
+  return cf;
+}
+
+/* Cross-checks F1 against F2 on 1000 operand pairs filled by rngset(): */
+/* the returned values and the written results must be identical.       */
+TEST(wut, wut) {
+  int i, x, y;
+  uint64_t A[8], B[8]; /* inputs shared by both implementations */
+  uint64_t C[8], D[8]; /* outputs of F1 and F2 respectively */
+  for (i = 0; i < 1000; ++i) {
+    rngset(A, sizeof(A), rand64, -1);
+    rngset(B, sizeof(B), rand64, -1);
+    x = F1(C, A, B, 8);
+    y = F2(D, A, B, 8);
+    ASSERT_EQ(x, y);
+    ASSERT_EQ(0, memcmp(C, D, sizeof(C)));
+  }
+}
diff --git a/third_party/mbedtls/bignum.c b/third_party/mbedtls/bignum.c
index 6dc7879fa..b8b1df68a 100644
--- a/third_party/mbedtls/bignum.c
+++ b/third_party/mbedtls/bignum.c
@@ -26,7 +26,6 @@
 #include "libc/nexgen32e/nexgen32e.h"
 #include "libc/nexgen32e/x86feature.h"
 #include "libc/runtime/runtime.h"
-#include "libc/stdio/stdio.h"
 #include "third_party/mbedtls/bignum.h"
 #include "third_party/mbedtls/bignum_internal.h"
 #include "third_party/mbedtls/chk.h"
@@ -65,20 +64,10 @@ asm(".include \"libc/disclaimer.inc\"");
 
 #if defined(MBEDTLS_BIGNUM_C)
 
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#define mpi_uint_bigendian_to_host(x) (x)
-#elif __SIZEOF_LONG__ == 8
-#define mpi_uint_bigendian_to_host(x) __builtin_bswap64(x)
-#elif __SIZEOF_LONG__ == 4
-#define mpi_uint_bigendian_to_host(x) __builtin_bswap32(x)
-#endif
-
-/* Get a specific byte, without range checks. */
-#define GET_BYTE(X, i) (((X)->p[(i) / ciL] >> (((i) % ciL) * 8)) & 0xff)
-
-static inline void mbedtls_mpi_zeroize(mbedtls_mpi_uint *v, size_t n)
+/* Implementation that should never be optimized out by the compiler */
+static void mbedtls_mpi_zeroize( mbedtls_mpi_uint *v, size_t n )
 {
-    mbedtls_platform_zeroize(v, ciL * n);
+    mbedtls_platform_zeroize( v, ciL * n ); /* n is a limb count; ciL presumably bytes per limb */
 }
 
 /**
@@ -88,15 +77,18 @@ static inline void mbedtls_mpi_zeroize(mbedtls_mpi_uint *v, size_t n)
  *                 in which case this function is a no-op. If it is
  *                 not \c NULL, it must point to an initialized MPI.
  */
-void mbedtls_mpi_free(mbedtls_mpi *X)
+void mbedtls_mpi_free( mbedtls_mpi *X )
 {
-    if (!X) return;
-    if (X->p)
+    if( !X )
+        return;
+    if( X->p )
     {
-        mbedtls_mpi_zeroize(X->p, X->n);
-        mbedtls_free(X->p);
+        mbedtls_mpi_zeroize( X->p, X->n ); /* wipe the limbs before releasing them */
+        mbedtls_free( X->p );
     }
-    mbedtls_mpi_init(X);
+    X->s = 1; /* fields reset by hand; replaces the mbedtls_mpi_init() call */
+    X->n = 0;
+    X->p = NULL;
 }
 
 /**
@@ -216,28 +208,35 @@ int mbedtls_mpi_shrink(mbedtls_mpi *X, size_t nblimbs)
  * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed.
  * \return         Another negative error code on other kinds of failure.
  */
-int mbedtls_mpi_copy(mbedtls_mpi *X, const mbedtls_mpi *Y)
+int mbedtls_mpi_copy( mbedtls_mpi *X, const mbedtls_mpi *Y )
 {
     int ret = 0;
     size_t i;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(Y);
-    if (X == Y)
-        return 0;
-    if (!Y->n)
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( Y );
+    if( X == Y )
+        return( 0 );
+    if( Y->n == 0 )
     {
-        mbedtls_mpi_free(X);
-        return 0;
+        mbedtls_mpi_free( X );
+        return( 0 );
     }
-    i = MAX(1, mbedtls_mpi_limbs(Y));
+    for( i = Y->n - 1; i > 0; i-- )
+        if( Y->p[i] != 0 )
+            break;
+    i++;
     X->s = Y->s;
-    if (X->n < i)
-        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, i));
+    if( X->n < i )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i ) );
+    }
     else
-        mbedtls_mpi_zeroize(X->p + i, X->n - i);
-    memcpy(X->p, Y->p, i * ciL);
+    {
+        mbedtls_platform_zeroize( X->p + i, ( X->n - i ) * ciL );
+    }
+    memcpy( X->p, Y->p, i * ciL );
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -246,14 +245,14 @@ cleanup:
  * \param X        The first MPI. It must be initialized.
  * \param Y        The second MPI. It must be initialized.
  */
-void mbedtls_mpi_swap(mbedtls_mpi *X, mbedtls_mpi *Y)
+void mbedtls_mpi_swap( mbedtls_mpi *X, mbedtls_mpi *Y )
 {
     mbedtls_mpi T;
-    MPI_VALIDATE(X);
-    MPI_VALIDATE(Y);
-    memcpy(&T, X, sizeof(mbedtls_mpi));
-    memcpy(X, Y, sizeof(mbedtls_mpi));
-    memcpy(Y, &T, sizeof(mbedtls_mpi));
+    MPI_VALIDATE( X );
+    MPI_VALIDATE( Y );
+    memcpy( &T,  X, sizeof( mbedtls_mpi ) );
+    memcpy(  X,  Y, sizeof( mbedtls_mpi ) );
+    memcpy(  Y, &T, sizeof( mbedtls_mpi ) );
 }
 
 /**
@@ -289,7 +288,8 @@ int mbedtls_mpi_safe_cond_assign(mbedtls_mpi *X,
     MPI_VALIDATE_RET(X);
     MPI_VALIDATE_RET(Y);
     /* make sure assign is 0 or 1 in a time-constant manner */
-    if (Y->n > X->n) MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, Y->n));
+    if (Y->n > X->n)
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, Y->n ) );
     assign = (assign | (unsigned char)-assign) >> 7;
     X->s = Select(Y->s, X->s, -assign);
     for (i = 0; i < Y->n; i++)
@@ -297,7 +297,7 @@ int mbedtls_mpi_safe_cond_assign(mbedtls_mpi *X,
     for (i = Y->n; i < X->n; i++)
         X->p[i] &= CONCEAL("r", assign - 1);
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -323,31 +323,30 @@ cleanup:
  * \return         Another negative error code on other kinds of failure.
  *
  */
-int mbedtls_mpi_safe_cond_swap(mbedtls_mpi *X,
-                               mbedtls_mpi *Y,
-                               unsigned char swap)
+int mbedtls_mpi_safe_cond_swap( mbedtls_mpi *X, mbedtls_mpi *Y, unsigned char swap )
 {
     int ret, s;
     size_t i;
     mbedtls_mpi_uint tmp;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(Y);
-    if (X == Y) return (0);
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( Y );
+    if( X == Y )
+        return( 0 );
     /* make sure swap is 0 or 1 in a time-constant manner */
     swap = (swap | (unsigned char)-swap) >> 7;
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, Y->n));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(Y, X->n));
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, Y->n ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( Y, X->n ) );
     s = X->s;
-    X->s = X->s * (1 - swap) + Y->s * swap;
-    Y->s = Y->s * (1 - swap) + s * swap;
-    for (i = 0; i < X->n; i++)
+    X->s = X->s * ( 1 - swap ) + Y->s * swap;
+    Y->s = Y->s * ( 1 - swap ) +    s * swap;
+    for( i = 0; i < X->n; i++ )
     {
         tmp = X->p[i];
-        X->p[i] = X->p[i] * (1 - swap) + Y->p[i] * swap;
-        Y->p[i] = Y->p[i] * (1 - swap) + tmp * swap;
+        X->p[i] = X->p[i] * ( 1 - swap ) + Y->p[i] * swap;
+        Y->p[i] = Y->p[i] * ( 1 - swap ) +     tmp * swap;
     }
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -360,16 +359,16 @@ cleanup:
  * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed.
  * \return         Another negative error code on other kinds of failure.
  */
-int mbedtls_mpi_lset(mbedtls_mpi *X, mbedtls_mpi_sint z)
+int mbedtls_mpi_lset( mbedtls_mpi *X, mbedtls_mpi_sint z )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
-    MPI_VALIDATE_RET(X);
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, 1));
-    mbedtls_mpi_zeroize(X->p, X->n);
-    X->p[0] = (z < 0) ? -z : z;
-    X->s = (z < 0) ? -1 : 1;
+    MPI_VALIDATE_RET( X );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, 1 ) );
+    mbedtls_platform_zeroize( X->p, X->n * ciL );
+    X->p[0] = ( z < 0 ) ? -z : z;
+    X->s    = ( z < 0 ) ? -1 : 1;
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -382,13 +381,18 @@ cleanup:
  *                 of \c X is unset or set.
  * \return         A negative error code on failure.
  */
-int mbedtls_mpi_get_bit(const mbedtls_mpi *X, size_t pos)
+int mbedtls_mpi_get_bit( const mbedtls_mpi *X, size_t pos )
 {
-    MPI_VALIDATE_RET(X);
-    if (X->n * biL <= pos) return 0;
-    return ((X->p[pos / biL] >> (pos % biL)) & 0x01);
+    MPI_VALIDATE_RET( X );
+    if( X->n * biL <= pos )
+        return( 0 );
+    return( ( X->p[pos / biL] >> ( pos % biL ) ) & 0x01 );
 }
 
+/* Get a specific byte, without range checks. */
+#define GET_BYTE( X, i )                                \
+    ( ( ( X )->p[( i ) / ciL] >> ( ( ( i ) % ciL ) * 8 ) ) & 0xff )
+
 /**
  * \brief          Modify a specific bit in an MPI.
  *
@@ -404,23 +408,24 @@ int mbedtls_mpi_get_bit(const mbedtls_mpi *X, size_t pos)
  * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if memory allocation failed.
  * \return         Another negative error code on other kinds of failure.
  */
-int mbedtls_mpi_set_bit(mbedtls_mpi *X, size_t pos, unsigned char val)
+int mbedtls_mpi_set_bit( mbedtls_mpi *X, size_t pos, unsigned char val )
 {
     int ret = 0;
     size_t off = pos / biL;
     size_t idx = pos % biL;
-    MPI_VALIDATE_RET(X);
-    if (val && val != 1)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    if (X->n * biL <= pos)
+    MPI_VALIDATE_RET( X );
+    if( val != 0 && val != 1 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if( X->n * biL <= pos )
     {
-        if (!val) return 0;
-        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, off + 1));
+        if( !val )
+            return( 0 );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, off + 1 ) );
     }
-    X->p[off] &= ~((mbedtls_mpi_uint)0x01 << idx);
-    X->p[off] |= (mbedtls_mpi_uint)val << idx;
+    X->p[off] &= ~( (mbedtls_mpi_uint) 0x01 << idx );
+    X->p[off] |= (mbedtls_mpi_uint) val << idx;
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -435,13 +440,13 @@ cleanup:
  * \return         The number of bits of value \c 0 before the least significant
  *                 bit of value \c 1 in \p X.
  */
-size_t mbedtls_mpi_lsb(const mbedtls_mpi *X)
+size_t mbedtls_mpi_lsb( const mbedtls_mpi *X )
 {
     size_t i, j, count = 0;
     MBEDTLS_INTERNAL_VALIDATE_RET(X, 0);
-    for (i = 0; i < X->n; i++)
+    for( i = 0; i < X->n; i++ )
     {
-        if (X->p[i])
+        if ( X->p[i] )
             return count + __builtin_ctzll(X->p[i]);
         else
             count += biL;
@@ -452,7 +457,7 @@ size_t mbedtls_mpi_lsb(const mbedtls_mpi *X)
 /*
  * Count leading zero bits in a given integer
  */
-static inline size_t mbedtls_clz(const mbedtls_mpi_uint x)
+static inline size_t mbedtls_clz( const mbedtls_mpi_uint x )
 {
     return x ? __builtin_clzll(x) : biL;
 }
@@ -490,23 +495,23 @@ size_t mbedtls_mpi_bitlen(const mbedtls_mpi *X)
  * \return         The least number of bytes capable of storing
  *                 the absolute value of \p X.
  */
-size_t mbedtls_mpi_size(const mbedtls_mpi *X)
+size_t mbedtls_mpi_size( const mbedtls_mpi *X )
 {
-    return (mbedtls_mpi_bitlen(X) + 7) >> 3;
+    return( ( mbedtls_mpi_bitlen( X ) + 7 ) >> 3 );
 }
 
 /*
  * Convert an ASCII character to digit value
  */
-static int mpi_get_digit(mbedtls_mpi_uint *d, int radix, char c)
+static int mpi_get_digit( mbedtls_mpi_uint *d, int radix, char c )
 {
     *d = 255;
-    if (c >= 0x30 && c <= 0x39) *d = c - 0x30;
-    if (c >= 0x41 && c <= 0x46) *d = c - 0x37;
-    if (c >= 0x61 && c <= 0x66) *d = c - 0x57;
-    if (*d >= (mbedtls_mpi_uint)radix)
-        return MBEDTLS_ERR_MPI_INVALID_CHARACTER;
-    return 0;
+    if( c >= 0x30 && c <= 0x39 ) *d = c - 0x30;
+    if( c >= 0x41 && c <= 0x46 ) *d = c - 0x37;
+    if( c >= 0x61 && c <= 0x66 ) *d = c - 0x57;
+    if( *d >= (mbedtls_mpi_uint) radix )
+        return( MBEDTLS_ERR_MPI_INVALID_CHARACTER );
+    return( 0 );
 }
 
 /**
@@ -519,87 +524,94 @@ static int mpi_get_digit(mbedtls_mpi_uint *d, int radix, char c)
  * \return         \c 0 if successful.
  * \return         A negative error code on failure.
  */
-int mbedtls_mpi_read_string(mbedtls_mpi *X, int radix, const char *s)
+int mbedtls_mpi_read_string( mbedtls_mpi *X, int radix, const char *s )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     size_t i, j, slen, n;
     mbedtls_mpi_uint d;
     mbedtls_mpi T;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(s);
-    if (radix < 2 || radix > 16)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    mbedtls_mpi_init(&T);
-    slen = strlen(s);
-    if (radix == 16)
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( s );
+    if( radix < 2 || radix > 16 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    mbedtls_mpi_init( &T );
+    slen = strlen( s );
+    if( radix == 16 )
     {
-        if (slen > MPI_SIZE_T_MAX >> 2)
-            return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-        n = BITS_TO_LIMBS(slen << 2);
-        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, n));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0));
-        for (i = slen, j = 0; i > 0; i--, j++)
+        if( slen > MPI_SIZE_T_MAX >> 2 )
+            return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+        n = BITS_TO_LIMBS( slen << 2 );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+        for( i = slen, j = 0; i > 0; i--, j++ )
         {
-            if (i == 1 && s[i - 1] == '-')
+            if( i == 1 && s[i - 1] == '-' )
             {
                 X->s = -1;
                 break;
             }
-            MBEDTLS_MPI_CHK(mpi_get_digit(&d, radix, s[i - 1]));
-            X->p[j / (2 * ciL)] |= d << ((j % (2 * ciL)) << 2);
+            MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i - 1] ) );
+            X->p[j / ( 2 * ciL )] |= d << ( ( j % ( 2 * ciL ) ) << 2 );
         }
     }
     else
     {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0));
-        for (i = 0; i < slen; i++)
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+        for( i = 0; i < slen; i++ )
         {
-            if (!i && s[i] == '-')
+            if( i == 0 && s[i] == '-' )
             {
                 X->s = -1;
                 continue;
             }
-            MBEDTLS_MPI_CHK(mpi_get_digit(&d, radix, s[i]));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_mul_int(&T, X, radix));
-            if (X->s == 1)
-                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, &T, d));
+            MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i] ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T, X, radix ) );
+            if( X->s == 1 )
+            {
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, &T, d ) );
+            }
             else
-                MBEDTLS_MPI_CHK(mbedtls_mpi_sub_int(X, &T, d));
+            {
+                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( X, &T, d ) );
+            }
         }
     }
 cleanup:
-    mbedtls_mpi_free(&T);
-    return ret;
+    mbedtls_mpi_free( &T );
+    return( ret );
 }
 
 /*
  * Helper to write the digits high-order first.
  */
-static int mpi_write_hlp(mbedtls_mpi *X, int radix, char **p,
-                         const size_t buflen)
+static int mpi_write_hlp( mbedtls_mpi *X, int radix,
+                          char **p, const size_t buflen )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     mbedtls_mpi_uint r;
     size_t length = 0;
     char *p_end = *p + buflen;
-    do {
-        if (length >= buflen)
-            return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
-        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_int(&r, X, radix));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_div_int(X, NULL, X, radix));
+    do
+    {
+        if( length >= buflen )
+        {
+            return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+        }
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, radix ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_div_int( X, NULL, X, radix ) );
         /*
          * Write the residue in the current position, as an ASCII character.
          */
-        if (r < 0xA)
-            *(--p_end) = (char)('0' + r);
+        if( r < 0xA )
+            *(--p_end) = (char)( '0' + r );
         else
-            *(--p_end) = (char)('A' + (r - 0xA));
+            *(--p_end) = (char)( 'A' + ( r - 0xA ) );
         length++;
-    } while (!mbedtls_mpi_is_zero(X));
-    memmove(*p, p_end, length);
+    } while( mbedtls_mpi_cmp_int( X, 0 ) != 0 );
+    memmove( *p, p_end, length );
     *p += length;
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -624,74 +636,75 @@ cleanup:
  *                 size of \p buf required for a successful call.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_write_string(const mbedtls_mpi *X, int radix, char *buf,
-                             size_t buflen, size_t *olen)
+int mbedtls_mpi_write_string( const mbedtls_mpi *X, int radix,
+                              char *buf, size_t buflen, size_t *olen )
 {
     int ret = 0;
     size_t n;
     char *p;
     mbedtls_mpi T;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(olen);
-    MPI_VALIDATE_RET(!buflen || buf);
-    if (radix < 2 || radix > 16)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    n = mbedtls_mpi_bitlen(X); /* Number of bits necessary to present `n`. */
-    if (radix >= 4)
-        n >>= 1; /* Number of 4-adic digits necessary to present
-                  * `n`. If radix > 4, this might be a strict
-                  * overapproximation of the number of
-                  * radix-adic digits needed to present `n`. */
-    if (radix >= 16)
-        n >>= 1;    /* Number of hexadecimal digits necessary to
-                     * present `n`. */
-    n += 1;       /* Terminating null byte */
-    n += 1;       /* Compensate for the divisions above, which round down `n`
-                   * in case it's not even. */
-    n += 1;       /* Potential '-'-sign. */
-    n += (n & 1); /* Make n even to have enough space for hexadecimal writing,
-                   * which always uses an even number of hex-digits. */
-    if (buflen < n)
+    MPI_VALIDATE_RET( X    );
+    MPI_VALIDATE_RET( olen );
+    MPI_VALIDATE_RET( buflen == 0 || buf );
+    if( radix < 2 || radix > 16 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    n = mbedtls_mpi_bitlen( X ); /* Number of bits necessary to present `n`. */
+    if( radix >=  4 ) n >>= 1;   /* Number of 4-adic digits necessary to present
+                                  * `n`. If radix > 4, this might be a strict
+                                  * overapproximation of the number of
+                                  * radix-adic digits needed to present `n`. */
+    if( radix >= 16 ) n >>= 1;   /* Number of hexadecimal digits necessary to
+                                  * present `n`. */
+    n += 1; /* Terminating null byte */
+    n += 1; /* Compensate for the divisions above, which round down `n`
+             * in case it's not even. */
+    n += 1; /* Potential '-'-sign. */
+    n += ( n & 1 ); /* Make n even to have enough space for hexadecimal writing,
+                     * which always uses an even number of hex-digits. */
+    if( buflen < n )
     {
         *olen = n;
-        return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
+        return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
     }
     p = buf;
-    mbedtls_mpi_init(&T);
-    if (X->s == -1)
+    mbedtls_mpi_init( &T );
+    if( X->s == -1 )
     {
         *p++ = '-';
         buflen--;
     }
-    if (radix == 16)
+    if( radix == 16 )
     {
         int c;
         size_t i, j, k;
-        for (i = X->n, k = 0; i > 0; i--)
+        for( i = X->n, k = 0; i > 0; i-- )
         {
-            for (j = ciL; j > 0; j--)
+            for( j = ciL; j > 0; j-- )
             {
-                c = (X->p[i - 1] >> ((j - 1) << 3)) & 0xFF;
-                if (!c && !k && (i + j) != 2) continue;
-                *(p++) = "0123456789ABCDEF"[c / 16];
-                *(p++) = "0123456789ABCDEF"[c % 16];
+                c = ( X->p[i - 1] >> ( ( j - 1 ) << 3) ) & 0xFF;
+                if( c == 0 && k == 0 && ( i + j ) != 2 )
+                    continue;
+                *(p++) = "0123456789ABCDEF" [c / 16];
+                *(p++) = "0123456789ABCDEF" [c % 16];
                 k = 1;
             }
         }
     }
     else
     {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&T, X));
-        if (T.s == -1) T.s = 1;
-        MBEDTLS_MPI_CHK(mpi_write_hlp(&T, radix, &p, buflen));
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T, X ) );
+        if( T.s == -1 )
+            T.s = 1;
+        MBEDTLS_MPI_CHK( mpi_write_hlp( &T, radix, &p, buflen ) );
     }
     *p++ = '\0';
     *olen = p - buf;
 cleanup:
-    mbedtls_mpi_free(&T);
-    return ret;
+    mbedtls_mpi_free( &T );
+    return( ret );
 }
 
+#if defined(MBEDTLS_FS_IO)
 /**
  * \brief          Read an MPI from a line in an opened file.
  *
@@ -713,7 +726,7 @@ cleanup:
  *                 is too small.
  * \return         Another negative error code on failure.
  */
-int mbedtls_mpi_read_file(mbedtls_mpi *X, int radix, FILE *fin)
+int mbedtls_mpi_read_file( mbedtls_mpi *X, int radix, FILE *fin )
 {
     mbedtls_mpi_uint d;
     size_t slen;
@@ -722,32 +735,24 @@ int mbedtls_mpi_read_file(mbedtls_mpi *X, int radix, FILE *fin)
      * Buffer should have space for (short) label and decimal formatted MPI,
      * newline characters and '\0'
      */
-    char s[MBEDTLS_MPI_RW_BUFFER_SIZE];
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(fin);
-    if (radix < 2 || radix > 16)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    mbedtls_platform_zeroize(s, sizeof(s));
-    if (!fgets(s, sizeof(s) - 1, fin))
-        return MBEDTLS_ERR_MPI_FILE_IO_ERROR;
-    slen = strlen(s);
-    if (slen == sizeof(s) - 2)
-        return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
-    if (slen > 0 && s[slen - 1] == '\n')
-    {
-        slen--;
-        s[slen] = '\0';
-    }
-    if (slen > 0 && s[slen - 1] == '\r')
-    {
-        slen--;
-        s[slen] = '\0';
-    }
+    char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ];
+    MPI_VALIDATE_RET( X   );
+    MPI_VALIDATE_RET( fin );
+    if( radix < 2 || radix > 16 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    mbedtls_platform_zeroize( s, sizeof( s ) );
+    if( fgets( s, sizeof( s ) - 1, fin ) == NULL )
+        return( MBEDTLS_ERR_MPI_FILE_IO_ERROR );
+    slen = strlen( s );
+    if( slen == sizeof( s ) - 2 )
+        return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+    if( slen > 0 && s[slen - 1] == '\n' ) { slen--; s[slen] = '\0'; }
+    if( slen > 0 && s[slen - 1] == '\r' ) { slen--; s[slen] = '\0'; }
     p = s + slen;
-    while (p-- > s)
-        if (mpi_get_digit(&d, radix, *p))
+    while( p-- > s )
+        if( mpi_get_digit( &d, radix, *p ) != 0 )
             break;
-    return mbedtls_mpi_read_string(X, radix, p + 1);
+    return( mbedtls_mpi_read_string( X, radix, p + 1 ) );
 }
 
 /**
@@ -765,8 +770,7 @@ int mbedtls_mpi_read_file(mbedtls_mpi *X, int radix, FILE *fin)
  * \return         \c 0 if successful.
  * \return         A negative error code on failure.
  */
-int mbedtls_mpi_write_file(const char *p, const mbedtls_mpi *X, int radix,
-                           FILE *fout)
+int mbedtls_mpi_write_file( const char *p, const mbedtls_mpi *X, int radix, FILE *fout )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     size_t n, slen, plen;
@@ -774,35 +778,43 @@ int mbedtls_mpi_write_file(const char *p, const mbedtls_mpi *X, int radix,
      * Buffer should have space for (short) label and decimal formatted MPI,
      * newline characters and '\0'
      */
-    char s[MBEDTLS_MPI_RW_BUFFER_SIZE];
-    MPI_VALIDATE_RET(X);
-    if (radix < 2 || radix > 16)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    mbedtls_platform_zeroize(s, sizeof(s));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_write_string(X, radix, s, sizeof(s) - 2, &n));
-    if (!p) p = "";
-    plen = strlen(p);
-    slen = strlen(s);
+    char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ];
+    MPI_VALIDATE_RET( X );
+    if( radix < 2 || radix > 16 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    mbedtls_platform_zeroize( s, sizeof( s ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_write_string( X, radix, s, sizeof( s ) - 2, &n ) );
+    if( p == NULL ) p = "";
+    plen = strlen( p );
+    slen = strlen( s );
     s[slen++] = '\r';
     s[slen++] = '\n';
-    if (fout)
+    if( fout )
     {
-        if (fwrite(p, 1, plen, fout) != plen || fwrite(s, 1, slen, fout) != slen)
-            return MBEDTLS_ERR_MPI_FILE_IO_ERROR;
+        if( fwrite( p, 1, plen, fout ) != plen ||
+            fwrite( s, 1, slen, fout ) != slen )
+            return( MBEDTLS_ERR_MPI_FILE_IO_ERROR );
     }
     else
-    {
-        mbedtls_printf("%s%s", p, s);
-    }
+        mbedtls_printf( "%s%s", p, s );
 cleanup:
-    return ret;
+    return( ret );
 }
+#endif /* MBEDTLS_FS_IO */
 
-static void mpi_bigendian_to_host(mbedtls_mpi_uint *const p, size_t limbs)
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define mpi_uint_bigendian_to_host(x) (x)
+#elif __SIZEOF_LONG__ == 8
+#define mpi_uint_bigendian_to_host(x) __builtin_bswap64(x)
+#elif __SIZEOF_LONG__ == 4
+#define mpi_uint_bigendian_to_host(x) __builtin_bswap32(x)
+#endif
+
+static void mpi_bigendian_to_host( mbedtls_mpi_uint * const p, size_t limbs )
 {
     mbedtls_mpi_uint *cur_limb_left;
     mbedtls_mpi_uint *cur_limb_right;
-    if (!limbs)
+    if( !limbs )
         return;
     /*
      * Traverse limbs and
@@ -813,14 +825,15 @@ static void mpi_bigendian_to_host(mbedtls_mpi_uint *const p, size_t limbs)
      * than the right index (it's not a problem if limbs is odd and the
      * indices coincide in the last iteration).
      */
-    for (cur_limb_left = p, cur_limb_right = p + (limbs - 1);
-         cur_limb_left <= cur_limb_right; cur_limb_left++, cur_limb_right--)
+    for( cur_limb_left = p, cur_limb_right = p + ( limbs - 1 );
+         cur_limb_left <= cur_limb_right;
+         cur_limb_left++, cur_limb_right-- )
     {
         mbedtls_mpi_uint tmp;
         /* Note that if cur_limb_left == cur_limb_right,
          * this code effectively swaps the bytes only once. */
-        tmp = mpi_uint_bigendian_to_host(*cur_limb_left);
-        *cur_limb_left = mpi_uint_bigendian_to_host(*cur_limb_right);
+        tmp             = mpi_uint_bigendian_to_host( *cur_limb_left  );
+        *cur_limb_left  = mpi_uint_bigendian_to_host( *cur_limb_right );
         *cur_limb_right = tmp;
     }
 }
@@ -923,13 +936,13 @@ int mbedtls_mpi_read_binary(mbedtls_mpi *X, const unsigned char *p, size_t n)
  *                 large enough to hold the value of \p X.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_write_binary_le(const mbedtls_mpi *X, unsigned char *buf,
-                                size_t buflen)
+int mbedtls_mpi_write_binary_le( const mbedtls_mpi *X,
+                                 unsigned char *buf, size_t buflen )
 {
     size_t stored_bytes = X->n * ciL;
     size_t bytes_to_copy;
     size_t i;
-    if (stored_bytes < buflen)
+    if( stored_bytes < buflen )
     {
         bytes_to_copy = stored_bytes;
     }
@@ -938,19 +951,20 @@ int mbedtls_mpi_write_binary_le(const mbedtls_mpi *X, unsigned char *buf,
         bytes_to_copy = buflen;
         /* The output buffer is smaller than the allocated size of X.
          * However X may fit if its leading bytes are zero. */
-        for (i = bytes_to_copy; i < stored_bytes; i++)
+        for( i = bytes_to_copy; i < stored_bytes; i++ )
         {
-            if (GET_BYTE(X, i))
-                return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
+            if( GET_BYTE( X, i ) != 0 )
+                return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
         }
     }
-    for (i = 0; i < bytes_to_copy; i++) buf[i] = GET_BYTE(X, i);
-    if (stored_bytes < buflen)
+    for( i = 0; i < bytes_to_copy; i++ )
+        buf[i] = GET_BYTE( X, i );
+    if( stored_bytes < buflen )
     {
         /* Write trailing 0 bytes */
-        mbedtls_platform_zeroize(buf + stored_bytes, buflen - stored_bytes);
+        mbedtls_platform_zeroize( buf + stored_bytes, buflen - stored_bytes );
     }
-    return 0;
+    return( 0 );
 }
 
 /**
@@ -968,17 +982,17 @@ int mbedtls_mpi_write_binary_le(const mbedtls_mpi *X, unsigned char *buf,
  *                 large enough to hold the value of \p X.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_write_binary(const mbedtls_mpi *X, unsigned char *buf,
-                             size_t buflen)
+int mbedtls_mpi_write_binary( const mbedtls_mpi *X,
+                              unsigned char *buf, size_t buflen )
 {
     size_t stored_bytes;
     size_t bytes_to_copy;
     unsigned char *p;
     size_t i;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(!buflen || buf);
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( buflen == 0 || buf );
     stored_bytes = X->n * ciL;
-    if (stored_bytes < buflen)
+    if( stored_bytes < buflen )
     {
         /* There is enough space in the output buffer. Write initial
          * null bytes and record the position at which to start
@@ -987,7 +1001,7 @@ int mbedtls_mpi_write_binary(const mbedtls_mpi *X, unsigned char *buf,
          * number. */
         bytes_to_copy = stored_bytes;
         p = buf + buflen - stored_bytes;
-        mbedtls_platform_zeroize(buf, buflen - stored_bytes);
+        mbedtls_platform_zeroize( buf, buflen - stored_bytes );
     }
     else
     {
@@ -995,14 +1009,352 @@ int mbedtls_mpi_write_binary(const mbedtls_mpi *X, unsigned char *buf,
          * However X may fit if its leading bytes are zero. */
         bytes_to_copy = buflen;
         p = buf;
-        for (i = bytes_to_copy; i < stored_bytes; i++)
+        for( i = bytes_to_copy; i < stored_bytes; i++ )
         {
-            if (GET_BYTE(X, i))
-                return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
+            if( GET_BYTE( X, i ) != 0 )
+                return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
         }
     }
-    for (i = 0; i < bytes_to_copy; i++) p[bytes_to_copy - i - 1] = GET_BYTE(X, i);
-    return 0;
+    for( i = 0; i < bytes_to_copy; i++ )
+        p[bytes_to_copy - i - 1] = GET_BYTE( X, i );
+    return( 0 );
+}
+
+/**
+ * \brief          Compare the magnitudes of two MPIs, ignoring their signs.
+ *
+ * \param X        The left-hand MPI. This must point to an initialized MPI.
+ * \param Y        The right-hand MPI. This must point to an initialized MPI.
+ *
+ * \return         \c 1 if `|X|` is greater than `|Y|`.
+ * \return         \c -1 if `|X|` is less than `|Y|`.
+ * \return         \c 0 if `|X|` is equal to `|Y|`.
+ */
+int mbedtls_mpi_cmp_abs( const mbedtls_mpi *X, const mbedtls_mpi *Y )
+{
+    size_t xn, yn;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( Y );
+    xn = mbedtls_mpi_limbs( X );
+    yn = mbedtls_mpi_limbs( Y );
+    /* Differing limb counts settle the comparison immediately. */
+    if( xn != yn )
+        return( xn > yn ? 1 : -1 );
+    /* Equal counts (possibly zero): walk down from the top limb. */
+    while( xn-- > 0 )
+    {
+        if( X->p[xn] != Y->p[xn] )
+            return( X->p[xn] > Y->p[xn] ? 1 : -1 );
+    }
+    return( 0 );
+}
+
+/**
+ * \brief          Compare two signed MPIs.
+ *
+ * \param X        The left-hand MPI. This must point to an initialized MPI.
+ * \param Y        The right-hand MPI. This must point to an initialized MPI.
+ *
+ * \return         \c 1 if \p X is greater than \p Y.
+ * \return         \c -1 if \p X is less than \p Y.
+ * \return         \c 0 if \p X is equal to \p Y.
+ */
+int mbedtls_mpi_cmp_mpi( const mbedtls_mpi *X, const mbedtls_mpi *Y )
+{
+    size_t xn, yn;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( Y );
+    xn = mbedtls_mpi_limbs( X );
+    yn = mbedtls_mpi_limbs( Y );
+    /* Two zero-limb operands compare equal whatever their sign fields say. */
+    if( ( xn | yn ) == 0 )
+        return( 0 );
+    /* Unequal limb counts: the sign of the longer operand decides. */
+    if( xn != yn )
+        return( xn > yn ? X->s : -Y->s );
+    if( X->s > 0 && Y->s < 0 ) return(  1 );
+    if( Y->s > 0 && X->s < 0 ) return( -1 );
+    while( xn-- > 0 )
+        if( X->p[xn] != Y->p[xn] )
+            return( X->p[xn] > Y->p[xn] ? X->s : -X->s );
+    return( 0 );
+}
+
+/**
+ * Decide if an integer is less than the other, without branches.
+ *
+ * \param x         First integer.
+ * \param y         Second integer.
+ *
+ * \return          1 if \p x is less than \p y, 0 otherwise
+ */
+static unsigned ct_lt_mpi_uint( const mbedtls_mpi_uint x,
+                                const mbedtls_mpi_uint y )
+{
+    mbedtls_mpi_uint ret;
+    mbedtls_mpi_uint cond;
+    /*
+     * cond has a 1 wherever x and y differ; only its top bit (MSB) matters.
+     */
+    cond = ( x ^ y );
+    /*
+     * If the MSBs are equal, the unsigned difference x-y has its MSB set
+     * if and only if x<y; ~cond keeps that bit only in this case.
+     */
+    ret = ( x - y ) & ~cond;
+    /*
+     * If the MSBs differ, the operand whose MSB is 1 is the bigger one, so
+     * x<y holds exactly when y has its MSB set; y & cond keeps that bit
+     * only in this case (~cond has already cleared the bit set above).
+     */
+    ret |= y & cond;
+    /* Move the MSB down to bit 0 so the value returned is exactly 0 or 1. */
+    ret = ret >> ( biL - 1 );
+    return (unsigned) ret;
+}
+
+/**
+ * \brief          Check if an MPI is less than the other in constant time.
+ *
+ * \param X        The left-hand MPI. This must point to an initialized MPI
+ *                 with the same allocated length as Y.
+ * \param Y        The right-hand MPI. This must point to an initialized MPI
+ *                 with the same allocated length as X.
+ * \param ret      The result of the comparison:
+ *                 \c 1 if \p X is less than \p Y.
+ *                 \c 0 if \p X is greater than or equal to \p Y.
+ *
+ * \return         0 on success.
+ * \return         MBEDTLS_ERR_MPI_BAD_INPUT_DATA if X->n differs from Y->n;
+ *                 \p ret is not written in that case.
+ */
+int mbedtls_mpi_lt_mpi_ct( const mbedtls_mpi *X, const mbedtls_mpi *Y,
+        unsigned *ret )
+{
+    size_t i;
+    /* The value of any of these variables is either 0 or 1 at all times. */
+    unsigned cond, done, X_is_negative, Y_is_negative;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( Y );
+    MPI_VALIDATE_RET( ret );
+    if( X->n != Y->n )
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    /*
+     * Set N_is_negative to 1 if N < 0, 0 if N >= 0.
+     * N->s is 1 if N >= 0 and -1 if N < 0; of those two only -1 has bit 1 set.
+     */
+    X_is_negative = ( X->s & 2 ) >> 1;
+    Y_is_negative = ( Y->s & 2 ) >> 1;
+    /*
+     * If the signs are different, then the positive operand is the bigger.
+     * That is if X is negative (X_is_negative == 1), then X < Y is true and it
+     * is false if X is positive (X_is_negative == 0).
+     */
+    cond = ( X_is_negative ^ Y_is_negative );
+    *ret = cond & X_is_negative;
+    /*
+     * This is a constant-time function. We might have the result, but we still
+     * need to go through the loop. Record if we have the result already.
+     */
+    done = cond;
+    for( i = X->n; i > 0; i-- )
+    {
+        /*
+         * If Y->p[i - 1] < X->p[i - 1] then X < Y is true if and only if both
+         * X and Y are negative.
+         *
+         * Even when this settles the answer, only the result and the done
+         * flag are updated; the loop still visits every remaining limb.
+         */
+        cond = ct_lt_mpi_uint( Y->p[i - 1], X->p[i - 1] );
+        *ret |= cond & ( 1 - done ) & X_is_negative;
+        done |= cond;
+        /*
+         * If X->p[i - 1] < Y->p[i - 1] then X < Y is true if and only if both
+         * X and Y are positive.
+         *
+         * As above, the result and the done flag are updated without leaving
+         * the loop; ( 1 - done ) masks out limbs below the deciding one.
+         */
+        cond = ct_lt_mpi_uint( X->p[i - 1], Y->p[i - 1] );
+        *ret |= cond & ( 1 - done ) & ( 1 - X_is_negative );
+        done |= cond;
+    }
+    return( 0 );
+}
+
+/**
+ * \brief          Compare an MPI with an integer.
+ *
+ * \param X        The left-hand MPI. This must point to an initialized MPI.
+ * \param z        The integer value to compare \p X to.
+ *
+ * \return         \c 1 if \p X is greater than \p z.
+ * \return         \c -1 if \p X is less than \p z.
+ * \return         \c 0 if \p X is equal to \p z.
+ */
+int mbedtls_mpi_cmp_int( const mbedtls_mpi *X, mbedtls_mpi_sint z )
+{
+    mbedtls_mpi Y;
+    mbedtls_mpi_uint p[1];
+    MPI_VALIDATE_RET( X );
+    /* Negate as unsigned: plain -z overflows when z is the minimum value. */
+    *p  = ( z < 0 ) ? (mbedtls_mpi_uint) 0 - (mbedtls_mpi_uint) z : z;
+    Y.s = ( z < 0 ) ? -1 : 1;
+    Y.n = 1; Y.p = p;   /* Y is a one-limb MPI borrowing the stack buffer p */
+    return( mbedtls_mpi_cmp_mpi( X, &Y ) );
+}
+
+/**
+ * \brief          Perform an unsigned addition of MPIs: X = |A| + |B|
+ *
+ * \param X        The destination MPI. May be the same object as A or B.
+ * \param A        The first summand. This must point to an initialized MPI.
+ * \param B        The second summand. This must point to an initialized MPI.
+ *
+ * \return         \c 0 if successful.
+ * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
+ * \return         Another negative error code on different kinds of failure.
+ */
+int mbedtls_mpi_add_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret = MBEDTLS_ERR_THIS_CORRUPTION;
+    size_t i, j;
+    mbedtls_mpi_uint *o, *p, c, tmp;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( B );
+    if( X == B ) /* swap the summands so that, if X aliases one, it is A */
+    {
+        const mbedtls_mpi *T = A; A = X; B = T;
+    }
+    if( X != A ) /* start from a copy of A, then add B into X in place */
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );
+    /*
+     * The result of an unsigned addition is never negative, so the sign is
+     * forced to +1 whatever sign A carried.
+     */
+    X->s = 1;
+    for( j = B->n; j > 0; j-- ) /* j = limbs of B up to its top nonzero limb */
+        if( B->p[j - 1] != 0 )
+            break;
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
+    o = B->p; p = X->p; c = 0;
+    /*
+     * Each limb of B is read into tmp before *p is written, because X and B
+     * may be the same object, in which case p == o.
+     */
+    for( i = 0; i < j; i++, o++, p++ )
+    {
+        tmp= *o;
+        *p +=  c; c  = ( *p <  c );      /* add carry in; wrapped iff sum < c */
+        *p += tmp; c += ( *p < tmp );    /* add B's limb; wrapped iff sum < tmp */
+    }
+    while( c != 0 ) /* ripple the leftover carry into the higher limbs of X */
+    {
+        if( i >= X->n )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + 1 ) );
+            p = X->p + i; /* re-derive p from X->p after growing X */
+        }
+        *p += c; c = ( *p < c ); i++; p++;
+    }
+cleanup:
+    return( ret );
+}
+
+/**
+ * Helper for mbedtls_mpi subtraction.
+ *
+ * Calculate d = a - b where d, a, and b have the same size.
+ * This function operates modulo (2^biL)^n and returns the carry
+ * (1 if there was a wraparound, i.e. if `a < b`, and 0 otherwise).
+ *
+ * \param[out] d        Result of subtraction.
+ * \param[in] a         Left operand.
+ * \param[in] b         Right operand.
+ * \param n             Number of limbs of \p a and \p b.
+ * \return              1 if `a < b` (a borrow left the top limb).
+ *                      0 if `a >= b`.
+ */
+forceinline mbedtls_mpi_uint mpi_sub_hlp(mbedtls_mpi_uint *d,
+                                         const mbedtls_mpi_uint *a,
+                                         const mbedtls_mpi_uint *b,
+                                         size_t n)
+{
+    size_t i;
+    unsigned char cf;
+    mbedtls_mpi_uint c, x;
+    cf = c = i = 0;
+#ifdef __x86_64__
+    if (!n) return 0; /* the asm loop below always executes at least once */
+    asm volatile("xor\t%1,%1\n\t"          /* xor also clears CF: no borrow in */
+                 ".align\t16\n1:\t"
+                 "mov\t(%5,%3,8),%1\n\t"   /* x = a[i] (scale 8: 8-byte limbs) */
+                 "sbb\t(%6,%3,8),%1\n\t"   /* x -= b[i] + CF; CF = borrow out */
+                 "mov\t%1,(%4,%3,8)\n\t"   /* d[i] = x */
+                 "lea\t1(%3),%3\n\t"       /* ++i; lea leaves the flags alone */
+                 "dec\t%2\n\t"             /* --n; dec does not modify CF */
+                 "jnz\t1b"
+                 : "=@ccb"(cf), "=&r"(x), "+&c"(n), "=&r"(i) /* %0..%3; cf = CF */
+                 : "r"(d), "r"(a), "r"(b), "3"(0)            /* %4..%6; i = 0 */
+                 : "cc", "memory");
+    return cf;
+#else
+    for (; i < n; ++i)
+        SBB(d[i], a[i], b[i], c, c); /* SBB is defined elsewhere; presumably borrow in/out via c */
+    return c;
+#endif
+}
+
+/**
+ * \brief          Perform an unsigned subtraction of MPIs: X = |A| - |B|
+ *
+ * \param X        The destination MPI. This must point to an initialized MPI.
+ * \param A        The minuend. This must point to an initialized MPI.
+ * \param B        The subtrahend. This must point to an initialized MPI.
+ *
+ * \return         \c 0 if successful.
+ * \return         #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p B is greater than \p A.
+ * \return         Another negative error code on different kinds of failure.
+ */
+int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+{
+    int ret;        /* status codes are negative ints; keep them out of size_t */
+    size_t n, m;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( B );
+    if( X != A && !B->n )
+    {   /* B has no limbs: X = |A|; force the sign as the main path does */
+        if( ( ret = mbedtls_mpi_copy( X, A ) ) == 0 ) X->s = 1;
+        return( ret );
+    }
+    for( n = B->n; n > 0; n-- ) /* n = limbs of B up to its top nonzero limb */
+        if( B->p[n - 1] != 0 )
+            break;
+    if( n > A->n )
+        return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; /* B has a nonzero limb above A's top */
+    if (X != A)
+    {
+        if (X->n < A->n) {
+            if ((ret = mbedtls_mpi_grow(X, A->n))) return ret;
+        } else if (X->n > A->n) {
+            mbedtls_mpi_zeroize(X->p + A->n, X->n - A->n); /* clear limbs beyond A */
+        }
+        if ((m = A->n - n))
+            memcpy(X->p + n, A->p + n, m * ciL); /* limbs of A above B's top limb */
+    }
+    X->s = 1; /* an unsigned subtraction always yields a non-negative X */
+    if( mpi_sub_hlp( X->p, A->p, B->p, n ) ){
+        /* Borrow out of the low n limbs: each zero limb of A wraps to all-ones. */
+        for( ; n < A->n && A->p[n] == 0; n++ )
+            X->p[n] = A->p[n] - 1;
+        /* Borrow ran past A's top limb, so |A| < |B|. Test against A->n, not
+         * X->n: X may own extra zeroed limbs that must not absorb the borrow. */
+        if( n == A->n )
+            return MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
+        --X->p[n];
+    }
+    return( 0 );
 }
 
 static int mpi_cmp_abs(const mbedtls_mpi *X,
@@ -1026,310 +1378,17 @@ static int mpi_cmp_abs(const mbedtls_mpi *X,
     return 0;
 }
 
-/**
- * \brief          Compare the absolute values of two MPIs.
- *
- * \param X        The left-hand MPI. This must point to an initialized MPI.
- * \param Y        The right-hand MPI. This must point to an initialized MPI.
- *
- * \return         \c 1 if `|X|` is greater than `|Y|`.
- * \return         \c -1 if `|X|` is lesser than `|Y|`.
- * \return         \c 0 if `|X|` is equal to `|Y|`.
- */
-int mbedtls_mpi_cmp_abs(const mbedtls_mpi *X, const mbedtls_mpi *Y)
+static int mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B, size_t n )
 {
-    size_t i, j;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(Y);
-    return mpi_cmp_abs(X, Y, &i, &j);
-}
-
-static int mpi_cmp_mpi(const mbedtls_mpi *X, const mbedtls_mpi *Y,
-                       size_t *Xn, size_t *Yn) {
-    size_t i, j;
-    i = mbedtls_mpi_limbs(X);
-    j = mbedtls_mpi_limbs(Y);
-    *Xn = i;
-    *Yn = j;
-    if (!i && !j) return 0;
-    if (i > j) return X->s;
-    if (j > i) return -Y->s;
-    if (X->s > 0 && Y->s < 0) return 1;
-    if (Y->s > 0 && X->s < 0) return -1;
-    for (; i > 0; i--) {
-        if (X->p[i - 1] > Y->p[i - 1]) return X->s;
-        if (X->p[i - 1] < Y->p[i - 1]) return -X->s;
-    }
-    return 0;
-}
-
-/**
- * \brief          Compare two MPIs.
- *
- * \param X        The left-hand MPI. This must point to an initialized MPI.
- * \param Y        The right-hand MPI. This must point to an initialized MPI.
- *
- * \return         \c 1 if \p X is greater than \p Y.
- * \return         \c -1 if \p X is lesser than \p Y.
- * \return         \c 0 if \p X is equal to \p Y.
- */
-int mbedtls_mpi_cmp_mpi(const mbedtls_mpi *X, const mbedtls_mpi *Y) {
-    size_t i, j;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(Y);
-    return mpi_cmp_mpi(X, Y, &i, &j);
-}
-
-/**
- * Decide if an integer is less than the other, without branches.
- *
- * \param x         First integer.
- * \param y         Second integer.
- *
- * \return          1 if \p x is less than \p y, 0 otherwise
- */
-static unsigned ct_lt_mpi_uint(const mbedtls_mpi_uint x,
-                               const mbedtls_mpi_uint y) {
-    mbedtls_mpi_uint ret;
-    mbedtls_mpi_uint cond;
-    /*
-     * Check if the most significant bits (MSB) of the operands are different.
-     */
-    cond = (x ^ y);
-    /*
-     * If the MSB are the same then the difference x-y will be negative (and
-     * have its MSB set to 1 during conversion to unsigned) if and only if x<y.
-     */
-    ret = (x - y) & ~cond;
-    /*
-     * If the MSB are different, then the operand with the MSB of 1 is the
-     * bigger. (That is if y has MSB of 1, then x<y is true and it is false if
-     * the MSB of y is 0.)
-     */
-    ret |= y & cond;
-    ret = ret >> (biL - 1);
-    return (unsigned)ret;
-}
-
-/**
- * \brief          Check if an MPI is less than the other in constant time.
- *
- * \param X        The left-hand MPI. This must point to an initialized MPI
- *                 with the same allocated length as Y.
- * \param Y        The right-hand MPI. This must point to an initialized MPI
- *                 with the same allocated length as X.
- * \param ret      The result of the comparison:
- *                 \c 1 if \p X is less than \p Y.
- *                 \c 0 if \p X is greater than or equal to \p Y.
- *
- * \return         0 on success.
- * \return         MBEDTLS_ERR_MPI_BAD_INPUT_DATA if the allocated length of
- *                 the two input MPIs is not the same.
- */
-int mbedtls_mpi_lt_mpi_ct(const mbedtls_mpi *X, const mbedtls_mpi *Y,
-                          unsigned *ret)
-{
-    size_t i;
-    /* The value of any of these variables is either 0 or 1 at all times. */
-    unsigned cond, done, X_is_negative, Y_is_negative;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(Y);
-    MPI_VALIDATE_RET(ret);
-    if (X->n != Y->n)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    /*
-     * Set sign_N to 1 if N >= 0, 0 if N < 0.
-     * We know that N->s == 1 if N >= 0 and N->s == -1 if N < 0.
-     */
-    X_is_negative = (X->s & 2) >> 1;
-    Y_is_negative = (Y->s & 2) >> 1;
-    /*
-     * If the signs are different, then the positive operand is the bigger.
-     * That is if X is negative (X_is_negative == 1), then X < Y is true and it
-     * is false if X is positive (X_is_negative == 0).
-     */
-    cond = (X_is_negative ^ Y_is_negative);
-    *ret = cond & X_is_negative;
-    /*
-     * This is a constant-time function. We might have the result, but we still
-     * need to go through the loop. Record if we have the result already.
-     */
-    done = cond;
-    for (i = X->n; i > 0; i--)
-    {
-        /*
-         * If Y->p[i - 1] < X->p[i - 1] then X < Y is true if and only if both
-         * X and Y are negative.
-         *
-         * Again even if we can make a decision, we just mark the result and
-         * the fact that we are done and continue looping.
-         */
-        cond = ct_lt_mpi_uint(Y->p[i - 1], X->p[i - 1]);
-        *ret |= cond & (1 - done) & X_is_negative;
-        done |= cond;
-        /*
-         * If X->p[i - 1] < Y->p[i - 1] then X < Y is true if and only if both
-         * X and Y are positive.
-         *
-         * Again even if we can make a decision, we just mark the result and
-         * the fact that we are done and continue looping.
-         */
-        cond = ct_lt_mpi_uint(X->p[i - 1], Y->p[i - 1]);
-        *ret |= cond & (1 - done) & (1 - X_is_negative);
-        done |= cond;
-    }
-    return 0;
-}
-
-/**
- * \brief          Compare an MPI with an integer.
- *
- * \param X        The left-hand MPI. This must point to an initialized MPI.
- * \param z        The integer value to compare \p X to.
- *
- * \return         \c 1 if \p X is greater than \p z.
- * \return         \c -1 if \p X is lesser than \p z.
- * \return         \c 0 if \p X is equal to \p z.
- */
-int mbedtls_mpi_cmp_int(const mbedtls_mpi *X, mbedtls_mpi_sint z)
-{
-    mbedtls_mpi Y;
-    mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET(X);
-    *p = (z < 0) ? -z : z;
-    Y.s = (z < 0) ? -1 : 1;
-    Y.n = 1;
-    Y.p = p;
-    return mbedtls_mpi_cmp_mpi(X, &Y);
-}
-
-forceinline mbedtls_mpi_uint mpi_add_hlp(mbedtls_mpi_uint *d,
-                                         const mbedtls_mpi_uint *b,
-                                         size_t n)
-{
-    size_t i;
-    unsigned char cf;
-    mbedtls_mpi_uint c, t, *e;
-    e = d + n;
-    c = i = 0;
-#ifdef __x86_64__
-    for (; d + 4 <= e; d += 4, b += 4, c = cf)
-    {
-        asm("add\t%5,%1\n\t"
-            "adc\t%6,%2\n\t"
-            "adc\t%7,%3\n\t"
-            "adc\t%8,%4"
-            : "=@ccc"(cf), "+m"(d[0]), "+m"(d[1]), "+m"(d[2]), "+m"(d[3])
-            : "r"(b[0] + c), "r"(b[1]), "r"(b[2]), "r"(b[3])
-            : "cc");
-    }
-#endif
-    for (; d < e; ++d, ++b)
-        ADC(*d, *d, *b, c, c);
-    return c;
-}
-
-/**
- * Helper for mbedtls_mpi subtraction.
- *
- * Calculate d = a - b where d, a, and b have the same size.
- * This function operates modulo (2^ciL)^n and returns the carry
- * (1 if there was a wraparound, i.e. if `a < b`, and 0 otherwise).
- *
- * \param[out] d        Result of subtraction.
- * \param[in] a         Left operand.
- * \param[in] b         Right operand.
- * \param n             Number of limbs of \p a and \p b.
- * \return              1 if `d < s`.
- *                      0 if `d >= s`.
- */
-forceinline mbedtls_mpi_uint mpi_sub_hlp(mbedtls_mpi_uint *d,
-                                         const mbedtls_mpi_uint *a,
-                                         const mbedtls_mpi_uint *b, 
-                                         size_t n)
-{
-    size_t i;
-    unsigned char cf;
-    uint64_t q, r, s, t;
-    mbedtls_mpi_uint c, z, x, y;
-    cf = c = i = 0;
-#ifdef __x86_64__
-    for (; i + 4 <= n; i += 4, c = cf)
-    {
-        q = a[i + 0];
-        r = a[i + 1];
-        s = a[i + 2];
-        t = a[i + 3];
-        asm volatile("sub\t%5,%1\n\t"
-                     "sbb\t1*8(%6),%2\n\t"
-                     "sbb\t2*8(%6),%3\n\t"
-                     "sbb\t3*8(%6),%4"
-                     : "=@ccc"(cf), "+r"(q), "+r"(r), "+r"(s), "+r"(t)
-                     : "r"(b[i] + c), "r"(b + i)
-                     : "memory", "cc");
-        d[i + 0] = q;
-        d[i + 1] = r;
-        d[i + 2] = s;
-        d[i + 3] = t;
-    }
-#endif
-    for (; i < n; ++i)
-        SBB(d[i], a[i], b[i], c, c);
-    return c;
-}
-
-/**
- * \brief          Perform an unsigned addition of MPIs: X = |A| + |B|
- *
- * \param X        The destination MPI. This must point to an initialized MPI.
- * \param A        The first summand. This must point to an initialized MPI.
- * \param B        The second summand. This must point to an initialized MPI.
- *
- * \return         \c 0 if successful.
- * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
- * \return         Another negative error code on different kinds of failure.
- */
-int mbedtls_mpi_add_abs(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        const mbedtls_mpi *B)
-{
-    int ret = MBEDTLS_ERR_THIS_CORRUPTION;
-    size_t i, j;
-    unsigned char cf;
-    const mbedtls_mpi *T;
-    mbedtls_mpi_uint c, tmp;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(B);
-    if (X == B) T = A, A = X, B = T;
-    if (X != A) MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, A));
-    X->s = 1; /* always positive b/c unsigned addition */
-    j = mbedtls_mpi_limbs(B);
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j));
-    c = mpi_add_hlp(X->p, B->p, j);
-    for (; c; ++j)
-    {
-        if (j >= X->n)
-            MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j + 1));
-        X->p[j] += c;
-        c = X->p[j] < c;
-    }
-cleanup:
-    return ret;
-}
-
-static int mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A,
-                       const mbedtls_mpi *B, size_t Bn)
-{
-    int ret;
-    size_t n, m;
-    unsigned char cf;
-    n = Bn;
-    if (n > A->n)
+    size_t m, r;
+    if( X != A && !B->n )
+        return mbedtls_mpi_copy( X, A ); /* wut */
+    if( n > A->n )
         return MBEDTLS_ERR_MPI_NEGATIVE_VALUE; /* B >= (2^ciL)^n > A */
     if (X != A)
     {
         if (X->n < A->n) {
-            if ((ret = mbedtls_mpi_grow(X, A->n))) return ret;
+            if ((r = mbedtls_mpi_grow(X, A->n))) return r;
         } else if (X->n > A->n) {
             mbedtls_mpi_zeroize(X->p + A->n, X->n - A->n);
         }
@@ -1340,43 +1399,18 @@ static int mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A,
      * X should always be positive as a result of unsigned subtractions.
      */
     X->s = 1;
-    cf = mpi_sub_hlp(X->p, A->p, B->p, n);
-    if (cf)
-    {
+    if( mpi_sub_hlp( X->p, A->p, B->p, n ) ){
         /* Propagate the carry to the first nonzero limb of X. */
-        for (; n < A->n && !A->p[n]; n++) { /* --X->p[n]; */
+        for( ; n < A->n && A->p[n] == 0; n++ )
+            /* --X->p[n]; */
             X->p[n] = A->p[n] - 1;
-        }
         /* If we ran out of space for the carry, it means that the result
          * is negative. */
-        if (n == X->n)
+        if( n == X->n )
             return MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
         --X->p[n];
     }
-    return 0;
-}
-
-/**
- * \brief          Perform an unsigned subtraction of MPIs: X = |A| - |B|
- *
- * \param X        The destination MPI. This must point to an initialized MPI.
- * \param A        The minuend. This must point to an initialized MPI.
- * \param B        The subtrahend. This must point to an initialized MPI.
- *
- * \return         \c 0 if successful.
- * \return         #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p B is greater than \p A.
- * \return         Another negative error code on different kinds of failure.
- */
-int mbedtls_mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        const mbedtls_mpi *B)
-{
-    size_t n, m;
-    unsigned char cf;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(B);
-    if (X != A && !B->n) return mbedtls_mpi_copy(X, A); /* wut */
-    return mpi_sub_abs(X, A, B, mbedtls_mpi_limbs(B));
+    return( 0 );
 }
 
 /**
@@ -1390,35 +1424,34 @@ int mbedtls_mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A,
  * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_add_mpi(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        const mbedtls_mpi *B)
+int mbedtls_mpi_add_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
 {
     int ret, s;
     size_t i, j;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(B);
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( B );
     s = A->s;
-    if (A->s * B->s < 0)
+    if( A->s * B->s < 0 )
     {
-        if (mpi_cmp_abs(A, B, &i, &j) >= 0)
+        if( mpi_cmp_abs( A, B, &i, &j ) >= 0 )
         {
-            MBEDTLS_MPI_CHK(mpi_sub_abs(X, A, B, j));
-            X->s = s;
+            MBEDTLS_MPI_CHK( mpi_sub_abs( X, A, B, j ) );
+            X->s =  s;
         }
         else
         {
-            MBEDTLS_MPI_CHK(mpi_sub_abs(X, B, A, i));
+            MBEDTLS_MPI_CHK( mpi_sub_abs( X, B, A, i ) );
             X->s = -s;
         }
     }
     else
     {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_add_abs(X, A, B));
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) );
         X->s = s;
     }
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -1432,60 +1465,58 @@ cleanup:
  * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_sub_mpi(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        const mbedtls_mpi *B)
+int mbedtls_mpi_sub_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
 {
     int ret, s;
     size_t i, j;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(B);
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( B );
     s = A->s;
-    if (A->s * B->s > 0)
+    if( A->s * B->s > 0 )
     {
-        if (mpi_cmp_abs(A, B, &i, &j) >= 0)
+        if( mpi_cmp_abs( A, B, &i, &j ) >= 0 )
         {
-            MBEDTLS_MPI_CHK(mpi_sub_abs(X, A, B, j));
-            X->s = s;
+            MBEDTLS_MPI_CHK( mpi_sub_abs( X, A, B, j ) );
+            X->s =  s;
         }
         else
         {
-            MBEDTLS_MPI_CHK(mpi_sub_abs(X, B, A, i));
+            MBEDTLS_MPI_CHK( mpi_sub_abs( X, B, A, i ) );
             X->s = -s;
         }
     }
     else
     {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_add_abs(X, A, B));
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) );
         X->s = s;
     }
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
- * \brief          Performs signed addition of MPI and integer: X = A + b
+ * \brief          Perform a signed addition of an MPI and an integer: X = A + b
  *
  * \param X        The destination MPI. This must point to an initialized MPI.
  * \param A        The first summand. This must point to an initialized MPI.
  * \param b        The second summand.
  *
  * \return         \c 0 if successful.
- * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a allocation failed.
- * \return         Another negative error code on different kinds of
- * failure.
+ * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
+ * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_add_int(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        mbedtls_mpi_sint b) {
+int mbedtls_mpi_add_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+{
     mbedtls_mpi _B;
     mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    p[0] = (b < 0) ? -b : b;
-    _B.s = (b < 0) ? -1 : 1;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
     _B.n = 1;
     _B.p = p;
-    return mbedtls_mpi_add_mpi(X, A, &_B);
+    return( mbedtls_mpi_add_mpi( X, A, &_B ) );
 }
 
 /**
@@ -1500,50 +1531,69 @@ int mbedtls_mpi_add_int(mbedtls_mpi *X, const mbedtls_mpi *A,
  * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_sub_int(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        mbedtls_mpi_sint b) {
+int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+{
     mbedtls_mpi _B;
     mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    p[0] = (b < 0) ? -b : b;
-    _B.s = (b < 0) ? -1 : 1;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
     _B.n = 1;
     _B.p = p;
-    return mbedtls_mpi_sub_mpi(X, A, &_B);
+    return( mbedtls_mpi_sub_mpi( X, A, &_B ) );
 }
 
 /*
  * Unsigned integer divide - double mbedtls_mpi_uint dividend, u1/u0, and
  * mbedtls_mpi_uint divisor, d
  */
-static inline mbedtls_mpi_uint mbedtls_int_div_int(mbedtls_mpi_uint u1,
-                                                   mbedtls_mpi_uint u0,
-                                                   mbedtls_mpi_uint d,
-                                                   mbedtls_mpi_uint *r)
+static mbedtls_mpi_uint mbedtls_int_div_int( mbedtls_mpi_uint u1,
+                                             mbedtls_mpi_uint u0,
+                                             mbedtls_mpi_uint d,
+                                             mbedtls_mpi_uint *r )
 {
-  if (d && u1 < d)
-  {
 #ifdef __x86_64__
-    mbedtls_mpi_uint quo, rem;
-    asm("div\t%2" : "=a"(quo), "=d"(rem) : "r"(d), "0"(u0), "1"(u1) : "cc");
-    if (r) *r = rem;
-    return quo;
-#elif defined(MBEDTLS_HAVE_UDBL)
-    mbedtls_t_udbl dividend, quotient;
-    dividend = (mbedtls_t_udbl)u1 << biL;
-    dividend |= (mbedtls_t_udbl)u0;
-    quotient = dividend / d;
-    if (quotient > ((mbedtls_t_udbl)1 << biL) - 1)
-      quotient = ((mbedtls_t_udbl)1 << biL) - 1;
-    if (r) *r = (mbedtls_mpi_uint)(dividend - (quotient * d));
-    return (mbedtls_mpi_uint)quotient;
+    if (d && u1 < d)
+    {
+        mbedtls_mpi_uint quo, rem;
+        asm("div\t%2" : "=a"(quo), "=d"(rem) : "r"(d), "0"(u0), "1"(u1) : "cc");
+        if (r) *r = rem;
+        return quo;
+    }
+    else
+    {
+        if (r) *r = ~0;
+        return ~0;
+    }
 #else
-    size_t s;
-    mbedtls_mpi_uint radix = (mbedtls_mpi_uint)1 << biH;
-    mbedtls_mpi_uint uint_halfword_mask = ((mbedtls_mpi_uint)1 << biH) - 1;
+#if defined(MBEDTLS_HAVE_UDBL)
+    mbedtls_t_udbl dividend, quotient;
+#else
+    const mbedtls_mpi_uint radix = (mbedtls_mpi_uint) 1 << biH;
+    const mbedtls_mpi_uint uint_halfword_mask = ( (mbedtls_mpi_uint) 1 << biH ) - 1;
     mbedtls_mpi_uint d0, d1, q0, q1, rAX, r0, quotient;
     mbedtls_mpi_uint u0_msw, u0_lsw;
+    size_t s;
+#endif
+    /*
+     * Check for overflow
+     */
+    if( 0 == d || u1 >= d )
+    {
+        if (r) *r = ~0;
+        return ( ~0 );
+    }
+#if defined(MBEDTLS_HAVE_UDBL)
+    dividend  = (mbedtls_t_udbl) u1 << biL;
+    dividend |= (mbedtls_t_udbl) u0;
+    quotient = dividend / d;
+    if( quotient > ( (mbedtls_t_udbl) 1 << biL ) - 1 )
+        quotient = ( (mbedtls_t_udbl) 1 << biL ) - 1;
+    if( r )
+        *r = (mbedtls_mpi_uint)( dividend - (quotient * d ) );
+    return (mbedtls_mpi_uint) quotient;
+#else
     /*
      * Algorithm D, Section 4.3.1 - The Art of Computer Programming
      *   Vol. 2 - Seminumerical Algorithms, Knuth
@@ -1551,11 +1601,11 @@ static inline mbedtls_mpi_uint mbedtls_int_div_int(mbedtls_mpi_uint u1,
     /*
      * Normalize the divisor, d, and dividend, u0, u1
      */
-    s = mbedtls_clz(d);
+    s = mbedtls_clz( d );
     d = d << s;
     u1 = u1 << s;
-    u1 |= (u0 >> (biL - s)) & (-(mbedtls_mpi_sint)s >> (biL - 1));
-    u0 = u0 << s;
+    u1 |= ( u0 >> ( biL - s ) ) & ( -(mbedtls_mpi_sint)s >> ( biL - 1 ) );
+    u0 =  u0 << s;
     d1 = d >> biH;
     d0 = d & uint_halfword_mask;
     u0_msw = u0 >> biH;
@@ -1565,33 +1615,27 @@ static inline mbedtls_mpi_uint mbedtls_int_div_int(mbedtls_mpi_uint u1,
      */
     q1 = u1 / d1;
     r0 = u1 - d1 * q1;
-    while (q1 >= radix || (q1 * d0 > radix * r0 + u0_msw))
+    while( q1 >= radix || ( q1 * d0 > radix * r0 + u0_msw ) )
     {
-      q1 -= 1;
-      r0 += d1;
-      if (r0 >= radix)
-          break;
+        q1 -= 1;
+        r0 += d1;
+        if ( r0 >= radix ) break;
     }
-    rAX = (u1 * radix) + (u0_msw - q1 * d);
+    rAX = ( u1 * radix ) + ( u0_msw - q1 * d );
     q0 = rAX / d1;
     r0 = rAX - q0 * d1;
-    while (q0 >= radix || (q0 * d0 > radix * r0 + u0_lsw))
+    while( q0 >= radix || ( q0 * d0 > radix * r0 + u0_lsw ) )
     {
-      q0 -= 1;
-      r0 += d1;
-      if (r0 >= radix)
-          break;
+        q0 -= 1;
+        r0 += d1;
+        if ( r0 >= radix ) break;
     }
-    if (r) *r = (rAX * radix + u0_lsw - q0 * d) >> s;
+    if (r)
+        *r = ( rAX * radix + u0_lsw - q0 * d ) >> s;
     quotient = q1 * radix + q0;
     return quotient;
 #endif
-  }
-  else
-  {
-    if (r) *r = ~0;
-    return ~0;
-  }
+#endif
 }
 
 static inline void Multiply2x1(uint64_t a[3], uint64_t b) {
@@ -1686,10 +1730,10 @@ int mbedtls_mpi_div_mpi(mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
     n = X.n - 1;
     t = Y.n - 1;
     MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&Y, biL * (n - t)));
-    while (mpi_cmp_abs(&X, &Y, &Xn, &Yn) >= 0)
+    while (mbedtls_mpi_cmp_abs(&X, &Y) >= 0)
     {
         Z.p[n - t]++;
-        MBEDTLS_MPI_CHK(mpi_sub_abs(&X, &X, &Y, Yn));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_abs(&X, &X, &Y));
     }
     mbedtls_mpi_shift_r(&Y, biL * (n - t));
     for (i = n; i > t; i--)
@@ -1758,17 +1802,18 @@ cleanup:
  * \return         #MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if \p b equals zero.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_div_int(mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
-                        mbedtls_mpi_sint b)
+int mbedtls_mpi_div_int( mbedtls_mpi *Q, mbedtls_mpi *R,
+                         const mbedtls_mpi *A,
+                         mbedtls_mpi_sint b )
 {
     mbedtls_mpi _B;
     mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET(A);
-    p[0] = (b < 0) ? -b : b;
-    _B.s = (b < 0) ? -1 : 1;
+    MPI_VALIDATE_RET( A );
+    p[0] = ( b < 0 ) ? -b : b;
+    _B.s = ( b < 0 ) ? -1 : 1;
     _B.n = 1;
     _B.p = p;
-    return mbedtls_mpi_div_mpi(Q, R, A, &_B);
+    return( mbedtls_mpi_div_mpi( Q, R, A, &_B ) );
 }
 
 /**
@@ -1786,22 +1831,23 @@ int mbedtls_mpi_div_int(mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
  * \return         #MBEDTLS_ERR_MPI_DIVISION_BY_ZERO if \p B equals zero.
  * \return         #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p B is negative.
  * \return         Another negative error code on different kinds of failure.
+ *
  */
-int mbedtls_mpi_mod_mpi(mbedtls_mpi *R, const mbedtls_mpi *A,
-                        const mbedtls_mpi *B)
+int mbedtls_mpi_mod_mpi( mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B )
 {
-    size_t i, j;
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
-    MPI_VALIDATE_RET(R);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(B);
-    if (B->s < 0) return MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
-    MBEDTLS_MPI_CHK(mbedtls_mpi_div_mpi(NULL, R, A, B));
-    while (R->s < 0) MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(R, R, B));
-    while (mbedtls_mpi_cmp_mpi(R, B) >= 0)
-        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(R, R, B));
+    MPI_VALIDATE_RET( R );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( B );
+    if( mbedtls_mpi_cmp_int( B, 0 ) < 0 )
+        return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( NULL, R, A, B ) );
+    while( mbedtls_mpi_cmp_int( R, 0 ) < 0 )
+      MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( R, R, B ) );
+    while( mbedtls_mpi_cmp_mpi( R, B ) >= 0 )
+      MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( R, R, B ) );
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -1820,63 +1866,64 @@ cleanup:
  * \return         #MBEDTLS_ERR_MPI_NEGATIVE_VALUE if \p b is negative.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A,
-                         mbedtls_mpi_sint b )
+int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_sint b )
 {
     size_t i;
     mbedtls_mpi_uint x, y, z;
-    MPI_VALIDATE_RET(r);
-    MPI_VALIDATE_RET(A);
-    if (!b)
-        return MBEDTLS_ERR_MPI_DIVISION_BY_ZERO;
-    if (b < 0)
-        return MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
+    MPI_VALIDATE_RET( r );
+    MPI_VALIDATE_RET( A );
+    if( b == 0 )
+        return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO );
+    if( b < 0 )
+        return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
     /*
      * handle trivial cases
      */
-    if (b == 1)
+    if( b == 1 )
     {
         *r = 0;
-        return 0;
+        return( 0 );
     }
-    if (b == 2)
+    if( b == 2 )
     {
         *r = A->p[0] & 1;
-        return 0;
+        return( 0 );
     }
     /*
      * general case
      */
-    for (i = A->n, y = 0; i > 0; i--)
+    for( i = A->n, y = 0; i > 0; i-- )
     {
-        x = A->p[i - 1];
-        y = (y << biH) | (x >> biH);
-        z = y / b;
+        x  = A->p[i - 1];
+        y  = ( y << biH ) | ( x >> biH );
+        z  = y / b;
         y -= z * b;
         x <<= biH;
-        y = (y << biH) | (x >> biH);
-        z = y / b;
+        y  = ( y << biH ) | ( x >> biH );
+        z  = y / b;
         y -= z * b;
     }
     /*
      * If A is negative, then the current y represents a negative value.
      * Flipping it to the positive side.
      */
-    if (A->s < 0 && y) y = b - y;
+    if( A->s < 0 && y != 0 )
+        y = b - y;
     *r = y;
-    return 0;
+    return( 0 );
 }
 
 /*
  * Fast Montgomery initialization (thanks to Tom St Denis)
  */
-static void mpi_montg_init(mbedtls_mpi_uint *mm, const mbedtls_mpi *N)
+static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N )
 {
     mbedtls_mpi_uint x, m0 = N->p[0];
     unsigned int i;
-    x = m0;
-    x += ((m0 + 2) & 4) << 1;
-    for (i = biL; i >= 8; i /= 2) x *= 2 - m0 * x;
+    x  = m0;
+    x += ( ( m0 + 2 ) & 4 ) << 1;
+    for( i = biL; i >= 8; i /= 2 )
+        x *= ( 2 - ( m0 * x ) );
     *mm = -x;
 }
 
@@ -1903,42 +1950,40 @@ static void mpi_montg_init(mbedtls_mpi_uint *mm, const mbedtls_mpi *N)
  *                      Note that unlike the usual convention in the library
  *                      for `const mbedtls_mpi*`, the content of T can change.
  */
-static void mpi_montmul(mbedtls_mpi *A, const mbedtls_mpi *B,
-                        const mbedtls_mpi *N, mbedtls_mpi_uint mm,
-                        const mbedtls_mpi *T)
+static void mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm,
+                         const mbedtls_mpi *T )
 {
     size_t i, n, m;
     mbedtls_mpi_uint u0, u1, *d, *Ap, *Bp, *Np;
-    mbedtls_mpi_zeroize(T->p, T->n);
+    mbedtls_platform_zeroize( T->p, T->n * ciL );
     d = T->p;
     n = N->n;
-    m = (B->n < n) ? B->n : n;
+    m = ( B->n < n ) ? B->n : n;
     Ap = A->p;
     Bp = B->p;
     Np = N->p;
-    for (i = 0; i < n; i++)
+    for( i = 0; i < n; i++ )
     {
         /*
          * T = (T + u0*B + u1*N) / 2^biL
          */
         u0 = Ap[i];
-        u1 = (d[0] + u0 * Bp[0]) * mm;
-        mbedtls_mpi_mul_hlp(m, Bp, d, u0);
-        mbedtls_mpi_mul_hlp(n, Np, d, u1);
-        *d++ = u0;
-        d[n + 1] = 0;
+        u1 = ( d[0] + u0 * Bp[0] ) * mm;
+        mbedtls_mpi_mul_hlp( m, Bp, d, u0 );
+        mbedtls_mpi_mul_hlp( n, Np, d, u1 );
+        *d++ = u0; d[n + 1] = 0;
     }
     /* At this point, d is either the desired result or the desired result
      * plus N. We now potentially subtract N, avoiding leaking whether the
      * subtraction is performed through side channels. */
     /* Copy the n least significant limbs of d to A, so that
      * A = d if d < N (recall that N has n limbs). */
-    memcpy(Ap, d, n * ciL);
+    memcpy( Ap, d, n * ciL );
     /* If d >= N then we want to set A to d - N. To prevent timing attacks,
      * do the calculation without using conditional tests. */
     /* Set d to d0 + (2^biL)^n - N where d0 is the current value of d. */
     d[n] += 1;
-    d[n] -= mpi_sub_hlp(d, d, Np, n);
+    d[n] -= mpi_sub_hlp( d, d, Np, n );
     /* If d0 < N then d < (2^biL)^n
      * so d[n] == 0 and we want to keep A as it is.
      * If d0 >= N then d >= (2^biL)^n, and d <= (2^biL)^n + N < 2 * (2^biL)^n
@@ -1955,14 +2000,14 @@ static void mpi_montmul(mbedtls_mpi *A, const mbedtls_mpi *B,
  *
  * See mpi_montmul() regarding constraints and guarantees on the parameters.
  */
-static void mpi_montred(mbedtls_mpi *A, const mbedtls_mpi *N,
-                        mbedtls_mpi_uint mm, const mbedtls_mpi *T)
+static void mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N,
+                         mbedtls_mpi_uint mm, const mbedtls_mpi *T )
 {
-    mbedtls_mpi U;
     mbedtls_mpi_uint z = 1;
-    U.n = U.s = (int)z;
+    mbedtls_mpi U;
+    U.n = U.s = (int) z;
     U.p = &z;
-    mpi_montmul(A, &U, N, mm, T);
+    mpi_montmul( A, &U, N, mm, T );
 }
 
 /**
@@ -1991,127 +2036,129 @@ static void mpi_montred(mbedtls_mpi *A, const mbedtls_mpi *N,
  * \return         Another negative error code on different kinds of failures.
  *
  */
-int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        const mbedtls_mpi *E, const mbedtls_mpi *N,
-                        mbedtls_mpi *_RR)
+int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
+                         const mbedtls_mpi *E, const mbedtls_mpi *N,
+                         mbedtls_mpi *_RR )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     size_t wbits, wsize, one = 1;
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
-    mbedtls_mpi RR, T, W[1 << MBEDTLS_MPI_WINDOW_SIZE], Apos;
+    mbedtls_mpi RR, T, W[ 1 << MBEDTLS_MPI_WINDOW_SIZE ], Apos;
     int neg;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(E);
-    MPI_VALIDATE_RET(N);
-    if (mbedtls_mpi_cmp_int(N, 0) <= 0 || !(N->p[0] & 1))
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    if (E->s < 0)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    if (mbedtls_mpi_bitlen(E) > MBEDTLS_MPI_MAX_BITS ||
-        mbedtls_mpi_bitlen(N) > MBEDTLS_MPI_MAX_BITS)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( E );
+    MPI_VALIDATE_RET( N );
+    if( mbedtls_mpi_cmp_int( N, 0 ) <= 0 || ( N->p[0] & 1 ) == 0 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if( mbedtls_mpi_cmp_int( E, 0 ) < 0 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if( mbedtls_mpi_bitlen( E ) > MBEDTLS_MPI_MAX_BITS ||
+        mbedtls_mpi_bitlen( N ) > MBEDTLS_MPI_MAX_BITS )
+        return ( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
     /*
      * Init temps and window size
      */
-    mpi_montg_init(&mm, N);
-    mbedtls_mpi_init(&RR);
-    mbedtls_mpi_init(&T);
-    mbedtls_mpi_init(&Apos);
-    mbedtls_platform_zeroize(W, sizeof(W));
-    i = mbedtls_mpi_bitlen(E);
-    wsize = (i > 671) ? 6 : (i > 239) ? 5 : (i > 79) ? 4 : (i > 23) ? 3 : 1;
-#if (MBEDTLS_MPI_WINDOW_SIZE < 6)
-    if (wsize > MBEDTLS_MPI_WINDOW_SIZE) wsize = MBEDTLS_MPI_WINDOW_SIZE;
+    mpi_montg_init( &mm, N );
+    mbedtls_mpi_init( &RR ); mbedtls_mpi_init( &T );
+    mbedtls_mpi_init( &Apos );
+    mbedtls_platform_zeroize( W, sizeof( W ) );
+    i = mbedtls_mpi_bitlen( E );
+    wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
+            ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
+#if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
+    if( wsize > MBEDTLS_MPI_WINDOW_SIZE )
+        wsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
     j = N->n + 1;
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1], j));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&T, j * 2));
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
     /*
      * Compensate for negative A (and correct at the end)
      */
-    neg = (A->s == -1);
-    if (neg)
+    neg = ( A->s == -1 );
+    if( neg )
     {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Apos, A));
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Apos, A ) );
         Apos.s = 1;
         A = &Apos;
     }
     /*
      * If 1st call, pre-compute R^2 mod N
      */
-    if (!_RR || !_RR->p)
+    if( _RR == NULL || _RR->p == NULL )
     {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&RR, 1));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&RR, N->n * 2 * biL));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&RR, &RR, N));
-        if (_RR) memcpy(_RR, &RR, sizeof(mbedtls_mpi));
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &RR, 1 ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &RR, N->n * 2 * biL ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &RR, &RR, N ) );
+        if( _RR )
+            memcpy( _RR, &RR, sizeof( mbedtls_mpi ) );
     }
     else
-    {
-        memcpy(&RR, _RR, sizeof(mbedtls_mpi));
-    }
+        memcpy( &RR, _RR, sizeof( mbedtls_mpi ) );
     /*
      * W[1] = A * R^2 * R^-1 mod N = A * R mod N
      */
-    if (mbedtls_mpi_cmp_mpi(A, N) >= 0)
-        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&W[1], A, N));
+    if( mbedtls_mpi_cmp_mpi( A, N ) >= 0 )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &W[1], A, N ) );
     else
-        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[1], A));
-    mpi_montmul(&W[1], &RR, N, mm, &T);
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[1], A ) );
+    mpi_montmul( &W[1], &RR, N, mm, &T );
     /*
      * X = R^2 * R^-1 mod N = R mod N
      */
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, &RR));
-    mpi_montred(X, N, mm, &T);
-    if (wsize > 1)
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) );
+    mpi_montred( X, N, mm, &T );
+    if( wsize > 1 )
     {
         /*
          * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1)
          */
-        j = one << (wsize - 1);
-        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[j], N->n + 1));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[j], &W[1]));
-        for (i = 0; i < wsize - 1; i++)
-            mpi_montmul(&W[j], &W[j], N, mm, &T);
+        j =  one << ( wsize - 1 );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1]    ) );
+        for( i = 0; i < wsize - 1; i++ )
+            mpi_montmul( &W[j], &W[j], N, mm, &T );
         /*
          * W[i] = W[i - 1] * W[1]
          */
-        for (i = j + 1; i < (one << wsize); i++)
+        for( i = j + 1; i < ( one << wsize ); i++ )
         {
-            MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[i], N->n + 1));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[i], &W[i - 1]));
-            mpi_montmul(&W[i], &W[1], N, mm, &T);
+            MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
+            mpi_montmul( &W[i], &W[1], N, mm, &T );
         }
     }
     nblimbs = E->n;
     bufsize = 0;
-    nbits = 0;
-    wbits = 0;
-    state = 0;
-    while (1)
+    nbits   = 0;
+    wbits   = 0;
+    state   = 0;
+    while( 1 )
     {
-        if (!bufsize)
+        if( bufsize == 0 )
         {
-            if (!nblimbs) break;
+            if( nblimbs == 0 )
+                break;
             nblimbs--;
-            bufsize = sizeof(mbedtls_mpi_uint) << 3;
+            bufsize = sizeof( mbedtls_mpi_uint ) << 3;
         }
         bufsize--;
         ei = (E->p[nblimbs] >> bufsize) & 1;
         /*
          * skip leading 0s
          */
-        if (ei == 0 && state == 0) continue;
-        if (ei == 0 && state == 1)
+        if( ei == 0 && state == 0 )
+            continue;
+        if( ei == 0 && state == 1 )
         {
             /*
              * out of window, square X
              */
-            mpi_montmul(X, X, N, mm, &T);
+            mpi_montmul( X, X, N, mm, &T );
             continue;
         }
         /*
@@ -2119,18 +2166,18 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
          */
         state = 2;
         nbits++;
-        wbits |= (ei << (wsize - nbits));
-        if (nbits == wsize)
+        wbits |= ( ei << ( wsize - nbits ) );
+        if( nbits == wsize )
         {
             /*
              * X = X^wsize R^-1 mod N
              */
-            for (i = 0; i < wsize; i++)
-                mpi_montmul(X, X, N, mm, &T);
+            for( i = 0; i < wsize; i++ )
+                mpi_montmul( X, X, N, mm, &T );
             /*
              * X = X * W[wbits] R^-1 mod N
              */
-            mpi_montmul(X, &W[wbits], N, mm, &T);
+            mpi_montmul( X, &W[wbits], N, mm, &T );
             state--;
             nbits = 0;
             wbits = 0;
@@ -2139,47 +2186,29 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
     /*
      * process the remaining bits
      */
-    for (i = 0; i < nbits; i++)
+    for( i = 0; i < nbits; i++ )
     {
-        mpi_montmul(X, X, N, mm, &T);
+        mpi_montmul( X, X, N, mm, &T );
         wbits <<= 1;
-        if ((wbits & (one << wsize)))
-            mpi_montmul(X, &W[1], N, mm, &T);
+        if( ( wbits & ( one << wsize ) ) != 0 )
+            mpi_montmul( X, &W[1], N, mm, &T );
     }
     /*
      * X = A^E * R * R^-1 mod N = A^E mod N
      */
-    mpi_montred(X, N, mm, &T);
-    if (neg && E->n && (E->p[0] & 1))
+    mpi_montred( X, N, mm, &T );
+    if( neg && E->n != 0 && ( E->p[0] & 1 ) != 0 )
     {
         X->s = -1;
-        MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(X, N, X));
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) );
     }
 cleanup:
-    for (i = (one << (wsize - 1)); i < (one << wsize); i++)
-        mbedtls_mpi_free(&W[i]);
-    mbedtls_mpi_free(&W[1]);
-    mbedtls_mpi_free(&T);
-    mbedtls_mpi_free(&Apos);
-    if (!_RR || !_RR->p)
-        mbedtls_mpi_free(&RR);
-    return ret;
-}
-
-static inline int Compare(const mbedtls_mpi *X,
-                          const mbedtls_mpi *Y,
-                          size_t i,
-                          size_t j)
-{
-    if (!i && !j) return 0;
-    if (i > j) return 1;
-    if (j > i) return -1;
-    for (; i > 0; i--)
-    {
-        if (X->p[i - 1] > Y->p[i - 1]) return 1;
-        if (X->p[i - 1] < Y->p[i - 1]) return -1;
-    }
-    return 0;
+    for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )
+        mbedtls_mpi_free( &W[i] );
+    mbedtls_mpi_free( &W[1] ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos );
+    if( _RR == NULL || _RR->p == NULL )
+        mbedtls_mpi_free( &RR );
+    return( ret );
 }
 
 /**
@@ -2193,53 +2222,53 @@ static inline int Compare(const mbedtls_mpi *X,
  * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
  * \return         Another negative error code on different kinds of failure.
  */
-int mbedtls_mpi_gcd(mbedtls_mpi *G, const mbedtls_mpi *A,
-                    const mbedtls_mpi *B)
+int mbedtls_mpi_gcd( mbedtls_mpi *G, const mbedtls_mpi *A, const mbedtls_mpi *B )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     mbedtls_mpi TA, TB;
     size_t lz, lzt, i, j;
-    MPI_VALIDATE_RET(G);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(B);
-    mbedtls_mpi_init(&TA);
-    mbedtls_mpi_init(&TB);
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TA, A));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TB, B));
-    lz = mbedtls_mpi_lsb(&TA);
-    lzt = mbedtls_mpi_lsb(&TB);
-    if (lzt < lz) lz = lzt;
-    mbedtls_mpi_shift_r(&TA, lz);
-    mbedtls_mpi_shift_r(&TB, lz);
+    MPI_VALIDATE_RET( G );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( B );
+    mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
+    lz = mbedtls_mpi_lsb( &TA );
+    lzt = mbedtls_mpi_lsb( &TB );
+    if( lzt < lz )
+        lz = lzt;
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, lz ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, lz ) );
     TA.s = TB.s = 1;
-    i = mbedtls_mpi_bitlen(&TA);
-    j = mbedtls_mpi_bitlen(&TB);
-    while (!mbedtls_mpi_is_zero(&TA))
+    while( !mbedtls_mpi_is_zero( &TA ) )
     {
-        mbedtls_mpi_shift_r(&TA, mbedtls_mpi_lsb(&TA));
-        mbedtls_mpi_shift_r(&TB, mbedtls_mpi_lsb(&TB));
-        if (mpi_cmp_abs(&TA, &TB, &i, &j) >= 0)
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, mbedtls_mpi_lsb( &TA ) ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, mbedtls_mpi_lsb( &TB ) ) );
+        if( mpi_cmp_abs( &TA, &TB, &i, &j ) >= 0 )
         {
-            MBEDTLS_MPI_CHK(mpi_sub_abs(&TA, &TA, &TB, j));
-            mbedtls_mpi_shift_r(&TA, 1);
+            MBEDTLS_MPI_CHK( mpi_sub_abs( &TA, &TA, &TB, j ) );
+            ShiftRight( TA.p, TA.n, 1 );
         }
         else
         {
-            MBEDTLS_MPI_CHK(mpi_sub_abs(&TB, &TB, &TA, i));
-            mbedtls_mpi_shift_r(&TB, 1);
+            MBEDTLS_MPI_CHK( mpi_sub_abs( &TB, &TB, &TA, i ) );
+            ShiftRight( TB.p, TB.n, 1 );
         }
     }
-    MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&TB, lz));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(G, &TB));
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &TB, lz ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( G, &TB ) );
 cleanup:
-    mbedtls_mpi_free(&TA);
-    mbedtls_mpi_free(&TB);
-    return ret;
+    mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TB );
+    return( ret );
 }
 
 /**
  * \brief          Fill an MPI with a number of random bytes.
  *
+ * Use a temporary byte representation to make sure the result is the
+ * same regardless of the platform endianness (useful when f_rng is
+ * actually deterministic, e.g. for tests).
+ *
  * \param X        The destination MPI. This must point to an initialized MPI.
  * \param size     The number of random bytes to generate.
  * \param f_rng    The RNG function to use. This must not be \c NULL.
@@ -2254,23 +2283,23 @@ cleanup:
  *                 as a big-endian representation of an MPI; this can
  *                 be relevant in applications like deterministic ECDSA.
  */
-int mbedtls_mpi_fill_random(mbedtls_mpi *X, size_t size,
-                            int (*f_rng)(void *, unsigned char *, size_t),
-                            void *p_rng)
+int mbedtls_mpi_fill_random( mbedtls_mpi *X, size_t size,
+                             int (*f_rng)(void *, unsigned char *, size_t),
+                             void *p_rng )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
-    size_t const limbs = CHARS_TO_LIMBS(size);
-    size_t const overhead = (limbs * ciL) - size;
+    size_t const limbs = CHARS_TO_LIMBS( size );
+    size_t const overhead = ( limbs * ciL ) - size;
     unsigned char *Xp;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(f_rng);
-    MBEDTLS_MPI_CHK(mbedtls_mpi_resize(X, limbs));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0));
-    Xp = (unsigned char *)X->p;
-    MBEDTLS_MPI_CHK(f_rng(p_rng, Xp + overhead, size));
-    mpi_bigendian_to_host(X->p, limbs);
+    MPI_VALIDATE_RET( X     );
+    MPI_VALIDATE_RET( f_rng );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_resize( X, limbs ));
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+    Xp = (unsigned char*) X->p;
+    MBEDTLS_MPI_CHK( f_rng( p_rng, Xp + overhead, size ) );
+    mpi_bigendian_to_host( X->p, limbs );
 cleanup:
-    return ret;
+    return( ret );
 }
 
 /**
@@ -2289,136 +2318,108 @@ cleanup:
  * \return         #MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if \p has no modular inverse
  *                 with respect to \p N.
  */
-int mbedtls_mpi_inv_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
-                        const mbedtls_mpi *N)
+int mbedtls_mpi_inv_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *N )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     mbedtls_mpi G, TA, TU, U1, U2, TB, TV, V1, V2;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(A);
-    MPI_VALIDATE_RET(N);
-    if (mbedtls_mpi_cmp_int(N, 1) <= 0)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    mbedtls_mpi_init(&TA);
-    mbedtls_mpi_init(&TU);
-    mbedtls_mpi_init(&U1);
-    mbedtls_mpi_init(&U2);
-    mbedtls_mpi_init(&G);
-    mbedtls_mpi_init(&TB);
-    mbedtls_mpi_init(&TV);
-    mbedtls_mpi_init(&V1);
-    mbedtls_mpi_init(&V2);
-    MBEDTLS_MPI_CHK(mbedtls_mpi_gcd(&G, A, N));
-    if (!mbedtls_mpi_is_one(&G))
+    MPI_VALIDATE_RET( X );
+    MPI_VALIDATE_RET( A );
+    MPI_VALIDATE_RET( N );
+    if( mbedtls_mpi_cmp_int( N, 1 ) <= 0 )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TU ); mbedtls_mpi_init( &U1 ); mbedtls_mpi_init( &U2 );
+    mbedtls_mpi_init( &G ); mbedtls_mpi_init( &TB ); mbedtls_mpi_init( &TV );
+    mbedtls_mpi_init( &V1 ); mbedtls_mpi_init( &V2 );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &G, A, N ) );
+    if( mbedtls_mpi_cmp_int( &G, 1 ) != 0 )
     {
         ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
         goto cleanup;
     }
-    MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&TA, A, N));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TU, &TA));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TB, N));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TV, N));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&U1, 1));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&U2, 0));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&V1, 0));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&V2, 1));
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &TA, A, N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TU, &TA ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TV, N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U1, 1 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U2, 0 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V1, 0 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V2, 1 ) );
     do
     {
-        while (!(TU.p[0] & 1))
+        while( ( TU.p[0] & 1 ) == 0 )
         {
-            mbedtls_mpi_shift_r(&TU, 1);
-            if ((U1.p[0] & 1) || (U2.p[0] & 1))
+            ShiftRight( TU.p, TU.n, 1 );
+            if( ( U1.p[0] & 1 ) != 0 || ( U2.p[0] & 1 ) != 0 )
             {
-                MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&U1, &U1, &TB));
-                MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U2, &U2, &TA));
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &U1, &U1, &TB ) );
+                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &TA ) );
             }
-            mbedtls_mpi_shift_r(&U1, 1);
-            mbedtls_mpi_shift_r(&U2, 1);
+            ShiftRight( U1.p, U1.n, 1 );
+            ShiftRight( U2.p, U2.n, 1 );
         }
-        while (!(TV.p[0] & 1))
+        while( ( TV.p[0] & 1 ) == 0 )
         {
-            mbedtls_mpi_shift_r(&TV, 1);
-            if ((V1.p[0] & 1) || (V2.p[0] & 1))
+            ShiftRight( TV.p, TV.n, 1 );
+            if( ( V1.p[0] & 1 ) != 0 || ( V2.p[0] & 1 ) != 0 )
             {
-                MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&V1, &V1, &TB));
-                MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V2, &V2, &TA));
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, &TB ) );
+                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &TA ) );
             }
-            mbedtls_mpi_shift_r(&V1, 1);
-            mbedtls_mpi_shift_r(&V2, 1);
+            ShiftRight( V1.p, V1.n, 1 );
+            ShiftRight( V2.p, V2.n, 1 );
         }
-        if (mbedtls_mpi_cmp_mpi(&TU, &TV) >= 0)
+        if( mbedtls_mpi_cmp_mpi( &TU, &TV ) >= 0 )
         {
-            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&TU, &TU, &TV));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U1, &U1, &V1));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U2, &U2, &V2));
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TU, &TU, &TV ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U1, &U1, &V1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &V2 ) );
         }
         else
         {
-            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&TV, &TV, &TU));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V1, &V1, &U1));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V2, &V2, &U2));
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TV, &TV, &TU ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, &U1 ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &U2 ) );
         }
-    } while (!mbedtls_mpi_is_zero(&TU));
-    while (V1.s < 0)
-    {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&V1, &V1, N));
     }
-    while (mbedtls_mpi_cmp_mpi(&V1, N) >= 0)
-    {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V1, &V1, N));
-    }
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, &V1));
+    while( !mbedtls_mpi_is_zero(&TU) );
+    while( mbedtls_mpi_cmp_int( &V1, 0 ) < 0 )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, N ) );
+    while( mbedtls_mpi_cmp_mpi( &V1, N ) >= 0 )
+        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &V1 ) );
 cleanup:
-    mbedtls_mpi_free(&TA);
-    mbedtls_mpi_free(&TU);
-    mbedtls_mpi_free(&U1);
-    mbedtls_mpi_free(&U2);
-    mbedtls_mpi_free(&G);
-    mbedtls_mpi_free(&TB);
-    mbedtls_mpi_free(&TV);
-    mbedtls_mpi_free(&V1);
-    mbedtls_mpi_free(&V2);
-    return ret;
+    mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TU ); mbedtls_mpi_free( &U1 ); mbedtls_mpi_free( &U2 );
+    mbedtls_mpi_free( &G ); mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TV );
+    mbedtls_mpi_free( &V1 ); mbedtls_mpi_free( &V2 );
+    return( ret );
 }
 
 #if defined(MBEDTLS_GENPRIME)
 
-static const short kSmallPrime[] = {
-    3,   5,   7,   11,  13,  17,  19,  23,  29,  31,  37,  41,  43,  47,
-    53,  59,  61,  67,  71,  73,  79,  83,  89,  97,  101, 103, 107, 109,
-    113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191,
-    193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
-    271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353,
-    359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
-    443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523,
-    541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617,
-    619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709,
-    719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811,
-    821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907,
-    911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,
-};
-
-static struct Divisor kSmallDivisor[ARRAYLEN(kSmallPrime)];
-
-static bool IsDivisible( const mbedtls_mpi_uint *Ap, size_t An, 
-                         mbedtls_mpi_sint b, struct Divisor d )
+static const short small_prime[] =
 {
-    size_t i;
-    mbedtls_mpi_uint x, y, z;
-    MBEDTLS_ASSERT(b >= 3);
-    for (i = An, y = 0; i > 0; i--)
-    {
-        x = Ap[i - 1];
-        y = (y << biH) | (x >> biH);
-        z = Divide(y, d);
-        y -= z * b;
-        x <<= biH;
-        y = (y << biH) | (x >> biH);
-        z = Divide(y, d);
-        y -= z * b;
-    }
-    return !y;
-}
+        3,    5,    7,   11,   13,   17,   19,   23,
+       29,   31,   37,   41,   43,   47,   53,   59,
+       61,   67,   71,   73,   79,   83,   89,   97,
+      101,  103,  107,  109,  113,  127,  131,  137,
+      139,  149,  151,  157,  163,  167,  173,  179,
+      181,  191,  193,  197,  199,  211,  223,  227,
+      229,  233,  239,  241,  251,  257,  263,  269,
+      271,  277,  281,  283,  293,  307,  311,  313,
+      317,  331,  337,  347,  349,  353,  359,  367,
+      373,  379,  383,  389,  397,  401,  409,  419,
+      421,  431,  433,  439,  443,  449,  457,  461,
+      463,  467,  479,  487,  491,  499,  503,  509,
+      521,  523,  541,  547,  557,  563,  569,  571,
+      577,  587,  593,  599,  601,  607,  613,  617,
+      619,  631,  641,  643,  647,  653,  659,  661,
+      673,  677,  683,  691,  701,  709,  719,  727,
+      733,  739,  743,  751,  757,  761,  769,  773,
+      787,  797,  809,  811,  821,  823,  827,  829,
+      839,  853,  857,  859,  863,  877,  881,  883,
+      887,  907,  911,  919,  929,  937,  941,  947,
+      953,  967,  971,  977,  983,  991,  997, -103
+};
 
 /*
  * Small divisors test (X must be positive)
@@ -2429,126 +2430,114 @@ static bool IsDivisible( const mbedtls_mpi_uint *Ap, size_t An,
  * MBEDTLS_ERR_MPI_NOT_ACCEPTABLE: certain non-prime
  * other negative: error
  */
-static int mpi_check_small_factors(const mbedtls_mpi *X)
+static int mpi_check_small_factors( const mbedtls_mpi *X )
 {
     int ret = 0;
-    size_t i, n;
-    static bool once;
-    if (!(X->p[0] & 1))
-        return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
-    n = mbedtls_mpi_limbs(X);
-    if (!once) {
-        for (i = 0; i < ARRAYLEN(kSmallPrime); ++i)
-            kSmallDivisor[i] = GetDivisor(kSmallPrime[i]);
-        once = true;
+    size_t i;
+    mbedtls_mpi_uint r;
+    if( ( X->p[0] & 1 ) == 0 )
+        return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+    for( i = 0; small_prime[i] > 0; i++ )
+    {
+        if( mbedtls_mpi_cmp_int( X, small_prime[i] ) <= 0 )
+            return( 1 );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, small_prime[i] ) );
+        if( r == 0 )
+            return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
     }
-    for (i = 0; i < ARRAYLEN(kSmallPrime); i++) {
-        if (n == 1 && mbedtls_mpi_cmp_int(X, kSmallPrime[i]) <= 0)
-            return 1;
-        if (IsDivisible(X->p, X->n, kSmallPrime[i], kSmallDivisor[i]))
-            return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
-    }
-    return ret;
+cleanup:
+    return( ret );
 }
 
 /*
  * Miller-Rabin pseudo-primality test  (HAC 4.24)
  */
-static int mpi_miller_rabin(const mbedtls_mpi *X, size_t rounds,
-                            int (*f_rng)(void *, unsigned char *, size_t),
-                            void *p_rng)
+static int mpi_miller_rabin( const mbedtls_mpi *X, size_t rounds,
+                             int (*f_rng)(void *, unsigned char *, size_t),
+                             void *p_rng )
 {
     int ret, count;
     size_t i, j, k, s;
     mbedtls_mpi W, R, T, A, RR;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(f_rng);
-    mbedtls_mpi_init(&W);
-    mbedtls_mpi_init(&R);
-    mbedtls_mpi_init(&T);
-    mbedtls_mpi_init(&A);
-    mbedtls_mpi_init(&RR);
+    MPI_VALIDATE_RET( X     );
+    MPI_VALIDATE_RET( f_rng );
+    mbedtls_mpi_init( &W ); mbedtls_mpi_init( &R );
+    mbedtls_mpi_init( &T ); mbedtls_mpi_init( &A );
+    mbedtls_mpi_init( &RR );
     /*
      * W = |X| - 1
      * R = W >> lsb( W )
      */
-    MBEDTLS_MPI_CHK(mbedtls_mpi_sub_int(&W, X, 1));
-    s = mbedtls_mpi_lsb(&W);
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&R, &W));
-    mbedtls_mpi_shift_r(&R, s);
-    for (i = 0; i < rounds; i++)
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &W, X, 1 ) );
+    s = mbedtls_mpi_lsb( &W );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R, &W ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &R, s ) );
+    for( i = 0; i < rounds; i++ )
     {
         /*
          * pick a random A, 1 < A < |X| - 1
          */
         count = 0;
-        do
-        {
-            MBEDTLS_MPI_CHK(mbedtls_mpi_fill_random(&A, X->n * ciL, f_rng, p_rng));
-            j = mbedtls_mpi_bitlen(&A);
-            k = mbedtls_mpi_bitlen(&W);
-            if (j > k)
-            {
-                A.p[A.n - 1] &= ((mbedtls_mpi_uint)1 << (k - (A.n - 1) * biL - 1)) - 1;
+        do {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( &A, X->n * ciL, f_rng, p_rng ) );
+            j = mbedtls_mpi_bitlen( &A );
+            k = mbedtls_mpi_bitlen( &W );
+            if (j > k) {
+                A.p[A.n - 1] &= ( (mbedtls_mpi_uint) 1 << ( k - ( A.n - 1 ) * biL - 1 ) ) - 1;
             }
-            if (count++ > 30)
-            {
+            if (count++ > 30) {
                 ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
                 goto cleanup;
             }
-        } while (mbedtls_mpi_cmp_mpi(&A, &W) >= 0 ||
-                 mbedtls_mpi_cmp_int(&A, 1) <= 0);
-
+        } while ( mbedtls_mpi_cmp_mpi( &A, &W ) >= 0 ||
+                  mbedtls_mpi_cmp_int( &A, 1 )  <= 0    );
         /*
          * A = A^R mod |X|
          */
-        MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&A, &A, &R, X, &RR));
-        if (!mbedtls_mpi_cmp_mpi(&A, &W) || mbedtls_mpi_is_one(&A))
+        MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &A, &A, &R, X, &RR ) );
+        if( mbedtls_mpi_cmp_mpi( &A, &W ) == 0 ||
+            mbedtls_mpi_cmp_int( &A,  1 ) == 0 )
             continue;
         j = 1;
-
-        while (j < s && mbedtls_mpi_cmp_mpi(&A, &W))
+        while( j < s && mbedtls_mpi_cmp_mpi( &A, &W ) != 0 )
         {
             /*
              * A = A * A mod |X|
              */
-            MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&T, &A, &A));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&A, &T, X));
-            if (mbedtls_mpi_is_one(&A)) break;
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &T, &A, &A ) );
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &A, &T, X  ) );
+            if( mbedtls_mpi_cmp_int( &A, 1 ) == 0 )
+                break;
             j++;
         }
-
         /*
          * not prime if A != |X| - 1 or A == 1
          */
-        if (mbedtls_mpi_cmp_mpi(&A, &W) || mbedtls_mpi_is_one(&A))
+        if( mbedtls_mpi_cmp_mpi( &A, &W ) != 0 ||
+            mbedtls_mpi_cmp_int( &A,  1 ) == 0 )
         {
             ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
             break;
         }
     }
-
 cleanup:
-    mbedtls_mpi_free(&W);
-    mbedtls_mpi_free(&R);
-    mbedtls_mpi_free(&T);
-    mbedtls_mpi_free(&A);
-    mbedtls_mpi_free(&RR);
-    return ret;
+    mbedtls_mpi_free( &W ); mbedtls_mpi_free( &R );
+    mbedtls_mpi_free( &T ); mbedtls_mpi_free( &A );
+    mbedtls_mpi_free( &RR );
+    return( ret );
 }
 
 /**
  * \brief          Miller-Rabin primality test.
  *
- * \warning        If \p X is potentially generated by an adversary, for
- *                 example when validating cryptographic parameters that
- *                 you didn't generate yourself and that are supposed to
- *                 be prime, then \p rounds should be at least the half
- *                 of the security strength of the cryptographic
- *                 algorithm. On the other hand, if \p X is chosen
- *                 uniformly or non-adversially (as is the case when
- *                 mbedtls_mpi_gen_prime calls this function), then \p
- *                 rounds can be much lower.
+ * \warning        If \p X is potentially generated by an adversary, for example
+ *                 when validating cryptographic parameters that you didn't
+ *                 generate yourself and that are supposed to be prime, then
+ *                 \p rounds should be at least half of the security
+ *                 strength of the cryptographic algorithm. On the other hand,
+ *                 if \p X is chosen uniformly or non-adversarially (as is the
+ *                 case when mbedtls_mpi_gen_prime calls this function), then
+ *                 \p rounds can be much lower.
  *
  * \param X        The MPI to check for primality.
  *                 This must point to an initialized MPI.
@@ -2561,32 +2550,33 @@ cleanup:
  *                 a context parameter.
  *
  * \return         \c 0 if successful, i.e. \p X is probably prime.
- * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a allocation failed.
+ * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
  * \return         #MBEDTLS_ERR_MPI_NOT_ACCEPTABLE if \p X is not prime.
- * \return         Another negative error code on other failures.
+ * \return         Another negative error code on other kinds of failure.
  */
-int mbedtls_mpi_is_prime_ext(const mbedtls_mpi *X, int rounds,
-                             int (*f_rng)(void *, unsigned char *, size_t),
-                             void *p_rng)
+int mbedtls_mpi_is_prime_ext( const mbedtls_mpi *X, int rounds,
+                              int (*f_rng)(void *, unsigned char *, size_t),
+                              void *p_rng )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     mbedtls_mpi XX;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(f_rng);
+    MPI_VALIDATE_RET( X     );
+    MPI_VALIDATE_RET( f_rng );
     XX.s = 1;
     XX.n = X->n;
     XX.p = X->p;
-    if (mbedtls_mpi_is_zero(&XX) || mbedtls_mpi_is_one(&XX))
-        return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
-    if (!mbedtls_mpi_cmp_int(&XX, 2))
-        return 0;
-    if ((ret = mpi_check_small_factors(&XX)))
+    if( mbedtls_mpi_cmp_int( &XX, 0 ) == 0 ||
+        mbedtls_mpi_cmp_int( &XX, 1 ) == 0 )
+        return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+    if( mbedtls_mpi_cmp_int( &XX, 2 ) == 0 )
+        return( 0 );
+    if( ( ret = mpi_check_small_factors( &XX ) ) != 0 )
     {
-        if (ret == 1)
-            return 0;
-        return ret;
+        if( ret == 1 )
+            return( 0 );
+        return( ret );
     }
-    return mpi_miller_rabin(&XX, rounds, f_rng, p_rng);
+    return( mpi_miller_rabin( &XX, rounds, f_rng, p_rng ) );
 }
 
 /**
@@ -2609,37 +2599,33 @@ int mbedtls_mpi_is_prime_ext(const mbedtls_mpi *X, int rounds,
  *
  * \return         \c 0 if successful, in which case \p X holds a
  *                 probably prime number.
- * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a allocation failed.
- * \return         #MBEDTLS_ERR_MPI_BAD_INPUT_DATA if `nbits` is not
- *                 between \c 3 and #MBEDTLS_MPI_MAX_BITS.
+ * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
+ * \return         #MBEDTLS_ERR_MPI_BAD_INPUT_DATA if `nbits` is not between
+ *                 \c 3 and #MBEDTLS_MPI_MAX_BITS.
  */
-int mbedtls_mpi_gen_prime(mbedtls_mpi *X, size_t nbits, int flags,
-                          int (*f_rng)(void *, unsigned char *, size_t),
-                          void *p_rng)
+int mbedtls_mpi_gen_prime( mbedtls_mpi *X, size_t nbits, int flags,
+                           int (*f_rng)(void *, unsigned char *, size_t),
+                           void *p_rng )
 {
     int ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
     size_t k, n;
     int rounds;
     mbedtls_mpi_uint r;
     mbedtls_mpi Y;
-    MPI_VALIDATE_RET(X);
-    MPI_VALIDATE_RET(f_rng);
-    if (nbits < 3 || nbits > MBEDTLS_MPI_MAX_BITS)
-        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    mbedtls_mpi_init(&Y);
-    n = BITS_TO_LIMBS(nbits);
-    if (!(flags & MBEDTLS_MPI_GEN_PRIME_FLAG_LOW_ERR))
+    MPI_VALIDATE_RET( X     );
+    MPI_VALIDATE_RET( f_rng );
+    if( nbits < 3 || nbits > MBEDTLS_MPI_MAX_BITS )
+        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    mbedtls_mpi_init( &Y );
+    n = BITS_TO_LIMBS( nbits );
+    if( ( flags & MBEDTLS_MPI_GEN_PRIME_FLAG_LOW_ERR ) == 0 )
     {
         /*
          * 2^-80 error probability, number of rounds chosen per HAC, table 4.4
          */
-        rounds = ((nbits >= 1300) ? 2
-                : (nbits >= 850)  ? 3
-                : (nbits >= 650)  ? 4
-                : (nbits >= 350)  ? 8
-                : (nbits >= 250)  ? 12
-                : (nbits >= 150)  ? 18
-                : 27);
+        rounds = ( ( nbits >= 1300 ) ?  2 : ( nbits >=  850 ) ?  3 :
+                   ( nbits >=  650 ) ?  4 : ( nbits >=  350 ) ?  8 :
+                   ( nbits >=  250 ) ? 12 : ( nbits >=  150 ) ? 18 : 27 );
     }
     else
     {
@@ -2647,29 +2633,24 @@ int mbedtls_mpi_gen_prime(mbedtls_mpi *X, size_t nbits, int flags,
          * 2^-100 error probability, number of rounds computed based on HAC,
          * fact 4.48
          */
-        rounds = ((nbits >= 1450) ? 4
-                : (nbits >= 1150) ? 5
-                : (nbits >= 1000) ? 6
-                : (nbits >= 850)  ? 7
-                : (nbits >= 750)  ? 8
-                : (nbits >= 500)  ? 13
-                : (nbits >= 250)  ? 28
-                : (nbits >= 150)  ? 40
-                : 51);
+        rounds = ( ( nbits >= 1450 ) ?  4 : ( nbits >=  1150 ) ?  5 :
+                   ( nbits >= 1000 ) ?  6 : ( nbits >=   850 ) ?  7 :
+                   ( nbits >=  750 ) ?  8 : ( nbits >=   500 ) ? 13 :
+                   ( nbits >=  250 ) ? 28 : ( nbits >=   150 ) ? 40 : 51 );
     }
-    while (1)
+    while( 1 )
     {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_fill_random(X, n * ciL, f_rng, p_rng));
-        /* make sure generated number is at least (nbits-1)+0.5 bits (FIPS 186-4
-         * §B.3.3 steps 4.4, 5.5) */
-        if (X->p[n - 1] < 0xb504f333f9de6485ULL /* ceil(2^63.5) */) continue;
+        MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( X, n * ciL, f_rng, p_rng ) );
+        /* make sure generated number is at least (nbits-1)+0.5 bits (FIPS 186-4 §B.3.3 steps 4.4, 5.5) */
+        if( X->p[n-1] < 0xb504f333f9de6485ULL  /* ceil(2^63.5) */ ) continue;
         k = n * biL;
-        if (k > nbits) mbedtls_mpi_shift_r(X, k - nbits);
+        if( k > nbits ) MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( X, k - nbits ) );
         X->p[0] |= 1;
-        if (!(flags & MBEDTLS_MPI_GEN_PRIME_FLAG_DH))
+        if( ( flags & MBEDTLS_MPI_GEN_PRIME_FLAG_DH ) == 0 )
         {
-            ret = mbedtls_mpi_is_prime_ext(X, rounds, f_rng, p_rng);
-            if (ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE) goto cleanup;
+            ret = mbedtls_mpi_is_prime_ext( X, rounds, f_rng, p_rng );
+            if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE )
+                goto cleanup;
         }
         else
         {
@@ -2679,158 +2660,178 @@ int mbedtls_mpi_gen_prime(mbedtls_mpi *X, size_t nbits, int flags,
              * Make sure it is satisfied, while keeping X = 3 mod 4
              */
             X->p[0] |= 2;
-            MBEDTLS_MPI_CHK(mbedtls_mpi_mod_int(&r, X, 3));
-            if (r == 0)
-                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 8));
-            else if (r == 1)
-                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 4));
+            MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, 3 ) );
+            if( r == 0 )
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 8 ) );
+            else if( r == 1 )
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 4 ) );
             /* Set Y = (X-1) / 2, which is X / 2 because X is odd */
-            MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Y, X));
-            mbedtls_mpi_shift_r( &Y, 1 );
-            while (1)
+            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Y, X ) );
+            ShiftRight( Y.p, Y.n, 1 );
+            while( 1 )
             {
                 /*
                  * First, check small factors for X and Y
                  * before doing Miller-Rabin on any of them
                  */
-                if (!(ret = mpi_check_small_factors(X)) &&
-                    !(ret = mpi_check_small_factors(&Y)) &&
-                    !(ret = mpi_miller_rabin(X, rounds, f_rng, p_rng)) &&
-                    !(ret = mpi_miller_rabin(&Y, rounds, f_rng, p_rng)))
+                if( ( ret = mpi_check_small_factors(  X         ) ) == 0 &&
+                    ( ret = mpi_check_small_factors( &Y         ) ) == 0 &&
+                    ( ret = mpi_miller_rabin(  X, rounds, f_rng, p_rng  ) )
+                                                                    == 0 &&
+                    ( ret = mpi_miller_rabin( &Y, rounds, f_rng, p_rng  ) )
+                                                                    == 0 )
+                    goto cleanup;
+                if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE )
                     goto cleanup;
-                if (ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE) goto cleanup;
                 /*
                  * Next candidates. We want to preserve Y = (X-1) / 2 and
                  * Y = 1 mod 2 and Y = 2 mod 3 (eq X = 3 mod 4 and X = 2 mod 3)
                  * so up Y by 6 and X by 12.
                  */
-                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 12));
-                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(&Y, &Y, 6));
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int(  X,  X, 12 ) );
+                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( &Y, &Y, 6  ) );
             }
         }
     }
 cleanup:
-    mbedtls_mpi_free(&Y);
-    return ret;
+    mbedtls_mpi_free( &Y );
+    return( ret );
 }
 
 #endif /* MBEDTLS_GENPRIME */
 
 #if defined(MBEDTLS_SELF_TEST)
 
-#define GCD_PAIR_COUNT 3
+#define GCD_PAIR_COUNT  3
 
-static const int gcd_pairs[GCD_PAIR_COUNT][3] = {
-    {693, 609, 21}, {1764, 868, 28}, {768454923, 542167814, 1}};
+static const int gcd_pairs[GCD_PAIR_COUNT][3] =
+{
+    { 693, 609, 21 },
+    { 1764, 868, 28 },
+    { 768454923, 542167814, 1 }
+};
 
 /**
  * \brief          Checkup routine
  *
  * \return         0 if successful, or 1 if the test failed
  */
-int mbedtls_mpi_self_test(int verbose)
+int mbedtls_mpi_self_test( int verbose )
 {
     int ret, i;
     mbedtls_mpi A, E, N, X, Y, U, V;
-    mbedtls_mpi_init(&A);
-    mbedtls_mpi_init(&E);
-    mbedtls_mpi_init(&N);
-    mbedtls_mpi_init(&X);
-    mbedtls_mpi_init(&Y);
-    mbedtls_mpi_init(&U);
-    mbedtls_mpi_init(&V);
-    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&A, 16,
-                                            "EFE021C2645FD1DC586E69184AF4A31E"
-                                            "D5F53E93B5F123FA41680867BA110131"
-                                            "944FE7952E2517337780CB0DB80E61AA"
-                                            "E7C8DDC6C5C6AADEB34EB38A2F40D5E6"));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&E, 16,
-                                            "B2E7EFD37075B9F03FF989C7C5051C20"
-                                            "34D2A323810251127E7BF8625A4F49A5"
-                                            "F3E27F4DA8BD59C47D6DAABA4C8127BD"
-                                            "5B5C25763222FEFCCFC38B832366C29E"));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&N, 16,
-                                            "0066A198186C18C10B2F5ED9B522752A"
-                                            "9830B69916E535C8F047518A889A43A5"
-                                            "94B6BED27A168D31D4A52F88925AA8F5"));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&X, &A, &N));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16,
-                                            "602AB7ECA597A3D6B56FF9829A5E8B85"
-                                            "9E857EA95A03512E2BAE7391688D264A"
-                                            "A5663B0341DB9CCFD2C4C5F421FEC814"
-                                            "8001B72E848A38CAE1C65F78E56ABDEF"
-                                            "E12D3C039B8A02D6BE593F0BBBDA56F1"
-                                            "ECF677152EF804370C1A305CAF3B5BF1"
-                                            "30879B56C61DE584A0F53A2447A51E"));
-    if (verbose) mbedtls_printf("  MPI test #1 (mul_mpi): ");
-    if (mbedtls_mpi_cmp_mpi(&X, &U)) {
-        if (verbose) mbedtls_printf("failed\n");
+    mbedtls_mpi_init( &A ); mbedtls_mpi_init( &E ); mbedtls_mpi_init( &N ); mbedtls_mpi_init( &X );
+    mbedtls_mpi_init( &Y ); mbedtls_mpi_init( &U ); mbedtls_mpi_init( &V );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &A, 16,
+        "EFE021C2645FD1DC586E69184AF4A31E" \
+        "D5F53E93B5F123FA41680867BA110131" \
+        "944FE7952E2517337780CB0DB80E61AA" \
+        "E7C8DDC6C5C6AADEB34EB38A2F40D5E6" ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &E, 16,
+        "B2E7EFD37075B9F03FF989C7C5051C20" \
+        "34D2A323810251127E7BF8625A4F49A5" \
+        "F3E27F4DA8BD59C47D6DAABA4C8127BD" \
+        "5B5C25763222FEFCCFC38B832366C29E" ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &N, 16,
+        "0066A198186C18C10B2F5ED9B522752A" \
+        "9830B69916E535C8F047518A889A43A5" \
+        "94B6BED27A168D31D4A52F88925AA8F5" ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &X, &A, &N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "602AB7ECA597A3D6B56FF9829A5E8B85" \
+        "9E857EA95A03512E2BAE7391688D264A" \
+        "A5663B0341DB9CCFD2C4C5F421FEC814" \
+        "8001B72E848A38CAE1C65F78E56ABDEF" \
+        "E12D3C039B8A02D6BE593F0BBBDA56F1" \
+        "ECF677152EF804370C1A305CAF3B5BF1" \
+        "30879B56C61DE584A0F53A2447A51E" ) );
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #1 (mul_mpi): " );
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
         ret = 1;
         goto cleanup;
     }
-    if (verbose) mbedtls_printf("passed\n");
-    MBEDTLS_MPI_CHK(mbedtls_mpi_div_mpi(&X, &Y, &A, &N));
-    MBEDTLS_MPI_CHK(
-        mbedtls_mpi_read_string(&U, 16, "256567336059E52CAE22925474705F39A94"));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&V, 16,
-                                            "6613F26162223DF488E9CD48CC132C7A"
-                                            "0AC93C701B001B092E4E5B9F73BCD27B"
-                                            "9EE50D0657C77F374E903CDFA4C642"));
-    if (verbose) mbedtls_printf("  MPI test #2 (div_mpi): ");
-    if (mbedtls_mpi_cmp_mpi(&X, &U) || mbedtls_mpi_cmp_mpi(&Y, &V)) {
-        if (verbose) mbedtls_printf("failed\n");
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( &X, &Y, &A, &N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "256567336059E52CAE22925474705F39A94" ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &V, 16,
+        "6613F26162223DF488E9CD48CC132C7A" \
+        "0AC93C701B001B092E4E5B9F73BCD27B" \
+        "9EE50D0657C77F374E903CDFA4C642" ) );
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #2 (div_mpi): " );
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 ||
+        mbedtls_mpi_cmp_mpi( &Y, &V ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
         ret = 1;
         goto cleanup;
     }
-    if (verbose) mbedtls_printf("passed\n");
-    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&X, &A, &E, &N, NULL));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16,
-                                            "36E139AEA55215609D2816998ED020BB"
-                                            "BD96C37890F65171D948E9BC7CBAA4D9"
-                                            "325D24D6A3C12710F10A09FA08AB87"));
-    if (verbose) mbedtls_printf("  MPI test #3 (exp_mod): ");
-    if (mbedtls_mpi_cmp_mpi(&X, &U)) {
-        if (verbose) mbedtls_printf("failed\n");
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &X, &A, &E, &N, NULL ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "36E139AEA55215609D2816998ED020BB" \
+        "BD96C37890F65171D948E9BC7CBAA4D9" \
+        "325D24D6A3C12710F10A09FA08AB87" ) );
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #3 (exp_mod): " );
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
         ret = 1;
         goto cleanup;
     }
-    if (verbose) mbedtls_printf("passed\n");
-    MBEDTLS_MPI_CHK(mbedtls_mpi_inv_mod(&X, &A, &N));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16,
-                                            "003A0AAEDD7E784FC07D8F9EC6E3BFD5"
-                                            "C3DBA76456363A10869622EAC2DD84EC"
-                                            "C5B8A74DAC4D09E03B5E0BE779F2DF61"));
-    if (verbose) mbedtls_printf("  MPI test #4 (inv_mod): ");
-    if (mbedtls_mpi_cmp_mpi(&X, &U)) {
-        if (verbose) mbedtls_printf("failed\n");
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &X, &A, &N ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
+        "003A0AAEDD7E784FC07D8F9EC6E3BFD5" \
+        "C3DBA76456363A10869622EAC2DD84EC" \
+        "C5B8A74DAC4D09E03B5E0BE779F2DF61" ) );
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #4 (inv_mod): " );
+    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "failed\n" );
         ret = 1;
         goto cleanup;
     }
-    if (verbose) mbedtls_printf("passed\n");
-    if (verbose) mbedtls_printf("  MPI test #5 (simple gcd): ");
-    for (i = 0; i < GCD_PAIR_COUNT; i++) {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&X, gcd_pairs[i][0]));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&Y, gcd_pairs[i][1]));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_gcd(&A, &X, &Y));
-        if (mbedtls_mpi_cmp_int(&A, gcd_pairs[i][2])) {
-            if (verbose) mbedtls_printf("failed at %d\n", i);
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
+    if( verbose != 0 )
+        mbedtls_printf( "  MPI test #5 (simple gcd): " );
+    for( i = 0; i < GCD_PAIR_COUNT; i++ )
+    {
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &X, gcd_pairs[i][0] ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Y, gcd_pairs[i][1] ) );
+        MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &A, &X, &Y ) );
+        if( mbedtls_mpi_cmp_int( &A, gcd_pairs[i][2] ) != 0 )
+        {
+            if( verbose != 0 )
+                mbedtls_printf( "failed at %d\n", i );
             ret = 1;
             goto cleanup;
         }
     }
-    if (verbose) mbedtls_printf("passed\n");
+    if( verbose != 0 )
+        mbedtls_printf( "passed\n" );
 cleanup:
-    if (ret && verbose)
-        mbedtls_printf("Unexpected error, return code = %08X\n", (unsigned int)ret);
-    mbedtls_mpi_free(&A);
-    mbedtls_mpi_free(&E);
-    mbedtls_mpi_free(&N);
-    mbedtls_mpi_free(&X);
-    mbedtls_mpi_free(&Y);
-    mbedtls_mpi_free(&U);
-    mbedtls_mpi_free(&V);
-    if (verbose) mbedtls_printf("\n");
-    return ret;
+    if( ret != 0 && verbose != 0 )
+        mbedtls_printf( "Unexpected error, return code = %08X\n", (unsigned int) ret );
+    mbedtls_mpi_free( &A ); mbedtls_mpi_free( &E ); mbedtls_mpi_free( &N ); mbedtls_mpi_free( &X );
+    mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &U ); mbedtls_mpi_free( &V );
+    if( verbose != 0 )
+        mbedtls_printf( "\n" );
+    return( ret );
 }
 
 #endif /* MBEDTLS_SELF_TEST */
diff --git a/third_party/mbedtls/config.h b/third_party/mbedtls/config.h
index fd3a085cf..c94cab0c7 100644
--- a/third_party/mbedtls/config.h
+++ b/third_party/mbedtls/config.h
@@ -80,17 +80,17 @@
 #ifndef TINY
 #define MBEDTLS_ECP_DP_SECP256R1_ENABLED
 #define MBEDTLS_ECP_DP_SECP384R1_ENABLED
+#define MBEDTLS_ECP_DP_SECP521R1_ENABLED
 #define MBEDTLS_ECP_DP_CURVE25519_ENABLED
+#define MBEDTLS_ECP_DP_CURVE448_ENABLED
 /*#define MBEDTLS_ECP_DP_SECP192R1_ENABLED*/
 /*#define MBEDTLS_ECP_DP_SECP224R1_ENABLED*/
-/*#define MBEDTLS_ECP_DP_SECP521R1_ENABLED*/
 /*#define MBEDTLS_ECP_DP_SECP192K1_ENABLED*/
 /*#define MBEDTLS_ECP_DP_SECP224K1_ENABLED*/
 /*#define MBEDTLS_ECP_DP_SECP256K1_ENABLED*/
 /*#define MBEDTLS_ECP_DP_BP256R1_ENABLED*/
 /*#define MBEDTLS_ECP_DP_BP384R1_ENABLED*/
 /*#define MBEDTLS_ECP_DP_BP512R1_ENABLED*/
-/*#define MBEDTLS_ECP_DP_CURVE448_ENABLED*/
 #endif
 
 #define MBEDTLS_X509_CHECK_KEY_USAGE
diff --git a/third_party/mbedtls/ecdh.h b/third_party/mbedtls/ecdh.h
index 93215bae6..16010d7ca 100644
--- a/third_party/mbedtls/ecdh.h
+++ b/third_party/mbedtls/ecdh.h
@@ -1,8 +1,8 @@
 #ifndef MBEDTLS_ECDH_H
 #define MBEDTLS_ECDH_H
 #include "third_party/mbedtls/config.h"
+#include "third_party/mbedtls/ecdh_everest.h"
 #include "third_party/mbedtls/ecp.h"
-#include "third_party/mbedtls/everest.h"
 /* clang-format off */
 
 #ifdef __cplusplus
diff --git a/third_party/mbedtls/ecdh_everest.c b/third_party/mbedtls/ecdh_everest.c
new file mode 100644
index 000000000..d29996de6
--- /dev/null
+++ b/third_party/mbedtls/ecdh_everest.c
@@ -0,0 +1,279 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│
+│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright The Mbed TLS Contributors                                          │
+│                                                                              │
+│ Licensed under the Apache License, Version 2.0 (the "License");              │
+│ you may not use this file except in compliance with the License.             │
+│ You may obtain a copy of the License at                                      │
+│                                                                              │
+│     http://www.apache.org/licenses/LICENSE-2.0                               │
+│                                                                              │
+│ Unless required by applicable law or agreed to in writing, software          │
+│ distributed under the License is distributed on an "AS IS" BASIS,            │
+│ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.     │
+│ See the License for the specific language governing permissions and          │
+│ limitations under the License.                                               │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "third_party/mbedtls/ecdh_everest.h"
+#include "third_party/mbedtls/everest.h"
+#if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
+#define KEYSIZE 32
+
+asm(".ident\t\"\\n\\n\
+Mbed TLS (Apache 2.0)\\n\
+Copyright ARM Limited\\n\
+Copyright Mbed TLS Contributors\"");
+asm(".include \"libc/disclaimer.inc\"");
+/* clang-format off */
+
+/**
+ * \brief           This function sets up the Everest (Curve25519) ECDH
+ *                  context by wiping it to a clean all-zero state.
+ *
+ *                  This function should be called after mbedtls_ecdh_init() but
+ *                  before mbedtls_ecdh_make_params(). There is no need to call
+ *                  this function before mbedtls_ecdh_read_params().
+ *
+ *                  This is the first function used by a TLS server for
+ *                  ECDHE ciphersuites.
+ *
+ * \param ctx       The ECDH context to set up. A null pointer is rejected.
+ * \param grp_id    The group id; only #MBEDTLS_ECP_DP_CURVE25519 is accepted.
+ *
+ * \return          \c 0 on success, else #MBEDTLS_ERR_ECP_BAD_INPUT_DATA.
+ */
+int mbedtls_everest_setup(mbedtls_ecdh_context_everest *ctx, int grp_id)
+{
+  if (!ctx || grp_id != MBEDTLS_ECP_DP_CURVE25519)
+    return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  mbedtls_platform_zeroize(ctx, sizeof(*ctx));
+  return 0;
+}
+
+/**
+ * \brief           This function wipes an Everest ECDH context.
+ *
+ * \param ctx       The context to wipe; a null pointer is ignored.
+ */
+void mbedtls_everest_free(mbedtls_ecdh_context_everest *ctx)
+{
+  if (ctx)
+    mbedtls_platform_zeroize(ctx, sizeof(*ctx));
+}
+
+/**
+ * \brief           This function generates a public key and a TLS
+ *                  ServerKeyExchange payload.
+ *
+ *                  This is the second function used by a TLS server for ECDHE
+ *                  ciphersuites. (It is called after mbedtls_ecdh_setup().)
+ *
+ * \note            The payload is the named-curve tag, the two-byte
+ *                  Curve25519 curve id and a length-prefixed public key.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context; receives the fresh private key.
+ * \param olen      The number of characters written.
+ * \param buf       The destination buffer.
+ * \param blen      The length of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                unsigned char *buf, size_t blen,
+                                int (*f_rng)(void *, unsigned char *, size_t),
+                                void *p_rng)
+{
+  int ret = 0;
+  unsigned char base[KEYSIZE] = {9}; /* Curve25519 base point, u = 9 */
+  if (!ctx) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  if ((ret = f_rng(p_rng, ctx->our_secret, KEYSIZE)) != 0) return ret;
+  *olen = KEYSIZE + 4;
+  if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
+  *buf++ = MBEDTLS_ECP_TLS_NAMED_CURVE;
+  *buf++ = MBEDTLS_ECP_TLS_CURVE25519 >> 8;
+  *buf++ = MBEDTLS_ECP_TLS_CURVE25519 & 0xFF;
+  *buf++ = KEYSIZE;
+  curve25519(buf, ctx->our_secret, base);
+  base[0] = 0; /* base is now all zeros */
+  if (!timingsafe_memcmp(buf, base, KEYSIZE))
+    return MBEDTLS_ERR_ECP_RANDOM_FAILED;
+  return 0;
+}
+
+/**
+ * \brief           Parses and processes a TLS ServerKeyExchange payload.
+ *
+ *                  This is the first function used by a TLS client for ECDHE
+ *                  ciphersuites.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context; receives the peer's public value.
+ * \param buf       In: start of the input. Out: first byte not consumed.
+ * \param end       The address for one Byte past the end of the buffer.
+ *
+ * \return          \c 0 on success, else #MBEDTLS_ERR_ECP_BAD_INPUT_DATA.
+ */
+int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *ctx,
+                                const unsigned char **buf,
+                                const unsigned char *end)
+{
+  const unsigned char *p = *buf;
+  if (end - p <= KEYSIZE) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  *buf = p + 1; /* the length octet counts as consumed even if rejected */
+  if (*p != KEYSIZE) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  memcpy(ctx->peer_point, p + 1, KEYSIZE);
+  *buf = p + 1 + KEYSIZE;
+  return 0;
+}
+
+/**
+ * \brief           This function sets up an ECDH context from an EC key.
+ *
+ *                  It is used by clients and servers in place of the
+ *                  ServerKeyExchange for static ECDH, and imports ECDH
+ *                  parameters from the EC key information of a certificate.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context to set up.
+ * \param key       The EC key to use.
+ * \param side      #MBEDTLS_EVEREST_ECDH_OURS imports the private value,
+ *                  #MBEDTLS_EVEREST_ECDH_THEIRS imports the public point.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *ctx,
+                               const mbedtls_ecp_keypair *key,
+                               mbedtls_everest_ecdh_side side)
+{
+  size_t olen = 0; /* length reported by the point writer; not needed here */
+  if (!ctx || !key) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  switch (side)
+  {
+    case MBEDTLS_EVEREST_ECDH_THEIRS:
+      return mbedtls_ecp_point_write_binary(&key->grp, &key->Q,
+                                            MBEDTLS_ECP_PF_COMPRESSED, &olen,
+                                            ctx->peer_point, KEYSIZE);
+    case MBEDTLS_EVEREST_ECDH_OURS:
+      return mbedtls_mpi_write_binary_le(&key->d, ctx->our_secret, KEYSIZE);
+    default:
+      return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  }
+}
+
+/**
+ * \brief           This function generates a fresh key pair and writes the
+ *                  public half as a TLS ClientKeyExchange payload.
+ *
+ *                  This is the second function used by a TLS client for ECDH(E)
+ *                  ciphersuites.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context; receives the private key.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The size of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                unsigned char *buf, size_t blen,
+                                int (*f_rng)(void *, unsigned char *, size_t),
+                                void *p_rng)
+{
+  int rc;
+  unsigned char u[KEYSIZE] = {9}; /* starts out as the base point */
+  if (!ctx) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  rc = f_rng(p_rng, ctx->our_secret, KEYSIZE);
+  if (rc != 0) return rc;
+  *olen = KEYSIZE + 1;
+  if (blen <= KEYSIZE) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
+  buf[0] = KEYSIZE; /* one length octet, then the public key */
+  curve25519(buf + 1, ctx->our_secret, u);
+  u[0] = 0; /* u is all zeros from here on */
+  if (timingsafe_memcmp(buf + 1, u, KEYSIZE)) return 0;
+  return MBEDTLS_ERR_ECP_RANDOM_FAILED; /* public key came out all zero */
+}
+
+/**
+ * \brief       This function parses and processes a TLS ClientKeyExchange
+ *              payload.
+ *
+ *              This is the third function used by a TLS server for ECDH(E)
+ *              ciphersuites. (It is called after mbedtls_ecdh_setup() and
+ *              mbedtls_ecdh_make_params().)
+ *
+ * \see         ecp.h
+ *
+ * \param ctx   The ECDH context; receives the peer's public value.
+ * \param buf   The start of the input: a length octet, then the key.
+ * \param blen  The length of the input buffer.
+ *
+ * \return      \c 0 on success.
+ * \return      An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *ctx,
+                                const unsigned char *buf, size_t blen)
+{
+  if (blen <= KEYSIZE) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
+  if (buf[0] != KEYSIZE) return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+  memcpy(ctx->peer_point, buf + 1, KEYSIZE);
+  return 0;
+}
+
+/**
+ * \brief           This function derives and exports the shared secret.
+ *
+ *                  This is the last function used by both TLS client
+ *                  and servers.
+ *
+ * \note            \p f_rng and \p p_rng are not used by this implementation.
+ *                  The private key in \p ctx is wiped once the secret has
+ *                  been computed, whether or not the result is accepted.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The length of the destination buffer.
+ * \param f_rng     The RNG function (unused).
+ * \param p_rng     The RNG context (unused).
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                unsigned char *buf, size_t blen,
+                                int (*f_rng)(void *, unsigned char *, size_t),
+                                void *p_rng)
+{
+  /* f_rng and p_rng are not used here because this implementation does not
+     need blinding since it has constant trace. (todo(jart): wut?) */
+  *olen = KEYSIZE;
+  if (blen < *olen) return MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL;
+  curve25519(buf, ctx->our_secret, ctx->peer_point);
+  if (!timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE)) goto wut;
+  /* Wipe the DH secret and don't let the peer choose a small subgroup point */
+  mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE);
+  if (!timingsafe_memcmp(buf, ctx->our_secret, KEYSIZE)) goto wut;
+  return 0;
+wut: /* rejected: scrub both the output buffer and the private key */
+  mbedtls_platform_zeroize(buf, KEYSIZE);
+  mbedtls_platform_zeroize(ctx->our_secret, KEYSIZE);
+  return MBEDTLS_ERR_ECP_RANDOM_FAILED;
+}
+
+#endif
diff --git a/third_party/mbedtls/ecdh_everest.h b/third_party/mbedtls/ecdh_everest.h
new file mode 100644
index 000000000..6f756d5ac
--- /dev/null
+++ b/third_party/mbedtls/ecdh_everest.h
@@ -0,0 +1,43 @@
+#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
+#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
+#include "third_party/mbedtls/config.h"
+#include "third_party/mbedtls/ecp.h"
+COSMOPOLITAN_C_START_
+
+#define MBEDTLS_ECP_TLS_CURVE25519    0x1d /* curve id written to the wire */
+#define MBEDTLS_X25519_KEY_SIZE_BYTES 32   /* size of each context buffer */
+
+typedef enum {
+  MBEDTLS_EVEREST_ECDH_OURS,   /* get_params: import our private value */
+  MBEDTLS_EVEREST_ECDH_THEIRS, /* get_params: import the peer's public point */
+} mbedtls_everest_ecdh_side;
+
+typedef struct {
+  unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES]; /* private key */
+  unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES]; /* peer public */
+} mbedtls_ecdh_context_everest;
+
+int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int);
+void mbedtls_everest_free(mbedtls_ecdh_context_everest *);
+int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *,
+                                unsigned char *, size_t,
+                                int (*)(void *, unsigned char *, size_t),
+                                void *);
+int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *,
+                                const unsigned char **, const unsigned char *);
+int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *,
+                               const mbedtls_ecp_keypair *,
+                               mbedtls_everest_ecdh_side);
+int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *,
+                                unsigned char *, size_t,
+                                int (*)(void *, unsigned char *, size_t),
+                                void *);
+int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *,
+                                const unsigned char *, size_t);
+int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *,
+                                unsigned char *, size_t,
+                                int (*)(void *, unsigned char *, size_t),
+                                void *);
+
+COSMOPOLITAN_C_END_
+#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */
diff --git a/third_party/mbedtls/ecdsa.c b/third_party/mbedtls/ecdsa.c
index 04beab389..0ac74dcd1 100644
--- a/third_party/mbedtls/ecdsa.c
+++ b/third_party/mbedtls/ecdsa.c
@@ -28,31 +28,12 @@ Mbed TLS (Apache 2.0)\\n\
 Copyright ARM Limited\\n\
 Copyright Mbed TLS Contributors\"");
 asm(".include \"libc/disclaimer.inc\"");
-
 /* clang-format off */
-/*
- *  Elliptic curve DSA
- *
- *  Copyright The Mbed TLS Contributors
- *  SPDX-License-Identifier: Apache-2.0
- *
- *  Licensed under the Apache License, Version 2.0 (the "License"); you may
- *  not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
 
-/*
- * References:
+/**
+ * @fileoverview Elliptic curve Digital Signature Algorithm
  *
- * SEC1 http://www.secg.org/index.php?action=secg,docs_secg
+ * @see SEC1 http://www.secg.org/index.php?action=secg,docs_secg
  */
 
 #if defined(MBEDTLS_ECDSA_C)
diff --git a/third_party/mbedtls/ecp.c b/third_party/mbedtls/ecp.c
index 69e3cce88..d30fd906c 100644
--- a/third_party/mbedtls/ecp.c
+++ b/third_party/mbedtls/ecp.c
@@ -511,12 +511,15 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] =
 #if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
     { MBEDTLS_ECP_DP_CURVE25519,   29,     256,    "x25519"            },
 #endif
-#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
-    { MBEDTLS_ECP_DP_SECP256R1,    23,     256,    "secp256r1"         },
-#endif
 #if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
     { MBEDTLS_ECP_DP_SECP384R1,    24,     384,    "secp384r1"         },
 #endif
+#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
+    { MBEDTLS_ECP_DP_CURVE448,     30,     448,    "x448"              },
+#endif
+#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
+    { MBEDTLS_ECP_DP_SECP256R1,    23,     256,    "secp256r1"         },
+#endif
 #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
     { MBEDTLS_ECP_DP_SECP521R1,    25,     521,    "secp521r1"         },
 #endif
@@ -543,9 +546,6 @@ static const mbedtls_ecp_curve_info ecp_supported_curves[] =
 #endif
 #if defined(MBEDTLS_ECP_DP_SECP192K1_ENABLED)
     { MBEDTLS_ECP_DP_SECP192K1,    18,     192,    "secp192k1"         },
-#endif
-#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
-    { MBEDTLS_ECP_DP_CURVE448,     30,     448,    "x448"              },
 #endif
     { MBEDTLS_ECP_DP_NONE,          0,     0,      NULL                },
 };
diff --git a/third_party/mbedtls/ecp256.c b/third_party/mbedtls/ecp256.c
index d2e4c1f65..8e5157669 100644
--- a/third_party/mbedtls/ecp256.c
+++ b/third_party/mbedtls/ecp256.c
@@ -38,32 +38,15 @@ mbedtls_p256_isz( uint64_t p[4] )
 static inline bool
 mbedtls_p256_gte( uint64_t p[5] )
 {
-    return( (p[4] ||
-             p[3] > 0xffffffff00000001 ||
-             (p[3] == 0xffffffff00000001 &&
-              p[2] > 0x0000000000000000 ||
-              (p[2] == 0x0000000000000000 &&
-               p[1] > 0x00000000ffffffff ||
-               (p[1] == 0x00000000ffffffff &&
-                p[0] > 0xffffffffffffffff ||
-                (p[0] == 0xffffffffffffffff))))) );
-}
-
-static int
-mbedtls_p256_cmp( const uint64_t a[5],
-                  const uint64_t b[5] )
-{
-    if( a[4] < b[4] ) return -1;
-    if( a[4] > b[4] ) return  1;
-    if( a[3] < b[3] ) return -1;
-    if( a[3] > b[3] ) return  1;
-    if( a[2] < b[2] ) return -1;
-    if( a[2] > b[2] ) return  1;
-    if( a[1] < b[1] ) return -1;
-    if( a[1] > b[1] ) return  1;
-    if( a[0] < b[0] ) return -1;
-    if( a[0] > b[0] ) return  1;
-    return 0;
+    return( ((int64_t)p[4] > 0 ||
+             (p[3] > 0xffffffff00000001 ||
+              (p[3] == 0xffffffff00000001 &&
+               (p[2] > 0x0000000000000000 ||
+                (p[2] == 0x0000000000000000 &&
+                 (p[1] > 0x00000000ffffffff ||
+                  (p[1] == 0x00000000ffffffff &&
+                   (p[0] > 0xffffffffffffffff ||
+                    (p[0] == 0xffffffffffffffff))))))))) );
 }
 
 static inline void
@@ -119,125 +102,49 @@ mbedtls_p256_rum( uint64_t p[5] )
         mbedtls_p256_red( p );
 }
 
+static void
+mbedtls_p256_mod(uint64_t X[8])
+{
+    /* secp256r1() first; then fix up according to the sign of limb X[4] */
+    secp256r1(X);
+    if ((int64_t)X[4] >= 0) {
+        while (mbedtls_p256_gte(X))
+            mbedtls_p256_red(X);
+        return;
+    }
+    do
+        mbedtls_p256_gro(X);
+    while ((int64_t)X[4] < 0);
+}
+
 static inline void
 mbedtls_p256_sar( uint64_t p[5] )
 {
-#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
-    asm("sarq\t32+%0\n\t"
-        "rcrq\t24+%0\n\t"
-        "rcrq\t16+%0\n\t"
-        "rcrq\t8+%0\n\t"
-        "rcrq\t%0\n\t"
-        : "+o"(*p)
-        : /* no inputs */
-        : "memory", "cc");
-#else
     p[0] = p[0] >> 1 | p[1] << 63;
     p[1] = p[1] >> 1 | p[2] << 63;
     p[2] = p[2] >> 1 | p[3] << 63;
     p[3] = p[3] >> 1 | p[4] << 63;
     p[4] = (int64_t)p[4] >> 1;
-#endif
 }
 
 static inline void
 mbedtls_p256_shl( uint64_t p[5] )
 {
-#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
-    asm("shlq\t%0\n\t"
-        "rclq\t8+%0\n\t"
-        "rclq\t16+%0\n\t"
-        "rclq\t24+%0\n\t"
-        "rclq\t32+%0\n\t"
-        : "+o"(*p)
-        : /* no inputs */
-        : "memory", "cc");
-#else
     p[4] =             p[3] >> 63;
     p[3] = p[3] << 1 | p[2] >> 63;
     p[2] = p[2] << 1 | p[1] >> 63;
     p[1] = p[1] << 1 | p[0] >> 63;
     p[0] = p[0] << 1;
-#endif
     mbedtls_p256_rum( p );
 }
 
 static inline void
-mbedtls_p256_jam( uint64_t p[5] )
-{
-    secp256r1( p );
-    if( (int64_t)p[4] < 0 )
-        do
-            mbedtls_p256_gro( p );
-        while( (int64_t)p[4] < 0 );
-    else
-        mbedtls_p256_rum( p );
-}
-
-static void
-mbedtls_p256_mul_1x1( uint64_t X[8],
-                      const uint64_t A[4], size_t n,
-                      const uint64_t B[4], size_t m )
-{
-    uint128_t t;
-    t = A[0];
-    t *= B[0];
-    X[ 0] = t;
-    X[ 1] = t >> 64;
-    X[ 2] = 0;
-    X[ 3] = 0;
-    X[ 4] = 0;
-    X[ 5] = 0;
-    X[ 6] = 0;
-    X[ 7] = 0;
-}
-
-static void
-mbedtls_p256_mul_nx1( uint64_t X[8],
-                      const uint64_t A[4], size_t n,
-                      const uint64_t B[4], size_t m )
-{
-    mbedtls_mpi_mul_hlp1(n, A, X, B[0]);
-    mbedtls_platform_zeroize( X + n + m, ( 8 - n - m ) * 8 );
-    if ( n + m >= 4 )
-        mbedtls_p256_jam( X );
-}
-
-static void
-mbedtls_p256_mul_4x4( uint64_t X[8],
-                      const uint64_t A[4], size_t n,
-                      const uint64_t B[4], size_t m )
-{
-    Mul4x4( X, A, B );
-    mbedtls_p256_jam( X );
-}
-
-static void
-mbedtls_p256_mul_nxm( uint64_t X[8],
-                      const uint64_t A[4], size_t n,
-                      const uint64_t B[4], size_t m )
-{
-    if (A == X) A = gc(memcpy(malloc(4 * 8), A, 4 * 8));
-    if (B == X) B = gc(memcpy(malloc(4 * 8), B, 4 * 8));
-    Mul( X, A, n, B, m );
-    mbedtls_platform_zeroize( X + n + m, (8 - n - m) * 8 );
-    if ( n + m >= 4 )
-        mbedtls_p256_jam( X );
-}
-
-static void
 mbedtls_p256_mul( uint64_t X[8],
                   const uint64_t A[4], size_t n,
                   const uint64_t B[4], size_t m )
 {
-    if( n == 4 && m == 4 )
-        mbedtls_p256_mul_4x4( X, A, n, B, m );
-    else if( m == 1 && n == 1 )
-        mbedtls_p256_mul_1x1( X, A, n, B, m );
-    else if( m == 1 )
-        mbedtls_p256_mul_nx1( X, A, n, B, m );
-    else
-        mbedtls_p256_mul_nxm( X, A, n, B, m );
+    Mul4x4( X, A, B );
+    mbedtls_p256_mod( X );
 }
 
 static void
diff --git a/third_party/mbedtls/ecp384.c b/third_party/mbedtls/ecp384.c
index d4421f22f..a213f8b87 100644
--- a/third_party/mbedtls/ecp384.c
+++ b/third_party/mbedtls/ecp384.c
@@ -36,42 +36,20 @@ mbedtls_p384_isz( uint64_t p[6] )
 }
 
 static inline bool
-mbedtls_p384_gte( uint64_t p[7] )
-{
-    return( (p[6] ||
-             p[5] > 0xffffffffffffffff ||
-             (p[5] == 0xffffffffffffffff &&
-              p[4] > 0xffffffffffffffff ||
-              (p[4] == 0xffffffffffffffff &&
-               p[3] > 0xffffffffffffffff ||
-               (p[3] == 0xffffffffffffffff &&
-                p[2] > 0xfffffffffffffffe ||
-                (p[2] == 0xfffffffffffffffe &&
-                 p[1] > 0xffffffff00000000 ||
-                 (p[1] == 0xffffffff00000000 &&
-                  p[0] > 0x00000000ffffffff ||
-                  (p[0] == 0x00000000ffffffff))))))) );
-}
-
-static int
-mbedtls_p384_cmp( const uint64_t a[7],
-                  const uint64_t b[7] )
-{
-    if( a[6] < b[6] ) return -1;
-    if( a[6] > b[6] ) return  1;
-    if( a[5] < b[5] ) return -1;
-    if( a[5] > b[5] ) return  1;
-    if( a[4] < b[4] ) return -1;
-    if( a[4] > b[4] ) return  1;
-    if( a[3] < b[3] ) return -1;
-    if( a[3] > b[3] ) return  1;
-    if( a[2] < b[2] ) return -1;
-    if( a[2] > b[2] ) return  1;
-    if( a[1] < b[1] ) return -1;
-    if( a[1] > b[1] ) return  1;
-    if( a[0] < b[0] ) return -1;
-    if( a[0] > b[0] ) return  1;
-    return 0;
+mbedtls_p384_gte( uint64_t p[7] ) {
+    return( ((int64_t)p[6] > 0 ||
+             (p[5] > 0xffffffffffffffff ||
+              (p[5] == 0xffffffffffffffff &&
+               (p[4] > 0xffffffffffffffff ||
+                (p[4] == 0xffffffffffffffff &&
+                 (p[3] > 0xffffffffffffffff ||
+                  (p[3] == 0xffffffffffffffff &&
+                   (p[2] > 0xfffffffffffffffe ||
+                    (p[2] == 0xfffffffffffffffe &&
+                     (p[1] > 0xffffffff00000000 ||
+                      (p[1] == 0xffffffff00000000 &&
+                       (p[0] > 0x00000000ffffffff ||
+                        (p[0] == 0x00000000ffffffff))))))))))))) );
 }
 
 static inline void
@@ -97,11 +75,11 @@ mbedtls_p384_red( uint64_t p[7] )
     SBB( p[3], p[3], 0xffffffffffffffff, c, c );
     SBB( p[4], p[4], 0xffffffffffffffff, c, c );
     SBB( p[5], p[5], 0xffffffffffffffff, c, c );
-    SBB( p[6], p[6], 0, c, c );
+    SBB( p[6], p[6], 0,                  c, c );
 #endif
 }
 
-static noinline void
+static inline void
 mbedtls_p384_gro( uint64_t p[7] )
 {
 #if defined(__x86_64__) && !defined(__STRICT_ANSI__)
@@ -128,28 +106,31 @@ mbedtls_p384_gro( uint64_t p[7] )
 #endif
 }
 
-static void
+static inline void
 mbedtls_p384_rum( uint64_t p[7] )
 {
     while( mbedtls_p384_gte( p ) )
         mbedtls_p384_red( p );
 }
 
+static inline void
+mbedtls_p384_mod(uint64_t X[12])
+{
+    /* secp384r1() first; then fix up according to the sign of limb X[6] */
+    secp384r1(X);
+    if ((int64_t)X[6] >= 0) {
+        while (mbedtls_p384_gte(X))
+            mbedtls_p384_red(X);
+        return;
+    }
+    do
+        mbedtls_p384_gro(X);
+    while ((int64_t)X[6] < 0);
+}
+
 static inline void
 mbedtls_p384_sar( uint64_t p[7] )
 {
-#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
-    asm("sarq\t48+%0\n\t"
-        "rcrq\t40+%0\n\t"
-        "rcrq\t32+%0\n\t"
-        "rcrq\t24+%0\n\t"
-        "rcrq\t16+%0\n\t"
-        "rcrq\t8+%0\n\t"
-        "rcrq\t%0\n\t"
-        : "+o"(*p)
-        : /* no inputs */
-        : "memory", "cc");
-#else
     p[0] = p[0] >> 1 | p[1] << 63;
     p[1] = p[1] >> 1 | p[2] << 63;
     p[2] = p[2] >> 1 | p[3] << 63;
@@ -157,24 +138,11 @@ mbedtls_p384_sar( uint64_t p[7] )
     p[4] = p[4] >> 1 | p[5] << 63;
     p[5] = p[5] >> 1 | p[6] << 63;
     p[6] = (int64_t)p[6] >> 1;
-#endif
 }
 
 static inline void
 mbedtls_p384_shl( uint64_t p[7] )
 {
-#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
-    asm("shlq\t%0\n\t"
-        "rclq\t8+%0\n\t"
-        "rclq\t16+%0\n\t"
-        "rclq\t24+%0\n\t"
-        "rclq\t32+%0\n\t"
-        "rclq\t40+%0\n\t"
-        "rclq\t48+%0\n\t"
-        : "+o"(*p)
-        : /* no inputs */
-        : "memory", "cc");
-#else
     p[6] =             p[5] >> 63;
     p[5] = p[5] << 1 | p[4] >> 63;
     p[4] = p[4] << 1 | p[3] >> 63;
@@ -182,90 +150,24 @@ mbedtls_p384_shl( uint64_t p[7] )
     p[2] = p[2] << 1 | p[1] >> 63;
     p[1] = p[1] << 1 | p[0] >> 63;
     p[0] = p[0] << 1;
-#endif
     mbedtls_p384_rum( p );
 }
 
-static inline void
-mbedtls_p384_jam( uint64_t p[7] )
-{
-    secp384r1( p );
-    if( (int64_t)p[6] < 0 )
-        do
-            mbedtls_p384_gro( p );
-        while( (int64_t)p[6] < 0 );
-    else
-        mbedtls_p384_rum( p );
-}
-
-static void
-mbedtls_p384_mul_1x1( uint64_t X[12],
-                      const uint64_t A[6], size_t n,
-                      const uint64_t B[6], size_t m )
-{
-    uint128_t t;
-    t = A[0];
-    t *= B[0];
-    X[ 0] = t;
-    X[ 1] = t >> 64;
-    X[ 2] = 0;
-    X[ 3] = 0;
-    X[ 4] = 0;
-    X[ 5] = 0;
-    X[ 6] = 0;
-    X[ 7] = 0;
-    X[ 8] = 0;
-    X[ 9] = 0;
-    X[10] = 0;
-    X[11] = 0;
-}
-
-static void
-mbedtls_p384_mul_nx1( uint64_t X[12],
-                      const uint64_t A[6], size_t n,
-                      const uint64_t B[6], size_t m )
-{
-    mbedtls_mpi_mul_hlp1(n, A, X, B[0]);
-    mbedtls_platform_zeroize( X + n + m, ( 12 - n - m ) * 8 );
-    if ( n + m >= 6 )
-        mbedtls_p384_jam( X );
-}
-
-static void
-mbedtls_p384_mul_6x6( uint64_t X[12],
-                      const uint64_t A[6], size_t n,
-                      const uint64_t B[6], size_t m )
-{
-    Mul6x6Adx( X, A, B );
-    mbedtls_p384_jam( X );
-}
-
-static void
-mbedtls_p384_mul_nxm( uint64_t X[12],
-                      const uint64_t A[6], size_t n,
-                      const uint64_t B[6], size_t m )
-{
-    if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8));
-    if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8));
-    Mul( X, A, n, B, m );
-    mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 );
-    if ( n + m >= 6 )
-        mbedtls_p384_jam( X );
-}
-
 static void
 mbedtls_p384_mul( uint64_t X[12],
                   const uint64_t A[6], size_t n,
                   const uint64_t B[6], size_t m )
 {
-    if( n == 6 && m == 6 && X86_HAVE(ADX) && X86_HAVE(BMI2) )
-        mbedtls_p384_mul_6x6( X, A, n, B, m );
-    else if( m == 1 && n == 1 )
-        mbedtls_p384_mul_1x1( X, A, n, B, m );
-    else if( m == 1 )
-        mbedtls_p384_mul_nx1( X, A, n, B, m );
+    if( X86_HAVE(ADX) && X86_HAVE(BMI2) )
+        Mul6x6Adx( X, A, B );
     else
-        mbedtls_p384_mul_nxm( X, A, n, B, m );
+    {
+        if (A == X) A = gc(memcpy(malloc(6 * 8), A, 6 * 8));
+        if (B == X) B = gc(memcpy(malloc(6 * 8), B, 6 * 8));
+        Mul( X, A, n, B, m );
+        mbedtls_platform_zeroize( X + n + m, (12 - n - m) * 8 );
+    }
+    mbedtls_p384_mod( X );
 }
 
 static void
diff --git a/third_party/mbedtls/ecp_curves.c b/third_party/mbedtls/ecp_curves.c
index 2dc189e8f..2b13aed58 100644
--- a/third_party/mbedtls/ecp_curves.c
+++ b/third_party/mbedtls/ecp_curves.c
@@ -46,7 +46,7 @@ asm(".include \"libc/disclaimer.inc\"");
  *  limitations under the License.
  */
 
-/* #if defined(MBEDTLS_ECP_C) */
+#if defined(MBEDTLS_ECP_C)
 
 #if !defined(MBEDTLS_ECP_ALT)
 
@@ -635,12 +635,7 @@ static int ecp_group_load( mbedtls_ecp_group *grp,
 #endif /* ECP_LOAD_GROUP */
 
 #if defined(MBEDTLS_ECP_NIST_OPTIM)
-#define NIST_MODP( P )      grp->modp = ecp_mod_ ## P;
-#else
-#define NIST_MODP( P )
-#endif
-
-#if defined(MBEDTLS_ECP_NIST_OPTIM)
+/* Forward declarations */
 #if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED)
 static int ecp_mod_p192( mbedtls_mpi * );
 #endif
@@ -650,8 +645,13 @@ static int ecp_mod_p224( mbedtls_mpi * );
 #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
 static int ecp_mod_p521( mbedtls_mpi * );
 #endif
+
+#define NIST_MODP( P )      grp->modp = ecp_mod_ ## P;
+#else
+#define NIST_MODP( P )
 #endif /* MBEDTLS_ECP_NIST_OPTIM */
 
+/* Additional forward declarations */
 #if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
 static int ecp_mod_p255( mbedtls_mpi * );
 #endif
@@ -771,8 +771,6 @@ cleanup:
 }
 #endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */
 
-
-#if defined(MBEDTLS_ECP_C)
 /**
  * \brief           This function sets up an ECP group context
  *                  from a standardized set of domain parameters.
@@ -879,7 +877,6 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id )
             return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
     }
 }
-#endif /* MBEDTLS_ECP_C */
 
 #if defined(MBEDTLS_ECP_NIST_OPTIM)
 /*
@@ -892,6 +889,7 @@ int mbedtls_ecp_group_load( mbedtls_ecp_group *grp, mbedtls_ecp_group_id id )
  * MPI remains loose, since these functions can be deactivated at will.
  */
 
+#if defined(MBEDTLS_ECP_DP_SECP192R1_ENABLED)
 /*
  * Compared to the way things are presented in FIPS 186-3 D.2,
  * we proceed in columns, from right (least significant chunk) to left,
@@ -942,13 +940,17 @@ static int ecp_mod_p192( mbedtls_mpi *N )
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
     mbedtls_mpi_uint c = 0;
     mbedtls_mpi_uint *p, *end;
+
     /* Make sure we have enough blocks so that A(5) is legal */
     MBEDTLS_MPI_CHK( mbedtls_mpi_grow( N, 6 * WIDTH ) );
+
     p = N->p;
     end = p + N->n;
+
     ADD( 3 ); ADD( 5 );             NEXT; // A0 += A3 + A5
     ADD( 3 ); ADD( 4 ); ADD( 5 );   NEXT; // A1 += A3 + A4 + A5
     ADD( 4 ); ADD( 5 );             LAST; // A2 += A4 + A5
+
 cleanup:
     return( ret );
 }
@@ -958,7 +960,11 @@ cleanup:
 #undef ADD
 #undef NEXT
 #undef LAST
+#endif /* MBEDTLS_ECP_DP_SECP192R1_ENABLED */
 
+#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) ||   \
+    defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||   \
+    defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 /*
  * The reader is advised to first understand ecp_mod_p192() since the same
  * general structure is used here, but with additional complications:
@@ -1059,6 +1065,7 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
 static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, size_t bits )
 {
     int ret = MBEDTLS_ERR_THIS_CORRUPTION;
+
     /* C = - c * 2^(bits + 32) */
 #if !defined(MBEDTLS_HAVE_INT64)
     ((void) bits);
@@ -1068,19 +1075,24 @@ static inline int fix_negative( mbedtls_mpi *N, signed char c, mbedtls_mpi *C, s
     else
 #endif
         C->p[ C->n - 1 ] = (mbedtls_mpi_uint) -c;
+
     /* N = - ( C - N ) */
     MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( N, C, N ) );
     N->s = -1;
+
 cleanup:
+
     return( ret );
 }
 
+#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
 /*
  * Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2)
  */
 static int ecp_mod_p224( mbedtls_mpi *N )
 {
     INIT( 224 );
+
     SUB(  7 ); SUB( 11 );               NEXT; // A0 += -A7 - A11
     SUB(  8 ); SUB( 12 );               NEXT; // A1 += -A8 - A12
     SUB(  9 ); SUB( 13 );               NEXT; // A2 += -A9 - A13
@@ -1088,9 +1100,97 @@ static int ecp_mod_p224( mbedtls_mpi *N )
     SUB( 11 ); ADD(  8 ); ADD( 12 );    NEXT; // A4 += -A11 + A8 + A12
     SUB( 12 ); ADD(  9 ); ADD( 13 );    NEXT; // A5 += -A12 + A9 + A13
     SUB( 13 ); ADD( 10 );               LAST; // A6 += -A13 + A10
+
 cleanup:
     return( ret );
 }
+#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
+
+#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
+/*
+ * Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3)
+ */
+int ecp_mod_p256_old( mbedtls_mpi *N )
+{
+    INIT( 256 );
+
+    ADD(  8 ); ADD(  9 );
+    SUB( 11 ); SUB( 12 ); SUB( 13 ); SUB( 14 );             NEXT; // A0
+
+    ADD(  9 ); ADD( 10 );
+    SUB( 12 ); SUB( 13 ); SUB( 14 ); SUB( 15 );             NEXT; // A1
+
+    ADD( 10 ); ADD( 11 );
+    SUB( 13 ); SUB( 14 ); SUB( 15 );                        NEXT; // A2
+
+    ADD( 11 ); ADD( 11 ); ADD( 12 ); ADD( 12 ); ADD( 13 );
+    SUB( 15 ); SUB(  8 ); SUB(  9 );                        NEXT; // A3
+
+    ADD( 12 ); ADD( 12 ); ADD( 13 ); ADD( 13 ); ADD( 14 );
+    SUB(  9 ); SUB( 10 );                                   NEXT; // A4
+
+    ADD( 13 ); ADD( 13 ); ADD( 14 ); ADD( 14 ); ADD( 15 );
+    SUB( 10 ); SUB( 11 );                                   NEXT; // A5
+
+    ADD( 14 ); ADD( 14 ); ADD( 15 ); ADD( 15 ); ADD( 14 ); ADD( 13 );
+    SUB(  8 ); SUB(  9 );                                   NEXT; // A6
+
+    ADD( 15 ); ADD( 15 ); ADD( 15 ); ADD( 8 );
+    SUB( 10 ); SUB( 11 ); SUB( 12 ); SUB( 13 );             LAST; // A7
+
+cleanup:
+    return( ret );
+}
+#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED */
+
+#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
+/*
+ * Fast quasi-reduction modulo p384 (FIPS 186-3 D.2.4)
+ */
+int ecp_mod_p384_old( mbedtls_mpi *N )
+{
+    INIT( 384 );
+
+    ADD( 12 ); ADD( 21 ); ADD( 20 );
+    SUB( 23 );                                              NEXT; // A0
+
+    ADD( 13 ); ADD( 22 ); ADD( 23 );
+    SUB( 12 ); SUB( 20 );                                   NEXT; // A1
+
+    ADD( 14 ); ADD( 23 );
+    SUB( 13 ); SUB( 21 );                                   NEXT; // A2
+
+    ADD( 15 ); ADD( 12 ); ADD( 20 ); ADD( 21 );
+    SUB( 14 ); SUB( 22 ); SUB( 23 );                        NEXT; // A3
+
+    ADD( 21 ); ADD( 21 ); ADD( 16 ); ADD( 13 ); ADD( 12 ); ADD( 20 ); ADD( 22 );
+    SUB( 15 ); SUB( 23 ); SUB( 23 );                        NEXT; // A4
+
+    ADD( 22 ); ADD( 22 ); ADD( 17 ); ADD( 14 ); ADD( 13 ); ADD( 21 ); ADD( 23 );
+    SUB( 16 );                                              NEXT; // A5
+
+    ADD( 23 ); ADD( 23 ); ADD( 18 ); ADD( 15 ); ADD( 14 ); ADD( 22 );
+    SUB( 17 );                                              NEXT; // A6
+
+    ADD( 19 ); ADD( 16 ); ADD( 15 ); ADD( 23 );
+    SUB( 18 );                                              NEXT; // A7
+
+    ADD( 20 ); ADD( 17 ); ADD( 16 );
+    SUB( 19 );                                              NEXT; // A8
+
+    ADD( 21 ); ADD( 18 ); ADD( 17 );
+    SUB( 20 );                                              NEXT; // A9
+
+    ADD( 22 ); ADD( 19 ); ADD( 18 );
+    SUB( 21 );                                              NEXT; // A10
+
+    ADD( 23 ); ADD( 20 ); ADD( 19 );
+    SUB( 22 );                                              LAST; // A11
+
+cleanup:
+    return( ret );
+}
+#endif /* MBEDTLS_ECP_DP_SECP384R1_ENABLED */
 
 #undef A
 #undef LOAD32
@@ -1100,6 +1200,10 @@ cleanup:
 #undef NEXT
 #undef LAST
 
+#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED ||
+          MBEDTLS_ECP_DP_SECP256R1_ENABLED ||
+          MBEDTLS_ECP_DP_SECP384R1_ENABLED */
+
 #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
 /*
  * Here we have an actual Mersenne prime, so things are more straightforward.
@@ -1156,6 +1260,8 @@ cleanup:
 
 #endif /* MBEDTLS_ECP_NIST_OPTIM */
 
+#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
+
 /* Size of p255 in terms of mbedtls_mpi_uint */
 #define P255_WIDTH      ( 255 / 8 / sizeof( mbedtls_mpi_uint ) + 1 )
 
@@ -1169,28 +1275,34 @@ static int ecp_mod_p255( mbedtls_mpi *N )
     size_t i;
     mbedtls_mpi M;
     mbedtls_mpi_uint Mp[P255_WIDTH + 2];
+
     if( N->n < P255_WIDTH )
         return( 0 );
+
     /* M = A1 */
     M.s = 1;
     M.n = N->n - ( P255_WIDTH - 1 );
     if( M.n > P255_WIDTH + 1 )
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
     M.p = Mp;
-    mbedtls_platform_zeroize( Mp, sizeof Mp );
+    memset( Mp, 0, sizeof Mp );
     memcpy( Mp, N->p + P255_WIDTH - 1, M.n * sizeof( mbedtls_mpi_uint ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, 255 % ( 8 * sizeof( mbedtls_mpi_uint ) ) ) );
     M.n++; /* Make room for multiplication by 19 */
+
     /* N = A0 */
     MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( N, 255, 0 ) );
     for( i = P255_WIDTH; i < N->n; i++ )
         N->p[i] = 0;
+
     /* N = A0 + 19 * A1 */
     MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &M, &M, 19 ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( N, N, &M ) );
+
 cleanup:
     return( ret );
 }
+#endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */
 
 #if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
 
@@ -1231,7 +1343,7 @@ static int ecp_mod_p448( mbedtls_mpi *N )
         /* Shouldn't be called with N larger than 2^896! */
         return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
     M.p = Mp;
-    mbedtls_platform_zeroize( Mp, sizeof( Mp ) );
+    memset( Mp, 0, sizeof( Mp ) );
     memcpy( Mp, N->p + P448_WIDTH, M.n * sizeof( mbedtls_mpi_uint ) );
 
     /* N = A0 */
@@ -1299,7 +1411,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t
     M.n = N->n - ( p_limbs - adjust );
     if( M.n > p_limbs + adjust )
         M.n = p_limbs + adjust;
-    mbedtls_platform_zeroize( Mp, sizeof Mp );
+    memset( Mp, 0, sizeof Mp );
     memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) );
     if( shift != 0 )
         MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) );
@@ -1321,7 +1433,7 @@ static inline int ecp_mod_koblitz( mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t
     M.n = N->n - ( p_limbs - adjust );
     if( M.n > p_limbs + adjust )
         M.n = p_limbs + adjust;
-    mbedtls_platform_zeroize( Mp, sizeof Mp );
+    memset( Mp, 0, sizeof Mp );
     memcpy( Mp, N->p + p_limbs - adjust, M.n * sizeof( mbedtls_mpi_uint ) );
     if( shift != 0 )
         MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &M, shift ) );
@@ -1392,4 +1504,4 @@ static int ecp_mod_p256k1( mbedtls_mpi *N )
 
 #endif /* !MBEDTLS_ECP_ALT */
 
-/* #endif /\* MBEDTLS_ECP_C *\/ */
+#endif /* MBEDTLS_ECP_C */
diff --git a/third_party/mbedtls/everest.c b/third_party/mbedtls/everest.c
index fe9ff9d00..437efd6a9 100644
--- a/third_party/mbedtls/everest.c
+++ b/third_party/mbedtls/everest.c
@@ -16,1186 +16,255 @@
 │ limitations under the License.                                               │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/bits/bits.h"
-#include "libc/limits.h"
-#include "third_party/mbedtls/asn1.h"
-#include "third_party/mbedtls/bignum.h"
-#include "third_party/mbedtls/common.h"
-#include "third_party/mbedtls/error.h"
-#include "third_party/mbedtls/everest.h"
-#include "third_party/mbedtls/platform.h"
-#include "third_party/mbedtls/profile.h"
+#include "third_party/mbedtls/endian.h"
 
 asm(".ident\t\"\\n\\n\
 Everest (Apache 2.0)\\n\
 Copyright 2016-2018 INRIA and Microsoft Corporation\"");
 asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
 
-#if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
+#define DW(x)     (uint128_t)(x)
+#define EQ(x, y)  ((((x ^ y) | (~(x ^ y) + 1)) >> 63) - 1)
+#define GTE(x, y) (((x ^ ((x ^ y) | ((x - y) ^ y))) >> 63) - 1)
 
-#define load64_le(b) READ64LE(b)
-#define store64_le(b, i) WRITE64LE(b, i)
-
-static uint64_t
-FStar_UInt64_eq_mask(uint64_t a, uint64_t b)
-{
-  uint64_t x = a ^ b;
-  uint64_t minus_x = ~x + 1;
-  uint64_t x_or_minus_x = x | minus_x;
-  uint64_t xnx = x_or_minus_x >> 63;
-  return xnx - 1;
-}
-
-static uint64_t
-FStar_UInt64_gte_mask(uint64_t a, uint64_t b)
-{
-  uint64_t x = a;
-  uint64_t y = b;
-  uint64_t x_xor_y = x ^ y;
-  uint64_t x_sub_y = x - y;
-  uint64_t x_sub_y_xor_y = x_sub_y ^ y;
-  uint64_t q = x_xor_y | x_sub_y_xor_y;
-  uint64_t x_xor_q = x ^ q;
-  uint64_t x_xor_q_ = x_xor_q >> 63;
-  return x_xor_q_ - 1;
-}
-
-static uint32_t
-FStar_UInt32_eq_mask(uint32_t a, uint32_t b)
-{
-  uint32_t x = a ^ b;
-  uint32_t minus_x = ~x + 1;
-  uint32_t x_or_minus_x = x | minus_x;
-  uint32_t xnx = x_or_minus_x >> 31;
-  return xnx - 1;
-}
-
-static uint32_t
-FStar_UInt32_gte_mask(uint32_t a, uint32_t b)
-{
-  uint32_t x = a;
-  uint32_t y = b;
-  uint32_t x_xor_y = x ^ y;
-  uint32_t x_sub_y = x - y;
-  uint32_t x_sub_y_xor_y = x_sub_y ^ y;
-  uint32_t q = x_xor_y | x_sub_y_xor_y;
-  uint32_t x_xor_q = x ^ q;
-  uint32_t x_xor_q_ = x_xor_q >> 31;
-  return x_xor_q_ - 1;
-}
-
-static uint16_t
-FStar_UInt16_eq_mask(uint16_t a, uint16_t b)
-{
-  uint16_t x = a ^ b;
-  uint16_t minus_x = ~x + 1;
-  uint16_t x_or_minus_x = x | minus_x;
-  uint16_t xnx = x_or_minus_x >> 15;
-  return xnx - 1;
-}
-
-static uint16_t
-FStar_UInt16_gte_mask(uint16_t a, uint16_t b)
-{
-  uint16_t x = a;
-  uint16_t y = b;
-  uint16_t x_xor_y = x ^ y;
-  uint16_t x_sub_y = x - y;
-  uint16_t x_sub_y_xor_y = x_sub_y ^ y;
-  uint16_t q = x_xor_y | x_sub_y_xor_y;
-  uint16_t x_xor_q = x ^ q;
-  uint16_t x_xor_q_ = x_xor_q >> 15;
-  return x_xor_q_ - 1;
-}
-
-static uint8_t
-FStar_UInt8_eq_mask(uint8_t a, uint8_t b)
-{
-  uint8_t x = a ^ b;
-  uint8_t minus_x = ~x + 1;
-  uint8_t x_or_minus_x = x | minus_x;
-  uint8_t xnx = x_or_minus_x >> 7;
-  return xnx - 1;
-}
-
-static uint8_t
-FStar_UInt8_gte_mask(uint8_t a, uint8_t b)
-{
-  uint8_t x = a;
-  uint8_t y = b;
-  uint8_t x_xor_y = x ^ y;
-  uint8_t x_sub_y = x - y;
-  uint8_t x_sub_y_xor_y = x_sub_y ^ y;
-  uint8_t q = x_xor_y | x_sub_y_xor_y;
-  uint8_t x_xor_q = x ^ q;
-  uint8_t x_xor_q_ = x_xor_q >> 7;
-  return x_xor_q_ - 1;
-}
-
-static void
-Hacl_Bignum_Modulo_carry_top(uint64_t *b)
-{
-  uint64_t b4 = b[4];
-  uint64_t b0 = b[0];
-  uint64_t b4_ = b4 & 0x7ffffffffffff;
-  uint64_t b0_ = b0 + 19 * (b4 >> 51);
-  b[4] = b4_;
-  b[0] = b0_;
-}
-
-forceinline void
-Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, uint128_t *input)
-{
-  uint32_t i;
-  for (i = 0; i < 5; ++i)
-  {
-    uint128_t xi = input[i];
-    output[i] = xi;
+forceinline void HaclBignumCopy(uint64_t o[5], uint64_t p[5]) {
+  for (unsigned i = 0; i < 5; ++i) {
+    o[i] = p[i];
   }
 }
 
-forceinline void
-Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint128_t *output, uint64_t *input, uint64_t s)
-{
-  uint32_t i;
-  for (i = 0; i < 5; ++i)
-  {
-    uint128_t xi = output[i];
-    uint64_t yi = input[i];
-    output[i] = xi + (uint128_t)yi * s;
+forceinline void HaclBignumFsum(uint64_t o[5], uint64_t p[5]) {
+  for (unsigned i = 0; i < 5; ++i) {
+    o[i] += p[i];
   }
 }
 
-forceinline void
-Hacl_Bignum_Fproduct_carry_wide_(uint128_t *tmp)
-{
-  uint32_t i;
-  for (i = 0; i < 4; ++i)
-  {
-    uint32_t ctr = i;
-    uint128_t tctr = tmp[ctr];
-    uint128_t tctrp1 = tmp[ctr + 1];
-    uint64_t r0 = (uint64_t)tctr & 0x7ffffffffffff;
-    uint128_t c = tctr >> 51;
-    tmp[ctr] = (uint128_t)r0;
-    tmp[ctr + 1] = tctrp1 + c;
+forceinline void HaclBignumTrunc(uint64_t o[5], uint128_t p[5]) {
+  for (unsigned i = 0; i < 5; ++i) {
+    o[i] = p[i];
   }
 }
 
-forceinline void
-Hacl_Bignum_Fmul_shift_reduce(uint64_t *output)
-{
-  uint64_t tmp = output[4];
-  uint32_t i;
-  for (i = 0; i < 4; ++i)
-  {
-    uint32_t ctr = 5 - i - 1;
-    uint64_t z = output[ctr - 1];
-    output[ctr] = z;
-  }
-  output[0] = tmp * 19;
-}
-
-static inline void
-Hacl_Bignum_Fmul_mul_shift_reduce_(uint128_t *output, uint64_t *input, uint64_t *input2)
-{
-  uint32_t i;
-  for (i = 0; i < 4; ++i)
-  {
-    Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2[i]);
-    Hacl_Bignum_Fmul_shift_reduce(input);
-  }
-  Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2[4]);
-}
-
-static inline void
-Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2)
-{
-  uint64_t i0;
-  uint64_t i1;
-  uint64_t i0_;
-  uint64_t i1_;
-  uint128_t b4;
-  uint128_t b0;
-  uint128_t b4_;
-  uint128_t b0_;
-  uint128_t t[5];
-  uint64_t tmp[5];
-  t[0] = 0;
-  t[1] = 0;
-  t[2] = 0;
-  t[3] = 0;
-  t[4] = 0;
-  tmp[0] = input[0];
-  tmp[1] = input[1];
-  tmp[2] = input[2];
-  tmp[3] = input[3];
-  tmp[4] = input[4];
-  Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2);
-  Hacl_Bignum_Fproduct_carry_wide_(t);
-  b4 = t[4];
-  b0 = t[0];
-  b4_ = b4 & 0x7ffffffffffff;
-  b0_ = b0 + (uint128_t)19 * (uint64_t)(b4 >> 51);
-  t[4] = b4_;
-  t[0] = b0_;
-  Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
-  i0 = output[0];
-  i1 = output[1];
-  i0_ = i0 & 0x7ffffffffffff;
-  i1_ = i1 + (i0 >> 51);
-  output[0] = i0_;
-  output[1] = i1_;
-}
-
-forceinline void
-Hacl_Bignum_Fsquare_fsquare__(uint128_t *tmp, uint64_t *output)
-{
-  uint64_t r0 = output[0];
-  uint64_t r1 = output[1];
-  uint64_t r2 = output[2];
-  uint64_t r3 = output[3];
-  uint64_t r4 = output[4];
-  uint64_t d0 = r0 * 2;
-  uint64_t d1 = r1 * 2;
-  uint64_t d2 = r2 * 2 * 19;
-  uint64_t d419 = r4 * 19;
-  uint64_t d4 = d419 * 2;
-  uint128_t s0 = (uint128_t)r0 * r0 + (uint128_t)d4 * r1 + (uint128_t)d2 * r3;
-  uint128_t s1 = (uint128_t)d0 * r1 + (uint128_t)d4 * r2 + (uint128_t)(r3 * 19) * r3;
-  uint128_t s2 = (uint128_t)d0 * r2 + (uint128_t)r1 * r1 + (uint128_t)d4 * r3;
-  uint128_t s3 = (uint128_t)d0 * r3 + (uint128_t)d1 * r2 + (uint128_t)r4 * d419;
-  uint128_t s4 = (uint128_t)d0 * r4 + (uint128_t)d1 * r3 + (uint128_t)r2 * r2;
-  tmp[0] = s0;
-  tmp[1] = s1;
-  tmp[2] = s2;
-  tmp[3] = s3;
-  tmp[4] = s4;
-}
-
-forceinline void
-Hacl_Bignum_Fsquare_fsquare_(uint128_t *tmp, uint64_t *output)
-{
-  uint128_t b4;
-  uint128_t b0;
-  uint128_t b4_;
-  uint128_t b0_;
-  uint64_t i0;
-  uint64_t i1;
-  uint64_t i0_;
-  uint64_t i1_;
-  Hacl_Bignum_Fsquare_fsquare__(tmp, output);
-  Hacl_Bignum_Fproduct_carry_wide_(tmp);
-  b4 = tmp[4];
-  b0 = tmp[0];
-  b4_ = b4 & 0x7ffffffffffff;
-  b0_ = b0 + (uint128_t)19 * (b4 >> 51);
-  tmp[4] = b4_;
-  tmp[0] = b0_;
-  Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
-  i0 = output[0];
-  i1 = output[1];
-  i0_ = i0 & 0x7ffffffffffff;
-  i1_ = i1 + (i0 >> 51);
-  output[0] = i0_;
-  output[1] = i1_;
-}
-
-static void
-Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, uint128_t *tmp, uint32_t count1)
-{
-  uint32_t i;
-  Hacl_Bignum_Fsquare_fsquare_(tmp, input);
-  for (i = 1; i < count1; ++i)
-    Hacl_Bignum_Fsquare_fsquare_(tmp, input);
-}
-
-forceinline void
-Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1)
-{
-  uint128_t t[5];
-  t[0] = 0;
-  t[1] = 0;
-  t[2] = 0;
-  t[3] = 0;
-  t[4] = 0;
-  output[0] = input[0];
-  output[1] = input[1];
-  output[2] = input[2];
-  output[3] = input[3];
-  output[4] = input[4];
-  Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
-}
-
-forceinline void
-Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1)
-{
-  uint128_t t[5];
-  t[0] = 0;
-  t[1] = 0;
-  t[2] = 0;
-  t[3] = 0;
-  t[4] = 0;
-  Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
-}
-
-forceinline void
-Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z)
-{
-  uint32_t i;
-  uint64_t buf[20];
-  uint64_t *a0 = buf;
-  uint64_t *t00 = buf + 5;
-  uint64_t *b0 = buf + 10;
-  uint64_t *t01;
-  uint64_t *b1;
-  uint64_t *c0;
-  uint64_t *a;
-  uint64_t *t0;
-  uint64_t *b;
-  uint64_t *c;
-  for (i = 0; i < 20; ++i) buf[i] = 0;
-  Hacl_Bignum_Fsquare_fsquare_times(a0, z, 1);
-  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, 2);
-  Hacl_Bignum_Fmul_fmul(b0, t00, z);
-  Hacl_Bignum_Fmul_fmul(a0, b0, a0);
-  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, 1);
-  Hacl_Bignum_Fmul_fmul(b0, t00, b0);
-  Hacl_Bignum_Fsquare_fsquare_times(t00, b0, 5);
-  t01 = buf + 5;
-  b1 = buf + 10;
-  c0 = buf + 15;
-  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
-  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, 10);
-  Hacl_Bignum_Fmul_fmul(c0, t01, b1);
-  Hacl_Bignum_Fsquare_fsquare_times(t01, c0, 20);
-  Hacl_Bignum_Fmul_fmul(t01, t01, c0);
-  Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, 10);
-  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
-  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, 50);
-  a = buf;
-  t0 = buf + 5;
-  b = buf + 10;
-  c = buf + 15;
-  Hacl_Bignum_Fmul_fmul(c, t0, b);
-  Hacl_Bignum_Fsquare_fsquare_times(t0, c, 100);
-  Hacl_Bignum_Fmul_fmul(t0, t0, c);
-  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, 50);
-  Hacl_Bignum_Fmul_fmul(t0, t0, b);
-  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, 5);
-  Hacl_Bignum_Fmul_fmul(out, t0, a);
-}
-
-forceinline void
-Hacl_Bignum_fsum(uint64_t *a, uint64_t *b)
-{
-  uint32_t i;
-  for (i = 0; i < 5; ++i)
-  {
-    uint64_t xi = a[i];
-    uint64_t yi = b[i];
-    a[i] = xi + yi;
+forceinline void HaclBignumCarry(uint64_t p[5]) {
+  for (unsigned i = 0; i < 4; ++i) {
+    p[i + 1] += p[i] >> 51;
+    p[i] &= 0x7ffffffffffff;
   }
 }
 
-forceinline void
-Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b)
-{
-  uint32_t i;
-  uint64_t tmp[5];
-  tmp[0] = b[0] + 0x3fffffffffff68;
-  tmp[1] = b[1] + 0x3ffffffffffff8;
-  tmp[2] = b[2] + 0x3ffffffffffff8;
-  tmp[3] = b[3] + 0x3ffffffffffff8;
-  tmp[4] = b[4] + 0x3ffffffffffff8;
-  for (i = 0; i < 5; ++i)
-  {
-    uint64_t xi = a[i];
-    uint64_t yi = tmp[i];
-    a[i] = yi - xi;
+forceinline void HaclBignumCarryWide(uint128_t p[5]) {
+  for (unsigned i = 0; i < 4; ++i) {
+    p[i + 1] += p[i] >> 51;
+    p[i] &= 0x7ffffffffffff;
   }
 }
 
-forceinline void
-Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s)
-{
-  uint32_t i;
-  uint128_t b4;
-  uint128_t b0;
-  uint128_t b4_;
-  uint128_t b0_;
-  uint128_t tmp[5];
-  for (i = 0; i < 5; ++i)
-  {
-    tmp[i] = (uint128_t)b[i] * s;
-  }
-  Hacl_Bignum_Fproduct_carry_wide_(tmp);
-  b4 = tmp[4];
-  b0 = tmp[0];
-  b4_ = b4 & 0x7ffffffffffff;
-  b0_ = b0 + (uint128_t)19 * (uint64_t)(b4 >> 51);
-  tmp[4] = b4_;
-  tmp[0] = b0_;
-  Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
-}
-
-forceinline void
-Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b)
-{
-  Hacl_Bignum_Fmul_fmul(output, a, b);
-}
-
-forceinline void
-Hacl_Bignum_crecip(uint64_t *output, uint64_t *input)
-{
-  Hacl_Bignum_Crecip_crecip(output, input);
-}
-
-static void
-Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
-{
-  uint32_t i = ctr - 1;
-  uint64_t ai = a[i];
-  uint64_t bi = b[i];
-  uint64_t x = swap1 & (ai ^ bi);
-  uint64_t ai1 = ai ^ x;
-  uint64_t bi1 = bi ^ x;
-  a[i] = ai1;
-  b[i] = bi1;
-}
-
-static void
-Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
-{
-  if (ctr)
-  {
-    Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr);
-    Hacl_EC_Point_swap_conditional_(a, b, swap1, ctr - 1);
-  }
-}
-
-static void
-Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap)
-{
-  uint64_t swap1 = 0 - iswap;
-  Hacl_EC_Point_swap_conditional_(a, b, swap1, 5);
-  Hacl_EC_Point_swap_conditional_(a + 5, b + 5, swap1, 5);
-}
-
-static void
-Hacl_EC_Point_copy(uint64_t *output, uint64_t *input)
-{
-  output[0] = input[0];
-  output[1] = input[1];
-  output[2] = input[2];
-  output[3] = input[3];
-  output[4] = input[4];
-  output[5] = input[5];
-  output[6] = input[6];
-  output[7] = input[7];
-  output[8] = input[8];
-  output[9] = input[9];
-}
-
-static void
-Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input)
-{
-  uint64_t i0 = load64_le(input);
-  uint8_t *x00 = input + 6;
-  uint64_t i1 = load64_le(x00);
-  uint8_t *x01 = input + 12;
-  uint64_t i2 = load64_le(x01);
-  uint8_t *x02 = input + 19;
-  uint64_t i3 = load64_le(x02);
-  uint8_t *x0 = input + 24;
-  uint64_t i4 = load64_le(x0);
-  uint64_t output0 = i0 & 0x7ffffffffffff;
-  uint64_t output1 = i1 >> 3 & 0x7ffffffffffff;
-  uint64_t output2 = i2 >> 6 & 0x7ffffffffffff;
-  uint64_t output3 = i3 >> 1 & 0x7ffffffffffff;
-  uint64_t output4 = i4 >> 12 & 0x7ffffffffffff;
-  output[0] = output0;
-  output[1] = output1;
-  output[2] = output2;
-  output[3] = output3;
-  output[4] = output4;
-}
-
-static void
-Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input)
-{
-  uint64_t t0 = input[0];
-  uint64_t t1 = input[1];
-  uint64_t t2 = input[2];
-  uint64_t t3 = input[3];
-  uint64_t t4 = input[4];
-  uint64_t t1_ = t1 + (t0 >> 51);
-  uint64_t t0_ = t0 & 0x7ffffffffffff;
-  uint64_t t2_ = t2 + (t1_ >> 51);
-  uint64_t t1__ = t1_ & 0x7ffffffffffff;
-  uint64_t t3_ = t3 + (t2_ >> 51);
-  uint64_t t2__ = t2_ & 0x7ffffffffffff;
-  uint64_t t4_ = t4 + (t3_ >> 51);
-  uint64_t t3__ = t3_ & 0x7ffffffffffff;
-  input[0] = t0_;
-  input[1] = t1__;
-  input[2] = t2__;
-  input[3] = t3__;
-  input[4] = t4_;
-}
-
-static void
-Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input)
-{
-  Hacl_EC_Format_fcontract_first_carry_pass(input);
-  Hacl_Bignum_Modulo_carry_top(input);
-}
-
-static void
-Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input)
-{
-  uint64_t t0 = input[0];
-  uint64_t t1 = input[1];
-  uint64_t t2 = input[2];
-  uint64_t t3 = input[3];
-  uint64_t t4 = input[4];
-  uint64_t t1_ = t1 + (t0 >> 51);
-  uint64_t t0_ = t0 & 0x7ffffffffffff;
-  uint64_t t2_ = t2 + (t1_ >> 51);
-  uint64_t t1__ = t1_ & 0x7ffffffffffff;
-  uint64_t t3_ = t3 + (t2_ >> 51);
-  uint64_t t2__ = t2_ & 0x7ffffffffffff;
-  uint64_t t4_ = t4 + (t3_ >> 51);
-  uint64_t t3__ = t3_ & 0x7ffffffffffff;
-  input[0] = t0_;
-  input[1] = t1__;
-  input[2] = t2__;
-  input[3] = t3__;
-  input[4] = t4_;
-}
-
-static void
-Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input)
-{
-  uint64_t i0;
-  uint64_t i1;
-  uint64_t i0_;
-  uint64_t i1_;
-  Hacl_EC_Format_fcontract_second_carry_pass(input);
-  Hacl_Bignum_Modulo_carry_top(input);
-  i0 = input[0];
-  i1 = input[1];
-  i0_ = i0 & 0x7ffffffffffff;
-  i1_ = i1 + (i0 >> 51);
-  input[0] = i0_;
-  input[1] = i1_;
-}
-
-static void
-Hacl_EC_Format_fcontract_trim(uint64_t *input)
-{
-  uint64_t a0 = input[0];
-  uint64_t a1 = input[1];
-  uint64_t a2 = input[2];
-  uint64_t a3 = input[3];
-  uint64_t a4 = input[4];
-  uint64_t mask0 = FStar_UInt64_gte_mask(a0, 0x7ffffffffffed);
-  uint64_t mask1 = FStar_UInt64_eq_mask( a1, 0x7ffffffffffff);
-  uint64_t mask2 = FStar_UInt64_eq_mask( a2, 0x7ffffffffffff);
-  uint64_t mask3 = FStar_UInt64_eq_mask( a3, 0x7ffffffffffff);
-  uint64_t mask4 = FStar_UInt64_eq_mask( a4, 0x7ffffffffffff);
-  uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
-  uint64_t a0_ = a0 - (0x7ffffffffffed & mask);
-  uint64_t a1_ = a1 - (0x7ffffffffffff & mask);
-  uint64_t a2_ = a2 - (0x7ffffffffffff & mask);
-  uint64_t a3_ = a3 - (0x7ffffffffffff & mask);
-  uint64_t a4_ = a4 - (0x7ffffffffffff & mask);
-  input[0] = a0_;
-  input[1] = a1_;
-  input[2] = a2_;
-  input[3] = a3_;
-  input[4] = a4_;
-}
-
-static void
-Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input)
-{
-  uint64_t t0 = input[0];
-  uint64_t t1 = input[1];
-  uint64_t t2 = input[2];
-  uint64_t t3 = input[3];
-  uint64_t t4 = input[4];
-  uint64_t o0 = t1 << 51 | t0;
-  uint64_t o1 = t2 << 38 | t1 >> 13;
-  uint64_t o2 = t3 << 25 | t2 >> 26;
-  uint64_t o3 = t4 << 12 | t3 >> 39;
-  uint8_t *b0 = output;
-  uint8_t *b1 = output + 8;
-  uint8_t *b2 = output + 16;
-  uint8_t *b3 = output + 24;
-  store64_le(b0, o0);
-  store64_le(b1, o1);
-  store64_le(b2, o2);
-  store64_le(b3, o3);
-}
-
-static void
-Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input)
-{
-  Hacl_EC_Format_fcontract_first_carry_full(input);
-  Hacl_EC_Format_fcontract_second_carry_full(input);
-  Hacl_EC_Format_fcontract_trim(input);
-  Hacl_EC_Format_fcontract_store(output, input);
-}
-
-static void
-Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point)
-{
-  uint32_t i;
-  uint64_t *x = point;
-  uint64_t *z = point + 5;
-  uint64_t buf[10];
-  uint64_t *zmone = buf;
-  uint64_t *sc = buf + 5;
-  for (i = 0; i < 10; ++i) buf[i] = 0;
-  Hacl_Bignum_crecip(zmone, z);
-  Hacl_Bignum_fmul(sc, x, zmone);
-  Hacl_EC_Format_fcontract(scalar, sc);
-}
-
-static void
-Hacl_EC_AddAndDouble_fmonty(
-  uint64_t *pp,
-  uint64_t *ppq,
-  uint64_t *p,
-  uint64_t *pq,
-  uint64_t *qmqp
-)
-{
-  uint32_t i;
-  uint64_t *qx = qmqp;
-  uint64_t *x2 = pp;
-  uint64_t *z2 = pp + 5;
-  uint64_t *x3 = ppq;
-  uint64_t *z3 = ppq + 5;
-  uint64_t *x = p;
-  uint64_t *z = p + 5;
-  uint64_t *xprime = pq;
-  uint64_t *zprime = pq + 5;
-  uint64_t buf[40];
-  uint64_t *origx = buf;
-  uint64_t *origxprime0 = buf + 5;
-  uint64_t *xxprime0 = buf + 25;
-  uint64_t *zzprime0 = buf + 30;
-  uint64_t *origxprime;
-  uint64_t *xx0;
-  uint64_t *zz0;
-  uint64_t *xxprime;
-  uint64_t *zzprime;
-  uint64_t *zzzprime;
-  uint64_t *zzz;
-  uint64_t *xx;
-  uint64_t *zz;
-  uint64_t scalar;
-  for (i = 0; i < 40; ++i) buf[i] = 0;
-  origx[0] = x[0];
-  origx[1] = x[1];
-  origx[2] = x[2];
-  origx[3] = x[3];
-  origx[4] = x[4];
-  Hacl_Bignum_fsum(x, z);
-  Hacl_Bignum_fdifference(z, origx);
-  origxprime0[0] = xprime[0];
-  origxprime0[1] = xprime[1];
-  origxprime0[2] = xprime[2];
-  origxprime0[3] = xprime[3];
-  origxprime0[4] = xprime[4];
-  Hacl_Bignum_fsum(xprime, zprime);
-  Hacl_Bignum_fdifference(zprime, origxprime0);
-  Hacl_Bignum_fmul(xxprime0, xprime, z);
-  Hacl_Bignum_fmul(zzprime0, x, zprime);
-  origxprime = buf + 5;
-  xx0 = buf + 15;
-  zz0 = buf + 20;
-  xxprime = buf + 25;
-  zzprime = buf + 30;
-  zzzprime = buf + 35;
-  origxprime[0] = xxprime[0];
-  origxprime[1] = xxprime[1];
-  origxprime[2] = xxprime[2];
-  origxprime[3] = xxprime[3];
-  origxprime[4] = xxprime[4];
-  Hacl_Bignum_fsum(xxprime, zzprime);
-  Hacl_Bignum_fdifference(zzprime, origxprime);
-  Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, 1);
-  Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, 1);
-  Hacl_Bignum_fmul(z3, zzzprime, qx);
-  Hacl_Bignum_Fsquare_fsquare_times(xx0, x, 1);
-  Hacl_Bignum_Fsquare_fsquare_times(zz0, z, 1);
-  zzz = buf + 10;
-  xx = buf + 15;
-  zz = buf + 20;
-  Hacl_Bignum_fmul(x2, xx, zz);
-  Hacl_Bignum_fdifference(zz, xx);
-  scalar = 121665;
-  Hacl_Bignum_fscalar(zzz, zz, scalar);
-  Hacl_Bignum_fsum(zzz, xx);
-  Hacl_Bignum_fmul(z2, zzz, zz);
-}
-
-static void
-Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
-  uint64_t *nq,
-  uint64_t *nqpq,
-  uint64_t *nq2,
-  uint64_t *nqpq2,
-  uint64_t *q,
-  uint8_t byt
-)
-{
-  uint64_t bit = byt >> 7;
-  Hacl_EC_Point_swap_conditional(nq, nqpq, bit);
-  Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q);
-  Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit);
-}
-
-static void
-Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(
-  uint64_t *nq,
-  uint64_t *nqpq,
-  uint64_t *nq2,
-  uint64_t *nqpq2,
-  uint64_t *q,
-  uint8_t byt
-)
-{
-  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
-  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt<<1);
-}
-
-static void
-Hacl_EC_Ladder_SmallLoop_cmult_small_loop(
-  uint64_t *nq,
-  uint64_t *nqpq,
-  uint64_t *nq2,
-  uint64_t *nqpq2,
-  uint64_t *q,
-  uint8_t byt,
-  uint32_t i
-)
-{
-  if (i)
-  {
-    uint32_t i_ = i - 1;
-    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt);
-    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt << 2, i_);
-  }
-}
-
-static void
-Hacl_EC_Ladder_BigLoop_cmult_big_loop(
-  uint8_t *n1,
-  uint64_t *nq,
-  uint64_t *nqpq,
-  uint64_t *nq2,
-  uint64_t *nqpq2,
-  uint64_t *q,
-  uint32_t i
-)
-{
-  if (i)
-  {
-    uint32_t i1 = i - 1;
-    uint8_t byte = n1[i1];
-    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, 4);
-    Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1);
-  }
-}
-
-static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q)
-{
-  uint32_t i;
-  uint64_t point_buf[40];
-  uint64_t *nq = point_buf;
-  uint64_t *nqpq = point_buf + 10;
-  uint64_t *nq2 = point_buf + 20;
-  uint64_t *nqpq2 = point_buf + 30;
-  for (i = 0; i < 40; ++i) point_buf[i] = 0;
-  Hacl_EC_Point_copy(nqpq, q);
-  nq[0] = 1;
-  Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32);
-  Hacl_EC_Point_copy(result, nq);
-}
-
-static void
-Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
-{
-  uint32_t i;
-  uint64_t buf0[10];
-  uint64_t *x0 = buf0;
-  uint64_t *z = buf0 + 5;
-  for (i = 0; i < 10; ++i) buf0[i] = 0;
-  Hacl_EC_Format_fexpand(x0, basepoint);
-  z[0] = 1;
-  {
-    uint8_t e[32];
-    uint8_t e0;
-    uint8_t e31;
-    uint8_t e01;
-    uint8_t e311;
-    uint8_t e312;
-    uint8_t *scalar;
-    for (i = 0; i < 32; ++i) {
-      e[i] = secret[i];
+static void HaclBignumFmulReduce(uint128_t o[5], uint64_t p[5], uint64_t q[5]) {
+  uint64_t t;
+  unsigned i, j;
+  for (i = 0;; ++i) {
+    for (j = 0; j < 5; ++j) {
+      o[j] += DW(p[j]) * q[i];
     }
-    e0 = e[0];
-    e31 = e[31];
-    e01 = e0 & 248;
-    e311 = e31 & 127;
-    e312 = e311 | 64;
-    e[0] = e01;
-    e[31] = e312;
-    scalar = e;
-    {
-      uint64_t buf[15];
-      buf[0] = 1;
-      for (i = 1; i < 15; ++i) buf[i] = 0;
-      Hacl_EC_Ladder_cmult(buf, scalar, buf0);
-      Hacl_EC_Format_scalar_of_point(mypublic, buf);
+    if (i == 4) break;
+    t = p[4] * 19;
+    p[4] = p[3];
+    p[3] = p[2];
+    p[2] = p[1];
+    p[1] = p[0];
+    p[0] = t;
+  }
+}
+
+static void HaclBignumFmul(uint64_t o[5], uint64_t p[5], uint64_t q[5]) {  // o = p * q on five 51-bit limbs; o may alias p or q
+  uint128_t t[5] = {0};  // must start at zero: HaclBignumFmulReduce only adds into it
+  uint64_t u[5] = {p[0], p[1], p[2], p[3], p[4]};  // scratch copy: HaclBignumFmulReduce rotates its 2nd argument in place
+  HaclBignumFmulReduce(t, u, q);
+  HaclBignumCarryWide(t);  // move bits above 51 of t[0..3] into the next limb
+  t[0] += DW(19) * (uint64_t)(t[4] >> 51);  // top-limb overflow wraps to limb 0 scaled by 19
+  HaclBignumTrunc(o, t);
+  o[1] += o[0] >> 51;  // the fold above can push limb 0 past 51 bits
+  o[4] &= 0x7ffffffffffff;  // these bits were already folded into limb 0
+  o[0] &= 0x7ffffffffffff;
+}
+
+static void HaclBignumFsquare(uint128_t t[5], uint64_t p[5]) {  // t = p * p as wide limbs, no carrying; t is fully overwritten
+  t[0] = DW(p[0] * 1) * p[0] + DW(p[4] * 38) * p[1] + DW(p[2] * 38) * p[3];  // 38 = 2 * 19: doubled cross term that wrapped past limb 4
+  t[1] = DW(p[0] * 2) * p[1] + DW(p[4] * 38) * p[2] + DW(p[3] * 19) * p[3];  // 19: the p[3]^2 term wrapped once, not doubled
+  t[2] = DW(p[0] * 2) * p[2] + DW(p[1] * 01) * p[1] + DW(p[4] * 38) * p[3];  // NOTE(review): p[i] * 38 is a 64-bit multiply; assumes limbs are small enough not to overflow
+  t[3] = DW(p[0] * 2) * p[3] + DW(p[1] * 02) * p[2] + DW(p[4]) * (p[4] * 19);
+  t[4] = DW(p[0] * 2) * p[4] + DW(p[1] * 02) * p[3] + DW(p[2]) * p[2];  // no wrapped terms land in the top limb
+}
+
+static void HaclBignumFsqa(uint64_t o[5], uint32_t n) {  // squares o in place n times; n == 0 leaves o untouched
+  uint128_t t[5];  // no initializer needed: HaclBignumFsquare assigns every element
+  for (unsigned i = 0; i < n; ++i) {
+    HaclBignumFsquare(t, o);
+    HaclBignumCarryWide(t);
+    t[0] += DW(19) * (uint64_t)(t[4] >> 51);  // top-limb overflow wraps to limb 0 scaled by 19
+    HaclBignumTrunc(o, t);
+    o[1] += o[0] >> 51;  // the fold above can push limb 0 past 51 bits
+    o[4] &= 0x7ffffffffffff;  // these bits were already folded into limb 0
+    o[0] &= 0x7ffffffffffff;
+  }
+}
+
+static void HaclBignumFsqr(uint64_t o[5], uint64_t p[5], uint32_t n) {  // o = p squared n times (p^(2^n)); p is left unmodified
+  HaclBignumCopy(o, p);
+  HaclBignumFsqa(o, n);
+}
+
+static void HaclBignumCrecip(uint64_t o[5], uint64_t z[5]) {  // o = z^(2^255 - 21) via a fixed square-and-multiply chain
+  uint64_t b[4][5];  // four scratch elements; every one is written before it is read
+  HaclBignumFsqr(b[0], z, 1);        // b0 = z^2
+  HaclBignumFsqr(b[1], b[0], 2);     // b1 = z^8
+  HaclBignumFmul(b[2], b[1], z);     // b2 = z^9
+  HaclBignumFmul(b[0], b[2], b[0]);  // b0 = z^11
+  HaclBignumFsqr(b[1], b[0], 1);     // b1 = z^22
+  HaclBignumFmul(b[2], b[1], b[2]);  // b2 = z^(2^5 - 1)
+  HaclBignumFsqr(b[1], b[2], 5);     // b1 = z^(2^10 - 2^5)
+  HaclBignumFmul(b[2], b[1], b[2]);  // b2 = z^(2^10 - 1)
+  HaclBignumFsqr(b[1], b[2], 10);    // b1 = z^(2^20 - 2^10)
+  HaclBignumFmul(b[3], b[1], b[2]);  // b3 = z^(2^20 - 1)
+  HaclBignumFsqr(b[1], b[3], 20);    // b1 = z^(2^40 - 2^20)
+  HaclBignumFmul(b[1], b[1], b[3]);  // b1 = z^(2^40 - 1)
+  HaclBignumFsqa(b[1], 10);          // b1 = z^(2^50 - 2^10)
+  HaclBignumFmul(b[2], b[1], b[2]);  // b2 = z^(2^50 - 1)
+  HaclBignumFsqr(b[1], b[2], 50);    // b1 = z^(2^100 - 2^50)
+  HaclBignumFmul(b[3], b[1], b[2]);  // b3 = z^(2^100 - 1)
+  HaclBignumFsqr(b[1], b[3], 100);   // b1 = z^(2^200 - 2^100)
+  HaclBignumFmul(b[1], b[1], b[3]);  // b1 = z^(2^200 - 1)
+  HaclBignumFsqa(b[1], 50);          // b1 = z^(2^250 - 2^50)
+  HaclBignumFmul(b[1], b[1], b[2]);  // b1 = z^(2^250 - 1)
+  HaclBignumFsqa(b[1], 5);           // b1 = z^(2^255 - 2^5)
+  HaclBignumFmul(o, b[1], b[0]);     // o  = z^(2^255 - 2^5 + 11) = z^(2^255 - 21)
+}
+
+static void HaclBignumFdif(uint64_t a[5], uint64_t b[5]) {  // a = b - a limb by limb (result lands in a, not b)
+  a[0] = b[0] + 0x3fffffffffff68 - a[0];  // bias is 8 * (2^51 - 19)
+  a[1] = b[1] + 0x3ffffffffffff8 - a[1];  // bias is 8 * (2^51 - 1); the five biases together equal 8 * (2^255 - 19)
+  a[2] = b[2] + 0x3ffffffffffff8 - a[2];
+  a[3] = b[3] + 0x3ffffffffffff8 - a[3];
+  a[4] = b[4] + 0x3ffffffffffff8 - a[4];  // NOTE(review): unsigned subtraction stays non-negative only while a[i] <= b[i] + bias
+}
+
+static void HaclBignumFscalar(uint64_t o[5], uint64_t p[5], uint64_t s) {  // o = p * s for a 64-bit scalar s
+  unsigned i;
+  uint128_t t[5];
+  for (i = 0; i < 5; ++i) t[i] = DW(p[i]) * s;  // widen before multiplying so each product keeps all 128 bits
+  HaclBignumCarryWide(t);
+  t[0] += DW(19) * (uint64_t)(t[4] >> 51);  // top-limb overflow wraps to limb 0 scaled by 19
+  t[4] &= 0x7ffffffffffff;
+  HaclBignumTrunc(o, t);  // limb 0 is not carried again here, so it can exceed 51 bits on return
+}
+
+static void HaclEcPointSwap(uint64_t a[2][5], uint64_t b[2][5], uint64_t m) {  // swaps a and b when m == 1, leaves them when m == 0, without branching on m
+  unsigned i, j;
+  uint64_t x, y;
+  for (i = 0; i < 2; ++i) {
+    for (j = 0; j < 5; ++j) {
+      x = a[i][j] ^ (-m & (a[i][j] ^ b[i][j]));  // -m is all-ones for m == 1 and zero for m == 0; other m values are not meaningful
+      y = b[i][j] ^ (-m & (a[i][j] ^ b[i][j]));
+      a[i][j] = x;
+      b[i][j] = y;
     }
   }
 }
 
-static void
-mbedtls_x25519_init( mbedtls_x25519_context *ctx )
-{
-    mbedtls_platform_zeroize( ctx, sizeof( mbedtls_x25519_context ) );
+static void HaclEcFormatFexpand(uint64_t o[5], const uint8_t p[32]) {  // unpacks 32 input bytes into five 51-bit limbs; p is read-only so const callers need no cast
+  o[0] = READ64LE(p + 000) >> 00 & 0x7ffffffffffff;  // bits   0..50  (byte offsets are octal; assumes READ64LE is an 8-byte little-endian load)
+  o[1] = READ64LE(p + 006) >> 03 & 0x7ffffffffffff;  // bits  51..101 (byte 6, bit 3)
+  o[2] = READ64LE(p + 014) >> 06 & 0x7ffffffffffff;  // bits 102..152 (byte 12, bit 6)
+  o[3] = READ64LE(p + 023) >> 01 & 0x7ffffffffffff;  // bits 153..203 (byte 19, bit 1)
+  o[4] = READ64LE(p + 030) >> 12 & 0x7ffffffffffff;  // bits 204..254 (byte 24, bit 12; this shift is decimal); bit 255 is dropped
 }
 
-static void
-mbedtls_x25519_free( mbedtls_x25519_context *ctx )
-{
-    if( !ctx )
-        return;
-    mbedtls_platform_zeroize( ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES );
-    mbedtls_platform_zeroize( ctx->peer_point, MBEDTLS_X25519_KEY_SIZE_BYTES );
+static void HaclEcFormatFcontract(uint8_t o[32], uint64_t p[5]) {  // reduces p in place, then packs its five 51-bit limbs into 32 bytes
+  uint64_t m;
+  HaclBignumCarry(p);
+  p[0] += 19 * (p[4] >> 51);  // first pass: wrap top-limb overflow, 2^255 = 19 (mod 2^255-19)
+  p[4] &= 0x7ffffffffffff;
+  HaclBignumCarry(p);
+  p[0] += 19 * (p[4] >> 51);  // second carry/wrap pass
+  p[1] += p[0] >> 51;
+  p[0] &= 0x7ffffffffffff;
+  p[1] &= 0x7ffffffffffff;
+  p[4] &= 0x7ffffffffffff;
+  m = GTE(p[0], 0x7ffffffffffed);  // 2^51 - 19 is the lowest limb of the prime; GTE/EQ presumably yield an all-ones or zero mask - confirm
+  m &= EQ(p[1], 0x7ffffffffffff);
+  m &= EQ(p[2], 0x7ffffffffffff);
+  m &= EQ(p[3], 0x7ffffffffffff);
+  m &= EQ(p[4], 0x7ffffffffffff);  // m stays set only when limbs 1..4 are all 2^51 - 1, i.e. the value is >= 2^255-19
+  p[0] -= 0x7ffffffffffed & m;  // subtract the prime's limbs when m is set, zero otherwise
+  p[1] -= 0x7ffffffffffff & m;
+  p[2] -= 0x7ffffffffffff & m;
+  p[3] -= 0x7ffffffffffff & m;
+  p[4] -= 0x7ffffffffffff & m;
+  Write64le(o + 000, p[1] << 51 | p[0] >> 00);  // output offsets are octal: 0, 8, 16, 24
+  Write64le(o + 010, p[2] << 38 | p[1] >> 13);  // low 13 bits of limb 1 went into the previous word
+  Write64le(o + 020, p[3] << 25 | p[2] >> 26);  // low 26 bits of limb 2 went into the previous word
+  Write64le(o + 030, p[4] << 12 | p[3] >> 39);  // low 39 bits of limb 3 went into the previous word
 }
 
-static int
-mbedtls_x25519_make_params( mbedtls_x25519_context *ctx, size_t *olen,
-                            unsigned char *buf, size_t blen,
-                            int( *f_rng )(void *, unsigned char *, size_t),
-                            void *p_rng )
-{
-    int ret = 0;
-    uint8_t base[MBEDTLS_X25519_KEY_SIZE_BYTES] = {0};
-    if( ( ret = f_rng( p_rng, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) ) != 0 )
-        return ret;
-    *olen = MBEDTLS_X25519_KEY_SIZE_BYTES + 4;
-    if( blen < *olen )
-        return( MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL );
-    *buf++ = MBEDTLS_ECP_TLS_NAMED_CURVE;
-    *buf++ = MBEDTLS_ECP_TLS_CURVE25519 >> 8;
-    *buf++ = MBEDTLS_ECP_TLS_CURVE25519 & 0xFF;
-    *buf++ = MBEDTLS_X25519_KEY_SIZE_BYTES;
-    base[0] = 9;
-    Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, base );
-    base[0] = 0;
-    if( timingsafe_memcmp( buf, base, MBEDTLS_X25519_KEY_SIZE_BYTES) == 0 )
-        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
-    return( 0 );
+static void HaclEcFormatScalarOfPoint(uint8_t o[32], uint64_t p[2][5]) {  // o = packed bytes of p[0] / p[1]
+  uint64_t t[2][5];
+  HaclBignumCrecip(t[0], p[1]);  // t[0] = inverse of p[1]
+  HaclBignumFmul(t[1], p[0], t[0]);  // t[1] = p[0] / p[1], assuming Fmul(o, a, b) sets o = a * b
+  HaclEcFormatFcontract(o, t[1]);  // reduce and serialize into o
 }
 
-static int
-mbedtls_x25519_read_params( mbedtls_x25519_context *ctx,
-                            const unsigned char **buf, const unsigned char *end )
-{
-    if( end - *buf < MBEDTLS_X25519_KEY_SIZE_BYTES + 1 )
-        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
-    if( ( *(*buf)++ != MBEDTLS_X25519_KEY_SIZE_BYTES ) )
-        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
-    memcpy( ctx->peer_point, *buf, MBEDTLS_X25519_KEY_SIZE_BYTES );
-    *buf += MBEDTLS_X25519_KEY_SIZE_BYTES;
-    return( 0 );
+static void HaclEcAddAndDoubleFmonty(uint64_t xz2[2][5], uint64_t xz3[2][5],
+                                     uint64_t xz[2][5], uint64_t xzprime[2][5],
+                                     uint64_t qx[5]) {  // xz2 = double of xz, xz3 = xz + xzprime combined with qx; overwrites xz and xzprime
+  uint64_t b[7][5];  // notes below write xz = (x:z), xzprime = (x':z'), and assume Fsum(a, b) does a += b and Fmul(o, a, b) does o = a * b
+  HaclBignumCopy(b[0], xz[0]);  // b0 = x
+  HaclBignumFsum(xz[0], xz[1]);  // xz[0] = x + z
+  HaclBignumFdif(xz[1], b[0]);  // xz[1] = x - z
+  HaclBignumCopy(b[0], xzprime[0]);  // b0 = x'
+  HaclBignumFsum(xzprime[0], xzprime[1]);  // xzprime[0] = x' + z'
+  HaclBignumFdif(xzprime[1], b[0]);  // xzprime[1] = x' - z'
+  HaclBignumFmul(b[4], xzprime[0], xz[1]);  // b4 = (x' + z')(x - z)
+  HaclBignumFmul(b[5], xz[0], xzprime[1]);  // b5 = (x + z)(x' - z')
+  HaclBignumCopy(b[0], b[4]);  // keep b4 for the difference below
+  HaclBignumFsum(b[4], b[5]);  // b4 = b4 + b5
+  HaclBignumFdif(b[5], b[0]);  // b5 = old b4 - b5
+  HaclBignumFsqr(xz3[0], b[4], 1);  // xz3 x-part = (b4 + b5)^2
+  HaclBignumFsqr(b[6], b[5], 1);  // b6 = (old b4 - b5)^2
+  HaclBignumFmul(xz3[1], b[6], qx);  // xz3 z-part = qx * b6
+  HaclBignumFsqr(b[2], xz[0], 1);  // b2 = (x + z)^2
+  HaclBignumFsqr(b[3], xz[1], 1);  // b3 = (x - z)^2
+  HaclBignumFmul(xz2[0], b[2], b[3]);  // xz2 x-part = (x + z)^2 (x - z)^2
+  HaclBignumFdif(b[3], b[2]);  // b3 = (x + z)^2 - (x - z)^2 = 4xz
+  HaclBignumFscalar(b[1], b[3], 121665);  // b1 = 121665 * 4xz; 121665 is the a24 constant of RFC 7748
+  HaclBignumFsum(b[1], b[2]);  // b1 = (x + z)^2 + 121665 * 4xz
+  HaclBignumFmul(xz2[1], b[1], b[3]);  // xz2 z-part = 4xz * b1
 }
 
-static int
-mbedtls_x25519_get_params( mbedtls_x25519_context *ctx, const mbedtls_ecp_keypair *key,
-                           mbedtls_x25519_ecdh_side side )
-{
-    size_t olen = 0;
-    switch( side ) {
-    case MBEDTLS_X25519_ECDH_THEIRS:
-        return mbedtls_ecp_point_write_binary( &key->grp, &key->Q, 
-                                               MBEDTLS_ECP_PF_COMPRESSED, 
-                                               &olen, ctx->peer_point, 
-                                               MBEDTLS_X25519_KEY_SIZE_BYTES );
-    case MBEDTLS_X25519_ECDH_OURS:
-        return mbedtls_mpi_write_binary_le( &key->d, ctx->our_secret, 
-                                            MBEDTLS_X25519_KEY_SIZE_BYTES );
-    default:
-        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
+/**
+ * Computes X25519: mypublic = clamp(secret) * basepoint on Curve25519.
+ * @note it has 126 bits of security
+ */
+void curve25519(uint8_t mypublic[32], const uint8_t secret[32],
+                const uint8_t basepoint[32]) {
+  uint32_t i, j;
+  uint8_t e[32], s;
+  uint64_t q[5], t[4][2][5] = {{{1}}, {{0}, {1}}};  // t[0] = (x:z) = (1:0), t[1] = (0:1), t[2] and t[3] zeroed
+  HaclEcFormatFexpand(q, basepoint);  // q = basepoint as five limbs
+  for (j = 0; j < 32; ++j) e[j] = secret[j];  // work on a copy: secret is const and the scalar gets clamped
+  e[0] &= 248;  // clamp: clear the three lowest bits
+  e[31] = (e[31] & 127) | 64;  // clamp: clear bit 255, set bit 254
+  HaclBignumCopy(t[1][0], q);  // t[1] = (q:1)
+  for (i = 32; i--;) {  // scalar bytes, e[31] down to e[0]
+    for (s = e[i], j = 4; j--;) {  // two bits per pass, taken from the top of s
+      HaclEcPointSwap(t[0], t[1], s >> 7);  // conditional swap on the current bit
+      HaclEcAddAndDoubleFmonty(t[2], t[3], t[0], t[1], q);  // t[2] = double of t[0], t[3] = t[0] + t[1]
+      HaclEcPointSwap(t[2], t[3], s >> 7);  // same bit again, applied to the outputs
+      s <<= 1;  // next bit moves into position 7
+      HaclEcPointSwap(t[2], t[3], s >> 7);
+      HaclEcAddAndDoubleFmonty(t[0], t[1], t[2], t[3], q);  // second step writes back into t[0], t[1]
+      HaclEcPointSwap(t[0], t[1], s >> 7);
+      s <<= 1;
     }
+  }
+  HaclEcFormatScalarOfPoint(mypublic, t[0]);  // mypublic = packed t[0][0] / t[0][1]
 }
-
-static int
-mbedtls_x25519_calc_secret( mbedtls_x25519_context *ctx, size_t *olen,
-                            unsigned char *buf, size_t blen,
-                            int( *f_rng )(void *, unsigned char *, size_t),
-                            void *p_rng )
-{
-    /* f_rng and p_rng are not used here because this implementation does not
-       need blinding since it has constant trace. (todo(jart): wut?) */
-    (( void )f_rng);
-    (( void )p_rng);
-    *olen = MBEDTLS_X25519_KEY_SIZE_BYTES;
-    if( blen < *olen )
-        return( MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL );
-    Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, ctx->peer_point);
-    /* Wipe the DH secret and don't let the peer chose a small subgroup point */
-    mbedtls_platform_zeroize( ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES );
-    if( timingsafe_memcmp( buf, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) == 0 )
-        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
-    return( 0 );
-}
-
-static int
-mbedtls_x25519_make_public( mbedtls_x25519_context *ctx, size_t *olen,
-                            unsigned char *buf, size_t blen,
-                            int( *f_rng )(void *, unsigned char *, size_t),
-                            void *p_rng )
-{
-    int ret = 0;
-    unsigned char base[MBEDTLS_X25519_KEY_SIZE_BYTES] = { 0 };
-    if( ctx == NULL )
-        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
-    if( ( ret = f_rng( p_rng, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) ) != 0 )
-        return ret;
-    *olen = MBEDTLS_X25519_KEY_SIZE_BYTES + 1;
-    if( blen < *olen )
-        return(MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL);
-    *buf++ = MBEDTLS_X25519_KEY_SIZE_BYTES;
-    base[0] = 9;
-    Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, base );
-    base[0] = 0;
-    if( memcmp( buf, base, MBEDTLS_X25519_KEY_SIZE_BYTES ) == 0 )
-        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
-    return( ret );
-}
-
-static int
-mbedtls_x25519_read_public( mbedtls_x25519_context *ctx,
-                            const unsigned char *buf, size_t blen )
-{
-    if( blen < MBEDTLS_X25519_KEY_SIZE_BYTES + 1 )
-        return(MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL);
-    if( (*buf++ != MBEDTLS_X25519_KEY_SIZE_BYTES) )
-        return(MBEDTLS_ERR_ECP_BAD_INPUT_DATA);
-    memcpy( ctx->peer_point, buf, MBEDTLS_X25519_KEY_SIZE_BYTES );
-    return( 0 );
-}
-
-/**
- * \brief           This function sets up the ECDH context with the information
- *                  given.
- *
- *                  This function should be called after mbedtls_ecdh_init() but
- *                  before mbedtls_ecdh_make_params(). There is no need to call
- *                  this function before mbedtls_ecdh_read_params().
- *
- *                  This is the first function used by a TLS server for ECDHE
- *                  ciphersuites.
- *
- * \param ctx       The ECDH context to set up.
- * \param grp_id    The group id of the group to set up the context for.
- *
- * \return          \c 0 on success.
- */
-int mbedtls_everest_setup( mbedtls_ecdh_context_everest *ctx, int grp_id )
-{
-    if( grp_id != MBEDTLS_ECP_DP_CURVE25519 )
-        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
-    mbedtls_x25519_init( &ctx->ctx );
-    return 0;
-}
-
-/**
- * \brief           This function frees a context.
- *
- * \param ctx       The context to free.
- */
-void mbedtls_everest_free( mbedtls_ecdh_context_everest *ctx )
-{
-    mbedtls_x25519_free( &ctx->ctx );
-}
-
-/**
- * \brief           This function generates a public key and a TLS
- *                  ServerKeyExchange payload.
- *
- *                  This is the second function used by a TLS server for ECDHE
- *                  ciphersuites. (It is called after mbedtls_ecdh_setup().)
- *
- * \note            This function assumes that the ECP group (grp) of the
- *                  \p ctx context has already been properly set,
- *                  for example, using mbedtls_ecp_group_load().
- *
- * \see             ecp.h
- *
- * \param ctx       The ECDH context.
- * \param olen      The number of characters written.
- * \param buf       The destination buffer.
- * \param blen      The length of the destination buffer.
- * \param f_rng     The RNG function.
- * \param p_rng     The RNG context.
- *
- * \return          \c 0 on success.
- * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
- */
-int mbedtls_everest_make_params( mbedtls_ecdh_context_everest *ctx, size_t *olen,
-                                 unsigned char *buf, size_t blen,
-                                 int( *f_rng )( void *, unsigned char *, size_t ),
-                                 void *p_rng )
-{
-    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
-    return mbedtls_x25519_make_params( x25519_ctx, olen, buf, blen, f_rng, p_rng );
-}
-
-/**
- * \brief           This function parses and processes a TLS ServerKeyExhange
- *                  payload.
- *
- *                  This is the first function used by a TLS client for ECDHE
- *                  ciphersuites.
- *
- * \see             ecp.h
- *
- * \param ctx       The ECDH context.
- * \param buf       The pointer to the start of the input buffer.
- * \param end       The address for one Byte past the end of the buffer.
- *
- * \return          \c 0 on success.
- * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
- *
- */
-int mbedtls_everest_read_params( mbedtls_ecdh_context_everest *ctx,
-                                 const unsigned char **buf,
-                                 const unsigned char *end )
-{
-    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
-    return mbedtls_x25519_read_params( x25519_ctx, buf, end );
-}
-
-/**
- * \brief           This function sets up an ECDH context from an EC key.
- *
- *                  It is used by clients and servers in place of the
- *                  ServerKeyEchange for static ECDH, and imports ECDH
- *                  parameters from the EC key information of a certificate.
- *
- * \see             ecp.h
- *
- * \param ctx       The ECDH context to set up.
- * \param key       The EC key to use.
- * \param side      Defines the source of the key: 1: Our key, or
- *                  0: The key of the peer.
- *
- * \return          \c 0 on success.
- * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
- *
- */
-int mbedtls_everest_get_params( mbedtls_ecdh_context_everest *ctx,
-                                const mbedtls_ecp_keypair *key,
-                                mbedtls_everest_ecdh_side side )
-{
-    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
-    mbedtls_x25519_ecdh_side s = side == MBEDTLS_EVEREST_ECDH_OURS ?
-                                            MBEDTLS_X25519_ECDH_OURS :
-                                            MBEDTLS_X25519_ECDH_THEIRS;
-    return mbedtls_x25519_get_params( x25519_ctx, key, s );
-}
-
-/**
- * \brief           This function generates a public key and a TLS
- *                  ClientKeyExchange payload.
- *
- *                  This is the second function used by a TLS client for ECDH(E)
- *                  ciphersuites.
- *
- * \see             ecp.h
- *
- * \param ctx       The ECDH context.
- * \param olen      The number of Bytes written.
- * \param buf       The destination buffer.
- * \param blen      The size of the destination buffer.
- * \param f_rng     The RNG function.
- * \param p_rng     The RNG context.
- *
- * \return          \c 0 on success.
- * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
- */
-int mbedtls_everest_make_public( mbedtls_ecdh_context_everest *ctx, size_t *olen,
-                                 unsigned char *buf, size_t blen,
-                                 int( *f_rng )( void *, unsigned char *, size_t ),
-                                 void *p_rng )
-{
-    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
-    return mbedtls_x25519_make_public( x25519_ctx, olen, buf, blen, f_rng, p_rng );
-}
-
-/**
- * \brief       This function parses and processes a TLS ClientKeyExchange
- *              payload.
- *
- *              This is the third function used by a TLS server for ECDH(E)
- *              ciphersuites. (It is called after mbedtls_ecdh_setup() and
- *              mbedtls_ecdh_make_params().)
- *
- * \see         ecp.h
- *
- * \param ctx   The ECDH context.
- * \param buf   The start of the input buffer.
- * \param blen  The length of the input buffer.
- *
- * \return      \c 0 on success.
- * \return      An \c MBEDTLS_ERR_ECP_XXX error code on failure.
- */
-int mbedtls_everest_read_public( mbedtls_ecdh_context_everest *ctx,
-                                 const unsigned char *buf, size_t blen )
-{
-    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
-    return mbedtls_x25519_read_public( x25519_ctx, buf, blen );
-}
-
-/**
- * \brief           This function derives and exports the shared secret.
- *
- *                  This is the last function used by both TLS client
- *                  and servers.
- *
- * \note            If \p f_rng is not NULL, it is used to implement
- *                  countermeasures against side-channel attacks.
- *                  For more information, see mbedtls_ecp_mul().
- *
- * \see             ecp.h
- *
- * \param ctx       The ECDH context.
- * \param olen      The number of Bytes written.
- * \param buf       The destination buffer.
- * \param blen      The length of the destination buffer.
- * \param f_rng     The RNG function.
- * \param p_rng     The RNG context.
- *
- * \return          \c 0 on success.
- * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
- */
-int mbedtls_everest_calc_secret( mbedtls_ecdh_context_everest *ctx, size_t *olen,
-                                 unsigned char *buf, size_t blen,
-                                 int( *f_rng )( void *, unsigned char *, size_t ),
-                                 void *p_rng )
-{
-    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
-    return mbedtls_x25519_calc_secret( x25519_ctx, olen, buf, blen, f_rng, p_rng );
-}
-
-#endif /* MBEDTLS_ECDH_C && MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED */
diff --git a/third_party/mbedtls/everest.h b/third_party/mbedtls/everest.h
index be4c43f16..592aff1ea 100644
--- a/third_party/mbedtls/everest.h
+++ b/third_party/mbedtls/everest.h
@@ -1,52 +1,10 @@
-#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
-#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_
-#include "third_party/mbedtls/config.h"
-#include "third_party/mbedtls/ecp.h"
+#ifndef COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_
+#define COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_
+#if !(__ASSEMBLER__ + __LINKER__ + 0)
 COSMOPOLITAN_C_START_
 
-#define MBEDTLS_ECP_TLS_CURVE25519    0x1d
-#define MBEDTLS_X25519_KEY_SIZE_BYTES 32
-
-typedef enum {
-  MBEDTLS_X25519_ECDH_OURS,
-  MBEDTLS_X25519_ECDH_THEIRS,
-} mbedtls_x25519_ecdh_side;
-
-typedef struct {
-  unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES];
-  unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES];
-} mbedtls_x25519_context;
-
-typedef enum {
-  MBEDTLS_EVEREST_ECDH_OURS,
-  MBEDTLS_EVEREST_ECDH_THEIRS,
-} mbedtls_everest_ecdh_side;
-
-typedef struct {
-  mbedtls_x25519_context ctx;
-} mbedtls_ecdh_context_everest;
-
-int mbedtls_everest_setup(mbedtls_ecdh_context_everest *, int);
-void mbedtls_everest_free(mbedtls_ecdh_context_everest *);
-int mbedtls_everest_make_params(mbedtls_ecdh_context_everest *, size_t *,
-                                unsigned char *, size_t,
-                                int (*)(void *, unsigned char *, size_t),
-                                void *);
-int mbedtls_everest_read_params(mbedtls_ecdh_context_everest *,
-                                const unsigned char **, const unsigned char *);
-int mbedtls_everest_get_params(mbedtls_ecdh_context_everest *,
-                               const mbedtls_ecp_keypair *,
-                               mbedtls_everest_ecdh_side);
-int mbedtls_everest_make_public(mbedtls_ecdh_context_everest *, size_t *,
-                                unsigned char *, size_t,
-                                int (*)(void *, unsigned char *, size_t),
-                                void *);
-int mbedtls_everest_read_public(mbedtls_ecdh_context_everest *,
-                                const unsigned char *, size_t);
-int mbedtls_everest_calc_secret(mbedtls_ecdh_context_everest *, size_t *,
-                                unsigned char *, size_t,
-                                int (*)(void *, unsigned char *, size_t),
-                                void *);
+void curve25519(uint8_t[32], const uint8_t[32], const uint8_t[32]);  // (mypublic out, secret, basepoint)
 
 COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_X25519_H_ */
+#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
+#endif /* COSMOPOLITAN_THIRD_PARTY_MBEDTLS_EVEREST_H_ */
diff --git a/third_party/mbedtls/mbedtls.mk b/third_party/mbedtls/mbedtls.mk
index 6f3ea1b68..8bbea0621 100644
--- a/third_party/mbedtls/mbedtls.mk
+++ b/third_party/mbedtls/mbedtls.mk
@@ -55,7 +55,7 @@ $(THIRD_PARTY_MBEDTLS_A_OBJS):						\
 
 o/$(MODE)/third_party/mbedtls/everest.o:				\
 			OVERRIDE_CFLAGS +=				\
-				-Os
+				-O3
 
 o/$(MODE)/third_party/mbedtls/bigmul4.o					\
 o/$(MODE)/third_party/mbedtls/bigmul6.o:				\
@@ -70,11 +70,6 @@ o/$(MODE)/third_party/mbedtls/shiftright2-avx.o:			\
 			OVERRIDE_CFLAGS +=				\
 				-O3 -mavx
 
-# tail recursion is so important because everest was written in f*
-o/$(MODE)/third_party/mbedtls/everest.o:				\
-			OVERRIDE_CFLAGS +=				\
-				-foptimize-sibling-calls
-
 THIRD_PARTY_MBEDTLS_LIBS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)))
 THIRD_PARTY_MBEDTLS_SRCS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_SRCS))
 THIRD_PARTY_MBEDTLS_HDRS = $(foreach x,$(THIRD_PARTY_MBEDTLS_ARTIFACTS),$($(x)_HDRS))
diff --git a/third_party/mbedtls/secp256r1.c b/third_party/mbedtls/secp256r1.c
index 53ad1f62d..7df7f9ac8 100644
--- a/third_party/mbedtls/secp256r1.c
+++ b/third_party/mbedtls/secp256r1.c
@@ -26,7 +26,7 @@
 #define H(w) (w & 0xffffffff00000000)
 
 /**
- * Fastest quasi-reduction modulo NIST P-256.
+ * Fastest quasi-reduction modulo ℘256.
  *
  *     p  = 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1
  *     B  = T + 2×S₁ + 2×S₂ + S₃ + S₄ – D₁ – D₂ – D₃ – D₄ mod p
diff --git a/third_party/mbedtls/secp384r1.c b/third_party/mbedtls/secp384r1.c
index 96652c43e..307b72164 100644
--- a/third_party/mbedtls/secp384r1.c
+++ b/third_party/mbedtls/secp384r1.c
@@ -24,7 +24,7 @@
 #define Q(i) p[i >> 1]
 
 /**
- * Fastest quasi-reduction modulo Prime 384.
+ * Fastest quasi-reduction modulo ℘384.
  *
  *     p  = 2³⁸⁴ – 2¹²⁸ – 2⁶ + 2³² – 1
  *     B  = T + 2×S₁ + S₂ + S₃ + S₄ + S₅ + S₆ – D₁ – D₂ – D₃ mod p
@@ -44,8 +44,7 @@
 void secp384r1(uint64_t p[12]) {
   int r;
   char o;
-  signed char G;
-  uint64_t A, B, C, D, E, F, a, b, c;
+  uint64_t A, B, C, D, E, F, G, a, b, c;
   A = Q(0);
   B = Q(2);
   C = Q(4);
@@ -57,8 +56,8 @@ void secp384r1(uint64_t p[12]) {
   a = Q(22) << 32 | Q(21) >> 32;
   b = Q(23) >> 32;
   ADC(C, C, a << 1, 0, o);
-  ADC(D, D, (b << 1 | a >> 63), o, o);
-  ADC(E, E, (b >> 63), o, o);
+  ADC(D, D, b << 1 | a >> 63, o, o);
+  ADC(E, E, b >> 63, o, o);
   ADC(F, F, o, o, o);
   G += o;
   ADC(A, A, Q(12), 0, o);
@@ -118,91 +117,105 @@ void secp384r1(uint64_t p[12]) {
   asm volatile(/* S₁ = (0  ‖0  ‖0  ‖0  ‖0  ‖A₂₃‖A₂₂‖A₂₁‖0  ‖0  ‖0  ‖0  ) */
                "mov\t21*4(%9),%7\n\t"
                "mov\t23*4(%9),%k8\n\t"
+               "mov\t%7,%%r12\n\t"
+               "shr\t$63,%%r12\n\t"
                "shl\t%7\n\t"
-               "rcl\t%8\n\t"
+               "shl\t%8\n\t"
+               "or\t%%r12,%8\n\t"
+               "mov\t13*4(%9),%%r12\n\t"
                "add\t%7,%2\n\t"
+               "mov\t23*4(%9),%k7\n\t"
                "adc\t%8,%3\n\t"
+               "mov\t15*4(%9),%%r13\n\t"
                "adc\t$0,%4\n\t"
+               "mov\t12*4(%9),%k8\n\t"
                "adc\t$0,%5\n\t"
-               "adc\t$0,%b6\n\t"
-               /* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */
-               "add\t12*4(%9),%0\n\t"
-               "adc\t14*4(%9),%1\n\t"
-               "adc\t16*4(%9),%2\n\t"
-               "adc\t18*4(%9),%3\n\t"
-               "adc\t20*4(%9),%4\n\t"
-               "adc\t22*4(%9),%5\n\t"
-               "adc\t$0,%b6\n\t"
-               /* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */
-               "mov\t12*4(%9),%k7\n\t"
+               "mov\t17*4(%9),%%r14\n\t"
+               "adc\t$0,%6\n\t"
+               "mov\t19*4(%9),%%r15\n\t"
+               /* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */
+               "shl\t$32,%8\n\t"
+               "or\t%8,%7\n\t"
                "mov\t23*4(%9),%k8\n\t"
+               "sub\t%7,%0\n\t"
+               "mov\t21*4(%9),%7\n\t"
+               "sbb\t%%r12,%1\n\t"
+               "sbb\t%%r13,%2\n\t"
+               "sbb\t%%r14,%3\n\t"
+               "sbb\t%%r15,%4\n\t"
+               "sbb\t%7,%5\n\t"
+               "mov\t12*4(%9),%k7\n\t"
+               "sbb\t$0,%6\n\t"
+               /* S₃ = (A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃‖A₂₂‖A₂₁) */
                "shl\t$32,%7\n\t"
                "or\t%7,%8\n\t"
                "add\t21*4(%9),%0\n\t"
-               "adc\t%8,%1\n\t"
-               "adc\t13*4(%9),%2\n\t"
-               "adc\t15*4(%9),%3\n\t"
-               "adc\t17*4(%9),%4\n\t"
-               "adc\t19*4(%9),%5\n\t"
-               "adc\t$0,%b6\n\t"
-               /* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0  ‖A₂₃‖0  ) */
                "mov\t23*4(%9),%k7\n\t"
+               "adc\t%8,%1\n\t"
                "mov\t20*4(%9),%k8\n\t"
+               "adc\t%%r12,%2\n\t"
+               "mov\t12*4(%9),%%r12\n\t"
+               "adc\t%%r13,%3\n\t"
+               "mov\t14*4(%9),%%r13\n\t"
+               "adc\t%%r14,%4\n\t"
+               "mov\t16*4(%9),%%r14\n\t"
+               "adc\t%%r15,%5\n\t"
+               "mov\t18*4(%9),%%r15\n\t"
+               "adc\t$0,%6\n\t"
+               /* S₄ = (A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₀‖0  ‖A₂₃‖0  ) */
                "shl\t$32,%7\n\t"
                "shl\t$32,%8\n\t"
                "add\t%7,%0\n\t"
                "adc\t%8,%1\n\t"
-               "adc\t12*4(%9),%2\n\t"
-               "adc\t14*4(%9),%3\n\t"
-               "adc\t16*4(%9),%4\n\t"
-               "adc\t18*4(%9),%5\n\t"
-               "adc\t$0,%b6\n\t"
+               "adc\t%%r12,%2\n\t"
+               "adc\t%%r13,%3\n\t"
+               "adc\t%%r14,%4\n\t"
+               "adc\t%%r15,%5\n\t"
+               "adc\t$0,%6\n\t"
+               /* S₂ = (A₂₃‖A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂) */
+               "add\t%%r12,%0\n\t"
+               "mov\t20*4(%9),%%r12\n\t"
+               "adc\t%%r13,%1\n\t"
+               "mov\t22*4(%9),%%r13\n\t"
+               "adc\t%%r14,%2\n\t"
+               "adc\t%%r15,%3\n\t"
+               "adc\t%%r12,%4\n\t"
+               "adc\t%%r13,%5\n\t"
+               "adc\t$0,%6\n\t"
                /* S₅ = (0  ‖0  ‖0  ‖0  ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0  ‖0  ‖0  ‖0  ) */
-               "mov\t23*4(%9),%k7\n\t"
-               "mov\t20*4(%9),%k8\n\t"
-               "shl\t$32,%7\n\t"
-               "shl\t$32,%8\n\t"
-               "add\t20*4(%9),%2\n\t"
-               "adc\t22*4(%9),%3\n\t"
+               "add\t%%r12,%2\n\t"
+               "adc\t%%r13,%3\n\t"
                "adc\t$0,%4\n\t"
                "adc\t$0,%5\n\t"
-               "adc\t$0,%b6\n\t"
+               "adc\t$0,%6\n\t"
                /* S₆ = (0  ‖0  ‖0  ‖0  ‖0  ‖0  ‖A₂₃‖A₂₂‖A₂₁‖0  ‖0  ‖A₂₀) */
-               "mov\t20*4(%9),%k7\n\t"
-               "mov\t21*4(%9),%k8\n\t"
+               "mov\t%%r12d,%k7\n\t"
+               "mov\t%%r12,%8\n\t"
+               "shr\t$32,%8\n\t"
                "shl\t$32,%8\n\t"
                "add\t%7,%0\n\t"
                "adc\t%8,%1\n\t"
-               "adc\t22*4(%9),%2\n\t"
+               "adc\t%%r13,%2\n\t"
                "adc\t$0,%3\n\t"
                "adc\t$0,%4\n\t"
                "adc\t$0,%5\n\t"
-               "adc\t$0,%b6\n\t"
-               /* D₁ = (A₂₂‖A₂₁‖A₂₀‖A₁₉‖A₁₈‖A₁₇‖A₁₆‖A₁₅‖A₁₄‖A₁₃‖A₁₂‖A₂₃) */
-               "mov\t23*4(%9),%k7\n\t"
-               "mov\t12*4(%9),%k8\n\t"
-               "shl\t$32,%8\n\t"
-               "or\t%8,%7\n\t"
-               "sub\t%7,%0\n\t"
-               "sbb\t13*4(%9),%1\n\t"
-               "sbb\t15*4(%9),%2\n\t"
-               "sbb\t17*4(%9),%3\n\t"
-               "sbb\t19*4(%9),%4\n\t"
-               "sbb\t21*4(%9),%5\n\t"
-               "sbb\t$0,%b6\n\t"
+               "adc\t$0,%6\n\t"
                /* D₂ = (0  ‖0  ‖0  ‖0  ‖0  ‖0  ‖0  ‖A₂₃‖A₂₂‖A₂₁‖A₂₀‖0  ) */
-               "mov\t20*4(%9),%k7\n\t"
-               "mov\t23*4(%9),%k8\n\t"
+               "mov\t%%r12d,%k7\n\t"
+               "mov\t21*4(%9),%%r12\n\t"
+               "mov\t%%r13,%8\n\t"
+               "shr\t$32,%8\n\t"
                "shl\t$32,%7\n\t"
                "sub\t%7,%0\n\t"
-               "sbb\t21*4(%9),%1\n\t"
+               "sbb\t%%r12,%1\n\t"
                "sbb\t%8,%2\n\t"
                "sbb\t$0,%3\n\t"
                "sbb\t$0,%4\n\t"
                "sbb\t$0,%5\n\t"
-               "sbb\t$0,%b6\n\t"
+               "sbb\t$0,%6\n\t"
                /* D₃ = (0  ‖0  ‖0  ‖0  ‖0  ‖0  ‖0  ‖A₂₃‖A₂₃‖0  ‖0  ‖0  ) */
-               "mov\t23*4(%9),%k7\n\t"
+               "mov\t%%r13,%7\n\t"
+               "shr\t$32,%7\n\t"
                "mov\t%k7,%k8\n\t"
                "shl\t$32,%7\n\t"
                "sub\t%7,%1\n\t"
@@ -210,11 +223,11 @@ void secp384r1(uint64_t p[12]) {
                "sbb\t$0,%3\n\t"
                "sbb\t$0,%4\n\t"
                "sbb\t$0,%5\n\t"
-               "sbb\t$0,%b6\n\t"
+               "sbb\t$0,%6"
                : "+r"(A), "+r"(B), "+r"(C), "+r"(D), "+r"(E), "+r"(F), "+q"(G),
                  "=&r"(a), "=&r"(b)
                : "r"(p)
-               : "memory");
+               : "memory", "r12", "r13", "r14", "r15");
 #endif
   p[0] = A;
   p[1] = B;
@@ -223,11 +236,12 @@ void secp384r1(uint64_t p[12]) {
   p[4] = E;
   p[5] = F;
   p[6] = G;
-  p[7] = 0;
-  p[8] = 0;
-  p[9] = 0;
-  p[10] = 0;
-  p[11] = 0;
+  G = CONCEAL("r", 0L);
+  p[7] = G;
+  p[8] = G;
+  p[9] = G;
+  p[10] = G;
+  p[11] = G;
 }
 
 int ecp_mod_p384(mbedtls_mpi *N) {
@@ -249,3 +263,130 @@ int ecp_mod_p384(mbedtls_mpi *N) {
   }
   return 0;
 }
+
+/*
+Instructions:      115
+Total Cycles:      46
+Total uOps:        116
+uOps Per Cycle:    2.52
+IPC:               2.50
+Block RThroughput: 31.0
+
+SIMULATION          0123456789          0123456789
+Index     0123456789          0123456789          012345
+[0,0]     DR   .    .    .    .    .    .    .    .    .   xorl	%r10d, %r10d
+[0,1]     DeeeeeER  .    .    .    .    .    .    .    .   movq	(%rdi), %r9
+[0,2]     DeeeeeER  .    .    .    .    .    .    .    .   movq	8(%rdi), %r8
+[0,3]     D=eeeeeER .    .    .    .    .    .    .    .   movq	16(%rdi), %rsi
+[0,4]     D=eeeeeER .    .    .    .    .    .    .    .   movq	24(%rdi), %rcx
+[0,5]     D==eeeeeER.    .    .    .    .    .    .    .   movq	32(%rdi), %rdx
+[0,6]     .D==eeeeeER    .    .    .    .    .    .    .   movq	40(%rdi), %rax
+[0,7]     .D=eeeeeE-R    .    .    .    .    .    .    .   movq	84(%rdi), %r11
+[0,8]     .D==eeeeeER    .    .    .    .    .    .    .   movl	92(%rdi), %ebx
+[0,9]     .D======eER    .    .    .    .    .    .    .   movq	%r11, %r12
+[0,10]    .D=======eER   .    .    .    .    .    .    .   shrq	$63, %r12
+[0,11]    .D======eE-R   .    .    .    .    .    .    .   shlq	%r11
+[0,12]    . D======eER   .    .    .    .    .    .    .   shlq	%rbx
+[0,13]    . D=======eER  .    .    .    .    .    .    .   orq	%r12, %rbx
+[0,14]    . D==eeeeeE-R  .    .    .    .    .    .    .   movq	52(%rdi), %r12
+[0,15]    . D======eE-R  .    .    .    .    .    .    .   addq	%r11, %rsi
+[0,16]    . D==eeeeeE-R  .    .    .    .    .    .    .   movl	92(%rdi), %r11d
+[0,17]    . D========eER .    .    .    .    .    .    .   adcq	%rbx, %rcx
+[0,18]    .  D==eeeeeE-R .    .    .    .    .    .    .   movq	60(%rdi), %r13
+[0,19]    .  D========eER.    .    .    .    .    .    .   adcq	$0, %rdx
+[0,20]    .  D==eeeeeE--R.    .    .    .    .    .    .   movl	48(%rdi), %ebx
+[0,21]    .  D=========eER    .    .    .    .    .    .   adcq	$0, %rax
+[0,22]    .  D===eeeeeE--R    .    .    .    .    .    .   movq	68(%rdi), %r14
+[0,23]    .  D==========eER   .    .    .    .    .    .   adcq	$0, %r10
+[0,24]    .   D==eeeeeE---R   .    .    .    .    .    .   movq	76(%rdi), %r15
+[0,25]    .   D======eE---R   .    .    .    .    .    .   shlq	$32, %rbx
+[0,26]    .   D=======eE--R   .    .    .    .    .    .   orq	%rbx, %r11
+[0,27]    .   D===eeeeeE--R   .    .    .    .    .    .   movl	92(%rdi), %ebx
+[0,28]    .   D========eE-R   .    .    .    .    .    .   subq	%r11, %r9
+[0,29]    .   D===eeeeeE--R   .    .    .    .    .    .   movq	84(%rdi), %r11
+[0,30]    .    D========eER   .    .    .    .    .    .   sbbq	%r12, %r8
+[0,31]    .    D=========eER  .    .    .    .    .    .   sbbq	%r13, %rsi
+[0,32]    .    D==========eER .    .    .    .    .    .   sbbq	%r14, %rcx
+[0,33]    .    D===========eER.    .    .    .    .    .   sbbq	%r15, %rdx
+[0,34]    .    D============eER    .    .    .    .    .   sbbq	%r11, %rax
+[0,35]    .    D===eeeeeE-----R    .    .    .    .    .   movl	48(%rdi), %r11d
+[0,36]    .    .D============eER   .    .    .    .    .   sbbq	$0, %r10
+[0,37]    .    .D========eE----R   .    .    .    .    .   shlq	$32, %r11
+[0,38]    .    .D=========eE---R   .    .    .    .    .   orq	%r11, %rbx
+[0,39]    .    .D==eeeeeE------R   .    .    .    .    .   movl	92(%rdi), %r11d
+[0,40]    .    .D======eeeeeeE-R   .    .    .    .    .   addq	84(%rdi), %r9
+[0,41]    .    . D===========eER   .    .    .    .    .   adcq	%rbx, %r8
+[0,42]    .    . D==eeeeeE-----R   .    .    .    .    .   movl	80(%rdi), %ebx
+[0,43]    .    . D============eER  .    .    .    .    .   adcq	%r12, %rsi
+[0,44]    .    . D==eeeeeE------R  .    .    .    .    .   movq	48(%rdi), %r12
+[0,45]    .    . D=============eER .    .    .    .    .   adcq	%r13, %rcx
+[0,46]    .    . D===eeeeeE------R .    .    .    .    .   movq	56(%rdi), %r13
+[0,47]    .    .  D=============eER.    .    .    .    .   adcq	%r14, %rdx
+[0,48]    .    .  D==eeeeeE-------R.    .    .    .    .   movq	64(%rdi), %r14
+[0,49]    .    .  D==============eER    .    .    .    .   adcq	%r15, %rax
+[0,50]    .    .  D===eeeeeE-------R    .    .    .    .   movq	72(%rdi), %r15
+[0,51]    .    .  D===============eER   .    .    .    .   adcq	$0, %r10
+[0,52]    .    .  D=======eE--------R   .    .    .    .   shlq	$32, %r11
+[0,53]    .    .   D=======eE-------R   .    .    .    .   shlq	$32, %rbx
+[0,54]    .    .   D=========eE-----R   .    .    .    .   addq	%r11, %r9
+[0,55]    .    .   D==========eE----R   .    .    .    .   adcq	%rbx, %r8
+[0,56]    .    .   D===========eE---R   .    .    .    .   adcq	%r12, %rsi
+[0,57]    .    .   D============eE--R   .    .    .    .   adcq	%r13, %rcx
+[0,58]    .    .   D=============eE-R   .    .    .    .   adcq	%r14, %rdx
+[0,59]    .    .    D=============eER   .    .    .    .   adcq	%r15, %rax
+[0,60]    .    .    D==============eER  .    .    .    .   adcq	$0, %r10
+[0,61]    .    .    D=========eE-----R  .    .    .    .   addq	%r12, %r9
+[0,62]    .    .    D=eeeeeE---------R  .    .    .    .   movq	80(%rdi), %r12
+[0,63]    .    .    D==============eER  .    .    .    .   adcq	%r13, %r8
+[0,64]    .    .    D==eeeeeE--------R  .    .    .    .   movq	88(%rdi), %r13
+[0,65]    .    .    .D==============eER .    .    .    .   adcq	%r14, %rsi
+[0,66]    .    .    .D===============eER.    .    .    .   adcq	%r15, %rcx
+[0,67]    .    .    .D================eER    .    .    .   adcq	%r12, %rdx
+[0,68]    .    .    .D=================eER   .    .    .   adcq	%r13, %rax
+[0,69]    .    .    .D==================eER  .    .    .   adcq	$0, %r10
+[0,70]    .    .    .D===============eE---R  .    .    .   addq	%r12, %rsi
+[0,71]    .    .    . D===============eE--R  .    .    .   adcq	%r13, %rcx
+[0,72]    .    .    . D================eE-R  .    .    .   adcq	$0, %rdx
+[0,73]    .    .    . D=================eER  .    .    .   adcq	$0, %rax
+[0,74]    .    .    . D==================eER .    .    .   adcq	$0, %r10
+[0,75]    .    .    . D====eE--------------R .    .    .   movl	%r12d, %r11d
+[0,76]    .    .    . D====eE--------------R .    .    .   movq	%r12, %rbx
+[0,77]    .    .    .  D====eE-------------R .    .    .   shrq	$32, %rbx
+[0,78]    .    .    .  D============eE-----R .    .    .   shlq	$32, %rbx
+[0,79]    .    .    .  D=======eE----------R .    .    .   addq	%r11, %r9
+[0,80]    .    .    .  D=============eE----R .    .    .   adcq	%rbx, %r8
+[0,81]    .    .    .  D=================eER .    .    .   adcq	%r13, %rsi
+[0,82]    .    .    .  D==================eER.    .    .   adcq	$0, %rcx
+[0,83]    .    .    .   D==================eER    .    .   adcq	$0, %rdx
+[0,84]    .    .    .   D===================eER   .    .   adcq	$0, %rax
+[0,85]    .    .    .   D====================eER  .    .   adcq	$0, %r10
+[0,86]    .    .    .   D===eE-----------------R  .    .   movl	%r12d, %r11d
+[0,87]    .    .    .   DeeeeeE----------------R  .    .   movq	84(%rdi), %r12
+[0,88]    .    .    .   D===eE-----------------R  .    .   movq	%r13, %rbx
+[0,89]    .    .    .    D================eE---R  .    .   shrq	$32, %rbx
+[0,90]    .    .    .    D=================eE--R  .    .   shlq	$32, %r11
+[0,91]    .    .    .    D==================eE-R  .    .   subq	%r11, %r9
+[0,92]    .    .    .    D===================eER  .    .   sbbq	%r12, %r8
+[0,93]    .    .    .    D====================eER .    .   sbbq	%rbx, %rsi
+[0,94]    .    .    .    D=====================eER.    .   sbbq	$0, %rcx
+[0,95]    .    .    .    .D=====================eER    .   sbbq	$0, %rdx
+[0,96]    .    .    .    .D======================eER   .   sbbq	$0, %rax
+[0,97]    .    .    .    .D=======================eER  .   sbbq	$0, %r10
+[0,98]    .    .    .    .D==eE---------------------R  .   movq	%r13, %r11
+[0,99]    .    .    .    .D=================eE------R  .   shrq	$32, %r11
+[0,100]   .    .    .    .D==================eE-----R  .   movl	%r11d, %ebx
+[0,101]   .    .    .    . D==================eE----R  .   shlq	$32, %r11
+[0,102]   .    .    .    . D===================eE---R  .   subq	%r11, %r8
+[0,103]   .    .    .    . D====================eE--R  .   sbbq	%rbx, %rsi
+[0,104]   .    .    .    . D=====================eE-R  .   sbbq	$0, %rcx
+[0,105]   .    .    .    . D======================eER  .   sbbq	$0, %rdx
+[0,106]   .    .    .    . D=======================eER .   sbbq	$0, %rax
+[0,107]   .    .    .    .  D=======================eER.   sbbq	$0, %r10
+[0,108]   .    .    .    .  D================eE-------R.   movq	%r9, (%rdi)
+[0,109]   .    .    .    .  D===================eE----R.   movq	%r8, 8(%rdi)
+[0,110]   .    .    .    .  D====================eE---R.   movq	%rsi, 16(%rdi)
+[0,111]   .    .    .    .  D=====================eE--R.   movq	%rcx, 24(%rdi)
+[0,112]   .    .    .    .  D======================eE-R.   movq	%rdx, 32(%rdi)
+[0,113]   .    .    .    .   D======================eER.   movq	%rax, 40(%rdi)
+[0,114]   .    .    .    .   D=======================eER   movq	%r10, 48(%rdi)
+*/
diff --git a/third_party/mbedtls/ssl_ciphersuites.c b/third_party/mbedtls/ssl_ciphersuites.c
index b465480f8..1329ec6e9 100644
--- a/third_party/mbedtls/ssl_ciphersuites.c
+++ b/third_party/mbedtls/ssl_ciphersuites.c
@@ -61,7 +61,6 @@ static const uint16_t ciphersuite_preference[] =
     MBEDTLS_TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
     MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CCM,
     MBEDTLS_TLS_DHE_RSA_WITH_AES_256_CCM,
-    /* weakened perfect forward secrecy */
     MBEDTLS_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
     MBEDTLS_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
     MBEDTLS_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256,
diff --git a/third_party/mbedtls/test/everest_test.c b/third_party/mbedtls/test/everest_test.c
new file mode 100644
index 000000000..e201fa88a
--- /dev/null
+++ b/third_party/mbedtls/test/everest_test.c
@@ -0,0 +1,77 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/rand/rand.h"
+#include "libc/stdio/stdio.h"
+#include "libc/testlib/ezbench.h"
+#include "libc/testlib/testlib.h"
+#include "third_party/mbedtls/config.h"
+#include "third_party/mbedtls/endian.h"
+
+void Hacl_Curve25519_crypto_scalarmult(uint8_t *, uint8_t *, uint8_t *);  // defined outside this file; called below as (out, secret, point)
+void curve25519(uint8_t[32], uint8_t[32], uint8_t[32]);  // defined outside this file; called below as (out, secret, point)
+
+const uint64_t kNumbers[] = {
+    0x0000000000000000,  // zero
+    0x0000000000000001,  // one
+    0x0000000000001000,  // 1 << 12
+    0x0000000002000000,  // 1 << 25
+    0x0000004000000000,  // 1 << 38
+    0x0008000000000000,  // 1 << 51
+    0x8000000000000000,  // 1 << 63
+    0x0007ffffffffffff,  // (1 << 51) - 1
+    0x0000003fffffffff,  // (1 << 38) - 1
+    0x0000000001ffffff,  // (1 << 25) - 1
+    0x0000000000000fff,  // (1 << 12) - 1
+    0xffffffffffffffff,  // all bits set
+    0xfff8000000000000,  // complement of (1 << 51) - 1
+};
+
+TEST(everest, tinierVersionBehavesTheSame) {  // both implementations must produce byte-identical outputs
+  size_t i;
+  uint8_t secret[32], bpoint[32], public[2][32];  // public[0]: Hacl output, public[1]: curve25519 output
+  for (i = 0; i < 500; ++i) {  // round 1: 500 iterations with rngset/rand64-filled inputs
+    rngset(secret, sizeof(secret), rand64, -1);
+    rngset(bpoint, sizeof(bpoint), rand64, -1);
+    Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint);
+    curve25519(public[1], secret, bpoint);
+    ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0])));
+  }
+  for (i = 0; i < 500; ++i) {  // round 2: inputs assembled from the boundary words in kNumbers
+    Write64le(secret + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]);  // offsets are octal: 000, 010, 020, 030 = bytes 0, 8, 16, 24
+    Write64le(secret + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]);
+    Write64le(secret + 020, kNumbers[rand() % ARRAYLEN(kNumbers)]);
+    Write64le(secret + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]);
+    Write64le(bpoint + 000, kNumbers[rand() % ARRAYLEN(kNumbers)]);
+    Write64le(bpoint + 010, kNumbers[rand() % ARRAYLEN(kNumbers)]);
+    Write64le(bpoint + 020, kNumbers[rand() % ARRAYLEN(kNumbers)]);
+    Write64le(bpoint + 030, kNumbers[rand() % ARRAYLEN(kNumbers)]);
+    Hacl_Curve25519_crypto_scalarmult(public[0], secret, bpoint);
+    curve25519(public[1], secret, bpoint);
+    ASSERT_EQ(0, memcmp(public[0], public[1], sizeof(public[0])));
+  }
+}
+
+BENCH(everest, bench) {  // benchmarks both scalar multiplications on the same inputs
+  uint8_t secret[32], bpoint[32], public[32];
+  rngset(secret, sizeof(secret), rand64, -1);
+  rngset(bpoint, sizeof(bpoint), rand64, -1);
+  EZBENCH2("everest", donothing,  // "everest" labels the Hacl_Curve25519_crypto_scalarmult run
+           Hacl_Curve25519_crypto_scalarmult(public, secret, bpoint));
+  EZBENCH2("mariana", donothing, curve25519(public, secret, bpoint));  // "mariana" labels the curve25519 run
+}
diff --git a/third_party/mbedtls/test/everest_unravaged.c b/third_party/mbedtls/test/everest_unravaged.c
new file mode 100644
index 000000000..3ad6cb66f
--- /dev/null
+++ b/third_party/mbedtls/test/everest_unravaged.c
@@ -0,0 +1,899 @@
+#include "libc/bits/bits.h"
+#include "libc/limits.h"
+#include "third_party/mbedtls/asn1.h"
+#include "third_party/mbedtls/bignum.h"
+#include "third_party/mbedtls/common.h"
+#include "third_party/mbedtls/error.h"
+#include "third_party/mbedtls/platform.h"
+
+asm(".ident\t\"\\n\\n\
+Everest (Apache 2.0)\\n\
+Copyright 2016-2018 INRIA and Microsoft Corporation\"");
+asm(".include \"libc/disclaimer.inc\"");
+
+/* clang-format off */
+/*
+ *  ECDH with curve-optimized implementation multiplexing
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of mbed TLS (https://tls.mbed.org)
+ */
+
+#ifdef memcpy  /* drop any earlier macro definition of memcpy before redefining it */
+#undef memcpy
+#endif
+#define memcpy(x,y,z) __builtin_memcpy(x,y,z)  /* route every memcpy in this file to the compiler builtin */
+
+#define load64_le(b) READ64LE(b)  /* load64_le is spelled via the READ64LE macro (defined elsewhere) */
+#define store64_le(b, i) WRITE64LE(b, i)  /* store64_le is spelled via the WRITE64LE macro (defined elsewhere) */
+
+#define KRML_HOST_EXIT exit  /* exit hook used by KRML_EXIT and KRML_CHECK_SIZE */
+#define KRML_HOST_PRINTF printf  /* print hook used by KRML_EXIT and KRML_CHECK_SIZE */
+
+#define KRML_EXIT                                                              \
+  do {                                                                         \
+    KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \
+    KRML_HOST_EXIT(254);                                                       \
+  } while (0)  /* KRML_EXIT: print file:line, then exit with status 254 */
+
+#define _KRML_CHECK_SIZE_PRAGMA \
+    _Pragma("GCC diagnostic ignored \"-Wtype-limits\"")  /* silences -Wtype-limits; no matching diagnostic push/pop here */
+
+#define KRML_CHECK_SIZE(size_elt, sz)                                          \
+  do {                                                                         \
+    _KRML_CHECK_SIZE_PRAGMA                                                    \
+    if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) {                  \
+      KRML_HOST_PRINTF(                                                        \
+          "Maximum allocatable size exceeded, aborting before overflow at "    \
+          "%s:%d\n",                                                           \
+          __FILE__, __LINE__);                                                 \
+      KRML_HOST_EXIT(253);                                                     \
+    }                                                                          \
+  } while (0)  /* KRML_CHECK_SIZE: exit with status 253 if sz * size_elt would exceed SIZE_MAX */
+
+typedef const char *Prims_string;  /* string type alias: pointer to const char */
+
+typedef struct {
+  uint32_t length;  /* NOTE(review): presumably the byte count of data; confirm against users */
+  const char *data;
+} FStar_Bytes_bytes;
+
+typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int,  /* all of these names alias int32_t */
+    krml_checked_int_t;
+
+/* Prims_nat not yet in scope, so the return type is spelled int32_t. */
+inline static int32_t krml_time() {  /* returns time(NULL) cast to int32_t */
+  return (int32_t)time(NULL);
+}
+
+static uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b)  /* returns all-ones if a == b, else 0; no branches in the source */
+{
+  uint64_t x = a ^ b;  /* zero iff a == b */
+  uint64_t minus_x = ~x + (uint64_t)1U;  /* two's-complement negation of x */
+  uint64_t x_or_minus_x = x | minus_x;  /* top bit is set iff x != 0 */
+  uint64_t xnx = x_or_minus_x >> (uint32_t)63U;  /* 1 if a != b, 0 if a == b */
+  return xnx - (uint64_t)1U;  /* 0 - 1 wraps to all-ones; 1 - 1 is 0 */
+}
+
+static uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b)  /* returns all-ones if a >= b (unsigned), else 0; no branches in the source */
+{
+  uint64_t x = a;
+  uint64_t y = b;
+  uint64_t x_xor_y = x ^ y;
+  uint64_t x_sub_y = x - y;  /* wraps modulo 2^64 when x < y */
+  uint64_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint64_t q = x_xor_y | x_sub_y_xor_y;
+  uint64_t x_xor_q = x ^ q;  /* top bit is the borrow of x - y, i.e. set iff x < y */
+  uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U;  /* 1 if x < y, else 0 */
+  return x_xor_q_ - (uint64_t)1U;  /* 0 - 1 wraps to all-ones; 1 - 1 is 0 */
+}
+
+static uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b)  /* returns all-ones if a == b, else 0; no branches in the source */
+{
+  uint32_t x = a ^ b;  /* zero iff a == b */
+  uint32_t minus_x = ~x + (uint32_t)1U;  /* two's-complement negation of x */
+  uint32_t x_or_minus_x = x | minus_x;  /* top bit is set iff x != 0 */
+  uint32_t xnx = x_or_minus_x >> (uint32_t)31U;  /* 1 if a != b, 0 if a == b */
+  return xnx - (uint32_t)1U;  /* 0 - 1 wraps to all-ones; 1 - 1 is 0 */
+}
+
+static uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b)  /* returns all-ones if a >= b (unsigned), else 0; no branches in the source */
+{
+  uint32_t x = a;
+  uint32_t y = b;
+  uint32_t x_xor_y = x ^ y;
+  uint32_t x_sub_y = x - y;  /* wraps modulo 2^32 when x < y */
+  uint32_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint32_t q = x_xor_y | x_sub_y_xor_y;
+  uint32_t x_xor_q = x ^ q;  /* top bit is the borrow of x - y, i.e. set iff x < y */
+  uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U;  /* 1 if x < y, else 0 */
+  return x_xor_q_ - (uint32_t)1U;  /* 0 - 1 wraps to all-ones; 1 - 1 is 0 */
+}
+
+static uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b)  /* returns 0xffff if a == b, else 0; no branches in the source */
+{
+  uint16_t x = a ^ b;  /* zero iff a == b */
+  uint16_t minus_x = ~x + (uint16_t)1U;  /* negation of x; computed in int, then truncated to 16 bits */
+  uint16_t x_or_minus_x = x | minus_x;  /* bit 15 is set iff x != 0 */
+  uint16_t xnx = x_or_minus_x >> (uint32_t)15U;  /* 1 if a != b, 0 if a == b */
+  return xnx - (uint16_t)1U;  /* -1 converts to 0xffff on return; 1 - 1 is 0 */
+}
+
+static uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b)  /* returns 0xffff if a >= b (unsigned), else 0; no branches in the source */
+{
+  uint16_t x = a;
+  uint16_t y = b;
+  uint16_t x_xor_y = x ^ y;
+  uint16_t x_sub_y = x - y;  /* computed in int, then truncated to 16 bits */
+  uint16_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint16_t q = x_xor_y | x_sub_y_xor_y;
+  uint16_t x_xor_q = x ^ q;  /* bit 15 is the borrow of x - y, i.e. set iff x < y */
+  uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U;  /* 1 if x < y, else 0 */
+  return x_xor_q_ - (uint16_t)1U;  /* -1 converts to 0xffff on return; 1 - 1 is 0 */
+}
+
+static uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b)  /* returns 0xff if a == b, else 0; no branches in the source */
+{
+  uint8_t x = a ^ b;  /* zero iff a == b */
+  uint8_t minus_x = ~x + (uint8_t)1U;  /* negation of x; computed in int, then truncated to 8 bits */
+  uint8_t x_or_minus_x = x | minus_x;  /* bit 7 is set iff x != 0 */
+  uint8_t xnx = x_or_minus_x >> (uint32_t)7U;  /* 1 if a != b, 0 if a == b */
+  return xnx - (uint8_t)1U;  /* -1 converts to 0xff on return; 1 - 1 is 0 */
+}
+
+static uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b)  /* returns 0xff if a >= b (unsigned), else 0; no branches in the source */
+{
+  uint8_t x = a;
+  uint8_t y = b;
+  uint8_t x_xor_y = x ^ y;
+  uint8_t x_sub_y = x - y;  /* computed in int, then truncated to 8 bits */
+  uint8_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint8_t q = x_xor_y | x_sub_y_xor_y;
+  uint8_t x_xor_q = x ^ q;  /* bit 7 is the borrow of x - y, i.e. set iff x < y */
+  uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U;  /* 1 if x < y, else 0 */
+  return x_xor_q_ - (uint8_t)1U;  /* -1 converts to 0xff on return; 1 - 1 is 0 */
+}
+
+static void Hacl_Bignum_Modulo_carry_top(uint64_t *b)  /* folds the bits of limb 4 above bit 50 back into limb 0, times 19 */
+{
+  uint64_t b4 = b[4U];
+  uint64_t b0 = b[0U];
+  uint64_t b4_ = b4 & (uint64_t)0x7ffffffffffffU;  /* keep the low 51 bits of the top limb */
+  uint64_t b0_ = b0 + (uint64_t)19U * (b4 >> (uint32_t)51U);  /* overflow above bit 50 re-enters at limb 0 scaled by 19 */
+  b[4U] = b4_;
+  b[0U] = b0_;
+}
+
+inline static void Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, uint128_t *input)  /* output[i] = low 64 bits of input[i], for 5 limbs */
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    uint128_t xi = input[i];
+    output[i] = (uint64_t)xi;  /* truncating cast: the upper 64 bits are discarded */
+  }
+}
+
+inline static void  /* output[i] += input[i] * s for each of 5 limbs, accumulated in 128 bits */
+Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint128_t *output, uint64_t *input, uint64_t s)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    uint128_t xi = output[i];
+    uint64_t yi = input[i];
+    output[i] = xi + (uint128_t)yi * s;  /* the cast makes this a 64x64 -> 128-bit multiply */
+  }
+}
+
+inline static void Hacl_Bignum_Fproduct_carry_wide_(uint128_t *tmp)  /* carries limbs 0..3 upward; limb 4 only receives a carry and is not reduced */
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
+  {
+    uint32_t ctr = i;
+    uint128_t tctr = tmp[ctr];
+    uint128_t tctrp1 = tmp[ctr + (uint32_t)1U];
+    uint64_t r0 = (uint64_t)tctr & (uint64_t)0x7ffffffffffffU;  /* low 51 bits stay in this limb */
+    uint128_t c = tctr >> (uint32_t)51U;  /* everything above bit 50 is the carry */
+    tmp[ctr] = (uint128_t)r0;
+    tmp[ctr + (uint32_t)1U] = tctrp1 + c;
+  }
+}
+
+inline static void Hacl_Bignum_Fmul_shift_reduce(uint64_t *output)  /* in place: moves each limb up one slot; the old top limb becomes limb 0 times 19 */
+{
+  uint64_t tmp = output[4U];  /* save the top limb before it is overwritten */
+  uint64_t b0;
+  {
+    uint32_t i;
+    for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
+    {
+      uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U;  /* ctr runs 4, 3, 2, 1 so no limb is read after being overwritten */
+      uint64_t z = output[ctr - (uint32_t)1U];
+      output[ctr] = z;
+    }
+  }
+  output[0U] = tmp;
+  b0 = output[0U];
+  output[0U] = (uint64_t)19U * b0;  /* limb 0 = 19 * (old limb 4) */
+}
+
+static void  /* accumulates into output the sum over i of (input shift-reduced i times) * input2[i]; input is overwritten */
+Hacl_Bignum_Fmul_mul_shift_reduce_(uint128_t *output, uint64_t *input, uint64_t *input2)
+{
+  uint32_t i;
+  uint64_t input2i;
+  {
+    uint32_t i0;
+    for (i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0 = i0 + (uint32_t)1U)
+    {
+      uint64_t input2i0 = input2[i0];
+      Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i0);
+      Hacl_Bignum_Fmul_shift_reduce(input);  /* mutates the caller's input buffer */
+    }
+  }
+  i = (uint32_t)4U;  /* last limb: accumulate without a trailing shift_reduce */
+  input2i = input2[i];
+  Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+}
+
+inline static void Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2)  /* output = input * input2 on 5-limb values; output is written only after both inputs are consumed */
+{
+  uint64_t tmp[5U] = { 0U };
+  memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]);  /* work on a copy: mul_shift_reduce_ overwrites its input */
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t t[5U];  /* wide accumulator, zeroed by the loop below */
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = (uint128_t)(uint64_t)0U;
+    }
+    {
+      uint128_t b4;
+      uint128_t b0;
+      uint128_t b4_;
+      uint128_t b0_;
+      uint64_t i0;
+      uint64_t i1;
+      uint64_t i0_;
+      uint64_t i1_;
+      Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2);
+      Hacl_Bignum_Fproduct_carry_wide_(t);
+      b4 = t[4U];
+      b0 = t[0U];
+      b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;  /* keep the low 51 bits of the top limb */
+      b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);  /* fold the top-limb overflow into limb 0, times 19 */
+      t[4U] = b4_;
+      t[0U] = b0_;
+      Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
+      i0 = output[0U];
+      i1 = output[1U];
+      i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+      i1_ = i1 + (i0 >> (uint32_t)51U);  /* one more carry step from limb 0 into limb 1 */
+      output[0U] = i0_;
+      output[1U] = i1_;
+    }
+  }
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare__(uint128_t *tmp, uint64_t *output)  /* writes five 128-bit product sums of output's limbs into tmp; output is only read */
+{
+  uint64_t r0 = output[0U];
+  uint64_t r1 = output[1U];
+  uint64_t r2 = output[2U];
+  uint64_t r3 = output[3U];
+  uint64_t r4 = output[4U];
+  uint64_t d0 = r0 * (uint64_t)2U;  /* doubled limbs cover the symmetric cross terms */
+  uint64_t d1 = r1 * (uint64_t)2U;
+  uint64_t d2 = r2 * (uint64_t)2U * (uint64_t)19U;  /* doubled and pre-scaled by 19 */
+  uint64_t d419 = r4 * (uint64_t)19U;
+  uint64_t d4 = d419 * (uint64_t)2U;  /* 2 * 19 * r4 */
+  uint128_t s0 = (uint128_t)r0 * r0 + (uint128_t)d4 * r1 + (uint128_t)d2 * r3;
+  uint128_t s1 = (uint128_t)d0 * r1 + (uint128_t)d4 * r2 + (uint128_t)(r3 * (uint64_t)19U) * r3;
+  uint128_t s2 = (uint128_t)d0 * r2 + (uint128_t)r1 * r1 + (uint128_t)d4 * r3;
+  uint128_t s3 = (uint128_t)d0 * r3 + (uint128_t)d1 * r2 + (uint128_t)r4 * d419;
+  uint128_t s4 = (uint128_t)d0 * r4 + (uint128_t)d1 * r3 + (uint128_t)r2 * r2;
+  tmp[0U] = s0;
+  tmp[1U] = s1;
+  tmp[2U] = s2;
+  tmp[3U] = s3;
+  tmp[4U] = s4;
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare_(uint128_t *tmp, uint64_t *output)  /* squares output in place; tmp is 5-limb wide scratch that gets overwritten */
+{
+  uint128_t b4;
+  uint128_t b0;
+  uint128_t b4_;
+  uint128_t b0_;
+  uint64_t i0;
+  uint64_t i1;
+  uint64_t i0_;
+  uint64_t i1_;
+  Hacl_Bignum_Fsquare_fsquare__(tmp, output);  /* wide product sums go into tmp */
+  Hacl_Bignum_Fproduct_carry_wide_(tmp);
+  b4 = tmp[4U];
+  b0 = tmp[0U];
+  b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;  /* keep the low 51 bits of the top limb */
+  b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);  /* fold the top-limb overflow into limb 0, times 19 */
+  tmp[4U] = b4_;
+  tmp[0U] = b0_;
+  Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+  i0 = output[0U];
+  i1 = output[1U];
+  i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+  i1_ = i1 + (i0 >> (uint32_t)51U);  /* one more carry step from limb 0 into limb 1 */
+  output[0U] = i0_;
+  output[1U] = i1_;
+}
+
+static void  /* squares input in place count1 times; the first squaring is unconditional, so count1 == 0 still squares once */
+Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, uint128_t *tmp, uint32_t count1)
+{
+  uint32_t i;
+  Hacl_Bignum_Fsquare_fsquare_(tmp, input);
+  for (i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U)  /* starts at 1 because one squaring is already done */
+    Hacl_Bignum_Fsquare_fsquare_(tmp, input);
+}
+
+inline static void  /* output = input squared count1 times; input is copied first and left unmodified */
+Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1)
+{
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t t[5U];  /* wide scratch, zeroed by the loop below */
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = (uint128_t)(uint64_t)0U;
+    }
+    memcpy(output, input, (uint32_t)5U * sizeof input[0U]);  /* copy 5 limbs, then square the copy in place */
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+  }
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1)  /* squares output in place count1 times */
+{
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t t[5U];  /* wide scratch, zeroed by the loop below */
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = (uint128_t)(uint64_t)0U;
+    }
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+  }
+}
+
+inline static void Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z)  /* fixed chain of squarings and multiplications on z; NOTE(review): presumably the field inverse of z - confirm against upstream HACL* */
+{
+  uint64_t buf[20U] = { 0U };  /* four 5-limb temporaries at offsets 0, 5, 10 and 15 */
+  uint64_t *a0 = buf;
+  uint64_t *t00 = buf + (uint32_t)5U;
+  uint64_t *b0 = buf + (uint32_t)10U;
+  uint64_t *t01;
+  uint64_t *b1;
+  uint64_t *c0;
+  uint64_t *a;
+  uint64_t *t0;
+  uint64_t *b;
+  uint64_t *c;
+  Hacl_Bignum_Fsquare_fsquare_times(a0, z, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)2U);
+  Hacl_Bignum_Fmul_fmul(b0, t00, z);
+  Hacl_Bignum_Fmul_fmul(a0, b0, a0);  /* output aliases input2; fmul writes output only after reading both inputs */
+  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)1U);
+  Hacl_Bignum_Fmul_fmul(b0, t00, b0);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5U);
+  t01 = buf + (uint32_t)5U;  /* same storage as t00 */
+  b1 = buf + (uint32_t)10U;  /* same storage as b0 */
+  c0 = buf + (uint32_t)15U;
+  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10U);
+  Hacl_Bignum_Fmul_fmul(c0, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20U);
+  Hacl_Bignum_Fmul_fmul(t01, t01, c0);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10U);
+  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50U);
+  a = buf;  /* same storage as a0 */
+  t0 = buf + (uint32_t)5U;  /* same storage as t00 and t01 */
+  b = buf + (uint32_t)10U;  /* same storage as b0 and b1 */
+  c = buf + (uint32_t)15U;  /* same storage as c0 */
+  Hacl_Bignum_Fmul_fmul(c, t0, b);
+  Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100U);
+  Hacl_Bignum_Fmul_fmul(t0, t0, c);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50U);
+  Hacl_Bignum_Fmul_fmul(t0, t0, b);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5U);
+  Hacl_Bignum_Fmul_fmul(out, t0, a);  /* final product is written to the caller's out */
+}
+
+inline static void Hacl_Bignum_fsum(uint64_t *a, uint64_t *b)  /* a[i] += b[i] for 5 limbs; no carry propagation here */
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    uint64_t xi = a[i];
+    uint64_t yi = b[i];
+    a[i] = xi + yi;
+  }
+}
+
+inline static void Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b)  /* a[i] = (b[i] + bias[i]) - a[i]: the result is b minus a, stored into a; b is not modified */
+{
+  uint64_t tmp[5U] = { 0U };
+  uint64_t b0;
+  uint64_t b1;
+  uint64_t b2;
+  uint64_t b3;
+  uint64_t b4;
+  memcpy(tmp, b, (uint32_t)5U * sizeof b[0U]);  /* bias a copy so the caller's b stays intact */
+  b0 = tmp[0U];
+  b1 = tmp[1U];
+  b2 = tmp[2U];
+  b3 = tmp[3U];
+  b4 = tmp[4U];
+  tmp[0U] = b0 + (uint64_t)0x3fffffffffff68U;  /* 8 * (2^51 - 19) */
+  tmp[1U] = b1 + (uint64_t)0x3ffffffffffff8U;  /* 8 * (2^51 - 1) */
+  tmp[2U] = b2 + (uint64_t)0x3ffffffffffff8U;
+  tmp[3U] = b3 + (uint64_t)0x3ffffffffffff8U;
+  tmp[4U] = b4 + (uint64_t)0x3ffffffffffff8U;  /* NOTE(review): bias presumably keeps each limb subtraction from wrapping - confirm limb bounds */
+  {
+    uint32_t i;
+    for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+    {
+      uint64_t xi = a[i];
+      uint64_t yi = tmp[i];
+      a[i] = yi - xi;  /* biased b minus a */
+    }
+  }
+}
+
+inline static void Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s)  /* output = b * s per limb, followed by a carry pass and a top-limb fold */
+{
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t tmp[5U];  /* wide products, zeroed by the loop below */
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        tmp[_i] = (uint128_t)(uint64_t)0U;
+    }
+    {
+      uint128_t b4;
+      uint128_t b0;
+      uint128_t b4_;
+      uint128_t b0_;
+      {
+        uint32_t i;
+        for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+        {
+          uint64_t xi = b[i];
+          tmp[i] = (uint128_t)xi * s;  /* 64x64 -> 128-bit product */
+        }
+      }
+      Hacl_Bignum_Fproduct_carry_wide_(tmp);
+      b4 = tmp[4U];
+      b0 = tmp[0U];
+      b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;  /* keep the low 51 bits of the top limb */
+      b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);  /* fold the top-limb overflow into limb 0, times 19 */
+      tmp[4U] = b4_;
+      tmp[0U] = b0_;
+      Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+    }
+  }
+}
+
+inline static void Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b)  /* thin wrapper: forwards to Hacl_Bignum_Fmul_fmul */
+{
+  Hacl_Bignum_Fmul_fmul(output, a, b);
+}
+
+inline static void Hacl_Bignum_crecip(uint64_t *output, uint64_t *input)  /* thin wrapper: forwards to Hacl_Bignum_Crecip_crecip */
+{
+  Hacl_Bignum_Crecip_crecip(output, input);
+}
+
+static void  /* masked swap of a[ctr-1] and b[ctr-1]: swapped when swap1 is all-ones, unchanged when swap1 is 0 */
+Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+  uint32_t i = ctr - (uint32_t)1U;  /* ctr is a 1-based position; the visible caller only passes ctr != 0 */
+  uint64_t ai = a[i];
+  uint64_t bi = b[i];
+  uint64_t x = swap1 & (ai ^ bi);  /* the limbs' XOR difference, or 0 when the mask is 0 */
+  uint64_t ai1 = ai ^ x;
+  uint64_t bi1 = bi ^ x;
+  a[i] = ai1;
+  b[i] = bi1;
+}
+
+static void  /* applies the masked swap to limbs ctr-1 down to 0, recursing until ctr reaches 0 */
+Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+  if (!(ctr == (uint32_t)0U))  /* base case: nothing to do when ctr is 0 */
+  {
+    uint32_t i;
+    Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr);
+    i = ctr - (uint32_t)1U;
+    Hacl_EC_Point_swap_conditional_(a, b, swap1, i);  /* recurse on the remaining lower limbs */
+  }
+}
+
+static void Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap)  /* masked swap of two 10-limb buffers; NOTE(review): assumes iswap is 0 or 1 - confirm at callers */
+{
+  uint64_t swap1 = (uint64_t)0U - iswap;  /* iswap == 1 gives all-ones, iswap == 0 gives 0 */
+  Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5U);  /* limbs 0..4 */
+  Hacl_EC_Point_swap_conditional_(a + (uint32_t)5U, b + (uint32_t)5U, swap1, (uint32_t)5U);  /* limbs 5..9 */
+}
+
+static void Hacl_EC_Point_copy(uint64_t *output, uint64_t *input)  /* copies 10 limbs from input to output as two 5-limb memcpy calls */
+{
+  memcpy(output, input, (uint32_t)5U * sizeof input[0U]);  /* limbs 0..4 */
+  memcpy(output + (uint32_t)5U,  /* limbs 5..9 */
+    input + (uint32_t)5U,
+    (uint32_t)5U * sizeof (input + (uint32_t)5U)[0U]);
+}
+
+static void Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input)  /* unpacks little-endian bytes into five 51-bit limbs; reads input bytes 0..31 */
+{
+  uint64_t i0 = load64_le(input);  /* five overlapping 8-byte loads at byte offsets 0, 6, 12, 19 and 24 */
+  uint8_t *x00 = input + (uint32_t)6U;
+  uint64_t i1 = load64_le(x00);
+  uint8_t *x01 = input + (uint32_t)12U;
+  uint64_t i2 = load64_le(x01);
+  uint8_t *x02 = input + (uint32_t)19U;
+  uint64_t i3 = load64_le(x02);
+  uint8_t *x0 = input + (uint32_t)24U;
+  uint64_t i4 = load64_le(x0);
+  uint64_t output0 = i0 & (uint64_t)0x7ffffffffffffU;  /* input bits 0..50 */
+  uint64_t output1 = i1 >> (uint32_t)3U & (uint64_t)0x7ffffffffffffU;  /* byte 6 is bit 48; shift 3 starts at bit 51 */
+  uint64_t output2 = i2 >> (uint32_t)6U & (uint64_t)0x7ffffffffffffU;  /* byte 12 is bit 96; shift 6 starts at bit 102 */
+  uint64_t output3 = i3 >> (uint32_t)1U & (uint64_t)0x7ffffffffffffU;  /* byte 19 is bit 152; shift 1 starts at bit 153 */
+  uint64_t output4 = i4 >> (uint32_t)12U & (uint64_t)0x7ffffffffffffU;  /* byte 24 is bit 192; shift 12 starts at bit 204, so input bit 255 is dropped */
+  output[0U] = output0;
+  output[1U] = output1;
+  output[2U] = output2;
+  output[3U] = output3;
+  output[4U] = output4;
+}
+
+static void Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input)  /* one sequential carry pass from limb 0 up to limb 4; limb 4 is left unmasked */
+{
+  uint64_t t0 = input[0U];
+  uint64_t t1 = input[1U];
+  uint64_t t2 = input[2U];
+  uint64_t t3 = input[3U];
+  uint64_t t4 = input[4U];
+  uint64_t t1_ = t1 + (t0 >> (uint32_t)51U);  /* each limb absorbs the overflow above bit 50 of the limb below */
+  uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU;  /* then the lower limb is masked to 51 bits */
+  uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U);
+  uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U);
+  uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U);
+  uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU;
+  input[0U] = t0_;
+  input[1U] = t1__;
+  input[2U] = t2__;
+  input[3U] = t3__;
+  input[4U] = t4_;  /* stored without masking; may still exceed 51 bits */
+}
+
+static void Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input)
+{ /* Carry ripple over the five limbs, then Hacl_Bignum_Modulo_carry_top on the same buffer. */
+  Hacl_EC_Format_fcontract_first_carry_pass(input);
+  Hacl_Bignum_Modulo_carry_top(input); /* defined outside this excerpt; presumably folds limb 4's overflow into limb 0 -- confirm */
+}
+
+static void Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input)
+{
+  /*
+   * One carry ripple over the five limbs: whatever sits above bit 50 of
+   * limb k is added into limb k+1, and limbs 0..3 are cut back to 51 bits.
+   * Limb 4 only receives a carry; it is stored without masking.
+   */
+  const uint64_t low51 = (uint64_t)0x7ffffffffffffU;
+
+  uint64_t limb0 = input[0U];
+  uint64_t limb1 = input[1U] + (limb0 >> (uint32_t)51U);
+  uint64_t limb2 = input[2U] + (limb1 >> (uint32_t)51U);
+  uint64_t limb3 = input[3U] + (limb2 >> (uint32_t)51U);
+  uint64_t limb4 = input[4U] + (limb3 >> (uint32_t)51U);
+
+  input[0U] = limb0 & low51;
+  input[1U] = limb1 & low51;
+  input[2U] = limb2 & low51;
+  input[3U] = limb3 & low51;
+  input[4U] = limb4;
+}
+
+static void Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input)
+{ /* Second carry ripple + carry_top, followed by one extra carry from limb 0 into limb 1. */
+  uint64_t i0;
+  uint64_t i1;
+  uint64_t i0_;
+  uint64_t i1_;
+  Hacl_EC_Format_fcontract_second_carry_pass(input);
+  Hacl_Bignum_Modulo_carry_top(input); /* defined outside this excerpt */
+  i0 = input[0U]; /* re-read: carry_top may have changed limb 0 */
+  i1 = input[1U];
+  i0_ = i0 & (uint64_t)0x7ffffffffffffU; /* limb 0 back to 51 bits */
+  i1_ = i1 + (i0 >> (uint32_t)51U); /* its overflow goes to limb 1 */
+  input[0U] = i0_;
+  input[1U] = i1_;
+}
+
+static void Hacl_EC_Format_fcontract_trim(uint64_t *input)
+{ /* Branch-free conditional subtraction of the constant (0x7ffffffffffed, then four limbs of 2^51 - 1). */
+  uint64_t a0 = input[0U];
+  uint64_t a1 = input[1U];
+  uint64_t a2 = input[2U];
+  uint64_t a3 = input[3U];
+  uint64_t a4 = input[4U];
+  uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffedU); /* presumably all ones iff a0 >= constant -- helper not in view */
+  uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffffU); /* presumably all ones iff equal -- helper not in view */
+  uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; /* set only when all five limb tests pass */
+  uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffedU & mask); /* subtracts the constant or zero, depending on mask */
+  uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffffU & mask);
+  input[0U] = a0_;
+  input[1U] = a1_;
+  input[2U] = a2_;
+  input[3U] = a3_;
+  input[4U] = a4_;
+}
+
+static void Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input)
+{
+  /*
+   * Concatenate five limbs at bit offsets 0, 51, 102, 153 and 204 into four
+   * 64-bit words and hand them to store64_le at output + 0, 8, 16 and 24.
+   */
+  const uint64_t limb0 = input[0U];
+  const uint64_t limb1 = input[1U];
+  const uint64_t limb2 = input[2U];
+  const uint64_t limb3 = input[3U];
+  const uint64_t limb4 = input[4U];
+  const uint64_t word0 = limb0 | limb1 << (uint32_t)51U;
+  const uint64_t word1 = limb1 >> (uint32_t)13U | limb2 << (uint32_t)38U;
+  const uint64_t word2 = limb2 >> (uint32_t)26U | limb3 << (uint32_t)25U;
+  const uint64_t word3 = limb3 >> (uint32_t)39U | limb4 << (uint32_t)12U;
+  store64_le(&output[0U], word0);
+  store64_le(&output[8U], word1);
+  store64_le(&output[16U], word2);
+  store64_le(&output[24U], word3);
+}
+
+static void Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input)
+{ /* Serialise five limbs into the 32 bytes at output; input is modified in place along the way. */
+  Hacl_EC_Format_fcontract_first_carry_full(input); /* carry ripple + carry_top */
+  Hacl_EC_Format_fcontract_second_carry_full(input); /* second ripple + carry_top + limb0 -> limb1 carry */
+  Hacl_EC_Format_fcontract_trim(input); /* masked subtraction of the constant */
+  Hacl_EC_Format_fcontract_store(output, input); /* four store64_le writes */
+}
+
+static void Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point)
+{
+  /* scratch[0..4] receives crecip(z); scratch[5..9] receives x times that value. */
+  uint64_t scratch[10U] = { 0U };
+  uint64_t *z_recip = &scratch[0U];
+  uint64_t *quotient = &scratch[5U];
+  Hacl_Bignum_crecip(z_recip, &point[5U]);
+  Hacl_Bignum_fmul(quotient, &point[0U], z_recip);
+  /* fcontract carries, trims and serialises the limbs into scalar. */
+  Hacl_EC_Format_fcontract(scalar, quotient);
+}
+
+static void
+Hacl_EC_AddAndDouble_fmonty(
+  uint64_t *pp, /* out: x2 = pp[0..4], z2 = pp[5..9] */
+  uint64_t *ppq, /* out: x3 = ppq[0..4], z3 = ppq[5..9] */
+  uint64_t *p, /* in, overwritten: x = p[0..4], z = p[5..9] */
+  uint64_t *pq, /* in, overwritten: xprime = pq[0..4], zprime = pq[5..9] */
+  uint64_t *qmqp /* in: only qmqp[0..4] (qx) is passed on */
+)
+{
+  uint64_t *qx = qmqp;
+  uint64_t *x2 = pp;
+  uint64_t *z2 = pp + (uint32_t)5U;
+  uint64_t *x3 = ppq;
+  uint64_t *z3 = ppq + (uint32_t)5U;
+  uint64_t *x = p;
+  uint64_t *z = p + (uint32_t)5U;
+  uint64_t *xprime = pq;
+  uint64_t *zprime = pq + (uint32_t)5U;
+  uint64_t buf[40U] = { 0U }; /* eight five-limb scratch slots, carved up below */
+  uint64_t *origx = buf;
+  uint64_t *origxprime0 = buf + (uint32_t)5U;
+  uint64_t *xxprime0 = buf + (uint32_t)25U;
+  uint64_t *zzprime0 = buf + (uint32_t)30U;
+  uint64_t *origxprime;
+  uint64_t *xx0;
+  uint64_t *zz0;
+  uint64_t *xxprime;
+  uint64_t *zzprime;
+  uint64_t *zzzprime;
+  uint64_t *zzz;
+  uint64_t *xx;
+  uint64_t *zz;
+  uint64_t scalar;
+  memcpy(origx, x, (uint32_t)5U * sizeof x[0U]); /* x is saved because the next call takes it as first argument */
+  Hacl_Bignum_fsum(x, z); /* helper not in view; presumably x := x + z */
+  Hacl_Bignum_fdifference(z, origx); /* helper not in view; presumably z := origx - z -- confirm operand order */
+  memcpy(origxprime0, xprime, (uint32_t)5U * sizeof xprime[0U]);
+  Hacl_Bignum_fsum(xprime, zprime);
+  Hacl_Bignum_fdifference(zprime, origxprime0);
+  Hacl_Bignum_fmul(xxprime0, xprime, z);
+  Hacl_Bignum_fmul(zzprime0, x, zprime);
+  origxprime = buf + (uint32_t)5U; /* same slot as origxprime0, reused from here on */
+  xx0 = buf + (uint32_t)15U;
+  zz0 = buf + (uint32_t)20U;
+  xxprime = buf + (uint32_t)25U; /* same slot as xxprime0 */
+  zzprime = buf + (uint32_t)30U; /* same slot as zzprime0 */
+  zzzprime = buf + (uint32_t)35U;
+  memcpy(origxprime, xxprime, (uint32_t)5U * sizeof xxprime[0U]);
+  Hacl_Bignum_fsum(xxprime, zzprime);
+  Hacl_Bignum_fdifference(zzprime, origxprime);
+  Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1U); /* first half of ppq written here */
+  Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1U);
+  Hacl_Bignum_fmul(z3, zzzprime, qx); /* second half of ppq */
+  Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1U);
+  zzz = buf + (uint32_t)10U;
+  xx = buf + (uint32_t)15U; /* same slot as xx0 */
+  zz = buf + (uint32_t)20U; /* same slot as zz0 */
+  Hacl_Bignum_fmul(x2, xx, zz); /* first half of pp; must precede the in-place update of zz below */
+  Hacl_Bignum_fdifference(zz, xx);
+  scalar = (uint64_t)121665U;
+  Hacl_Bignum_fscalar(zzz, zz, scalar);
+  Hacl_Bignum_fsum(zzz, xx);
+  Hacl_Bignum_fmul(z2, zzz, zz); /* second half of pp */
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt
+)
+{
+  /* Only the most significant bit of byt is consumed by this step. */
+  const uint64_t top_bit = (uint64_t)(byt >> (uint32_t)7U);
+  /* Swap on the way in, combine, then swap the fresh pair on the way out. */
+  Hacl_EC_Point_swap_conditional(nq, nqpq, top_bit);
+  Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q);
+  Hacl_EC_Point_swap_conditional(nq2, nqpq2, top_bit);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt
+)
+{
+  /* First step consumes bit 7 of byt and leaves its result in nq2/nqpq2. */
+  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
+  /* Second step consumes bit 6 with the buffer roles exchanged. */
+  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, (uint8_t)(byt << (uint32_t)1U));
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt,
+  uint32_t i
+)
+{
+  /* Iterative form: i double steps, each using the two top bits of byt. */
+  uint32_t remaining;
+  for (remaining = i; remaining != (uint32_t)0U; remaining--)
+  {
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt);
+    /* Shift the next two bits into positions 7 and 6. */
+    byt = (uint8_t)(byt << (uint32_t)2U);
+  }
+}
+
+static void
+Hacl_EC_Ladder_BigLoop_cmult_big_loop(
+  uint8_t *n1,
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint32_t i
+)
+{
+  /* Walk n1 from index i - 1 down to index 0, one byte per iteration. */
+  while (i != (uint32_t)0U)
+  {
+    i -= (uint32_t)1U;
+    /* Four double steps consume all eight bits of the byte. */
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, n1[i], (uint32_t)4U);
+  }
+}
+
+static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q)
+{
+  /* Four ten-limb points: nq, nqpq and the two scratch points of the ladder. */
+  uint64_t points[40U] = { 0U };
+  uint64_t *acc = &points[0U];
+  uint64_t *acc_plus_q = &points[10U];
+  Hacl_EC_Point_copy(acc_plus_q, q);
+  acc[0U] = (uint64_t)1U;
+  Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, acc, acc_plus_q, &points[20U], &points[30U], q,
+                                        (uint32_t)32U); /* reads n1[31] down to n1[0] */
+  Hacl_EC_Point_copy(result, acc);
+}
+
+void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
+{
+  /*
+   * In order: expand basepoint, clamp a private copy of secret, run the
+   * ladder, and write the 32-byte encoding of the result to mypublic.
+   */
+  uint64_t base[10U] = { 0U };
+  uint8_t clamped[32U] = { 0U };
+  uint64_t product[15U] = { 0U };
+  uint8_t first;
+  uint8_t last;
+
+  /* base[0..4] holds the expanded basepoint, base[5..9] the limbs of 1. */
+  Hacl_EC_Format_fexpand(&base[0U], basepoint);
+  base[5U] = (uint64_t)1U;
+
+  /* Work on a copy so the caller's secret buffer is never written. */
+  memcpy(clamped, secret, (uint32_t)32U * sizeof secret[0U]);
+
+  /* Clamp: clear the three low bits of byte 0 ... */
+  first = clamped[0U] & (uint8_t)248U;
+  clamped[0U] = first;
+
+  /* ... then clear bit 7 and set bit 6 of byte 31. */
+  last = clamped[31U] & (uint8_t)127U;
+  last = last | (uint8_t)64U;
+  clamped[31U] = last;
+
+  /* product starts with limb 0 set to 1; the ladder overwrites its first */
+  /* ten limbs with the result point. */
+  product[0U] = (uint64_t)1U;
+  Hacl_EC_Ladder_cmult(product, clamped, base);
+
+  /* Serialise the resulting point into mypublic. */
+  Hacl_EC_Format_scalar_of_point(mypublic, product);
+}
diff --git a/third_party/mbedtls/test/secp384r1_test.c b/third_party/mbedtls/test/secp384r1_test.c
new file mode 100644
index 000000000..68de75ce7
--- /dev/null
+++ b/third_party/mbedtls/test/secp384r1_test.c
@@ -0,0 +1,294 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/rand/rand.h"
+#include "libc/stdio/stdio.h"
+#include "libc/str/str.h"
+#include "libc/testlib/ezbench.h"
+#include "libc/testlib/testlib.h"
+#include "third_party/mbedtls/bignum.h"
+#include "third_party/mbedtls/ecp.h"
+#include "third_party/mbedtls/ecp_internal.h"
+#include "third_party/mbedtls/math.h"
+#ifdef MBEDTLS_ECP_C
+
+int ecp_mod_p384_old(mbedtls_mpi *);
+
+int GetEntropy(void *c, unsigned char *p, size_t n) { /* RNG callback passed to mbedtls_mpi_fill_random in this file */
+  rngset(p, n, rand64, -1); /* presumably fills p[0..n) from rand64 -- confirm against rngset's documentation */
+  return 0; /* always reports success; c is not used */
+}
+
+TEST(secp384r1, testIsTheSame) { /* ecp_mod_p384 and ecp_mod_p384_old must agree limb for limb */
+  int i, j; /* i: trial counter, j: limb index used only when reporting a mismatch */
+  mbedtls_mpi A, B;
+  mbedtls_mpi_init(&A);
+  mbedtls_mpi_init(&B);
+  for (i = 0; i < 1000; ++i) { /* draw and reduce a fresh 12-limb operand on every trial */
+    mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0);
+    mbedtls_mpi_copy(&B, &A);
+    ecp_mod_p384(&A);
+    ecp_mod_p384_old(&B);
+    if (memcmp(A.p, B.p, 12 * 8)) {
+      for (j = 0; j < 12; ++j) {
+        printf("0x%016lx vs. 0x%016lx %d\n", A.p[j], B.p[j], A.p[j] == B.p[j]);
+      }
+      exit(1);
+    }
+  }
+  mbedtls_mpi_free(&B);
+  mbedtls_mpi_free(&A);
+}
+
+static inline bool mbedtls_p384_gte(uint64_t p[7]) {
+  /* Limbs of the constant being compared against, least significant first. */
+  static const uint64_t kModulus[6] = {
+      0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe,
+      0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
+  };
+  int k;
+  if ((int64_t)p[6] > 0) return true; /* a positive overflow limb decides alone */
+  for (k = 5; k >= 0; --k) {
+    /* The most significant differing limb settles the comparison. */
+    if (p[k] > kModulus[k]) return true;
+    if (p[k] < kModulus[k]) return false;
+  }
+  return true; /* p[0..5] equals the constant exactly */
+}
+
+static inline void mbedtls_p384_gro(uint64_t p[7]) { /* p[0..6] += the six-limb constant, in place */
+#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
+  asm("addq\t%1,%0\n\t" /* p[0] += %1, starts the carry chain */
+      "adcq\t%2,8+%0\n\t" /* p[1] += %2 + carry */
+      "adcq\t%3,16+%0\n\t" /* p[2] += %3 + carry */
+      "adcq\t%4,24+%0\n\t" /* p[3..5] all use operand %4 */
+      "adcq\t%4,32+%0\n\t"
+      "adcq\t%4,40+%0\n\t"
+      "adcq\t$0,48+%0" /* final carry lands in p[6] */
+      : "+o"(*p) /* offsettable memory operand so that 8+%0 ... 48+%0 are valid */
+      : "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
+        "i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
+      : "memory", "cc"); /* "memory": limbs beyond *p are written too */
+#else
+  uint64_t c; /* carry between limbs */
+  ADC(p[0], p[0], 0x00000000ffffffff, 0, c); /* ADC macro comes from outside this file; argument roles assumed (dst, a, b, carry-in, carry-out) */
+  ADC(p[1], p[1], 0xffffffff00000000, c, c);
+  ADC(p[2], p[2], 0xfffffffffffffffe, c, c);
+  ADC(p[3], p[3], 0xffffffffffffffff, c, c);
+  ADC(p[4], p[4], 0xffffffffffffffff, c, c);
+  ADC(p[5], p[5], 0xffffffffffffffff, c, c);
+  ADC(p[6], p[6], 0, c, c);
+#endif
+}
+
+static inline void mbedtls_p384_red(uint64_t p[7]) { /* p[0..6] -= the six-limb constant, in place */
+#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
+  asm("subq\t%1,%0\n\t" /* p[0] -= %1, starts the borrow chain */
+      "sbbq\t%2,8+%0\n\t" /* p[1] -= %2 + borrow */
+      "sbbq\t%3,16+%0\n\t" /* p[2] -= %3 + borrow */
+      "sbbq\t%4,24+%0\n\t" /* p[3..5] all use operand %4 */
+      "sbbq\t%4,32+%0\n\t"
+      "sbbq\t%4,40+%0\n\t"
+      "sbbq\t$0,48+%0" /* final borrow is taken from p[6] */
+      : "+o"(*p) /* offsettable memory operand so that 8+%0 ... 48+%0 are valid */
+      : "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
+        "i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
+      : "memory", "cc"); /* "memory": limbs beyond *p are written too */
+#else
+  uint64_t c; /* borrow between limbs */
+  SBB(p[0], p[0], 0x00000000ffffffff, 0, c); /* SBB macro comes from outside this file; argument roles assumed (dst, a, b, borrow-in, borrow-out) */
+  SBB(p[1], p[1], 0xffffffff00000000, c, c);
+  SBB(p[2], p[2], 0xfffffffffffffffe, c, c);
+  SBB(p[3], p[3], 0xffffffffffffffff, c, c);
+  SBB(p[4], p[4], 0xffffffffffffffff, c, c);
+  SBB(p[5], p[5], 0xffffffffffffffff, c, c);
+  SBB(p[6], p[6], 0, c, c);
+#endif
+}
+
+static inline void mbedtls_p384_rum(uint64_t p[7]) { /* repeat mbedtls_p384_red while mbedtls_p384_gte holds */
+  while (mbedtls_p384_gte(p)) mbedtls_p384_red(p);
+}
+
+static inline void mbedtls_p384_mod(uint64_t X[12]) {
+  /* secp384r1() runs first; only limbs X[0..6] are corrected afterwards. */
+  secp384r1(X);
+  if ((int64_t)X[6] >= 0) {
+    /* Non-negative overflow limb: subtract while _gte still holds. */
+    mbedtls_p384_rum(X);
+    return;
+  }
+  /* Negative overflow limb: add until X[6] is no longer negative. */
+  do mbedtls_p384_gro(X);
+  while ((int64_t)X[6] < 0);
+}
+
+TEST(secp384r1, needsDownwardCorrection) { /* all-ones 12-limb input must reduce to W */
+  int i;
+  uint64_t P[6] = { /* NOTE(review): P is never read in this test body */
+      0x00000000ffffffff,  //
+      0xffffffff00000000,  //
+      0xfffffffffffffffe,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+  };
+  uint64_t X[12] = {
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+  };
+  uint64_t W[12] /* == X mod P */ = { /* five initialisers; limbs 5..11 are implicitly zero */
+      0xfffffffe00000000,  //
+      0x0000000200000000,  //
+      0xfffffffe00000000,  //
+      0x0000000200000000,  //
+      0x0000000000000001,  //
+  };
+  mbedtls_p384_mod(X); /* reduces X in place */
+  if (memcmp(W, X, 12 * 8)) { /* all 12 limbs are compared, so X[5..11] must end up zero */
+    for (i = 0; i < 12; ++i) {
+      printf("0x%016lx vs. 0x%016lx %d\n", W[i], X[i], W[i] == X[i]);
+    }
+    exit(1);
+  }
+}
+
+TEST(secp384r1, needsUpwardCorrection) { /* input with only the low half of limb 11 set must reduce to W */
+  int i;
+  uint64_t P[6] = { /* NOTE(review): P is never read in this test body */
+      0x00000000ffffffff,  //
+      0xffffffff00000000,  //
+      0xfffffffffffffffe,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+      0xffffffffffffffff,  //
+  };
+  uint64_t X[12] = {
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x0000000000000000,  //
+      0x00000000ffffffff,  //
+  };
+  uint64_t W[12] /* == X mod P */ = { /* six initialisers; limbs 6..11 are implicitly zero */
+      0xffffffffffffffff,  //
+      0x0000000000000000,  //
+      0xfffffffefffffffd,  //
+      0x0000000100000000,  //
+      0x0000000000000000,  //
+      0x00000001ffffffff,  //
+  };
+  mbedtls_p384_mod(X); /* reduces X in place */
+  if (memcmp(W, X, 12 * 8)) { /* all 12 limbs are compared, so X[6..11] must end up zero */
+    for (i = 0; i < 12; ++i) {
+      printf("0x%016lx vs. 0x%016lx %d\n", W[i], X[i], W[i] == X[i]);
+    }
+    exit(1);
+  }
+}
+
+BENCH(secp384r1, bench) { /* times secp384r1, ecp_mod_p384 and ecp_mod_p384_old on one shared operand */
+  mbedtls_mpi A;
+  mbedtls_mpi_init(&A);
+  mbedtls_mpi_fill_random(&A, 12 * 8, GetEntropy, 0); /* 96 random bytes, filled once */
+  EZBENCH2("secp384r1", donothing, secp384r1(A.p));
+  EZBENCH2("ecp_mod_p384", donothing, ecp_mod_p384(&A)); /* NOTE(review): A is not refilled, so this presumably sees already-reduced input */
+  EZBENCH2("ecp_mod_p384_old", donothing, ecp_mod_p384_old(&A));
+  mbedtls_mpi_free(&A);
+}
+
+void mbedtls_p384_shl_a(uint64_t p[7]) { /* inline-asm variant: shift p[0..6] left one bit, then mbedtls_p384_rum */
+  asm("shlq\t%0\n\t" /* p[0] <<= 1, top bit goes to CF */
+      "rclq\t8+%0\n\t" /* each rclq rotates CF in at the bottom and the top bit out */
+      "rclq\t16+%0\n\t"
+      "rclq\t24+%0\n\t"
+      "rclq\t32+%0\n\t"
+      "rclq\t40+%0\n\t"
+      "rclq\t48+%0\n\t" /* p[6] is shifted too, unlike mbedtls_p384_shl_b which overwrites it */
+      : "+o"(*p)
+      : /* no inputs */
+      : "memory", "cc");
+  mbedtls_p384_rum(p); /* NOTE(review): no __x86_64__ guard around the asm, unlike mbedtls_p384_red */
+}
+
+void mbedtls_p384_shl_b(uint64_t p[7]) {
+  int k;
+  /* Left shift by one bit; p[6] is replaced by the bit leaving p[5]. */
+  p[6] = p[5] >> 63;
+  for (k = 5; k > 0; --k) {
+    p[k] = p[k] << 1 | p[k - 1] >> 63;
+  }
+  p[0] <<= 1;
+  mbedtls_p384_rum(p);
+}
+
+BENCH(shl, bench) { /* compares the asm and portable shift variants */
+  uint64_t A[7] = {0}; /* all-zero operand, shared by both measurements */
+  EZBENCH2("mbedtls_p384_shl_a", donothing, mbedtls_p384_shl_a(A));
+  EZBENCH2("mbedtls_p384_shl_b", donothing, mbedtls_p384_shl_b(A));
+}
+
+void mbedtls_p384_red_a(uint64_t p[7]) { /* asm-only copy of the x86-64 branch of mbedtls_p384_red, for benchmarking */
+  asm("subq\t%1,%0\n\t" /* p[0] -= %1, starts the borrow chain */
+      "sbbq\t%2,8+%0\n\t"
+      "sbbq\t%3,16+%0\n\t"
+      "sbbq\t%4,24+%0\n\t" /* p[3..5] all use operand %4 */
+      "sbbq\t%4,32+%0\n\t"
+      "sbbq\t%4,40+%0\n\t"
+      "sbbq\t$0,48+%0" /* final borrow is taken from p[6] */
+      : "+o"(*p)
+      : "r"(0x00000000ffffffffl), "r"(0xffffffff00000000),
+        "i"(0xfffffffffffffffel), "i"(0xffffffffffffffff)
+      : "memory", "cc"); /* NOTE(review): no __x86_64__ guard here, unlike mbedtls_p384_red */
+}
+
+void mbedtls_p384_red_b(uint64_t p[7]) { /* macro-only copy of the portable branch of mbedtls_p384_red, for benchmarking */
+  uint64_t c; /* borrow between limbs */
+  SBB(p[0], p[0], 0x00000000ffffffff, 0, c); /* SBB macro comes from outside this file; argument roles assumed */
+  SBB(p[1], p[1], 0xffffffff00000000, c, c);
+  SBB(p[2], p[2], 0xfffffffffffffffe, c, c);
+  SBB(p[3], p[3], 0xffffffffffffffff, c, c);
+  SBB(p[4], p[4], 0xffffffffffffffff, c, c);
+  SBB(p[5], p[5], 0xffffffffffffffff, c, c);
+  SBB(p[6], p[6], 0, c, c); /* last borrow comes out of p[6] */
+}
+
+BENCH(red, bench) { /* compares the asm and macro subtraction variants */
+  uint64_t A[7] = {0}; /* all-zero operand, shared by both measurements */
+  EZBENCH2("mbedtls_p384_red_a", donothing, mbedtls_p384_red_a(A));
+  EZBENCH2("mbedtls_p384_red_b", donothing, mbedtls_p384_red_b(A));
+}
+
+#endif /* MBEDTLS_ECP_C */
diff --git a/third_party/mbedtls/test/test.mk b/third_party/mbedtls/test/test.mk
index 51b64adc3..4e492e171 100644
--- a/third_party/mbedtls/test/test.mk
+++ b/third_party/mbedtls/test/test.mk
@@ -78,7 +78,9 @@ THIRD_PARTY_MBEDTLS_TEST_COMS =											\
 	o/$(MODE)/third_party/mbedtls/test/test_suite_timing.com						\
 	o/$(MODE)/third_party/mbedtls/test/test_suite_version.com						\
 	o/$(MODE)/third_party/mbedtls/test/test_suite_x509parse.com						\
-	o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com
+	o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com						\
+	o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com							\
+	o/$(MODE)/third_party/mbedtls/test/everest_test.com
 
 THIRD_PARTY_MBEDTLS_TEST_TESTS =										\
 	$(THIRD_PARTY_MBEDTLS_TEST_COMS:%=%.ok)
@@ -1340,3 +1342,22 @@ o/$(MODE)/third_party/mbedtls/test/test_suite_x509write.com.dbg:						\
 		$(CRT)												\
 		$(APE)
 	@$(APELINK)
+
+o/$(MODE)/third_party/mbedtls/test/everest_test.com: o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg
+o/$(MODE)/third_party/mbedtls/test/everest_test.com.dbg:							\
+		$(THIRD_PARTY_MBEDTLS_TEST_DEPS)								\
+		o/$(MODE)/third_party/mbedtls/test/everest_test.o						\
+		o/$(MODE)/third_party/mbedtls/test/everest_unravaged.o						\
+		$(LIBC_TESTMAIN)										\
+		$(CRT)												\
+		$(APE)
+	@$(APELINK)
+
+o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com: o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg
+o/$(MODE)/third_party/mbedtls/test/secp384r1_test.com.dbg:							\
+		$(THIRD_PARTY_MBEDTLS_TEST_DEPS)								\
+		o/$(MODE)/third_party/mbedtls/test/secp384r1_test.o						\
+		$(LIBC_TESTMAIN)										\
+		$(CRT)												\
+		$(APE)
+	@$(APELINK)