diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
index 3a7cc4ece..2544b3cf8 100644
--- a/.vscode/c_cpp_properties.json
+++ b/.vscode/c_cpp_properties.json
@@ -48,11 +48,10 @@
 				"notpossible=",
 				"thatispacked=",
 				"dontthrow=",
-				"nocallback=",
+				"dontcallback=",
 				"relegated=",
 				"hidden=",
 				"textstartup=",
-				"initarray=",
 				"returnsnonnull=",
 				"returnspointerwithnoaliases=",
 				"printfesque(x)=",
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1db711677..304c882a9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -16,10 +16,25 @@ contributors who prefer to remain anonymous to the public.
 
 The first time you send a pull request, you need to send an email to
 Justine Tunney <jtunney@gmail.com> stating that you intend to assign her
-the copyright to the changes you contribute to Cosmopolitan. This only
-applies to the code you *choose* to contribute. It only has to happen
-once. The email should be sent from an email address associated with
-your identity. Your email should link to your pull request.
+the copyright to the changes you contribute to Cosmopolitan. It only
+needs to happen once. This only applies to the code you *choose* to
+contribute. The email should be sent from an email address associated
+with your identity. Your email should link to your pull request.
+
+To make things easy, here's an example of a good email you can use:
+
+> **From**: YOUR NAME (yname@gmail.com)  
+> **To**: Justine Tunney (jtunney@gmail.com)  
+> **Subject**: Cosmopolitan Copyright Assignment for YOUR NAME
+>
+> Hi Justine,
+>
+> I made my first contribution to Cosmopolitan in
+> https://github.com/jart/cosmopolitan/pull/XXXX could you please take a
+> look? I intend to assign you the copyright to the changes I contribute
+> to Cosmopolitan.
+>
+> Thanks!
 
 Please note that in order to give Justine the copyright, it has to be
 yours to give in the first place. If you're employed, then you should
@@ -38,6 +53,27 @@ owners and the code should go in the `third_party/` folder. Every third
 party project should have a `README.cosmo` file that documents its
 provenance as well as any local changes you've made.
 
+## Copyright Policy Exceptions
+
+### Tests
+
+You're encoraged to claim ownership of your test code. If you add a new
+file under the `test/` directory, then you should put your name in the
+ISC license header at the top of the file. If you add new test cases to
+an existing unit test file, then you're encouraged to append a line with
+your name to the existing copyright header of that file.
+
+### Exceptional Features
+
+Let's say you discovered a faster better way to implement `log10()` and
+you want to give it to Cosmopolitan. In cases like this, it really isn't
+appropriate for Justine to own your code. What you could do instead, is
+write your own new and improved `log10.c` from scratch, put your name on
+the top with the ISC license, and then add a `__notice()` directive so
+that your name will be embedded inside every executable that links the
+`log10()` function. This will help you get your name out there. Please
+note you need get approval from Justine each time you want to do this.
+
 ## Style Guide
 
 You can use clang-format to automatically format your files:
@@ -47,34 +83,4 @@ clang-format -i -style=file tool/net/redbean.c
 ```
 
 If you use Emacs this can be automated on save for Cosmopolitan using
-[tool/emacs/cosmo-format.el]([tool/emacs/cosmo-format.el]).
-
-### Source Files
-
-- Must use include paths relative to the root of the repository
-- Must have comment at top of file documenting copyright and license
-- Must have notice embedding if not owned by Justine (exception: tests)
-- May use language extensions that are supported by both GCC and Clang
-- Should use Google indentation (otherwise use `/* clang-format off */`)
-- Should use asm() instead of compiler APIs (exception: ctz, clz, memcpy)
-
-### Header Files
-
-- Must not have copyright or license comments
-- Must have once guards (otherwise change `.h` to `.inc`)
-- Must be ANSI C89 compatible to be included in the amalgamation header
-- Must include its dependencies (exception: libc/integral/normalize.inc)
-- Must not define objects (i.e. `cc -c -xc foo.h` will produce empty `.o`)
-- Should not use typedefs
-- Should not use forward declarations
-- Should not include documentation comments
-- Should not include parameter names in prototypes
-- Should not pose problems if included by C++ or Assembly sources
-- Should not declare non-ANSI code, at all, when the user requests ANSI
-
-### Build Config
-
-- Must not write files outside `o/`
-- Must not communicate with Internet
-- Must not depend on system libraries
-- Must not depend on system commands (exception: sh, make, gzip, zip)
+[tool/emacs/cosmo-format.el](tool/emacs/cosmo-format.el).
diff --git a/Makefile b/Makefile
index 46e0b69d9..18afff34c 100644
--- a/Makefile
+++ b/Makefile
@@ -101,7 +101,6 @@ XARGS ?= xargs -P4 -rs8000
 DOT ?= dot
 CLANG = clang
 TMPDIR = o/tmp
-
 AR = build/bootstrap/ar.com
 CP = build/bootstrap/cp.com
 RM = build/bootstrap/rm.com -f
@@ -137,7 +136,7 @@ ARCH = aarch64
 HOSTS ?= pi studio freebsdarm
 else
 ARCH = x86_64
-HOSTS ?= freebsd rhel7 xnu win10 openbsd netbsd
+HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10
 endif
 
 ZIPOBJ_FLAGS += -a$(ARCH)
@@ -150,9 +149,9 @@ export MODE
 export SOURCE_DATE_EPOCH
 export TMPDIR
 
-COSMOCC = .cosmocc/3.2
+COSMOCC = .cosmocc/3.3.2
 TOOLCHAIN = $(COSMOCC)/bin/$(ARCH)-linux-cosmo-
-DOWNLOAD := $(shell build/download-cosmocc.sh $(COSMOCC) 3.2 28b48682595f0f46b45ab381118cdffdabc8fcfa29aa54e301fe6ffe35269f5e)
+DOWNLOAD := $(shell build/download-cosmocc.sh $(COSMOCC) 3.3.2 a695012ffbeac5e26e3c4a740debc15273f47e9a8bdc55e8b76a623154d5914b)
 
 AS = $(TOOLCHAIN)as
 CC = $(TOOLCHAIN)gcc
@@ -207,9 +206,8 @@ endif
 .UNVEIL +=					\
 	libc/integral				\
 	libc/stdbool.h				\
-	libc/disclaimer.inc			\
 	rwc:/dev/shm				\
-	rx:cosmocc				\
+	rx:.cosmocc				\
 	rx:build/bootstrap			\
 	r:build/portcosmo.h			\
 	/proc/stat				\
@@ -299,6 +297,7 @@ include third_party/nsync/testing/BUILD.mk
 include libc/testlib/BUILD.mk
 include tool/viz/lib/BUILD.mk
 include tool/args/BUILD.mk
+include test/math/BUILD.mk
 include test/posix/BUILD.mk
 include test/libcxx/BUILD.mk
 include test/tool/args/BUILD.mk
@@ -481,6 +480,7 @@ COSMOPOLITAN_OBJECTS =			\
 	LIBC_STR			\
 	LIBC_SYSV			\
 	LIBC_INTRIN			\
+	LIBC_NT_BCRYPTPRIMITIVES	\
 	LIBC_NT_KERNEL32		\
 	LIBC_NEXGEN32E
 
@@ -556,9 +556,9 @@ o/cosmopolitan.html: private .UNSANDBOXED = 1
 o/cosmopolitan.html:							\
 		o/$(MODE)/third_party/chibicc/chibicc.com.dbg		\
 		$(filter-out %.s,$(foreach x,$(COSMOPOLITAN_OBJECTS),$($(x)_SRCS)))	\
-		$(filter-out %.cc,$(SRCS))				\
+		$(filter-out %.cpp,$(filter-out %.cc,$(SRCS)))				\
 		$(HDRS)
-	$(file >$(TMPDIR)/$(subst /,_,$@),$(filter-out %.cc,$(filter-out %.s,$(foreach x,$(COSMOPOLITAN_OBJECTS),$($(x)_SRCS)))))
+	$(file >$(TMPDIR)/$(subst /,_,$@),$(filter-out %.cpp,$(filter-out %.cc,$(filter-out %.s,$(foreach x,$(COSMOPOLITAN_OBJECTS),$($(x)_SRCS))))))
 	o/$(MODE)/third_party/chibicc/chibicc.com.dbg -J		\
 		-fno-common -include libc/integral/normalize.inc -o $@	\
 		-DCOSMO @$(TMPDIR)/$(subst /,_,$@)
diff --git a/ape/BUILD.mk b/ape/BUILD.mk
index 106504d02..afddd13b6 100644
--- a/ape/BUILD.mk
+++ b/ape/BUILD.mk
@@ -157,7 +157,6 @@ o/$(MODE)/ape/ape-no-modify-self.o:		\
 		ape/ape.S			\
 		ape/ape.h			\
 		ape/macros.internal.h		\
-		ape/notice.inc			\
 		ape/relocations.h		\
 		ape/ape.internal.h		\
 		libc/dce.h			\
@@ -185,7 +184,6 @@ o/$(MODE)/ape/ape-copy-self.o:			\
 		ape/ape.S			\
 		ape/ape.h			\
 		ape/macros.internal.h		\
-		ape/notice.inc			\
 		ape/relocations.h		\
 		ape/ape.internal.h		\
 		libc/dce.h			\
diff --git a/ape/aarch64.lds b/ape/aarch64.lds
index 9866ac748..cec91ae89 100644
--- a/ape/aarch64.lds
+++ b/ape/aarch64.lds
@@ -89,10 +89,12 @@ SECTIONS {
     *(.ubsan.data)
   }
 
-  .comment : {
-    __comment_start = .;
-    KEEP(*(.comment))
+  .notice : {
+    __notices = .;
+    KEEP(*(.notice))
     BYTE(0);
+    BYTE(10);
+    BYTE(10);
   }
 
   .eh_frame_hdr : {
@@ -157,8 +159,11 @@ SECTIONS {
 
   .init_array : {
     __init_array_start = .;
-    KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
-    KEEP(*(.init_array EXCLUDE_FILE(*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
+    KEEP(*(.preinit_array))
+    KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*)
+           SORT_BY_INIT_PRIORITY(.ctors.*)))
+    KEEP(*(.init_array))
+    KEEP(*(.ctors))
     __init_array_end = .;
   }
 
diff --git a/ape/ape-m1.c b/ape/ape-m1.c
index 82c98feaa..1afd1edb0 100644
--- a/ape/ape-m1.c
+++ b/ape/ape-m1.c
@@ -39,7 +39,7 @@
 /* maximum path size that cosmo can take */
 #define PATHSIZE       (PATH_MAX < 1024 ? PATH_MAX : 1024)
 #define SYSLIB_MAGIC   ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
-#define SYSLIB_VERSION 8
+#define SYSLIB_VERSION 9 /* sync with libc/runtime/syslib.internal.h */
 
 struct Syslib {
   int magic;
@@ -96,11 +96,16 @@ struct Syslib {
   long (*sem_trywait)(int *);
   long (*getrlimit)(int, struct rlimit *);
   long (*setrlimit)(int, const struct rlimit *);
-  // v6 (2023-11-03)
+  /* v6 (2023-11-03) */
   void *(*dlopen)(const char *, int);
   void *(*dlsym)(void *, const char *);
   int (*dlclose)(void *);
   char *(*dlerror)(void);
+  /* MANDATORY (cosmo runtime won't load if version < 8)
+     ---------------------------------------------------
+     OPTIONAL (cosmo lib should check __syslib->version) */
+  /* v9 (2024-01-31) */
+  int (*pthread_cpu_number_np)(size_t *);
 };
 
 #define ELFCLASS32                  1
@@ -660,9 +665,9 @@ __attribute__((__noreturn__)) static void Spawn(const char *exe, int fd,
       size = (p[i].p_vaddr & (pagesz - 1)) + p[i].p_filesz;
       if (prot1 & PROT_EXEC) {
 #ifdef SIP_DISABLED
-        // if sip is disabled then we can load the executable segments
-        // off the binary into memory without needing to copy anything
-        // which provides considerably better performance for building
+        /* if sip is disabled then we can load the executable segments
+           off the binary into memory without needing to copy anything
+           which provides considerably better performance for building */
         rc = sys_mmap(addr, size, prot1, flags, fd, p[i].p_offset & -pagesz);
         if (rc < 0) {
           if (rc == -EPERM) {
@@ -674,12 +679,12 @@ __attribute__((__noreturn__)) static void Spawn(const char *exe, int fd,
           }
         }
 #else
-        // the issue is that if sip is enabled then, attempting to map
-        // it with exec permission will cause xnu to phone home a hash
-        // of the entire file to apple intelligence as a one time cost
-        // which is literally minutes for executables holding big data
-        // since there's no public apple api for detecting sip we read
-        // as the default strategy which is slow but it works for both
+        /* the issue is that if sip is enabled then, attempting to map
+           it with exec permission will cause xnu to phone home a hash
+           of the entire file to apple intelligence as a one time cost
+           which is literally minutes for executables holding big data
+           since there's no public apple api for detecting sip we read
+           as the default strategy which is slow but it works for both */
         rc = sys_mmap(addr, size, (prot1 = PROT_READ | PROT_WRITE),
                       MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
         if (rc < 0) Pexit(exe, rc, "prog mmap anon");
@@ -812,12 +817,10 @@ static const char *TryElf(struct ApeLoader *M, union ElfEhdrBuf *ebuf,
     }
   }
 
-  /*
-   * merge adjacent loads that are contiguous with equal protection,
-   * which prevents our program header overlap check from needlessly
-   * failing later on; it also shaves away a microsecond of latency,
-   * since every program header requires invoking at least 1 syscall
-   */
+  /* merge adjacent loads that are contiguous with equal protection,
+     which prevents our program header overlap check from needlessly
+     failing later on; it also shaves away a microsecond of latency,
+     since every program header requires invoking at least 1 syscall */
   for (i = 0; i + 1 < e->e_phnum;) {
     if (p[i].p_type == PT_LOAD && p[i + 1].p_type == PT_LOAD &&
         ((p[i].p_flags & (PF_R | PF_W | PF_X)) ==
@@ -944,6 +947,7 @@ int main(int argc, char **argv, char **envp) {
   M->lib.dlsym = dlsym;
   M->lib.dlclose = dlclose;
   M->lib.dlerror = dlerror;
+  M->lib.pthread_cpu_number_np = pthread_cpu_number_np;
 
   /* getenv("_") is close enough to at_execfn */
   execfn = 0;
diff --git a/ape/ape.S b/ape/ape.S
index 80e788570..eaf0bfaab 100644
--- a/ape/ape.S
+++ b/ape/ape.S
@@ -33,7 +33,6 @@
 │ αcτµαlly pδrταblε εxεcµταblε § program header                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "ape/macros.internal.h"
-#include "ape/notice.inc"
 #include "ape/relocations.h"
 #include "libc/calls/metalfile.internal.h"
 #include "libc/dce.h"
@@ -1772,49 +1771,31 @@ kernel:	movabs	$ape_stack_vaddr,%rsp
 	.type	ape_text_nops,@object
 	.type	__test_end,@object
 
-	.section .commentprologue,"a",@progbits
-	.globl	__comment_start
-	.type	__comment_start,@object
-	.hidden	__comment_start
-__comment_start:/*
-	...
-	decentralized content
-	...
-	*/.previous
-	.section .commentepilogue,"a",@progbits
-	.byte	0
-	.previous
-
 	.section .ape.pad.head,"a",@progbits
 	.type	ape_pad_head,@object
 	.hidden	ape_pad_head
 ape_pad_head:
-	.previous
 
 	.section .ape.pad.text,"a",@progbits
 	.type	ape_pad_text,@object
 	.hidden	ape_pad_text
 ape_pad_text:
-	.previous
 
 	.section .ape.pad.privileged,"a",@progbits
 	.type	ape_pad_privileged,@object
 	.hidden	ape_pad_privileged
 ape_pad_privileged:
-	.previous
 
 	.section .ape.pad.data,"a",@progbits
 	.type	ape_pad_data,@object
 	.hidden	ape_pad_data
 ape_pad_data:
-	.previous
 
 #if SupportsWindows()
 	.section .idata.ro,"a",@progbits
 	.type	ape_idata_ro,@object
 	.hidden	ape_idata_ro
 ape_idata_ro:
-	.previous
 #endif /* SupportsWindows() */
 
 	.section .dataprologue,"aw",@progbits
@@ -1822,32 +1803,45 @@ ape_idata_ro:
 	.globl	__data_start
 	.hidden	__data_start
 __data_start:
-	.previous
 
 	.section .dataepilogue,"aw",@progbits
 	.type	__data_end,@object
 	.globl	__data_end
 	.hidden	__data_end
 __data_end:
-	.previous
 
 	.section .bssprologue,"aw",@nobits
 	.type	__bss_start,@object
 	.globl	__bss_start
 	.hidden	__bss_start
 __bss_start:
-	.previous
 
 	.section .bssepilogue,"aw",@nobits
 	.type	__bss_end,@object
 	.globl	__bss_end
 	.hidden	__bss_end
 __bss_end:
-	.previous
 
 	.section .fstls,"awT",@nobits
 	.align	TLS_ALIGNMENT
-	.previous
+
+	.section .notice,"aR",@progbits
+	.asciz	"\n\n\
+Cosmopolitan\n\
+Copyright 2024 Justine Alexandra Roberts Tunney\n\
+\n\
+Permission to use, copy, modify, and/or distribute this software for\n\
+any purpose with or without fee is hereby granted, provided that the\n\
+above copyright notice and this permission notice appear in all copies.\n\
+\n\
+THE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL\n\
+WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED\n\
+WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE\n\
+AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL\n\
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR\n\
+PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER\n\
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\n\
+PERFORMANCE OF THIS SOFTWARE."
 
 .end
 
\ No newline at end of file
diff --git a/ape/ape.lds b/ape/ape.lds
index 288d798d4..9b1d40e5a 100644
--- a/ape/ape.lds
+++ b/ape/ape.lds
@@ -282,12 +282,9 @@ SECTIONS {
     KEEP(*(SORT_BY_NAME(.init.*)))
     KEEP(*(.init))
     KEEP(*(.initepilogue))
-    KEEP(*(.pltprologue))
     *(.plt)
-    KEEP(*(.pltepilogue))
-    KEEP(*(.pltgotprologue))
     *(.plt.got)
-    KEEP(*(.pltgotepilogue))
+    *(.iplt)
     *(.text.startup .text.startup.*)
     *(.text.exit .text.exit.*)
     *(.text.unlikely .text.*_unlikely .text.unlikely.*)
@@ -323,7 +320,7 @@ SECTIONS {
 
 /*BEGIN: Read Only Data */
 
-  .rodata . : {
+  .rodata ALIGN(CONSTANT(COMMONPAGESIZE)) : {
     KEEP(*(.rodata.pytab.0));
     KEEP(*(.rodata.pytab.1));
     KEEP(*(.rodata.pytab.2));
@@ -333,11 +330,11 @@ SECTIONS {
     *(.ubsan.data)
 
     /* Legal Notices */
-#if !defined(IM_FEELING_NAUGHTY) || defined(EMBED_NOTICES)
-    KEEP(*(.commentprologue))
-    KEEP(*(.comment))
-    KEEP(*(.commentepilogue))
-#endif
+    __notices = .;
+    KEEP(*(.notice))
+    BYTE(0);
+    BYTE(10);
+    BYTE(10);
 
 /*BEGIN: read-only data that's only needed for initialization */
 
@@ -393,26 +390,28 @@ SECTIONS {
 /*BEGIN: NT FORK COPYING */
     KEEP(*(.dataprologue))
     *(.data .data.*)
+    *(.gnu_extab)
+    *(.gcc_except_table .gcc_except_table.*)
+    *(.exception_ranges*)
     *(.PyRuntime) /* for python */
     *(.subrs) /* for emacs */
     KEEP(*(SORT_BY_NAME(.sort.data.*)))
     . += . > 0 ? CODE_GRANULE : 0;
 
-    KEEP(*(.gotprologue))
+    . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0);
+    __got_start = .;
     *(.got)
-    KEEP(*(.gotepilogue))
+    __got_end = .;
 
-    KEEP(*(.gotpltprologue))
     *(.got.plt)
-    KEEP(*(.gotpltepilogue))
 
     . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0);
     __init_array_start = .;
+    KEEP(*(.preinit_array))
     KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*)
            SORT_BY_INIT_PRIORITY(.ctors.*)))
-    KEEP(*(.ctors))
     KEEP(*(.init_array))
-    KEEP(*(.preinit_array))
+    KEEP(*(.ctors))
     __init_array_end = .;
 
     . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0);
@@ -429,7 +428,9 @@ SECTIONS {
     . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0);
     KEEP(*(SORT_BY_NAME(.piro.data.sort.*)))
     KEEP(*(.piro.pad.data))
+    *(.igot.plt)
     KEEP(*(.dataepilogue))
+
     . = ALIGN(. != 0 ? CONSTANT(COMMONPAGESIZE) : 0);
 /*END: NT FORK COPYING */
     _edata = .;
@@ -527,7 +528,9 @@ SECTIONS {
     *(.piro.data.sort.iat.*)
 #endif
     *(__patchable_function_entries)
+    *(.note.gnu.property)
     *(__mcount_loc)
+    *(.rela.dyn)
     *(.discard)
     *(.yoink)
   }
diff --git a/ape/loader.c b/ape/loader.c
index 87822315e..752d5ef6e 100644
--- a/ape/loader.c
+++ b/ape/loader.c
@@ -166,13 +166,6 @@
    (unsigned long)(255 & (S)[1]) << 010 | \
    (unsigned long)(255 & (S)[0]) << 000)
 
-#define DEBUG(VAR)                          \
-  {                                         \
-    char ibuf[19] = {0};                    \
-    Utox(ibuf, VAR);                        \
-    Print(os, 2, ibuf, " " #VAR, "\n", 0l); \
-  }
-
 struct ElfEhdr {
   unsigned char e_ident[16];
   unsigned short e_type;
@@ -340,23 +333,6 @@ static char *GetEnv(char **p, const char *s) {
   return 0;
 }
 
-static char *Utox(char p[19], unsigned long x) {
-  int i;
-  if (x) {
-    *p++ = '0';
-    *p++ = 'x';
-    i = (__builtin_clzl(x) ^ (sizeof(long) * 8 - 1)) + 1;
-    i = (i + 3) & -4;
-    do {
-      *p++ = "0123456789abcdef"[(x >> (i -= 4)) & 15];
-    } while (i);
-  } else {
-    *p++ = '0';
-  }
-  *p = 0;
-  return p;
-}
-
 static char *Utoa(char p[20], unsigned long x) {
   char t;
   unsigned long i, a, b;
@@ -534,6 +510,53 @@ static long Print(int os, int fd, const char *s, ...) {
   return Write(fd, b, n, os);
 }
 
+static long Printf(int os, int fd, const char *fmt, ...) {
+  int i;
+  char c;
+  int k = 0;
+  unsigned u;
+  char b[512];
+  const char *s;
+  unsigned long d;
+  __builtin_va_list va;
+  __builtin_va_start(va, fmt);
+  for (;;) {
+    switch ((c = *fmt++)) {
+      case '\0':
+        __builtin_va_end(va);
+        return Write(fd, b, k, os);
+      case '%':
+        switch ((c = *fmt++)) {
+          case 's':
+            for (s = __builtin_va_arg(va, const char *); s && *s; ++s) {
+              if (k < 512) b[k++] = *s;
+            }
+            break;
+          case 'd':
+            d = __builtin_va_arg(va, unsigned long);
+            for (i = 16; i--;) {
+              u = (d >> (i * 4)) & 15;
+              if (u < 10) {
+                c = '0' + u;
+              } else {
+                u -= 10;
+                c = 'a' + u;
+              }
+              if (k < 512) b[k++] = c;
+            }
+            break;
+          default:
+            if (k < 512) b[k++] = c;
+            break;
+        }
+        break;
+      default:
+        if (k < 512) b[k++] = c;
+        break;
+    }
+  }
+}
+
 static void Perror(int os, const char *thing, long rc, const char *reason) {
   char ibuf[21];
   ibuf[0] = 0;
@@ -901,7 +924,7 @@ EXTERN_C __attribute__((__noreturn__)) void ApeLoader(long di, long *sp,
   long *auxv, *ap, *endp, *sp2;
   char *p, *pe, *exe, *prog, **argv, **envp;
 
-  (void)Utox;
+  (void)Printf;
 
   /* detect freebsd */
   if (SupportsXnu() && dl == XNU) {
diff --git a/ape/sections.internal.h b/ape/sections.internal.h
index 119c4990e..6bc8cc312 100644
--- a/ape/sections.internal.h
+++ b/ape/sections.internal.h
@@ -2,7 +2,7 @@
 #define COSMOPOLITAN_APE_SECTIONS_INTERNAL_H_
 COSMOPOLITAN_C_START_
 
-extern const char __comment_start[] __attribute__((__weak__));
+extern const char __notices[] __attribute__((__weak__));
 extern unsigned char __executable_start[] __attribute__((__weak__));
 extern unsigned char __privileged_start[] __attribute__((__weak__));
 extern unsigned char _ehead[] __attribute__((__weak__));
@@ -18,10 +18,12 @@ extern unsigned char _tbss_end[] __attribute__((__weak__));
 extern unsigned char _tls_align[] __attribute__((__weak__));
 extern unsigned char __test_start[] __attribute__((__weak__));
 extern unsigned char __ro[] __attribute__((__weak__));
-extern uint8_t __data_start[] __attribute__((__weak__));
-extern uint8_t __data_end[] __attribute__((__weak__));
-extern uint8_t __bss_start[] __attribute__((__weak__));
-extern uint8_t __bss_end[] __attribute__((__weak__));
+extern unsigned char __data_start[] __attribute__((__weak__));
+extern unsigned char __data_end[] __attribute__((__weak__));
+extern unsigned char __bss_start[] __attribute__((__weak__));
+extern unsigned char __bss_end[] __attribute__((__weak__));
+extern unsigned long __got_start[] __attribute__((__weak__));
+extern unsigned long __got_end[] __attribute__((__weak__));
 extern unsigned char ape_phdrs[] __attribute__((__weak__));
 
 COSMOPOLITAN_C_END_
diff --git a/build/bootstrap/ape.aarch64 b/build/bootstrap/ape.aarch64
index c95c86c7f..27fd3d7fb 100755
Binary files a/build/bootstrap/ape.aarch64 and b/build/bootstrap/ape.aarch64 differ
diff --git a/build/bootstrap/ape.elf b/build/bootstrap/ape.elf
index f56c3ef8e..6a87fd728 100755
Binary files a/build/bootstrap/ape.elf and b/build/bootstrap/ape.elf differ
diff --git a/build/bootstrap/ape.macho b/build/bootstrap/ape.macho
index 2b887117c..42b153c6a 100755
Binary files a/build/bootstrap/ape.macho and b/build/bootstrap/ape.macho differ
diff --git a/build/bootstrap/compile.com b/build/bootstrap/compile.com
index 392c72017..9a213f0f0 100755
Binary files a/build/bootstrap/compile.com and b/build/bootstrap/compile.com differ
diff --git a/build/bootstrap/fixupobj.com b/build/bootstrap/fixupobj.com
index efc2ff872..9df51566f 100755
Binary files a/build/bootstrap/fixupobj.com and b/build/bootstrap/fixupobj.com differ
diff --git a/build/bootstrap/gcc-only-flags.txt b/build/bootstrap/gcc-only-flags.txt
new file mode 100644
index 000000000..77b816a6b
--- /dev/null
+++ b/build/bootstrap/gcc-only-flags.txt
@@ -0,0 +1,58 @@
+--nocompress-debug-sections
+--noexecstack
+-Wa,--nocompress-debug-sections
+-Wa,--noexecstack
+-Wa,-msse2avx
+-Werror=maybe-uninitialized
+-Wno-literal-suffix
+-Wno-unused-but-set-variable
+-Wunsafe-loop-optimizations
+-fbranch-target-load-optimize
+-fcx-limited-range
+-fdelete-dead-exceptions
+-femit-struct-debug-baseonly
+-ffp-int-builtin-inexact
+-finline-functions-called-once
+-fipa-pta
+-fivopts
+-flimit-function-alignment
+-fmerge-constants
+-fmodulo-sched
+-fmodulo-sched-allow-regmoves
+-fno-align-jumps
+-fno-align-labels
+-fno-align-loops
+-fno-code-hoisting
+-fno-cx-limited-range
+-fno-fp-int-builtin-inexact
+-fno-gnu-unique
+-fno-inline-functions-called-once
+-fno-instrument-functions
+-fno-schedule-insns2
+-fno-whole-program
+-fopt-info-vec
+-fopt-info-vec-missed
+-freg-struct-return
+-freschedule-modulo-scheduled-loops
+-frounding-math
+-fsched2-use-superblocks
+-fschedule-insns
+-fschedule-insns2
+-fshrink-wrap
+-fshrink-wrap-separate
+-fsignaling-nans
+-fstack-clash-protection
+-ftracer
+-ftrapv
+-ftree-loop-im
+-ftree-loop-vectorize
+-funsafe-loop-optimizations
+-fversion-loops-for-strides
+-fwhole-program
+-gdescribe-dies
+-gstabs
+-mcall-ms2sysv-xlogues
+-mdispatch-scheduler
+-mfpmath=sse+387
+-mmitigate-rop
+-mno-fentry
diff --git a/build/bootstrap/package.com b/build/bootstrap/package.com
index 015964e47..1c7db4131 100755
Binary files a/build/bootstrap/package.com and b/build/bootstrap/package.com differ
diff --git a/build/definitions.mk b/build/definitions.mk
index 8a023d2d9..b7c825184 100644
--- a/build/definitions.mk
+++ b/build/definitions.mk
@@ -54,7 +54,7 @@
 #
 
 ifeq ($(LANDLOCKMAKE_VERSION),)
-TMPSAFE = $(join $(TMPDIR),$(subst /,_,$@)).tmp
+TMPSAFE = $(join $(TMPDIR)/,$(subst /,_,$@)).tmp
 else
 TMPSAFE = $(TMPDIR)/
 endif
@@ -93,7 +93,6 @@ DEFAULT_CCFLAGS +=							\
 	-frecord-gcc-switches
 
 DEFAULT_COPTS ?=							\
-	-fno-math-errno							\
 	-fno-ident							\
 	-fno-common							\
 	-fno-gnu-unique							\
@@ -138,6 +137,8 @@ MATHEMATICAL =								\
 DEFAULT_CPPFLAGS +=							\
 	-D_COSMO_SOURCE							\
 	-DMODE='"$(MODE)"'						\
+	-Wno-prio-ctor-dtor						\
+	-Wno-unknown-pragmas						\
 	-nostdinc							\
 	-iquote.							\
 	-isystem libc/isystem
@@ -163,6 +164,7 @@ DEFAULT_LDFLAGS =							\
 	-nostdlib							\
 	-znorelro							\
 	--gc-sections							\
+	-z noexecstack							\
 	--build-id=none							\
 	--no-dynamic-linker
 
diff --git a/build/htags b/build/htags
index 698a4de2e..95f264228 100755
--- a/build/htags
+++ b/build/htags
@@ -55,7 +55,7 @@ set -- --regex-c='/_Atomic(\([^)]*\))/\1/b' "$@"
 set -- --regex-c='/^\(\(hidden\|extern\|const\) \)*[_[:alpha:]][_[:alnum:]]*[ *][ *]*\([_[:alpha:]][_[:alnum:]]*[ *][ *]*\)*\([_[:alpha:]][_$[:alnum:]]*\)/\4/b' "$@"
 
 # ctags doesn't understand function prototypes, e.g.
-#   bool isheap(void *p) dontthrow nocallback;
+#   bool isheap(void *p) dontthrow dontcallback;
 set -- --regex-c='/^[_[:alpha:]][_[:alnum:]]*[ *][ *]*\([_[:alpha:]][_[:alnum:]]*[ *][ *]*\)*\([_[:alpha:]][_$[:alnum:]]*\)(.*/\2/b' "$@"
 
 # ctags doesn't understand function pointers, e.g.
diff --git a/build/objdump b/build/objdump
index 32a4e218a..e5827019b 100755
--- a/build/objdump
+++ b/build/objdump
@@ -6,14 +6,14 @@ if [ -n "$OBJDUMP" ]; then
 fi
 
 find_objdump() {
-  if [ -x .cosmocc/3.2/bin/$1-linux-cosmo-objdump ]; then
-    OBJDUMP=.cosmocc/3.2/bin/$1-linux-cosmo-objdump
-  elif [ -x .cosmocc/3.2/bin/$1-linux-musl-objdump ]; then
-    OBJDUMP=.cosmocc/3.2/bin/$1-linux-musl-objdump
-  elif [ -x "$COSMO/.cosmocc/3.2/bin/$1-linux-cosmo-objdump" ]; then
-    OBJDUMP="$COSMO/.cosmocc/3.2/bin/$1-linux-cosmo-objdump"
-  elif [ -x "$COSMO/.cosmocc/3.2/bin/$1-linux-musl-objdump" ]; then
-    OBJDUMP="$COSMO/.cosmocc/3.2/bin/$1-linux-musl-objdump"
+  if [ -x .cosmocc/3.3.2/bin/$1-linux-cosmo-objdump ]; then
+    OBJDUMP=.cosmocc/3.3.2/bin/$1-linux-cosmo-objdump
+  elif [ -x .cosmocc/3.3.2/bin/$1-linux-musl-objdump ]; then
+    OBJDUMP=.cosmocc/3.3.2/bin/$1-linux-musl-objdump
+  elif [ -x "$COSMO/.cosmocc/3.3.2/bin/$1-linux-cosmo-objdump" ]; then
+    OBJDUMP="$COSMO/.cosmocc/3.3.2/bin/$1-linux-cosmo-objdump"
+  elif [ -x "$COSMO/.cosmocc/3.3.2/bin/$1-linux-musl-objdump" ]; then
+    OBJDUMP="$COSMO/.cosmocc/3.3.2/bin/$1-linux-musl-objdump"
   else
     echo "error: toolchain not found (try running 'cosmocc --update' or 'make' in the cosmo monorepo)" >&2
     exit 1
diff --git a/build/online.mk b/build/online.mk
index 0d874e5e6..da1775bbe 100644
--- a/build/online.mk
+++ b/build/online.mk
@@ -23,7 +23,8 @@
 #   - tool/build/runitd.c
 
 .PRECIOUS: o/$(MODE)/%.com.ok
-o/$(MODE)/%.com.ok: private .PLEDGE = stdio rpath wpath cpath proc fattr inet
+o/$(MODE)/%.com.ok: private .PLEDGE = stdio rpath wpath cpath proc fattr inet dns
+o/$(MODE)/%.com.ok: private .UNVEIL += r:/etc/resolv.conf
 o/$(MODE)/%.com.ok:				\
 		o/$(MODE)/tool/build/runit.com	\
 		o/$(MODE)/tool/build/runitd.com	\
diff --git a/build/rules.mk b/build/rules.mk
index 728df92fd..e8ca02541 100644
--- a/build/rules.mk
+++ b/build/rules.mk
@@ -39,6 +39,7 @@ o/$(MODE)/%.h: %.c
 
 o/$(MODE)/%.o: %.cc
 	@$(COMPILE) -AOBJECTIFY.cxx $(OBJECTIFY.cxx) $(OUTPUT_OPTION) $<
+	@$(COMPILE) -AFIXUPOBJ -wT$@ $(FIXUPOBJ) $@
 
 o/$(MODE)/%.o: %.cpp
 	@$(COMPILE) -AOBJECTIFY.cxx $(OBJECTIFY.cxx) $(OUTPUT_OPTION) $<
diff --git a/dsp/mpeg/buffer.c b/dsp/mpeg/buffer.c
index 76ed2be36..f0cb0de38 100644
--- a/dsp/mpeg/buffer.c
+++ b/dsp/mpeg/buffer.c
@@ -35,12 +35,7 @@
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 #include "libc/sysv/consts/madv.h"
-
-asm(".ident\t\"\\n\\n\
-PL_MPEG (MIT License)\\n\
-Copyright(c) 2019 Dominic Szablewski\\n\
-https://phoboslab.org\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("pl_mpeg_notice");
 
 /* clang-format off */
 // -----------------------------------------------------------------------------
diff --git a/dsp/mpeg/demux.c b/dsp/mpeg/demux.c
index 4ded810e5..66eff844a 100644
--- a/dsp/mpeg/demux.c
+++ b/dsp/mpeg/demux.c
@@ -27,17 +27,12 @@
 │  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE            │
 │  SOFTWARE.                                                                   │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "dsp/mpeg/buffer.h"
 #include "dsp/mpeg/demux.h"
+#include "dsp/mpeg/buffer.h"
 #include "dsp/mpeg/mpeg.h"
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-PL_MPEG (MIT License)\\n\
-Copyright(c) 2019 Dominic Szablewski\\n\
-https://phoboslab.org\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("pl_mpeg_notice");
 
 /* clang-format off */
 // ----------------------------------------------------------------------------
diff --git a/dsp/mpeg/idct.c b/dsp/mpeg/idct.c
index 87c17ae6a..11312607e 100644
--- a/dsp/mpeg/idct.c
+++ b/dsp/mpeg/idct.c
@@ -28,12 +28,7 @@
 │  SOFTWARE.                                                                   │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "dsp/core/half.h"
-
-asm(".ident\t\"\\n\\n\
-PL_MPEG (MIT License)\\n\
-Copyright(c) 2019 Dominic Szablewski\\n\
-https://phoboslab.org\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("pl_mpeg_notice");
 
 /**
  * Computes Fixed-Point 8x8 Inverse Discrete Cosine Transform.
diff --git a/dsp/mpeg/mp2.c b/dsp/mpeg/mp2.c
index 88a5a0c10..53fc91a23 100644
--- a/dsp/mpeg/mp2.c
+++ b/dsp/mpeg/mp2.c
@@ -33,12 +33,6 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-PL_MPEG (MIT License)\\n\
-Copyright(c) 2019 Dominic Szablewski\\n\
-https://phoboslab.org\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /* clang-format off */
 // -----------------------------------------------------------------------------
 // plm_audio implementation
diff --git a/dsp/mpeg/mpeg1.c b/dsp/mpeg/mpeg1.c
index f9c29910b..905af23da 100644
--- a/dsp/mpeg/mpeg1.c
+++ b/dsp/mpeg/mpeg1.c
@@ -41,12 +41,7 @@
 #include "libc/str/str.h"
 #include "libc/time/time.h"
 #include "libc/x/x.h"
-
-asm(".ident\t\"\\n\\n\
-PL_MPEG (MIT License)\\n\
-Copyright(c) 2019 Dominic Szablewski\\n\
-https://phoboslab.org\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("pl_mpeg_notice");
 
 // -----------------------------------------------------------------------------
 // plm_video implementation
@@ -1104,7 +1099,7 @@ plm_video_t *plm_video_create_with_buffer(plm_buffer_t *buffer,
   return self;
 }
 
-static textstartup void plm_video_init(void) {
+__attribute__((__constructor__)) static textstartup void plm_video_init(void) {
   PLM_VIDEO_MACROBLOCK_TYPE[0] = NULL;
   PLM_VIDEO_MACROBLOCK_TYPE[1] = (void *)PLM_VIDEO_MACROBLOCK_TYPE_INTRA;
   PLM_VIDEO_MACROBLOCK_TYPE[2] = (void *)PLM_VIDEO_MACROBLOCK_TYPE_PREDICTIVE;
@@ -1113,5 +1108,3 @@ static textstartup void plm_video_init(void) {
   PLM_VIDEO_DCT_SIZE[1] = (void *)PLM_VIDEO_DCT_SIZE_CHROMINANCE;
   PLM_VIDEO_DCT_SIZE[2] = (void *)PLM_VIDEO_DCT_SIZE_CHROMINANCE;
 }
-
-const void *const plm_video_init_ctor[] initarray = {plm_video_init};
diff --git a/dsp/mpeg/notice.c b/dsp/mpeg/notice.c
new file mode 100644
index 000000000..264a7549b
--- /dev/null
+++ b/dsp/mpeg/notice.c
@@ -0,0 +1,4 @@
+__notice(pl_mpeg_notice, "\
+PL_MPEG (MIT License)\n\
+Copyright(c) 2019 Dominic Szablewski\n\
+https://phoboslab.org");
diff --git a/dsp/mpeg/plm.c b/dsp/mpeg/plm.c
index f4f1dc504..7704643ff 100644
--- a/dsp/mpeg/plm.c
+++ b/dsp/mpeg/plm.c
@@ -32,12 +32,7 @@
 #include "libc/mem/mem.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-PL_MPEG (MIT License)\\n\
-Copyright(c) 2019 Dominic Szablewski\\n\
-https://phoboslab.org\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("pl_mpeg_notice");
 
 /* clang-format off */
 // -----------------------------------------------------------------------------
diff --git a/dsp/mpeg/slowrgb.c b/dsp/mpeg/slowrgb.c
index 16c819c38..7472d82f3 100644
--- a/dsp/mpeg/slowrgb.c
+++ b/dsp/mpeg/slowrgb.c
@@ -29,12 +29,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "dsp/mpeg/mpeg.h"
 #include "libc/macros.internal.h"
-
-asm(".ident\t\"\\n\\n\
-PL_MPEG (MIT License)\\n\
-Copyright(c) 2019 Dominic Szablewski\\n\
-https://phoboslab.org\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("pl_mpeg_notice");
 
 /**
  * @see YCbCr2RGB() in tool/viz/lib/ycbcr2rgb.c
diff --git a/dsp/scale/magikarp.c b/dsp/scale/magikarp.c
index d8f83dd80..ea5aa7d55 100644
--- a/dsp/scale/magikarp.c
+++ b/dsp/scale/magikarp.c
@@ -121,8 +121,7 @@ void *MagikarpY(long dys, long dxs, unsigned char d[restrict dys][dxs],
   return d;
 }
 
-static textstartup void g_magikarp_init() {
+__attribute__((__constructor__)) static textstartup void g_magikarp_init() {
   memcpy(g_magkern, kMagkern[0], sizeof(g_magkern));
   memcpy(g_magikarp, kMagikarp[0], sizeof(g_magikarp));
 }
-const void *const g_magikarp_ctor[] initarray = {g_magikarp_init};
diff --git a/dsp/tty/itoa8.c b/dsp/tty/itoa8.c
index bf968728e..57c903b69 100644
--- a/dsp/tty/itoa8.c
+++ b/dsp/tty/itoa8.c
@@ -21,7 +21,7 @@
 
 struct Itoa8 kItoa8;
 
-static textstartup void itoa8_init(void) {
+__attribute__((__constructor__)) static textstartup void itoa8_init(void) {
   int i;
   uint8_t z;
   uint32_t w;
@@ -41,5 +41,3 @@ static textstartup void itoa8_init(void) {
     kItoa8.data[i] = w;
   }
 }
-
-const void *const itoa8_init_ctor[] initarray = {itoa8_init};
diff --git a/dsp/tty/rgb2ansi.c b/dsp/tty/rgb2ansi.c
index 053aa4ebb..baece9e8a 100644
--- a/dsp/tty/rgb2ansi.c
+++ b/dsp/tty/rgb2ansi.c
@@ -97,7 +97,7 @@ static int uncube(int x) {
   return x < 48 ? 0 : x < 115 ? 1 : (x - 35) / 40;
 }
 
-static textstartup void rgb2ansi_init(void) {
+__attribute__((__constructor__)) static textstartup void rgb2ansi_init(void) {
   uint8_t c;
   uint32_t i;
   memcpy(g_ansi2rgb_, &kCgaPalette, sizeof(kCgaPalette));
@@ -114,5 +114,3 @@ static textstartup void rgb2ansi_init(void) {
     g_ansi2rgb_[i].xt = i;
   }
 }
-
-const void *const rgb2ansi_init_ctor[] initarray = {rgb2ansi_init};
diff --git a/dsp/tty/ttyquant.c b/dsp/tty/ttyquant.c
index a0cee959e..0e9a0e16e 100644
--- a/dsp/tty/ttyquant.c
+++ b/dsp/tty/ttyquant.c
@@ -73,8 +73,6 @@ textstartup void ttyquantsetup(enum TtyQuantizationAlgorithm alg,
   TTYQUANT()->blocks = blocks;
 }
 
-textstartup void ttyquant_init(void) {
+__attribute__((__constructor__)) textstartup void ttyquant_init(void) {
   ttyquantsetup(kTtyQuantXterm256, kTtyQuantRgb, kTtyBlocksUnicode);
 }
-
-const void *const ttyquant_init_ctor[] initarray = {ttyquant_init};
diff --git a/examples/BUILD.mk b/examples/BUILD.mk
index e69b297b0..5b8a75156 100644
--- a/examples/BUILD.mk
+++ b/examples/BUILD.mk
@@ -85,6 +85,7 @@ EXAMPLES_DIRECTDEPS =								\
 	THIRD_PARTY_MUSL							\
 	THIRD_PARTY_NSYNC							\
 	THIRD_PARTY_NSYNC_MEM							\
+	THIRD_PARTY_OPENMP							\
 	THIRD_PARTY_SED								\
 	THIRD_PARTY_STB								\
 	THIRD_PARTY_TR								\
@@ -153,6 +154,7 @@ o/$(MODE)/examples/picol.com.dbg:				\
 	@$(APELINK)
 
 o/$(MODE)/usr/share/dict/words.zip.o: private ZIPOBJ_FLAGS += -C2
+o/$(MODE)/examples/wut.o: private COPTS += -fopenmp
 
 $(EXAMPLES_OBJS): examples/BUILD.mk
 
diff --git a/examples/kilo.c b/examples/kilo.c
index 7cc1ddefb..9645634e1 100644
--- a/examples/kilo.c
+++ b/examples/kilo.c
@@ -33,11 +33,10 @@
 │  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.        │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 
-asm(".ident \"\n\
+__notice(kilo_notice, "\
 Kilo ─ A very simple editor (BSD-2)\n\
 Copyright 2016 Salvatore Sanfilippo\n\
-Contact: antirez@gmail.com\"\n\
-.include \"libc/disclaimer.inc\"");
+Contact: antirez@gmail.com");
 
 /*
  * This software has been modified by Justine Tunney to:
diff --git a/examples/script.c b/examples/script.c
index 3ea419caa..e840aca8a 100644
--- a/examples/script.c
+++ b/examples/script.c
@@ -67,13 +67,10 @@
  * @see https://asciinema.org/
  */
 
-asm(".ident\t\"\\n\\n\
-FreeBSD Script (BSD-3 License)\\n\
-Copyright (c) 2010, 2012 David E. O'Brien\\n\
-Copyright (c) 1980, 1992, 1993\\n\
-\tThe Regents of the University of California.\\n\
-\tAll rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(freebsd_script_notice, "\
+FreeBSD Script (BSD-3 License)\n\
+Copyright (c) 2010, 2012 David E. O'Brien\n\
+Copyright (c) 1980, 1992, 1993 The Regents of the University of California");
 
 #define DEF_BUF 65536
 
diff --git a/examples/system.c b/examples/system.c
index aecfcbfe5..070665191 100644
--- a/examples/system.c
+++ b/examples/system.c
@@ -7,6 +7,7 @@
 │   • http://creativecommons.org/publicdomain/zero/1.0/            │
 ╚─────────────────────────────────────────────────────────────────*/
 #endif
+#include "libc/calls/calls.h"
 #include "libc/runtime/runtime.h"
 #include "libc/stdio/stdio.h"
 
diff --git a/examples/unbourne.c b/examples/unbourne.c
index 6ad057a43..536af38b5 100644
--- a/examples/unbourne.c
+++ b/examples/unbourne.c
@@ -2569,8 +2569,7 @@ static int shlex() {
       case 'y':
       case 'z':
         p = buf;
-        while (buf++, is_in_name(*buf))
-          ;
+        while (buf++, is_in_name(*buf));
         yylval.name = stalloc(buf - p + 1);
         *(char *)mempcpy(yylval.name, p, buf - p) = 0;
         value = ARITH_VAR;
@@ -2994,7 +2993,7 @@ static const char *updatepwd(const char *dir) {
   lim = (char *)stackblock() + 1;
   if (*dir != '/') {
     if (new[-1] != '/') USTPUTC('/', new);
-    if (new > lim &&*lim == '/') lim++;
+    if (new > lim && *lim == '/') lim++;
   } else {
     USTPUTC('/', new);
     cdcomppath++;
@@ -6565,6 +6564,10 @@ struct job *makejob(union node *node, int nprocs) {
   return jp;
 }
 
+#if defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic ignored "-Wuse-after-free"
+#endif
+
 static struct job *growjobtab(void) {
   unsigned len;
   long offset;
@@ -7446,8 +7449,7 @@ static int ulimitcmd(int argc, char **argv) {
         what = optc;
     }
   }
-  for (l = limits; l->option != what; l++)
-    ;
+  for (l = limits; l->option != what; l++);
   set = *argptr ? 1 : 0;
   if (set) {
     char *p = *argptr;
@@ -7660,8 +7662,7 @@ static void setparam(char **argv) {
   char **newparam;
   char **ap;
   int nparam;
-  for (nparam = 0; argv[nparam]; nparam++)
-    ;
+  for (nparam = 0; argv[nparam]; nparam++);
   ap = newparam = ckmalloc((nparam + 1) * sizeof *ap);
   while (*argv) {
     *ap++ = savestr(*argv++);
@@ -7701,8 +7702,7 @@ static int shiftcmd(int argc, char **argv) {
     if (shellparam.malloc) ckfree(*ap1);
   }
   ap2 = shellparam.p;
-  while ((*ap2++ = *ap1++) != NULL)
-    ;
+  while ((*ap2++ = *ap1++) != NULL);
   shellparam.optind = 1;
   shellparam.optoff = -1;
   INTON;
@@ -8308,8 +8308,7 @@ static void parsefname(void) {
     if (heredoclist == NULL)
       heredoclist = here;
     else {
-      for (p = heredoclist; p->next; p = p->next)
-        ;
+      for (p = heredoclist; p->next; p = p->next);
       p->next = here;
     }
   } else if (n->type == NTOFD || n->type == NFROMFD) {
@@ -8432,8 +8431,7 @@ static int xxreadtoken(void) {
       case '\t':
         continue;
       case '#':
-        while ((c = pgetc()) != '\n' && c != PEOF)
-          ;
+        while ((c = pgetc()) != '\n' && c != PEOF);
         pungetc();
         continue;
       case '\n':
@@ -8553,7 +8551,7 @@ static int readtoken1(int firstc, char const *syntax, char *eofmark,
   quotef = 0;
   bqlist = NULL;
   STARTSTACKSTR(out);
-loop : {                   /* for each line, until end of word */
+loop: {                    /* for each line, until end of word */
   CHECKEND();              /* set c to PEOF if at end of here document */
   for (;;) {               /* until end of line or end of word */
     CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
@@ -8701,7 +8699,7 @@ endword:
    * is called, c is set to the first character of the next input line.  If
    * we are at the end of the here document, this routine sets the c to PEOF.
    */
-checkend : {
+checkend: {
   if (realeofmark(eofmark)) {
     int markloc;
     char *p;
@@ -8742,7 +8740,7 @@ checkend : {
    * specifying the fd to be redirected.  The variable "c" contains the
    * first character of the redirection operator.
    */
-parseredir : {
+parseredir: {
   char fd = *out;
   union node *np;
   np = (union node *)stalloc(sizeof(struct nfile));
@@ -8798,7 +8796,7 @@ parseredir : {
    * Parse a substitution.  At this point, we have read the dollar sign
    * and nothing else.
    */
-parsesub : {
+parsesub: {
   int subtype;
   int typeloc;
   char *p;
@@ -8910,7 +8908,7 @@ parsesub : {
    * list of commands (passed by reference), and savelen is the number of
    * characters on the top of the stack which must be preserved.
    */
-parsebackq : {
+parsebackq: {
   struct nodelist **nlpp;
   union node *n;
   char *str;
@@ -9002,7 +9000,7 @@ parsebackq : {
 /*
  * Parse an arithmetic expansion (indicate start of one and set state)
  */
-parsearith : {
+parsearith: {
   synstack_push(&synstack, synstack->prev ?: alloca(sizeof(*synstack)),
                 ARISYNTAX);
   synstack->dblquote = 1;
diff --git a/examples/whois.c b/examples/whois.c
index bae2bc359..95154787f 100644
--- a/examples/whois.c
+++ b/examples/whois.c
@@ -48,12 +48,9 @@
 #include "third_party/musl/netdb.h"
 // clang-format off
 
-asm(".ident\t\"\\n\\n\
-FreeBSD Whois (BSD-3 License)\\n\
-Copyright (c) 1980, 1993\\n\
-\tThe Regents of the University of California.\\n\
-\tAll rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(freebsd_whois_notice, "\
+FreeBSD Whois (BSD-3 License)\n\
+Copyright (c) 1980, 1993 The Regents of the University of California");
 
 #define	ABUSEHOST	"whois.abuse.net"
 #define	ANICHOST	"whois.arin.net"
diff --git a/libc/calls/BUILD.mk b/libc/calls/BUILD.mk
index bbf134fc4..a12d7f3e6 100644
--- a/libc/calls/BUILD.mk
+++ b/libc/calls/BUILD.mk
@@ -41,6 +41,7 @@ LIBC_CALLS_A_DIRECTDEPS =				\
 	LIBC_INTRIN					\
 	LIBC_NEXGEN32E					\
 	LIBC_NT_ADVAPI32				\
+	LIBC_NT_BCRYPTPRIMITIVES			\
 	LIBC_NT_IPHLPAPI				\
 	LIBC_NT_KERNEL32				\
 	LIBC_NT_NTDLL					\
@@ -72,12 +73,10 @@ $(LIBC_CALLS_A_OBJS): private				\
 			-Wframe-larger-than=4096	\
 			-Walloca-larger-than=4096
 
-ifneq ($(ARCH), aarch64)
 # we always want -O3 because:
 #   it makes the code size smaller too
-# we need -mstringop-strategy=loop because:
-#   privileged code might generate memcpy call
 o/$(MODE)/libc/calls/termios2host.o			\
+o/$(MODE)/libc/calls/siginfo2cosmo.o			\
 o/$(MODE)/libc/calls/sigenter-freebsd.o			\
 o/$(MODE)/libc/calls/sigenter-netbsd.o			\
 o/$(MODE)/libc/calls/sigenter-openbsd.o			\
@@ -85,6 +84,19 @@ o/$(MODE)/libc/calls/sigenter-xnu.o			\
 o/$(MODE)/libc/calls/ntcontext2linux.o: private		\
 		COPTS +=				\
 			-O3				\
+			-ffreestanding
+
+ifeq ($(ARCH), x86_64)
+# we need -mstringop-strategy=loop because:
+#   privileged code might generate memcpy call
+o/$(MODE)/libc/calls/termios2host.o			\
+o/$(MODE)/libc/calls/siginfo2cosmo.o			\
+o/$(MODE)/libc/calls/sigenter-freebsd.o			\
+o/$(MODE)/libc/calls/sigenter-netbsd.o			\
+o/$(MODE)/libc/calls/sigenter-openbsd.o			\
+o/$(MODE)/libc/calls/sigenter-xnu.o			\
+o/$(MODE)/libc/calls/ntcontext2linux.o: private		\
+		COPTS +=				\
 			-mstringop-strategy=loop
 endif
 
@@ -132,7 +144,8 @@ endif
 o/$(MODE)/libc/calls/pledge-linux.o: private		\
 		CFLAGS +=				\
 			-Os				\
-			-fPIC
+			-fPIC				\
+			-ffreestanding
 
 # these assembly files are safe to build on aarch64
 o/$(MODE)/libc/calls/getcontext.o: libc/calls/getcontext.S
diff --git a/libc/calls/calls.h b/libc/calls/calls.h
index 44811fe1d..eab5592f4 100644
--- a/libc/calls/calls.h
+++ b/libc/calls/calls.h
@@ -208,6 +208,7 @@ int execvpe(const char *, char *const[], char *const[]) libcesque;
 int euidaccess(const char *, int) libcesque;
 int eaccess(const char *, int) libcesque;
 int madvise(void *, uint64_t, int) libcesque;
+int getcpu(unsigned *, unsigned *) libcesque;
 #endif
 
 #ifdef _COSMO_SOURCE
@@ -247,6 +248,8 @@ ssize_t tinyprint(int, const char *, ...) libcesque nullterminated();
 void shm_path_np(const char *, char[hasatleast 78]) libcesque;
 #endif /* _COSMO_SOURCE */
 
+int system(const char *) libcesque;
+
 int __wifstopped(int) libcesque pureconst;
 int __wifcontinued(int) libcesque pureconst;
 int __wifsignaled(int) libcesque pureconst;
diff --git a/libc/calls/clock_gettime-nt.c b/libc/calls/clock_gettime-nt.c
index 363da29cc..5a6464e42 100644
--- a/libc/calls/clock_gettime-nt.c
+++ b/libc/calls/clock_gettime-nt.c
@@ -21,12 +21,17 @@
 #include "libc/dce.h"
 #include "libc/errno.h"
 #include "libc/fmt/wintime.internal.h"
+#include "libc/nt/accounting.h"
+#include "libc/nt/runtime.h"
 #include "libc/nt/synchronization.h"
+#include "libc/nt/thread.h"
 
-#define _CLOCK_REALTIME        0
-#define _CLOCK_MONOTONIC       1
-#define _CLOCK_REALTIME_COARSE 2
-#define _CLOCK_BOOTTIME        3
+#define _CLOCK_REALTIME           0
+#define _CLOCK_MONOTONIC          1
+#define _CLOCK_REALTIME_COARSE    2
+#define _CLOCK_BOOTTIME           3
+#define _CLOCK_PROCESS_CPUTIME_ID 4
+#define _CLOCK_THREAD_CPUTIME_ID  5
 
 static struct {
   uint64_t base;
@@ -35,7 +40,7 @@ static struct {
 
 textwindows int sys_clock_gettime_nt(int clock, struct timespec *ts) {
   uint64_t t;
-  struct NtFileTime ft;
+  struct NtFileTime ft, ftExit, ftUser, ftKernel, ftCreation;
   switch (clock) {
     case _CLOCK_REALTIME:
       if (ts) {
@@ -61,18 +66,30 @@ textwindows int sys_clock_gettime_nt(int clock, struct timespec *ts) {
         *ts = timespec_frommillis(GetTickCount64());
       }
       return 0;
+    case _CLOCK_PROCESS_CPUTIME_ID:
+      if (ts) {
+        GetProcessTimes(GetCurrentProcess(), &ftCreation, &ftExit, &ftKernel,
+                        &ftUser);
+        *ts = WindowsDurationToTimeSpec(ReadFileTime(ftUser) +
+                                        ReadFileTime(ftKernel));
+      }
+      return 0;
+    case _CLOCK_THREAD_CPUTIME_ID:
+      if (ts) {
+        GetThreadTimes(GetCurrentThread(), &ftCreation, &ftExit, &ftKernel,
+                       &ftUser);
+        *ts = WindowsDurationToTimeSpec(ReadFileTime(ftUser) +
+                                        ReadFileTime(ftKernel));
+      }
+      return 0;
     default:
       return -EINVAL;
   }
 }
 
-static textstartup void winclock_init() {
+__attribute__((__constructor__(40))) static textstartup void winclock_init() {
   if (IsWindows()) {
     QueryPerformanceCounter(&g_winclock.base);
     QueryPerformanceFrequency(&g_winclock.freq);
   }
 }
-
-const void *const winclock_ctor[] initarray = {
-    winclock_init,
-};
diff --git a/libc/calls/clock_gettime.c b/libc/calls/clock_gettime.c
index f57b716f3..c5f72fdd9 100644
--- a/libc/calls/clock_gettime.c
+++ b/libc/calls/clock_gettime.c
@@ -61,24 +61,13 @@ static int __clock_gettime_init(int clockid, struct timespec *ts) {
 /**
  * Returns nanosecond time.
  *
- * @param clock can be one of:
- *    - `CLOCK_REALTIME`: universally supported
- *    - `CLOCK_REALTIME_FAST`: ditto but faster on freebsd
- *    - `CLOCK_REALTIME_PRECISE`: ditto but better on freebsd
- *    - `CLOCK_REALTIME_COARSE`: : like `CLOCK_REALTIME_FAST` w/ Linux 2.6.32+
- *    - `CLOCK_MONOTONIC`: universally supported (except on XNU/NT w/o INVTSC)
- *    - `CLOCK_MONOTONIC_FAST`: ditto but faster on freebsd
- *    - `CLOCK_MONOTONIC_PRECISE`: ditto but better on freebsd
- *    - `CLOCK_MONOTONIC_COARSE`: : like `CLOCK_MONOTONIC_FAST` w/ Linux 2.6.32+
- *    - `CLOCK_MONOTONIC_RAW`: is actually monotonic but needs Linux 2.6.28+
- *    - `CLOCK_PROCESS_CPUTIME_ID`: linux and bsd (NetBSD permits OR'd PID)
- *    - `CLOCK_THREAD_CPUTIME_ID`: linux and bsd (NetBSD permits OR'd TID)
- *    - `CLOCK_MONOTONIC_COARSE`: linux, freebsd
- *    - `CLOCK_PROF`: linux and netbsd
- *    - `CLOCK_BOOTTIME`: linux and openbsd
- *    - `CLOCK_REALTIME_ALARM`: linux-only
- *    - `CLOCK_BOOTTIME_ALARM`: linux-only
- *    - `CLOCK_TAI`: linux-only
+ * @param clock supports the following values across OSes:
+ *    - `CLOCK_REALTIME`
+ *    - `CLOCK_MONOTONIC`
+ *    - `CLOCK_REALTIME_COARSE`
+ *    - `CLOCK_MONOTONIC_COARSE`
+ *    - `CLOCK_THREAD_CPUTIME_ID`
+ *    - `CLOCK_PROCESS_CPUTIME_ID`
  * @param ts is where the result is stored (or null to do clock check)
  * @return 0 on success, or -1 w/ errno
  * @raise EFAULT if `ts` points to invalid memory
diff --git a/libc/calls/clock_nanosleep-cosmo.c b/libc/calls/clock_nanosleep-cosmo.c
new file mode 100644
index 000000000..ca438eac4
--- /dev/null
+++ b/libc/calls/clock_nanosleep-cosmo.c
@@ -0,0 +1,84 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/calls/internal.h"
+#include "libc/calls/struct/timespec.h"
+#include "libc/errno.h"
+#include "libc/runtime/clktck.h"
+#include "libc/runtime/runtime.h"
+#include "libc/sysv/consts/clock.h"
+#include "libc/sysv/consts/timer.h"
+
+/**
+ * Sleeps with higher accuracy at the cost of cpu.
+ */
+int cosmo_clock_nanosleep(int clock, int flags, const struct timespec *req,
+                          struct timespec *rem) {
+
+  // pick clocks
+  int time_clock;
+  int sleep_clock;
+  if (clock == CLOCK_REALTIME ||  //
+      clock == CLOCK_REALTIME_PRECISE) {
+    time_clock = clock;
+    sleep_clock = CLOCK_REALTIME;
+  } else if (clock == CLOCK_MONOTONIC ||  //
+             clock == CLOCK_MONOTONIC_PRECISE) {
+    time_clock = clock;
+    sleep_clock = CLOCK_MONOTONIC;
+  } else if (clock == CLOCK_REALTIME_COARSE ||  //
+             clock == CLOCK_REALTIME_FAST) {
+    return sys_clock_nanosleep(CLOCK_REALTIME, flags, req, rem);
+  } else if (clock == CLOCK_MONOTONIC_COARSE ||  //
+             clock == CLOCK_MONOTONIC_FAST) {
+    return sys_clock_nanosleep(CLOCK_MONOTONIC, flags, req, rem);
+  } else {
+    return sys_clock_nanosleep(clock, flags, req, rem);
+  }
+
+  // sleep bulk of time in kernel
+  struct timespec start, deadline, remain, waitfor, now;
+  struct timespec quantum = timespec_fromnanos(1000000000 / CLK_TCK);
+  clock_gettime(time_clock, &start);
+  deadline = flags & TIMER_ABSTIME ? *req : timespec_add(start, *req);
+  if (timespec_cmp(start, deadline) >= 0) return 0;
+  remain = timespec_sub(deadline, start);
+  if (timespec_cmp(remain, quantum) > 0) {
+    waitfor = timespec_sub(remain, quantum);
+    if (sys_clock_nanosleep(sleep_clock, 0, &waitfor, rem) == -1) {
+      if (!flags && rem && errno == EINTR) {
+        *rem = timespec_add(*rem, quantum);
+      }
+      return -1;
+    }
+  }
+
+  // spin through final scheduling quantum
+  int rc = 0;
+  ftrace_enabled(-1);
+  do {
+    if (_check_cancel()) {
+      rc = -1;
+      break;
+    }
+    clock_gettime(time_clock, &now);
+  } while (timespec_cmp(now, deadline) < 0);
+  ftrace_enabled(+1);
+  return rc;
+}
diff --git a/libc/calls/clock_nanosleep-sys.c b/libc/calls/clock_nanosleep-sys.c
new file mode 100644
index 000000000..3d0e5cef8
--- /dev/null
+++ b/libc/calls/clock_nanosleep-sys.c
@@ -0,0 +1,61 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/calls/cp.internal.h"
+#include "libc/calls/struct/timespec.h"
+#include "libc/calls/struct/timespec.internal.h"
+#include "libc/dce.h"
+#include "libc/errno.h"
+#include "libc/intrin/describeflags.internal.h"
+#include "libc/intrin/strace.internal.h"
+#include "libc/intrin/weaken.h"
+#include "libc/sysv/errfuns.h"
+#include "libc/thread/thread.h"
+
+int sys_clock_nanosleep(int clock, int flags,  //
+                        const struct timespec *req, struct timespec *rem) {
+  int rc;
+  BEGIN_CANCELATION_POINT;
+  if (IsLinux() || IsFreebsd() || IsNetbsd()) {
+    rc = __sys_clock_nanosleep(clock, flags, req, rem);
+  } else if (IsXnu()) {
+    rc = sys_clock_nanosleep_xnu(clock, flags, req, rem);
+  } else if (IsOpenbsd()) {
+    rc = sys_clock_nanosleep_openbsd(clock, flags, req, rem);
+  } else if (IsWindows()) {
+    rc = sys_clock_nanosleep_nt(clock, flags, req, rem);
+  } else {
+    rc = enosys();
+  }
+  if (rc > 0) {
+    errno = rc;
+    rc = -1;
+  }
+  // system call support might not detect cancelation on bsds
+  if (rc == -1 && errno == EINTR &&      //
+      _weaken(pthread_testcancel_np) &&  //
+      _weaken(pthread_testcancel_np)()) {
+    rc = ecanceled();
+  }
+  END_CANCELATION_POINT;
+  STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
+         DescribeClockName(clock), DescribeSleepFlags(flags),
+         DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
+  return rc;
+}
diff --git a/libc/calls/clock_nanosleep.c b/libc/calls/clock_nanosleep.c
index f9bc8ac27..20a6b03ee 100644
--- a/libc/calls/clock_nanosleep.c
+++ b/libc/calls/clock_nanosleep.c
@@ -16,112 +16,10 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/assert.h"
-#include "libc/calls/cp.internal.h"
-#include "libc/calls/internal.h"
 #include "libc/calls/struct/timespec.h"
-#include "libc/calls/struct/timespec.internal.h"
 #include "libc/dce.h"
 #include "libc/errno.h"
-#include "libc/intrin/describeflags.internal.h"
-#include "libc/intrin/kprintf.h"
-#include "libc/intrin/strace.internal.h"
-#include "libc/intrin/weaken.h"
-#include "libc/runtime/clktck.h"
-#include "libc/runtime/runtime.h"
-#include "libc/sysv/consts/clock.h"
 #include "libc/sysv/consts/timer.h"
-#include "libc/sysv/errfuns.h"
-#include "libc/thread/thread.h"
-
-static int sys_clock_nanosleep(int clock, int flags,  //
-                               const struct timespec *req,
-                               struct timespec *rem) {
-  int rc;
-  BEGIN_CANCELATION_POINT;
-  if (IsLinux() || IsFreebsd() || IsNetbsd()) {
-    rc = __sys_clock_nanosleep(clock, flags, req, rem);
-  } else if (IsXnu()) {
-    rc = sys_clock_nanosleep_xnu(clock, flags, req, rem);
-  } else if (IsOpenbsd()) {
-    rc = sys_clock_nanosleep_openbsd(clock, flags, req, rem);
-  } else if (IsWindows()) {
-    rc = sys_clock_nanosleep_nt(clock, flags, req, rem);
-  } else {
-    rc = enosys();
-  }
-  if (rc > 0) {
-    errno = rc;
-    rc = -1;
-  }
-  // system call support might not detect cancelation on bsds
-  if (rc == -1 && errno == EINTR &&      //
-      _weaken(pthread_testcancel_np) &&  //
-      _weaken(pthread_testcancel_np)()) {
-    rc = ecanceled();
-  }
-  END_CANCELATION_POINT;
-  STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m",
-         DescribeClockName(clock), DescribeSleepFlags(flags),
-         DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc);
-  return rc;
-}
-
-static int cosmo_clock_nanosleep(int clock, int flags,
-                                 const struct timespec *req,
-                                 struct timespec *rem) {
-
-  // pick clocks
-  int time_clock;
-  int sleep_clock;
-  if (clock == CLOCK_REALTIME ||  //
-      clock == CLOCK_REALTIME_PRECISE) {
-    time_clock = clock;
-    sleep_clock = CLOCK_REALTIME;
-  } else if (clock == CLOCK_MONOTONIC ||  //
-             clock == CLOCK_MONOTONIC_PRECISE) {
-    time_clock = clock;
-    sleep_clock = CLOCK_MONOTONIC;
-  } else if (clock == CLOCK_REALTIME_COARSE ||  //
-             clock == CLOCK_REALTIME_FAST) {
-    return sys_clock_nanosleep(CLOCK_REALTIME, flags, req, rem);
-  } else if (clock == CLOCK_MONOTONIC_COARSE ||  //
-             clock == CLOCK_MONOTONIC_FAST) {
-    return sys_clock_nanosleep(CLOCK_MONOTONIC, flags, req, rem);
-  } else {
-    return sys_clock_nanosleep(clock, flags, req, rem);
-  }
-
-  // sleep bulk of time in kernel
-  struct timespec start, deadline, remain, waitfor, now;
-  struct timespec quantum = timespec_fromnanos(1000000000 / CLK_TCK);
-  unassert(!clock_gettime(time_clock, &start));
-  deadline = flags & TIMER_ABSTIME ? *req : timespec_add(start, *req);
-  if (timespec_cmp(start, deadline) >= 0) return 0;
-  remain = timespec_sub(deadline, start);
-  if (timespec_cmp(remain, quantum) > 0) {
-    waitfor = timespec_sub(remain, quantum);
-    if (sys_clock_nanosleep(sleep_clock, 0, &waitfor, rem) == -1) {
-      if (!flags && rem && errno == EINTR) {
-        *rem = timespec_add(*rem, quantum);
-      }
-      return -1;
-    }
-  }
-
-  // spin through final scheduling quantum
-  int rc = 0;
-  ftrace_enabled(-1);
-  do {
-    if (_check_cancel()) {
-      rc = -1;
-      break;
-    }
-    unassert(!clock_gettime(time_clock, &now));
-  } while (timespec_cmp(now, deadline) < 0);
-  ftrace_enabled(+1);
-  return rc;
-}
 
 /**
  * Sleeps for particular amount of time.
@@ -157,10 +55,8 @@ static int cosmo_clock_nanosleep(int clock, int flags,
  * on OpenBSD it's good; on XNU it's bad; and on Windows it's ugly.
  *
  * @param clock may be
- *     - `CLOCK_REALTIME` to have nanosecond-accurate wall time sleeps
- *     - `CLOCK_REALTIME_COARSE` to not spin through scheduler quantum
- *     - `CLOCK_MONOTONIC` to base the sleep off the monotinic clock
- *     - `CLOCK_MONOTONIC_COARSE` to once again not do userspace spin
+ *     - `CLOCK_REALTIME`
+ *     - `CLOCK_MONOTONIC`
  * @param flags can be 0 for relative and `TIMER_ABSTIME` for absolute
  * @param req can be a relative or absolute time, depending on `flags`
  * @param rem shall be updated with the remainder of unslept time when
@@ -193,7 +89,7 @@ errno_t clock_nanosleep(int clock, int flags,        //
     return EINVAL;
   }
   errno_t old = errno;
-  int rc = cosmo_clock_nanosleep(clock, flags, req, rem);
+  int rc = sys_clock_nanosleep(clock, flags, req, rem);
   errno_t err = !rc ? 0 : errno;
   errno = old;
   return err;
diff --git a/libc/calls/close.c b/libc/calls/close.c
index 95068563e..7c8a35f50 100644
--- a/libc/calls/close.c
+++ b/libc/calls/close.c
@@ -93,7 +93,7 @@ static int close_impl(int fd) {
  */
 int close(int fd) {
   int rc;
-  if (__isfdkind(fd, kFdZip)) {   // XXX IsWindows()?
+  if (__isfdkind(fd, kFdZip)) {  // XXX IsWindows()?
     BLOCK_SIGNALS;
     __fds_lock();
     rc = close_impl(fd);
diff --git a/libc/calls/fchmodat-linux.c b/libc/calls/fchmodat-linux.c
index 3036be97f..1640eb636 100644
--- a/libc/calls/fchmodat-linux.c
+++ b/libc/calls/fchmodat-linux.c
@@ -32,11 +32,8 @@
 #include "libc/sysv/consts/o.h"
 #include "libc/sysv/consts/s.h"
 #include "libc/sysv/errfuns.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 static void __procfdname(char *buf, unsigned fd) {
diff --git a/libc/calls/finddebugbinary.c b/libc/calls/finddebugbinary.c
index b4bae854e..3ca5bd1c0 100644
--- a/libc/calls/finddebugbinary.c
+++ b/libc/calls/finddebugbinary.c
@@ -27,6 +27,7 @@
 #include "libc/elf/tinyelf.internal.h"
 #include "libc/errno.h"
 #include "libc/intrin/directmap.internal.h"
+#include "libc/intrin/kprintf.h"
 #include "libc/nt/memory.h"
 #include "libc/nt/runtime.h"
 #include "libc/runtime/runtime.h"
diff --git a/libc/calls/getcpu.c b/libc/calls/getcpu.c
new file mode 100644
index 000000000..c25c52bad
--- /dev/null
+++ b/libc/calls/getcpu.c
@@ -0,0 +1,76 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/calls/struct/cpuset.h"
+#include "libc/calls/syscall_support-nt.internal.h"
+#include "libc/dce.h"
+#include "libc/errno.h"
+#include "libc/nexgen32e/rdtscp.h"
+#include "libc/nexgen32e/x86feature.h"
+#include "libc/nt/struct/processornumber.h"
+#include "libc/nt/synchronization.h"
+#include "libc/runtime/syslib.internal.h"
+#include "libc/sysv/errfuns.h"
+
+int sys_getcpu(unsigned *opt_cpu, unsigned *opt_node, void *tcache);
+
+int getcpu(unsigned *out_opt_cpu, unsigned *out_opt_node) {
+  unsigned cpu;
+  unsigned node;
+  if (X86_HAVE(RDTSCP)) {
+    unsigned tsc_aux;
+    rdtscp(&tsc_aux);
+    cpu = TSC_AUX_CORE(tsc_aux);
+    node = TSC_AUX_NODE(tsc_aux);
+  } else if (IsWindows()) {
+    struct NtProcessorNumber pn;
+    GetCurrentProcessorNumberEx(&pn);
+    cpu = 64 * pn.Group + pn.Number;
+    unsigned short node16;
+    if (GetNumaProcessorNodeEx(&pn, &node16)) {
+      node = node16;
+    } else {
+      return __winerr();
+    }
+  } else if (IsXnuSilicon()) {
+    if (__syslib->__version >= 9) {
+      size_t cpu64;
+      errno_t err = __syslib->__pthread_cpu_number_np(&cpu64);
+      if (!err) {
+        cpu = cpu64;
+        node = 0;
+      } else {
+        errno = err;
+        return -1;
+      }
+    } else {
+      return enosys();
+    }
+  } else {
+    int rc = sys_getcpu(&cpu, &node, 0);
+    if (rc == -1) return -1;
+  }
+  if (out_opt_cpu) {
+    *out_opt_cpu = cpu;
+  }
+  if (out_opt_node) {
+    *out_opt_node = node;
+  }
+  return 0;
+}
diff --git a/libc/calls/getloadavg-nt.c b/libc/calls/getloadavg-nt.c
index 30f940de0..4e8d6d847 100644
--- a/libc/calls/getloadavg-nt.c
+++ b/libc/calls/getloadavg-nt.c
@@ -59,7 +59,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) {
   return rc;
 }
 
-static textstartup void sys_getloadavg_nt_init(void) {
+__attribute__((__constructor__(40))) static textstartup void ntinitload(void) {
   if (IsWindows()) {
     load = 1;
     cpus = __get_cpu_count() / 2;
@@ -67,7 +67,3 @@ static textstartup void sys_getloadavg_nt_init(void) {
     GetSystemTimes(&idle1, &kern1, &user1);
   }
 }
-
-const void *const sys_getloadavg_nt_ctor[] initarray = {
-    sys_getloadavg_nt_init,
-};
diff --git a/libc/calls/getrandom.c b/libc/calls/getrandom.c
index 65171f28c..cdc7b13e5 100644
--- a/libc/calls/getrandom.c
+++ b/libc/calls/getrandom.c
@@ -103,7 +103,7 @@ static ssize_t GetDevUrandom(char *p, size_t n) {
 ssize_t __getrandom(void *p, size_t n, unsigned f) {
   ssize_t rc;
   if (IsWindows()) {
-    rc = RtlGenRandom(p, n) ? n : __winerr();
+    rc = ProcessPrng(p, n) ? n : __winerr();
   } else if (have_getrandom) {
     if (IsXnu() || IsOpenbsd()) {
       rc = GetRandomBsd(p, n, GetRandomEntropy);
@@ -131,7 +131,7 @@ ssize_t __getrandom(void *p, size_t n, unsigned f) {
  *
  * This random number seed generator obtains information from:
  *
- * - RtlGenRandom() on Windows
+ * - ProcessPrng() on Windows
  * - getentropy() on XNU and OpenBSD
  * - getrandom() on Linux, FreeBSD, and NetBSD
  * - sysctl(KERN_ARND) on older versions of FreeBSD and NetBSD
@@ -190,7 +190,8 @@ ssize_t getrandom(void *p, size_t n, unsigned f) {
   return rc;
 }
 
-__attribute__((__constructor__)) static textstartup void getrandom_init(void) {
+__attribute__((__constructor__(30))) static textstartup void getrandom_init(
+    void) {
   int e, rc;
   if (IsWindows() || IsMetal()) return;
   BLOCK_CANCELATION;
diff --git a/libc/calls/metalfile.c b/libc/calls/metalfile.c
index 20d16e56f..0832d52f5 100644
--- a/libc/calls/metalfile.c
+++ b/libc/calls/metalfile.c
@@ -75,7 +75,9 @@ textstartup void InitializeMetalFile(void) {
     memcpy(copied_base, (void *)(BANE + IMAGE_BASE_PHYSICAL), size);
     __ape_com_base = copied_base;
     __ape_com_size = size;
-    KINFOF("%s @ %p,+%#zx", APE_COM_NAME, copied_base, size);
+    // TODO(tkchia): LIBC_CALLS doesn't depend on LIBC_VGA so references
+    //               to its functions need to be weak
+    // KINFOF("%s @ %p,+%#zx", APE_COM_NAME, copied_base, size);
   }
 }
 
diff --git a/libc/calls/program_invocation_short_name.c b/libc/calls/program_invocation_short_name.c
index 8299de395..396d55f47 100644
--- a/libc/calls/program_invocation_short_name.c
+++ b/libc/calls/program_invocation_short_name.c
@@ -21,7 +21,7 @@
 
 char *program_invocation_short_name;
 
-__attribute__((__constructor__)) static void  //
+__attribute__((__constructor__(10))) static textstartup void
 program_invocation_short_name_init(void) {
   char *p, *r;
   if (!__argc) return;
diff --git a/libc/calls/rdrand.c b/libc/calls/rdrand.c
index 7e5ebfa0c..c499c3752 100644
--- a/libc/calls/rdrand.c
+++ b/libc/calls/rdrand.c
@@ -52,7 +52,7 @@ static dontinline uint64_t rdrand_failover(void) {
  *
  * If RDRAND isn't available (we check CPUID and we also disable it
  * automatically for microarchitectures where it's slow or buggy) then
- * we try getrandom(), RtlGenRandom(), or sysctl(KERN_ARND). If those
+ * we try getrandom(), ProcessPrng(), or sysctl(KERN_ARND). If those
  * aren't available then we try /dev/urandom and if that fails, we try
  * getauxval(AT_RANDOM), and if not we finally use RDTSC and getpid().
  *
diff --git a/libc/calls/rdrand_init.c b/libc/calls/rdrand_init.c
index 9751016bf..a3b28ae07 100644
--- a/libc/calls/rdrand_init.c
+++ b/libc/calls/rdrand_init.c
@@ -22,8 +22,8 @@
 #include "libc/nexgen32e/x86info.h"
 #include "libc/stdio/rand.h"
 
-textstartup void rdrand_init(int argc, char **argv, char **envp,
-                             intptr_t *auxv) {
+__attribute__((__constructor__(2))) textstartup void rdrand_init(
+    int argc, char **argv, char **envp, intptr_t *auxv) {
   extern unsigned kMutableCpuids[KCPUIDS_LEN][4] asm("kCpuids");
   /*
    * Clear RDRAND on AMD models before Zen and then some
@@ -39,5 +39,3 @@ textstartup void rdrand_init(int argc, char **argv, char **envp,
     kMutableCpuids[KCPUIDS_7H][KCPUIDS_EBX] &= ~(1u << 18);
   }
 }
-
-const void *const g_rdrand_init[] initarray = {rdrand_init};
diff --git a/libc/calls/read-nt.c b/libc/calls/read-nt.c
index c48b9778c..540855355 100644
--- a/libc/calls/read-nt.c
+++ b/libc/calls/read-nt.c
@@ -157,6 +157,8 @@ static textwindows struct Keystroke *NewKeystroke(void) {
   struct Keystroke *k = KEYSTROKE_CONTAINER(e);
   dll_remove(&__keystroke.free, &k->elem);
   --__keystroke.freekeys;
+  // TODO(jart): What's wrong with GCC 12.3?
+  asm("" : "+r"(k));
   k->buflen = 0;
   return k;
 }
diff --git a/libc/calls/sched_getcpu.c b/libc/calls/sched_getcpu.c
index 761bba995..ac80be47a 100644
--- a/libc/calls/sched_getcpu.c
+++ b/libc/calls/sched_getcpu.c
@@ -19,15 +19,19 @@
 #include "libc/calls/calls.h"
 #include "libc/calls/struct/cpuset.h"
 #include "libc/dce.h"
+#include "libc/errno.h"
 #include "libc/nexgen32e/rdtscp.h"
 #include "libc/nexgen32e/x86feature.h"
 #include "libc/nt/struct/processornumber.h"
 #include "libc/nt/synchronization.h"
+#include "libc/runtime/syslib.internal.h"
+#include "libc/sysv/errfuns.h"
 
 int sys_getcpu(unsigned *opt_cpu, unsigned *opt_node, void *tcache);
 
 /**
  * Returns ID of CPU on which thread is currently scheduled.
+ * @return cpu number on success, or -1 w/ errno
  */
 int sched_getcpu(void) {
   if (X86_HAVE(RDTSCP)) {
@@ -38,6 +42,19 @@ int sched_getcpu(void) {
     struct NtProcessorNumber pn;
     GetCurrentProcessorNumberEx(&pn);
     return 64 * pn.Group + pn.Number;
+  } else if (IsXnuSilicon()) {
+    if (__syslib->__version >= 9) {
+      size_t cpu;
+      errno_t err = __syslib->__pthread_cpu_number_np(&cpu);
+      if (!err) {
+        return cpu;
+      } else {
+        errno = err;
+        return -1;
+      }
+    } else {
+      return enosys();
+    }
   } else {
     unsigned cpu = 0;
     int rc = sys_getcpu(&cpu, 0, 0);
diff --git a/libc/calls/sig.c b/libc/calls/sig.c
index 586e74f8a..cab11bec7 100644
--- a/libc/calls/sig.c
+++ b/libc/calls/sig.c
@@ -32,6 +32,7 @@
 #include "libc/intrin/bsf.h"
 #include "libc/intrin/describebacktrace.internal.h"
 #include "libc/intrin/dll.h"
+#include "libc/intrin/kprintf.h"
 #include "libc/intrin/strace.internal.h"
 #include "libc/intrin/weaken.h"
 #include "libc/nt/console.h"
@@ -441,8 +442,7 @@ textwindows void __sig_generate(int sig, int sic) {
     // to unblock our sig once the wait operation is completed; when
     // that's the case we can cancel the thread's i/o to deliver sig
     if (atomic_load_explicit(&pt->pt_blocker, memory_order_acquire) &&
-        !(atomic_load_explicit(&pt->pt_blkmask, memory_order_relaxed) &
-          (1ull << (sig - 1)))) {
+        !(pt->pt_blkmask & (1ull << (sig - 1)))) {
       _pthread_ref(pt);
       mark = pt;
       break;
@@ -518,6 +518,15 @@ static int __sig_crash_sig(struct NtExceptionPointers *ep, int *code) {
   }
 }
 
+static char *__sig_stpcpy(char *d, const char *s) {
+  size_t i;
+  for (i = 0;; ++i) {
+    if (!(d[i] = s[i])) {
+      return d + i;
+    }
+  }
+}
+
 static void __sig_unmaskable(struct NtExceptionPointers *ep, int code, int sig,
                              struct CosmoTib *tib) {
 
@@ -541,9 +550,10 @@ static void __sig_unmaskable(struct NtExceptionPointers *ep, int code, int sig,
     intptr_t hStderr;
     char sigbuf[21], s[128], *p;
     hStderr = GetStdHandle(kNtStdErrorHandle);
-    p = stpcpy(s, "Terminating on uncaught ");
-    p = stpcpy(p, strsignal_r(sig, sigbuf));
-    p = stpcpy(p, ". Pass --strace and/or ShowCrashReports() for details.\n");
+    p = __sig_stpcpy(s, "Terminating on uncaught ");
+    p = __sig_stpcpy(p, strsignal_r(sig, sigbuf));
+    p = __sig_stpcpy(
+        p, ". Pass --strace and/or ShowCrashReports() for details.\n");
     WriteFile(hStderr, s, p - s, 0, 0);
 #endif
     __sig_terminate(sig);
@@ -657,12 +667,10 @@ textwindows int __sig_check(void) {
   }
 }
 
-textstartup void __sig_init(void) {
+__attribute__((__constructor__(10))) textstartup void __sig_init(void) {
   if (!IsWindows()) return;
   AddVectoredExceptionHandler(true, (void *)__sig_crash);
   SetConsoleCtrlHandler((void *)__sig_console, true);
 }
 
-const void *const __sig_ctor[] initarray = {__sig_init};
-
 #endif /* __x86_64__ */
diff --git a/libc/calls/struct/timespec.h b/libc/calls/struct/timespec.h
index b49eadb60..7dbcb5b28 100644
--- a/libc/calls/struct/timespec.h
+++ b/libc/calls/struct/timespec.h
@@ -19,8 +19,8 @@ int timespec_getres(struct timespec *, int) libcesque;
 int timespec_get(struct timespec *, int) libcesque;
 
 #ifdef _COSMO_SOURCE
-/* cosmopolitan libc's non-posix timespec library
-   removed by default due to emacs codebase clash */
+int sys_clock_nanosleep(int, int, const struct timespec *, struct timespec *);
+int cosmo_clock_nanosleep(int, int, const struct timespec *, struct timespec *);
 #define timespec_zero ((struct timespec){0})
 #define timespec_max  ((struct timespec){0x7fffffffffffffff, 999999999})
 libcesque int timespec_cmp(struct timespec, struct timespec) pureconst;
diff --git a/libc/calls/syscall_support-nt.internal.h b/libc/calls/syscall_support-nt.internal.h
index 7d7bab43c..a002ef9e3 100644
--- a/libc/calls/syscall_support-nt.internal.h
+++ b/libc/calls/syscall_support-nt.internal.h
@@ -18,7 +18,7 @@ int sys_fcntl_nt_setfl(int, unsigned);
 int sys_pause_nt(void);
 int64_t __fix_enotdir(int64_t, char16_t *);
 int64_t __fix_enotdir3(int64_t, char16_t *, char16_t *);
-int64_t __winerr(void) nocallback privileged;
+int64_t __winerr(void) dontcallback privileged;
 int64_t ntreturn(uint32_t);
 void *GetProcAddressModule(const char *, const char *);
 void WinMainForked(void);
diff --git a/libc/calls/ucontext.h b/libc/calls/ucontext.h
index 4c755f3b0..a869ab0ad 100644
--- a/libc/calls/ucontext.h
+++ b/libc/calls/ucontext.h
@@ -130,7 +130,7 @@ typedef struct ucontext ucontext_t;
 int getcontext(ucontext_t *) dontthrow;
 int setcontext(const ucontext_t *) dontthrow;
 int swapcontext(ucontext_t *, const ucontext_t *) dontthrow returnstwice;
-void makecontext(ucontext_t *, void (*)(), int, ...) dontthrow nocallback;
+void makecontext(ucontext_t *, void *, int, ...) dontthrow dontcallback;
 void __sig_restore(const ucontext_t *) wontreturn;
 
 COSMOPOLITAN_C_END_
diff --git a/libc/calls/uname.c b/libc/calls/uname.c
index e7d5905bb..2f03c15b4 100644
--- a/libc/calls/uname.c
+++ b/libc/calls/uname.c
@@ -82,15 +82,27 @@ static textwindows void GetNtName(char *name, int kind) {
 }
 
 static inline textwindows int GetNtMajorVersion(void) {
+#ifdef __x86_64__
   return NtGetPeb()->OSMajorVersion;
+#else
+  return 0;
+#endif
 }
 
 static inline textwindows int GetNtMinorVersion(void) {
+#ifdef __x86_64__
   return NtGetPeb()->OSMinorVersion;
+#else
+  return 0;
+#endif
 }
 
 static inline textwindows int GetNtBuildNumber(void) {
+#ifdef __x86_64__
   return NtGetPeb()->OSBuildNumber;
+#else
+  return 0;
+#endif
 }
 
 static textwindows void GetNtVersion(char *p) {
diff --git a/libc/calls/unveil.c b/libc/calls/unveil.c
index 5eae25662..d47c84889 100644
--- a/libc/calls/unveil.c
+++ b/libc/calls/unveil.c
@@ -100,7 +100,7 @@ static const struct sock_filter kUnveilBlacklistLatestAbi[] = {
 static int landlock_abi_version;
 static int landlock_abi_errno;
 
-__attribute__((__constructor__)) void init_landlock_version() {
+__attribute__((__constructor__(40))) textstartup void init_landlock_version() {
   int e = errno;
   landlock_abi_version =
       landlock_create_ruleset(0, 0, LANDLOCK_CREATE_RULESET_VERSION);
diff --git a/libc/cosmo.h b/libc/cosmo.h
index 35c1a47eb..c84b731eb 100644
--- a/libc/cosmo.h
+++ b/libc/cosmo.h
@@ -4,6 +4,7 @@ COSMOPOLITAN_C_START_
 
 errno_t cosmo_once(_Atomic(uint32_t) *, void (*)(void));
 int systemvpe(const char *, char *const[], char *const[]) libcesque;
+char *GetProgramExecutableName(void);
 
 COSMOPOLITAN_C_END_
 #endif /* COSMOPOLITAN_LIBC_COSMO_H_ */
diff --git a/libc/crt/crt.S b/libc/crt/crt.S
index f55042d09..69c38493e 100644
--- a/libc/crt/crt.S
+++ b/libc/crt/crt.S
@@ -18,7 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 .section .start,"ax",@progbits
 
 #if SupportsXnu() && defined(__x86_64__)
diff --git a/libc/dce.h b/libc/dce.h
index 5b9f7e0fb..f67cb0028 100644
--- a/libc/dce.h
+++ b/libc/dce.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_DCE_H_
 #define COSMOPOLITAN_LIBC_DCE_H_
-#ifdef _COSMO_SOURCE
 /*─────────────────────────────────────────────────────────────────────────────╗
 │ cosmopolitan § autotune » dead code elimination                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
@@ -125,5 +125,5 @@ int IsQemuUser(void);
 
 COSMOPOLITAN_C_END_
 #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_DCE_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/disclaimer.inc b/libc/disclaimer.inc
deleted file mode 100644
index e69de29bb..000000000
diff --git a/libc/dlopen/dlfcn.h b/libc/dlopen/dlfcn.h
index 57eecd2f8..0d879dc3e 100644
--- a/libc/dlopen/dlfcn.h
+++ b/libc/dlopen/dlfcn.h
@@ -16,13 +16,11 @@ void *dlopen(const char *, int) libcesque;
 void *dlsym(void *, const char *) libcesque;
 int dlclose(void *) libcesque;
 
-#ifdef _COSMO_SOURCE
 char *cosmo_dlerror(void) libcesque;
 void *cosmo_dlopen(const char *, int) libcesque;
 void *cosmo_dlsym(void *, const char *) libcesque;
 void *cosmo_dltramp(void *) libcesque;
 int cosmo_dlclose(void *) libcesque;
-#endif
 
 COSMOPOLITAN_C_END_
 #endif /* COSMOPOLITAN_LIBC_DLFCN_H_ */
diff --git a/libc/dlopen/dlopen.c b/libc/dlopen/dlopen.c
index 9dcda52f6..ae5af3e40 100644
--- a/libc/dlopen/dlopen.c
+++ b/libc/dlopen/dlopen.c
@@ -920,17 +920,3 @@ char *cosmo_dlerror(void) {
   STRACE("dlerror() → %#s", res);
   return res;
 }
-
-#ifdef __x86_64__
-static textstartup void dlopen_init() {
-  if (IsLinux() || IsFreebsd()) {
-    // switch from %fs to %gs for tls
-    struct CosmoTib *tib = __get_tls();
-    __morph_tls();
-    __set_tls(tib);
-  }
-}
-const void *const dlopen_ctor[] initarray = {
-    dlopen_init,
-};
-#endif
diff --git a/libc/dlopen/foreign_tramp.S b/libc/dlopen/foreign_tramp.S
index f44c2edbe..dbd036306 100644
--- a/libc/dlopen/foreign_tramp.S
+++ b/libc/dlopen/foreign_tramp.S
@@ -55,7 +55,7 @@ foreign_tramp:
 	mov	%rax,-0xc0(%rbp)
 
 //	switch to foreign tls
-	mov	%fs:0,%rax
+	mov	%gs:0x30,%rax
 	mov	%rax,-0xc8(%rbp)
 	mov	__foreign+8(%rip),%rdi
 	call	__set_tls
diff --git a/libc/errno.h b/libc/errno.h
index 01dc1bde4..ed0b39523 100644
--- a/libc/errno.h
+++ b/libc/errno.h
@@ -22,16 +22,15 @@ COSMOPOLITAN_C_START_
  * @see libc/sysv/dos2errno.sh for multimapped numbers
  */
 
-#if defined(__GNUC__) && defined(__aarch64__) && !defined(__STRICT_ANSI__) && \
-    !defined(__cplusplus)
+#if defined(__GNUC__) && defined(__aarch64__) && !defined(__cplusplus)
 /* this header is included by 700+ files; therefore we */
 /* hand-roll &__get_tls()->tib_errno to avoid #include */
 /* cosmopolitan uses x28 as the tls register b/c apple */
-#define errno                                  \
-  (*({                                         \
-    errno_t *__ep;                             \
-    asm("sub\t%0,x28,#192-0x3c" : "=r"(__ep)); \
-    __ep;                                      \
+#define errno                                      \
+  (*__extension__({                                \
+    errno_t *__ep;                                 \
+    __asm__("sub\t%0,x28,#192-0x3c" : "=r"(__ep)); \
+    __ep;                                          \
   }))
 #else
 #define errno (*__errno_location())
diff --git a/libc/fmt/atoi.c b/libc/fmt/atoi.c
index 5aefb9f94..9030bf468 100644
--- a/libc/fmt/atoi.c
+++ b/libc/fmt/atoi.c
@@ -16,44 +16,28 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/errno.h"
 #include "libc/fmt/conv.h"
-#include "libc/limits.h"
-#include "libc/stdckdint.h"
 #include "libc/str/str.h"
 
 /**
- * Decodes decimal integer from ASCII string.
+ * Turns string into int.
  *
- *     atoi 10⁸              22𝑐         7𝑛𝑠
- *     strtol 10⁸            37𝑐        12𝑛𝑠
- *     strtoul 10⁸           35𝑐        11𝑛𝑠
- *     wcstol 10⁸            30𝑐        10𝑛𝑠
- *     wcstoul 10⁸           30𝑐        10𝑛𝑠
- *     strtoimax 10⁸         80𝑐        26𝑛𝑠
- *     strtoumax 10⁸         78𝑐        25𝑛𝑠
- *     wcstoimax 10⁸         77𝑐        25𝑛𝑠
- *     wcstoumax 10⁸         76𝑐        25𝑛𝑠
+ * Decimal is the only radix supported. Leading whitespace (as specified
+ * by the isspace() function) is skipped over. Unlike strtol(), the atoi
+ * function has undefined behavior on error and it never changes `errno`
  *
- * @param s is a non-null nul-terminated string
+ * @param nptr is a non-null nul-terminated string
  * @return the decoded signed saturated integer
- * @raise ERANGE on overflow
  */
-int atoi(const char *s) {
+int atoi(const char *nptr) {
   int x, c, d;
-  do c = *s++;
-  while (c == ' ' || c == '\t');
+  do c = *nptr++;
+  while (isspace(c));
   d = c == '-' ? -1 : 1;
-  if (c == '-' || c == '+') c = *s++;
-  for (x = 0; isdigit(c); c = *s++) {
-    if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) {
-      errno = ERANGE;
-      if (d > 0) {
-        return INT_MAX;
-      } else {
-        return INT_MIN;
-      }
-    }
+  if (c == '-' || c == '+') c = *nptr++;
+  for (x = 0; isdigit(c); c = *nptr++) {
+    x *= 10;
+    x += (c - '0') * d;
   }
   return x;
 }
diff --git a/libc/fmt/atol.c b/libc/fmt/atol.c
index bbb4291da..8102c3228 100644
--- a/libc/fmt/atol.c
+++ b/libc/fmt/atol.c
@@ -16,34 +16,29 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/errno.h"
 #include "libc/fmt/conv.h"
-#include "libc/limits.h"
-#include "libc/stdckdint.h"
 #include "libc/str/str.h"
 
 /**
- * Decodes decimal integer from ASCII string.
+ * Turns string into long.
  *
- * @param s is a non-null nul-terminated string
+ * Decimal is the only radix supported. Leading whitespace (as specified
+ * by the isspace() function) is skipped over. Unlike strtol(), the atoi
+ * function has undefined behavior on error and it never changes `errno`
+ *
+ * @param nptr is a non-null nul-terminated string
  * @return the decoded signed saturated integer
  */
-long atol(const char *s) {
+long atol(const char *nptr) {
   long x;
   int c, d;
-  do c = *s++;
-  while (c == ' ' || c == '\t');
+  do c = *nptr++;
+  while (isspace(c));
   d = c == '-' ? -1 : 1;
-  if (c == '-' || c == '+') c = *s++;
-  for (x = 0; isdigit(c); c = *s++) {
-    if (ckd_mul(&x, x, 10) || ckd_add(&x, x, (c - '0') * d)) {
-      errno = ERANGE;
-      if (d > 0) {
-        return LONG_MAX;
-      } else {
-        return LONG_MIN;
-      }
-    }
+  if (c == '-' || c == '+') c = *nptr++;
+  for (x = 0; isdigit(c); c = *nptr++) {
+    x *= 10;
+    x += (c - '0') * d;
   }
   return x;
 }
diff --git a/libc/fmt/divmod10.internal.h b/libc/fmt/divmod10.internal.h
index a679636b7..dbf67e567 100644
--- a/libc/fmt/divmod10.internal.h
+++ b/libc/fmt/divmod10.internal.h
@@ -2,8 +2,7 @@
 #define COSMOPOLITAN_LIBC_FMT_DIVMOD10_H_
 
 forceinline uint64_t __divmod10(uint64_t __x, unsigned *__r) {
-#if defined(__STRICT_ANSI__) || !defined(__GNUC__) || \
-    (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__))
+#if !defined(__GNUC__) || (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__))
   *__r = __x % 10;
   return __x / 10;
 #else
diff --git a/libc/fmt/itoa.h b/libc/fmt/itoa.h
index 0ed808f07..258c90e0b 100644
--- a/libc/fmt/itoa.h
+++ b/libc/fmt/itoa.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_FMT_ITOA_H_
 #define COSMOPOLITAN_LIBC_FMT_ITOA_H_
-#ifdef _COSMO_SOURCE
 COSMOPOLITAN_C_START_
 
 #define LengthInt64           __LengthInt64
@@ -42,5 +42,5 @@ libcesque size_t uint64toarray_fixed16(uint64_t, char[hasatleast 17], uint8_t);
 libcesque size_t uint64toarray_radix8(uint64_t, char[hasatleast 24]);
 
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_FMT_ITOA_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/fmt/leb128.h b/libc/fmt/leb128.h
index 8da59b5d3..beef341f9 100644
--- a/libc/fmt/leb128.h
+++ b/libc/fmt/leb128.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_FMT_LEB128_H_
 #define COSMOPOLITAN_LIBC_FMT_LEB128_H_
-#ifdef _COSMO_SOURCE
 COSMOPOLITAN_C_START_
 
 #define sleb64   __sleb64
@@ -16,5 +16,5 @@ int unzleb64(const char *, size_t, int64_t *) libcesque;
 int unuleb64(const char *, size_t, uint64_t *) libcesque;
 
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_FMT_LEB128_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/integral/c.inc b/libc/integral/c.inc
index 11f6a781d..d393ed6f1 100644
--- a/libc/integral/c.inc
+++ b/libc/integral/c.inc
@@ -16,8 +16,7 @@
 
 #if !defined(__GNUC__) && __cplusplus + 0 >= 201103L
 #define typeof(x) decltype(x)
-#elif (defined(__STRICT_ANSI__) || !defined(__GNUC__)) && \
-    __STDC_VERSION__ + 0 < 201112
+#elif !defined(__GNUC__) && __STDC_VERSION__ + 0 < 201112
 #define typeof(x) __typeof(x)
 #endif
 
@@ -27,7 +26,7 @@
 #endif /* C++11 */
 #else  /* __cplusplus */
 #if __STDC_VERSION__ + 0 < 201112
-#if __GNUC__ + _MSC_VER + 0 && !defined(__STRICT_ANSI__)
+#if __GNUC__ + _MSC_VER + 0
 #define _Alignof(x) __alignof(x)
 #else
 #define _Alignof(x) /* basically all it ever did lool */ sizeof(x)
@@ -36,7 +35,7 @@
 #endif /* __cplusplus */
 
 #if !defined(__cplusplus) && !defined(inline) && __STDC_VERSION__ + 0 < 199901
-#if !defined(__STRICT_ANSI__) && (defined(__GNUC__) || defined(_MSC_VER))
+#if defined(__GNUC__) || defined(_MSC_VER)
 #define inline __inline
 #else
 #define inline
@@ -49,7 +48,7 @@
 #endif
 
 #if __STDC_VERSION__ + 0 < 201112
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#ifdef __GNUC__
 #define _Alignas(x) __attribute__((__aligned__(x)))
 #elif defined(_MSC_VER)
 #define _Alignas(x) __declspec(align(x))
@@ -58,17 +57,15 @@
 
 #ifdef _MSC_VER
 #define __builtin_unreachable() __assume(false)
-#elif defined(__STRICT_ANSI__) ||                           \
-    !((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 405 || \
-      defined(__clang__) || defined(__INTEL_COMPILER) ||    \
-      __has_builtin(__builtin_unreachable))
+#elif !((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 405 || \
+        defined(__clang__) || defined(__INTEL_COMPILER) ||    \
+        __has_builtin(__builtin_unreachable))
 #define __builtin_unreachable() \
   for (;;) {                    \
   }
 #endif
 
-#if defined(__STRICT_ANSI__) || \
-    (!defined(__llvm__) && !__has_builtin(__builtin_assume))
+#if (!defined(__llvm__) && !__has_builtin(__builtin_assume))
 #define __builtin_assume(x)            \
   do {                                 \
     if (!(x)) __builtin_unreachable(); \
@@ -134,9 +131,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
 typedef long double max_align_t;
 
 #ifdef _COSMO_SOURCE
-#if ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 406 || \
-     defined(__llvm__)) &&                                 \
-    !defined(__STRICT_ANSI__)
+#if (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 406 || defined(__llvm__)
 typedef signed __int128 int128_t;
 typedef unsigned __int128 uint128_t;
 #endif
@@ -159,7 +154,7 @@ typedef struct {
 #include "libc/integral/lp64arg.inc"
 #endif
 
-#define libcesque   dontthrow nocallback
+#define libcesque   dontthrow dontcallback
 #define memcpyesque libcesque
 #define strlenesque libcesque nosideeffect paramsnonnull()
 #define vallocesque \
@@ -169,11 +164,7 @@ typedef struct {
 #define interruptfn  nocallersavedregisters forcealignargpointer
 
 #ifndef pureconst
-#ifndef __STRICT_ANSI__
 #define pureconst __attribute__((__const__))
-#else
-#define pureconst
-#endif
 #endif
 
 #ifndef forcealign
@@ -181,15 +172,9 @@ typedef struct {
 #endif
 #define thatispacked __attribute__((__packed__))
 
-#ifndef __STRICT_ANSI__
 #define printfesque(n)   __attribute__((__format__(__gnu_printf__, n, n + 1)))
 #define scanfesque(n)    __attribute__((__format__(__gnu_scanf__, n, n + 1)))
 #define strftimeesque(n) __attribute__((__format__(__strftime__, n, 0)))
-#else
-#define printfesque(n)
-#define scanfesque(n)
-#define strftimeesque(n)
-#endif
 
 #ifndef privileged
 #define privileged \
@@ -197,8 +182,7 @@ typedef struct {
 #endif
 
 #ifndef wontreturn
-#if !defined(__STRICT_ANSI__) &&      \
-    (__has_attribute(__noreturn__) || \
+#if (__has_attribute(__noreturn__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 208)
 #define wontreturn __attribute__((__noreturn__))
 #else
@@ -207,8 +191,7 @@ typedef struct {
 #endif
 
 #ifndef nosideeffect
-#if !defined(__STRICT_ANSI__) &&  \
-    (__has_attribute(__pure__) || \
+#if (__has_attribute(__pure__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 296)
 #define nosideeffect __attribute__((__pure__))
 #else
@@ -219,9 +202,8 @@ typedef struct {
 #ifndef dontinline
 #ifdef _MSC_VER
 #define dontinline __declspec(noinline)
-#elif !defined(__STRICT_ANSI__) &&    \
-    (__has_attribute(__noinline__) || \
-     (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 301)
+#elif (__has_attribute(__noinline__) || \
+       (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 301)
 #define dontinline __attribute__((__noinline__))
 #else
 #define dontinline
@@ -229,8 +211,7 @@ typedef struct {
 #endif
 
 #ifndef dontclone
-#if !defined(__STRICT_ANSI__) &&     \
-    (__has_attribute(__noclone__) || \
+#if (__has_attribute(__noclone__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 405)
 #define dontclone __attribute__((__noclone__))
 #else
@@ -242,8 +223,7 @@ typedef struct {
 #ifdef __cplusplus
 #define forceinline inline
 #else
-#if !defined(__STRICT_ANSI__) && \
-    (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 302
+#if (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 302
 #if (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 403 || \
     !defined(__cplusplus) ||                              \
     (defined(__clang__) &&                                \
@@ -267,8 +247,7 @@ typedef struct {
 #endif /* forceinline */
 
 #ifndef __wur
-#if !defined(__STRICT_ANSI__) &&                           \
-    ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 304 || \
+#if ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 304 || \
      __has_attribute(__warn_unused_result__))
 #define __wur __attribute__((__warn_unused_result__))
 #else
@@ -277,8 +256,7 @@ typedef struct {
 #endif
 
 #ifndef nullterminated
-#if !defined(__STRICT_ANSI__) && \
-    (__has_attribute(__sentinel__) || __GNUC__ + 0 >= 4)
+#if __has_attribute(__sentinel__) || __GNUC__ + 0 >= 4
 #define nullterminated(x) __attribute__((__sentinel__ x))
 #else
 #define nullterminated(x)
@@ -295,8 +273,7 @@ typedef struct {
 #endif
 
 #ifndef externinline
-#if !defined(__STRICT_ANSI__) &&                           \
-    (!defined(__cplusplus) ||                              \
+#if (!defined(__cplusplus) ||                              \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 403 || \
      (defined(__clang__) &&                                \
       (defined(__GNUC_STDC_INLINE__) || defined(__GNUC_GNU_INLINE__))))
@@ -311,8 +288,7 @@ typedef struct {
 #endif
 
 #ifndef relegated
-#if !defined(__STRICT_ANSI__) &&  \
-    (__has_attribute(__cold__) || \
+#if (__has_attribute(__cold__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 403)
 #define relegated __attribute__((__cold__))
 #else
@@ -320,8 +296,7 @@ typedef struct {
 #endif
 #endif
 
-#if !defined(__STRICT_ANSI__) &&     \
-    (__has_attribute(__warning__) || \
+#if (__has_attribute(__warning__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 403)
 #define warnifused(s) __attribute__((__warning__(s)))
 #else
@@ -329,8 +304,7 @@ typedef struct {
 #endif
 
 #ifndef firstclass
-#if !defined(__STRICT_ANSI__) && \
-    (__has_attribute(__hot__) || \
+#if (__has_attribute(__hot__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 403)
 #define firstclass __attribute__((__hot__))
 #else
@@ -339,8 +313,7 @@ typedef struct {
 #endif
 
 #ifndef paramsnonnull
-#if !defined(__STRICT_ANSI__) &&     \
-    (__has_attribute(__nonnull__) || \
+#if (__has_attribute(__nonnull__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 403)
 #define paramsnonnull(opt_1idxs) __attribute__((__nonnull__ opt_1idxs))
 #else
@@ -355,7 +328,7 @@ typedef struct {
 #endif
 
 #if __STDC_VERSION__ + 0 < 199901L && !defined(restrict)
-#if !defined(__STRICT_ANSI__) && !defined(__cplusplus) && \
+#if !defined(__cplusplus) && \
     ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 301 || defined(_MSC_VER))
 #define restrict __restrict__
 #else
@@ -364,20 +337,19 @@ typedef struct {
 #endif
 #endif
 
-#ifndef nocallback
-#if !defined(__STRICT_ANSI__) &&  \
-    (__has_attribute(__leaf__) || \
+#ifndef dontcallback
+#if (__has_attribute(__leaf__) || \
      (!defined(__llvm__) &&       \
       (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 406))
-#define nocallback __attribute__((__leaf__))
+#define dontcallback __attribute__((__leaf__))
 #else
-#define nocallback
+#define dontcallback
 #endif
 #endif
 
 #ifndef dontthrow
-#if defined(__cplusplus) && !defined(__STRICT_ANSI__) && \
-    (__has_attribute(dontthrow) ||                       \
+#if defined(__cplusplus) &&        \
+    (__has_attribute(dontthrow) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 303)
 #define dontthrow __attribute__((__nothrow__))
 #elif defined(_MSC_VER)
@@ -388,8 +360,7 @@ typedef struct {
 #endif
 
 #ifndef returnstwice
-#if !defined(__STRICT_ANSI__) &&           \
-    (__has_attribute(__returns_twice__) || \
+#if (__has_attribute(__returns_twice__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 402)
 #define returnstwice __attribute__((__returns_twice__))
 #else
@@ -398,8 +369,7 @@ typedef struct {
 #endif
 
 #ifndef nodebuginfo
-#if !defined(__STRICT_ANSI__) && \
-    (__has_attribute(__nodebug__) || defined(__llvm__))
+#if __has_attribute(__nodebug__) || defined(__llvm__)
 #define nodebuginfo __attribute__((__nodebug__))
 #else
 #define nodebuginfo
@@ -414,8 +384,7 @@ typedef struct {
 #endif
 
 #ifndef returnsnonnull
-#if !defined(__STRICT_ANSI__) &&             \
-    (__has_attribute(__returns_nonnull__) || \
+#if (__has_attribute(__returns_nonnull__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 409)
 #define returnsnonnull __attribute__((__returns_nonnull__))
 #else
@@ -423,8 +392,7 @@ typedef struct {
 #endif
 #endif
 
-#if !defined(__STRICT_ANSI__) &&            \
-    (__has_attribute(__assume_aligned__) || \
+#if (__has_attribute(__assume_aligned__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 409)
 #define returnsaligned(x) __attribute__((__assume_aligned__ x))
 #else
@@ -432,8 +400,7 @@ typedef struct {
 #endif
 
 #ifndef returnspointerwithnoaliases
-#if !defined(__STRICT_ANSI__) &&    \
-    (__has_attribute(__malloc__) || \
+#if (__has_attribute(__malloc__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 409)
 #define returnspointerwithnoaliases __attribute__((__malloc__))
 #elif defined(_MSC_VER)
@@ -444,8 +411,7 @@ typedef struct {
 #endif
 
 #ifndef attributeallocsize
-#if !defined(__STRICT_ANSI__) &&        \
-    (__has_attribute(__alloc_size__) || \
+#if (__has_attribute(__alloc_size__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 409)
 #define attributeallocsize(x) __attribute__((__alloc_size__ x))
 #else
@@ -454,8 +420,7 @@ typedef struct {
 #endif
 
 #ifndef attributeallocalign
-#if !defined(__STRICT_ANSI__) &&         \
-    (__has_attribute(__alloc_align__) || \
+#if (__has_attribute(__alloc_align__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 409)
 #define attributeallocalign(x) __attribute__((__alloc_align__ x))
 #else
@@ -478,8 +443,7 @@ typedef struct {
 #ifdef _COSMO_SOURCE
 
 #ifndef dontinstrument
-#if !defined(__STRICT_ANSI__) &&                    \
-    (__has_attribute(__no_instrument_function__) || \
+#if (__has_attribute(__no_instrument_function__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 204)
 #if ((__GNUC__ + 0) >= 7 && !defined(__chibicc__)) || \
     __has_attribute(__patchable_function_entry__)
@@ -495,8 +459,7 @@ typedef struct {
 #endif
 
 #ifndef mayalias
-#if !defined(__STRICT_ANSI__) &&       \
-    (__has_attribute(__may_alias__) || \
+#if (__has_attribute(__may_alias__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 303)
 #define mayalias __attribute__((__may_alias__))
 #else
@@ -505,35 +468,26 @@ typedef struct {
 #endif
 
 #ifndef dontoptimize
-#ifndef __STRICT_ANSI__
 #if defined(__llvm__) || __has_attribute(__optnone__)
 #define dontoptimize __attribute__((__optnone__))
 #elif (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407 || \
     __has_attribute(__optimize__)
 #define dontoptimize __attribute__((__optimize__(0)))
 #endif
-#else
-#define dontoptimize
-#endif
 #endif
 
 #ifndef optimizesize
-#ifndef __STRICT_ANSI__
 #if (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407 || \
     __has_attribute(__optimize__)
 #define optimizesize __attribute__((__optimize__("s")))
 #elif defined(__llvm__) || __has_attribute(__optnone__)
 #define optimizesize __attribute__((__optnone__))
 #endif
-#else
-#define optimizesize
-#endif
 #endif
 
 #ifndef optimizespeed
 /* warning: corrupts frame pointer; only use on leaf functions */
-#if !defined(__STRICT_ANSI__) &&                           \
-    ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407 || \
+#if ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407 || \
      __has_attribute(__optimize__))
 #define optimizespeed __attribute__((__optimize__(3)))
 #else
@@ -542,8 +496,7 @@ typedef struct {
 #endif
 
 #ifndef unrollloops
-#if !defined(__STRICT_ANSI__) &&                           \
-    ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407 || \
+#if ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407 || \
      __has_attribute(__optimize__))
 #define unrollloops __attribute__((__optimize__("unroll-loops")))
 #else
@@ -552,8 +505,7 @@ typedef struct {
 #endif
 
 #ifndef _Microarchitecture
-#if !defined(__STRICT_ANSI__) &&    \
-    (__has_attribute(__target__) || \
+#if (__has_attribute(__target__) || \
      (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 404)
 #define _Microarchitecture(march) __attribute__((__target__(march)))
 #else
@@ -572,64 +524,48 @@ typedef struct {
 #endif
 
 #if ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 408 || \
-     __has_attribute(__no_sanitize_address__)) &&          \
-    !defined(__STRICT_ANSI__)
+     __has_attribute(__no_sanitize_address__))
 #define dontasan __attribute__((__no_sanitize_address__))
 #else
 #define dontasan
 #endif
 
 #if ((__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 408 || \
-     __has_attribute(__no_sanitize_undefined__)) &&        \
-    !defined(__STRICT_ANSI__)
+     __has_attribute(__no_sanitize_undefined__))
 #define dontubsan __attribute__((__no_sanitize_undefined__))
 #else
 #define dontubsan
 #endif
 
-#ifdef __STRICT_ANSI__
-void abort(void) wontreturn;
-#define notpossible abort()
-#else
 #ifdef __x86_64__
 #define notpossible          \
   do {                       \
-    asm("nop\n\t"            \
-        "ud2\n\t"            \
-        "nop");              \
+    __asm__("nop\n\t"        \
+            "ud2\n\t"        \
+            "nop");          \
     __builtin_unreachable(); \
   } while (0)
 #elif defined(__aarch64__)
 #define notpossible          \
   do {                       \
-    asm("udf\t#0\n\t"        \
-        "nop");              \
+    __asm__("udf\t#0\n\t"    \
+            "nop");          \
     __builtin_unreachable(); \
   } while (0)
 #else
 #define notpossible __builtin_trap()
 #endif
-#endif
 
 #define donothing \
   do {            \
   } while (0)
 
-#ifndef __STRICT_ANSI__
 #define textstartup _Section(".text.startup")
 #define textexit    _Section(".text.exit")
 #define textreal    _Section(".text.real")
 #define texthead    _Section(".text.head")
 #define textwindows _Section(".text.windows")
 #define antiquity   _Section(".text.antiquity")
-#else
-#define textstartup
-#define textexit
-#define textreal
-#define texthead
-#define textwindows
-#define antiquity
-#endif
 
 #ifdef __llvm__
 #define __builtin_ia32_movntdq(x, y) (*(x) = (y))
@@ -639,81 +575,71 @@ void abort(void) wontreturn;
 #define _Section(s) __attribute__((__section__(s)))
 #endif
 
-#if defined(__x86_64__) && !defined(__llvm__)
-#define initarray _Section(".init_array,\"a\",@init_array #")
+#ifndef __llvm__
+#pragma GCC diagnostic ignored "-Wformat=0" /* todo: patch gcc */
+#pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch"
+#pragma GCC diagnostic warning "-Wunknown-pragmas"
 #else
-#define initarray _Section(".init_array")
+#pragma GCC diagnostic ignored "-Wformat"
+#pragma GCC diagnostic ignored "-Wconstant-logical-operand" /* what */
+#pragma GCC diagnostic ignored "-Wunknown-warning-option"
+#pragma GCC diagnostic ignored "-Wunknown-pragmas"
+#pragma GCC diagnostic ignored "-Wstring-plus-int"       /* features 4 losers */
+#pragma GCC diagnostic ignored "-Wkeyword-compat"        /* c++ upgrade */
+#pragma GCC diagnostic ignored "-Wuser-defined-literals" /* reserved for me */
 #endif
 
-#pragma GCC diagnostic ignored "-Wformat"                /* todo: patch gcc */
 #pragma GCC diagnostic ignored "-Wformat-extra-args"     /* todo: patch gcc */
 #pragma GCC diagnostic ignored "-Wunused-function"       /* contradicts dce */
 #pragma GCC diagnostic ignored "-Wunused-const-variable" /* sooo ridiculous */
-#pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch"
 #ifndef __cplusplus
 #pragma GCC diagnostic ignored "-Wold-style-definition" /* orwellian bullsh */
 #endif
 
-#ifndef __STRICT_ANSI__
 #ifdef __x86_64__
-#define DebugBreak() asm("int3")
+#define DebugBreak() __asm__("int3")
 #else
 #define DebugBreak() __builtin_trap()
 #endif
-#else
-#define DebugBreak() (void)0
-#endif
 
 #endif /* _COSMO_SOURCE */
 
-#ifndef __STRICT_ANSI__
-#define __veil(CONSTRAINT, EXPRESSION)                           \
-  ({                                                             \
-    autotype(EXPRESSION) VeiledValue = (EXPRESSION);             \
-    asm("" : "=" CONSTRAINT ""(VeiledValue) : "0"(VeiledValue)); \
-    VeiledValue;                                                 \
+#define __veil(CONSTRAINT, EXPRESSION)                               \
+  __extension__({                                                    \
+    autotype(EXPRESSION) VeiledValue = (EXPRESSION);                 \
+    __asm__("" : "=" CONSTRAINT ""(VeiledValue) : "0"(VeiledValue)); \
+    VeiledValue;                                                     \
   })
-#else
-#define __veil(CONSTRAINT, EXPRESSION) (EXPRESSION)
-#endif
 
-#ifndef __STRICT_ANSI__
-#define __conceal(CONSTRAINT, EXPRESSION)                                 \
-  ({                                                                      \
-    autotype(EXPRESSION) VeiledValue = (EXPRESSION);                      \
-    asm volatile("" : "=" CONSTRAINT ""(VeiledValue) : "0"(VeiledValue)); \
-    VeiledValue;                                                          \
+#define __conceal(CONSTRAINT, EXPRESSION)                                     \
+  __extension__({                                                             \
+    autotype(EXPRESSION) VeiledValue = (EXPRESSION);                          \
+    __asm__ volatile("" : "=" CONSTRAINT ""(VeiledValue) : "0"(VeiledValue)); \
+    VeiledValue;                                                              \
   })
-#else
-#define __conceal(CONSTRAINT, EXPRESSION) (EXPRESSION)
-#endif
 
-#ifndef __STRICT_ANSI__
-#define __expropriate(EXPRESSION)                  \
-  ({                                               \
-    asm volatile("" ::"g"(EXPRESSION) : "memory"); \
-    0;                                             \
+#define __expropriate(EXPRESSION)                      \
+  __extension__({                                      \
+    __asm__ volatile("" ::"g"(EXPRESSION) : "memory"); \
+    0;                                                 \
   })
-#else
-#define __expropriate(EXPRESSION) (EXPRESSION)
-#endif
 
-#if !defined(__STRICT_ANSI__) && !defined(__APPLE__) && defined(__x86_64__)
+#if !defined(__APPLE__) && defined(__x86_64__)
 #define __yoink(SYMBOL) \
-  asm(".section .yoink\n\tnopl\t%0\n\t.previous" : : "m"(SYMBOL))
+  __asm__(".section .yoink\n\tnopl\t%0\n\t.previous" : : "m"(SYMBOL))
 #elif defined(__aarch64__)
 #define __yoink(SYMBOL) \
-  asm(".section .yoink\n\tb\t%0\n\t.previous" : : "m"(SYMBOL))
+  __asm__(".section .yoink\n\tb\t%0\n\t.previous" : : "m"(SYMBOL))
 #else
 #define __yoink(SYMBOL) (void)0
 #endif
 
-#if !defined(__STRICT_ANSI__) && !defined(__APPLE__) && defined(__x86_64__)
+#if !defined(__APPLE__) && defined(__x86_64__)
 #define __static_yoink(SYMBOLSTR) \
-  asm(".section .yoink\n\tnopl\t\"" SYMBOLSTR "\"\n\t.previous")
+  __asm__(".section .yoink\n\tnopl\t\"" SYMBOLSTR "\"\n\t.previous")
 #elif defined(__aarch64__)
 #define __static_yoink(SYMBOLSTR) \
-  asm(".section .yoink\n\tb\t\"" SYMBOLSTR "\"\n\t.previous")
+  __asm__(".section .yoink\n\tb\t\"" SYMBOLSTR "\"\n\t.previous")
 #else
 #define __static_yoink(SYMBOLSTR)
 #endif
@@ -745,5 +671,22 @@ void abort(void) wontreturn;
 #define __funline static inline
 #endif
 
+#if defined(__x86_64__) && (defined(__GNUC__) || defined(__llvm__)) && \
+    !defined(__chibicc__) && defined(__OPTIMIZE__)
+#define __target_clones(x) __attribute__((__target_clones__(x ",default")))
+#else
+#define __target_clones(x)
+#endif
+
+#if !defined(TINY) && !defined(__AVX__)
+#define __vex __target_clones("avx")
+#else
+#define __vex
+#endif
+
+#define __notice(sym, str)                                                   \
+  __attribute__((__section__(".notice"), __aligned__(1))) const char sym[] = \
+      "\n\n" str
+
 #define MACHINE_CODE_ANALYSIS_BEGIN_
 #define MACHINE_CODE_ANALYSIS_END_
diff --git a/libc/integral/normalize.inc b/libc/integral/normalize.inc
index e9cba82d8..25614f071 100644
--- a/libc/integral/normalize.inc
+++ b/libc/integral/normalize.inc
@@ -3,8 +3,8 @@
 #endif
 
 #define __COSMOPOLITAN_MAJOR__ 3
-#define __COSMOPOLITAN_MINOR__ 2
-#define __COSMOPOLITAN_PATCH__ 4
+#define __COSMOPOLITAN_MINOR__ 3
+#define __COSMOPOLITAN_PATCH__ 2
 #define __COSMOPOLITAN__                                                   \
   (100000000 * __COSMOPOLITAN_MAJOR__ + 1000000 * __COSMOPOLITAN_MINOR__ + \
    __COSMOPOLITAN_PATCH__)
diff --git a/libc/intrin/BUILD.mk b/libc/intrin/BUILD.mk
index e4ce22629..2ba6fa39b 100644
--- a/libc/intrin/BUILD.mk
+++ b/libc/intrin/BUILD.mk
@@ -74,11 +74,15 @@ o//libc/intrin/memmove.o: private			\
 			-fno-toplevel-reorder
 
 o//libc/intrin/bzero.o					\
+o//libc/intrin/strlen.o					\
+o//libc/intrin/strchr.o					\
+o//libc/intrin/memchr.o					\
+o//libc/intrin/memrchr.o				\
 o//libc/intrin/memcmp.o					\
 o//libc/intrin/memset.o					\
 o//libc/intrin/memmove.o: private			\
 		CFLAGS +=				\
-			-O2 -finline
+			-O2 -finline -foptimize-sibling-calls
 
 o/$(MODE)/libc/intrin/bzero.o				\
 o/$(MODE)/libc/intrin/memcmp.o				\
@@ -86,6 +90,13 @@ o/$(MODE)/libc/intrin/memmove.o: private		\
 		CFLAGS +=				\
 			-fpie
 
+o/$(MODE)/libc/intrin/x86.o: private			\
+		CFLAGS +=				\
+			-ffreestanding			\
+			-fno-jump-tables		\
+			-fpatchable-function-entry=0	\
+			-Os
+
 # these assembly files are safe to build on aarch64
 o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S
 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
diff --git a/libc/intrin/aarch64/memchr.S b/libc/intrin/aarch64/memchr.S
index 1f423f311..7242bfc86 100644
--- a/libc/intrin/aarch64/memchr.S
+++ b/libc/intrin/aarch64/memchr.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __memchr_aarch64 memchr
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64
diff --git a/libc/intrin/aarch64/memcpy.S b/libc/intrin/aarch64/memcpy.S
index a736ebdb6..65b58d75a 100644
--- a/libc/intrin/aarch64/memcpy.S
+++ b/libc/intrin/aarch64/memcpy.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,15 +26,11 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __memcpy_aarch64_simd memcpy
 #define __memmove_aarch64_simd memmove
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
@@ -84,11 +80,12 @@ ENTRY (__memcpy_aarch64_simd)
 	PTR_ARG (1)
 	SIZE_ARG (2)
 	add	srcend, src, count
-	add	dstend, dstin, count
 	cmp	count, 128
 	b.hi	L(copy_long)
+	add	dstend, dstin, count
 	cmp	count, 32
 	b.hi	L(copy32_128)
+	nop
 
 	/* Small copies: 0..32 bytes.  */
 	cmp	count, 16
@@ -99,6 +96,18 @@ ENTRY (__memcpy_aarch64_simd)
 	str	B_q, [dstend, -16]
 	ret
 
+	.p2align 4
+	/* Medium copies: 33..128 bytes.  */
+L(copy32_128):
+	ldp	A_q, B_q, [src]
+	ldp	C_q, D_q, [srcend, -32]
+	cmp	count, 64
+	b.hi	L(copy128)
+	stp	A_q, B_q, [dstin]
+	stp	C_q, D_q, [dstend, -32]
+	ret
+
+	.p2align 4
 	/* Copy 8-15 bytes.  */
 L(copy16):
 	tbz	count, 3, L(copy8)
@@ -108,7 +117,6 @@ L(copy16):
 	str	A_h, [dstend, -8]
 	ret
 
-	.p2align 3
 	/* Copy 4-7 bytes.  */
 L(copy8):
 	tbz	count, 2, L(copy4)
@@ -118,6 +126,19 @@ L(copy8):
 	str	B_lw, [dstend, -4]
 	ret
 
+	/* Copy 65..128 bytes.  */
+L(copy128):
+	ldp	E_q, F_q, [src, 32]
+	cmp	count, 96
+	b.ls	L(copy96)
+	ldp	G_q, H_q, [srcend, -64]
+	stp	G_q, H_q, [dstend, -64]
+L(copy96):
+	stp	A_q, B_q, [dstin]
+	stp	E_q, F_q, [dstin, 32]
+	stp	C_q, D_q, [dstend, -32]
+	ret
+
 	/* Copy 0..3 bytes using a branchless sequence.  */
 L(copy4):
 	cbz	count, L(copy0)
@@ -131,33 +152,11 @@ L(copy4):
 L(copy0):
 	ret
 
-	.p2align 4
-	/* Medium copies: 33..128 bytes.  */
-L(copy32_128):
-	ldp	A_q, B_q, [src]
-	ldp	C_q, D_q, [srcend, -32]
-	cmp	count, 64
-	b.hi	L(copy128)
-	stp	A_q, B_q, [dstin]
-	stp	C_q, D_q, [dstend, -32]
-	ret
-
-	.p2align 4
-	/* Copy 65..128 bytes.  */
-L(copy128):
-	ldp	E_q, F_q, [src, 32]
-	cmp	count, 96
-	b.ls	L(copy96)
-	ldp	G_q, H_q, [srcend, -64]
-	stp	G_q, H_q, [dstend, -64]
-L(copy96):
-	stp	A_q, B_q, [dstin]
-	stp	E_q, F_q, [dstin, 32]
-	stp	C_q, D_q, [dstend, -32]
-	ret
-
+	.p2align 3
 	/* Copy more than 128 bytes.  */
 L(copy_long):
+	add	dstend, dstin, count
+
 	/* Use backwards copy if there is an overlap.  */
 	sub	tmp1, dstin, src
 	cmp	tmp1, count
@@ -194,6 +193,9 @@ L(copy64_from_end):
 	stp	A_q, B_q, [dstend, -32]
 	ret
 
+	.p2align 4
+	nop
+
 	/* Large backwards copy for overlapping copies.
 	   Copy 16 bytes and then align srcend to 16-byte alignment.  */
 L(copy_long_backwards):
diff --git a/libc/intrin/aarch64/memrchr.S b/libc/intrin/aarch64/memrchr.S
index d0795c8b1..3041eeab7 100644
--- a/libc/intrin/aarch64/memrchr.S
+++ b/libc/intrin/aarch64/memrchr.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __memrchr_aarch64 memrchr
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, Advanced SIMD.
diff --git a/libc/intrin/aarch64/memset.S b/libc/intrin/aarch64/memset.S
index 509840c74..cef8edd9c 100644
--- a/libc/intrin/aarch64/memset.S
+++ b/libc/intrin/aarch64/memset.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __memset_aarch64 memset
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
diff --git a/libc/intrin/aarch64/stpcpy.S b/libc/intrin/aarch64/stpcpy.S
index f7e39e52e..a34f1aa8f 100644
--- a/libc/intrin/aarch64/stpcpy.S
+++ b/libc/intrin/aarch64/stpcpy.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __stpcpy_aarch64 stpcpy
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, Advanced SIMD.
diff --git a/libc/intrin/aarch64/strchr.S b/libc/intrin/aarch64/strchr.S
index ea841ed98..462d94806 100644
--- a/libc/intrin/aarch64/strchr.S
+++ b/libc/intrin/aarch64/strchr.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,15 +26,11 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strchr_aarch64 strchr
 #define __index_aarch64 index
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64
diff --git a/libc/intrin/aarch64/strchrnul.S b/libc/intrin/aarch64/strchrnul.S
index 9694fca47..184305b22 100644
--- a/libc/intrin/aarch64/strchrnul.S
+++ b/libc/intrin/aarch64/strchrnul.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strchrnul_aarch64 strchrnul
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64
diff --git a/libc/intrin/aarch64/strcmp.S b/libc/intrin/aarch64/strcmp.S
index 8d5702f09..98f26d486 100644
--- a/libc/intrin/aarch64/strcmp.S
+++ b/libc/intrin/aarch64/strcmp.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strcmp_aarch64 strcmp
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64.
diff --git a/libc/intrin/aarch64/strcpy.S b/libc/intrin/aarch64/strcpy.S
index 3e6599bb8..93cb85fec 100644
--- a/libc/intrin/aarch64/strcpy.S
+++ b/libc/intrin/aarch64/strcpy.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strcpy_aarch64 strcpy
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, Advanced SIMD.
diff --git a/libc/intrin/aarch64/strlen.S b/libc/intrin/aarch64/strlen.S
index a70e7b214..7464eafc9 100644
--- a/libc/intrin/aarch64/strlen.S
+++ b/libc/intrin/aarch64/strlen.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strlen_aarch64 strlen
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
diff --git a/libc/intrin/aarch64/strncmp.S b/libc/intrin/aarch64/strncmp.S
index 8741d766b..8a0b75397 100644
--- a/libc/intrin/aarch64/strncmp.S
+++ b/libc/intrin/aarch64/strncmp.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strncmp_aarch64 strncmp
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64.
diff --git a/libc/intrin/aarch64/strnlen.S b/libc/intrin/aarch64/strnlen.S
index 2e7c72d31..988c15f63 100644
--- a/libc/intrin/aarch64/strnlen.S
+++ b/libc/intrin/aarch64/strnlen.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,14 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strnlen_aarch64 strnlen
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, Advanced SIMD.
diff --git a/libc/intrin/aarch64/strrchr.S b/libc/intrin/aarch64/strrchr.S
index f492a4423..5199254ff 100644
--- a/libc/intrin/aarch64/strrchr.S
+++ b/libc/intrin/aarch64/strrchr.S
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,15 +26,11 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/aarch64/asmdefs.internal.h"
+.yoink arm_optimized_routines_notice
 
 #define __strrchr_aarch64 strrchr
 #define __rindex_aarch64 rindex
 
-.ident "\n\n\
-Optimized Routines (MIT License)\n\
-Copyright 2022 ARM Limited\n"
-.include "libc/disclaimer.inc"
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64
diff --git a/libc/intrin/armopt.c b/libc/intrin/armopt.c
new file mode 100644
index 000000000..19fc8da12
--- /dev/null
+++ b/libc/intrin/armopt.c
@@ -0,0 +1,3 @@
+__notice(arm_optimized_routines_notice, "\
+Optimized Routines (MIT License)\n\
+Copyright 2022 ARM Limited");
diff --git a/libc/intrin/chromium.c b/libc/intrin/chromium.c
new file mode 100644
index 000000000..543de07d3
--- /dev/null
+++ b/libc/intrin/chromium.c
@@ -0,0 +1,3 @@
+__notice(chromium_notice, "\
+Chromium (BSD-3 License)\n\
+Copyright 2017 The Chromium Authors");
diff --git a/libc/intrin/cxalock.c b/libc/intrin/cxalock.c
index 3ebcf3c8d..4e6de59d0 100644
--- a/libc/intrin/cxalock.c
+++ b/libc/intrin/cxalock.c
@@ -33,10 +33,6 @@ void __cxa_unlock(void) {
   pthread_mutex_unlock(&__cxa_lock_obj);
 }
 
-static textstartup void __cxa_init() {
+__attribute__((__constructor__(60))) static textstartup void __cxa_init() {
   pthread_atfork(__cxa_lock, __cxa_unlock, __cxa_wipe);
 }
-
-const void *const __cxa_ctor[] initarray = {
-    __cxa_init,
-};
diff --git a/libc/intrin/describebacktrace.c b/libc/intrin/describebacktrace.c
index 4d1d5fba8..93107f884 100644
--- a/libc/intrin/describebacktrace.c
+++ b/libc/intrin/describebacktrace.c
@@ -17,6 +17,8 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/describebacktrace.internal.h"
+#include "libc/intrin/kprintf.h"
+#include "libc/intrin/weaken.h"
 #include "libc/log/libfatal.internal.h"
 #include "libc/nexgen32e/stackframe.h"
 
@@ -28,6 +30,9 @@ dontinstrument const char *(DescribeBacktrace)(char buf[N],
   char *pe = p + N;
   bool gotsome = false;
   while (fr) {
+    if (_weaken(kisdangerous) && _weaken(kisdangerous)(fr)) {
+      break;
+    }
     if (p + 16 + 1 + 1 <= pe) {
       if (gotsome) {
         *p++ = ' ';
diff --git a/libc/intrin/dll.h b/libc/intrin/dll.h
index eb1ce8b74..65b701471 100644
--- a/libc/intrin/dll.h
+++ b/libc/intrin/dll.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_INTRIN_DLL_H_
 #define COSMOPOLITAN_LIBC_INTRIN_DLL_H_
-#ifdef _COSMO_SOURCE
 #define dll_make_first   __dll_make_first
 #define dll_make_last    __dll_make_last
 #define dll_remove       __dll_remove
@@ -55,5 +55,5 @@ void dll_make_first(struct Dll **, struct Dll *) paramsnonnull((1)) libcesque;
 void dll_splice_after(struct Dll *, struct Dll *) paramsnonnull((1)) libcesque;
 
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_INTRIN_DLL_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/intrin/fbclibm.c b/libc/intrin/fbclibm.c
new file mode 100644
index 000000000..fc698ec05
--- /dev/null
+++ b/libc/intrin/fbclibm.c
@@ -0,0 +1,2 @@
+__notice(freebsd_complex_notice, "FreeBSD Complex Math (BSD-2 License)\n\
+Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>");
diff --git a/libc/intrin/fblibm.c b/libc/intrin/fblibm.c
new file mode 100644
index 000000000..5a5fbd3fa
--- /dev/null
+++ b/libc/intrin/fblibm.c
@@ -0,0 +1,4 @@
+__notice(freebsd_libm_notice, "FreeBSD libm (BSD-2 License)\n\
+Copyright (c) 1992-2024 The FreeBSD Project.\n\
+Copyright (c) 2005-2011 Bruce D. Evans, Steven G. Kargl, David Schultz\n\
+Copyright (c) 2009-2013 Steven G. Kargl");
diff --git a/libc/intrin/fdlibm.c b/libc/intrin/fdlibm.c
new file mode 100644
index 000000000..2ea7510e2
--- /dev/null
+++ b/libc/intrin/fdlibm.c
@@ -0,0 +1,2 @@
+__notice(fdlibm_notice, "fdlibm (fdlibm license)\n\
+Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.");
diff --git a/libc/runtime/fesetround.c b/libc/intrin/fesetround.c
similarity index 100%
rename from libc/runtime/fesetround.c
rename to libc/intrin/fesetround.c
diff --git a/net/http/khttpmethod.c b/libc/intrin/float16.c
similarity index 78%
rename from net/http/khttpmethod.c
rename to libc/intrin/float16.c
index 6fec68a16..476a2f6c9 100644
--- a/net/http/khttpmethod.c
+++ b/libc/intrin/float16.c
@@ -1,7 +1,7 @@
 /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2020 Justine Alexandra Roberts Tunney                              │
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
 │                                                                              │
 │ Permission to use, copy, modify, and/or distribute this software for         │
 │ any purpose with or without fee is hereby granted, provided that the         │
@@ -16,25 +16,27 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "net/http/http.h"
 
-const char kHttpMethod[18][8] = {
-    "WUT",      //
-    "GET",      //
-    "HEAD",     //
-    "POST",     //
-    "PUT",      //
-    "DELETE",   //
-    "OPTIONS",  //
-    "CONNECT",  //
-    "TRACE",    //
-    "COPY",     //
-    "LOCK",     //
-    "MERGE",    //
-    "MKCOL",    //
-    "MOVE",     //
-    "NOTIFY",   //
-    "PATCH",    //
-    "REPORT",   //
-    "UNLOCK",   //
+/**
+ * @fileoverview fp16 compiler runtime
+ */
+
+#define asint(x) ((union pun){x}).i
+#define isnan(x) (((x) & 0x7fff) > 0x7c00)
+
+union pun {
+  _Float16 f;
+  unsigned short i;
 };
+
+int __eqhf2(_Float16 fx, _Float16 fy) {
+  int x = asint(fx);
+  int y = asint(fy);
+  return (x == y) & !isnan(x) & !isnan(y);
+}
+
+int __nehf2(_Float16 fx, _Float16 fy) {
+  int x = asint(fx);
+  int y = asint(fy);
+  return (x != y) & !isnan(x) & !isnan(y);
+}
diff --git a/libc/intrin/folibm.c b/libc/intrin/folibm.c
new file mode 100644
index 000000000..2fe77e001
--- /dev/null
+++ b/libc/intrin/folibm.c
@@ -0,0 +1,2 @@
+__notice(openbsd_libm_notice, "OpenBSD libm (ISC License)\n\
+Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>");
diff --git a/libc/intrin/isrunningundermake.c b/libc/intrin/isrunningundermake.c
index 929ece7f8..53bcf4840 100644
--- a/libc/intrin/isrunningundermake.c
+++ b/libc/intrin/isrunningundermake.c
@@ -30,10 +30,6 @@ bool32 IsRunningUnderMake(void) {
   return g_isrunningundermake;
 }
 
-textstartup void g_isrunningundermake_init(void) {
+__attribute__((__constructor__(30))) textstartup void onmake_init(void) {
   g_isrunningundermake = !!getenv("MAKEFLAGS");
 }
-
-const void *const g_isrunningundermake_ctor[] initarray = {
-    g_isrunningundermake_init,
-};
diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c
index d281a82c2..cb4c87659 100644
--- a/libc/intrin/kprintf.greg.c
+++ b/libc/intrin/kprintf.greg.c
@@ -24,7 +24,6 @@
 #include "libc/fmt/magnumstrs.internal.h"
 #include "libc/intrin/asmflag.h"
 #include "libc/intrin/atomic.h"
-#include "libc/serialize.h"
 #include "libc/intrin/getenv.internal.h"
 #include "libc/intrin/likely.h"
 #include "libc/intrin/nomultics.internal.h"
@@ -46,6 +45,7 @@
 #include "libc/runtime/memtrack.internal.h"
 #include "libc/runtime/runtime.h"
 #include "libc/runtime/symbols.internal.h"
+#include "libc/serialize.h"
 #include "libc/stdckdint.h"
 #include "libc/str/str.h"
 #include "libc/str/tab.internal.h"
diff --git a/libc/intrin/kprintf.h b/libc/intrin/kprintf.h
index d11508a68..ea654acb2 100644
--- a/libc/intrin/kprintf.h
+++ b/libc/intrin/kprintf.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_INTRIN_KPRINTF_H_
 #define COSMOPOLITAN_LIBC_INTRIN_KPRINTF_H_
-#ifdef _COSMO_SOURCE
 
 #define klog         __klog
 #define kprintf      __kprintf
@@ -54,5 +54,5 @@ void uvprintf(const char *, va_list) libcesque;
 
 COSMOPOLITAN_C_END_
 #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_INTRIN_KPRINTF_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/intrin/memchr.c b/libc/intrin/memchr.c
index aa4adbb0a..676cb04be 100644
--- a/libc/intrin/memchr.c
+++ b/libc/intrin/memchr.c
@@ -36,8 +36,8 @@ static inline const unsigned char *memchr_pure(const unsigned char *s,
 }
 
 #if defined(__x86_64__) && !defined(__chibicc__)
-static inline const unsigned char *memchr_sse(const unsigned char *s,
-                                              unsigned char c, size_t n) {
+static __vex const unsigned char *memchr_sse(const unsigned char *s,
+                                             unsigned char c, size_t n) {
   size_t i;
   unsigned m;
   xmm_t v, t = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
@@ -69,10 +69,7 @@ static inline const unsigned char *memchr_sse(const unsigned char *s,
  */
 void *memchr(const void *s, int c, size_t n) {
 #if defined(__x86_64__) && !defined(__chibicc__)
-  const void *r;
-  if (IsAsan()) __asan_verify(s, n);
-  r = memchr_sse(s, c, n);
-  return (void *)r;
+  return (void *)memchr_sse(s, c, n);
 #else
   return (void *)memchr_pure(s, c, n);
 #endif
diff --git a/libc/intrin/memmove.c b/libc/intrin/memmove.c
index ff2b4ef62..493210aac 100644
--- a/libc/intrin/memmove.c
+++ b/libc/intrin/memmove.c
@@ -27,66 +27,7 @@
 typedef long long xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
 typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16)));
 
-/**
- * Copies memory.
- *
- *     memmove n=0                        661 picoseconds
- *     memmove n=1                        661 ps/byte          1,476 mb/s
- *     memmove n=2                        330 ps/byte          2,952 mb/s
- *     memmove n=3                        330 ps/byte          2,952 mb/s
- *     memmove n=4                        165 ps/byte          5,904 mb/s
- *     memmove n=7                        141 ps/byte          6,888 mb/s
- *     memmove n=8                         82 ps/byte             11 GB/s
- *     memmove n=15                        44 ps/byte             21 GB/s
- *     memmove n=16                        41 ps/byte             23 GB/s
- *     memmove n=31                        32 ps/byte             29 GB/s
- *     memmove n=32                        31 ps/byte             30 GB/s
- *     memmove n=63                        21 ps/byte             45 GB/s
- *     memmove n=64                        15 ps/byte             61 GB/s
- *     memmove n=127                       13 ps/byte             73 GB/s
- *     memmove n=128                       31 ps/byte             30 GB/s
- *     memmove n=255                       20 ps/byte             45 GB/s
- *     memmove n=256                       19 ps/byte             49 GB/s
- *     memmove n=511                       16 ps/byte             56 GB/s
- *     memmove n=512                       17 ps/byte             54 GB/s
- *     memmove n=1023                      18 ps/byte             52 GB/s
- *     memmove n=1024                      13 ps/byte             72 GB/s
- *     memmove n=2047                       9 ps/byte             96 GB/s
- *     memmove n=2048                       9 ps/byte             98 GB/s
- *     memmove n=4095                       8 ps/byte            112 GB/s
- *     memmove n=4096                       8 ps/byte            109 GB/s
- *     memmove n=8191                       7 ps/byte            124 GB/s
- *     memmove n=8192                       7 ps/byte            125 GB/s
- *     memmove n=16383                      7 ps/byte            134 GB/s
- *     memmove n=16384                      7 ps/byte            134 GB/s
- *     memmove n=32767                     13 ps/byte             72 GB/s
- *     memmove n=32768                     13 ps/byte             72 GB/s
- *     memmove n=65535                     13 ps/byte             68 GB/s
- *     memmove n=65536                     14 ps/byte             67 GB/s
- *     memmove n=131071                    14 ps/byte             65 GB/s
- *     memmove n=131072                    14 ps/byte             64 GB/s
- *     memmove n=262143                    15 ps/byte             63 GB/s
- *     memmove n=262144                    15 ps/byte             63 GB/s
- *     memmove n=524287                    15 ps/byte             61 GB/s
- *     memmove n=524288                    15 ps/byte             61 GB/s
- *     memmove n=1048575                   15 ps/byte             61 GB/s
- *     memmove n=1048576                   15 ps/byte             61 GB/s
- *     memmove n=2097151                   19 ps/byte             48 GB/s
- *     memmove n=2097152                   27 ps/byte             35 GB/s
- *     memmove n=4194303                   28 ps/byte             33 GB/s
- *     memmove n=4194304                   28 ps/byte             33 GB/s
- *     memmove n=8388607                   28 ps/byte             33 GB/s
- *     memmove n=8388608                   28 ps/byte             33 GB/s
- *
- * DST and SRC may overlap.
- *
- * @param dst is destination
- * @param src is memory to copy
- * @param n is number of bytes to copy
- * @return dst
- * @asyncsignalsafe
- */
-void *memmove(void *dst, const void *src, size_t n) {
+static __vex void *__memmove(void *dst, const void *src, size_t n) {
   char *d;
   size_t i;
   const char *s;
@@ -338,6 +279,69 @@ void *memmove(void *dst, const void *src, size_t n) {
   }
 }
 
+/**
+ * Copies memory.
+ *
+ *     memmove n=0                        661 picoseconds
+ *     memmove n=1                        661 ps/byte          1,476 mb/s
+ *     memmove n=2                        330 ps/byte          2,952 mb/s
+ *     memmove n=3                        330 ps/byte          2,952 mb/s
+ *     memmove n=4                        165 ps/byte          5,904 mb/s
+ *     memmove n=7                        141 ps/byte          6,888 mb/s
+ *     memmove n=8                         82 ps/byte             11 GB/s
+ *     memmove n=15                        44 ps/byte             21 GB/s
+ *     memmove n=16                        41 ps/byte             23 GB/s
+ *     memmove n=31                        32 ps/byte             29 GB/s
+ *     memmove n=32                        31 ps/byte             30 GB/s
+ *     memmove n=63                        21 ps/byte             45 GB/s
+ *     memmove n=64                        15 ps/byte             61 GB/s
+ *     memmove n=127                       13 ps/byte             73 GB/s
+ *     memmove n=128                       31 ps/byte             30 GB/s
+ *     memmove n=255                       20 ps/byte             45 GB/s
+ *     memmove n=256                       19 ps/byte             49 GB/s
+ *     memmove n=511                       16 ps/byte             56 GB/s
+ *     memmove n=512                       17 ps/byte             54 GB/s
+ *     memmove n=1023                      18 ps/byte             52 GB/s
+ *     memmove n=1024                      13 ps/byte             72 GB/s
+ *     memmove n=2047                       9 ps/byte             96 GB/s
+ *     memmove n=2048                       9 ps/byte             98 GB/s
+ *     memmove n=4095                       8 ps/byte            112 GB/s
+ *     memmove n=4096                       8 ps/byte            109 GB/s
+ *     memmove n=8191                       7 ps/byte            124 GB/s
+ *     memmove n=8192                       7 ps/byte            125 GB/s
+ *     memmove n=16383                      7 ps/byte            134 GB/s
+ *     memmove n=16384                      7 ps/byte            134 GB/s
+ *     memmove n=32767                     13 ps/byte             72 GB/s
+ *     memmove n=32768                     13 ps/byte             72 GB/s
+ *     memmove n=65535                     13 ps/byte             68 GB/s
+ *     memmove n=65536                     14 ps/byte             67 GB/s
+ *     memmove n=131071                    14 ps/byte             65 GB/s
+ *     memmove n=131072                    14 ps/byte             64 GB/s
+ *     memmove n=262143                    15 ps/byte             63 GB/s
+ *     memmove n=262144                    15 ps/byte             63 GB/s
+ *     memmove n=524287                    15 ps/byte             61 GB/s
+ *     memmove n=524288                    15 ps/byte             61 GB/s
+ *     memmove n=1048575                   15 ps/byte             61 GB/s
+ *     memmove n=1048576                   15 ps/byte             61 GB/s
+ *     memmove n=2097151                   19 ps/byte             48 GB/s
+ *     memmove n=2097152                   27 ps/byte             35 GB/s
+ *     memmove n=4194303                   28 ps/byte             33 GB/s
+ *     memmove n=4194304                   28 ps/byte             33 GB/s
+ *     memmove n=8388607                   28 ps/byte             33 GB/s
+ *     memmove n=8388608                   28 ps/byte             33 GB/s
+ *
+ * DST and SRC may overlap.
+ *
+ * @param dst is destination
+ * @param src is memory to copy
+ * @param n is number of bytes to copy
+ * @return dst
+ * @asyncsignalsafe
+ */
+void *memmove(void *dst, const void *src, size_t n) {
+  return __memmove(dst, src, n);
+}
+
 __weak_reference(memmove, memcpy);
 
 #endif /* __aarch64__ */
diff --git a/libc/intrin/memrchr.c b/libc/intrin/memrchr.c
index 3a8efe2c0..f0f8e8689 100644
--- a/libc/intrin/memrchr.c
+++ b/libc/intrin/memrchr.c
@@ -36,8 +36,8 @@ static inline const unsigned char *memrchr_pure(const unsigned char *s,
 }
 
 #if defined(__x86_64__) && !defined(__chibicc__)
-static inline const unsigned char *memrchr_sse(const unsigned char *s,
-                                               unsigned char c, size_t n) {
+static __vex const unsigned char *memrchr_sse(const unsigned char *s,
+                                              unsigned char c, size_t n) {
   size_t i;
   unsigned m;
   xmm_t v, t = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
@@ -69,9 +69,7 @@ static inline const unsigned char *memrchr_sse(const unsigned char *s,
  */
 void *memrchr(const void *s, int c, size_t n) {
 #if defined(__x86_64__) && !defined(__chibicc__)
-  const void *r;
-  r = memrchr_sse(s, c, n);
-  return (void *)r;
+  return (void *)memrchr_sse(s, c, n);
 #else
   return (void *)memrchr_pure(s, c, n);
 #endif
diff --git a/libc/intrin/mman.greg.c b/libc/intrin/mman.greg.c
index 7641a4969..6c886df53 100644
--- a/libc/intrin/mman.greg.c
+++ b/libc/intrin/mman.greg.c
@@ -44,14 +44,16 @@
 #include "libc/runtime/runtime.h"
 #ifdef __x86_64__
 
-#define INVERT(x) (BANE + PHYSICAL(x))
-#define NOPAGE    ((uint64_t)-1)
+#define INVERT(x) (BANE + PHYSICAL((uintptr_t)(x)))
+#define NOPAGE    ((uint64_t) - 1)
 
-#define ABS64(x)                                     \
-  ({                                                 \
-    int64_t vAddr;                                   \
-    __asm__("movabs\t%1,%0" : "=r"(vAddr) : "i"(x)); \
-    vAddr;                                           \
+#define APE_STACK_VADDR                   \
+  ({                                      \
+    int64_t vAddr;                        \
+    __asm__(".weak\tape_stack_vaddr\n\t"  \
+            "movabs\t$ape_stack_vaddr,%0" \
+            : "=r"(vAddr));               \
+    vAddr;                                \
   })
 
 struct ReclaimedPage {
@@ -305,7 +307,6 @@ textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b,
   extern char ape_phdrs_end[] __attribute__((__weak__));
   extern char ape_stack_pf[] __attribute__((__weak__));
   extern char ape_stack_offset[] __attribute__((__weak__));
-  extern char ape_stack_vaddr[] __attribute__((__weak__));
   extern char ape_stack_filesz[] __attribute__((__weak__));
   extern char ape_stack_memsz[] __attribute__((__weak__));
   __setup_mman(mm, pml4t, top);
@@ -318,7 +319,7 @@ textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b,
                      .p_type = PT_LOAD,
                      .p_flags = (uintptr_t)ape_stack_pf,
                      .p_offset = (uintptr_t)ape_stack_offset,
-                     .p_vaddr = ABS64(ape_stack_vaddr),
+                     .p_vaddr = APE_STACK_VADDR,
                      .p_filesz = (uintptr_t)ape_stack_filesz,
                      .p_memsz = (uintptr_t)ape_stack_memsz,
                  });
diff --git a/libc/intrin/musl.c b/libc/intrin/musl.c
new file mode 100644
index 000000000..28f7ed4db
--- /dev/null
+++ b/libc/intrin/musl.c
@@ -0,0 +1,3 @@
+__notice(musl_libc_notice, "\
+Musl libc (MIT License)\n\
+Copyright 2005-2014 Rich Felker, et. al.");
diff --git a/libc/intrin/nocolor.c b/libc/intrin/nocolor.c
index e864b0396..efd2fa7f6 100644
--- a/libc/intrin/nocolor.c
+++ b/libc/intrin/nocolor.c
@@ -48,13 +48,9 @@
  */
 bool __nocolor;
 
-optimizesize textstartup void __nocolor_init(int argc, char **argv, char **envp,
-                                             intptr_t *auxv) {
+__attribute__((__constructor__(20))) optimizesize textstartup void
+__nocolor_init(int argc, char **argv, char **envp, intptr_t *auxv) {
   char *s;
   __nocolor = (IsWindows() && !IsAtLeastWindows10()) ||
               ((s = getenv("TERM")) && IsDumb(s));
 }
-
-const void *const __nocolor_ctor[] initarray = {
-    __nocolor_init,
-};
diff --git a/libc/intrin/reservefd.c b/libc/intrin/reservefd.c
index ae96d63d6..447f7ff2e 100644
--- a/libc/intrin/reservefd.c
+++ b/libc/intrin/reservefd.c
@@ -27,8 +27,6 @@
 #include "libc/str/str.h"
 #include "libc/sysv/consts/map.h"
 
-static volatile size_t mapsize;
-
 /**
  * Grows file descriptor array memory if needed.
  *
diff --git a/libc/intrin/scalblnl.c b/libc/intrin/scalblnl.c
index 0150ec683..c93790767 100644
--- a/libc/intrin/scalblnl.c
+++ b/libc/intrin/scalblnl.c
@@ -28,11 +28,8 @@
 #include "libc/limits.h"
 #include "libc/math.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 long double scalblnl(long double x, long n) {
diff --git a/libc/intrin/scalbn.c b/libc/intrin/scalbn.c
index 54178a67c..55f7e6cda 100644
--- a/libc/intrin/scalbn.c
+++ b/libc/intrin/scalbn.c
@@ -26,11 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 /**
diff --git a/libc/intrin/scalbnf.c b/libc/intrin/scalbnf.c
index d5c5d1d02..b900e589d 100644
--- a/libc/intrin/scalbnf.c
+++ b/libc/intrin/scalbnf.c
@@ -26,11 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 /**
diff --git a/libc/intrin/scalbnl.c b/libc/intrin/scalbnl.c
index c87cb76f7..f9739ff36 100644
--- a/libc/intrin/scalbnl.c
+++ b/libc/intrin/scalbnl.c
@@ -28,11 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 /**
diff --git a/libc/intrin/segmentation.h b/libc/intrin/segmentation.h
deleted file mode 100644
index 845b9a41a..000000000
--- a/libc/intrin/segmentation.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_
-#define COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
-#ifdef _COSMO_SOURCE
-
-/**
- * Reads scalar from memory, offset by segment.
- *
- * @return *(MEM) relative to segment
- * @see pushpop()
- */
-#define fs(MEM) __peek("fs", MEM)
-#define gs(MEM) __peek("gs", MEM)
-
-#define __peek(SEGMENT, ADDRESS)                                  \
-  ({                                                              \
-    typeof(*(ADDRESS)) Pk;                                        \
-    asm("mov\t%%" SEGMENT ":%1,%0" : "=r"(Pk) : "m"(*(ADDRESS))); \
-    Pk;                                                           \
-  })
-
-#endif /* _COSMO_SOURCE */
-#endif /* __GNUC__ && !__STRICT_ANSI__ */
-#endif /* COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_ */
diff --git a/libc/intrin/stackchkguard.S b/libc/intrin/stackchkguard.S
index 9f4437ba4..b78117a5d 100644
--- a/libc/intrin/stackchkguard.S
+++ b/libc/intrin/stackchkguard.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 //	Canary for -fstack-protector.
 //
diff --git a/libc/intrin/stpcpy.c b/libc/intrin/stpcpy.c
index b3b7dfed2..c7c15e8e2 100644
--- a/libc/intrin/stpcpy.c
+++ b/libc/intrin/stpcpy.c
@@ -33,7 +33,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
  * @return pointer to nul byte
  * @asyncsignalsafe
  */
-char *stpcpy(char *d, const char *s) {
+__vex char *stpcpy(char *d, const char *s) {
   size_t i = 0;
 #if defined(__x86_64__) && !defined(__chibicc__)
   for (; (uintptr_t)(s + i) & 15; ++i) {
diff --git a/libc/intrin/strchr.c b/libc/intrin/strchr.c
index 7eb19a7b1..4d05ffb05 100644
--- a/libc/intrin/strchr.c
+++ b/libc/intrin/strchr.c
@@ -31,7 +31,7 @@ static inline const char *strchr_pure(const char *s, int c) {
 
 #if defined(__x86_64__) && !defined(__chibicc__)
 typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
-static inline const char *strchr_sse(const char *s, unsigned char c) {
+static __vex const char *strchr_sse(const char *s, unsigned char c) {
   unsigned k;
   unsigned m;
   const xmm_t *p;
@@ -102,7 +102,6 @@ char *strchr(const char *s, int c) {
   } else {
     r = strchr_pure(s, c);
   }
-  unassert(!r || *r || !(c & 255));
   return (char *)r;
 #else
   const char *r;
@@ -110,9 +109,7 @@ char *strchr(const char *s, int c) {
     if ((*s & 255) == c) return (char *)s;
     if (!*s) return NULL;
   }
-  r = strchr_x64(s, c);
-  unassert(!r || *r || !c);
-  return (char *)r;
+  return (char *)strchr_x64(s, c);
 #endif
 }
 
diff --git a/libc/intrin/strchrnul.c b/libc/intrin/strchrnul.c
index 5c10b129c..b61cebde7 100644
--- a/libc/intrin/strchrnul.c
+++ b/libc/intrin/strchrnul.c
@@ -92,7 +92,7 @@ static const char *strchrnul_x64(const char *p, uint64_t c) {
  * @return pointer to first instance of c, or pointer to
  *     NUL terminator if c is not found
  */
-char *strchrnul(const char *s, int c) {
+__vex char *strchrnul(const char *s, int c) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   const char *r;
   if (X86_HAVE(SSE)) {
diff --git a/libc/intrin/strcpy.c b/libc/intrin/strcpy.c
index fda91111a..b08c271e1 100644
--- a/libc/intrin/strcpy.c
+++ b/libc/intrin/strcpy.c
@@ -32,7 +32,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
  * @return original dest
  * @asyncsignalsafe
  */
-char *strcpy(char *d, const char *s) {
+__vex char *strcpy(char *d, const char *s) {
   size_t i = 0;
 #if defined(__x86_64__) && !defined(__chibicc__)
   for (; (uintptr_t)(s + i) & 15; ++i) {
diff --git a/libc/intrin/strlen.c b/libc/intrin/strlen.c
index bc2a95056..72a32c33d 100644
--- a/libc/intrin/strlen.c
+++ b/libc/intrin/strlen.c
@@ -20,14 +20,7 @@
 #include "libc/str/str.h"
 #ifndef __aarch64__
 
-/**
- * Returns length of NUL-terminated string.
- *
- * @param s is non-null NUL-terminated string pointer
- * @return number of bytes (excluding NUL)
- * @asyncsignalsafe
- */
-size_t strlen(const char *s) {
+static __vex size_t __strlen(const char *s) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
   xmm_t z = {0};
@@ -37,7 +30,7 @@ size_t strlen(const char *s) {
   while (!m) m = __builtin_ia32_pmovmskb128(*++p == z);
   return (const char *)p + __builtin_ctzl(m) - s;
 #else
-#define ONES ((word)-1 / 255)
+#define ONES ((word) - 1 / 255)
 #define BANE (ONES * (255 / 2 + 1))
   typedef unsigned long mayalias word;
   word w;
@@ -57,4 +50,15 @@ size_t strlen(const char *s) {
 #endif
 }
 
+/**
+ * Returns length of NUL-terminated string.
+ *
+ * @param s is non-null NUL-terminated string pointer
+ * @return number of bytes (excluding NUL)
+ * @asyncsignalsafe
+ */
+size_t strlen(const char *s) {
+  return __strlen(s);
+}
+
 #endif /* __aarch64__ */
diff --git a/libc/intrin/ubsan.c b/libc/intrin/ubsan.c
index d42f8e20b..5b3051ac5 100644
--- a/libc/intrin/ubsan.c
+++ b/libc/intrin/ubsan.c
@@ -627,7 +627,7 @@ void *__ubsan_get_current_report_data(void) {
   return 0;
 }
 
-static textstartup void ubsan_init() {
+__attribute__((__constructor__(90))) static textstartup void ubsan_init() {
   STRACE(" _   _ ____ ____    _    _   _");
   STRACE("| | | | __ ) ___|  / \\  | \\ | |");
   STRACE("| | | |  _ \\___ \\ / _ \\ |  \\| |");
@@ -635,7 +635,3 @@ static textstartup void ubsan_init() {
   STRACE(" \\___/|____/____/_/   \\_\\_| \\_|");
   STRACE("cosmopolitan behavior module initialized");
 }
-
-const void *const ubsan_ctor[] initarray = {
-    ubsan_init,
-};
diff --git a/libc/intrin/x86.c b/libc/intrin/x86.c
new file mode 100644
index 000000000..7912d9de8
--- /dev/null
+++ b/libc/intrin/x86.c
@@ -0,0 +1,804 @@
+//===-- cpu_model/x86.c - Support for __cpu_model builtin  --------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file is based on LLVM's lib/Support/Host.cpp.
+//  It implements the operating system Host concept and builtin
+//  __cpu_model for the compiler_rt library for x86.
+//
+//===----------------------------------------------------------------------===//
+#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
+
+enum VendorSignatures {
+  SIG_INTEL = 0x756e6547,  // Genu
+  SIG_AMD = 0x68747541,    // Auth
+};
+
+enum ProcessorVendors {
+  VENDOR_INTEL = 1,
+  VENDOR_AMD,
+  VENDOR_OTHER,
+  VENDOR_MAX
+};
+
+enum ProcessorTypes {
+  INTEL_BONNELL = 1,
+  INTEL_CORE2,
+  INTEL_COREI7,
+  AMDFAM10H,
+  AMDFAM15H,
+  INTEL_SILVERMONT,
+  INTEL_KNL,
+  AMD_BTVER1,
+  AMD_BTVER2,
+  AMDFAM17H,
+  INTEL_KNM,
+  INTEL_GOLDMONT,
+  INTEL_GOLDMONT_PLUS,
+  INTEL_TREMONT,
+  AMDFAM19H,
+  ZHAOXIN_FAM7H,
+  INTEL_SIERRAFOREST,
+  INTEL_GRANDRIDGE,
+  INTEL_CLEARWATERFOREST,
+  CPU_TYPE_MAX
+};
+
+enum ProcessorSubtypes {
+  INTEL_COREI7_NEHALEM = 1,
+  INTEL_COREI7_WESTMERE,
+  INTEL_COREI7_SANDYBRIDGE,
+  AMDFAM10H_BARCELONA,
+  AMDFAM10H_SHANGHAI,
+  AMDFAM10H_ISTANBUL,
+  AMDFAM15H_BDVER1,
+  AMDFAM15H_BDVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  AMDFAM17H_ZNVER1,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_COREI7_HASWELL,
+  INTEL_COREI7_BROADWELL,
+  INTEL_COREI7_SKYLAKE,
+  INTEL_COREI7_SKYLAKE_AVX512,
+  INTEL_COREI7_CANNONLAKE,
+  INTEL_COREI7_ICELAKE_CLIENT,
+  INTEL_COREI7_ICELAKE_SERVER,
+  AMDFAM17H_ZNVER2,
+  INTEL_COREI7_CASCADELAKE,
+  INTEL_COREI7_TIGERLAKE,
+  INTEL_COREI7_COOPERLAKE,
+  INTEL_COREI7_SAPPHIRERAPIDS,
+  INTEL_COREI7_ALDERLAKE,
+  AMDFAM19H_ZNVER3,
+  INTEL_COREI7_ROCKETLAKE,
+  ZHAOXIN_FAM7H_LUJIAZUI,
+  AMDFAM19H_ZNVER4,
+  INTEL_COREI7_GRANITERAPIDS,
+  INTEL_COREI7_GRANITERAPIDS_D,
+  INTEL_COREI7_ARROWLAKE,
+  INTEL_COREI7_ARROWLAKE_S,
+  INTEL_COREI7_PANTHERLAKE,
+  CPU_SUBTYPE_MAX
+};
+
+enum ProcessorFeatures {
+  FEATURE_CMOV = 0,
+  FEATURE_MMX,
+  FEATURE_POPCNT,
+  FEATURE_SSE,
+  FEATURE_SSE2,
+  FEATURE_SSE3,
+  FEATURE_SSSE3,
+  FEATURE_SSE4_1,
+  FEATURE_SSE4_2,
+  FEATURE_AVX,
+  FEATURE_AVX2,
+  FEATURE_SSE4_A,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
+  FEATURE_AVX512F,
+  FEATURE_BMI,
+  FEATURE_BMI2,
+  FEATURE_AES,
+  FEATURE_PCLMUL,
+  FEATURE_AVX512VL,
+  FEATURE_AVX512BW,
+  FEATURE_AVX512DQ,
+  FEATURE_AVX512CD,
+  FEATURE_AVX512ER,
+  FEATURE_AVX512PF,
+  FEATURE_AVX512VBMI,
+  FEATURE_AVX512IFMA,
+  FEATURE_AVX5124VNNIW,
+  FEATURE_AVX5124FMAPS,
+  FEATURE_AVX512VPOPCNTDQ,
+  FEATURE_AVX512VBMI2,
+  FEATURE_GFNI,
+  FEATURE_VPCLMULQDQ,
+  FEATURE_AVX512VNNI,
+  FEATURE_AVX512BITALG,
+  FEATURE_AVX512BF16,
+  FEATURE_AVX512VP2INTERSECT,
+
+  FEATURE_CMPXCHG16B = 46,
+  FEATURE_F16C = 49,
+  FEATURE_LAHF_LM = 54,
+  FEATURE_LM,
+  FEATURE_WP,
+  FEATURE_LZCNT,
+  FEATURE_MOVBE,
+
+  FEATURE_AVX512FP16 = 94,
+  FEATURE_X86_64_BASELINE,
+  FEATURE_X86_64_V2,
+  FEATURE_X86_64_V3,
+  FEATURE_X86_64_V4,
+  CPU_FEATURE_MAX
+};
+
+// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
+// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
+// support. Consequently, for i386, the presence of CPUID is checked first
+// via the corresponding eflags bit.
+static bool isCpuIdSupported(void) {
+  return true;
+}
+
+// This code is copied from lib/Support/Host.cpp.
+// Changes to either file should be mirrored in the other.
+
+/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+/// the specified arguments.  If we can't run cpuid on the host, return true.
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+                               unsigned *rECX, unsigned *rEDX) {
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+  // FIXME: should we save this for Clang?
+  __asm__("movq\t%%rbx, %%rsi\n\t"
+          "cpuid\n\t"
+          "xchgq\t%%rbx, %%rsi\n\t"
+          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+          : "a"(value));
+  return false;
+}
+
+/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
+/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
+/// return true.
+static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
+                                 unsigned *rEDX) {
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+  // FIXME: should we save this for Clang?
+  __asm__("movq\t%%rbx, %%rsi\n\t"
+          "cpuid\n\t"
+          "xchgq\t%%rbx, %%rsi\n\t"
+          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+          : "a"(value), "c"(subleaf));
+  return false;
+}
+
+// Read control register 0 (XCR0). Used to detect features such as AVX.
+static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
+  // Check xgetbv; this uses a .byte sequence instead of the instruction
+  // directly because older assemblers do not include support for xgetbv and
+  // there is no easy way to conditionally compile based on the assembler used.
+  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
+  return false;
+}
+
+static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
+                                 unsigned *Model) {
+  *Family = (EAX >> 8) & 0xf;  // Bits 8 - 11
+  *Model = (EAX >> 4) & 0xf;   // Bits 4 - 7
+  if (*Family == 6 || *Family == 0xf) {
+    if (*Family == 0xf)
+      // Examine extended family ID if family ID is F.
+      *Family += (EAX >> 20) & 0xff;  // Bits 20 - 27
+    // Examine extended model ID if family ID is 6 or F.
+    *Model += ((EAX >> 16) & 0xf) << 4;  // Bits 16 - 19
+  }
+}
+
+static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
+                                                   unsigned Model,
+                                                   const unsigned *Features,
+                                                   unsigned *Type,
+                                                   unsigned *Subtype) {
+#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
+
+  // We select CPU strings to match the code in Host.cpp, but we don't use them
+  // in compiler-rt.
+  const char *CPU = 0;
+
+  switch (Family) {
+    case 6:
+      switch (Model) {
+        case 0x0f:  // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+                    // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+                    // mobile processor, Intel Core 2 Extreme processor, Intel
+                    // Pentium Dual-Core processor, Intel Xeon processor, model
+                    // 0Fh. All processors are manufactured using the 65 nm
+                    // process.
+        case 0x16:  // Intel Celeron processor model 16h. All processors are
+                    // manufactured using the 65 nm process
+          CPU = "core2";
+          *Type = INTEL_CORE2;
+          break;
+        case 0x17:  // Intel Core 2 Extreme processor, Intel Xeon processor,
+                    // model 17h. All processors are manufactured using the 45
+                    // nm process.
+                    //
+                    // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+        case 0x1d:  // Intel Xeon processor MP. All processors are manufactured
+                    // using the 45 nm process.
+          CPU = "penryn";
+          *Type = INTEL_CORE2;
+          break;
+        case 0x1a:  // Intel Core i7 processor and Intel Xeon processor. All
+                    // processors are manufactured using the 45 nm process.
+        case 0x1e:  // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
+                    // As found in a Summer 2010 model iMac.
+        case 0x1f:
+        case 0x2e:  // Nehalem EX
+          CPU = "nehalem";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_NEHALEM;
+          break;
+        case 0x25:  // Intel Core i7, laptop version.
+        case 0x2c:  // Intel Core i7 processor and Intel Xeon processor. All
+                    // processors are manufactured using the 32 nm process.
+        case 0x2f:  // Westmere EX
+          CPU = "westmere";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_WESTMERE;
+          break;
+        case 0x2a:  // Intel Core i7 processor. All processors are manufactured
+                    // using the 32 nm process.
+        case 0x2d:
+          CPU = "sandybridge";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_SANDYBRIDGE;
+          break;
+        case 0x3a:
+        case 0x3e:  // Ivy Bridge EP
+          CPU = "ivybridge";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_IVYBRIDGE;
+          break;
+
+        // Haswell:
+        case 0x3c:
+        case 0x3f:
+        case 0x45:
+        case 0x46:
+          CPU = "haswell";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_HASWELL;
+          break;
+
+        // Broadwell:
+        case 0x3d:
+        case 0x47:
+        case 0x4f:
+        case 0x56:
+          CPU = "broadwell";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_BROADWELL;
+          break;
+
+        // Skylake:
+        case 0x4e:  // Skylake mobile
+        case 0x5e:  // Skylake desktop
+        case 0x8e:  // Kaby Lake mobile
+        case 0x9e:  // Kaby Lake desktop
+        case 0xa5:  // Comet Lake-H/S
+        case 0xa6:  // Comet Lake-U
+          CPU = "skylake";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_SKYLAKE;
+          break;
+
+        // Rocketlake:
+        case 0xa7:
+          CPU = "rocketlake";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_ROCKETLAKE;
+          break;
+
+        // Skylake Xeon:
+        case 0x55:
+          *Type = INTEL_COREI7;
+          if (testFeature(FEATURE_AVX512BF16)) {
+            CPU = "cooperlake";
+            *Subtype = INTEL_COREI7_COOPERLAKE;
+          } else if (testFeature(FEATURE_AVX512VNNI)) {
+            CPU = "cascadelake";
+            *Subtype = INTEL_COREI7_CASCADELAKE;
+          } else {
+            CPU = "skylake-avx512";
+            *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
+          }
+          break;
+
+        // Cannonlake:
+        case 0x66:
+          CPU = "cannonlake";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_CANNONLAKE;
+          break;
+
+        // Icelake:
+        case 0x7d:
+        case 0x7e:
+          CPU = "icelake-client";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
+          break;
+
+        // Tigerlake:
+        case 0x8c:
+        case 0x8d:
+          CPU = "tigerlake";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_TIGERLAKE;
+          break;
+
+        // Alderlake:
+        case 0x97:
+        case 0x9a:
+        // Raptorlake:
+        case 0xb7:
+        case 0xba:
+        case 0xbf:
+        // Meteorlake:
+        case 0xaa:
+        case 0xac:
+        // Gracemont:
+        case 0xbe:
+          CPU = "alderlake";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_ALDERLAKE;
+          break;
+
+        // Arrowlake:
+        case 0xc5:
+          CPU = "arrowlake";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_ARROWLAKE;
+          break;
+
+        // Arrowlake S:
+        case 0xc6:
+        // Lunarlake:
+        case 0xbd:
+          CPU = "arrowlake-s";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_ARROWLAKE_S;
+          break;
+
+        // Pantherlake:
+        case 0xcc:
+          CPU = "pantherlake";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_PANTHERLAKE;
+          break;
+
+        // Icelake Xeon:
+        case 0x6a:
+        case 0x6c:
+          CPU = "icelake-server";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_ICELAKE_SERVER;
+          break;
+
+        // Emerald Rapids:
+        case 0xcf:
+        // Sapphire Rapids:
+        case 0x8f:
+          CPU = "sapphirerapids";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+          break;
+
+        // Granite Rapids:
+        case 0xad:
+          CPU = "graniterapids";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_GRANITERAPIDS;
+          break;
+
+        // Granite Rapids D:
+        case 0xae:
+          CPU = "graniterapids-d";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
+          break;
+
+        case 0x1c:  // Most 45 nm Intel Atom processors
+        case 0x26:  // 45 nm Atom Lincroft
+        case 0x27:  // 32 nm Atom Medfield
+        case 0x35:  // 32 nm Atom Midview
+        case 0x36:  // 32 nm Atom Midview
+          CPU = "bonnell";
+          *Type = INTEL_BONNELL;
+          break;
+
+        // Atom Silvermont codes from the Intel software optimization guide.
+        case 0x37:
+        case 0x4a:
+        case 0x4d:
+        case 0x5a:
+        case 0x5d:
+        case 0x4c:  // really airmont
+          CPU = "silvermont";
+          *Type = INTEL_SILVERMONT;
+          break;
+        // Goldmont:
+        case 0x5c:  // Apollo Lake
+        case 0x5f:  // Denverton
+          CPU = "goldmont";
+          *Type = INTEL_GOLDMONT;
+          break;  // "goldmont"
+        case 0x7a:
+          CPU = "goldmont-plus";
+          *Type = INTEL_GOLDMONT_PLUS;
+          break;
+        case 0x86:
+        case 0x8a:  // Lakefield
+        case 0x96:  // Elkhart Lake
+        case 0x9c:  // Jasper Lake
+          CPU = "tremont";
+          *Type = INTEL_TREMONT;
+          break;
+
+        // Sierraforest:
+        case 0xaf:
+          CPU = "sierraforest";
+          *Type = INTEL_SIERRAFOREST;
+          break;
+
+        // Grandridge:
+        case 0xb6:
+          CPU = "grandridge";
+          *Type = INTEL_GRANDRIDGE;
+          break;
+
+        // Clearwaterforest:
+        case 0xdd:
+          CPU = "clearwaterforest";
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_CLEARWATERFOREST;
+          break;
+
+        case 0x57:
+          CPU = "knl";
+          *Type = INTEL_KNL;
+          break;
+
+        case 0x85:
+          CPU = "knm";
+          *Type = INTEL_KNM;
+          break;
+
+        default:  // Unknown family 6 CPU.
+          break;
+      }
+      break;
+    default:
+      break;  // Unknown.
+  }
+
+  return CPU;
+}
+
+static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
+                                                 unsigned Model,
+                                                 const unsigned *Features,
+                                                 unsigned *Type,
+                                                 unsigned *Subtype) {
+  // We select CPU strings to match the code in Host.cpp, but we don't use them
+  // in compiler-rt.
+  const char *CPU = 0;
+
+  switch (Family) {
+    case 16:
+      CPU = "amdfam10";
+      *Type = AMDFAM10H;
+      switch (Model) {
+        case 2:
+          *Subtype = AMDFAM10H_BARCELONA;
+          break;
+        case 4:
+          *Subtype = AMDFAM10H_SHANGHAI;
+          break;
+        case 8:
+          *Subtype = AMDFAM10H_ISTANBUL;
+          break;
+      }
+      break;
+    case 20:
+      CPU = "btver1";
+      *Type = AMD_BTVER1;
+      break;
+    case 21:
+      CPU = "bdver1";
+      *Type = AMDFAM15H;
+      if (Model >= 0x60 && Model <= 0x7f) {
+        CPU = "bdver4";
+        *Subtype = AMDFAM15H_BDVER4;
+        break;  // 60h-7Fh: Excavator
+      }
+      if (Model >= 0x30 && Model <= 0x3f) {
+        CPU = "bdver3";
+        *Subtype = AMDFAM15H_BDVER3;
+        break;  // 30h-3Fh: Steamroller
+      }
+      if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
+        CPU = "bdver2";
+        *Subtype = AMDFAM15H_BDVER2;
+        break;  // 02h, 10h-1Fh: Piledriver
+      }
+      if (Model <= 0x0f) {
+        *Subtype = AMDFAM15H_BDVER1;
+        break;  // 00h-0Fh: Bulldozer
+      }
+      break;
+    case 22:
+      CPU = "btver2";
+      *Type = AMD_BTVER2;
+      break;
+    case 23:
+      CPU = "znver1";
+      *Type = AMDFAM17H;
+      if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
+          (Model >= 0x60 && Model <= 0x67) ||
+          (Model >= 0x68 && Model <= 0x6f) ||
+          (Model >= 0x70 && Model <= 0x7f) ||
+          (Model >= 0x84 && Model <= 0x87) ||
+          (Model >= 0x90 && Model <= 0x97) ||
+          (Model >= 0x98 && Model <= 0x9f) ||
+          (Model >= 0xa0 && Model <= 0xaf)) {
+        // Family 17h Models 30h-3Fh (Starship) Zen 2
+        // Family 17h Models 47h (Cardinal) Zen 2
+        // Family 17h Models 60h-67h (Renoir) Zen 2
+        // Family 17h Models 68h-6Fh (Lucienne) Zen 2
+        // Family 17h Models 70h-7Fh (Matisse) Zen 2
+        // Family 17h Models 84h-87h (ProjectX) Zen 2
+        // Family 17h Models 90h-97h (VanGogh) Zen 2
+        // Family 17h Models 98h-9Fh (Mero) Zen 2
+        // Family 17h Models A0h-AFh (Mendocino) Zen 2
+        CPU = "znver2";
+        *Subtype = AMDFAM17H_ZNVER2;
+        break;
+      }
+      if ((Model >= 0x10 && Model <= 0x1f) ||
+          (Model >= 0x20 && Model <= 0x2f)) {
+        // Family 17h Models 10h-1Fh (Raven1) Zen
+        // Family 17h Models 10h-1Fh (Picasso) Zen+
+        // Family 17h Models 20h-2Fh (Raven2 x86) Zen
+        *Subtype = AMDFAM17H_ZNVER1;
+        break;
+      }
+      break;
+    case 25:
+      CPU = "znver3";
+      *Type = AMDFAM19H;
+      if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) ||
+          (Model >= 0x30 && Model <= 0x3f) ||
+          (Model >= 0x40 && Model <= 0x4f) ||
+          (Model >= 0x50 && Model <= 0x5f)) {
+        // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
+        // Family 19h Models 20h-2Fh (Vermeer) Zen 3
+        // Family 19h Models 30h-3Fh (Badami) Zen 3
+        // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
+        // Family 19h Models 50h-5Fh (Cezanne) Zen 3
+        *Subtype = AMDFAM19H_ZNVER3;
+        break;
+      }
+      if ((Model >= 0x10 && Model <= 0x1f) ||
+          (Model >= 0x60 && Model <= 0x6f) ||
+          (Model >= 0x70 && Model <= 0x77) ||
+          (Model >= 0x78 && Model <= 0x7f) ||
+          (Model >= 0xa0 && Model <= 0xaf)) {
+        // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
+        // Family 19h Models 60h-6Fh (Raphael) Zen 4
+        // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
+        // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
+        // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
+        CPU = "znver4";
+        *Subtype = AMDFAM19H_ZNVER4;
+        break;  //  "znver4"
+      }
+      break;  // family 19h
+    default:
+      break;  // Unknown AMD CPU.
+  }
+
+  return CPU;
+}
+
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+                                 unsigned *Features) {
+  unsigned EAX = 0, EBX = 0;
+
+#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
+#define setFeature(F) Features[F / 32] |= 1U << (F % 32)
+
+  if ((EDX >> 15) & 1) setFeature(FEATURE_CMOV);
+  if ((EDX >> 23) & 1) setFeature(FEATURE_MMX);
+  if ((EDX >> 25) & 1) setFeature(FEATURE_SSE);
+  if ((EDX >> 26) & 1) setFeature(FEATURE_SSE2);
+
+  if ((ECX >> 0) & 1) setFeature(FEATURE_SSE3);
+  if ((ECX >> 1) & 1) setFeature(FEATURE_PCLMUL);
+  if ((ECX >> 9) & 1) setFeature(FEATURE_SSSE3);
+  if ((ECX >> 12) & 1) setFeature(FEATURE_FMA);
+  if ((ECX >> 13) & 1) setFeature(FEATURE_CMPXCHG16B);
+  if ((ECX >> 19) & 1) setFeature(FEATURE_SSE4_1);
+  if ((ECX >> 20) & 1) setFeature(FEATURE_SSE4_2);
+  if ((ECX >> 22) & 1) setFeature(FEATURE_MOVBE);
+  if ((ECX >> 23) & 1) setFeature(FEATURE_POPCNT);
+  if ((ECX >> 25) & 1) setFeature(FEATURE_AES);
+  if ((ECX >> 29) & 1) setFeature(FEATURE_F16C);
+
+  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+  // indicates that the AVX registers will be saved and restored on context
+  // switch, then we have full AVX support.
+  const unsigned AVXBits = (1 << 27) | (1 << 28);
+  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+                ((EAX & 0x6) == 0x6);
+#if defined(__APPLE__)
+  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
+  // save the AVX512 context if we use AVX512 instructions, even the bit is not
+  // set right now.
+  bool HasAVX512Save = true;
+#else
+  // AVX512 requires additional context to be saved by the OS.
+  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+#endif
+
+  if (HasAVX) setFeature(FEATURE_AVX);
+
+  bool HasLeaf7 =
+      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+  if (HasLeaf7) {
+    if ((EBX >> 3) & 1) setFeature(FEATURE_BMI);
+    if (((EBX >> 5) & 1) && HasAVX) setFeature(FEATURE_AVX2);
+    if ((EBX >> 8) & 1) setFeature(FEATURE_BMI2);
+    if (HasAVX512Save) {
+      if ((EBX >> 16) & 1) setFeature(FEATURE_AVX512F);
+      if ((EBX >> 17) & 1) setFeature(FEATURE_AVX512DQ);
+      if ((EBX >> 21) & 1) setFeature(FEATURE_AVX512IFMA);
+      if ((EBX >> 26) & 1) setFeature(FEATURE_AVX512PF);
+      if ((EBX >> 27) & 1) setFeature(FEATURE_AVX512ER);
+      if ((EBX >> 28) & 1) setFeature(FEATURE_AVX512CD);
+      if ((EBX >> 30) & 1) setFeature(FEATURE_AVX512BW);
+      if ((EBX >> 31) & 1) setFeature(FEATURE_AVX512VL);
+      if ((ECX >> 1) & 1) setFeature(FEATURE_AVX512VBMI);
+      if ((ECX >> 6) & 1) setFeature(FEATURE_AVX512VBMI2);
+      if ((ECX >> 11) & 1) setFeature(FEATURE_AVX512VNNI);
+      if ((ECX >> 12) & 1) setFeature(FEATURE_AVX512BITALG);
+      if ((ECX >> 14) & 1) setFeature(FEATURE_AVX512VPOPCNTDQ);
+      if ((EDX >> 2) & 1) setFeature(FEATURE_AVX5124VNNIW);
+      if ((EDX >> 3) & 1) setFeature(FEATURE_AVX5124FMAPS);
+      if ((EDX >> 8) & 1) setFeature(FEATURE_AVX512VP2INTERSECT);
+      if ((EDX >> 23) & 1) setFeature(FEATURE_AVX512FP16);
+    }
+    if ((ECX >> 8) & 1) setFeature(FEATURE_GFNI);
+    if (((ECX >> 10) & 1) && HasAVX) setFeature(FEATURE_VPCLMULQDQ);
+  }
+
+  // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
+  // return all 0s for invalid subleaves so check the limit.
+  bool HasLeaf7Subleaf1 =
+      HasLeaf7 && EAX >= 1 &&
+      !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
+    setFeature(FEATURE_AVX512BF16);
+
+  unsigned MaxExtLevel;
+  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+  if (HasExtLeaf1) {
+    if (ECX & 1) setFeature(FEATURE_LAHF_LM);
+    if ((ECX >> 5) & 1) setFeature(FEATURE_LZCNT);
+    if (((ECX >> 6) & 1)) setFeature(FEATURE_SSE4_A);
+    if (((ECX >> 11) & 1)) setFeature(FEATURE_XOP);
+    if (((ECX >> 16) & 1)) setFeature(FEATURE_FMA4);
+    if (((EDX >> 29) & 1)) setFeature(FEATURE_LM);
+  }
+
+  if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
+    setFeature(FEATURE_X86_64_BASELINE);
+    if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
+        hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
+      setFeature(FEATURE_X86_64_V2);
+      if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
+          hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
+          hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
+          hasFeature(FEATURE_MOVBE)) {
+        setFeature(FEATURE_X86_64_V3);
+        if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
+            hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
+          setFeature(FEATURE_X86_64_V4);
+      }
+    }
+  }
+
+#undef hasFeature
+#undef setFeature
+}
+
+struct __processor_model {
+  unsigned int __cpu_vendor;
+  unsigned int __cpu_type;
+  unsigned int __cpu_subtype;
+  unsigned int __cpu_features[1];
+} __cpu_model = {0, 0, 0, {0}};
+
+unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
+
+// A constructor function that is sets __cpu_model and __cpu_features2 with
+// the right values.  This needs to run only once.  This constructor is
+// given the highest priority and it should run before constructors without
+// the priority set.  However, it still runs after ifunc initializers and
+// needs to be called explicitly there.
+
+__attribute__((__constructor__(1))) textstartup int __cpu_indicator_init(void) {
+  unsigned EAX, EBX, ECX, EDX;
+  unsigned MaxLeaf = 5;
+  unsigned Vendor;
+  unsigned Model, Family;
+  unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
+  _Static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
+  _Static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
+
+  // This function needs to run just once.
+  if (__cpu_model.__cpu_vendor) return 0;
+
+  if (!isCpuIdSupported() ||
+      getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
+    __cpu_model.__cpu_vendor = VENDOR_OTHER;
+    return -1;
+  }
+
+  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
+  detectX86FamilyModel(EAX, &Family, &Model);
+
+  // Find available features.
+  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
+
+  __cpu_model.__cpu_features[0] = Features[0];
+  __cpu_features2[0] = Features[1];
+  __cpu_features2[1] = Features[2];
+  __cpu_features2[2] = Features[3];
+
+  if (Vendor == SIG_INTEL) {
+    // Get CPU type.
+    getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
+                                    &(__cpu_model.__cpu_type),
+                                    &(__cpu_model.__cpu_subtype));
+    __cpu_model.__cpu_vendor = VENDOR_INTEL;
+  } else if (Vendor == SIG_AMD) {
+    // Get CPU type.
+    getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
+                                  &(__cpu_model.__cpu_type),
+                                  &(__cpu_model.__cpu_subtype));
+    __cpu_model.__cpu_vendor = VENDOR_AMD;
+  } else {
+    __cpu_model.__cpu_vendor = VENDOR_OTHER;
+  }
+
+  return 0;
+}
+
+#endif  // __x86_64__ && (gnuc || clang)
diff --git a/libc/isystem/features.h b/libc/isystem/features.h
index 61238fb01..61552c250 100644
--- a/libc/isystem/features.h
+++ b/libc/isystem/features.h
@@ -10,8 +10,7 @@
 #endif
 
 #if !defined(_POSIX_SOURCE) && !defined(_POSIX_C_SOURCE) && \
-    !defined(_XOPEN_SOURCE) && !defined(_GNU_SOURCE) &&     \
-    !defined(_BSD_SOURCE) && !defined(__STRICT_ANSI__)
+    !defined(_XOPEN_SOURCE) && !defined(_GNU_SOURCE) && !defined(_BSD_SOURCE)
 #define _BSD_SOURCE   1
 #define _XOPEN_SOURCE 700
 #endif
diff --git a/libc/isystem/stdlib.h b/libc/isystem/stdlib.h
index 422888746..cfdd6cf40 100644
--- a/libc/isystem/stdlib.h
+++ b/libc/isystem/stdlib.h
@@ -14,6 +14,5 @@
 #include "libc/str/str.h"
 #include "libc/sysv/consts/exit.h"
 #include "libc/temp.h"
-#include "third_party/musl/crypt.h"
 #include "third_party/musl/rand48.h"
 #endif /* _STDLIB_H */
diff --git a/libc/isystem/sys/syscall.h b/libc/isystem/sys/syscall.h
index 9a508f8c4..a65aa2c65 100644
--- a/libc/isystem/sys/syscall.h
+++ b/libc/isystem/sys/syscall.h
@@ -1,4 +1,4 @@
-#ifndef COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_
-#define COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_
+#ifndef COSMOPOLITAN_LIBC_ISYSTEM_SYS_SYSCALL_H_
+#define COSMOPOLITAN_LIBC_ISYSTEM_SYS_SYSCALL_H_
 #include "libc/stdio/syscall.h"
-#endif /* COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ */
+#endif /* COSMOPOLITAN_LIBC_ISYSTEM_SYS_SYSCALL_H_ */
diff --git a/libc/isystem/syscall.h b/libc/isystem/syscall.h
new file mode 100644
index 000000000..9a508f8c4
--- /dev/null
+++ b/libc/isystem/syscall.h
@@ -0,0 +1,4 @@
+#ifndef COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_
+#define COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_
+#include "libc/stdio/syscall.h"
+#endif /* COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ */
diff --git a/libc/isystem/unistd.h b/libc/isystem/unistd.h
index 942c30e74..78b62edb9 100644
--- a/libc/isystem/unistd.h
+++ b/libc/isystem/unistd.h
@@ -10,12 +10,8 @@
 #include "libc/sysv/consts/o.h"
 #include "libc/sysv/consts/ok.h"
 #include "libc/time/time.h"
+#include "libc/unistd.h"
 #include "third_party/getopt/long1.h"
 #include "third_party/musl/crypt.h"
 #include "third_party/musl/lockf.h"
-
-#ifndef _CS_PATH
-#define _CS_PATH 0
-#endif
-
 #endif /* _UNISTD_H */
diff --git a/libc/isystem/windowsesque.h b/libc/isystem/windowsesque.h
new file mode 100644
index 000000000..f228173de
--- /dev/null
+++ b/libc/isystem/windowsesque.h
@@ -0,0 +1,4296 @@
+#ifndef COSMOPOLITAN_LIBC_COMPAT_INCLUDE_WINDOWS_H_
+#define COSMOPOLITAN_LIBC_COMPAT_INCLUDE_WINDOWS_H_
+#include "libc/nt/accounting.h"
+#include "libc/nt/automation.h"
+#include "libc/nt/console.h"
+#include "libc/nt/debug.h"
+#include "libc/nt/dll.h"
+#include "libc/nt/enum/keyaccess.h"
+#include "libc/nt/enum/regtype.h"
+#include "libc/nt/errors.h"
+#include "libc/nt/events.h"
+#include "libc/nt/files.h"
+#include "libc/nt/ipc.h"
+#include "libc/nt/memory.h"
+#include "libc/nt/paint.h"
+#include "libc/nt/process.h"
+#include "libc/nt/registry.h"
+#include "libc/nt/synchronization.h"
+#include "libc/nt/thread.h"
+#include "libc/nt/thunk/msabi.h"
+#include "libc/nt/windows.h"
+#include "libc/nt/winsock.h"
+#include "libc/str/str.h"
+/* clang-format off */
+
+/* #if defined(__GNUC__) */
+/* #pragma GCC diagnostic ignored "-Wint-conversion" */
+/* #endif */
+#undef NULL
+#define NULL 0
+
+#define WINAPI __msabi
+#define STDMETHODCALLTYPE __msabi
+#define CALLBACK __msabi
+
+#define FARPROC wambda
+#define NEARPROC wambda
+#define PROC wambda
+
+#define LONG int32_t /* [sic] */
+#define WCHAR char16_t /* [sic] */
+#define BOOL bool32 /* [sic] */
+
+#define TRUE 1
+#define FALSE 0
+
+#define PVOID void*
+#define PVOID64 void*
+#define LPCVOID const void*
+#define CHAR char
+#define SHORT short
+#define CONST const
+#define VOID void
+#define INT8 signed char
+#define PINT8 signed char*
+#define INT16 int16_t
+#define PINT16 int16_t*
+#define INT32 int32_t
+#define PINT32 int32_t*
+#define INT64 int64_t
+#define PINT64 int64_t*
+#define UINT8 unsigned char
+#define PUINT8 unsigned char*
+#define UINT16 uint16_t
+#define PUINT16 uint16_t*
+#define UINT32 uint32_t
+#define PUINT32 uint32_t*
+#define UINT64 uint64_t
+#define PUINT64 uint64_t*
+#define LONG32 int32_t
+#define PLONG32 int32_t*
+#define ULONG32 uint32_t
+#define PULONG32 uint32_t*
+#define DWORD32 uint32_t
+#define PDWORD32 uint32_t*
+
+#define INT_PTR intptr_t
+#define PINT_PTR intptr_t*
+#define UINT_PTR uintptr_t
+#define PUINT_PTR uintptr_t*
+#define LONG_PTR intptr_t
+#define PLONG_PTR int32_t**
+#define ULONG_PTR uintptr_t
+#define PULONG_PTR uint32_t**
+#define POINTER_64_INT int64_t*
+#define __int3264 int64_t
+
+#define SHANDLE_PTR int64_t
+#define HANDLE_PTR uint64_t
+
+#define UHALF_PTR uint32_t
+#define PUHALF_PTR uint32_t*
+#define HALF_PTR int32_t
+#define PHALF_PTR int32_t*
+
+#define SIZE_T size_t
+#define PSIZE_T size_t*
+#define SSIZE_T ssize_t
+#define PSSIZE_T ssize_t*
+#define DWORD_PTR ULONG_PTR
+#define PDWORD_PTR ULONG_PTR*
+#define LONG64 int64_t
+#define PLONG64 int64_t*
+#define ULONG64 uint64_t
+#define PULONG64 uint64_t*
+#define DWORD64 uint64_t
+#define PDWORD64 uint64_t*
+#define KAFFINITY ULONG_PTR
+#define PKAFFINITY KAFFINITY*
+#define KPRIORITY LONG
+
+#define PWCHAR WCHAR*
+#define LPWCH WCHAR*
+#define PWCH WCHAR*
+#define LPCWCH CONST WCHAR*
+#define PCWCH CONST WCHAR*
+#define NWPSTR WCHAR*
+#define LPWSTR WCHAR*
+#define PWSTR WCHAR*
+#define PZPWSTR PWSTR*
+#define PCZPWSTR CONST PWSTR*
+#define LPUWSTR WCHAR forcealign(1)*
+#define PUWSTR WCHAR forcealign(1)*
+#define LPCWSTR CONST WCHAR*
+#define PCWSTR CONST WCHAR*
+#define PZPCWSTR PCWSTR*
+#define LPCUWSTR CONST WCHAR forcealign(1)*
+#define PCUWSTR CONST WCHAR forcealign(1)*
+#define PCHAR CHAR*
+#define LPCH CHAR*
+#define PCH CHAR*
+#define LPCCH CONST CHAR*
+#define PCCH CONST CHAR*
+#define NPSTR CHAR*
+#define LPSTR CHAR*
+#define PSTR CHAR*
+#define PZPSTR PSTR*
+#define PCZPSTR CONST PSTR*
+#define LPCSTR CONST CHAR*
+#define PCSTR CONST CHAR*
+#define PZPCSTR PCSTR*
+#define TCHAR WCHAR
+#define PTCHAR WCHAR*
+#define TBYTE WCHAR
+#define PTBYTE WCHAR*
+#define LPTCH LPWSTR
+#define PTCH LPWSTR
+#define PTSTR LPWSTR
+#define LPTSTR LPWSTR
+#define PCTSTR LPCWSTR
+#define LPCTSTR LPCWSTR
+#define PUTSTR LPUWSTR
+#define LPUTSTR LPUWSTR
+#define PCUTSTR LPCUWSTR
+#define LPCUTSTR LPCUWSTR
+#define LP LPWSTR
+#define PSHORT int16_t*
+#define PLONG int32_t*
+#define HANDLE int64_t
+#define PHANDLE HANDLE*
+#define FCHAR BYTE
+#define FSHORT WORD
+#define FLONG DWORD
+#define HRESULT LONG
+#define CCHAR char
+#define LCID DWORD
+#define PLCID PDWORD
+#define LANGID WORD
+#define LONGLONG int64_t
+#define ULONGLONG uint64_t
+#define USN LONGLONG
+#define PLONGLONG LONGLONG*
+#define PULONGLONG ULONGLONG*
+#define DWORDLONG ULONGLONG
+#define PDWORDLONG DWORDLONG*
+#define LARGE_INTEGER int64_t
+#define PLARGE_INTEGER int64_t*
+
+#define ULONG uint32_t
+#define PULONG ULONG*
+#define USHORT unsigned short
+#define PUSHORT USHORT*
+#define UCHAR unsigned char
+#define PUCHAR UCHAR*
+#define PSZ char*
+#define DWORD uint32_t
+#define WINBOOL BOOL
+#define BOOLEAN BOOL
+#define BYTE unsigned char
+#define WORD unsigned short
+#define FLOAT float
+#define PFLOAT FLOAT*
+#define PBOOL WINBOOL*
+#define PBOOLEAN WINBOOL*
+#define LPBOOL WINBOOL*
+#define PBYTE BYTE*
+#define LPBYTE BYTE*
+#define PINT int*
+#define LPINT int*
+#define PWORD WORD*
+#define LPWORD WORD*
+#define LPLONG int32_t*
+#define PDWORD DWORD*
+#define LPDWORD DWORD*
+#define LPVOID void*
+#define LPCVOID const void*
+#define INT int
+#define UINT unsigned int
+#define PUINT unsigned int*
+#define WPARAM UINT_PTR
+#define LPARAM LONG_PTR
+#define LRESULT LONG_PTR
+#define ATOM WORD
+#define SPHANDLE HANDLE*
+#define LPHANDLE HANDLE*
+#define HGLOBAL HANDLE
+#define HLOCAL HANDLE
+#define GLOBALHANDLE HANDLE
+#define LOCALHANDLE HANDLE
+#define HGDIOBJ void*
+#define PHKEY HKEY*
+#define HMODULE HINSTANCE
+#define HFILE int
+#define HCURSOR HICON
+#define COLORREF DWORD
+#define LPCOLORREF DWORD*
+#define ACCESS_MASK ULONG
+#define REGSAM ACCESS_MASK
+#define HKEY int64_t
+#define SCODE LONG
+
+#define NTSTATUS LONG
+#define HACCEL int64_t
+#define HBITMAP int64_t
+#define HBRUSH int64_t
+#define HCOLORSPACE int64_t
+#define HDC int64_t
+#define HGLRC int64_t
+#define HDESK int64_t
+#define HENHMETAFILE int64_t
+#define HFONT int64_t
+#define HICON int64_t
+#define HMENU int64_t
+#define HMETAFILE int64_t
+#define HINSTANCE int64_t
+#define HPALETTE int64_t
+#define HPEN int64_t
+#define HRGN int64_t
+#define HRSRC int64_t
+#define HSTR int64_t
+#define HTASK int64_t
+#define HWINSTA int64_t
+#define HKL int64_t
+#define HMONITOR int64_t
+#define HWINEVENTHOOK int64_t
+#define HUMPD int64_t
+#define HWND int64_t
+
+#define PDH_FUNCTION LONG
+
+#define PDH_HCOUNTER HANDLE
+#define PDH_HQUERY HANDLE
+#define PDH_HLOG HANDLE
+
+#define ADDRESS_FAMILY uint16_t
+#define TUNNEL_TYPE uint32_t
+#define NET_IF_CONNECTION_TYPE uint32_t
+#define NET_IF_COMPARTMENT_ID uint32_t
+#define IFTYPE uint32_t
+#define NL_PREFIX_ORIGIN uint32_t
+#define NL_SUFFIX_ORIGIN uint32_t
+#define NL_DAD_STATE uint32_t
+#define NET_IF_NETWORK_GUID struct NtGuid
+#define IP_PREFIX_ORIGIN NL_PREFIX_ORIGIN
+#define IP_SUFFIX_ORIGIN NL_SUFFIX_ORIGIN
+#define IP_DAD_STATE NL_DAD_STATE
+#define IP_ADAPTER_ADDRESSES struct NtIpAdapterAddresses
+#define PIP_ADAPTER_ADDRESSES struct NtIpAdapterAddresses*
+#define IP_ADAPTER_UNICAST_ADDRESS struct NtIpAdapterUnicastAddressLh
+#define PIP_ADAPTER_UNICAST_ADDRESS struct NtIpAdapterUnicastAddressLh*
+#define IP_ADAPTER_ANYCAST_ADDRESS struct NtIpAdapterAnycastAddressXp
+#define PIP_ADAPTER_ANYCAST_ADDRESS struct NtIpAdapterAnycastAddressXp*
+#define IP_ADAPTER_MULTICAST_ADDRESS struct NtIpAdapterMulticastAddressXp
+#define PIP_ADAPTER_MULTICAST_ADDRESS struct NtIpAdapterMulticastAddressXp*
+#define IP_ADAPTER_DNS_SERVER_ADDRESS struct NtIpAdapterDnsServerAddressXp
+#define IP_ADAPTER_PREFIX struct NtIpAdapterPrefixXp
+#define PIP_ADAPTER_PREFIX struct NtIpAdapterPrefixXp*
+
+#define _GENERIC_MAPPING NtGenericMapping
+#define GENERIC_MAPPING struct NtGenericMapping
+#define PGENERIC_MAPPING struct NtGenericMapping*
+#define _UNICODE_STRING NtUnicodeString
+#define UNICODE_STRING struct NtUnicodeString
+#define PUNICODE_STRING struct NtUnicodeString*
+#define _IO_COUNTERS NtIoCounters
+#define IO_COUNTERS struct NtIoCounters
+#define PIO_COUNTERS struct NtIoCounters*
+#define _FILE_TIME NtFileTime
+#define FILE_TIME struct NtFileTime
+#define PFILE_TIME struct NtFileTime*
+#define _FILETIME NtFileTime
+#define FILETIME struct NtFileTime
+#define PFILETIME struct NtFileTime*
+#define _CLIENT_ID NtClientId
+#define CLIENT_ID struct NtClientId
+#define PCLIENT_ID struct NtClientId*
+#define _SYSTEM_THREADS NtSystemThreads
+#define SYSTEM_THREADS struct NtSystemThreads
+#define PSYSTEM_THREADS struct NtSystemThreads*
+#define _VM_COUNTERS NtVmCounters
+#define VM_COUNTERS struct NtVmCounters
+#define PVM_COUNTERS struct NtVmCounters*
+#define _SECURITY_DESCRIPTOR NtSecurityDescriptor
+#define SECURITY_DESCRIPTOR struct NtSecurityDescriptor
+#define PSECURITY_DESCRIPTOR struct NtSecurityDescriptor*
+
+#define _OBJECT_ALL_INFORMATION NtObjectAllInformation
+#define OBJECT_ALL_INFORMATION struct NtObjectAllinformation
+#define POBJECT_ALL_INFORMATION struct NtObjectAllInformation*
+#define _OBJECT_TYPE_INFORMATION NtObjectTypeInformation
+#define OBJECT_TYPE_INFORMATION struct NtObjectTypeInformation
+#define POBJECT_TYPE_INFORMATION struct NtObjectTypeInformation*
+#define _OBJECT_NAME_INFORMATION NtObjectNameInformation
+#define OBJECT_NAME_INFORMATION struct NtObjectNameInformation
+#define POBJECT_NAME_INFORMATION struct NtObjectNameInformation*
+#define _OBJECT_BASIC_INFORMATION NtObjectBasicInformation
+#define OBJECT_BASIC_INFORMATION struct NtObjectBasicInformation
+#define POBJECT_BASIC_INFORMATION struct NtObjectBasicInformation*
+#define _FILE_ACCESS_INFORMATION NtFileAccessInformation
+#define FILE_ACCESS_INFORMATION struct NtFileAccessInformation
+#define PFILE_ACCESS_INFORMATION struct NtFileAccessInformation*
+#define _FILE_ALIGNMENT_INFORMATION NtFileAlignmentInformation
+#define FILE_ALIGNMENT_INFORMATION struct NtFileAlignmentInformation
+#define PFILE_ALIGNMENT_INFORMATION struct NtFileAlignmentInformation*
+#define _FILE_ALL_INFORMATION NtFileAllInformation
+#define FILE_ALL_INFORMATION struct NtFileAllInformation
+#define PFILE_ALL_INFORMATION struct NtFileAllInformation*
+#define _FILE_ALLOCATION_INFORMATION NtFileAllocationInformation
+#define FILE_ALLOCATION_INFORMATION struct NtFileAllocationInformation
+#define PFILE_ALLOCATION_INFORMATION struct NtFileAllocationInformation*
+#define _FILE_BASIC_INFORMATION NtFileBasicInformation
+#define FILE_BASIC_INFORMATION struct NtFileBasicInformation
+#define PFILE_BASIC_INFORMATION struct NtFileBasicInformation*
+#define _FILE_BOTH_DIR_INFORMATION NtFileBothDirectoryInformation
+#define FILE_BOTH_DIR_INFORMATION struct NtFileBothDirectoryInformation
+#define PFILE_BOTH_DIR_INFORMATION struct NtFileBothDirectoryInformation*
+#define _FILE_BOTH_DIRECTORY_INFORMATION NtFileBothDirectoryInformation
+#define FILE_BOTH_DIRECTORY_INFORMATION struct NtFileBothDirectoryInformation
+#define PFILE_BOTH_DIRECTORY_INFORMATION struct NtFileBothDirectoryInformation*
+#define _FILE_DIRECTORY_INFORMATION NtFileDirectoryInformation
+#define FILE_DIRECTORY_INFORMATION struct NtFileDirectoryInformation
+#define PFILE_DIRECTORY_INFORMATION struct NtFileDirectoryInformation*
+#define _FILE_DISPOSITION_INFORMATION NtFileDispositionInformation
+#define FILE_DISPOSITION_INFORMATION struct NtFileDispositionInformation
+#define PFILE_DISPOSITION_INFORMATION struct NtFileDispositionInformation*
+#define _FILE_EA_INFORMATION NtFileEaInformation
+#define FILE_EA_INFORMATION struct NtFileEaInformation
+#define PFILE_EA_INFORMATION struct NtFileEaInformation*
+#define _FILE_INTERNAL_INFORMATION NtFileInternalInformation
+#define FILE_INTERNAL_INFORMATION struct NtFileInternalInformation
+#define PFILE_INTERNAL_INFORMATION struct NtFileInternalInformation*
+#define _FILE_MODE_INFORMATION NtFileModeInformation
+#define FILE_MODE_INFORMATION struct NtFileModeInformation
+#define PFILE_MODE_INFORMATION struct NtFileModeInformation*
+#define _FILE_NAME_INFORMATION NtFileNameInformation
+#define FILE_NAME_INFORMATION struct NtFileNameInformation
+#define PFILE_NAME_INFORMATION struct NtFileNameInformation*
+#define _FILE_NAMES_INFORMATION NtFileNamesInformation
+#define FILE_NAMES_INFORMATION struct NtFileNamesInformation
+#define PFILE_NAMES_INFORMATION struct NtFileNamesInformation*
+#define _FILE_POSITION_INFORMATION NtFilePositionInformation
+#define FILE_POSITION_INFORMATION struct NtFilePositionInformation
+#define PFILE_POSITION_INFORMATION struct NtFilePositionInformation*
+#define _FILE_RENAME_INFORMATION NtFileRenameInformation
+#define FILE_RENAME_INFORMATION struct NtFileRenameInformation
+#define PFILE_RENAME_INFORMATION struct NtFileRenameInformation*
+#define _FILE_STANDARD_INFORMATION NtFileStandardInformation
+#define FILE_STANDARD_INFORMATION struct NtFileStandardInformation
+#define PFILE_STANDARD_INFORMATION struct NtFileStandardInformation*
+#define _FILE_STREAM_INFORMATION NtFileStreamInformation
+#define FILE_STREAM_INFORMATION struct NtFileStreamInformation
+#define PFILE_STREAM_INFORMATION struct NtFileStreamInformation*
+#define _KERNEL_USER_TIMES NtKernelUserTimes
+#define KERNEL_USER_TIMES struct NtKernelUserTimes
+#define PKERNEL_USER_TIMES struct NtKernelUserTimes*
+#define _PROCESS_BASIC_INFORMATION NtProcessBasicInformation
+#define PROCESS_BASIC_INFORMATION struct NtProcessBasicInformation
+#define PPROCESS_BASIC_INFORMATION struct NtProcessBasicInformation*
+#define _SYSTEM_BASIC_INFORMATION NtSystemBasicInformation
+#define SYSTEM_BASIC_INFORMATION struct NtSystemBasicInformation
+#define PSYSTEM_BASIC_INFORMATION struct NtSystemBasicInformation*
+#define _SYSTEM_EXCEPTION_INFORMATION NtSystemExceptionInformation
+#define SYSTEM_EXCEPTION_INFORMATION struct NtSystemExceptionInformation
+#define PSYSTEM_EXCEPTION_INFORMATION struct NtSystemExceptionInformation*
+#define _SYSTEM_HANDLE_ENTRY NtSystemHandleEntry
+#define SYSTEM_HANDLE_ENTRY struct NtSystemHandleEntry
+#define PSYSTEM_HANDLE_ENTRY struct NtSystemHandleEntry*
+#define _SYSTEM_HANDLE_INFORMATION NtSystemHandleInformation
+#define SYSTEM_HANDLE_INFORMATION struct NtSystemHandleInformation
+#define PSYSTEM_HANDLE_INFORMATION struct NtSystemHandleInformation*
+#define _SYSTEM_INTERRUPT_INFORMATION NtSystemInterruptInformation
+#define SYSTEM_INTERRUPT_INFORMATION struct NtSystemInterruptInformation
+#define PSYSTEM_INTERRUPT_INFORMATION struct NtSystemInterruptInformation*
+#define _SYSTEM_LOOKASIDE_INFORMATION NtSystemLookasideInformation
+#define SYSTEM_LOOKASIDE_INFORMATION struct NtSystemLookasideInformation
+#define PSYSTEM_LOOKASIDE_INFORMATION struct NtSystemLookasideInformation*
+#define _SYSTEM_PERFORMANCE_INFORMATION NtSystemPerformanceInformation
+#define SYSTEM_PERFORMANCE_INFORMATION struct NtSystemPerformanceInformation
+#define PSYSTEM_PERFORMANCE_INFORMATION struct NtSystemPerformanceInformation*
+#define _SYSTEM_PROCESS_INFORMATION NtSystemProcessInformation
+#define SYSTEM_PROCESS_INFORMATION struct NtSystemProcessInformation
+#define PSYSTEM_PROCESS_INFORMATION struct NtSystemProcessInformation*
+#define _SYSTEM_PROCESSOR_INFORMATION NtSystemProcessorInformation
+#define SYSTEM_PROCESSOR_INFORMATION struct NtSystemProcessorInformation
+#define PSYSTEM_PROCESSOR_INFORMATION struct NtSystemProcessorInformation*
+#define _SYSTEM_TIMEOFDAY_INFORMATION NtSystemTimeofdayInformation
+#define SYSTEM_TIMEOFDAY_INFORMATION struct NtSystemTimeofdayInformation
+#define PSYSTEM_TIMEOFDAY_INFORMATION struct NtSystemTimeofdayInformation*
+
+#define _SYSTEM_REGISTRY_QUOTA_INFORMATION NtSystemRegistryQuotaInformation
+#define SYSTEM_REGISTRY_QUOTA_INFORMATION struct NtSystemRegistryQuotaInformation
+#define PSYSTEM_REGISTRY_QUOTA_INFORMATION struct NtSystemRegistryQuotaInformation*
+#define _SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION NtSystemProcessorPerformanceInformation
+#define SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION struct NtSystemProcessorPerformanceInformation
+#define PSYSTEM_PROCESSOR_PERFORMANCE_INFORMATION struct NtSystemProcessorPerformanceInformation*
+#define _FILE_FULL_DIR_INFORMATION NtFileFullDirectoryInformation
+#define FILE_FULL_DIR_INFORMATION struct NtFileFullDirectoryInformation
+#define PFILE_FULL_DIR_INFORMATION struct NtFileFullDirectoryInformation*
+#define _FILE_FULL_DIRECTORY_INFORMATION NtFileFullDirectoryInformation
+#define FILE_FULL_DIRECTORY_INFORMATION struct NtFileFullDirectoryInformation
+#define PFILE_FULL_DIRECTORY_INFORMATION struct NtFileFullDirectoryInformation*
+#define _FILE_ATTRIBUTE_TAG_INFORMATION NtFileAttributeTagInformation
+#define FILE_ATTRIBUTE_TAG_INFORMATION struct NtFileAttributeTagInformation
+#define PFILE_ATTRIBUTE_TAG_INFORMATION struct NtFileAttributeTagInformation*
+#define _FILE_PIPE_LOCAL_INFORMATION NtFilePipeLocalInformation
+#define FILE_PIPE_LOCAL_INFORMATION struct NtFilePipeLocalInformation
+#define PFILE_PIPE_LOCAL_INFORMATION struct NtFilePipeLocalInformation*
+#define _FILE_NETWORK_OPEN_INFORMATION NtFileNetworkOpenInformation
+#define FILE_NETWORK_OPEN_INFORMATION struct NtFileNetworkOpenInformation
+#define PFILE_NETWORK_OPEN_INFORMATION struct NtFileNetworkOpenInformation*
+#define _FILE_MAILSLOT_QUERY_INFORMATION NtFileMailslotQueryInformation
+#define FILE_MAILSLOT_QUERY_INFORMATION struct NtFileMailslotQueryInformation
+#define PFILE_MAILSLOT_QUERY_INFORMATION struct NtFileMailslotQueryInformation*
+#define _FILE_MAILSLOT_SET_INFORMATION NtFileMailslotSetInformation
+#define FILE_MAILSLOT_SET_INFORMATION struct NtFileMailslotSetInformation
+#define PFILE_MAILSLOT_SET_INFORMATION struct NtFileMailslotSetInformation*
+#define _FILE_FULL_EA_INFORMATION NtFileFullEaInformation
+#define FILE_FULL_EA_INFORMATION struct NtFileFullEaInformation
+#define PFILE_FULL_EA_INFORMATION struct NtFileFullEaInformation*
+#define _PDH_FMT_COUNTERVALUE NtPdhFmtCountervalue
+#define PDH_FMT_COUNTERVALUE struct NtPdhFmtCountervalue
+#define PPDH_FMT_COUNTERVALUE struct NtPdhFmtCountervalue*
+
+#define _LUID NtLuid
+#define LUID struct NtLuid
+#define PLUID struct NtLuid*
+
+#define _LUID_AND_ATTRIBUTES NtLuidAndAttributes
+#define LUID_AND_ATTRIBUTES struct NtLuidAndAttributes
+#define PLUID_AND_ATTRIBUTES struct NtLuidAndAttributes*
+
+#define _PRIVILEGE_SET NtPrivilegeSet
+#define PRIVILEGE_SET struct NtPrivilegeSet
+#define PPRIVILEGE_SET struct NtPrivilegeSet*
+
+#define _IMAGE_FILE_HEADER NtImageFileHeader
+#define IMAGE_FILE_HEADER struct NtImageFileHeader
+#define PIMAGE_FILE_HEADER struct NtImageFileHeader*
+
+#define _IMAGE_DOS_HEADER NtImageDosHeader
+#define IMAGE_DOS_HEADER struct NtImageDosHeader
+#define PIMAGE_DOS_HEADER struct NtImageDosHeader*
+
+#define _BY_HANDLE_FILE_INFORMATION NtByHandleFileInformation
+#define BY_HANDLE_FILE_INFORMATION struct NtByHandleFileInformation
+#define PBY_HANDLE_FILE_INFORMATION struct NtByHandleFileInformation*
+#define LPBY_HANDLE_FILE_INFORMATION struct NtByHandleFileInformation*
+
+#define _WIN32_FILE_ATTRIBUTE_DATA NtWin32FileAttributeData
+#define WIN32_FILE_ATTRIBUTE_DATA struct NtWin32FileAttributeData
+#define LPWIN32_FILE_ATTRIBUTE_DATA struct NtWin32FileAttributeData*
+
+#define _FILE_END_OF_FILE_INFORMATION FileEndOfFileInformation
+#define FILE_END_OF_FILE_INFORMATION struct FileEndOfFileInformation
+#define PFILE_END_OF_FILE_INFORMATION struct FileEndOfFileInformation*
+
+#define _GET_FILEEX_INFO_LEVELS NtGetFileexInfoLevels
+#define GET_FILEEX_INFO_LEVELS int
+#define LPGET_FILEEX_INFO_LEVELS int*
+
+#define _WIN32_FIND_DATA NtWin32FindData
+#define WIN32_FIND_DATA struct NtWin32FindData
+#define LPWIN32_FIND_DATA struct NtWin32FindData*
+
+#define _FINDEX_INFO_LEVELS NtFindexInfoLevels
+#define FINDEX_INFO_LEVELS int
+#define FindExInfoStandard kNtFindExInfoStandard
+#define FindExInfoBasic kNtFindExInfoBasic
+#define FindExInfoMaxInfoLevel kNtFindExInfoMaxInfoLevel
+#define FIND_FIRST_EX_CASE_SENSITIVE kNtFindFirstExCaseSensitive
+#define FIND_FIRST_EX_LARGE_FETCH kNtFindFirstExLargeFetch
+#define _FINDEX_SEARCH_OPS NtFindexSearchOps
+#define FINDEX_SEARCH_OPS int
+#define FindExSearchNameMatch kNtFindExSearchNameMatch
+#define FindExSearchLimitToDirectories kNtFindExSearchLimitToDirectories
+#define FindExSearchLimitToDevices kNtFindExSearchLimitToDevices
+#define FindExSearchMaxSearchOp kNtFindExSearchMaxSearchOp
+
+#define GetFileExInfoStandard kNtGetFileExInfoStandard
+#define GetFileExMaxInfoLevel kNtGetFile_MAX
+
+#define MOVEFILE_REPLACE_EXISTING kNtMovefileReplaceExisting
+#define MOVEFILE_COPY_ALLOWED kNtMovefileCopyAllowed
+#define MOVEFILE_DELAY_UNTIL_REBOOT kNtMovefileDelayUntilReboot
+#define MOVEFILE_CREATE_HARDLINK kNtMovefileCreateHardlink
+#define MOVEFILE_FAIL_IF_NOT_TRACKABLE kNtMovefileFailIfNotTrackable
+#define MOVEFILE_WRITE_THROUGH kNtMovefileWriteThrough
+
+#define OFFER_PRIORITY int
+#define VmOfferPriorityVeryLow kNtVmOfferPriorityVeryLow
+#define VmOfferPriorityLow kNtVmOfferPriorityLow
+#define VmOfferPriorityBelowNormal kNtVmOfferPriorityBelowNormal
+#define VmOfferPriorityNormal kNtVmOfferPriorityNormal
+
+#define _KWAIT_REASON uint32_t
+#define KWAIT_REASON uint32_t
+#define _OBJECT_INFORMATION_CLASS int
+#define OBJECT_INFORMATION_CLASS int
+#define _PROCESSINFOCLASS int
+#define PROCESSINFOCLASS int
+#define _THREAD_STATE int
+#define THREAD_STATE int
+#define _TOKEN_TYPE int
+#define TOKEN_TYPE int
+
+#define _THREADINFOCLASS int
+#define THREADINFOCLASS int
+
+#define _THREAD_INFORMATION_CLASS int
+#define THREAD_INFORMATION_CLASS int
+#define PTHREAD_INFORMATION_CLASS int*
+
+#define OWNER_SECURITY_INFORMATION kNtOwnerSecurityInformation
+#define GROUP_SECURITY_INFORMATION kNtGroupSecurityInformation
+#define DACL_SECURITY_INFORMATION kNtDaclSecurityInformation
+#define SACL_SECURITY_INFORMATION kNtSaclSecurityInformation
+#define LABEL_SECURITY_INFORMATION kNtLabelSecurityInformation
+#define ATTRIBUTE_SECURITY_INFORMATION kNtAttributeSecurityInformation
+#define SCOPE_SECURITY_INFORMATION kNtScopeSecurityInformation
+#define PROCESS_TRUST_LABEL_SECURITY_INFORMATION kNtProcessTrustLabelSecurityInformation
+#define ACCESS_FILTER_SECURITY_INFORMATION kNtAccessFilterSecurityInformation
+#define BACKUP_SECURITY_INFORMATION kNtBackupSecurityInformation
+#define PROTECTED_DACL_SECURITY_INFORMATION kNtProtectedDaclSecurityInformation
+#define PROTECTED_SACL_SECURITY_INFORMATION kNtProtectedSaclSecurityInformation
+#define UNPROTECTED_DACL_SECURITY_INFORMATION kNtUnprotectedDaclSecurityInformation
+#define UNPROTECTED_SACL_SECURITY_INFORMATION kNtUnprotectedSaclSecurityInformation
+
+#include "libc/nt/enum/startf.h"
+#define STARTF_USESHOWWINDOW kNtStartfUseshowwindow
+#define STARTF_USESIZE kNtStartfUsesize
+#define STARTF_USEPOSITION kNtStartfUseposition
+#define STARTF_USECOUNTCHARS kNtStartfUsecountchars
+#define STARTF_USEFILLATTRIBUTE kNtStartfUsefillattribute
+#define STARTF_RUNFULLSCREEN kNtStartfRunfullscreen
+#define STARTF_FORCEONFEEDBACK kNtStartfForceonfeedback
+#define STARTF_FORCEOFFFEEDBACK kNtStartfForceofffeedback
+#define STARTF_USESTDHANDLES kNtStartfUsestdhandles
+#define STARTF_USEHOTKEY kNtStartfUsehotkey
+#define STARTF_TITLEISLINKNAME kNtStartfTitleislinkname
+#define STARTF_TITLEISAPPID kNtStartfTitleisappid
+#define STARTF_PREVENTPINNING kNtStartfPreventpinning
+#define STARTF_UNTRUSTEDSOURCE kNtStartfUntrustedsource
+
+#include "libc/nt/enum/memflags.h"
+#define MEM_COMMIT kNtMemCommit
+#define MEM_RESERVE kNtMemReserve
+#define MEM_DECOMMIT kNtMemDecommit
+#define MEM_RELEASE kNtMemRelease
+#define MEM_FREE kNtMemFree
+#define MEM_PRIVATE kNtMemPrivate
+#define MEM_MAPPED kNtMemMapped
+#define MEM_RESET kNtMemReset
+#define MEM_TOP_DOWN kNtMemTopDown
+#define MEM_WRITE_WATCH kNtMemWriteWatch
+#define MEM_PHYSICAL kNtMemPhysical
+#define MEM_LARGE_PAGES kNtMemLargePages
+#define MEM_4MB_PAGES kNtMem4mbPages
+
+#include "libc/nt/enum/pageflags.h"
+#define PAGE_NOACCESS kNtPageNoaccess
+#define PAGE_READONLY kNtPageReadonly
+#define PAGE_READWRITE kNtPageReadwrite
+#define PAGE_WRITECOPY kNtPageWritecopy
+#define PAGE_EXECUTE kNtPageExecute
+#define PAGE_EXECUTE_READ kNtPageExecuteRead
+#define PAGE_EXECUTE_READWRITE kNtPageExecuteReadwrite
+#define PAGE_EXECUTE_WRITECOPY kNtPageExecuteWritecopy
+#define PAGE_GUARD kNtPageGuard
+#define PAGE_NOCACHE kNtPageNocache
+#define PAGE_WRITECOMBINE kNtPageWritecombine
+#define FILE_MAP_COPY kNtFileMapCopy
+#define FILE_MAP_WRITE kNtFileMapWrite
+#define FILE_MAP_READ kNtFileMapRead
+#define FILE_MAP_EXECUTE kNtFileMapExecute
+#define FILE_MAP_RESERVE kNtFileMapReserve
+#define FILE_MAP_TARGETS_INVALID kNtFileMapTargetsInvalid
+#define FILE_MAP_LARGE_PAGES kNtFileMapLargePages
+
+#define SECTION_QUERY kNtSectionQuery
+#define SECTION_MAP_WRITE kNtSectionMapWrite
+#define SECTION_MAP_READ kNtSectionMapRead
+#define SECTION_MAP_EXECUTE kNtSectionMapExecute
+#define SECTION_EXTEND_SIZE kNtSectionExtendSize
+#define SECTION_MAP_EXECUTE_EXPLICIT kNtSectionMapExecuteExplicit
+
+#include "libc/nt/enum/ctrlevent.h"
+#define CTRL_CEVENT kNtCtrlCEvent
+#define CTRL_BREAK_EVENT kNtCtrlBreakEvent
+#define CTRL_CLOSE_EVENT kNtCtrlCloseEvent
+#define CTRL_LOGOFF_EVENT kNtCtrlLogoffEvent
+#define CTRL_SHUTDOWN_EVENT kNtCtrlShutdownEvent
+
+#include "libc/nt/enum/fileflagandattributes.h"
+#define FILE_ATTRIBUTE_NORMAL kNtFileAttributeNormal
+#define FILE_ATTRIBUTE_HIDDEN kNtFileAttributeHidden
+#define FILE_FLAG_WRITE_THROUGH kNtFileFlagWriteThrough
+#define FILE_FLAG_OVERLAPPED kNtFileFlagOverlapped
+#define FILE_FLAG_NO_BUFFERING kNtFileFlagNoBuffering
+#define FILE_FLAG_RANDOM_ACCESS kNtFileFlagRandomAccess
+#define FILE_FLAG_SEQUENTIAL_SCAN kNtFileFlagSequentialScan
+#define FILE_FLAG_DELETE_ON_CLOSE kNtFileFlagDeleteOnClose
+#define FILE_FLAG_BACKUP_SEMANTICS kNtFileFlagBackupSemantics
+#define FILE_FLAG_POSIX_SEMANTICS kNtFileFlagPosixSemantics
+#define FILE_FLAG_OPEN_REPARSE_POINT kNtFileFlagOpenReparsePoint
+#define FILE_FLAG_OPEN_NO_RECALL kNtFileFlagOpenNoRecall
+#define FILE_FLAG_FIRST_PIPE_INSTANCE kNtFileFlagFirstPipeInstance
+#define FILE_LIST_DIRECTORY kNtFileListDirectory
+#define FILE_ATTRIBUTE_ARCHIVE kNtFileAttributeArchive
+#define FILE_ATTRIBUTE_COMPRESSED kNtFileAttributeCompressed
+#define FILE_ATTRIBUTE_DEVICE kNtFileAttributeDevice
+#define FILE_ATTRIBUTE_DIRECTORY kNtFileAttributeDirectory
+#define FILE_ATTRIBUTE_ENCRYPTED kNtFileAttributeEncrypted
+#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED kNtFileAttributeNotContentIndexed
+#define FILE_ATTRIBUTE_OFFLINE kNtFileAttributeOffline
+#define FILE_ATTRIBUTE_READONLY kNtFileAttributeReadonly
+#define FILE_ATTRIBUTE_REPARSE_POINT kNtFileAttributeReparsePoint
+#define FILE_ATTRIBUTE_SPARSE_FILE kNtFileAttributeSparseFile
+#define FILE_ATTRIBUTE_SYSTEM kNtFileAttributeSystem
+#define FILE_ATTRIBUTE_TEMPORARY kNtFileAttributeTemporary
+
+#include "libc/nt/enum/creationdisposition.h"
+#define CREATE_NEW kNtCreateNew
+#define CREATE_ALWAYS kNtCreateAlways
+#define OPEN_EXISTING kNtOpenExisting
+#define OPEN_ALWAYS kNtOpenAlways
+#define TRUNCATE_EXISTING kNtTruncateExisting
+
+#include "libc/nt/enum/filesharemode.h"
+#define FILE_SHARE_EXCLUSIVE kNtFileShareExclusive
+#define FILE_SHARE_READ kNtFileShareRead
+#define FILE_SHARE_WRITE kNtFileShareWrite
+#define FILE_SHARE_DELETE kNtFileShareDelete
+
+#include "libc/nt/runtime.h"
+#define INVALID_HANDLE_VALUE kNtInvalidHandleValue
+#define STD_INPUT_HANDLE kNtStdInputHandle
+#define STD_OUTPUT_HANDLE kNtStdOutputHandle
+#define STD_ERROR_HANDLE kNtStdErrorHandle
+
+#include "libc/nt/enum/consoleselectionflags.h"
+#define CONSOLE_NO_SELECTION kNtConsoleNoSelection
+#define CONSOLE_SELECTION_IN_PROGRESS kNtConsoleSelectionInProgress
+#define CONSOLE_SELECTION_NOT_EMPTY kNtConsoleSelectionNotEmpty
+#define CONSOLE_MOUSE_SELECTION kNtConsoleMouseSelection
+#define CONSOLE_MOUSE_DOWN kNtConsoleMouseDown
+
+#include "libc/nt/enum/mb.h"
+#define MB_OK kNtMbOk
+#define MB_OKCANCEL kNtMbOkcancel
+#define MB_ABORTRETRYIGNORE kNtMbAbortretryignore
+#define MB_YESNOCANCEL kNtMbYesnocancel
+#define MB_YESNO kNtMbYesno
+#define MB_RETRYCANCEL kNtMbRetrycancel
+#define MB_CANCELTRYCONTINUE kNtMbCanceltrycontinue
+#define MB_ICONHAND kNtMbIconhand
+#define MB_ICONQUESTION kNtMbIconquestion
+#define MB_ICONEXCLAMATION kNtMbIconexclamation
+#define MB_ICONASTERISK kNtMbIconasterisk
+#define MB_USERICON kNtMbUsericon
+#define MB_ICONWARNING kNtMbIconwarning
+#define MB_ICONERROR kNtMbIconerror
+#define MB_ICONINFORMATION kNtMbIconinformation
+#define MB_ICONSTOP kNtMbIconstop
+#define MB_DEFBUTTON1 kNtMbDefbutton1
+#define MB_DEFBUTTON2 kNtMbDefbutton2
+#define MB_DEFBUTTON3 kNtMbDefbutton3
+#define MB_DEFBUTTON4 kNtMbDefbutton4
+#define MB_APPLMODAL kNtMbApplmodal
+#define MB_SYSTEMMODAL kNtMbSystemmodal
+#define MB_TASKMODAL kNtMbTaskmodal
+#define MB_HELP kNtMbHelp
+#define MB_NOFOCUS kNtMbNofocus
+#define MB_SETFOREGROUND kNtMbSetforeground
+#define MB_DEFAULT_DESKTOP_ONLY kNtMbDefaultDesktopOnly
+#define MB_TOPMOST kNtMbTopmost
+#define MB_RIGHT kNtMbRight
+#define MB_RTLREADING kNtMbRtlreading
+#define MB_SERVICE_NOTIFICATION kNtMbServiceNotification
+#define MB_SERVICE_NOTIFICATION_NT3X kNtMbServiceNotificationNt3x
+#define MB_TYPEMASK kNtMbTypemask
+#define MB_ICONMASK kNtMbIconmask
+#define MB_DEFMASK kNtMbDefmask
+#define MB_MODEMASK kNtMbModemask
+#define MB_MISCMASK kNtMbMiscmask
+
+#include "libc/nt/enum/dialogresult.h"
+#define IDOK kNtIdok
+#define IDCANCEL kNtIdcancel
+#define IDABORT kNtIdabort
+#define IDRETRY kNtIdretry
+#define IDIGNORE kNtIdignore
+#define IDYES kNtIdyes
+#define IDNO kNtIdno
+#define IDCLOSE kNtIdclose
+#define IDHELP kNtIdhelp
+#define IDTRYAGAIN kNtIdtryagain
+#define IDCONTINUE kNtIdcontinue
+
+#include "libc/nt/enum/processaccess.h"
+#define PROCESS_TERMINATE kNtProcessTerminate
+#define PROCESS_CREATE_THREAD kNtProcessCreateThread
+#define PROCESS_SET_SESSIONID kNtProcessSetSessionid
+#define PROCESS_VM_OPERATION kNtProcessVmOperation
+#define PROCESS_VM_READ kNtProcessVmRead
+#define PROCESS_VM_WRITE kNtProcessVmWrite
+#define PROCESS_DUP_HANDLE kNtProcessDupHandle
+#define PROCESS_CREATE_PROCESS kNtProcessCreateProcess
+#define PROCESS_SET_QUOTA kNtProcessSetQuota
+#define PROCESS_SET_INFORMATION kNtProcessSetInformation
+#define PROCESS_QUERY_INFORMATION kNtProcessQueryInformation
+#define PROCESS_SUSPEND_RESUME kNtProcessSuspendResume
+#define PROCESS_QUERY_LIMITED_INFORMATION kNtProcessQueryLimitedInformation
+#define PROCESS_SET_LIMITED_INFORMATION kNtProcessSetLimitedInformation
+#define PROCESS_ALL_ACCESS kNtProcessAllAccess
+
+#include "libc/nt/enum/accessmask.h"
+#define GENERIC_READ kNtGenericRead
+#define GENERIC_WRITE kNtGenericWrite
+#define GENERIC_EXECUTE kNtGenericExecute
+#define GENERIC_ALL kNtGenericAll
+#define DELETE kNtDelete
+#define READ_CONTROL kNtReadControl
+#define WRITE_DAC kNtWriteDac
+#define WRITE_OWNER kNtWriteOwner
+#define SYNCHRONIZE kNtSynchronize
+#define STANDARD_RIGHTS_REQUIRED kNtStandardRightsRequired
+#define STANDARD_RIGHTS_READ kNtStandardRightsRead
+#define STANDARD_RIGHTS_WRITE kNtStandardRightsWrite
+#define STANDARD_RIGHTS_EXECUTE kNtStandardRightsExecute
+#define STANDARD_RIGHTS_ALL kNtStandardRightsAll
+#define SPECIFIC_RIGHTS_ALL kNtSpecificRightsAll
+#define ACCESS_SYSTEM_SECURITY kNtAccessSystemSecurity
+#define MAXIMUM_ALLOWED kNtMaximumAllowed
+#define FILE_READ_DATA kNtFileReadData
+#define FILE_LIST_DIRECTORY kNtFileListDirectory
+#define FILE_WRITE_DATA kNtFileWriteData
+#define FILE_ADD_FILE kNtFileAddFile
+#define FILE_APPEND_DATA kNtFileAppendData
+#define FILE_ADD_SUBDIRECTORY kNtFileAddSubdirectory
+#define FILE_CREATE_PIPE_INSTANCE kNtFileCreatePipeInstance
+#define FILE_READ_EA kNtFileReadEa
+#define FILE_WRITE_EA kNtFileWriteEa
+#define FILE_EXECUTE kNtFileExecute
+#define FILE_TRAVERSE kNtFileTraverse
+#define FILE_DELETE_CHILD kNtFileDeleteChild
+#define FILE_READ_ATTRIBUTES kNtFileReadAttributes
+#define FILE_WRITE_ATTRIBUTES kNtFileWriteAttributes
+#define FILE_ALL_ACCESS kNtFileAllAccess
+#define FILE_GENERIC_READ kNtFileGenericRead
+#define FILE_GENERIC_WRITE kNtFileGenericWrite
+#define FILE_GENERIC_EXECUTE kNtFileGenericExecute
+#define TOKEN_ASSIGN_PRIMARY kNtTokenAssignPrimary
+#define TOKEN_DUPLICATE kNtTokenDuplicate
+#define TOKEN_IMPERSONATE kNtTokenImpersonate
+#define TOKEN_QUERY kNtTokenQuery
+#define TOKEN_QUERY_SOURCE kNtTokenQuerySource
+#define TOKEN_ADJUST_PRIVILEGES kNtTokenAdjustPrivileges
+#define TOKEN_ADJUST_GROUPS kNtTokenAdjustGroups
+#define TOKEN_ADJUST_DEFAULT kNtTokenAdjustDefault
+#define TOKEN_ADJUST_SESSIONID kNtTokenAdjustSessionid
+#define TOKEN_ALL_ACCESS_P kNtTokenAllAccessP
+#define TOKEN_ALL_ACCESS kNtTokenAllAccess
+#define TOKEN_READ kNtTokenRead
+#define TOKEN_WRITE kNtTokenWrite
+#define TOKEN_EXECUTE kNtTokenExecute
+#define TOKEN_TRUST_CONSTRAINT_MASK kNtTokenTrustConstraintMask
+#define TOKEN_ACCESS_PSEUDO_HANDLE_WIN8 kNtTokenAccessPseudoHandleWin8
+#define TOKEN_ACCESS_PSEUDO_HANDLE kNtTokenAccessPseudoHandle
+
+#define FILE_TYPE_UNKNOWN kNtFileTypeUnknown
+#define FILE_TYPE_DISK kNtFileTypeDisk
+#define FILE_TYPE_CHAR kNtFileTypeChar
+#define FILE_TYPE_PIPE kNtFileTypePipe
+#define FILE_TYPE_REMOTE kNtFileTypeRemote
+
+#define NT_DEBUG_PROCESS kNtDebugProcess
+#define NT_DEBUG_ONLY_THIS_PROCESS kNtDebugOnlyThisProcess
+#define NT_CREATE_SUSPENDED kNtCreateSuspended
+#define NT_DETACHED_PROCESS kNtDetachedProcess
+#define NT_CREATE_NEW_CONSOLE kNtCreateNewConsole
+#define NT_NORMAL_PRIORITY_CLASS kNtNormalPriorityClass
+#define NT_IDLE_PRIORITY_CLASS kNtIdlePriorityClass
+#define NT_HIGH_PRIORITY_CLASS kNtHighPriorityClass
+#define NT_REALTIME_PRIORITY_CLASS kNtRealtimePriorityClass
+#define NT_CREATE_NEW_PROCESS_GROUP kNtCreateNewProcessGroup
+#define NT_CREATE_UNICODE_ENVIRONMENT kNtCreateUnicodeEnvironment
+#define NT_CREATE_SEPARATE_WOW_VDM kNtCreateSeparateWowVdm
+#define NT_CREATE_SHARED_WOW_VDM kNtCreateSharedWowVdm
+#define NT_CREATE_FORCEDOS kNtCreateForcedos
+#define NT_BELOW_NORMAL_PRIORITY_CLASS kNtBelowNormalPriorityClass
+#define NT_ABOVE_NORMAL_PRIORITY_CLASS kNtAboveNormalPriorityClass
+#define NT_INHERIT_PARENT_AFFINITY kNtInheritParentAffinity
+#define NT_CREATE_PROTECTED_PROCESS kNtCreateProtectedProcess
+#define NT_EXTENDED_STARTUPINFO_PRESENT kNtExtendedStartupinfoPresent
+#define NT_PROCESS_MODE_BACKGROUND_BEGIN kNtProcessModeBackgroundBegin
+#define NT_PROCESS_MODE_BACKGROUND_END kNtProcessModeBackgroundEnd
+#define NT_CREATE_SECURE_PROCESS kNtCreateSecureProcess
+#define NT_CREATE_BREAKAWAY_FROM_JOB kNtCreateBreakawayFromJob
+#define NT_CREATE_PRESERVE_CODE_AUTHZ_LEVEL kNtCreatePreserveCodeAuthzLevel
+#define NT_CREATE_DEFAULT_ERROR_MODE kNtCreateDefaultErrorMode
+#define NT_CREATE_NO_WINDOW kNtCreateNoWindow
+#define NT_PROFILE_USER kNtProfileUser
+#define NT_PROFILE_KERNEL kNtProfileKernel
+#define NT_PROFILE_SERVER kNtProfileServer
+#define NT_CREATE_IGNORE_SYSTEM_DEFAULT kNtCreateIgnoreSystemDefault
+
+#define FILE_READ_DATA kNtFileReadData
+#define FILE_WRITE_DATA kNtFileWriteData
+#define FILE_ADD_FILE kNtFileAddFile
+#define FILE_APPEND_DATA kNtFileAppendData
+#define FILE_ADD_SUBDIRECTORY kNtFileAddSubdirectory
+#define FILE_CREATE_PIPE_INSTANCE kNtFileCreatePipeInstance
+#define FILE_READ_EA kNtFileReadEa
+#define FILE_WRITE_EA kNtFileWriteEa
+#define FILE_EXECUTE kNtFileExecute
+#define FILE_TRAVERSE kNtFileTraverse
+#define FILE_DELETE_CHILD kNtFileDeleteChild
+#define FILE_READ_ATTRIBUTES kNtFileReadAttributes
+#define FILE_WRITE_ATTRIBUTES kNtFileWriteAttributes
+#define FILE_ALL_ACCESS kNtFileAllAccess
+#define FILE_GENERIC_READ kNtFileGenericRead
+#define FILE_GENERIC_WRITE kNtFileGenericWrite
+#define FILE_GENERIC_EXECUTE kNtFileGenericExecute
+
+#define TOKEN_PRIMARY kNtTokenPrimary
+#define TOKEN_IMPERSONATION kNtTokenImpersonation
+
+#define TOKEN_PRIMARY kNtTokenPrimary
+#define TOKEN_IMPERSONATION kNtTokenImpersonation
+#define SECURITY_ANONYMOUS kNtSecurityAnonymous
+#define SECURITY_IDENTIFICATION kNtSecurityIdentification
+#define SECURITY_IMPERSONATION kNtSecurityImpersonation
+#define SECURITY_DELEGATION kNtSecurityDelegation
+
+#define TOKEN_DUPLICATE kNtTokenDuplicate
+#define TOKEN_IMPERSONATE kNtTokenImpersonate
+#define TOKEN_QUERY kNtTokenQuery
+#define TOKEN_QUERY_SOURCE kNtTokenQuerySource
+#define TOKEN_ADJUST_PRIVILEGES kNtTokenAdjustPrivileges
+#define TOKEN_ADJUST_GROUPS kNtTokenAdjustGroups
+#define TOKEN_ADJUST_DEFAULT kNtTokenAdjustDefault
+#define TOKEN_ADJUST_SESSIONID kNtTokenAdjustSessionid
+#define TOKEN_ALL_ACCESS_P kNtTokenAllAccessP
+#define TOKEN_ALL_ACCESS kNtTokenAllAccess
+#define TOKEN_READ kNtTokenRead
+#define TOKEN_WRITE kNtTokenWrite
+#define TOKEN_EXECUTE kNtTokenExecute
+#define TOKEN_TRUST_CONSTRAINT_MASK kNtTokenTrustConstraintMask
+#define TOKEN_ACCESS_PSEUDO_HANDLE_WIN8 kNtTokenAccessPseudoHandleWin8
+#define TOKEN_ACCESS_PSEUDO_HANDLE kNtTokenAccessPseudoHandle
+
+#define FOREGROUND_BLUE kNtForegroundBlue
+#define FOREGROUND_GREEN kNtForegroundGreen
+#define FOREGROUND_RED kNtForegroundRed
+#define FOREGROUND_INTENSITY kNtForegroundIntensity
+#define BACKGROUND_BLUE kNtBackgroundBlue
+#define BACKGROUND_GREEN kNtBackgroundGreen
+#define BACKGROUND_RED kNtBackgroundRed
+#define BACKGROUND_INTENSITY kNtBackgroundIntensity
+
+#define UNLEN 256
+
+#define DUPLICATE_CLOSE_SOURCE kNtDuplicateCloseSource
+#define DUPLICATE_SAME_ACCESS kNtDuplicateSameAccess
+
+#define IMAGE_FILE_MACHINE_UNKNOWN kNtImageFileMachineUnknown
+#define IMAGE_FILE_MACHINE_TARGET_HOST kNtImageFileMachineTargetHost
+#define IMAGE_FILE_MACHINE_I386 kNtImageFileMachineI386
+#define IMAGE_FILE_MACHINE_R3000 kNtImageFileMachineR3000
+#define IMAGE_FILE_MACHINE_R4000 kNtImageFileMachineR4000
+#define IMAGE_FILE_MACHINE_R10000 kNtImageFileMachineR10000
+#define IMAGE_FILE_MACHINE_WCEMIPSV2 kNtImageFileMachineWcemipsv2
+#define IMAGE_FILE_MACHINE_ALPHA kNtImageFileMachineAlpha
+#define IMAGE_FILE_MACHINE_SH3 kNtImageFileMachineSh3
+#define IMAGE_FILE_MACHINE_SH3DSP kNtImageFileMachineSh3dsp
+#define IMAGE_FILE_MACHINE_SH3E kNtImageFileMachineSh3e
+#define IMAGE_FILE_MACHINE_SH4 kNtImageFileMachineSh4
+#define IMAGE_FILE_MACHINE_SH5 kNtImageFileMachineSh5
+#define IMAGE_FILE_MACHINE_ARM kNtImageFileMachineArm
+#define IMAGE_FILE_MACHINE_THUMB kNtImageFileMachineThumb
+#define IMAGE_FILE_MACHINE_ARMNT kNtImageFileMachineArmnt
+#define IMAGE_FILE_MACHINE_AM33 kNtImageFileMachineAm33
+#define IMAGE_FILE_MACHINE_POWERPC kNtImageFileMachinePowerpc
+#define IMAGE_FILE_MACHINE_POWERPCFP kNtImageFileMachinePowerpcfp
+#define IMAGE_FILE_MACHINE_IA64 kNtImageFileMachineIa64
+#define IMAGE_FILE_MACHINE_MIPS16 kNtImageFileMachineMips16
+#define IMAGE_FILE_MACHINE_ALPHA64 kNtImageFileMachineAlpha64
+#define IMAGE_FILE_MACHINE_MIPSFPU kNtImageFileMachineMipsfpu
+#define IMAGE_FILE_MACHINE_MIPSFPU16 kNtImageFileMachineMipsfpu16
+#define IMAGE_FILE_MACHINE_AXP64 IMAGE_FILE_MACHINE_ALPHA64
+#define IMAGE_FILE_MACHINE_TRICORE kNtImageFileMachineTricore
+#define IMAGE_FILE_MACHINE_CEF kNtImageFileMachineCef
+#define IMAGE_FILE_MACHINE_EBC kNtImageFileMachineEbc
+#define IMAGE_FILE_MACHINE_NEXGEN32E kNtImageFileMachineNexgen32e
+#define IMAGE_FILE_MACHINE_M32R kNtImageFileMachineM32r
+#define IMAGE_FILE_MACHINE_ARM64 kNtImageFileMachineArm64
+#define IMAGE_FILE_MACHINE_CEE kNtImageFileMachineCee
+
+#define PE_32BIT kNtPe32bit
+#define PE_64BIT kNtPe64bit
+
+#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA kNtImageDllcharacteristicsHighEntropyVa
+#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE kNtImageDllcharacteristicsDynamicBase
+#define IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY kNtImageDllcharacteristicsForceIntegrity
+#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT kNtImageDllcharacteristicsNxCompat
+#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION kNtImageDllcharacteristicsNoIsolation
+#define IMAGE_DLLCHARACTERISTICS_NO_SEH kNtImageDllcharacteristicsNoSeh
+#define IMAGE_DLLCHARACTERISTICS_NO_BIND kNtImageDllcharacteristicsNoBind
+#define IMAGE_DLLCHARACTERISTICS_APPCONTAINER kNtImageDllcharacteristicsAppcontainer
+#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER kNtImageDllcharacteristicsWdmDriver
+#define IMAGE_DLLCHARACTERISTICS_GUARD_CF kNtImageDllcharacteristicsGuardCf
+#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE kNtImageDllcharacteristicsTerminalServerAware
+
+#define IMAGE_SUBSYSTEM_UNKNOWN kNtImageSubsystemUnknown
+#define IMAGE_SUBSYSTEM_NATIVE kNtImageSubsystemNative
+#define IMAGE_SUBSYSTEM_WINDOWS_GUI kNtImageSubsystemWindowsGui
+#define IMAGE_SUBSYSTEM_WINDOWS_CUI kNtImageSubsystemWindowsCui
+#define IMAGE_SUBSYSTEM_OS2_CUI kNtImageSubsystemOs2Cui
+#define IMAGE_SUBSYSTEM_POSIX_CUI kNtImageSubsystemPosixCui
+#define IMAGE_SUBSYSTEM_NATIVE_WINDOWS kNtImageSubsystemNativeWindows
+#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI kNtImageSubsystemWindowsCeGui
+#define IMAGE_SUBSYSTEM_EFI_APPLICATION kNtImageSubsystemEfiApplication
+#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER kNtImageSubsystemEfiBootServiceDriver
+#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER kNtImageSubsystemEfiRuntimeDriver
+#define IMAGE_SUBSYSTEM_EFI_ROM kNtImageSubsystemEfiRom
+#define IMAGE_SUBSYSTEM_XBOX kNtImageSubsystemXbox
+#define IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION kNtImageSubsystemWindowsBootApplication
+#define IMAGE_SUBSYSTEM_XBOX_CODE_CATALOG kNtImageSubsystemXboxCodeCatalog
+
+#define IMAGE_FILE_RELOCS_STRIPPED kNtImageFileRelocsStripped
+#define IMAGE_FILE_EXECUTABLE_IMAGE kNtImageFileExecutableImage
+#define IMAGE_FILE_LINE_NUMS_STRIPPED kNtImageFileLineNumsStripped
+#define IMAGE_FILE_LOCAL_SYMS_STRIPPED kNtImageFileLocalSymsStripped
+#define IMAGE_FILE_AGGRESIVE_WS_TRIM kNtImageFileAggresiveWsTrim
+#define IMAGE_FILE_LARGE_ADDRESS_AWARE kNtImageFileLargeAddressAware
+#define IMAGE_FILE_BYTES_REVERSED_LO kNtImageFileBytesReversedLo
+#define IMAGE_FILE_32BIT_MACHINE kNtImageFile_32bitMachine
+#define IMAGE_FILE_DEBUG_STRIPPED kNtImageFileDebugStripped
+#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP kNtImageFileRemovableRunFromSwap
+#define IMAGE_FILE_NET_RUN_FROM_SWAP kNtImageFileNetRunFromSwap
+#define IMAGE_FILE_SYSTEM kNtImageFileSystem
+#define IMAGE_FILE_DLL kNtImageFileDll
+#define IMAGE_FILE_UP_SYSTEM_ONLY kNtImageFileUpSystemOnly
+#define IMAGE_FILE_BYTES_REVERSED_HI kNtImageFileBytesReversedHi
+
+#define IMAGE_DOS_SIGNATURE kNtImageDosSignature
+#define IMAGE_OS2_SIGNATURE kNtImageOs2Signature
+#define IMAGE_OS2_SIGNATURE_LE kNtImageOs2SignatureLe
+#define IMAGE_VXD_SIGNATURE kNtImageVxdSignature
+#define IMAGE_NT_SIGNATURE kNtImageNtSignature
+
+#define RICHKEY kNtRichkey
+#define COOKIE_DEFAULT kNtCookieDefault
+#define SIZE_OF_80387_REGISTERS kNtSizeOf_80387Registers
+#define MAXIMUM_SUPPORTED_EXTENSION kNtMaximumSupportedExtension
+
+#define PE_SUBSYSTEM_WINDOWS_GUI kNtPeSubsystemWindowsGui
+#define PE_SUBSYSTEM_WINDOWS_CUI kNtPeSubsystemWindowsCui
+
+#define PE_FILE_RELOCS_STRIPPED kNtPeFileRelocsStripped
+#define PE_FILE_EXECUTABLE_IMAGE kNtPeFileExecutableImage
+#define PE_FILE_LINE_NUMS_STRIPPED kNtPeFileLineNumsStripped
+#define PE_FILE_LOCAL_SYMS_STRIPPED kNtPeFileLocalSymsStripped
+#define PE_FILE_32BIT_MACHINE kNtPeFile_32bitMachine
+#define PE_FILE_DLL kNtPeFileDll
+
+#define PE_SECTION_CNT_CODE kNtPeSectionCntCode
+#define PE_SECTION_CNT_INITIALIZED_DATA kNtPeSectionCntInitializedData
+#define PE_SECTION_CNT_UNINITIALIZED_DATA kNtPeSectionCntUninitializedData
+#define PE_SECTION_GPREL kNtPeSectionGprel
+#define PE_SECTION_MEM_DISCARDABLE kNtPeSectionMemDiscardable
+#define PE_SECTION_MEM_NOT_CACHED kNtPeSectionMemNotCached
+#define PE_SECTION_MEM_NOT_PAGED kNtPeSectionMemNotPaged
+#define PE_SECTION_MEM_SHARED kNtPeSectionMemShared
+#define PE_SECTION_MEM_EXECUTE kNtPeSectionMemExecute
+#define PE_SECTION_MEM_READ kNtPeSectionMemRead
+#define PE_SECTION_MEM_WRITE kNtPeSectionMemWrite
+
+#define PE_GUARD_CF_INSTRUMENTED kNtPeGuardCfInstrumented
+#define PE_GUARD_CFW_INSTRUMENTED kNtPeGuardCfwInstrumented
+#define PE_GUARD_CF_FUNCTION_TABLE_PRESENT kNtPeGuardCfFunctionTablePresent
+#define PE_GUARD_SECURITY_COOKIE_UNUSED kNtPeGuardSecurityCookieUnused
+
+#define PE_REL_BASED_ABSOLUTE kNtPeRelBasedAbsolute
+#define PE_REL_BASED_HIGH kNtPeRelBasedHigh
+#define PE_REL_BASED_LOW kNtPeRelBasedLow
+#define PE_REL_BASED_HIGHLOW kNtPeRelBasedHighlow
+#define PE_REL_BASED_HIGHADJ kNtPeRelBasedHighadj
+#define PE_REL_BASED_MIPS_JMPADDR kNtPeRelBasedMipsJmpaddr
+#define PE_REL_BASED_SECTION kNtPeRelBasedSection
+#define PE_REL_BASED_REL32 kNtPeRelBasedRel32
+#define PE_REL_BASED_MIPS_JMPADDR16 kNtPeRelBasedMipsJmpaddr16
+#define PE_REL_BASED_IA64_IMM64 kNtPeRelBasedIa64Imm64
+#define PE_REL_BASED_DIR64 kNtPeRelBasedDir64
+#define PE_REL_BASED_HIGH3ADJ kNtPeRelBasedHigh3adj
+
+#define IMAGE_DIRECTORY_ENTRY_EXPORT kNtImageDirectoryEntryExport
+#define IMAGE_DIRECTORY_ENTRY_IMPORT kNtImageDirectoryEntryImport
+#define IMAGE_DIRECTORY_ENTRY_RESOURCE kNtImageDirectoryEntryResource
+#define IMAGE_DIRECTORY_ENTRY_EXCEPTION kNtImageDirectoryEntryException
+#define IMAGE_DIRECTORY_ENTRY_SECURITY kNtImageDirectoryEntrySecurity
+#define IMAGE_DIRECTORY_ENTRY_BASERELOC kNtImageDirectoryEntryBasereloc
+#define IMAGE_DIRECTORY_ENTRY_DEBUG kNtImageDirectoryEntryDebug
+#define IMAGE_DIRECTORY_ENTRY_ARCHITECTURE kNtImageDirectoryEntryArchitecture
+#define IMAGE_DIRECTORY_ENTRY_GLOBALPTR kNtImageDirectoryEntryGlobalptr
+#define IMAGE_DIRECTORY_ENTRY_TLS kNtImageDirectoryEntryTls
+#define IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG kNtImageDirectoryEntryLoadConfig
+#define IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT kNtImageDirectoryEntryBoundImport
+#define IMAGE_DIRECTORY_ENTRY_IAT kNtImageDirectoryEntryIat
+#define IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT kNtImageDirectoryEntryDelayImport
+#define IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR kNtImageDirectoryEntryComDescriptor
+
+#define IMAGE_SCN_TYPE_NO_PAD kNtImageScnTypeNoPad
+#define IMAGE_SCN_CNT_CODE kNtImageScnCntCode
+#define IMAGE_SCN_CNT_INITIALIZED_DATA kNtImageScnCntInitializedData
+#define IMAGE_SCN_CNT_UNINITIALIZED_DATA kNtImageScnCntUninitializedData
+#define IMAGE_SCN_LNK_OTHER kNtImageScnLnkOther
+#define IMAGE_SCN_LNK_INFO kNtImageScnLnkInfo
+#define IMAGE_SCN_LNK_REMOVE kNtImageScnLnkRemove
+#define IMAGE_SCN_LNK_COMDAT kNtImageScnLnkComdat
+#define IMAGE_SCN_NO_DEFER_SPEC_EXC kNtImageScnNoDeferSpecExc
+#define IMAGE_SCN_GPREL kNtImageScnGprel
+#define IMAGE_SCN_MEM_FARDATA kNtImageScnMemFardata
+#define IMAGE_SCN_MEM_PURGEABLE kNtImageScnMemPurgeable
+#define IMAGE_SCN_MEM_16BIT kNtImageScnMem16bit
+#define IMAGE_SCN_MEM_LOCKED kNtImageScnMemLocked
+#define IMAGE_SCN_MEM_PRELOAD kNtImageScnMemPreload
+
+#define IMAGE_SCN_ALIGN_1BYTES kNtImageScnAlign1bytes
+#define IMAGE_SCN_ALIGN_2BYTES kNtImageScnAlign2bytes
+#define IMAGE_SCN_ALIGN_4BYTES kNtImageScnAlign4bytes
+#define IMAGE_SCN_ALIGN_8BYTES kNtImageScnAlign8bytes
+#define IMAGE_SCN_ALIGN_16BYTES kNtImageScnAlign16bytes
+#define IMAGE_SCN_ALIGN_32BYTES kNtImageScnAlign32bytes
+#define IMAGE_SCN_ALIGN_64BYTES kNtImageScnAlign64bytes
+#define IMAGE_SCN_ALIGN_128BYTES kNtImageScnAlign128bytes
+#define IMAGE_SCN_ALIGN_256BYTES kNtImageScnAlign256bytes
+#define IMAGE_SCN_ALIGN_512BYTES kNtImageScnAlign512bytes
+#define IMAGE_SCN_ALIGN_1024BYTES kNtImageScnAlign1024bytes
+#define IMAGE_SCN_ALIGN_2048BYTES kNtImageScnAlign2048bytes
+#define IMAGE_SCN_ALIGN_4096BYTES kNtImageScnAlign4096bytes
+#define IMAGE_SCN_ALIGN_8192BYTES kNtImageScnAlign8192bytes
+#define IMAGE_SCN_ALIGN_MASK kNtImageScnAlignMask
+
+#define IMAGE_SCN_LNK_NRELOC_OVFL kNtImageScnLnkNrelocOvfl
+#define IMAGE_SCN_MEM_DISCARDABLE kNtImageScnMemDiscardable
+#define IMAGE_SCN_MEM_NOT_CACHED kNtImageScnMemNotCached
+#define IMAGE_SCN_MEM_NOT_PAGED kNtImageScnMemNotPaged
+#define IMAGE_SCN_MEM_SHARED kNtImageScnMemShared
+#define IMAGE_SCN_MEM_EXECUTE kNtImageScnMemExecute
+#define IMAGE_SCN_MEM_READ kNtImageScnMemRead
+#define IMAGE_SCN_MEM_WRITE kNtImageScnMemWrite
+#define IMAGE_SCN_SCALE_INDEX kNtImageScnScaleIndex
+
+#define IMAGE_SYM_UNDEFINED kNtImageSymUndefined
+#define IMAGE_SYM_ABSOLUTE kNtImageSymAbsolute
+#define IMAGE_SYM_DEBUG kNtImageSymDebug
+#define IMAGE_SYM_SECTION_MAX kNtImageSymSectionMax
+#define IMAGE_SYM_SECTION_MAX_EX kNtImageSymSectionMaxEx
+#define IMAGE_SYM_TYPE_NULL kNtImageSymTypeNull
+#define IMAGE_SYM_TYPE_VOID kNtImageSymTypeVoid
+#define IMAGE_SYM_TYPE_CHAR kNtImageSymTypeChar
+#define IMAGE_SYM_TYPE_SHORT kNtImageSymTypeShort
+#define IMAGE_SYM_TYPE_INT kNtImageSymTypeInt
+#define IMAGE_SYM_TYPE_LONG kNtImageSymTypeLong
+#define IMAGE_SYM_TYPE_FLOAT kNtImageSymTypeFloat
+#define IMAGE_SYM_TYPE_DOUBLE kNtImageSymTypeDouble
+#define IMAGE_SYM_TYPE_STRUCT kNtImageSymTypeStruct
+#define IMAGE_SYM_TYPE_UNION kNtImageSymTypeUnion
+#define IMAGE_SYM_TYPE_ENUM kNtImageSymTypeEnum
+#define IMAGE_SYM_TYPE_MOE kNtImageSymTypeMoe
+#define IMAGE_SYM_TYPE_BYTE kNtImageSymTypeByte
+#define IMAGE_SYM_TYPE_WORD kNtImageSymTypeWord
+#define IMAGE_SYM_TYPE_UINT kNtImageSymTypeUint
+#define IMAGE_SYM_TYPE_DWORD kNtImageSymTypeDword
+#define IMAGE_SYM_TYPE_PCODE kNtImageSymTypePcode
+#define IMAGE_SYM_DTYPE_NULL kNtImageSymDtypeNull
+#define IMAGE_SYM_DTYPE_POINTER kNtImageSymDtypePointer
+#define IMAGE_SYM_DTYPE_FUNCTION kNtImageSymDtypeFunction
+#define IMAGE_SYM_DTYPE_ARRAY kNtImageSymDtypeArray
+#define IMAGE_SYM_CLASS_END_OF_FUNCTION kNtImageSymClassEndOfFunction
+#define IMAGE_SYM_CLASS_NULL kNtImageSymClassNull
+#define IMAGE_SYM_CLASS_AUTOMATIC kNtImageSymClassAutomatic
+#define IMAGE_SYM_CLASS_EXTERNAL kNtImageSymClassExternal
+#define IMAGE_SYM_CLASS_STATIC kNtImageSymClassStatic
+#define IMAGE_SYM_CLASS_REGISTER kNtImageSymClassRegister
+#define IMAGE_SYM_CLASS_EXTERNAL_DEF kNtImageSymClassExternalDef
+#define IMAGE_SYM_CLASS_LABEL kNtImageSymClassLabel
+#define IMAGE_SYM_CLASS_UNDEFINED_LABEL kNtImageSymClassUndefinedLabel
+#define IMAGE_SYM_CLASS_MEMBER_OF_STRUCT kNtImageSymClassMemberOfStruct
+#define IMAGE_SYM_CLASS_ARGUMENT kNtImageSymClassArgument
+#define IMAGE_SYM_CLASS_STRUCT_TAG kNtImageSymClassStructTag
+#define IMAGE_SYM_CLASS_MEMBER_OF_UNION kNtImageSymClassMemberOfUnion
+#define IMAGE_SYM_CLASS_UNION_TAG kNtImageSymClassUnionTag
+#define IMAGE_SYM_CLASS_TYPE_DEFINITION kNtImageSymClassTypeDefinition
+#define IMAGE_SYM_CLASS_UNDEFINED_STATIC kNtImageSymClassUndefinedStatic
+#define IMAGE_SYM_CLASS_ENUM_TAG kNtImageSymClassEnumTag
+#define IMAGE_SYM_CLASS_MEMBER_OF_ENUM kNtImageSymClassMemberOfEnum
+#define IMAGE_SYM_CLASS_REGISTER_PARAM kNtImageSymClassRegisterParam
+#define IMAGE_SYM_CLASS_BIT_FIELD kNtImageSymClassBitField
+#define IMAGE_SYM_CLASS_FAR_EXTERNAL kNtImageSymClassFarExternal
+#define IMAGE_SYM_CLASS_BLOCK kNtImageSymClassBlock
+#define IMAGE_SYM_CLASS_FUNCTION kNtImageSymClassFunction
+#define IMAGE_SYM_CLASS_END_OF_STRUCT kNtImageSymClassEndOfStruct
+#define IMAGE_SYM_CLASS_FILE kNtImageSymClassFile
+#define IMAGE_SYM_CLASS_SECTION kNtImageSymClassSection
+#define IMAGE_SYM_CLASS_WEAK_EXTERNAL kNtImageSymClassWeakExternal
+#define IMAGE_SYM_CLASS_CLR_TOKEN kNtImageSymClassClrToken
+
+#define IMAGE_COMDAT_SELECT_NODUPLICATES kNtImageComdatSelectNoduplicates
+#define IMAGE_COMDAT_SELECT_ANY kNtImageComdatSelectAny
+#define IMAGE_COMDAT_SELECT_SAME_SIZE kNtImageComdatSelectSameSize
+#define IMAGE_COMDAT_SELECT_EXACT_MATCH kNtImageComdatSelectExactMatch
+#define IMAGE_COMDAT_SELECT_ASSOCIATIVE kNtImageComdatSelectAssociative
+#define IMAGE_COMDAT_SELECT_LARGEST kNtImageComdatSelectLargest
+#define IMAGE_COMDAT_SELECT_NEWEST kNtImageComdatSelectNewest
+
+#define IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY kNtImageWeakExternSearchNolibrary
+#define IMAGE_WEAK_EXTERN_SEARCH_LIBRARY kNtImageWeakExternSearchLibrary
+#define IMAGE_WEAK_EXTERN_SEARCH_ALIAS kNtImageWeakExternSearchAlias
+#define IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY kNtImageWeakExternAntiDependency
+
+#define IMAGE_REL_NEXGEN32E_ABSOLUTE kNtImageRelNexgen32eAbsolute
+#define IMAGE_REL_NEXGEN32E_ADDR64 kNtImageRelNexgen32eAddr64
+#define IMAGE_REL_NEXGEN32E_ADDR32 kNtImageRelNexgen32eAddr32
+#define IMAGE_REL_NEXGEN32E_ADDR32NB kNtImageRelNexgen32eAddr32nb
+#define IMAGE_REL_NEXGEN32E_REL32 kNtImageRelNexgen32eRel32
+#define IMAGE_REL_NEXGEN32E_REL32_1 kNtImageRelNexgen32eRel321
+#define IMAGE_REL_NEXGEN32E_REL32_2 kNtImageRelNexgen32eRel322
+#define IMAGE_REL_NEXGEN32E_REL32_3 kNtImageRelNexgen32eRel323
+#define IMAGE_REL_NEXGEN32E_REL32_4 kNtImageRelNexgen32eRel324
+#define IMAGE_REL_NEXGEN32E_REL32_5 kNtImageRelNexgen32eRel325
+#define IMAGE_REL_NEXGEN32E_SECTION kNtImageRelNexgen32eSection
+#define IMAGE_REL_NEXGEN32E_SECREL kNtImageRelNexgen32eSecrel
+#define IMAGE_REL_NEXGEN32E_SECREL7 kNtImageRelNexgen32eSecrel7
+#define IMAGE_REL_NEXGEN32E_TOKEN kNtImageRelNexgen32eToken
+#define IMAGE_REL_NEXGEN32E_SREL32 kNtImageRelNexgen32eSrel32
+#define IMAGE_REL_NEXGEN32E_PAIR kNtImageRelNexgen32ePair
+#define IMAGE_REL_NEXGEN32E_SSPAN32 kNtImageRelNexgen32eSspan32
+
+#define IMAGE_REL_BASED_ABSOLUTE kNtImageRelBasedAbsolute
+#define IMAGE_REL_BASED_HIGH kNtImageRelBasedHigh
+#define IMAGE_REL_BASED_LOW kNtImageRelBasedLow
+#define IMAGE_REL_BASED_HIGHLOW kNtImageRelBasedHighlow
+#define IMAGE_REL_BASED_HIGHADJ kNtImageRelBasedHighadj
+#define IMAGE_REL_BASED_MACHINE_SPECIFIC_5 kNtImageRelBasedMachineSpecific5
+#define IMAGE_REL_BASED_RESERVED kNtImageRelBasedReserved
+#define IMAGE_REL_BASED_MACHINE_SPECIFIC_7 kNtImageRelBasedMachineSpecific7
+#define IMAGE_REL_BASED_MACHINE_SPECIFIC_8 kNtImageRelBasedMachineSpecific8
+#define IMAGE_REL_BASED_MACHINE_SPECIFIC_9 kNtImageRelBasedMachineSpecific9
+#define IMAGE_REL_BASED_DIR64 kNtImageRelBasedDir64
+
+#define IMAGE_ARCHIVE_START_SIZE kNtImageArchiveStartSize
+#define IMAGE_ARCHIVE_START kNtImageArchiveStart
+#define IMAGE_ARCHIVE_END kNtImageArchiveEnd
+#define IMAGE_ARCHIVE_PAD kNtImageArchivePad
+#define IMAGE_ARCHIVE_LINKER_MEMBER kNtImageArchiveLinkerMember
+#define IMAGE_ARCHIVE_LONGNAMES_MEMBER kNtImageArchiveLongnamesMember
+#define IMAGE_ARCHIVE_HYBRIDMAP_MEMBER kNtImageArchiveHybridmapMember
+
+#define IMAGE_ORDINAL_FLAG kNtImageOrdinalFlag
+#define IMAGE_ORDINAL(Ordinal) NtImageOrdinal(Ordinal)
+#define IMAGE_SNAP_BY_ORDINAL(Ordinal) NtImageSnapByOrdinal(Ordinal)
+
+#define IMAGE_RESOURCE_NAME_IS_STRING kNtImageResourceNameIsString
+#define IMAGE_RESOURCE_DATA_IS_DIRECTORY kNtImageResourceDataIsDirectory
+
+#define IMAGE_DYNAMIC_RELOCATION_GUARD_RF_PROLOGUE kNtImageDynamicRelocationGuardRfPrologue
+#define IMAGE_DYNAMIC_RELOCATION_GUARD_RF_EPILOGUE kNtImageDynamicRelocationGuardRfEpilogue
+
+#define IMAGE_HOT_PATCH_BASE_OBLIGATORY kNtImageHotPatchBaseObligatory
+#define IMAGE_HOT_PATCH_CHUNK_INVERSE kNtImageHotPatchChunkInverse
+#define IMAGE_HOT_PATCH_CHUNK_OBLIGATORY kNtImageHotPatchChunkObligatory
+#define IMAGE_HOT_PATCH_CHUNK_RESERVED kNtImageHotPatchChunkReserved
+#define IMAGE_HOT_PATCH_CHUNK_TYPE kNtImageHotPatchChunkType
+#define IMAGE_HOT_PATCH_CHUNK_SOURCE_RVA kNtImageHotPatchChunkSourceRva
+#define IMAGE_HOT_PATCH_CHUNK_TARGET_RVA kNtImageHotPatchChunkTargetRva
+#define IMAGE_HOT_PATCH_CHUNK_SIZE kNtImageHotPatchChunkSize
+#define IMAGE_HOT_PATCH_NONE kNtImageHotPatchNone
+#define IMAGE_HOT_PATCH_FUNCTION kNtImageHotPatchFunction
+#define IMAGE_HOT_PATCH_ABSOLUTE kNtImageHotPatchAbsolute
+#define IMAGE_HOT_PATCH_REL32 kNtImageHotPatchRel32
+#define IMAGE_HOT_PATCH_CALL_TARGET kNtImageHotPatchCallTarget
+#define IMAGE_HOT_PATCH_INDIRECT kNtImageHotPatchIndirect
+#define IMAGE_HOT_PATCH_NO_CALL_TARGET kNtImageHotPatchNoCallTarget
+#define IMAGE_HOT_PATCH_DYNAMIC_VALUE kNtImageHotPatchDynamicValue
+#define IMAGE_GUARD_CF_INSTRUMENTED kNtImageGuardCfInstrumented
+#define IMAGE_GUARD_CFW_INSTRUMENTED kNtImageGuardCfwInstrumented
+#define IMAGE_GUARD_CF_FUNCTION_TABLE_PRESENT kNtImageGuardCfFunctionTablePresent
+#define IMAGE_GUARD_SECURITY_COOKIE_UNUSED kNtImageGuardSecurityCookieUnused
+#define IMAGE_GUARD_PROTECT_DELAYLOAD_IAT kNtImageGuardProtectDelayloadIat
+#define IMAGE_GUARD_DELAYLOAD_IAT_IN_ITS_OWN_SECTION kNtImageGuardDelayloadIatInItsOwnSection
+#define IMAGE_GUARD_CF_EXPORT_SUPPRESSION_INFO_PRESENT kNtImageGuardCfExportSuppressionInfoPresent
+#define IMAGE_GUARD_CF_ENABLE_EXPORT_SUPPRESSION kNtImageGuardCfEnableExportSuppression
+#define IMAGE_GUARD_CF_LONGJUMP_TABLE_PRESENT kNtImageGuardCfLongjumpTablePresent
+#define IMAGE_GUARD_RF_INSTRUMENTED kNtImageGuardRfInstrumented
+#define IMAGE_GUARD_RF_ENABLE kNtImageGuardRfEnable
+#define IMAGE_GUARD_RF_STRICT kNtImageGuardRfStrict
+#define IMAGE_GUARD_CF_FUNCTION_TABLE_SIZE_MASK kNtImageGuardCfFunctionTableSizeMask
+#define IMAGE_GUARD_CF_FUNCTION_TABLE_SIZE_SHIFT kNtImageGuardCfFunctionTableSizeShift
+#define IMAGE_GUARD_FLAG_FID_SUPPRESSED kNtImageGuardFlagFidSuppressed
+#define IMAGE_GUARD_FLAG_EXPORT_SUPPRESSED kNtImageGuardFlagExportSuppressed
+
+#define IMAGE_ENCLAVE_IMPORT_MATCH_NONE kNtImageEnclaveImportMatchNone
+#define IMAGE_ENCLAVE_IMPORT_MATCH_UNIQUE_ID kNtImageEnclaveImportMatchUniqueId
+#define IMAGE_ENCLAVE_IMPORT_MATCH_AUTHOR_ID kNtImageEnclaveImportMatchAuthorId
+#define IMAGE_ENCLAVE_IMPORT_MATCH_FAMILY_ID kNtImageEnclaveImportMatchFamilyId
+#define IMAGE_ENCLAVE_IMPORT_MATCH_IMAGE_ID kNtImageEnclaveImportMatchImageId
+
+#define IMAGE_DEBUG_TYPE_UNKNOWN kNtImageDebugTypeUnknown
+#define IMAGE_DEBUG_TYPE_COFF kNtImageDebugTypeCoff
+#define IMAGE_DEBUG_TYPE_CODEVIEW kNtImageDebugTypeCodeview
+#define IMAGE_DEBUG_TYPE_FPO kNtImageDebugTypeFpo
+#define IMAGE_DEBUG_TYPE_MISC kNtImageDebugTypeMisc
+#define IMAGE_DEBUG_TYPE_EXCEPTION kNtImageDebugTypeException
+#define IMAGE_DEBUG_TYPE_FIXUP kNtImageDebugTypeFixup
+#define IMAGE_DEBUG_TYPE_OMAP_TO_SRC kNtImageDebugTypeOmapToSrc
+#define IMAGE_DEBUG_TYPE_OMAP_FROM_SRC kNtImageDebugTypeOmapFromSrc
+#define IMAGE_DEBUG_TYPE_BORLAND kNtImageDebugTypeBorland
+#define IMAGE_DEBUG_TYPE_RESERVED10 kNtImageDebugTypeReserved10
+#define IMAGE_DEBUG_TYPE_CLSID kNtImageDebugTypeClsid
+#define IMAGE_DEBUG_TYPE_VC_FEATURE kNtImageDebugTypeVcFeature
+#define IMAGE_DEBUG_TYPE_POGO kNtImageDebugTypePogo
+#define IMAGE_DEBUG_TYPE_ILTCG kNtImageDebugTypeIltcg
+#define IMAGE_DEBUG_TYPE_MPX kNtImageDebugTypeMpx
+#define IMAGE_DEBUG_TYPE_REPRO kNtImageDebugTypeRepro
+
+#define FRAME_FPO kNtFrameFpo
+#define FRAME_TRAP kNtFrameTrap
+#define FRAME_TSS kNtFrameTss
+#define FRAME_NONFPO kNtFrameNonfpo
+
+#define IMAGE_SIZEOF_SHORT_NAME kNtImageSizeofShortName
+#define IMAGE_SIZEOF_SECTION_HEADER kNtImageSizeofSectionHeader
+#define IMAGE_SIZEOF_SYMBOL kNtImageSizeofSymbol
+#define IMAGE_ENCLAVE_LONG_ID_LENGTH kNtImageEnclaveLongIdLength
+#define IMAGE_ENCLAVE_SHORT_ID_LENGTH kNtImageEnclaveShortIdLength
+#define IMAGE_NUMBEROF_DIRECTORY_ENTRIES kNtImageNumberofDirectoryEntries
+
+#define FILE_BEGIN kNtFileBegin
+#define FILE_CURRENT kNtFileCurrent
+#define FILE_END kNtFileEnd
+
+#define WSADATA struct NtWsaData
+#define LPWSADATA struct NtWsaData*
+
+#define FD_MAX_EVENTS 10
+
+#define CSADDR_INFO struct NtCsAddrInfo
+#define PCSADDR_INFO struct NtCsAddrInfo*
+#define LPCSADDR_INFO struct NtCsAddrInfo*
+
+#define AFPROTOCOLS struct NtAfProtocols
+#define PAFPROTOCOLS struct NtAfProtocols*
+#define LPAFPROTOCOLS struct NtAfProtocols*
+
+#define WSAECOMPARATOR int
+#define PWSAECOMPARATOR int*
+#define LPWSAECOMPARATOR int*
+
+#define WSANETWORKEVENTS struct NtWsaNetworkEvents
+#define PWSANETWORKEVENTS struct NtWsaNetworkEvents*
+#define LPWSANETWORKEVENTS struct NtWsaNetworkEvents*
+
+#define WSANSCLASSINFOW struct NtWsansClassInfo
+#define PWSANSCLASSINFOW struct NtWsansClassInfo*
+#define LPWSANSCLASSINFOW struct NtWsansClassInfo*
+
+#define WSASERVICECLASSINFOW struct NtWsaServiceClassInfo
+#define PWSASERVICECLASSINFOW struct NtWsaServiceClassInfo*
+#define LPWSASERVICECLASSINFOW struct NtWsaServiceClassInfo*
+
+#define WSANAMESPACE_INFOEX struct NtWsaNamespaceInfoEx
+#define PWSANAMESPACE_INFOEX struct NtWsaNamespaceInfoEx*
+#define LPWSANAMESPACE_INFOEX struct NtWsaNamespaceInfoEx*
+
+#define WSAQUERYSET struct NtWsaQuerySet
+#define PWSAQUERYSET struct NtWsaQuerySet*
+#define LPWSAQUERYSET struct NtWsaQuerySet*
+
+#define WSAVERSION struct NtWsaVersion
+#define PWSAVERSION struct NtWsaVersion*
+#define LPWSAVERSION struct NtWsaVersion*
+
+#define SOCKADDR struct sockaddr
+#define PSOCKADDR struct sockaddr*
+#define LPSOCKADDR struct sockaddr*
+
+#define SOCKET_ADDRESS struct NtSocketAddress
+#define PSOCKET_ADDRESS struct NtSocketAddress*
+#define LPSOCKET_ADDRESS struct NtSocketAddress*
+
+#define REPARSE_DATA_BUFFER struct NtReparseDataBuffer
+#define PREPARSE_DATA_BUFFER struct NtReparseDataBuffer*
+#define LPREPARSE_DATA_BUFFER struct NtReparseDataBuffer*
+
+#define SOCKET_ADDRESS_LIST struct NtSocketAddressList
+#define PSOCKET_ADDRESS_LIST struct NtSocketAddressList*
+#define LPSOCKET_ADDRESS_LIST struct NtSocketAddressList*
+
+#define FLOWSPEC struct NtFlowSpec
+#define LPFLOWSPEC struct NtFlowSpec*
+
+#define QOS struct NtQos
+#define LPQOS struct NtQos*
+
+#define _WSAPROTOCOLCHAIN NtWsaProtocolChain
+#define WSAPROTOCOLCHAIN struct NtWsaProtocolChain
+#define LPWSAPROTOCOLCHAIN struct NtWsaProtocolChain*
+
+#define _WSAPROTOCOL_INFO NtWsaProtocolInfo
+#define WSAPROTOCOL_INFO struct NtWsaProtocolInfo
+#define LPWSAPROTOCOL_INFO struct NtWsaProtocolInfo*
+
+#define _WSABUF NtIovec
+#define WSABUF struct NtIovec
+#define LPWSABUF struct NtIovec*
+
+#define _GUID NtGuid
+#define GUID struct NtGuid
+#define LPGUID struct NtGuid*
+
+#define IID GUID
+#define IsEqualGUID(guid1, guid2)                       \
+  (!memcmp((guid1), (guid2), sizeof(GUID)))
+#define IsEqualIID IsEqualGUID
+
+#define ADDRINFOEX struct NtAddrInfoEx
+#define LPADDRINFOEX struct NtAddrInfoEx*
+
+#define WSAEVENT HANDLE
+#define GROUP uint32_t
+#define WSAOVERLAPPED OVERLAPPED
+#define INVALID_SOCKET -1ULL
+#define SOCKET_ERROR -1
+#define WSA_INVALID_EVENT -1L
+
+#define WAIT_FAILED -1U
+#define STATUS_WAIT_0 0
+#define WAIT_FAILED 0xFFFFFFFFu
+#define WAIT_OBJECT_0 0
+#define STATUS_ABANDONED_WAIT_0 128
+#define WAIT_ABANDONED 128
+#define WAIT_ABANDONED_0 128
+#define INFINITE 0xFFFFFFFF
+
+#define SOCKET uint64_t
+#define WSA_WAIT_IO_COMPLETION 0xc0
+#define WSA_WAIT_TIMEOUT 258
+
+#define LPCONDITIONPROC NtConditionProc
+#define LPWSAOVERLAPPED_COMPLETION_ROUTINE NtWsaOverlappedCompletionRoutine
+
+#define WSACOMPLETIONTYPE int
+#define PWSACOMPLETIONTYPE int*
+#define LPWSACOMPLETIONTYPE int*
+#define NSP_NOTIFY_IMMEDIATELY kNtNspNotifyImmediately
+#define NSP_NOTIFY_HWND kNtNspNotifyHwnd
+#define NSP_NOTIFY_EVENT kNtNspNotifyEvent
+#define NSP_NOTIFY_PORT kNtNspNotifyPort
+#define NSP_NOTIFY_APC kNtNspNotifyApc
+
+#define WSACOMPLETION struct NtWsaCompletion
+#define PWSACOMPLETION struct NtWsaCompletion*
+#define LPWSACOMPLETION struct NtWsaCompletion*
+
+#define WSAPOLLFD struct pollfd
+#define PWSAPOLLFD struct pollfd*
+#define LPWSAPOLLFD struct pollfd*
+
+#define SD_RECEIVE SHUT_RD
+#define SD_SEND SHUT_WR
+#define SD_BOTH SHUT_RDWR
+
+#define WSAMSG struct NtMsgHdr
+#define PWSAMSG struct NtMsgHdr*
+#define LPWSAMSG struct NtMsgHdr*
+
+#define _MEMORYSTATUSEX NtMemoryStatusEx
+#define MEMORYSTATUSEX struct NtMemoryStatusEx
+#define LPMEMORYSTATUSEX struct NtMemoryStatusEx*
+
+#define HKEY_CLASSES_ROOT kNtHkeyClassesRoot
+#define HKEY_CURRENT_USER kNtHkeyCurrentUser
+#define HKEY_LOCAL_MACHINE kNtHkeyLocalMachine
+#define HKEY_USERS kNtHkeyUsers
+#define HKEY_PERFORMANCE_DATA kNtHkeyPerformanceData
+#define HKEY_PERFORMANCE_TEXT kNtHkeyPerformanceText
+#define HKEY_PERFORMANCE_NLSTEXT kNtHkeyPerformanceNlstext
+#define HKEY_CURRENT_CONFIG kNtHkeyCurrentConfig
+#define HKEY_DYN_DATA kNtHkeyDynData
+#define HKEY_CURRENT_USER_LOCAL_SETTINGS kNtHkeyCurrentUserLocalSettings
+#define KEY_READ kNtKeyRead
+#define KEY_WRITE kNtKeyWrite
+#define KEY_EXECUTE kNtKeyExecute
+#define KEY_ALL_ACCESS kNtKeyAllAccess
+#define REG_NONE kNtRegNone
+#define REG_SZ kNtRegSz
+#define REG_EXPAND_SZ kNtRegExpandSz
+#define REG_BINARY kNtRegBinary
+#define REG_DWORD kNtRegDword
+#define REG_DWORD_BIG_ENDIAN kNtRegDwordBigEndian
+#define REG_LINK kNtRegLink
+#define REG_MULTI_SZ kNtRegMultiSz
+#define REG_RESOURCE_LIST kNtRegResourceList
+#define REG_FULL_RESOURCE_DESCRIPTOR kNtRegFullResourceDescriptor
+#define REG_RESOURCE_REQUIREMENTS_LIST kNtRegResourceRequirementsList
+#define REG_QWORD kNtRegQword
+#define RRF_RT_REG_NONE kNtRrfRtRegNone
+#define RRF_RT_REG_SZ kNtRrfRtRegSz
+#define RRF_RT_REG_EXPAND_SZ kNtRrfRtRegExpandSz
+#define RRF_RT_REG_BINARY kNtRrfRtRegBinary
+#define RRF_RT_REG_DWORD kNtRrfRtRegDword
+#define RRF_RT_REG_MULTI_SZ kNtRrfRtRegMultiSz
+#define RRF_RT_REG_QWORD kNtRrfRtRegQword
+#define RRF_RT_DWORD kNtRrfRtDword
+#define RRF_RT_QWORD kNtRrfRtQword
+#define RRF_RT_ANY kNtRrfRtAny
+#define RRF_SUBKEY_WOW6464KEY kNtRrfSubkeyWow6464key
+#define RRF_SUBKEY_WOW6432KEY kNtRrfSubkeyWow6432key
+#define RRF_WOW64_MASK kNtRrfWow64Mask
+#define RRF_NOEXPAND kNtRrfNoexpand
+#define RRF_ZEROONFAILURE kNtRrfZeroonfailure
+
+#define NUMA_NO_PREFERRED_NODE kNtNumaNoPreferredNode
+
+#define TF_DISCONNECT kNtTfDisconnect
+#define TF_REUSE_SOCKET kNtTfReuseSocket
+#define TF_WRITE_BEHIND kNtTfWriteBehind
+#define TF_USE_DEFAULT_WORKER kNtTfUseDefaultWorker
+#define TF_USE_SYSTEM_THREAD kNtTfUseSystemThread
+#define TF_USE_KERNEL_APC kNtTfUseKernelApc
+
+#define SIO_UDP_CONNRESET kNtSioUdpConnreset
+#define SIO_SOCKET_CLOSE_NOTIFY kNtSioSocketCloseNotify
+#define SIO_UDP_NETRESET kNtSioUdpNetreset
+
+#define THREAD_TERMINATE kNtThreadTerminate
+#define THREAD_SUSPEND_RESUME kNtThreadSuspendResume
+#define THREAD_GET_CONTEXT kNtThreadGetContext
+#define THREAD_SET_CONTEXT kNtThreadSetContext
+#define THREAD_QUERY_INFORMATION kNtThreadQueryInformation
+#define THREAD_SET_INFORMATION kNtThreadSetInformation
+#define THREAD_SET_THREAD_TOKEN kNtThreadSetThreadToken
+#define THREAD_IMPERSONATE kNtThreadImpersonate
+#define THREAD_DIRECT_IMPERSONATION kNtThreadDirectImpersonation
+#define THREAD_SET_LIMITED_INFORMATION kNtThreadSetLimitedInformation
+#define THREAD_QUERY_LIMITED_INFORMATION kNtThreadQueryLimitedInformation
+#define THREAD_RESUME kNtThreadResume
+#define THREAD_ALL_ACCESS kNtThreadAllAccess
+
+#define _FILE_SEGMENT_ELEMENT NtFileSegmentElement
+#define FILE_SEGMENT_ELEMENT union NtFileSegmentElement
+#define PFILE_SEGMENT_ELEMENT union NtFileSegmentElement*
+
+#define FileBasicInfo kNtFileBasicInfo
+#define FileStandardInfo kNtFileStandardInfo
+#define FileNameInfo kNtFileNameInfo
+#define FileRenameInfo kNtFileRenameInfo
+#define FileDispositionInfo kNtFileDispositionInfo
+#define FileAllocationInfo kNtFileAllocationInfo
+#define FileEndOfFileInfo kNtFileEndOfFileInfo
+#define FileStreamInfo kNtFileStreamInfo
+#define FileCompressionInfo kNtFileCompressionInfo
+#define FileAttributeTagInfo kNtFileAttributeTagInfo
+#define FileIdBothDirectoryInfo kNtFileIdBothDirectoryInfo
+#define FileIdBothDirectoryRestartInfo kNtFileIdBothDirectoryRestartInfo
+#define FileIoPriorityHintInfo kNtFileIoPriorityHintInfo
+#define FileRemoteProtocolInfo kNtFileRemoteProtocolInfo
+#define FileFullDirectoryInfo kNtFileFullDirectoryInfo
+#define FileFullDirectoryRestartInfo kNtFileFullDirectoryRestartInfo
+#define FileStorageInfo kNtFileStorageInfo
+#define FileAlignmentInfo kNtFileAlignmentInfo
+#define FileIdInfo kNtFileIdInfo
+#define FileIdExtdDirectoryInfo kNtFileIdExtdDirectoryInfo
+#define FileIdExtdDirectoryRestartInfo kNtFileIdExtdDirectoryRestartInfo
+#define FileDispositionInfoEx kNtFileDispositionInfoEx
+#define FileRenameInfoEx kNtFileRenameInfoEx
+
+#define _FILE_FULL_DIR_INFO NtFileFullDirectoryInformation
+#define FILE_FULL_DIR_INFO struct NtFileFullDirectoryInformation
+#define PFILE_FULL_DIR_INFO struct NtFileFullDirectoryInformation*
+
+#define _FILE_BASIC_INFO NtFileBasicInformation
+#define FILE_BASIC_INFO struct NtFileBasicInformation
+#define PFILE_BASIC_INFO struct NtFileBasicInformation*
+
+#define _FILE_STANDARD_INFO NtFileStandardInformation
+#define FILE_STANDARD_INFO struct NtFileStandardInformation
+#define PFILE_STANDARD_INFO struct NtFileStandardInformation*
+
+#define HANDLE_FLAG_INHERIT kNtHandleFlagInherit
+#define HANDLE_FLAG_PROTECT_FROM_CLOSE kNtHandleFlagProtectFromClose
+
+#define SYMBOLIC_LINK_FLAG_DIRECTORY kNtSymbolicLinkFlagDirectory
+
+#define NT_FORMAT_MESSAGE_ALLOCATE_BUFFER kNtFormatMessageAllocateBuffer
+#define NT_FORMAT_MESSAGE_IGNORE_INSERTS kNtFormatMessageIgnoreInserts
+#define NT_FORMAT_MESSAGE_FROM_STRING kNtFormatMessageFromString
+#define NT_FORMAT_MESSAGE_FROM_HMODULE kNtFormatMessageFromHmodule
+#define NT_FORMAT_MESSAGE_FROM_SYSTEM kNtFormatMessageFromSystem
+#define NT_FORMAT_MESSAGE_ARGUMENT_ARRAY kNtFormatMessageArgumentArray
+#define NT_FORMAT_MESSAGE_MAX_WIDTH_MASK kNtFormatMessageMaxWidthMask
+
+#define THREAD_BASE_PRIORITY_IDLE kNtThreadBasePriorityIdle
+#define THREAD_BASE_PRIORITY_MIN kNtThreadBasePriorityMin
+#define THREAD_BASE_PRIORITY_MAX kNtThreadBasePriorityMax
+#define THREAD_BASE_PRIORITY_LOWRT kNtThreadBasePriorityLowrt
+
+#define THREAD_PRIORITY_IDLE kNtThreadPriorityIdle
+#define THREAD_PRIORITY_LOWEST kNtThreadPriorityLowest
+#define THREAD_PRIORITY_BELOW_NORMAL kNtThreadPriorityBelowNormal
+#define THREAD_PRIORITY_NORMAL kNtThreadPriorityNormal
+#define THREAD_PRIORITY_ABOVE_NORMAL kNtThreadPriorityAboveNormal
+#define THREAD_PRIORITY_HIGHEST kNtThreadPriorityHighest
+#define THREAD_PRIORITY_TIME_CRITICAL kNtThreadPriorityTimeCritical
+
+#define ERROR_SUCCESS kNtErrorSuccess
+#define ERROR_INVALID_FUNCTION kNtErrorInvalidFunction
+#define ERROR_FILE_NOT_FOUND kNtErrorFileNotFound
+#define ERROR_PATH_NOT_FOUND kNtErrorPathNotFound
+#define ERROR_TOO_MANY_OPEN_FILES kNtErrorTooManyOpenFiles
+#define ERROR_ACCESS_DENIED kNtErrorAccessDenied
+#define ERROR_INVALID_HANDLE kNtErrorInvalidHandle
+#define ERROR_ARENA_TRASHED kNtErrorArenaTrashed
+#define ERROR_NOT_ENOUGH_MEMORY kNtErrorNotEnoughMemory
+#define ERROR_INVALID_BLOCK kNtErrorInvalidBlock
+#define ERROR_BAD_ENVIRONMENT kNtErrorBadEnvironment
+#define ERROR_BAD_FORMAT kNtErrorBadFormat
+#define ERROR_INVALID_ACCESS kNtErrorInvalidAccess
+#define ERROR_INVALID_DATA kNtErrorInvalidData
+#define ERROR_OUTOFMEMORY kNtErrorOutofmemory
+#define ERROR_INVALID_DRIVE kNtErrorInvalidDrive
+#define ERROR_CURRENT_DIRECTORY kNtErrorCurrentDirectory
+#define ERROR_NOT_SAME_DEVICE kNtErrorNotSameDevice
+#define ERROR_NO_MORE_FILES kNtErrorNoMoreFiles
+#define ERROR_WRITE_PROTECT kNtErrorWriteProtect
+#define ERROR_BAD_UNIT kNtErrorBadUnit
+#define ERROR_NOT_READY kNtErrorNotReady
+#define ERROR_BAD_COMMAND kNtErrorBadCommand
+#define ERROR_CRC kNtErrorCrc
+#define ERROR_BAD_LENGTH kNtErrorBadLength
+#define ERROR_SEEK kNtErrorSeek
+#define ERROR_NOT_DOS_DISK kNtErrorNotDosDisk
+#define ERROR_SECTOR_NOT_FOUND kNtErrorSectorNotFound
+#define ERROR_OUT_OF_PAPER kNtErrorOutOfPaper
+#define ERROR_WRITE_FAULT kNtErrorWriteFault
+#define ERROR_READ_FAULT kNtErrorReadFault
+#define ERROR_GEN_FAILURE kNtErrorGenFailure
+#define ERROR_SHARING_VIOLATION kNtErrorSharingViolation
+#define ERROR_LOCK_VIOLATION kNtErrorLockViolation
+#define ERROR_WRONG_DISK kNtErrorWrongDisk
+#define ERROR_SHARING_BUFFER_EXCEEDED kNtErrorSharingBufferExceeded
+#define ERROR_HANDLE_EOF kNtErrorHandleEof
+#define ERROR_HANDLE_DISK_FULL kNtErrorHandleDiskFull
+#define ERROR_NOT_SUPPORTED kNtErrorNotSupported
+#define ERROR_REM_NOT_LIST kNtErrorRemNotList
+#define ERROR_DUP_NAME kNtErrorDupName
+#define ERROR_BAD_NETPATH kNtErrorBadNetpath
+#define ERROR_NETWORK_BUSY kNtErrorNetworkBusy
+#define ERROR_DEV_NOT_EXIST kNtErrorDevNotExist
+#define ERROR_TOO_MANY_CMDS kNtErrorTooManyCmds
+#define ERROR_ADAP_HDW_ERR kNtErrorAdapHdwErr
+#define ERROR_BAD_NET_RESP kNtErrorBadNetResp
+#define ERROR_UNEXP_NET_ERR kNtErrorUnexpNetErr
+#define ERROR_BAD_REM_ADAP kNtErrorBadRemAdap
+#define ERROR_PRINTQ_FULL kNtErrorPrintqFull
+#define ERROR_NO_SPOOL_SPACE kNtErrorNoSpoolSpace
+#define ERROR_PRINT_CANCELLED kNtErrorPrintCancelled
+#define ERROR_NETNAME_DELETED kNtErrorNetnameDeleted
+#define ERROR_NETWORK_ACCESS_DENIED kNtErrorNetworkAccessDenied
+#define ERROR_BAD_DEV_TYPE kNtErrorBadDevType
+#define ERROR_BAD_NET_NAME kNtErrorBadNetName
+#define ERROR_TOO_MANY_NAMES kNtErrorTooManyNames
+#define ERROR_TOO_MANY_SESS kNtErrorTooManySess
+#define ERROR_SHARING_PAUSED kNtErrorSharingPaused
+#define ERROR_REQ_NOT_ACCEP kNtErrorReqNotAccep
+#define ERROR_REDIR_PAUSED kNtErrorRedirPaused
+#define ERROR_FILE_EXISTS kNtErrorFileExists
+#define ERROR_CANNOT_MAKE kNtErrorCannotMake
+#define ERROR_FAIL_I24 kNtErrorFailI24
+#define ERROR_OUT_OF_STRUCTURES kNtErrorOutOfStructures
+#define ERROR_ALREADY_ASSIGNED kNtErrorAlreadyAssigned
+#define ERROR_INVALID_PASSWORD kNtErrorInvalidPassword
+#define ERROR_INVALID_PARAMETER kNtErrorInvalidParameter
+#define ERROR_NET_WRITE_FAULT kNtErrorNetWriteFault
+#define ERROR_NO_PROC_SLOTS kNtErrorNoProcSlots
+#define ERROR_TOO_MANY_SEMAPHORES kNtErrorTooManySemaphores
+#define ERROR_EXCL_SEM_ALREADY_OWNED kNtErrorExclSemAlreadyOwned
+#define ERROR_SEM_IS_SET kNtErrorSemIsSet
+#define ERROR_TOO_MANY_SEM_REQUESTS kNtErrorTooManySemRequests
+#define ERROR_INVALID_AT_INTERRUPT_TIME kNtErrorInvalidAtInterruptTime
+#define ERROR_SEM_OWNER_DIED kNtErrorSemOwnerDied
+#define ERROR_SEM_USER_LIMIT kNtErrorSemUserLimit
+#define ERROR_DISK_CHANGE kNtErrorDiskChange
+#define ERROR_DRIVE_LOCKED kNtErrorDriveLocked
+#define ERROR_BROKEN_PIPE kNtErrorBrokenPipe
+#define ERROR_OPEN_FAILED kNtErrorOpenFailed
+#define ERROR_BUFFER_OVERFLOW kNtErrorBufferOverflow
+#define ERROR_DISK_FULL kNtErrorDiskFull
+#define ERROR_NO_MORE_SEARCH_HANDLES kNtErrorNoMoreSearchHandles
+#define ERROR_INVALID_TARGET_HANDLE kNtErrorInvalidTargetHandle
+#define ERROR_INVALID_CATEGORY kNtErrorInvalidCategory
+#define ERROR_INVALID_VERIFY_SWITCH kNtErrorInvalidVerifySwitch
+#define ERROR_BAD_DRIVER_LEVEL kNtErrorBadDriverLevel
+#define ERROR_CALL_NOT_IMPLEMENTED kNtErrorCallNotImplemented
+#define ERROR_SEM_TIMEOUT kNtErrorSemTimeout
+#define ERROR_INSUFFICIENT_BUFFER kNtErrorInsufficientBuffer
+#define ERROR_INVALID_NAME kNtErrorInvalidName
+#define ERROR_INVALID_LEVEL kNtErrorInvalidLevel
+#define ERROR_NO_VOLUME_LABEL kNtErrorNoVolumeLabel
+#define ERROR_MOD_NOT_FOUND kNtErrorModNotFound
+#define ERROR_PROC_NOT_FOUND kNtErrorProcNotFound
+#define ERROR_WAIT_NO_CHILDREN kNtErrorWaitNoChildren
+#define ERROR_CHILD_NOT_COMPLETE kNtErrorChildNotComplete
+#define ERROR_DIRECT_ACCESS_HANDLE kNtErrorDirectAccessHandle
+#define ERROR_NEGATIVE_SEEK kNtErrorNegativeSeek
+#define ERROR_SEEK_ON_DEVICE kNtErrorSeekOnDevice
+#define ERROR_IS_JOIN_TARGET kNtErrorIsJoinTarget
+#define ERROR_IS_JOINED kNtErrorIsJoined
+#define ERROR_IS_SUBSTED kNtErrorIsSubsted
+#define ERROR_NOT_JOINED kNtErrorNotJoined
+#define ERROR_NOT_SUBSTED kNtErrorNotSubsted
+#define ERROR_JOIN_TO_JOIN kNtErrorJoinToJoin
+#define ERROR_SUBST_TO_SUBST kNtErrorSubstToSubst
+#define ERROR_JOIN_TO_SUBST kNtErrorJoinToSubst
+#define ERROR_SUBST_TO_JOIN kNtErrorSubstToJoin
+#define ERROR_BUSY_DRIVE kNtErrorBusyDrive
+#define ERROR_SAME_DRIVE kNtErrorSameDrive
+#define ERROR_DIR_NOT_ROOT kNtErrorDirNotRoot
+#define ERROR_DIR_NOT_EMPTY kNtErrorDirNotEmpty
+#define ERROR_IS_SUBST_PATH kNtErrorIsSubstPath
+#define ERROR_IS_JOIN_PATH kNtErrorIsJoinPath
+#define ERROR_PATH_BUSY kNtErrorPathBusy
+#define ERROR_IS_SUBST_TARGET kNtErrorIsSubstTarget
+#define ERROR_SYSTEM_TRACE kNtErrorSystemTrace
+#define ERROR_INVALID_EVENT_COUNT kNtErrorInvalidEventCount
+#define ERROR_TOO_MANY_MUXWAITERS kNtErrorTooManyMuxwaiters
+#define ERROR_INVALID_LIST_FORMAT kNtErrorInvalidListFormat
+#define ERROR_LABEL_TOO_LONG kNtErrorLabelTooLong
+#define ERROR_TOO_MANY_TCBS kNtErrorTooManyTcbs
+#define ERROR_SIGNAL_REFUSED kNtErrorSignalRefused
+#define ERROR_DISCARDED kNtErrorDiscarded
+#define ERROR_NOT_LOCKED kNtErrorNotLocked
+#define ERROR_BAD_THREADID_ADDR kNtErrorBadThreadidAddr
+#define ERROR_BAD_ARGUMENTS kNtErrorBadArguments
+#define ERROR_BAD_PATHNAME kNtErrorBadPathname
+#define ERROR_SIGNAL_PENDING kNtErrorSignalPending
+#define ERROR_MAX_THRDS_REACHED kNtErrorMaxThrdsReached
+#define ERROR_LOCK_FAILED kNtErrorLockFailed
+#define ERROR_BUSY kNtErrorBusy
+#define ERROR_DEVICE_SUPPORT_IN_PROGRESS kNtErrorDeviceSupportInProgress
+#define ERROR_CANCEL_VIOLATION kNtErrorCancelViolation
+#define ERROR_ATOMIC_LOCKS_NOT_SUPPORTED kNtErrorAtomicLocksNotSupported
+#define ERROR_INVALID_SEGMENT_NUMBER kNtErrorInvalidSegmentNumber
+#define ERROR_INVALID_ORDINAL kNtErrorInvalidOrdinal
+#define ERROR_ALREADY_EXISTS kNtErrorAlreadyExists
+#define ERROR_INVALID_FLAG_NUMBER kNtErrorInvalidFlagNumber
+#define ERROR_SEM_NOT_FOUND kNtErrorSemNotFound
+#define ERROR_INVALID_STARTING_CODESEG kNtErrorInvalidStartingCodeseg
+#define ERROR_INVALID_STACKSEG kNtErrorInvalidStackseg
+#define ERROR_INVALID_MODULETYPE kNtErrorInvalidModuletype
+#define ERROR_INVALID_EXE_SIGNATURE kNtErrorInvalidExeSignature
+#define ERROR_EXE_MARKED_INVALID kNtErrorExeMarkedInvalid
+#define ERROR_BAD_EXE_FORMAT kNtErrorBadExeFormat
+#define ERROR_INVALID_MINALLOCSIZE kNtErrorInvalidMinallocsize
+#define ERROR_DYNLINK_FROM_INVALID_RING kNtErrorDynlinkFromInvalidRing
+#define ERROR_IOPL_NOT_ENABLED kNtErrorIoplNotEnabled
+#define ERROR_INVALID_SEGDPL kNtErrorInvalidSegdpl
+#define ERROR_RING2SEG_MUST_BE_MOVABLE kNtErrorRing2segMustBeMovable
+#define ERROR_RELOC_CHAIN_XEEDS_SEGLIM kNtErrorRelocChainXeedsSeglim
+#define ERROR_INFLOOP_IN_RELOC_CHAIN kNtErrorInfloopInRelocChain
+#define ERROR_ENVVAR_NOT_FOUND kNtErrorEnvvarNotFound
+#define ERROR_NO_SIGNAL_SENT kNtErrorNoSignalSent
+#define ERROR_FILENAME_EXCED_RANGE kNtErrorFilenameExcedRange
+#define ERROR_RING2_STACK_IN_USE kNtErrorRing2StackInUse
+#define ERROR_META_EXPANSION_TOO_LONG kNtErrorMetaExpansionTooLong
+#define ERROR_INVALID_SIGNAL_NUMBER kNtErrorInvalidSignalNumber
+#define ERROR_LOCKED kNtErrorLocked
+#define ERROR_TOO_MANY_MODULES kNtErrorTooManyModules
+#define ERROR_NESTING_NOT_ALLOWED kNtErrorNestingNotAllowed
+#define ERROR_EXE_MACHINE_TYPE_MISMATCH kNtErrorExeMachineTypeMismatch
+#define ERROR_EXE_CANNOT_MODIFY_SIGNED_BINARY kNtErrorExeCannotModifySignedBinary
+#define ERROR_EXE_CANNOT_MODIFY_STRONG_SIGNED_BINARY kNtErrorExeCannotModifyStrongSignedBinary
+#define ERROR_FILE_CHECKED_OUT kNtErrorFileCheckedOut
+#define ERROR_CHECKOUT_REQUIRED kNtErrorCheckoutRequired
+#define ERROR_BAD_FILE_TYPE kNtErrorBadFileType
+#define ERROR_FILE_TOO_LARGE kNtErrorFileTooLarge
+#define ERROR_FORMS_AUTH_REQUIRED kNtErrorFormsAuthRequired
+#define ERROR_VIRUS_INFECTED kNtErrorVirusInfected
+#define ERROR_VIRUS_DELETED kNtErrorVirusDeleted
+#define ERROR_PIPE_LOCAL kNtErrorPipeLocal
+#define ERROR_BAD_PIPE kNtErrorBadPipe
+#define ERROR_PIPE_BUSY kNtErrorPipeBusy
+#define ERROR_NO_DATA kNtErrorNoData
+#define ERROR_PIPE_NOT_CONNECTED kNtErrorPipeNotConnected
+#define ERROR_MORE_DATA kNtErrorMoreData
+#define ERROR_NO_WORK_DONE kNtErrorNoWorkDone
+#define ERROR_VC_DISCONNECTED kNtErrorVcDisconnected
+#define ERROR_INVALID_EA_NAME kNtErrorInvalidEaName
+#define ERROR_EA_LIST_INCONSISTENT kNtErrorEaListInconsistent
+#define ERROR_NO_MORE_ITEMS kNtErrorNoMoreItems
+#define ERROR_CANNOT_COPY kNtErrorCannotCopy
+#define ERROR_DIRECTORY kNtErrorDirectory
+#define ERROR_EAS_DIDNT_FIT kNtErrorEasDidntFit
+#define ERROR_EA_FILE_CORRUPT kNtErrorEaFileCorrupt
+#define ERROR_EA_TABLE_FULL kNtErrorEaTableFull
+#define ERROR_INVALID_EA_HANDLE kNtErrorInvalidEaHandle
+#define ERROR_EAS_NOT_SUPPORTED kNtErrorEasNotSupported
+#define ERROR_NOT_OWNER kNtErrorNotOwner
+#define ERROR_TOO_MANY_POSTS kNtErrorTooManyPosts
+#define ERROR_PARTIAL_COPY kNtErrorPartialCopy
+#define ERROR_OPLOCK_NOT_GRANTED kNtErrorOplockNotGranted
+#define ERROR_INVALID_OPLOCK_PROTOCOL kNtErrorInvalidOplockProtocol
+#define ERROR_DISK_TOO_FRAGMENTED kNtErrorDiskTooFragmented
+#define ERROR_DELETE_PENDING kNtErrorDeletePending
+#define ERROR_INCOMPATIBLE_WITH_GLOBAL_SHORT_NAME_REGISTRY_SETTING kNtErrorIncompatibleWithGlobalShortNameRegistrySetting
+#define ERROR_SHORT_NAMES_NOT_ENABLED_ON_VOLUME kNtErrorShortNamesNotEnabledOnVolume
+#define ERROR_SECURITY_STREAM_IS_INCONSISTENT kNtErrorSecurityStreamIsInconsistent
+#define ERROR_INVALID_LOCK_RANGE kNtErrorInvalidLockRange
+#define ERROR_IMAGE_SUBSYSTEM_NOT_PRESENT kNtErrorImageSubsystemNotPresent
+#define ERROR_NOTIFICATION_GUID_ALREADY_DEFINED kNtErrorNotificationGuidAlreadyDefined
+#define ERROR_INVALID_EXCEPTION_HANDLER kNtErrorInvalidExceptionHandler
+#define ERROR_DUPLICATE_PRIVILEGES kNtErrorDuplicatePrivileges
+#define ERROR_NO_RANGES_PROCESSED kNtErrorNoRangesProcessed
+#define ERROR_NOT_ALLOWED_ON_SYSTEM_FILE kNtErrorNotAllowedOnSystemFile
+#define ERROR_DISK_RESOURCES_EXHAUSTED kNtErrorDiskResourcesExhausted
+#define ERROR_INVALID_TOKEN kNtErrorInvalidToken
+#define ERROR_DEVICE_FEATURE_NOT_SUPPORTED kNtErrorDeviceFeatureNotSupported
+#define ERROR_MR_MID_NOT_FOUND kNtErrorMrMidNotFound
+#define ERROR_SCOPE_NOT_FOUND kNtErrorScopeNotFound
+#define ERROR_UNDEFINED_SCOPE kNtErrorUndefinedScope
+#define ERROR_INVALID_CAP kNtErrorInvalidCap
+#define ERROR_DEVICE_UNREACHABLE kNtErrorDeviceUnreachable
+#define ERROR_DEVICE_NO_RESOURCES kNtErrorDeviceNoResources
+#define ERROR_DATA_CHECKSUM_ERROR kNtErrorDataChecksumError
+#define ERROR_INTERMIXED_KERNEL_EA_OPERATION kNtErrorIntermixedKernelEaOperation
+#define ERROR_FILE_LEVEL_TRIM_NOT_SUPPORTED kNtErrorFileLevelTrimNotSupported
+#define ERROR_OFFSET_ALIGNMENT_VIOLATION kNtErrorOffsetAlignmentViolation
+#define ERROR_INVALID_FIELD_IN_PARAMETER_LIST kNtErrorInvalidFieldInParameterList
+#define ERROR_OPERATION_IN_PROGRESS kNtErrorOperationInProgress
+#define ERROR_BAD_DEVICE_PATH kNtErrorBadDevicePath
+#define ERROR_TOO_MANY_DESCRIPTORS kNtErrorTooManyDescriptors
+#define ERROR_SCRUB_DATA_DISABLED kNtErrorScrubDataDisabled
+#define ERROR_NOT_REDUNDANT_STORAGE kNtErrorNotRedundantStorage
+#define ERROR_RESIDENT_FILE_NOT_SUPPORTED kNtErrorResidentFileNotSupported
+#define ERROR_COMPRESSED_FILE_NOT_SUPPORTED kNtErrorCompressedFileNotSupported
+#define ERROR_DIRECTORY_NOT_SUPPORTED kNtErrorDirectoryNotSupported
+#define ERROR_NOT_READ_FROM_COPY kNtErrorNotReadFromCopy
+#define ERROR_FT_WRITE_FAILURE kNtErrorFtWriteFailure
+#define ERROR_FT_DI_SCAN_REQUIRED kNtErrorFtDiScanRequired
+#define ERROR_INVALID_KERNEL_INFO_VERSION kNtErrorInvalidKernelInfoVersion
+#define ERROR_INVALID_PEP_INFO_VERSION kNtErrorInvalidPepInfoVersion
+#define ERROR_OBJECT_NOT_EXTERNALLY_BACKED kNtErrorObjectNotExternallyBacked
+#define ERROR_EXTERNAL_BACKING_PROVIDER_UNKNOWN kNtErrorExternalBackingProviderUnknown
+#define ERROR_COMPRESSION_NOT_BENEFICIAL kNtErrorCompressionNotBeneficial
+#define ERROR_STORAGE_TOPOLOGY_ID_MISMATCH kNtErrorStorageTopologyIdMismatch
+#define ERROR_BLOCKED_BY_PARENTAL_CONTROLS kNtErrorBlockedByParentalControls
+#define ERROR_BLOCK_TOO_MANY_REFERENCES kNtErrorBlockTooManyReferences
+#define ERROR_MARKED_TO_DISALLOW_WRITES kNtErrorMarkedToDisallowWrites
+#define ERROR_ENCLAVE_FAILURE kNtErrorEnclaveFailure
+#define ERROR_FAIL_NOACTION_REBOOT kNtErrorFailNoactionReboot
+#define ERROR_FAIL_SHUTDOWN kNtErrorFailShutdown
+#define ERROR_FAIL_RESTART kNtErrorFailRestart
+#define ERROR_MAX_SESSIONS_REACHED kNtErrorMaxSessionsReached
+#define ERROR_NETWORK_ACCESS_DENIED_EDP kNtErrorNetworkAccessDeniedEdp
+#define ERROR_DEVICE_HINT_NAME_BUFFER_TOO_SMALL kNtErrorDeviceHintNameBufferTooSmall
+#define ERROR_EDP_POLICY_DENIES_OPERATION kNtErrorEdpPolicyDeniesOperation
+#define ERROR_EDP_DPL_POLICY_CANT_BE_SATISFIED kNtErrorEdpDplPolicyCantBeSatisfied
+#define ERROR_CLOUD_FILE_SYNC_ROOT_METADATA_CORRUPT kNtErrorCloudFileSyncRootMetadataCorrupt
+#define ERROR_DEVICE_IN_MAINTENANCE kNtErrorDeviceInMaintenance
+#define ERROR_NOT_SUPPORTED_ON_DAX kNtErrorNotSupportedOnDax
+#define ERROR_DAX_MAPPING_EXISTS kNtErrorDaxMappingExists
+#define ERROR_CLOUD_FILE_PROVIDER_NOT_RUNNING kNtErrorCloudFileProviderNotRunning
+#define ERROR_CLOUD_FILE_METADATA_CORRUPT kNtErrorCloudFileMetadataCorrupt
+#define ERROR_CLOUD_FILE_METADATA_TOO_LARGE kNtErrorCloudFileMetadataTooLarge
+#define ERROR_CLOUD_FILE_PROPERTY_BLOB_TOO_LARGE kNtErrorCloudFilePropertyBlobTooLarge
+#define ERROR_CLOUD_FILE_PROPERTY_BLOB_CHECKSUM_MISMATCH kNtErrorCloudFilePropertyBlobChecksumMismatch
+#define ERROR_CHILD_PROCESS_BLOCKED kNtErrorChildProcessBlocked
+#define ERROR_STORAGE_LOST_DATA_PERSISTENCE kNtErrorStorageLostDataPersistence
+#define ERROR_FILE_SYSTEM_VIRTUALIZATION_UNAVAILABLE kNtErrorFileSystemVirtualizationUnavailable
+#define ERROR_FILE_SYSTEM_VIRTUALIZATION_METADATA_CORRUPT kNtErrorFileSystemVirtualizationMetadataCorrupt
+#define ERROR_FILE_SYSTEM_VIRTUALIZATION_BUSY kNtErrorFileSystemVirtualizationBusy
+#define ERROR_FILE_SYSTEM_VIRTUALIZATION_PROVIDER_UNKNOWN kNtErrorFileSystemVirtualizationProviderUnknown
+#define ERROR_GDI_HANDLE_LEAK kNtErrorGdiHandleLeak
+#define ERROR_CLOUD_FILE_TOO_MANY_PROPERTY_BLOBS kNtErrorCloudFileTooManyPropertyBlobs
+#define ERROR_CLOUD_FILE_PROPERTY_VERSION_NOT_SUPPORTED kNtErrorCloudFilePropertyVersionNotSupported
+#define ERROR_NOT_ACLOUD_FILE kNtErrorNotACloudFile
+#define ERROR_CLOUD_FILE_NOT_IN_SYNC kNtErrorCloudFileNotInSync
+#define ERROR_CLOUD_FILE_ALREADY_CONNECTED kNtErrorCloudFileAlreadyConnected
+#define ERROR_CLOUD_FILE_NOT_SUPPORTED kNtErrorCloudFileNotSupported
+#define ERROR_CLOUD_FILE_INVALID_REQUEST kNtErrorCloudFileInvalidRequest
+#define ERROR_CLOUD_FILE_READ_ONLY_VOLUME kNtErrorCloudFileReadOnlyVolume
+#define ERROR_CLOUD_FILE_CONNECTED_PROVIDER_ONLY kNtErrorCloudFileConnectedProviderOnly
+#define ERROR_CLOUD_FILE_VALIDATION_FAILED kNtErrorCloudFileValidationFailed
+#define ERROR_SMB1_NOT_AVAILABLE kNtErrorSmb1NotAvailable
+#define ERROR_FILE_SYSTEM_VIRTUALIZATION_INVALID_OPERATION kNtErrorFileSystemVirtualizationInvalidOperation
+#define ERROR_CLOUD_FILE_AUTHENTICATION_FAILED kNtErrorCloudFileAuthenticationFailed
+#define ERROR_CLOUD_FILE_INSUFFICIENT_RESOURCES kNtErrorCloudFileInsufficientResources
+#define ERROR_CLOUD_FILE_NETWORK_UNAVAILABLE kNtErrorCloudFileNetworkUnavailable
+#define ERROR_CLOUD_FILE_UNSUCCESSFUL kNtErrorCloudFileUnsuccessful
+#define ERROR_CLOUD_FILE_NOT_UNDER_SYNC_ROOT kNtErrorCloudFileNotUnderSyncRoot
+#define ERROR_CLOUD_FILE_IN_USE kNtErrorCloudFileInUse
+#define ERROR_CLOUD_FILE_PINNED kNtErrorCloudFilePinned
+#define ERROR_CLOUD_FILE_REQUEST_ABORTED kNtErrorCloudFileRequestAborted
+#define ERROR_CLOUD_FILE_PROPERTY_CORRUPT kNtErrorCloudFilePropertyCorrupt
+#define ERROR_CLOUD_FILE_ACCESS_DENIED kNtErrorCloudFileAccessDenied
+#define ERROR_CLOUD_FILE_INCOMPATIBLE_HARDLINKS kNtErrorCloudFileIncompatibleHardlinks
+#define ERROR_CLOUD_FILE_PROPERTY_LOCK_CONFLICT kNtErrorCloudFilePropertyLockConflict
+#define ERROR_CLOUD_FILE_REQUEST_CANCELED kNtErrorCloudFileRequestCanceled
+#define ERROR_EXTERNAL_SYSKEY_NOT_SUPPORTED kNtErrorExternalSyskeyNotSupported
+#define ERROR_THREAD_MODE_ALREADY_BACKGROUND kNtErrorThreadModeAlreadyBackground
+#define ERROR_THREAD_MODE_NOT_BACKGROUND kNtErrorThreadModeNotBackground
+#define ERROR_PROCESS_MODE_ALREADY_BACKGROUND kNtErrorProcessModeAlreadyBackground
+#define ERROR_PROCESS_MODE_NOT_BACKGROUND kNtErrorProcessModeNotBackground
+#define ERROR_CLOUD_FILE_PROVIDER_TERMINATED kNtErrorCloudFileProviderTerminated
+#define ERROR_NOT_ACLOUD_SYNC_ROOT kNtErrorNotACloudSyncRoot
+#define ERROR_FILE_PROTECTED_UNDER_DPL kNtErrorFileProtectedUnderDpl
+#define ERROR_VOLUME_NOT_CLUSTER_ALIGNED kNtErrorVolumeNotClusterAligned
+#define ERROR_NO_PHYSICALLY_ALIGNED_FREE_SPACE_FOUND kNtErrorNoPhysicallyAlignedFreeSpaceFound
+#define ERROR_APPX_FILE_NOT_ENCRYPTED kNtErrorAppxFileNotEncrypted
+#define ERROR_RWRAW_ENCRYPTED_FILE_NOT_ENCRYPTED kNtErrorRwrawEncryptedFileNotEncrypted
+#define ERROR_RWRAW_ENCRYPTED_INVALID_EDATAINFO_FILEOFFSET kNtErrorRwrawEncryptedInvalidEdatainfoFileoffset
+#define ERROR_RWRAW_ENCRYPTED_INVALID_EDATAINFO_FILERANGE kNtErrorRwrawEncryptedInvalidEdatainfoFilerange
+#define ERROR_RWRAW_ENCRYPTED_INVALID_EDATAINFO_PARAMETER kNtErrorRwrawEncryptedInvalidEdatainfoParameter
+#define ERROR_LINUX_SUBSYSTEM_NOT_PRESENT kNtErrorLinuxSubsystemNotPresent
+#define ERROR_CAPAUTHZ_NOT_DEVUNLOCKED kNtErrorCapauthzNotDevunlocked
+#define ERROR_CAPAUTHZ_CHANGE_TYPE kNtErrorCapauthzChangeType
+#define ERROR_CAPAUTHZ_NOT_PROVISIONED kNtErrorCapauthzNotProvisioned
+#define ERROR_CAPAUTHZ_NOT_AUTHORIZED kNtErrorCapauthzNotAuthorized
+#define ERROR_CAPAUTHZ_NO_POLICY kNtErrorCapauthzNoPolicy
+#define ERROR_CAPAUTHZ_DB_CORRUPTED kNtErrorCapauthzDbCorrupted
+#define ERROR_CAPAUTHZ_SCCD_INVALID_CATALOG kNtErrorCapauthzSccdInvalidCatalog
+#define ERROR_CAPAUTHZ_SCCD_NO_AUTH_ENTITY kNtErrorCapauthzSccdNoAuthEntity
+#define ERROR_CAPAUTHZ_SCCD_PARSE_ERROR kNtErrorCapauthzSccdParseError
+#define ERROR_CAPAUTHZ_SCCD_DEV_MODE_REQUIRED kNtErrorCapauthzSccdDevModeRequired
+#define ERROR_CAPAUTHZ_SCCD_NO_CAPABILITY_MATCH kNtErrorCapauthzSccdNoCapabilityMatch
+#define ERROR_PNP_QUERY_REMOVE_DEVICE_TIMEOUT kNtErrorPnpQueryRemoveDeviceTimeout
+#define ERROR_PNP_QUERY_REMOVE_RELATED_DEVICE_TIMEOUT kNtErrorPnpQueryRemoveRelatedDeviceTimeout
+#define ERROR_PNP_QUERY_REMOVE_UNRELATED_DEVICE_TIMEOUT kNtErrorPnpQueryRemoveUnrelatedDeviceTimeout
+#define ERROR_DEVICE_HARDWARE_ERROR kNtErrorDeviceHardwareError
+#define ERROR_INVALID_ADDRESS kNtErrorInvalidAddress
+#define ERROR_VRF_CFG_ENABLED kNtErrorVrfCfgEnabled
+#define ERROR_PARTITION_TERMINATING kNtErrorPartitionTerminating
+#define ERROR_USER_PROFILE_LOAD kNtErrorUserProfileLoad
+#define ERROR_ARITHMETIC_OVERFLOW kNtErrorArithmeticOverflow
+#define ERROR_PIPE_CONNECTED kNtErrorPipeConnected
+#define ERROR_PIPE_LISTENING kNtErrorPipeListening
+#define ERROR_VERIFIER_STOP kNtErrorVerifierStop
+#define ERROR_ABIOS_ERROR kNtErrorAbiosError
+#define ERROR_WX86_WARNING kNtErrorWx86Warning
+#define ERROR_WX86_ERROR kNtErrorWx86Error
+#define ERROR_TIMER_NOT_CANCELED kNtErrorTimerNotCanceled
+#define ERROR_UNWIND kNtErrorUnwind
+#define ERROR_BAD_STACK kNtErrorBadStack
+#define ERROR_INVALID_UNWIND_TARGET kNtErrorInvalidUnwindTarget
+#define ERROR_INVALID_PORT_ATTRIBUTES kNtErrorInvalidPortAttributes
+#define ERROR_PORT_MESSAGE_TOO_LONG kNtErrorPortMessageTooLong
+#define ERROR_INVALID_QUOTA_LOWER kNtErrorInvalidQuotaLower
+#define ERROR_DEVICE_ALREADY_ATTACHED kNtErrorDeviceAlreadyAttached
+#define ERROR_INSTRUCTION_MISALIGNMENT kNtErrorInstructionMisalignment
+#define ERROR_PROFILING_NOT_STARTED kNtErrorProfilingNotStarted
+#define ERROR_PROFILING_NOT_STOPPED kNtErrorProfilingNotStopped
+#define ERROR_COULD_NOT_INTERPRET kNtErrorCouldNotInterpret
+#define ERROR_PROFILING_AT_LIMIT kNtErrorProfilingAtLimit
+#define ERROR_CANT_WAIT kNtErrorCantWait
+#define ERROR_CANT_TERMINATE_SELF kNtErrorCantTerminateSelf
+#define ERROR_UNEXPECTED_MM_CREATE_ERR kNtErrorUnexpectedMmCreateErr
+#define ERROR_UNEXPECTED_MM_MAP_ERROR kNtErrorUnexpectedMmMapError
+#define ERROR_UNEXPECTED_MM_EXTEND_ERR kNtErrorUnexpectedMmExtendErr
+#define ERROR_BAD_FUNCTION_TABLE kNtErrorBadFunctionTable
+#define ERROR_NO_GUID_TRANSLATION kNtErrorNoGuidTranslation
+#define ERROR_INVALID_LDT_SIZE kNtErrorInvalidLdtSize
+#define ERROR_INVALID_LDT_OFFSET kNtErrorInvalidLdtOffset
+#define ERROR_INVALID_LDT_DESCRIPTOR kNtErrorInvalidLdtDescriptor
+#define ERROR_TOO_MANY_THREADS kNtErrorTooManyThreads
+#define ERROR_THREAD_NOT_IN_PROCESS kNtErrorThreadNotInProcess
+#define ERROR_PAGEFILE_QUOTA_EXCEEDED kNtErrorPagefileQuotaExceeded
+#define ERROR_LOGON_SERVER_CONFLICT kNtErrorLogonServerConflict
+#define ERROR_SYNCHRONIZATION_REQUIRED kNtErrorSynchronizationRequired
+#define ERROR_NET_OPEN_FAILED kNtErrorNetOpenFailed
+#define ERROR_IO_PRIVILEGE_FAILED kNtErrorIoPrivilegeFailed
+#define ERROR_CONTROL_CEXIT kNtErrorControlCExit
+#define ERROR_MISSING_SYSTEMFILE kNtErrorMissingSystemfile
+#define ERROR_UNHANDLED_EXCEPTION kNtErrorUnhandledException
+#define ERROR_APP_INIT_FAILURE kNtErrorAppInitFailure
+#define ERROR_PAGEFILE_CREATE_FAILED kNtErrorPagefileCreateFailed
+#define ERROR_INVALID_IMAGE_HASH kNtErrorInvalidImageHash
+#define ERROR_NO_PAGEFILE kNtErrorNoPagefile
+#define ERROR_ILLEGAL_FLOAT_CONTEXT kNtErrorIllegalFloatContext
+#define ERROR_NO_EVENT_PAIR kNtErrorNoEventPair
+#define ERROR_DOMAIN_CTRLR_CONFIG_ERROR kNtErrorDomainCtrlrConfigError
+#define ERROR_ILLEGAL_CHARACTER kNtErrorIllegalCharacter
+#define ERROR_UNDEFINED_CHARACTER kNtErrorUndefinedCharacter
+#define ERROR_FLOPPY_VOLUME kNtErrorFloppyVolume
+#define ERROR_BIOS_FAILED_TO_CONNECT_INTERRUPT kNtErrorBiosFailedToConnectInterrupt
+#define ERROR_BACKUP_CONTROLLER kNtErrorBackupController
+#define ERROR_MUTANT_LIMIT_EXCEEDED kNtErrorMutantLimitExceeded
+#define ERROR_FS_DRIVER_REQUIRED kNtErrorFsDriverRequired
+#define ERROR_CANNOT_LOAD_REGISTRY_FILE kNtErrorCannotLoadRegistryFile
+#define ERROR_DEBUG_ATTACH_FAILED kNtErrorDebugAttachFailed
+#define ERROR_SYSTEM_PROCESS_TERMINATED kNtErrorSystemProcessTerminated
+#define ERROR_DATA_NOT_ACCEPTED kNtErrorDataNotAccepted
+#define ERROR_VDM_HARD_ERROR kNtErrorVdmHardError
+#define ERROR_DRIVER_CANCEL_TIMEOUT kNtErrorDriverCancelTimeout
+#define ERROR_REPLY_MESSAGE_MISMATCH kNtErrorReplyMessageMismatch
+#define ERROR_LOST_WRITEBEHIND_DATA kNtErrorLostWritebehindData
+#define ERROR_CLIENT_SERVER_PARAMETERS_INVALID kNtErrorClientServerParametersInvalid
+#define ERROR_NOT_TINY_STREAM kNtErrorNotTinyStream
+#define ERROR_STACK_OVERFLOW_READ kNtErrorStackOverflowRead
+#define ERROR_CONVERT_TO_LARGE kNtErrorConvertToLarge
+#define ERROR_FOUND_OUT_OF_SCOPE kNtErrorFoundOutOfScope
+#define ERROR_ALLOCATE_BUCKET kNtErrorAllocateBucket
+#define ERROR_MARSHALL_OVERFLOW kNtErrorMarshallOverflow
+#define ERROR_INVALID_VARIANT kNtErrorInvalidVariant
+#define ERROR_BAD_COMPRESSION_BUFFER kNtErrorBadCompressionBuffer
+#define ERROR_AUDIT_FAILED kNtErrorAuditFailed
+#define ERROR_TIMER_RESOLUTION_NOT_SET kNtErrorTimerResolutionNotSet
+#define ERROR_INSUFFICIENT_LOGON_INFO kNtErrorInsufficientLogonInfo
+#define ERROR_BAD_DLL_ENTRYPOINT kNtErrorBadDllEntrypoint
+#define ERROR_BAD_SERVICE_ENTRYPOINT kNtErrorBadServiceEntrypoint
+#define ERROR_IP_ADDRESS_CONFLICT1 kNtErrorIpAddressConflict1
+#define ERROR_IP_ADDRESS_CONFLICT2 kNtErrorIpAddressConflict2
+#define ERROR_REGISTRY_QUOTA_LIMIT kNtErrorRegistryQuotaLimit
+#define ERROR_NO_CALLBACK_ACTIVE kNtErrorNoCallbackActive
+#define ERROR_PWD_TOO_SHORT kNtErrorPwdTooShort
+#define ERROR_PWD_TOO_RECENT kNtErrorPwdTooRecent
+#define ERROR_PWD_HISTORY_CONFLICT kNtErrorPwdHistoryConflict
+#define ERROR_UNSUPPORTED_COMPRESSION kNtErrorUnsupportedCompression
+#define ERROR_INVALID_HW_PROFILE kNtErrorInvalidHwProfile
+#define ERROR_INVALID_PLUGPLAY_DEVICE_PATH kNtErrorInvalidPlugplayDevicePath
+#define ERROR_QUOTA_LIST_INCONSISTENT kNtErrorQuotaListInconsistent
+#define ERROR_EVALUATION_EXPIRATION kNtErrorEvaluationExpiration
+#define ERROR_ILLEGAL_DLL_RELOCATION kNtErrorIllegalDllRelocation
+#define ERROR_DLL_INIT_FAILED_LOGOFF kNtErrorDllInitFailedLogoff
+#define ERROR_VALIDATE_CONTINUE kNtErrorValidateContinue
+#define ERROR_NO_MORE_MATCHES kNtErrorNoMoreMatches
+#define ERROR_RANGE_LIST_CONFLICT kNtErrorRangeListConflict
+#define ERROR_SERVER_SID_MISMATCH kNtErrorServerSidMismatch
+#define ERROR_CANT_ENABLE_DENY_ONLY kNtErrorCantEnableDenyOnly
+#define ERROR_FLOAT_MULTIPLE_FAULTS kNtErrorFloatMultipleFaults
+#define ERROR_FLOAT_MULTIPLE_TRAPS kNtErrorFloatMultipleTraps
+#define ERROR_NOINTERFACE kNtErrorNointerface
+#define ERROR_DRIVER_FAILED_SLEEP kNtErrorDriverFailedSleep
+#define ERROR_CORRUPT_SYSTEM_FILE kNtErrorCorruptSystemFile
+#define ERROR_COMMITMENT_MINIMUM kNtErrorCommitmentMinimum
+#define ERROR_PNP_RESTART_ENUMERATION kNtErrorPnpRestartEnumeration
+#define ERROR_SYSTEM_IMAGE_BAD_SIGNATURE kNtErrorSystemImageBadSignature
+#define ERROR_PNP_REBOOT_REQUIRED kNtErrorPnpRebootRequired
+#define ERROR_INSUFFICIENT_POWER kNtErrorInsufficientPower
+#define ERROR_MULTIPLE_FAULT_VIOLATION kNtErrorMultipleFaultViolation
+#define ERROR_SYSTEM_SHUTDOWN kNtErrorSystemShutdown
+#define ERROR_PORT_NOT_SET kNtErrorPortNotSet
+#define ERROR_DS_VERSION_CHECK_FAILURE kNtErrorDsVersionCheckFailure
+#define ERROR_RANGE_NOT_FOUND kNtErrorRangeNotFound
+#define ERROR_NOT_SAFE_MODE_DRIVER kNtErrorNotSafeModeDriver
+#define ERROR_FAILED_DRIVER_ENTRY kNtErrorFailedDriverEntry
+#define ERROR_DEVICE_ENUMERATION_ERROR kNtErrorDeviceEnumerationError
+#define ERROR_MOUNT_POINT_NOT_RESOLVED kNtErrorMountPointNotResolved
+#define ERROR_INVALID_DEVICE_OBJECT_PARAMETER kNtErrorInvalidDeviceObjectParameter
+#define ERROR_MCA_OCCURED kNtErrorMcaOccured
+#define ERROR_DRIVER_DATABASE_ERROR kNtErrorDriverDatabaseError
+#define ERROR_SYSTEM_HIVE_TOO_LARGE kNtErrorSystemHiveTooLarge
+#define ERROR_DRIVER_FAILED_PRIOR_UNLOAD kNtErrorDriverFailedPriorUnload
+#define ERROR_VOLSNAP_PREPARE_HIBERNATE kNtErrorVolsnapPrepareHibernate
+#define ERROR_HIBERNATION_FAILURE kNtErrorHibernationFailure
+#define ERROR_PWD_TOO_LONG kNtErrorPwdTooLong
+#define ERROR_FILE_SYSTEM_LIMITATION kNtErrorFileSystemLimitation
+#define ERROR_ASSERTION_FAILURE kNtErrorAssertionFailure
+#define ERROR_ACPI_ERROR kNtErrorAcpiError
+#define ERROR_WOW_ASSERTION kNtErrorWowAssertion
+#define ERROR_PNP_BAD_MPS_TABLE kNtErrorPnpBadMpsTable
+#define ERROR_PNP_TRANSLATION_FAILED kNtErrorPnpTranslationFailed
+#define ERROR_PNP_IRQ_TRANSLATION_FAILED kNtErrorPnpIrqTranslationFailed
+#define ERROR_PNP_INVALID_ID kNtErrorPnpInvalidId
+#define ERROR_WAKE_SYSTEM_DEBUGGER kNtErrorWakeSystemDebugger
+#define ERROR_HANDLES_CLOSED kNtErrorHandlesClosed
+#define ERROR_EXTRANEOUS_INFORMATION kNtErrorExtraneousInformation
+#define ERROR_RXACT_COMMIT_NECESSARY kNtErrorRxactCommitNecessary
+#define ERROR_MEDIA_CHECK kNtErrorMediaCheck
+#define ERROR_GUID_SUBSTITUTION_MADE kNtErrorGuidSubstitutionMade
+#define ERROR_STOPPED_ON_SYMLINK kNtErrorStoppedOnSymlink
+#define ERROR_LONGJUMP kNtErrorLongjump
+#define ERROR_PLUGPLAY_QUERY_VETOED kNtErrorPlugplayQueryVetoed
+#define ERROR_UNWIND_CONSOLIDATE kNtErrorUnwindConsolidate
+#define ERROR_REGISTRY_HIVE_RECOVERED kNtErrorRegistryHiveRecovered
+#define ERROR_DLL_MIGHT_BE_INSECURE kNtErrorDllMightBeInsecure
+#define ERROR_DLL_MIGHT_BE_INCOMPATIBLE kNtErrorDllMightBeIncompatible
+#define ERROR_DBG_EXCEPTION_NOT_HANDLED kNtErrorDbgExceptionNotHandled
+#define ERROR_DBG_REPLY_LATER kNtErrorDbgReplyLater
+#define ERROR_DBG_UNABLE_TO_PROVIDE_HANDLE kNtErrorDbgUnableToProvideHandle
+#define ERROR_DBG_TERMINATE_THREAD kNtErrorDbgTerminateThread
+#define ERROR_DBG_TERMINATE_PROCESS kNtErrorDbgTerminateProcess
+#define ERROR_DBG_CONTROL_C kNtErrorDbgControlC
+#define ERROR_DBG_PRINTEXCEPTION_C kNtErrorDbgPrintexceptionC
+#define ERROR_DBG_RIPEXCEPTION kNtErrorDbgRipexception
+#define ERROR_DBG_CONTROL_BREAK kNtErrorDbgControlBreak
+#define ERROR_DBG_COMMAND_EXCEPTION kNtErrorDbgCommandException
+#define ERROR_OBJECT_NAME_EXISTS kNtErrorObjectNameExists
+#define ERROR_THREAD_WAS_SUSPENDED kNtErrorThreadWasSuspended
+#define ERROR_IMAGE_NOT_AT_BASE kNtErrorImageNotAtBase
+#define ERROR_RXACT_STATE_CREATED kNtErrorRxactStateCreated
+#define ERROR_SEGMENT_NOTIFICATION kNtErrorSegmentNotification
+#define ERROR_BAD_CURRENT_DIRECTORY kNtErrorBadCurrentDirectory
+#define ERROR_FT_READ_RECOVERY_FROM_BACKUP kNtErrorFtReadRecoveryFromBackup
+#define ERROR_FT_WRITE_RECOVERY kNtErrorFtWriteRecovery
+#define ERROR_IMAGE_MACHINE_TYPE_MISMATCH kNtErrorImageMachineTypeMismatch
+#define ERROR_RECEIVE_PARTIAL kNtErrorReceivePartial
+#define ERROR_RECEIVE_EXPEDITED kNtErrorReceiveExpedited
+#define ERROR_RECEIVE_PARTIAL_EXPEDITED kNtErrorReceivePartialExpedited
+#define ERROR_EVENT_DONE kNtErrorEventDone
+#define ERROR_EVENT_PENDING kNtErrorEventPending
+#define ERROR_CHECKING_FILE_SYSTEM kNtErrorCheckingFileSystem
+#define ERROR_FATAL_APP_EXIT kNtErrorFatalAppExit
+#define ERROR_PREDEFINED_HANDLE kNtErrorPredefinedHandle
+#define ERROR_WAS_UNLOCKED kNtErrorWasUnlocked
+#define ERROR_SERVICE_NOTIFICATION kNtErrorServiceNotification
+#define ERROR_WAS_LOCKED kNtErrorWasLocked
+#define ERROR_LOG_HARD_ERROR kNtErrorLogHardError
+#define ERROR_ALREADY_WIN32 kNtErrorAlreadyWin32
+#define ERROR_IMAGE_MACHINE_TYPE_MISMATCH_EXE kNtErrorImageMachineTypeMismatchExe
+#define ERROR_NO_YIELD_PERFORMED kNtErrorNoYieldPerformed
+#define ERROR_TIMER_RESUME_IGNORED kNtErrorTimerResumeIgnored
+#define ERROR_ARBITRATION_UNHANDLED kNtErrorArbitrationUnhandled
+#define ERROR_CARDBUS_NOT_SUPPORTED kNtErrorCardbusNotSupported
+#define ERROR_MP_PROCESSOR_MISMATCH kNtErrorMpProcessorMismatch
+#define ERROR_HIBERNATED kNtErrorHibernated
+#define ERROR_RESUME_HIBERNATION kNtErrorResumeHibernation
+#define ERROR_FIRMWARE_UPDATED kNtErrorFirmwareUpdated
+#define ERROR_DRIVERS_LEAKING_LOCKED_PAGES kNtErrorDriversLeakingLockedPages
+#define ERROR_WAKE_SYSTEM kNtErrorWakeSystem
+#define ERRORWAIT_1 kNtErrorWait_1
+#define ERRORWAIT_2 kNtErrorWait_2
+#define ERRORWAIT_3 kNtErrorWait_3
+#define ERRORWAIT_63 kNtErrorWait_63
+#define ERRORABANDONEDWAIT_0 kNtErrorAbandonedWait_0
+#define ERRORABANDONEDWAIT_63 kNtErrorAbandonedWait_63
+#define ERROR_USER_APC kNtErrorUserApc
+#define ERROR_KERNEL_APC kNtErrorKernelApc
+#define ERROR_ALERTED kNtErrorAlerted
+#define ERROR_ELEVATION_REQUIRED kNtErrorElevationRequired
+#define ERROR_REPARSE kNtErrorReparse
+#define ERROR_OPLOCK_BREAK_IN_PROGRESS kNtErrorOplockBreakInProgress
+#define ERROR_VOLUME_MOUNTED kNtErrorVolumeMounted
+#define ERROR_RXACT_COMMITTED kNtErrorRxactCommitted
+#define ERROR_NOTIFY_CLEANUP kNtErrorNotifyCleanup
+#define ERROR_PRIMARY_TRANSPORT_CONNECT_FAILED kNtErrorPrimaryTransportConnectFailed
+#define ERROR_PAGE_FAULT_TRANSITION kNtErrorPageFaultTransition
+#define ERROR_PAGE_FAULT_DEMAND_ZERO kNtErrorPageFaultDemandZero
+#define ERROR_PAGE_FAULT_COPY_ON_WRITE kNtErrorPageFaultCopyOnWrite
+#define ERROR_PAGE_FAULT_GUARD_PAGE kNtErrorPageFaultGuardPage
+#define ERROR_PAGE_FAULT_PAGING_FILE kNtErrorPageFaultPagingFile
+#define ERROR_CACHE_PAGE_LOCKED kNtErrorCachePageLocked
+#define ERROR_CRASH_DUMP kNtErrorCrashDump
+#define ERROR_BUFFER_ALL_ZEROS kNtErrorBufferAllZeros
+#define ERROR_REPARSE_OBJECT kNtErrorReparseObject
+#define ERROR_RESOURCE_REQUIREMENTS_CHANGED kNtErrorResourceRequirementsChanged
+#define ERROR_TRANSLATION_COMPLETE kNtErrorTranslationComplete
+#define ERROR_NOTHING_TO_TERMINATE kNtErrorNothingToTerminate
+#define ERROR_PROCESS_NOT_IN_JOB kNtErrorProcessNotInJob
+#define ERROR_PROCESS_IN_JOB kNtErrorProcessInJob
+#define ERROR_VOLSNAP_HIBERNATE_READY kNtErrorVolsnapHibernateReady
+#define ERROR_FSFILTER_OP_COMPLETED_SUCCESSFULLY kNtErrorFsfilterOpCompletedSuccessfully
+#define ERROR_INTERRUPT_VECTOR_ALREADY_CONNECTED kNtErrorInterruptVectorAlreadyConnected
+#define ERROR_INTERRUPT_STILL_CONNECTED kNtErrorInterruptStillConnected
+#define ERROR_WAIT_FOR_OPLOCK kNtErrorWaitForOplock
+#define ERROR_DBG_EXCEPTION_HANDLED kNtErrorDbgExceptionHandled
+#define ERROR_DBG_CONTINUE kNtErrorDbgContinue
+#define ERROR_CALLBACK_POP_STACK kNtErrorCallbackPopStack
+#define ERROR_COMPRESSION_DISABLED kNtErrorCompressionDisabled
+#define ERROR_CANTFETCHBACKWARDS kNtErrorCantfetchbackwards
+#define ERROR_CANTSCROLLBACKWARDS kNtErrorCantscrollbackwards
+#define ERROR_ROWSNOTRELEASED kNtErrorRowsnotreleased
+#define ERROR_BAD_ACCESSOR_FLAGS kNtErrorBadAccessorFlags
+#define ERROR_ERRORS_ENCOUNTERED kNtErrorErrorsEncountered
+#define ERROR_NOT_CAPABLE kNtErrorNotCapable
+#define ERROR_REQUEST_OUT_OF_SEQUENCE kNtErrorRequestOutOfSequence
+#define ERROR_VERSION_PARSE_ERROR kNtErrorVersionParseError
+#define ERROR_BADSTARTPOSITION kNtErrorBadstartposition
+#define ERROR_MEMORY_HARDWARE kNtErrorMemoryHardware
+#define ERROR_DISK_REPAIR_DISABLED kNtErrorDiskRepairDisabled
+#define ERROR_INSUFFICIENT_RESOURCE_FOR_SPECIFIED_SHARED_SECTION_SIZE kNtErrorInsufficientResourceForSpecifiedSharedSectionSize
+#define ERROR_SYSTEM_POWERSTATE_TRANSITION kNtErrorSystemPowerstateTransition
+#define ERROR_SYSTEM_POWERSTATE_COMPLEX_TRANSITION kNtErrorSystemPowerstateComplexTransition
+#define ERROR_MCA_EXCEPTION kNtErrorMcaException
+#define ERROR_ACCESS_AUDIT_BY_POLICY kNtErrorAccessAuditByPolicy
+#define ERROR_ACCESS_DISABLED_NO_SAFER_UI_BY_POLICY kNtErrorAccessDisabledNoSaferUiByPolicy
+#define ERROR_ABANDON_HIBERFILE kNtErrorAbandonHiberfile
+#define ERROR_LOST_WRITEBEHIND_DATA_NETWORK_DISCONNECTED kNtErrorLostWritebehindDataNetworkDisconnected
+#define ERROR_LOST_WRITEBEHIND_DATA_NETWORK_SERVER_ERROR kNtErrorLostWritebehindDataNetworkServerError
+#define ERROR_LOST_WRITEBEHIND_DATA_LOCAL_DISK_ERROR kNtErrorLostWritebehindDataLocalDiskError
+#define ERROR_BAD_MCFG_TABLE kNtErrorBadMcfgTable
+#define ERROR_DISK_REPAIR_REDIRECTED kNtErrorDiskRepairRedirected
+#define ERROR_DISK_REPAIR_UNSUCCESSFUL kNtErrorDiskRepairUnsuccessful
+#define ERROR_CORRUPT_LOG_OVERFULL kNtErrorCorruptLogOverfull
+#define ERROR_CORRUPT_LOG_CORRUPTED kNtErrorCorruptLogCorrupted
+#define ERROR_CORRUPT_LOG_UNAVAILABLE kNtErrorCorruptLogUnavailable
+#define ERROR_CORRUPT_LOG_DELETED_FULL kNtErrorCorruptLogDeletedFull
+#define ERROR_CORRUPT_LOG_CLEARED kNtErrorCorruptLogCleared
+#define ERROR_ORPHAN_NAME_EXHAUSTED kNtErrorOrphanNameExhausted
+#define ERROR_OPLOCK_SWITCHED_TO_NEW_HANDLE kNtErrorOplockSwitchedToNewHandle
+#define ERROR_CANNOT_GRANT_REQUESTED_OPLOCK kNtErrorCannotGrantRequestedOplock
+#define ERROR_CANNOT_BREAK_OPLOCK kNtErrorCannotBreakOplock
+#define ERROR_OPLOCK_HANDLE_CLOSED kNtErrorOplockHandleClosed
+#define ERROR_NO_ACE_CONDITION kNtErrorNoAceCondition
+#define ERROR_INVALID_ACE_CONDITION kNtErrorInvalidAceCondition
+#define ERROR_FILE_HANDLE_REVOKED kNtErrorFileHandleRevoked
+#define ERROR_IMAGE_AT_DIFFERENT_BASE kNtErrorImageAtDifferentBase
+#define ERROR_ENCRYPTED_IO_NOT_POSSIBLE kNtErrorEncryptedIoNotPossible
+#define ERROR_FILE_METADATA_OPTIMIZATION_IN_PROGRESS kNtErrorFileMetadataOptimizationInProgress
+#define ERROR_QUOTA_ACTIVITY kNtErrorQuotaActivity
+#define ERROR_HANDLE_REVOKED kNtErrorHandleRevoked
+#define ERROR_CALLBACK_INVOKE_INLINE kNtErrorCallbackInvokeInline
+#define ERROR_CPU_SET_INVALID kNtErrorCpuSetInvalid
+#define ERROR_ENCLAVE_NOT_TERMINATED kNtErrorEnclaveNotTerminated
+#define ERROR_ENCLAVE_VIOLATION kNtErrorEnclaveViolation
+#define ERROR_EA_ACCESS_DENIED kNtErrorEaAccessDenied
+#define ERROR_OPERATION_ABORTED kNtErrorOperationAborted
+#define ERROR_IO_INCOMPLETE kNtErrorIoIncomplete
+#define ERROR_IO_PENDING kNtErrorIoPending
+#define ERROR_NOACCESS kNtErrorNoaccess
+#define ERROR_SWAPERROR kNtErrorSwaperror
+#define ERROR_STACK_OVERFLOW kNtErrorStackOverflow
+#define ERROR_INVALID_MESSAGE kNtErrorInvalidMessage
+#define ERROR_CAN_NOT_COMPLETE kNtErrorCanNotComplete
+#define ERROR_INVALID_FLAGS kNtErrorInvalidFlags
+#define ERROR_UNRECOGNIZED_VOLUME kNtErrorUnrecognizedVolume
+#define ERROR_FILE_INVALID kNtErrorFileInvalid
+#define ERROR_FULLSCREEN_MODE kNtErrorFullscreenMode
+#define ERROR_NO_TOKEN kNtErrorNoToken
+#define ERROR_BADDB kNtErrorBaddb
+#define ERROR_BADKEY kNtErrorBadkey
+#define ERROR_CANTOPEN kNtErrorCantopen
+#define ERROR_CANTREAD kNtErrorCantread
+#define ERROR_CANTWRITE kNtErrorCantwrite
+#define ERROR_REGISTRY_RECOVERED kNtErrorRegistryRecovered
+#define ERROR_REGISTRY_CORRUPT kNtErrorRegistryCorrupt
+#define ERROR_REGISTRY_IO_FAILED kNtErrorRegistryIoFailed
+#define ERROR_NOT_REGISTRY_FILE kNtErrorNotRegistryFile
+#define ERROR_KEY_DELETED kNtErrorKeyDeleted
+#define ERROR_NO_LOG_SPACE kNtErrorNoLogSpace
+#define ERROR_KEY_HAS_CHILDREN kNtErrorKeyHasChildren
+#define ERROR_CHILD_MUST_BE_VOLATILE kNtErrorChildMustBeVolatile
+#define ERROR_NOTIFY_ENUM_DIR kNtErrorNotifyEnumDir
+#define ERROR_DEPENDENT_SERVICES_RUNNING kNtErrorDependentServicesRunning
+#define ERROR_INVALID_SERVICE_CONTROL kNtErrorInvalidServiceControl
+#define ERROR_SERVICE_REQUEST_TIMEOUT kNtErrorServiceRequestTimeout
+#define ERROR_SERVICE_NO_THREAD kNtErrorServiceNoThread
+#define ERROR_SERVICE_DATABASE_LOCKED kNtErrorServiceDatabaseLocked
+#define ERROR_SERVICE_ALREADY_RUNNING kNtErrorServiceAlreadyRunning
+#define ERROR_INVALID_SERVICE_ACCOUNT kNtErrorInvalidServiceAccount
+#define ERROR_SERVICE_DISABLED kNtErrorServiceDisabled
+#define ERROR_CIRCULAR_DEPENDENCY kNtErrorCircularDependency
+#define ERROR_SERVICE_DOES_NOT_EXIST kNtErrorServiceDoesNotExist
+#define ERROR_SERVICE_CANNOT_ACCEPT_CTRL kNtErrorServiceCannotAcceptCtrl
+#define ERROR_SERVICE_NOT_ACTIVE kNtErrorServiceNotActive
+#define ERROR_FAILED_SERVICE_CONTROLLER_CONNECT kNtErrorFailedServiceControllerConnect
+#define ERROR_EXCEPTION_IN_SERVICE kNtErrorExceptionInService
+#define ERROR_DATABASE_DOES_NOT_EXIST kNtErrorDatabaseDoesNotExist
+#define ERROR_SERVICE_SPECIFIC_ERROR kNtErrorServiceSpecificError
+#define ERROR_PROCESS_ABORTED kNtErrorProcessAborted
+#define ERROR_SERVICE_DEPENDENCY_FAIL kNtErrorServiceDependencyFail
+#define ERROR_SERVICE_LOGON_FAILED kNtErrorServiceLogonFailed
+#define ERROR_SERVICE_START_HANG kNtErrorServiceStartHang
+#define ERROR_INVALID_SERVICE_LOCK kNtErrorInvalidServiceLock
+#define ERROR_SERVICE_MARKED_FOR_DELETE kNtErrorServiceMarkedForDelete
+#define ERROR_SERVICE_EXISTS kNtErrorServiceExists
+#define ERROR_ALREADY_RUNNING_LKG kNtErrorAlreadyRunningLkg
+#define ERROR_SERVICE_DEPENDENCY_DELETED kNtErrorServiceDependencyDeleted
+#define ERROR_BOOT_ALREADY_ACCEPTED kNtErrorBootAlreadyAccepted
+#define ERROR_SERVICE_NEVER_STARTED kNtErrorServiceNeverStarted
+#define ERROR_DUPLICATE_SERVICE_NAME kNtErrorDuplicateServiceName
+#define ERROR_DIFFERENT_SERVICE_ACCOUNT kNtErrorDifferentServiceAccount
+#define ERROR_CANNOT_DETECT_DRIVER_FAILURE kNtErrorCannotDetectDriverFailure
+#define ERROR_CANNOT_DETECT_PROCESS_ABORT kNtErrorCannotDetectProcessAbort
+#define ERROR_NO_RECOVERY_PROGRAM kNtErrorNoRecoveryProgram
+#define ERROR_SERVICE_NOT_IN_EXE kNtErrorServiceNotInExe
+#define ERROR_NOT_SAFEBOOT_SERVICE kNtErrorNotSafebootService
+#define ERROR_END_OF_MEDIA kNtErrorEndOfMedia
+#define ERROR_FILEMARK_DETECTED kNtErrorFilemarkDetected
+#define ERROR_BEGINNING_OF_MEDIA kNtErrorBeginningOfMedia
+#define ERROR_SETMARK_DETECTED kNtErrorSetmarkDetected
+#define ERROR_NO_DATA_DETECTED kNtErrorNoDataDetected
+#define ERROR_PARTITION_FAILURE kNtErrorPartitionFailure
+#define ERROR_INVALID_BLOCK_LENGTH kNtErrorInvalidBlockLength
+#define ERROR_DEVICE_NOT_PARTITIONED kNtErrorDeviceNotPartitioned
+#define ERROR_UNABLE_TO_LOCK_MEDIA kNtErrorUnableToLockMedia
+#define ERROR_UNABLE_TO_UNLOAD_MEDIA kNtErrorUnableToUnloadMedia
+#define ERROR_MEDIA_CHANGED kNtErrorMediaChanged
+#define ERROR_BUS_RESET kNtErrorBusReset
+#define ERROR_NO_MEDIA_IN_DRIVE kNtErrorNoMediaInDrive
+#define ERROR_NO_UNICODE_TRANSLATION kNtErrorNoUnicodeTranslation
+#define ERROR_DLL_INIT_FAILED kNtErrorDllInitFailed
+#define ERROR_SHUTDOWN_IN_PROGRESS kNtErrorShutdownInProgress
+#define ERROR_NO_SHUTDOWN_IN_PROGRESS kNtErrorNoShutdownInProgress
+#define ERROR_IO_DEVICE kNtErrorIoDevice
+#define ERROR_SERIAL_NO_DEVICE kNtErrorSerialNoDevice
+#define ERROR_IRQ_BUSY kNtErrorIrqBusy
+#define ERROR_MORE_WRITES kNtErrorMoreWrites
+#define ERROR_COUNTER_TIMEOUT kNtErrorCounterTimeout
+#define ERROR_FLOPPY_ID_MARK_NOT_FOUND kNtErrorFloppyIdMarkNotFound
+#define ERROR_FLOPPY_WRONG_CYLINDER kNtErrorFloppyWrongCylinder
+#define ERROR_FLOPPY_UNKNOWN_ERROR kNtErrorFloppyUnknownError
+#define ERROR_FLOPPY_BAD_REGISTERS kNtErrorFloppyBadRegisters
+#define ERROR_DISK_RECALIBRATE_FAILED kNtErrorDiskRecalibrateFailed
+#define ERROR_DISK_OPERATION_FAILED kNtErrorDiskOperationFailed
+#define ERROR_DISK_RESET_FAILED kNtErrorDiskResetFailed
+#define ERROR_EOM_OVERFLOW kNtErrorEomOverflow
+#define ERROR_NOT_ENOUGH_SERVER_MEMORY kNtErrorNotEnoughServerMemory
+#define ERROR_POSSIBLE_DEADLOCK kNtErrorPossibleDeadlock
+#define ERROR_MAPPED_ALIGNMENT kNtErrorMappedAlignment
+#define ERROR_SET_POWER_STATE_VETOED kNtErrorSetPowerStateVetoed
+#define ERROR_SET_POWER_STATE_FAILED kNtErrorSetPowerStateFailed
+#define ERROR_TOO_MANY_LINKS kNtErrorTooManyLinks
+#define ERROR_OLD_WIN_VERSION kNtErrorOldWinVersion
+#define ERROR_APP_WRONG_OS kNtErrorAppWrongOs
+#define ERROR_SINGLE_INSTANCE_APP kNtErrorSingleInstanceApp
+#define ERROR_RMODE_APP kNtErrorRmodeApp
+#define ERROR_INVALID_DLL kNtErrorInvalidDll
+#define ERROR_NO_ASSOCIATION kNtErrorNoAssociation
+#define ERROR_DDE_FAIL kNtErrorDdeFail
+#define ERROR_DLL_NOT_FOUND kNtErrorDllNotFound
+#define ERROR_NO_MORE_USER_HANDLES kNtErrorNoMoreUserHandles
+#define ERROR_MESSAGE_SYNC_ONLY kNtErrorMessageSyncOnly
+#define ERROR_SOURCE_ELEMENT_EMPTY kNtErrorSourceElementEmpty
+#define ERROR_DESTINATION_ELEMENT_FULL kNtErrorDestinationElementFull
+#define ERROR_ILLEGAL_ELEMENT_ADDRESS kNtErrorIllegalElementAddress
+#define ERROR_MAGAZINE_NOT_PRESENT kNtErrorMagazineNotPresent
+#define ERROR_DEVICE_REINITIALIZATION_NEEDED kNtErrorDeviceReinitializationNeeded
+#define ERROR_DEVICE_REQUIRES_CLEANING kNtErrorDeviceRequiresCleaning
+#define ERROR_DEVICE_DOOR_OPEN kNtErrorDeviceDoorOpen
+#define ERROR_DEVICE_NOT_CONNECTED kNtErrorDeviceNotConnected
+#define ERROR_NOT_FOUND kNtErrorNotFound
+#define ERROR_NO_MATCH kNtErrorNoMatch
+#define ERROR_SET_NOT_FOUND kNtErrorSetNotFound
+#define ERROR_POINT_NOT_FOUND kNtErrorPointNotFound
+#define ERROR_NO_TRACKING_SERVICE kNtErrorNoTrackingService
+#define ERROR_NO_VOLUME_ID kNtErrorNoVolumeId
+#define ERROR_UNABLE_TO_REMOVE_REPLACED kNtErrorUnableToRemoveReplaced
+#define ERROR_UNABLE_TO_MOVE_REPLACEMENT kNtErrorUnableToMoveReplacement
+#define ERROR_JOURNAL_DELETE_IN_PROGRESS kNtErrorJournalDeleteInProgress
+#define ERROR_JOURNAL_NOT_ACTIVE kNtErrorJournalNotActive
+#define ERROR_POTENTIAL_FILE_FOUND kNtErrorPotentialFileFound
+#define ERROR_JOURNAL_ENTRY_DELETED kNtErrorJournalEntryDeleted
+#define ERROR_SHUTDOWN_IS_SCHEDULED kNtErrorShutdownIsScheduled
+#define ERROR_SHUTDOWN_USERS_LOGGED_ON kNtErrorShutdownUsersLoggedOn
+#define ERROR_BAD_DEVICE kNtErrorBadDevice
+#define ERROR_CONNECTION_UNAVAIL kNtErrorConnectionUnavail
+#define ERROR_DEVICE_ALREADY_REMEMBERED kNtErrorDeviceAlreadyRemembered
+#define ERROR_NO_NET_OR_BAD_PATH kNtErrorNoNetOrBadPath
+#define ERROR_BAD_PROVIDER kNtErrorBadProvider
+#define ERROR_CANNOT_OPEN_PROFILE kNtErrorCannotOpenProfile
+#define ERROR_BAD_PROFILE kNtErrorBadProfile
+#define ERROR_NOT_CONTAINER kNtErrorNotContainer
+#define ERROR_EXTENDED_ERROR kNtErrorExtendedError
+#define ERROR_INVALID_GROUPNAME kNtErrorInvalidGroupname
+#define ERROR_INVALID_COMPUTERNAME kNtErrorInvalidComputername
+#define ERROR_INVALID_EVENTNAME kNtErrorInvalidEventname
+#define ERROR_INVALID_DOMAINNAME kNtErrorInvalidDomainname
+#define ERROR_INVALID_SERVICENAME kNtErrorInvalidServicename
+#define ERROR_INVALID_NETNAME kNtErrorInvalidNetname
+#define ERROR_INVALID_SHARENAME kNtErrorInvalidSharename
+#define ERROR_INVALID_PASSWORDNAME kNtErrorInvalidPasswordname
+#define ERROR_INVALID_MESSAGENAME kNtErrorInvalidMessagename
+#define ERROR_INVALID_MESSAGEDEST kNtErrorInvalidMessagedest
+#define ERROR_SESSION_CREDENTIAL_CONFLICT kNtErrorSessionCredentialConflict
+#define ERROR_REMOTE_SESSION_LIMIT_EXCEEDED kNtErrorRemoteSessionLimitExceeded
+#define ERROR_DUP_DOMAINNAME kNtErrorDupDomainname
+#define ERROR_NO_NETWORK kNtErrorNoNetwork
+#define ERROR_CANCELLED kNtErrorCancelled
+#define ERROR_USER_MAPPED_FILE kNtErrorUserMappedFile
+#define ERROR_CONNECTION_REFUSED kNtErrorConnectionRefused
+#define ERROR_GRACEFUL_DISCONNECT kNtErrorGracefulDisconnect
+#define ERROR_ADDRESS_ALREADY_ASSOCIATED kNtErrorAddressAlreadyAssociated
+#define ERROR_ADDRESS_NOT_ASSOCIATED kNtErrorAddressNotAssociated
+#define ERROR_CONNECTION_INVALID kNtErrorConnectionInvalid
+#define ERROR_CONNECTION_ACTIVE kNtErrorConnectionActive
+#define ERROR_NETWORK_UNREACHABLE kNtErrorNetworkUnreachable
+#define ERROR_HOST_UNREACHABLE kNtErrorHostUnreachable
+#define ERROR_PROTOCOL_UNREACHABLE kNtErrorProtocolUnreachable
+#define ERROR_PORT_UNREACHABLE kNtErrorPortUnreachable
+#define ERROR_REQUEST_ABORTED kNtErrorRequestAborted
+#define ERROR_CONNECTION_ABORTED kNtErrorConnectionAborted
+#define ERROR_RETRY kNtErrorRetry
+#define ERROR_CONNECTION_COUNT_LIMIT kNtErrorConnectionCountLimit
+#define ERROR_LOGIN_TIME_RESTRICTION kNtErrorLoginTimeRestriction
+#define ERROR_LOGIN_WKSTA_RESTRICTION kNtErrorLoginWkstaRestriction
+#define ERROR_INCORRECT_ADDRESS kNtErrorIncorrectAddress
+#define ERROR_ALREADY_REGISTERED kNtErrorAlreadyRegistered
+#define ERROR_SERVICE_NOT_FOUND kNtErrorServiceNotFound
+#define ERROR_NOT_AUTHENTICATED kNtErrorNotAuthenticated
+#define ERROR_NOT_LOGGED_ON kNtErrorNotLoggedOn
+#define ERROR_CONTINUE kNtErrorContinue
+#define ERROR_ALREADY_INITIALIZED kNtErrorAlreadyInitialized
+#define ERROR_NO_MORE_DEVICES kNtErrorNoMoreDevices
+#define ERROR_NO_SUCH_SITE kNtErrorNoSuchSite
+#define ERROR_DOMAIN_CONTROLLER_EXISTS kNtErrorDomainControllerExists
+#define ERROR_ONLY_IF_CONNECTED kNtErrorOnlyIfConnected
+#define ERROR_OVERRIDE_NOCHANGES kNtErrorOverrideNochanges
+#define ERROR_BAD_USER_PROFILE kNtErrorBadUserProfile
+#define ERROR_NOT_SUPPORTED_ON_SBS kNtErrorNotSupportedOnSbs
+#define ERROR_SERVER_SHUTDOWN_IN_PROGRESS kNtErrorServerShutdownInProgress
+#define ERROR_HOST_DOWN kNtErrorHostDown
+#define ERROR_NON_ACCOUNT_SID kNtErrorNonAccountSid
+#define ERROR_NON_DOMAIN_SID kNtErrorNonDomainSid
+#define ERROR_APPHELP_BLOCK kNtErrorApphelpBlock
+#define ERROR_ACCESS_DISABLED_BY_POLICY kNtErrorAccessDisabledByPolicy
+#define ERROR_REG_NAT_CONSUMPTION kNtErrorRegNatConsumption
+#define ERROR_CSCSHARE_OFFLINE kNtErrorCscshareOffline
+#define ERROR_PKINIT_FAILURE kNtErrorPkinitFailure
+#define ERROR_SMARTCARD_SUBSYSTEM_FAILURE kNtErrorSmartcardSubsystemFailure
+#define ERROR_DOWNGRADE_DETECTED kNtErrorDowngradeDetected
+#define ERROR_MACHINE_LOCKED kNtErrorMachineLocked
+#define ERROR_SMB_GUEST_LOGON_BLOCKED kNtErrorSmbGuestLogonBlocked
+#define ERROR_CALLBACK_SUPPLIED_INVALID_DATA kNtErrorCallbackSuppliedInvalidData
+#define ERROR_SYNC_FOREGROUND_REFRESH_REQUIRED kNtErrorSyncForegroundRefreshRequired
+#define ERROR_DRIVER_BLOCKED kNtErrorDriverBlocked
+#define ERROR_INVALID_IMPORT_OF_NON_DLL kNtErrorInvalidImportOfNonDll
+#define ERROR_ACCESS_DISABLED_WEBBLADE kNtErrorAccessDisabledWebblade
+#define ERROR_ACCESS_DISABLED_WEBBLADE_TAMPER kNtErrorAccessDisabledWebbladeTamper
+#define ERROR_RECOVERY_FAILURE kNtErrorRecoveryFailure
+#define ERROR_ALREADY_FIBER kNtErrorAlreadyFiber
+#define ERROR_ALREADY_THREAD kNtErrorAlreadyThread
+#define ERROR_STACK_BUFFER_OVERRUN kNtErrorStackBufferOverrun
+#define ERROR_PARAMETER_QUOTA_EXCEEDED kNtErrorParameterQuotaExceeded
+#define ERROR_DEBUGGER_INACTIVE kNtErrorDebuggerInactive
+#define ERROR_DELAY_LOAD_FAILED kNtErrorDelayLoadFailed
+#define ERROR_VDM_DISALLOWED kNtErrorVdmDisallowed
+#define ERROR_UNIDENTIFIED_ERROR kNtErrorUnidentifiedError
+#define ERROR_INVALID_CRUNTIME_PARAMETER kNtErrorInvalidCruntimeParameter
+#define ERROR_BEYOND_VDL kNtErrorBeyondVdl
+#define ERROR_INCOMPATIBLE_SERVICE_SID_TYPE kNtErrorIncompatibleServiceSidType
+#define ERROR_DRIVER_PROCESS_TERMINATED kNtErrorDriverProcessTerminated
+#define ERROR_IMPLEMENTATION_LIMIT kNtErrorImplementationLimit
+#define ERROR_PROCESS_IS_PROTECTED kNtErrorProcessIsProtected
+#define ERROR_SERVICE_NOTIFY_CLIENT_LAGGING kNtErrorServiceNotifyClientLagging
+#define ERROR_DISK_QUOTA_EXCEEDED kNtErrorDiskQuotaExceeded
+#define ERROR_CONTENT_BLOCKED kNtErrorContentBlocked
+#define ERROR_INCOMPATIBLE_SERVICE_PRIVILEGE kNtErrorIncompatibleServicePrivilege
+#define ERROR_APP_HANG kNtErrorAppHang
+#define ERROR_INVALID_LABEL kNtErrorInvalidLabel
+#define ERROR_NOT_ALL_ASSIGNED kNtErrorNotAllAssigned
+#define ERROR_SOME_NOT_MAPPED kNtErrorSomeNotMapped
+#define ERROR_NO_QUOTAS_FOR_ACCOUNT kNtErrorNoQuotasForAccount
+#define ERROR_LOCAL_USER_SESSION_KEY kNtErrorLocalUserSessionKey
+#define ERROR_NULL_LM_PASSWORD kNtErrorNullLmPassword
+#define ERROR_UNKNOWN_REVISION kNtErrorUnknownRevision
+#define ERROR_REVISION_MISMATCH kNtErrorRevisionMismatch
+#define ERROR_INVALID_OWNER kNtErrorInvalidOwner
+#define ERROR_INVALID_PRIMARY_GROUP kNtErrorInvalidPrimaryGroup
+#define ERROR_NO_IMPERSONATION_TOKEN kNtErrorNoImpersonationToken
+#define ERROR_CANT_DISABLE_MANDATORY kNtErrorCantDisableMandatory
+#define ERROR_NO_LOGON_SERVERS kNtErrorNoLogonServers
+#define ERROR_NO_SUCH_LOGON_SESSION kNtErrorNoSuchLogonSession
+#define ERROR_NO_SUCH_PRIVILEGE kNtErrorNoSuchPrivilege
+#define ERROR_PRIVILEGE_NOT_HELD kNtErrorPrivilegeNotHeld
+#define ERROR_INVALID_ACCOUNT_NAME kNtErrorInvalidAccountName
+#define ERROR_USER_EXISTS kNtErrorUserExists
+#define ERROR_NO_SUCH_USER kNtErrorNoSuchUser
+#define ERROR_GROUP_EXISTS kNtErrorGroupExists
+#define ERROR_NO_SUCH_GROUP kNtErrorNoSuchGroup
+#define ERROR_MEMBER_IN_GROUP kNtErrorMemberInGroup
+#define ERROR_MEMBER_NOT_IN_GROUP kNtErrorMemberNotInGroup
+#define ERROR_LAST_ADMIN kNtErrorLastAdmin
+#define ERROR_WRONG_PASSWORD kNtErrorWrongPassword
+#define ERROR_ILL_FORMED_PASSWORD kNtErrorIllFormedPassword
+#define ERROR_PASSWORD_RESTRICTION kNtErrorPasswordRestriction
+#define ERROR_LOGON_FAILURE kNtErrorLogonFailure
+#define ERROR_ACCOUNT_RESTRICTION kNtErrorAccountRestriction
+#define ERROR_INVALID_LOGON_HOURS kNtErrorInvalidLogonHours
+#define ERROR_INVALID_WORKSTATION kNtErrorInvalidWorkstation
+#define ERROR_PASSWORD_EXPIRED kNtErrorPasswordExpired
+#define ERROR_ACCOUNT_DISABLED kNtErrorAccountDisabled
+#define ERROR_NONE_MAPPED kNtErrorNoneMapped
+#define ERROR_TOO_MANY_LUIDS_REQUESTED kNtErrorTooManyLuidsRequested
+#define ERROR_LUIDS_EXHAUSTED kNtErrorLuidsExhausted
+#define ERROR_INVALID_SUB_AUTHORITY kNtErrorInvalidSubAuthority
+#define ERROR_INVALID_ACL kNtErrorInvalidAcl
+#define ERROR_INVALID_SID kNtErrorInvalidSid
+#define ERROR_INVALID_SECURITY_DESCR kNtErrorInvalidSecurityDescr
+#define ERROR_BAD_INHERITANCE_ACL kNtErrorBadInheritanceAcl
+#define ERROR_SERVER_DISABLED kNtErrorServerDisabled
+#define ERROR_SERVER_NOT_DISABLED kNtErrorServerNotDisabled
+#define ERROR_INVALID_ID_AUTHORITY kNtErrorInvalidIdAuthority
+#define ERROR_ALLOTTED_SPACE_EXCEEDED kNtErrorAllottedSpaceExceeded
+#define ERROR_INVALID_GROUP_ATTRIBUTES kNtErrorInvalidGroupAttributes
+#define ERROR_BAD_IMPERSONATION_LEVEL kNtErrorBadImpersonationLevel
+#define ERROR_CANT_OPEN_ANONYMOUS kNtErrorCantOpenAnonymous
+#define ERROR_BAD_VALIDATION_CLASS kNtErrorBadValidationClass
+#define ERROR_BAD_TOKEN_TYPE kNtErrorBadTokenType
+#define ERROR_NO_SECURITY_ON_OBJECT kNtErrorNoSecurityOnObject
+#define ERROR_CANT_ACCESS_DOMAIN_INFO kNtErrorCantAccessDomainInfo
+#define ERROR_INVALID_SERVER_STATE kNtErrorInvalidServerState
+#define ERROR_INVALID_DOMAIN_STATE kNtErrorInvalidDomainState
+#define ERROR_INVALID_DOMAIN_ROLE kNtErrorInvalidDomainRole
+#define ERROR_NO_SUCH_DOMAIN kNtErrorNoSuchDomain
+#define ERROR_DOMAIN_EXISTS kNtErrorDomainExists
+#define ERROR_DOMAIN_LIMIT_EXCEEDED kNtErrorDomainLimitExceeded
+#define ERROR_INTERNAL_DB_CORRUPTION kNtErrorInternalDbCorruption
+#define ERROR_INTERNAL_ERROR kNtErrorInternalError
+#define ERROR_GENERIC_NOT_MAPPED kNtErrorGenericNotMapped
+#define ERROR_BAD_DESCRIPTOR_FORMAT kNtErrorBadDescriptorFormat
+#define ERROR_NOT_LOGON_PROCESS kNtErrorNotLogonProcess
+#define ERROR_LOGON_SESSION_EXISTS kNtErrorLogonSessionExists
+#define ERROR_NO_SUCH_PACKAGE kNtErrorNoSuchPackage
+#define ERROR_BAD_LOGON_SESSION_STATE kNtErrorBadLogonSessionState
+#define ERROR_LOGON_SESSION_COLLISION kNtErrorLogonSessionCollision
+#define ERROR_INVALID_LOGON_TYPE kNtErrorInvalidLogonType
+#define ERROR_CANNOT_IMPERSONATE kNtErrorCannotImpersonate
+#define ERROR_RXACT_INVALID_STATE kNtErrorRxactInvalidState
+#define ERROR_RXACT_COMMIT_FAILURE kNtErrorRxactCommitFailure
+#define ERROR_SPECIAL_ACCOUNT kNtErrorSpecialAccount
+#define ERROR_SPECIAL_GROUP kNtErrorSpecialGroup
+#define ERROR_SPECIAL_USER kNtErrorSpecialUser
+#define ERROR_MEMBERS_PRIMARY_GROUP kNtErrorMembersPrimaryGroup
+#define ERROR_TOKEN_ALREADY_IN_USE kNtErrorTokenAlreadyInUse
+#define ERROR_NO_SUCH_ALIAS kNtErrorNoSuchAlias
+#define ERROR_MEMBER_NOT_IN_ALIAS kNtErrorMemberNotInAlias
+#define ERROR_MEMBER_IN_ALIAS kNtErrorMemberInAlias
+#define ERROR_ALIAS_EXISTS kNtErrorAliasExists
+#define ERROR_LOGON_NOT_GRANTED kNtErrorLogonNotGranted
+#define ERROR_TOO_MANY_SECRETS kNtErrorTooManySecrets
+#define ERROR_SECRET_TOO_LONG kNtErrorSecretTooLong
+#define ERROR_INTERNAL_DB_ERROR kNtErrorInternalDbError
+#define ERROR_TOO_MANY_CONTEXT_IDS kNtErrorTooManyContextIds
+#define ERROR_LOGON_TYPE_NOT_GRANTED kNtErrorLogonTypeNotGranted
+#define ERROR_NT_CROSS_ENCRYPTION_REQUIRED kNtErrorNtCrossEncryptionRequired
+#define ERROR_NO_SUCH_MEMBER kNtErrorNoSuchMember
+#define ERROR_INVALID_MEMBER kNtErrorInvalidMember
+#define ERROR_TOO_MANY_SIDS kNtErrorTooManySids
+#define ERROR_LM_CROSS_ENCRYPTION_REQUIRED kNtErrorLmCrossEncryptionRequired
+#define ERROR_NO_INHERITANCE kNtErrorNoInheritance
+#define ERROR_FILE_CORRUPT kNtErrorFileCorrupt
+#define ERROR_DISK_CORRUPT kNtErrorDiskCorrupt
+#define ERROR_NO_USER_SESSION_KEY kNtErrorNoUserSessionKey
+#define ERROR_LICENSE_QUOTA_EXCEEDED kNtErrorLicenseQuotaExceeded
+#define ERROR_WRONG_TARGET_NAME kNtErrorWrongTargetName
+#define ERROR_MUTUAL_AUTH_FAILED kNtErrorMutualAuthFailed
+#define ERROR_TIME_SKEW kNtErrorTimeSkew
+#define ERROR_CURRENT_DOMAIN_NOT_ALLOWED kNtErrorCurrentDomainNotAllowed
+#define ERROR_INVALID_WINDOW_HANDLE kNtErrorInvalidWindowHandle
+#define ERROR_INVALID_MENU_HANDLE kNtErrorInvalidMenuHandle
+#define ERROR_INVALID_CURSOR_HANDLE kNtErrorInvalidCursorHandle
+#define ERROR_INVALID_ACCEL_HANDLE kNtErrorInvalidAccelHandle
+#define ERROR_INVALID_HOOK_HANDLE kNtErrorInvalidHookHandle
+#define ERROR_INVALID_DWP_HANDLE kNtErrorInvalidDwpHandle
+#define ERROR_TLW_WITH_WSCHILD kNtErrorTlwWithWschild
+#define ERROR_CANNOT_FIND_WND_CLASS kNtErrorCannotFindWndClass
+#define ERROR_WINDOW_OF_OTHER_THREAD kNtErrorWindowOfOtherThread
+#define ERROR_HOTKEY_ALREADY_REGISTERED kNtErrorHotkeyAlreadyRegistered
+#define ERROR_CLASS_ALREADY_EXISTS kNtErrorClassAlreadyExists
+#define ERROR_CLASS_DOES_NOT_EXIST kNtErrorClassDoesNotExist
+#define ERROR_CLASS_HAS_WINDOWS kNtErrorClassHasWindows
+#define ERROR_INVALID_INDEX kNtErrorInvalidIndex
+#define ERROR_INVALID_ICON_HANDLE kNtErrorInvalidIconHandle
+#define ERROR_PRIVATE_DIALOG_INDEX kNtErrorPrivateDialogIndex
+#define ERROR_LISTBOX_ID_NOT_FOUND kNtErrorListboxIdNotFound
+#define ERROR_NO_WILDCARD_CHARACTERS kNtErrorNoWildcardCharacters
+#define ERROR_CLIPBOARD_NOT_OPEN kNtErrorClipboardNotOpen
+#define ERROR_HOTKEY_NOT_REGISTERED kNtErrorHotkeyNotRegistered
+#define ERROR_WINDOW_NOT_DIALOG kNtErrorWindowNotDialog
+#define ERROR_CONTROL_ID_NOT_FOUND kNtErrorControlIdNotFound
+#define ERROR_INVALID_COMBOBOX_MESSAGE kNtErrorInvalidComboboxMessage
+#define ERROR_WINDOW_NOT_COMBOBOX kNtErrorWindowNotCombobox
+#define ERROR_INVALID_EDIT_HEIGHT kNtErrorInvalidEditHeight
+#define ERROR_DC_NOT_FOUND kNtErrorDcNotFound
+#define ERROR_INVALID_HOOK_FILTER kNtErrorInvalidHookFilter
+#define ERROR_INVALID_FILTER_PROC kNtErrorInvalidFilterProc
+#define ERROR_HOOK_NEEDS_HMOD kNtErrorHookNeedsHmod
+#define ERROR_GLOBAL_ONLY_HOOK kNtErrorGlobalOnlyHook
+#define ERROR_JOURNAL_HOOK_SET kNtErrorJournalHookSet
+#define ERROR_HOOK_NOT_INSTALLED kNtErrorHookNotInstalled
+#define ERROR_INVALID_LB_MESSAGE kNtErrorInvalidLbMessage
+#define ERROR_SETCOUNT_ON_BAD_LB kNtErrorSetcountOnBadLb
+#define ERROR_LB_WITHOUT_TABSTOPS kNtErrorLbWithoutTabstops
+#define ERROR_DESTROY_OBJECT_OF_OTHER_THREAD kNtErrorDestroyObjectOfOtherThread
+#define ERROR_CHILD_WINDOW_MENU kNtErrorChildWindowMenu
+#define ERROR_NO_SYSTEM_MENU kNtErrorNoSystemMenu
+#define ERROR_INVALID_MSGBOX_STYLE kNtErrorInvalidMsgboxStyle
+#define ERROR_INVALID_SPI_VALUE kNtErrorInvalidSpiValue
+#define ERROR_SCREEN_ALREADY_LOCKED kNtErrorScreenAlreadyLocked
+#define ERROR_HWNDS_HAVE_DIFF_PARENT kNtErrorHwndsHaveDiffParent
+#define ERROR_NOT_CHILD_WINDOW kNtErrorNotChildWindow
+#define ERROR_INVALID_GW_COMMAND kNtErrorInvalidGwCommand
+#define ERROR_INVALID_THREAD_ID kNtErrorInvalidThreadId
+#define ERROR_NON_MDICHILD_WINDOW kNtErrorNonMdichildWindow
+#define ERROR_POPUP_ALREADY_ACTIVE kNtErrorPopupAlreadyActive
+#define ERROR_NO_SCROLLBARS kNtErrorNoScrollbars
+#define ERROR_INVALID_SCROLLBAR_RANGE kNtErrorInvalidScrollbarRange
+#define ERROR_INVALID_SHOWWIN_COMMAND kNtErrorInvalidShowwinCommand
+#define ERROR_NO_SYSTEM_RESOURCES kNtErrorNoSystemResources
+#define ERROR_NONPAGED_SYSTEM_RESOURCES kNtErrorNonpagedSystemResources
+#define ERROR_PAGED_SYSTEM_RESOURCES kNtErrorPagedSystemResources
+#define ERROR_WORKING_SET_QUOTA kNtErrorWorkingSetQuota
+#define ERROR_PAGEFILE_QUOTA kNtErrorPagefileQuota
+#define ERROR_COMMITMENT_LIMIT kNtErrorCommitmentLimit
+#define ERROR_MENU_ITEM_NOT_FOUND kNtErrorMenuItemNotFound
+#define ERROR_INVALID_KEYBOARD_HANDLE kNtErrorInvalidKeyboardHandle
+#define ERROR_HOOK_TYPE_NOT_ALLOWED kNtErrorHookTypeNotAllowed
+#define ERROR_REQUIRES_INTERACTIVE_WINDOWSTATION kNtErrorRequiresInteractiveWindowstation
+#define ERROR_TIMEOUT kNtErrorTimeout
+#define ERROR_INVALID_MONITOR_HANDLE kNtErrorInvalidMonitorHandle
+#define ERROR_INCORRECT_SIZE kNtErrorIncorrectSize
+#define ERROR_SYMLINK_CLASS_DISABLED kNtErrorSymlinkClassDisabled
+#define ERROR_SYMLINK_NOT_SUPPORTED kNtErrorSymlinkNotSupported
+#define ERROR_XML_PARSE_ERROR kNtErrorXmlParseError
+#define ERROR_XMLDSIG_ERROR kNtErrorXmldsigError
+#define ERROR_RESTART_APPLICATION kNtErrorRestartApplication
+#define ERROR_WRONG_COMPARTMENT kNtErrorWrongCompartment
+#define ERROR_AUTHIP_FAILURE kNtErrorAuthipFailure
+#define ERROR_NO_NVRAM_RESOURCES kNtErrorNoNvramResources
+#define ERROR_NOT_GUI_PROCESS kNtErrorNotGuiProcess
+#define ERROR_EVENTLOG_FILE_CORRUPT kNtErrorEventlogFileCorrupt
+#define ERROR_EVENTLOG_CANT_START kNtErrorEventlogCantStart
+#define ERROR_LOG_FILE_FULL kNtErrorLogFileFull
+#define ERROR_EVENTLOG_FILE_CHANGED kNtErrorEventlogFileChanged
+#define ERROR_CONTAINER_ASSIGNED kNtErrorContainerAssigned
+#define ERROR_JOB_NO_CONTAINER kNtErrorJobNoContainer
+#define ERROR_INVALID_TASK_NAME kNtErrorInvalidTaskName
+#define ERROR_INVALID_TASK_INDEX kNtErrorInvalidTaskIndex
+#define ERROR_THREAD_ALREADY_IN_TASK kNtErrorThreadAlreadyInTask
+#define ERROR_INSTALL_SERVICE_FAILURE kNtErrorInstallServiceFailure
+#define ERROR_INSTALL_USEREXIT kNtErrorInstallUserexit
+#define ERROR_INSTALL_FAILURE kNtErrorInstallFailure
+#define ERROR_INSTALL_SUSPEND kNtErrorInstallSuspend
+#define ERROR_UNKNOWN_PRODUCT kNtErrorUnknownProduct
+#define ERROR_UNKNOWN_FEATURE kNtErrorUnknownFeature
+#define ERROR_UNKNOWN_COMPONENT kNtErrorUnknownComponent
+#define ERROR_UNKNOWN_PROPERTY kNtErrorUnknownProperty
+#define ERROR_INVALID_HANDLE_STATE kNtErrorInvalidHandleState
+#define ERROR_BAD_CONFIGURATION kNtErrorBadConfiguration
+#define ERROR_INDEX_ABSENT kNtErrorIndexAbsent
+#define ERROR_INSTALL_SOURCE_ABSENT kNtErrorInstallSourceAbsent
+#define ERROR_INSTALL_PACKAGE_VERSION kNtErrorInstallPackageVersion
+#define ERROR_PRODUCT_UNINSTALLED kNtErrorProductUninstalled
+#define ERROR_BAD_QUERY_SYNTAX kNtErrorBadQuerySyntax
+#define ERROR_INVALID_FIELD kNtErrorInvalidField
+#define ERROR_DEVICE_REMOVED kNtErrorDeviceRemoved
+#define ERROR_INSTALL_ALREADY_RUNNING kNtErrorInstallAlreadyRunning
+#define ERROR_INSTALL_PACKAGE_OPEN_FAILED kNtErrorInstallPackageOpenFailed
+#define ERROR_INSTALL_PACKAGE_INVALID kNtErrorInstallPackageInvalid
+#define ERROR_INSTALL_UI_FAILURE kNtErrorInstallUiFailure
+#define ERROR_INSTALL_LOG_FAILURE kNtErrorInstallLogFailure
+#define ERROR_INSTALL_LANGUAGE_UNSUPPORTED kNtErrorInstallLanguageUnsupported
+#define ERROR_INSTALL_TRANSFORM_FAILURE kNtErrorInstallTransformFailure
+#define ERROR_INSTALL_PACKAGE_REJECTED kNtErrorInstallPackageRejected
+#define ERROR_FUNCTION_NOT_CALLED kNtErrorFunctionNotCalled
+#define ERROR_FUNCTION_FAILED kNtErrorFunctionFailed
+#define ERROR_INVALID_TABLE kNtErrorInvalidTable
+#define ERROR_DATATYPE_MISMATCH kNtErrorDatatypeMismatch
+#define ERROR_UNSUPPORTED_TYPE kNtErrorUnsupportedType
+#define ERROR_CREATE_FAILED kNtErrorCreateFailed
+#define ERROR_INSTALL_TEMP_UNWRITABLE kNtErrorInstallTempUnwritable
+#define ERROR_INSTALL_PLATFORM_UNSUPPORTED kNtErrorInstallPlatformUnsupported
+#define ERROR_INSTALL_NOTUSED kNtErrorInstallNotused
+#define ERROR_PATCH_PACKAGE_OPEN_FAILED kNtErrorPatchPackageOpenFailed
+#define ERROR_PATCH_PACKAGE_INVALID kNtErrorPatchPackageInvalid
+#define ERROR_PATCH_PACKAGE_UNSUPPORTED kNtErrorPatchPackageUnsupported
+#define ERROR_PRODUCT_VERSION kNtErrorProductVersion
+#define ERROR_INVALID_COMMAND_LINE kNtErrorInvalidCommandLine
+#define ERROR_INSTALL_REMOTE_DISALLOWED kNtErrorInstallRemoteDisallowed
+#define ERROR_SUCCESS_REBOOT_INITIATED kNtErrorSuccessRebootInitiated
+#define ERROR_PATCH_TARGET_NOT_FOUND kNtErrorPatchTargetNotFound
+#define ERROR_PATCH_PACKAGE_REJECTED kNtErrorPatchPackageRejected
+#define ERROR_INSTALL_TRANSFORM_REJECTED kNtErrorInstallTransformRejected
+#define ERROR_INSTALL_REMOTE_PROHIBITED kNtErrorInstallRemoteProhibited
+#define ERROR_PATCH_REMOVAL_UNSUPPORTED kNtErrorPatchRemovalUnsupported
+#define ERROR_UNKNOWN_PATCH kNtErrorUnknownPatch
+#define ERROR_PATCH_NO_SEQUENCE kNtErrorPatchNoSequence
+#define ERROR_PATCH_REMOVAL_DISALLOWED kNtErrorPatchRemovalDisallowed
+#define ERROR_INVALID_PATCH_XML kNtErrorInvalidPatchXml
+#define ERROR_PATCH_MANAGED_ADVERTISED_PRODUCT kNtErrorPatchManagedAdvertisedProduct
+#define ERROR_INSTALL_SERVICE_SAFEBOOT kNtErrorInstallServiceSafeboot
+#define ERROR_FAIL_FAST_EXCEPTION kNtErrorFailFastException
+#define ERROR_INSTALL_REJECTED kNtErrorInstallRejected
+#define ERROR_DYNAMIC_CODE_BLOCKED kNtErrorDynamicCodeBlocked
+#define ERROR_NOT_SAME_OBJECT kNtErrorNotSameObject
+#define ERROR_STRICT_CFG_VIOLATION kNtErrorStrictCfgViolation
+#define ERROR_SET_CONTEXT_DENIED kNtErrorSetContextDenied
+#define ERROR_CROSS_PARTITION_VIOLATION kNtErrorCrossPartitionViolation
+#define ERROR_INVALID_USER_BUFFER kNtErrorInvalidUserBuffer
+#define ERROR_UNRECOGNIZED_MEDIA kNtErrorUnrecognizedMedia
+#define ERROR_NO_TRUST_LSA_SECRET kNtErrorNoTrustLsaSecret
+#define ERROR_NO_TRUST_SAM_ACCOUNT kNtErrorNoTrustSamAccount
+#define ERROR_TRUSTED_DOMAIN_FAILURE kNtErrorTrustedDomainFailure
+#define ERROR_TRUSTED_RELATIONSHIP_FAILURE kNtErrorTrustedRelationshipFailure
+#define ERROR_TRUST_FAILURE kNtErrorTrustFailure
+#define ERROR_NETLOGON_NOT_STARTED kNtErrorNetlogonNotStarted
+#define ERROR_ACCOUNT_EXPIRED kNtErrorAccountExpired
+#define ERROR_REDIRECTOR_HAS_OPEN_HANDLES kNtErrorRedirectorHasOpenHandles
+#define ERROR_PRINTER_DRIVER_ALREADY_INSTALLED kNtErrorPrinterDriverAlreadyInstalled
+#define ERROR_UNKNOWN_PORT kNtErrorUnknownPort
+#define ERROR_UNKNOWN_PRINTER_DRIVER kNtErrorUnknownPrinterDriver
+#define ERROR_UNKNOWN_PRINTPROCESSOR kNtErrorUnknownPrintprocessor
+#define ERROR_INVALID_SEPARATOR_FILE kNtErrorInvalidSeparatorFile
+#define ERROR_INVALID_PRIORITY kNtErrorInvalidPriority
+#define ERROR_INVALID_PRINTER_NAME kNtErrorInvalidPrinterName
+#define ERROR_PRINTER_ALREADY_EXISTS kNtErrorPrinterAlreadyExists
+#define ERROR_INVALID_PRINTER_COMMAND kNtErrorInvalidPrinterCommand
+#define ERROR_INVALID_DATATYPE kNtErrorInvalidDatatype
+#define ERROR_INVALID_ENVIRONMENT kNtErrorInvalidEnvironment
+#define ERROR_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT kNtErrorNologonInterdomainTrustAccount
+#define ERROR_NOLOGON_WORKSTATION_TRUST_ACCOUNT kNtErrorNologonWorkstationTrustAccount
+#define ERROR_NOLOGON_SERVER_TRUST_ACCOUNT kNtErrorNologonServerTrustAccount
+#define ERROR_DOMAIN_TRUST_INCONSISTENT kNtErrorDomainTrustInconsistent
+#define ERROR_SERVER_HAS_OPEN_HANDLES kNtErrorServerHasOpenHandles
+#define ERROR_RESOURCE_DATA_NOT_FOUND kNtErrorResourceDataNotFound
+#define ERROR_RESOURCE_TYPE_NOT_FOUND kNtErrorResourceTypeNotFound
+#define ERROR_RESOURCE_NAME_NOT_FOUND kNtErrorResourceNameNotFound
+#define ERROR_RESOURCE_LANG_NOT_FOUND kNtErrorResourceLangNotFound
+#define ERROR_NOT_ENOUGH_QUOTA kNtErrorNotEnoughQuota
+#define ERROR_INVALID_TIME kNtErrorInvalidTime
+#define ERROR_INVALID_FORM_NAME kNtErrorInvalidFormName
+#define ERROR_INVALID_FORM_SIZE kNtErrorInvalidFormSize
+#define ERROR_ALREADY_WAITING kNtErrorAlreadyWaiting
+#define ERROR_PRINTER_DELETED kNtErrorPrinterDeleted
+#define ERROR_INVALID_PRINTER_STATE kNtErrorInvalidPrinterState
+#define ERROR_PASSWORD_MUST_CHANGE kNtErrorPasswordMustChange
+#define ERROR_DOMAIN_CONTROLLER_NOT_FOUND kNtErrorDomainControllerNotFound
+#define ERROR_ACCOUNT_LOCKED_OUT kNtErrorAccountLockedOut
+#define ERROR_NO_SITENAME kNtErrorNoSitename
+#define ERROR_CANT_ACCESS_FILE kNtErrorCantAccessFile
+#define ERROR_CANT_RESOLVE_FILENAME kNtErrorCantResolveFilename
+#define ERROR_KM_DRIVER_BLOCKED kNtErrorKmDriverBlocked
+#define ERROR_CONTEXT_EXPIRED kNtErrorContextExpired
+#define ERROR_PER_USER_TRUST_QUOTA_EXCEEDED kNtErrorPerUserTrustQuotaExceeded
+#define ERROR_ALL_USER_TRUST_QUOTA_EXCEEDED kNtErrorAllUserTrustQuotaExceeded
+#define ERROR_USER_DELETE_TRUST_QUOTA_EXCEEDED kNtErrorUserDeleteTrustQuotaExceeded
+#define ERROR_AUTHENTICATION_FIREWALL_FAILED kNtErrorAuthenticationFirewallFailed
+#define ERROR_REMOTE_PRINT_CONNECTIONS_BLOCKED kNtErrorRemotePrintConnectionsBlocked
+#define ERROR_NTLM_BLOCKED kNtErrorNtlmBlocked
+#define ERROR_PASSWORD_CHANGE_REQUIRED kNtErrorPasswordChangeRequired
+#define ERROR_LOST_MODE_LOGON_RESTRICTION kNtErrorLostModeLogonRestriction
+#define ERROR_INVALID_PIXEL_FORMAT kNtErrorInvalidPixelFormat
+#define ERROR_BAD_DRIVER kNtErrorBadDriver
+#define ERROR_INVALID_WINDOW_STYLE kNtErrorInvalidWindowStyle
+#define ERROR_METAFILE_NOT_SUPPORTED kNtErrorMetafileNotSupported
+#define ERROR_TRANSFORM_NOT_SUPPORTED kNtErrorTransformNotSupported
+#define ERROR_CLIPPING_NOT_SUPPORTED kNtErrorClippingNotSupported
+#define ERROR_INVALID_CMM kNtErrorInvalidCmm
+#define ERROR_INVALID_PROFILE kNtErrorInvalidProfile
+#define ERROR_TAG_NOT_FOUND kNtErrorTagNotFound
+#define ERROR_TAG_NOT_PRESENT kNtErrorTagNotPresent
+#define ERROR_DUPLICATE_TAG kNtErrorDuplicateTag
+#define ERROR_PROFILE_NOT_ASSOCIATED_WITH_DEVICE kNtErrorProfileNotAssociatedWithDevice
+#define ERROR_PROFILE_NOT_FOUND kNtErrorProfileNotFound
+#define ERROR_INVALID_COLORSPACE kNtErrorInvalidColorspace
+#define ERROR_ICM_NOT_ENABLED kNtErrorIcmNotEnabled
+#define ERROR_DELETING_ICM_XFORM kNtErrorDeletingIcmXform
+#define ERROR_INVALID_TRANSFORM kNtErrorInvalidTransform
+#define ERROR_COLORSPACE_MISMATCH kNtErrorColorspaceMismatch
+#define ERROR_INVALID_COLORINDEX kNtErrorInvalidColorindex
+#define ERROR_PROFILE_DOES_NOT_MATCH_DEVICE kNtErrorProfileDoesNotMatchDevice
+#define ERROR_CONNECTED_OTHER_PASSWORD kNtErrorConnectedOtherPassword
+#define ERROR_CONNECTED_OTHER_PASSWORD_DEFAULT kNtErrorConnectedOtherPasswordDefault
+#define ERROR_BAD_USERNAME kNtErrorBadUsername
+#define ERROR_NOT_CONNECTED kNtErrorNotConnected
+#define ERROR_OPEN_FILES kNtErrorOpenFiles
+#define ERROR_ACTIVE_CONNECTIONS kNtErrorActiveConnections
+#define ERROR_DEVICE_IN_USE kNtErrorDeviceInUse
+#define ERROR_UNKNOWN_PRINT_MONITOR kNtErrorUnknownPrintMonitor
+#define ERROR_PRINTER_DRIVER_IN_USE kNtErrorPrinterDriverInUse
+#define ERROR_SPOOL_FILE_NOT_FOUND kNtErrorSpoolFileNotFound
+#define ERROR_SPL_NO_STARTDOC kNtErrorSplNoStartdoc
+#define ERROR_SPL_NO_ADDJOB kNtErrorSplNoAddjob
+#define ERROR_PRINT_PROCESSOR_ALREADY_INSTALLED kNtErrorPrintProcessorAlreadyInstalled
+#define ERROR_PRINT_MONITOR_ALREADY_INSTALLED kNtErrorPrintMonitorAlreadyInstalled
+#define ERROR_INVALID_PRINT_MONITOR kNtErrorInvalidPrintMonitor
+#define ERROR_PRINT_MONITOR_IN_USE kNtErrorPrintMonitorInUse
+#define ERROR_PRINTER_HAS_JOBS_QUEUED kNtErrorPrinterHasJobsQueued
+#define ERROR_SUCCESS_REBOOT_REQUIRED kNtErrorSuccessRebootRequired
+#define ERROR_SUCCESS_RESTART_REQUIRED kNtErrorSuccessRestartRequired
+#define ERROR_PRINTER_NOT_FOUND kNtErrorPrinterNotFound
+#define ERROR_PRINTER_DRIVER_WARNED kNtErrorPrinterDriverWarned
+#define ERROR_PRINTER_DRIVER_BLOCKED kNtErrorPrinterDriverBlocked
+#define ERROR_PRINTER_DRIVER_PACKAGE_IN_USE kNtErrorPrinterDriverPackageInUse
+#define ERROR_CORE_DRIVER_PACKAGE_NOT_FOUND kNtErrorCoreDriverPackageNotFound
+#define ERROR_FAIL_REBOOT_REQUIRED kNtErrorFailRebootRequired
+#define ERROR_FAIL_REBOOT_INITIATED kNtErrorFailRebootInitiated
+#define ERROR_PRINTER_DRIVER_DOWNLOAD_NEEDED kNtErrorPrinterDriverDownloadNeeded
+#define ERROR_PRINT_JOB_RESTART_REQUIRED kNtErrorPrintJobRestartRequired
+#define ERROR_INVALID_PRINTER_DRIVER_MANIFEST kNtErrorInvalidPrinterDriverManifest
+#define ERROR_PRINTER_NOT_SHAREABLE kNtErrorPrinterNotShareable
+#define ERROR_REQUEST_PAUSED kNtErrorRequestPaused
+#define ERROR_APPEXEC_CONDITION_NOT_SATISFIED kNtErrorAppexecConditionNotSatisfied
+#define ERROR_APPEXEC_HANDLE_INVALIDATED kNtErrorAppexecHandleInvalidated
+#define ERROR_APPEXEC_INVALID_HOST_GENERATION kNtErrorAppexecInvalidHostGeneration
+#define ERROR_APPEXEC_UNEXPECTED_PROCESS_REGISTRATION kNtErrorAppexecUnexpectedProcessRegistration
+#define ERROR_APPEXEC_INVALID_HOST_STATE kNtErrorAppexecInvalidHostState
+#define ERROR_APPEXEC_NO_DONOR kNtErrorAppexecNoDonor
+#define ERROR_APPEXEC_HOST_ID_MISMATCH kNtErrorAppexecHostIdMismatch
+#define ERROR_IO_REISSUE_AS_CACHED kNtErrorIoReissueAsCached
+#define ERROR_WINS_INTERNAL kNtErrorWinsInternal
+#define ERROR_CAN_NOT_DEL_LOCAL_WINS kNtErrorCanNotDelLocalWins
+#define ERROR_STATIC_INIT kNtErrorStaticInit
+#define ERROR_INC_BACKUP kNtErrorIncBackup
+#define ERROR_FULL_BACKUP kNtErrorFullBackup
+#define ERROR_REC_NON_EXISTENT kNtErrorRecNonExistent
+#define ERROR_RPL_NOT_ALLOWED kNtErrorRplNotAllowed
+#define ERROR_DHCP_ADDRESS_CONFLICT kNtErrorDhcpAddressConflict
+#define ERROR_WMI_GUID_NOT_FOUND kNtErrorWmiGuidNotFound
+#define ERROR_WMI_INSTANCE_NOT_FOUND kNtErrorWmiInstanceNotFound
+#define ERROR_WMI_ITEMID_NOT_FOUND kNtErrorWmiItemidNotFound
+#define ERROR_WMI_TRY_AGAIN kNtErrorWmiTryAgain
+#define ERROR_WMI_DP_NOT_FOUND kNtErrorWmiDpNotFound
+#define ERROR_WMI_UNRESOLVED_INSTANCE_REF kNtErrorWmiUnresolvedInstanceRef
+#define ERROR_WMI_ALREADY_ENABLED kNtErrorWmiAlreadyEnabled
+#define ERROR_WMI_GUID_DISCONNECTED kNtErrorWmiGuidDisconnected
+#define ERROR_WMI_SERVER_UNAVAILABLE kNtErrorWmiServerUnavailable
+#define ERROR_WMI_DP_FAILED kNtErrorWmiDpFailed
+#define ERROR_WMI_INVALID_MOF kNtErrorWmiInvalidMof
+#define ERROR_WMI_INVALID_REGINFO kNtErrorWmiInvalidReginfo
+#define ERROR_WMI_ALREADY_DISABLED kNtErrorWmiAlreadyDisabled
+#define ERROR_WMI_READ_ONLY kNtErrorWmiReadOnly
+#define ERROR_WMI_SET_FAILURE kNtErrorWmiSetFailure
+#define ERROR_NOT_APPCONTAINER kNtErrorNotAppcontainer
+#define ERROR_APPCONTAINER_REQUIRED kNtErrorAppcontainerRequired
+#define ERROR_NOT_SUPPORTED_IN_APPCONTAINER kNtErrorNotSupportedInAppcontainer
+#define ERROR_INVALID_PACKAGE_SID_LENGTH kNtErrorInvalidPackageSidLength
+#define ERROR_INVALID_MEDIA kNtErrorInvalidMedia
+#define ERROR_INVALID_LIBRARY kNtErrorInvalidLibrary
+#define ERROR_INVALID_MEDIA_POOL kNtErrorInvalidMediaPool
+#define ERROR_DRIVE_MEDIA_MISMATCH kNtErrorDriveMediaMismatch
+#define ERROR_MEDIA_OFFLINE kNtErrorMediaOffline
+#define ERROR_LIBRARY_OFFLINE kNtErrorLibraryOffline
+#define ERROR_EMPTY kNtErrorEmpty
+#define ERROR_NOT_EMPTY kNtErrorNotEmpty
+#define ERROR_MEDIA_UNAVAILABLE kNtErrorMediaUnavailable
+#define ERROR_RESOURCE_DISABLED kNtErrorResourceDisabled
+#define ERROR_INVALID_CLEANER kNtErrorInvalidCleaner
+#define ERROR_UNABLE_TO_CLEAN kNtErrorUnableToClean
+#define ERROR_OBJECT_NOT_FOUND kNtErrorObjectNotFound
+#define ERROR_DATABASE_FAILURE kNtErrorDatabaseFailure
+#define ERROR_DATABASE_FULL kNtErrorDatabaseFull
+#define ERROR_MEDIA_INCOMPATIBLE kNtErrorMediaIncompatible
+#define ERROR_RESOURCE_NOT_PRESENT kNtErrorResourceNotPresent
+#define ERROR_INVALID_OPERATION kNtErrorInvalidOperation
+#define ERROR_MEDIA_NOT_AVAILABLE kNtErrorMediaNotAvailable
+#define ERROR_DEVICE_NOT_AVAILABLE kNtErrorDeviceNotAvailable
+#define ERROR_REQUEST_REFUSED kNtErrorRequestRefused
+#define ERROR_INVALID_DRIVE_OBJECT kNtErrorInvalidDriveObject
+#define ERROR_LIBRARY_FULL kNtErrorLibraryFull
+#define ERROR_MEDIUM_NOT_ACCESSIBLE kNtErrorMediumNotAccessible
+#define ERROR_UNABLE_TO_LOAD_MEDIUM kNtErrorUnableToLoadMedium
+#define ERROR_UNABLE_TO_INVENTORY_DRIVE kNtErrorUnableToInventoryDrive
+#define ERROR_UNABLE_TO_INVENTORY_SLOT kNtErrorUnableToInventorySlot
+#define ERROR_UNABLE_TO_INVENTORY_TRANSPORT kNtErrorUnableToInventoryTransport
+#define ERROR_TRANSPORT_FULL kNtErrorTransportFull
+#define ERROR_CONTROLLING_IEPORT kNtErrorControllingIeport
+#define ERROR_UNABLE_TO_EJECT_MOUNTED_MEDIA kNtErrorUnableToEjectMountedMedia
+#define ERROR_CLEANER_SLOT_SET kNtErrorCleanerSlotSet
+#define ERROR_CLEANER_SLOT_NOT_SET kNtErrorCleanerSlotNotSet
+#define ERROR_CLEANER_CARTRIDGE_SPENT kNtErrorCleanerCartridgeSpent
+#define ERROR_UNEXPECTED_OMID kNtErrorUnexpectedOmid
+#define ERROR_CANT_DELETE_LAST_ITEM kNtErrorCantDeleteLastItem
+#define ERROR_MESSAGE_EXCEEDS_MAX_SIZE kNtErrorMessageExceedsMaxSize
+#define ERROR_VOLUME_CONTAINS_SYS_FILES kNtErrorVolumeContainsSysFiles
+#define ERROR_INDIGENOUS_TYPE kNtErrorIndigenousType
+#define ERROR_NO_SUPPORTING_DRIVES kNtErrorNoSupportingDrives
+#define ERROR_CLEANER_CARTRIDGE_INSTALLED kNtErrorCleanerCartridgeInstalled
+#define ERROR_IEPORT_FULL kNtErrorIeportFull
+#define ERROR_FILE_OFFLINE kNtErrorFileOffline
+#define ERROR_REMOTE_STORAGE_NOT_ACTIVE kNtErrorRemoteStorageNotActive
+#define ERROR_REMOTE_STORAGE_MEDIA_ERROR kNtErrorRemoteStorageMediaError
+#define ERROR_NOT_AREPARSE_POINT kNtErrorNotAReparsePoint
+#define ERROR_REPARSE_ATTRIBUTE_CONFLICT kNtErrorReparseAttributeConflict
+#define ERROR_INVALID_REPARSE_DATA kNtErrorInvalidReparseData
+#define ERROR_REPARSE_TAG_INVALID kNtErrorReparseTagInvalid
+#define ERROR_REPARSE_TAG_MISMATCH kNtErrorReparseTagMismatch
+#define ERROR_REPARSE_POINT_ENCOUNTERED kNtErrorReparsePointEncountered
+#define ERROR_APP_DATA_NOT_FOUND kNtErrorAppDataNotFound
+#define ERROR_APP_DATA_EXPIRED kNtErrorAppDataExpired
+#define ERROR_APP_DATA_CORRUPT kNtErrorAppDataCorrupt
+#define ERROR_APP_DATA_LIMIT_EXCEEDED kNtErrorAppDataLimitExceeded
+#define ERROR_APP_DATA_REBOOT_REQUIRED kNtErrorAppDataRebootRequired
+#define ERROR_SECUREBOOT_ROLLBACK_DETECTED kNtErrorSecurebootRollbackDetected
+#define ERROR_SECUREBOOT_POLICY_VIOLATION kNtErrorSecurebootPolicyViolation
+#define ERROR_SECUREBOOT_INVALID_POLICY kNtErrorSecurebootInvalidPolicy
+#define ERROR_SECUREBOOT_POLICY_PUBLISHER_NOT_FOUND kNtErrorSecurebootPolicyPublisherNotFound
+#define ERROR_SECUREBOOT_POLICY_NOT_SIGNED kNtErrorSecurebootPolicyNotSigned
+#define ERROR_SECUREBOOT_NOT_ENABLED kNtErrorSecurebootNotEnabled
+#define ERROR_SECUREBOOT_FILE_REPLACED kNtErrorSecurebootFileReplaced
+#define ERROR_SECUREBOOT_POLICY_NOT_AUTHORIZED kNtErrorSecurebootPolicyNotAuthorized
+#define ERROR_SECUREBOOT_POLICY_UNKNOWN kNtErrorSecurebootPolicyUnknown
+#define ERROR_SECUREBOOT_POLICY_MISSING_ANTIROLLBACKVERSION kNtErrorSecurebootPolicyMissingAntirollbackversion
+#define ERROR_SECUREBOOT_PLATFORM_ID_MISMATCH kNtErrorSecurebootPlatformIdMismatch
+#define ERROR_SECUREBOOT_POLICY_ROLLBACK_DETECTED kNtErrorSecurebootPolicyRollbackDetected
+#define ERROR_SECUREBOOT_POLICY_UPGRADE_MISMATCH kNtErrorSecurebootPolicyUpgradeMismatch
+#define ERROR_SECUREBOOT_REQUIRED_POLICY_FILE_MISSING kNtErrorSecurebootRequiredPolicyFileMissing
+#define ERROR_SECUREBOOT_NOT_BASE_POLICY kNtErrorSecurebootNotBasePolicy
+#define ERROR_SECUREBOOT_NOT_SUPPLEMENTAL_POLICY kNtErrorSecurebootNotSupplementalPolicy
+#define ERROR_OFFLOAD_READ_FLT_NOT_SUPPORTED kNtErrorOffloadReadFltNotSupported
+#define ERROR_OFFLOAD_WRITE_FLT_NOT_SUPPORTED kNtErrorOffloadWriteFltNotSupported
+#define ERROR_OFFLOAD_READ_FILE_NOT_SUPPORTED kNtErrorOffloadReadFileNotSupported
+#define ERROR_OFFLOAD_WRITE_FILE_NOT_SUPPORTED kNtErrorOffloadWriteFileNotSupported
+#define ERROR_ALREADY_HAS_STREAM_ID kNtErrorAlreadyHasStreamId
+#define ERROR_SMR_GARBAGE_COLLECTION_REQUIRED kNtErrorSmrGarbageCollectionRequired
+#define ERROR_WOF_WIM_HEADER_CORRUPT kNtErrorWofWimHeaderCorrupt
+#define ERROR_WOF_WIM_RESOURCE_TABLE_CORRUPT kNtErrorWofWimResourceTableCorrupt
+#define ERROR_WOF_FILE_RESOURCE_TABLE_CORRUPT kNtErrorWofFileResourceTableCorrupt
+#define ERROR_VOLUME_NOT_SIS_ENABLED kNtErrorVolumeNotSisEnabled
+#define ERROR_SYSTEM_INTEGRITY_ROLLBACK_DETECTED kNtErrorSystemIntegrityRollbackDetected
+#define ERROR_SYSTEM_INTEGRITY_POLICY_VIOLATION kNtErrorSystemIntegrityPolicyViolation
+#define ERROR_SYSTEM_INTEGRITY_INVALID_POLICY kNtErrorSystemIntegrityInvalidPolicy
+#define ERROR_SYSTEM_INTEGRITY_POLICY_NOT_SIGNED kNtErrorSystemIntegrityPolicyNotSigned
+#define ERROR_VSM_NOT_INITIALIZED kNtErrorVsmNotInitialized
+#define ERROR_VSM_DMA_PROTECTION_NOT_IN_USE kNtErrorVsmDmaProtectionNotInUse
+#define ERROR_PLATFORM_MANIFEST_NOT_AUTHORIZED kNtErrorPlatformManifestNotAuthorized
+#define ERROR_PLATFORM_MANIFEST_INVALID kNtErrorPlatformManifestInvalid
+#define ERROR_PLATFORM_MANIFEST_FILE_NOT_AUTHORIZED kNtErrorPlatformManifestFileNotAuthorized
+#define ERROR_PLATFORM_MANIFEST_CATALOG_NOT_AUTHORIZED kNtErrorPlatformManifestCatalogNotAuthorized
+#define ERROR_PLATFORM_MANIFEST_BINARY_ID_NOT_FOUND kNtErrorPlatformManifestBinaryIdNotFound
+#define ERROR_PLATFORM_MANIFEST_NOT_ACTIVE kNtErrorPlatformManifestNotActive
+#define ERROR_PLATFORM_MANIFEST_NOT_SIGNED kNtErrorPlatformManifestNotSigned
+#define ERROR_DEPENDENT_RESOURCE_EXISTS kNtErrorDependentResourceExists
+#define ERROR_DEPENDENCY_NOT_FOUND kNtErrorDependencyNotFound
+#define ERROR_DEPENDENCY_ALREADY_EXISTS kNtErrorDependencyAlreadyExists
+#define ERROR_RESOURCE_NOT_ONLINE kNtErrorResourceNotOnline
+#define ERROR_HOST_NODE_NOT_AVAILABLE kNtErrorHostNodeNotAvailable
+#define ERROR_RESOURCE_NOT_AVAILABLE kNtErrorResourceNotAvailable
+#define ERROR_RESOURCE_NOT_FOUND kNtErrorResourceNotFound
+#define ERROR_SHUTDOWN_CLUSTER kNtErrorShutdownCluster
+#define ERROR_CANT_EVICT_ACTIVE_NODE kNtErrorCantEvictActiveNode
+#define ERROR_OBJECT_ALREADY_EXISTS kNtErrorObjectAlreadyExists
+#define ERROR_OBJECT_IN_LIST kNtErrorObjectInList
+#define ERROR_GROUP_NOT_AVAILABLE kNtErrorGroupNotAvailable
+#define ERROR_GROUP_NOT_FOUND kNtErrorGroupNotFound
+#define ERROR_GROUP_NOT_ONLINE kNtErrorGroupNotOnline
+#define ERROR_HOST_NODE_NOT_RESOURCE_OWNER kNtErrorHostNodeNotResourceOwner
+#define ERROR_HOST_NODE_NOT_GROUP_OWNER kNtErrorHostNodeNotGroupOwner
+#define ERROR_RESMON_CREATE_FAILED kNtErrorResmonCreateFailed
+#define ERROR_RESMON_ONLINE_FAILED kNtErrorResmonOnlineFailed
+#define ERROR_RESOURCE_ONLINE kNtErrorResourceOnline
+#define ERROR_QUORUM_RESOURCE kNtErrorQuorumResource
+#define ERROR_NOT_QUORUM_CAPABLE kNtErrorNotQuorumCapable
+#define ERROR_CLUSTER_SHUTTING_DOWN kNtErrorClusterShuttingDown
+#define ERROR_INVALID_STATE kNtErrorInvalidState
+#define ERROR_RESOURCE_PROPERTIES_STORED kNtErrorResourcePropertiesStored
+#define ERROR_NOT_QUORUM_CLASS kNtErrorNotQuorumClass
+#define ERROR_CORE_RESOURCE kNtErrorCoreResource
+#define ERROR_QUORUM_RESOURCE_ONLINE_FAILED kNtErrorQuorumResourceOnlineFailed
+#define ERROR_QUORUMLOG_OPEN_FAILED kNtErrorQuorumlogOpenFailed
+#define ERROR_CLUSTERLOG_CORRUPT kNtErrorClusterlogCorrupt
+#define ERROR_CLUSTERLOG_RECORD_EXCEEDS_MAXSIZE kNtErrorClusterlogRecordExceedsMaxsize
+#define ERROR_CLUSTERLOG_EXCEEDS_MAXSIZE kNtErrorClusterlogExceedsMaxsize
+#define ERROR_CLUSTERLOG_CHKPOINT_NOT_FOUND kNtErrorClusterlogChkpointNotFound
+#define ERROR_CLUSTERLOG_NOT_ENOUGH_SPACE kNtErrorClusterlogNotEnoughSpace
+#define ERROR_QUORUM_OWNER_ALIVE kNtErrorQuorumOwnerAlive
+#define ERROR_NETWORK_NOT_AVAILABLE kNtErrorNetworkNotAvailable
+#define ERROR_NODE_NOT_AVAILABLE kNtErrorNodeNotAvailable
+#define ERROR_ALL_NODES_NOT_AVAILABLE kNtErrorAllNodesNotAvailable
+#define ERROR_RESOURCE_FAILED kNtErrorResourceFailed
+#define ERROR_CLUSTER_INVALID_NODE kNtErrorClusterInvalidNode
+#define ERROR_CLUSTER_NODE_EXISTS kNtErrorClusterNodeExists
+#define ERROR_CLUSTER_JOIN_IN_PROGRESS kNtErrorClusterJoinInProgress
+#define ERROR_CLUSTER_NODE_NOT_FOUND kNtErrorClusterNodeNotFound
+#define ERROR_CLUSTER_LOCAL_NODE_NOT_FOUND kNtErrorClusterLocalNodeNotFound
+#define ERROR_CLUSTER_NETWORK_EXISTS kNtErrorClusterNetworkExists
+#define ERROR_CLUSTER_NETWORK_NOT_FOUND kNtErrorClusterNetworkNotFound
+#define ERROR_CLUSTER_NETINTERFACE_EXISTS kNtErrorClusterNetinterfaceExists
+#define ERROR_CLUSTER_NETINTERFACE_NOT_FOUND kNtErrorClusterNetinterfaceNotFound
+#define ERROR_CLUSTER_INVALID_REQUEST kNtErrorClusterInvalidRequest
+#define ERROR_CLUSTER_INVALID_NETWORK_PROVIDER kNtErrorClusterInvalidNetworkProvider
+#define ERROR_CLUSTER_NODE_DOWN kNtErrorClusterNodeDown
+#define ERROR_CLUSTER_NODE_UNREACHABLE kNtErrorClusterNodeUnreachable
+#define ERROR_CLUSTER_NODE_NOT_MEMBER kNtErrorClusterNodeNotMember
+#define ERROR_CLUSTER_JOIN_NOT_IN_PROGRESS kNtErrorClusterJoinNotInProgress
+#define ERROR_CLUSTER_INVALID_NETWORK kNtErrorClusterInvalidNetwork
+#define ERROR_CLUSTER_NODE_UP kNtErrorClusterNodeUp
+#define ERROR_CLUSTER_IPADDR_IN_USE kNtErrorClusterIpaddrInUse
+#define ERROR_CLUSTER_NODE_NOT_PAUSED kNtErrorClusterNodeNotPaused
+#define ERROR_CLUSTER_NO_SECURITY_CONTEXT kNtErrorClusterNoSecurityContext
+#define ERROR_CLUSTER_NETWORK_NOT_INTERNAL kNtErrorClusterNetworkNotInternal
+#define ERROR_CLUSTER_NODE_ALREADY_UP kNtErrorClusterNodeAlreadyUp
+#define ERROR_CLUSTER_NODE_ALREADY_DOWN kNtErrorClusterNodeAlreadyDown
+#define ERROR_CLUSTER_NETWORK_ALREADY_ONLINE kNtErrorClusterNetworkAlreadyOnline
+#define ERROR_CLUSTER_NETWORK_ALREADY_OFFLINE kNtErrorClusterNetworkAlreadyOffline
+#define ERROR_CLUSTER_NODE_ALREADY_MEMBER kNtErrorClusterNodeAlreadyMember
+#define ERROR_CLUSTER_LAST_INTERNAL_NETWORK kNtErrorClusterLastInternalNetwork
+#define ERROR_CLUSTER_NETWORK_HAS_DEPENDENTS kNtErrorClusterNetworkHasDependents
+#define ERROR_INVALID_OPERATION_ON_QUORUM kNtErrorInvalidOperationOnQuorum
+#define ERROR_DEPENDENCY_NOT_ALLOWED kNtErrorDependencyNotAllowed
+#define ERROR_CLUSTER_NODE_PAUSED kNtErrorClusterNodePaused
+#define ERROR_NODE_CANT_HOST_RESOURCE kNtErrorNodeCantHostResource
+#define ERROR_CLUSTER_NODE_NOT_READY kNtErrorClusterNodeNotReady
+#define ERROR_CLUSTER_NODE_SHUTTING_DOWN kNtErrorClusterNodeShuttingDown
+#define ERROR_CLUSTER_JOIN_ABORTED kNtErrorClusterJoinAborted
+#define ERROR_CLUSTER_INCOMPATIBLE_VERSIONS kNtErrorClusterIncompatibleVersions
+#define ERROR_CLUSTER_MAXNUM_OF_RESOURCES_EXCEEDED kNtErrorClusterMaxnumOfResourcesExceeded
+#define ERROR_CLUSTER_SYSTEM_CONFIG_CHANGED kNtErrorClusterSystemConfigChanged
+#define ERROR_CLUSTER_RESOURCE_TYPE_NOT_FOUND kNtErrorClusterResourceTypeNotFound
+#define ERROR_CLUSTER_RESTYPE_NOT_SUPPORTED kNtErrorClusterRestypeNotSupported
+#define ERROR_CLUSTER_RESNAME_NOT_FOUND kNtErrorClusterResnameNotFound
+#define ERROR_CLUSTER_NO_RPC_PACKAGES_REGISTERED kNtErrorClusterNoRpcPackagesRegistered
+#define ERROR_CLUSTER_OWNER_NOT_IN_PREFLIST kNtErrorClusterOwnerNotInPreflist
+#define ERROR_CLUSTER_DATABASE_SEQMISMATCH kNtErrorClusterDatabaseSeqmismatch
+#define ERROR_RESMON_INVALID_STATE kNtErrorResmonInvalidState
+#define ERROR_CLUSTER_GUM_NOT_LOCKER kNtErrorClusterGumNotLocker
+#define ERROR_QUORUM_DISK_NOT_FOUND kNtErrorQuorumDiskNotFound
+#define ERROR_DATABASE_BACKUP_CORRUPT kNtErrorDatabaseBackupCorrupt
+#define ERROR_CLUSTER_NODE_ALREADY_HAS_DFS_ROOT kNtErrorClusterNodeAlreadyHasDfsRoot
+#define ERROR_RESOURCE_PROPERTY_UNCHANGEABLE kNtErrorResourcePropertyUnchangeable
+#define ERROR_NO_ADMIN_ACCESS_POINT kNtErrorNoAdminAccessPoint
+#define ERROR_CLUSTER_MEMBERSHIP_INVALID_STATE kNtErrorClusterMembershipInvalidState
+#define ERROR_CLUSTER_QUORUMLOG_NOT_FOUND kNtErrorClusterQuorumlogNotFound
+#define ERROR_CLUSTER_MEMBERSHIP_HALT kNtErrorClusterMembershipHalt
+#define ERROR_CLUSTER_INSTANCE_ID_MISMATCH kNtErrorClusterInstanceIdMismatch
+#define ERROR_CLUSTER_NETWORK_NOT_FOUND_FOR_IP kNtErrorClusterNetworkNotFoundForIp
+#define ERROR_CLUSTER_PROPERTY_DATA_TYPE_MISMATCH kNtErrorClusterPropertyDataTypeMismatch
+#define ERROR_CLUSTER_EVICT_WITHOUT_CLEANUP kNtErrorClusterEvictWithoutCleanup
+#define ERROR_CLUSTER_PARAMETER_MISMATCH kNtErrorClusterParameterMismatch
+#define ERROR_NODE_CANNOT_BE_CLUSTERED kNtErrorNodeCannotBeClustered
+#define ERROR_CLUSTER_WRONG_OS_VERSION kNtErrorClusterWrongOsVersion
+#define ERROR_CLUSTER_CANT_CREATE_DUP_CLUSTER_NAME kNtErrorClusterCantCreateDupClusterName
+#define ERROR_CLUSCFG_ALREADY_COMMITTED kNtErrorCluscfgAlreadyCommitted
+#define ERROR_CLUSCFG_ROLLBACK_FAILED kNtErrorCluscfgRollbackFailed
+#define ERROR_CLUSCFG_SYSTEM_DISK_DRIVE_LETTER_CONFLICT kNtErrorCluscfgSystemDiskDriveLetterConflict
+#define ERROR_CLUSTER_OLD_VERSION kNtErrorClusterOldVersion
+#define ERROR_CLUSTER_MISMATCHED_COMPUTER_ACCT_NAME kNtErrorClusterMismatchedComputerAcctName
+#define ERROR_CLUSTER_NO_NET_ADAPTERS kNtErrorClusterNoNetAdapters
+#define ERROR_CLUSTER_POISONED kNtErrorClusterPoisoned
+#define ERROR_CLUSTER_GROUP_MOVING kNtErrorClusterGroupMoving
+#define ERROR_CLUSTER_RESOURCE_TYPE_BUSY kNtErrorClusterResourceTypeBusy
+#define ERROR_RESOURCE_CALL_TIMED_OUT kNtErrorResourceCallTimedOut
+#define ERROR_INVALID_CLUSTER_IPV6_ADDRESS kNtErrorInvalidClusterIpv6Address
+#define ERROR_CLUSTER_INTERNAL_INVALID_FUNCTION kNtErrorClusterInternalInvalidFunction
+#define ERROR_CLUSTER_PARAMETER_OUT_OF_BOUNDS kNtErrorClusterParameterOutOfBounds
+#define ERROR_CLUSTER_PARTIAL_SEND kNtErrorClusterPartialSend
+#define ERROR_CLUSTER_REGISTRY_INVALID_FUNCTION kNtErrorClusterRegistryInvalidFunction
+#define ERROR_CLUSTER_INVALID_STRING_TERMINATION kNtErrorClusterInvalidStringTermination
+#define ERROR_CLUSTER_INVALID_STRING_FORMAT kNtErrorClusterInvalidStringFormat
+#define ERROR_CLUSTER_DATABASE_TRANSACTION_IN_PROGRESS kNtErrorClusterDatabaseTransactionInProgress
+#define ERROR_CLUSTER_DATABASE_TRANSACTION_NOT_IN_PROGRESS kNtErrorClusterDatabaseTransactionNotInProgress
+#define ERROR_CLUSTER_NULL_DATA kNtErrorClusterNullData
+#define ERROR_CLUSTER_PARTIAL_READ kNtErrorClusterPartialRead
+#define ERROR_CLUSTER_PARTIAL_WRITE kNtErrorClusterPartialWrite
+#define ERROR_CLUSTER_CANT_DESERIALIZE_DATA kNtErrorClusterCantDeserializeData
+#define ERROR_DEPENDENT_RESOURCE_PROPERTY_CONFLICT kNtErrorDependentResourcePropertyConflict
+#define ERROR_CLUSTER_NO_QUORUM kNtErrorClusterNoQuorum
+#define ERROR_CLUSTER_INVALID_IPV6_NETWORK kNtErrorClusterInvalidIpv6Network
+#define ERROR_CLUSTER_INVALID_IPV6_TUNNEL_NETWORK kNtErrorClusterInvalidIpv6TunnelNetwork
+#define ERROR_QUORUM_NOT_ALLOWED_IN_THIS_GROUP kNtErrorQuorumNotAllowedInThisGroup
+#define ERROR_DEPENDENCY_TREE_TOO_COMPLEX kNtErrorDependencyTreeTooComplex
+#define ERROR_EXCEPTION_IN_RESOURCE_CALL kNtErrorExceptionInResourceCall
+#define ERROR_CLUSTER_RHS_FAILED_INITIALIZATION kNtErrorClusterRhsFailedInitialization
+#define ERROR_CLUSTER_NOT_INSTALLED kNtErrorClusterNotInstalled
+#define ERROR_CLUSTER_RESOURCES_MUST_BE_ONLINE_ON_THE_SAME_NODE kNtErrorClusterResourcesMustBeOnlineOnTheSameNode
+#define ERROR_CLUSTER_MAX_NODES_IN_CLUSTER kNtErrorClusterMaxNodesInCluster
+#define ERROR_CLUSTER_TOO_MANY_NODES kNtErrorClusterTooManyNodes
+#define ERROR_CLUSTER_OBJECT_ALREADY_USED kNtErrorClusterObjectAlreadyUsed
+#define ERROR_NONCORE_GROUPS_FOUND kNtErrorNoncoreGroupsFound
+#define ERROR_FILE_SHARE_RESOURCE_CONFLICT kNtErrorFileShareResourceConflict
+#define ERROR_CLUSTER_EVICT_INVALID_REQUEST kNtErrorClusterEvictInvalidRequest
+#define ERROR_CLUSTER_SINGLETON_RESOURCE kNtErrorClusterSingletonResource
+#define ERROR_CLUSTER_GROUP_SINGLETON_RESOURCE kNtErrorClusterGroupSingletonResource
+#define ERROR_CLUSTER_RESOURCE_PROVIDER_FAILED kNtErrorClusterResourceProviderFailed
+#define ERROR_CLUSTER_RESOURCE_CONFIGURATION_ERROR kNtErrorClusterResourceConfigurationError
+#define ERROR_CLUSTER_GROUP_BUSY kNtErrorClusterGroupBusy
+#define ERROR_CLUSTER_NOT_SHARED_VOLUME kNtErrorClusterNotSharedVolume
+#define ERROR_CLUSTER_INVALID_SECURITY_DESCRIPTOR kNtErrorClusterInvalidSecurityDescriptor
+#define ERROR_CLUSTER_SHARED_VOLUMES_IN_USE kNtErrorClusterSharedVolumesInUse
+#define ERROR_CLUSTER_USE_SHARED_VOLUMES_API kNtErrorClusterUseSharedVolumesApi
+#define ERROR_CLUSTER_BACKUP_IN_PROGRESS kNtErrorClusterBackupInProgress
+#define ERROR_NON_CSV_PATH kNtErrorNonCsvPath
+#define ERROR_CSV_VOLUME_NOT_LOCAL kNtErrorCsvVolumeNotLocal
+#define ERROR_CLUSTER_WATCHDOG_TERMINATING kNtErrorClusterWatchdogTerminating
+#define ERROR_CLUSTER_RESOURCE_VETOED_MOVE_INCOMPATIBLE_NODES kNtErrorClusterResourceVetoedMoveIncompatibleNodes
+#define ERROR_CLUSTER_INVALID_NODE_WEIGHT kNtErrorClusterInvalidNodeWeight
+#define ERROR_CLUSTER_RESOURCE_VETOED_CALL kNtErrorClusterResourceVetoedCall
+#define ERROR_RESMON_SYSTEM_RESOURCES_LACKING kNtErrorResmonSystemResourcesLacking
+#define ERROR_CLUSTER_RESOURCE_VETOED_MOVE_NOT_ENOUGH_RESOURCES_ON_SOURCE kNtErrorClusterResourceVetoedMoveNotEnoughResourcesOnSource
+#define ERROR_CLUSTER_GROUP_QUEUED kNtErrorClusterGroupQueued
+#define ERROR_CLUSTER_RESOURCE_LOCKED_STATUS kNtErrorClusterResourceLockedStatus
+#define ERROR_CLUSTER_SHARED_VOLUME_FAILOVER_NOT_ALLOWED kNtErrorClusterSharedVolumeFailoverNotAllowed
+#define ERROR_CLUSTER_NODE_DRAIN_IN_PROGRESS kNtErrorClusterNodeDrainInProgress
+#define ERROR_CLUSTER_DISK_NOT_CONNECTED kNtErrorClusterDiskNotConnected
+#define ERROR_DISK_NOT_CSV_CAPABLE kNtErrorDiskNotCsvCapable
+#define ERROR_RESOURCE_NOT_IN_AVAILABLE_STORAGE kNtErrorResourceNotInAvailableStorage
+#define ERROR_CLUSTER_SHARED_VOLUME_REDIRECTED kNtErrorClusterSharedVolumeRedirected
+#define ERROR_CLUSTER_SHARED_VOLUME_NOT_REDIRECTED kNtErrorClusterSharedVolumeNotRedirected
+#define ERROR_CLUSTER_CANNOT_RETURN_PROPERTIES kNtErrorClusterCannotReturnProperties
+#define ERROR_CLUSTER_RESOURCE_IS_IN_MAINTENANCE_MODE kNtErrorClusterResourceIsInMaintenanceMode
+#define ERROR_CLUSTER_AFFINITY_CONFLICT kNtErrorClusterAffinityConflict
+#define ERROR_CLUSTER_RESOURCE_IS_REPLICA_VIRTUAL_MACHINE kNtErrorClusterResourceIsReplicaVirtualMachine
+#define ERROR_CLUSTER_UPGRADE_INCOMPATIBLE_VERSIONS kNtErrorClusterUpgradeIncompatibleVersions
+#define ERROR_CLUSTER_UPGRADE_FIX_QUORUM_NOT_SUPPORTED kNtErrorClusterUpgradeFixQuorumNotSupported
+#define ERROR_CLUSTER_UPGRADE_RESTART_REQUIRED kNtErrorClusterUpgradeRestartRequired
+#define ERROR_CLUSTER_UPGRADE_IN_PROGRESS kNtErrorClusterUpgradeInProgress
+#define ERROR_CLUSTER_UPGRADE_INCOMPLETE kNtErrorClusterUpgradeIncomplete
+#define ERROR_CLUSTER_NODE_IN_GRACE_PERIOD kNtErrorClusterNodeInGracePeriod
+#define ERROR_CLUSTER_CSV_IO_PAUSE_TIMEOUT kNtErrorClusterCsvIoPauseTimeout
+#define ERROR_NODE_NOT_ACTIVE_CLUSTER_MEMBER kNtErrorNodeNotActiveClusterMember
+#define ERROR_CLUSTER_RESOURCE_NOT_MONITORED kNtErrorClusterResourceNotMonitored
+#define ERROR_CLUSTER_RESOURCE_DOES_NOT_SUPPORT_UNMONITORED kNtErrorClusterResourceDoesNotSupportUnmonitored
+#define ERROR_CLUSTER_RESOURCE_IS_REPLICATED kNtErrorClusterResourceIsReplicated
+#define ERROR_CLUSTER_NODE_ISOLATED kNtErrorClusterNodeIsolated
+#define ERROR_CLUSTER_NODE_QUARANTINED kNtErrorClusterNodeQuarantined
+#define ERROR_CLUSTER_DATABASE_UPDATE_CONDITION_FAILED kNtErrorClusterDatabaseUpdateConditionFailed
+#define ERROR_CLUSTER_SPACE_DEGRADED kNtErrorClusterSpaceDegraded
+#define ERROR_CLUSTER_TOKEN_DELEGATION_NOT_SUPPORTED kNtErrorClusterTokenDelegationNotSupported
+#define ERROR_CLUSTER_CSV_INVALID_HANDLE kNtErrorClusterCsvInvalidHandle
+#define ERROR_CLUSTER_CSV_SUPPORTED_ONLY_ON_COORDINATOR kNtErrorClusterCsvSupportedOnlyOnCoordinator
+#define ERROR_GROUPSET_NOT_AVAILABLE kNtErrorGroupsetNotAvailable
+#define ERROR_GROUPSET_NOT_FOUND kNtErrorGroupsetNotFound
+#define ERROR_GROUPSET_CANT_PROVIDE kNtErrorGroupsetCantProvide
+#define ERROR_CLUSTER_FAULT_DOMAIN_PARENT_NOT_FOUND kNtErrorClusterFaultDomainParentNotFound
+#define ERROR_CLUSTER_FAULT_DOMAIN_INVALID_HIERARCHY kNtErrorClusterFaultDomainInvalidHierarchy
+#define ERROR_CLUSTER_FAULT_DOMAIN_FAILED_S2D_VALIDATION kNtErrorClusterFaultDomainFailedS2dValidation
+#define ERROR_CLUSTER_FAULT_DOMAIN_S2D_CONNECTIVITY_LOSS kNtErrorClusterFaultDomainS2dConnectivityLoss
+#define ERROR_CLUSTER_INVALID_INFRASTRUCTURE_FILESERVER_NAME kNtErrorClusterInvalidInfrastructureFileserverName
+#define ERROR_CLUSTERSET_MANAGEMENT_CLUSTER_UNREACHABLE kNtErrorClustersetManagementClusterUnreachable
+#define ERROR_ENCRYPTION_FAILED kNtErrorEncryptionFailed
+#define ERROR_DECRYPTION_FAILED kNtErrorDecryptionFailed
+#define ERROR_FILE_ENCRYPTED kNtErrorFileEncrypted
+#define ERROR_NO_RECOVERY_POLICY kNtErrorNoRecoveryPolicy
+#define ERROR_NO_EFS kNtErrorNoEfs
+#define ERROR_WRONG_EFS kNtErrorWrongEfs
+#define ERROR_NO_USER_KEYS kNtErrorNoUserKeys
+#define ERROR_FILE_NOT_ENCRYPTED kNtErrorFileNotEncrypted
+#define ERROR_NOT_EXPORT_FORMAT kNtErrorNotExportFormat
+#define ERROR_FILE_READ_ONLY kNtErrorFileReadOnly
+#define ERROR_DIR_EFS_DISALLOWED kNtErrorDirEfsDisallowed
+#define ERROR_EFS_SERVER_NOT_TRUSTED kNtErrorEfsServerNotTrusted
+#define ERROR_BAD_RECOVERY_POLICY kNtErrorBadRecoveryPolicy
+#define ERROR_EFS_ALG_BLOB_TOO_BIG kNtErrorEfsAlgBlobTooBig
+#define ERROR_VOLUME_NOT_SUPPORT_EFS kNtErrorVolumeNotSupportEfs
+#define ERROR_EFS_DISABLED kNtErrorEfsDisabled
+#define ERROR_EFS_VERSION_NOT_SUPPORT kNtErrorEfsVersionNotSupport
+#define ERROR_CS_ENCRYPTION_INVALID_SERVER_RESPONSE kNtErrorCsEncryptionInvalidServerResponse
+#define ERROR_CS_ENCRYPTION_UNSUPPORTED_SERVER kNtErrorCsEncryptionUnsupportedServer
+#define ERROR_CS_ENCRYPTION_EXISTING_ENCRYPTED_FILE kNtErrorCsEncryptionExistingEncryptedFile
+#define ERROR_CS_ENCRYPTION_NEW_ENCRYPTED_FILE kNtErrorCsEncryptionNewEncryptedFile
+#define ERROR_CS_ENCRYPTION_FILE_NOT_CSE kNtErrorCsEncryptionFileNotCse
+#define ERROR_ENCRYPTION_POLICY_DENIES_OPERATION kNtErrorEncryptionPolicyDeniesOperation
+#define ERROR_NO_BROWSER_SERVERS_FOUND kNtErrorNoBrowserServersFound
+#define ERROR_LOG_SECTOR_INVALID kNtErrorLogSectorInvalid
+#define ERROR_LOG_SECTOR_PARITY_INVALID kNtErrorLogSectorParityInvalid
+#define ERROR_LOG_SECTOR_REMAPPED kNtErrorLogSectorRemapped
+#define ERROR_LOG_BLOCK_INCOMPLETE kNtErrorLogBlockIncomplete
+#define ERROR_LOG_INVALID_RANGE kNtErrorLogInvalidRange
+#define ERROR_LOG_BLOCKS_EXHAUSTED kNtErrorLogBlocksExhausted
+#define ERROR_LOG_READ_CONTEXT_INVALID kNtErrorLogReadContextInvalid
+#define ERROR_LOG_RESTART_INVALID kNtErrorLogRestartInvalid
+#define ERROR_LOG_BLOCK_VERSION kNtErrorLogBlockVersion
+#define ERROR_LOG_BLOCK_INVALID kNtErrorLogBlockInvalid
+#define ERROR_LOG_READ_MODE_INVALID kNtErrorLogReadModeInvalid
+#define ERROR_LOG_NO_RESTART kNtErrorLogNoRestart
+#define ERROR_LOG_METADATA_CORRUPT kNtErrorLogMetadataCorrupt
+#define ERROR_LOG_METADATA_INVALID kNtErrorLogMetadataInvalid
+#define ERROR_LOG_METADATA_INCONSISTENT kNtErrorLogMetadataInconsistent
+#define ERROR_LOG_RESERVATION_INVALID kNtErrorLogReservationInvalid
+#define ERROR_LOG_CANT_DELETE kNtErrorLogCantDelete
+#define ERROR_LOG_CONTAINER_LIMIT_EXCEEDED kNtErrorLogContainerLimitExceeded
+#define ERROR_LOG_START_OF_LOG kNtErrorLogStartOfLog
+#define ERROR_LOG_POLICY_ALREADY_INSTALLED kNtErrorLogPolicyAlreadyInstalled
+#define ERROR_LOG_POLICY_NOT_INSTALLED kNtErrorLogPolicyNotInstalled
+#define ERROR_LOG_POLICY_INVALID kNtErrorLogPolicyInvalid
+#define ERROR_LOG_POLICY_CONFLICT kNtErrorLogPolicyConflict
+#define ERROR_LOG_PINNED_ARCHIVE_TAIL kNtErrorLogPinnedArchiveTail
+#define ERROR_LOG_RECORD_NONEXISTENT kNtErrorLogRecordNonexistent
+#define ERROR_LOG_RECORDS_RESERVED_INVALID kNtErrorLogRecordsReservedInvalid
+#define ERROR_LOG_SPACE_RESERVED_INVALID kNtErrorLogSpaceReservedInvalid
+#define ERROR_LOG_TAIL_INVALID kNtErrorLogTailInvalid
+#define ERROR_LOG_FULL kNtErrorLogFull
+#define ERROR_COULD_NOT_RESIZE_LOG kNtErrorCouldNotResizeLog
+#define ERROR_LOG_MULTIPLEXED kNtErrorLogMultiplexed
+#define ERROR_LOG_DEDICATED kNtErrorLogDedicated
+#define ERROR_LOG_ARCHIVE_NOT_IN_PROGRESS kNtErrorLogArchiveNotInProgress
+#define ERROR_LOG_ARCHIVE_IN_PROGRESS kNtErrorLogArchiveInProgress
+#define ERROR_LOG_EPHEMERAL kNtErrorLogEphemeral
+#define ERROR_LOG_NOT_ENOUGH_CONTAINERS kNtErrorLogNotEnoughContainers
+#define ERROR_LOG_CLIENT_ALREADY_REGISTERED kNtErrorLogClientAlreadyRegistered
+#define ERROR_LOG_CLIENT_NOT_REGISTERED kNtErrorLogClientNotRegistered
+#define ERROR_LOG_FULL_HANDLER_IN_PROGRESS kNtErrorLogFullHandlerInProgress
+#define ERROR_LOG_CONTAINER_READ_FAILED kNtErrorLogContainerReadFailed
+#define ERROR_LOG_CONTAINER_WRITE_FAILED kNtErrorLogContainerWriteFailed
+#define ERROR_LOG_CONTAINER_OPEN_FAILED kNtErrorLogContainerOpenFailed
+#define ERROR_LOG_CONTAINER_STATE_INVALID kNtErrorLogContainerStateInvalid
+#define ERROR_LOG_STATE_INVALID kNtErrorLogStateInvalid
+#define ERROR_LOG_PINNED kNtErrorLogPinned
+#define ERROR_LOG_METADATA_FLUSH_FAILED kNtErrorLogMetadataFlushFailed
+#define ERROR_LOG_INCONSISTENT_SECURITY kNtErrorLogInconsistentSecurity
+#define ERROR_LOG_APPENDED_FLUSH_FAILED kNtErrorLogAppendedFlushFailed
+#define ERROR_LOG_PINNED_RESERVATION kNtErrorLogPinnedReservation
+#define ERROR_INVALID_TRANSACTION kNtErrorInvalidTransaction
+#define ERROR_TRANSACTION_NOT_ACTIVE kNtErrorTransactionNotActive
+#define ERROR_TRANSACTION_REQUEST_NOT_VALID kNtErrorTransactionRequestNotValid
+#define ERROR_TRANSACTION_NOT_REQUESTED kNtErrorTransactionNotRequested
+#define ERROR_TRANSACTION_ALREADY_ABORTED kNtErrorTransactionAlreadyAborted
+#define ERROR_TRANSACTION_ALREADY_COMMITTED kNtErrorTransactionAlreadyCommitted
+#define ERROR_TM_INITIALIZATION_FAILED kNtErrorTmInitializationFailed
+#define ERROR_RESOURCEMANAGER_READ_ONLY kNtErrorResourcemanagerReadOnly
+#define ERROR_TRANSACTION_NOT_JOINED kNtErrorTransactionNotJoined
+#define ERROR_TRANSACTION_SUPERIOR_EXISTS kNtErrorTransactionSuperiorExists
+#define ERROR_CRM_PROTOCOL_ALREADY_EXISTS kNtErrorCrmProtocolAlreadyExists
+#define ERROR_TRANSACTION_PROPAGATION_FAILED kNtErrorTransactionPropagationFailed
+#define ERROR_CRM_PROTOCOL_NOT_FOUND kNtErrorCrmProtocolNotFound
+#define ERROR_TRANSACTION_INVALID_MARSHALL_BUFFER kNtErrorTransactionInvalidMarshallBuffer
+#define ERROR_CURRENT_TRANSACTION_NOT_VALID kNtErrorCurrentTransactionNotValid
+#define ERROR_TRANSACTION_NOT_FOUND kNtErrorTransactionNotFound
+#define ERROR_RESOURCEMANAGER_NOT_FOUND kNtErrorResourcemanagerNotFound
+#define ERROR_ENLISTMENT_NOT_FOUND kNtErrorEnlistmentNotFound
+#define ERROR_TRANSACTIONMANAGER_NOT_FOUND kNtErrorTransactionmanagerNotFound
+#define ERROR_TRANSACTIONMANAGER_NOT_ONLINE kNtErrorTransactionmanagerNotOnline
+#define ERROR_TRANSACTIONMANAGER_RECOVERY_NAME_COLLISION kNtErrorTransactionmanagerRecoveryNameCollision
+#define ERROR_TRANSACTION_NOT_ROOT kNtErrorTransactionNotRoot
+#define ERROR_TRANSACTION_OBJECT_EXPIRED kNtErrorTransactionObjectExpired
+#define ERROR_TRANSACTION_RESPONSE_NOT_ENLISTED kNtErrorTransactionResponseNotEnlisted
+#define ERROR_TRANSACTION_RECORD_TOO_LONG kNtErrorTransactionRecordTooLong
+#define ERROR_IMPLICIT_TRANSACTION_NOT_SUPPORTED kNtErrorImplicitTransactionNotSupported
+#define ERROR_TRANSACTION_INTEGRITY_VIOLATED kNtErrorTransactionIntegrityViolated
+#define ERROR_TRANSACTIONMANAGER_IDENTITY_MISMATCH kNtErrorTransactionmanagerIdentityMismatch
+#define ERROR_RM_CANNOT_BE_FROZEN_FOR_SNAPSHOT kNtErrorRmCannotBeFrozenForSnapshot
+#define ERROR_TRANSACTION_MUST_WRITETHROUGH kNtErrorTransactionMustWritethrough
+#define ERROR_TRANSACTION_NO_SUPERIOR kNtErrorTransactionNoSuperior
+#define ERROR_HEURISTIC_DAMAGE_POSSIBLE kNtErrorHeuristicDamagePossible
+#define ERROR_TRANSACTIONAL_CONFLICT kNtErrorTransactionalConflict
+#define ERROR_RM_NOT_ACTIVE kNtErrorRmNotActive
+#define ERROR_RM_METADATA_CORRUPT kNtErrorRmMetadataCorrupt
+#define ERROR_DIRECTORY_NOT_RM kNtErrorDirectoryNotRm
+#define ERROR_TRANSACTIONS_UNSUPPORTED_REMOTE kNtErrorTransactionsUnsupportedRemote
+#define ERROR_LOG_RESIZE_INVALID_SIZE kNtErrorLogResizeInvalidSize
+#define ERROR_OBJECT_NO_LONGER_EXISTS kNtErrorObjectNoLongerExists
+#define ERROR_STREAM_MINIVERSION_NOT_FOUND kNtErrorStreamMiniversionNotFound
+#define ERROR_STREAM_MINIVERSION_NOT_VALID kNtErrorStreamMiniversionNotValid
+#define ERROR_MINIVERSION_INACCESSIBLE_FROM_SPECIFIED_TRANSACTION kNtErrorMiniversionInaccessibleFromSpecifiedTransaction
+#define ERROR_CANT_OPEN_MINIVERSION_WITH_MODIFY_INTENT kNtErrorCantOpenMiniversionWithModifyIntent
+#define ERROR_CANT_CREATE_MORE_STREAM_MINIVERSIONS kNtErrorCantCreateMoreStreamMiniversions
+#define ERROR_REMOTE_FILE_VERSION_MISMATCH kNtErrorRemoteFileVersionMismatch
+#define ERROR_HANDLE_NO_LONGER_VALID kNtErrorHandleNoLongerValid
+#define ERROR_NO_TXF_METADATA kNtErrorNoTxfMetadata
+#define ERROR_LOG_CORRUPTION_DETECTED kNtErrorLogCorruptionDetected
+#define ERROR_CANT_RECOVER_WITH_HANDLE_OPEN kNtErrorCantRecoverWithHandleOpen
+#define ERROR_RM_DISCONNECTED kNtErrorRmDisconnected
+#define ERROR_ENLISTMENT_NOT_SUPERIOR kNtErrorEnlistmentNotSuperior
+#define ERROR_RECOVERY_NOT_NEEDED kNtErrorRecoveryNotNeeded
+#define ERROR_RM_ALREADY_STARTED kNtErrorRmAlreadyStarted
+#define ERROR_FILE_IDENTITY_NOT_PERSISTENT kNtErrorFileIdentityNotPersistent
+#define ERROR_CANT_BREAK_TRANSACTIONAL_DEPENDENCY kNtErrorCantBreakTransactionalDependency
+#define ERROR_CANT_CROSS_RM_BOUNDARY kNtErrorCantCrossRmBoundary
+#define ERROR_TXF_DIR_NOT_EMPTY kNtErrorTxfDirNotEmpty
+#define ERROR_INDOUBT_TRANSACTIONS_EXIST kNtErrorIndoubtTransactionsExist
+#define ERROR_TM_VOLATILE kNtErrorTmVolatile
+#define ERROR_ROLLBACK_TIMER_EXPIRED kNtErrorRollbackTimerExpired
+#define ERROR_TXF_ATTRIBUTE_CORRUPT kNtErrorTxfAttributeCorrupt
+#define ERROR_EFS_NOT_ALLOWED_IN_TRANSACTION kNtErrorEfsNotAllowedInTransaction
+#define ERROR_TRANSACTIONAL_OPEN_NOT_ALLOWED kNtErrorTransactionalOpenNotAllowed
+#define ERROR_LOG_GROWTH_FAILED kNtErrorLogGrowthFailed
+#define ERROR_TRANSACTED_MAPPING_UNSUPPORTED_REMOTE kNtErrorTransactedMappingUnsupportedRemote
+#define ERROR_TXF_METADATA_ALREADY_PRESENT kNtErrorTxfMetadataAlreadyPresent
+#define ERROR_TRANSACTION_SCOPE_CALLBACKS_NOT_SET kNtErrorTransactionScopeCallbacksNotSet
+#define ERROR_TRANSACTION_REQUIRED_PROMOTION kNtErrorTransactionRequiredPromotion
+#define ERROR_CANNOT_EXECUTE_FILE_IN_TRANSACTION kNtErrorCannotExecuteFileInTransaction
+#define ERROR_TRANSACTIONS_NOT_FROZEN kNtErrorTransactionsNotFrozen
+#define ERROR_TRANSACTION_FREEZE_IN_PROGRESS kNtErrorTransactionFreezeInProgress
+#define ERROR_NOT_SNAPSHOT_VOLUME kNtErrorNotSnapshotVolume
+#define ERROR_NO_SAVEPOINT_WITH_OPEN_FILES kNtErrorNoSavepointWithOpenFiles
+#define ERROR_DATA_LOST_REPAIR kNtErrorDataLostRepair
+#define ERROR_SPARSE_NOT_ALLOWED_IN_TRANSACTION kNtErrorSparseNotAllowedInTransaction
+#define ERROR_TM_IDENTITY_MISMATCH kNtErrorTmIdentityMismatch
+#define ERROR_FLOATED_SECTION kNtErrorFloatedSection
+#define ERROR_CANNOT_ACCEPT_TRANSACTED_WORK kNtErrorCannotAcceptTransactedWork
+#define ERROR_CANNOT_ABORT_TRANSACTIONS kNtErrorCannotAbortTransactions
+#define ERROR_BAD_CLUSTERS kNtErrorBadClusters
+#define ERROR_COMPRESSION_NOT_ALLOWED_IN_TRANSACTION kNtErrorCompressionNotAllowedInTransaction
+#define ERROR_VOLUME_DIRTY kNtErrorVolumeDirty
+#define ERROR_NO_LINK_TRACKING_IN_TRANSACTION kNtErrorNoLinkTrackingInTransaction
+#define ERROR_OPERATION_NOT_SUPPORTED_IN_TRANSACTION kNtErrorOperationNotSupportedInTransaction
+#define ERROR_EXPIRED_HANDLE kNtErrorExpiredHandle
+#define ERROR_TRANSACTION_NOT_ENLISTED kNtErrorTransactionNotEnlisted
+#define ERROR_CTX_WINSTATION_NAME_INVALID kNtErrorCtxWinstationNameInvalid
+#define ERROR_CTX_INVALID_PD kNtErrorCtxInvalidPd
+#define ERROR_CTX_PD_NOT_FOUND kNtErrorCtxPdNotFound
+#define ERROR_CTX_WD_NOT_FOUND kNtErrorCtxWdNotFound
+#define ERROR_CTX_CANNOT_MAKE_EVENTLOG_ENTRY kNtErrorCtxCannotMakeEventlogEntry
+#define ERROR_CTX_SERVICE_NAME_COLLISION kNtErrorCtxServiceNameCollision
+#define ERROR_CTX_CLOSE_PENDING kNtErrorCtxClosePending
+#define ERROR_CTX_NO_OUTBUF kNtErrorCtxNoOutbuf
+#define ERROR_CTX_MODEM_INF_NOT_FOUND kNtErrorCtxModemInfNotFound
+#define ERROR_CTX_INVALID_MODEMNAME kNtErrorCtxInvalidModemname
+#define ERROR_CTX_MODEM_RESPONSE_ERROR kNtErrorCtxModemResponseError
+#define ERROR_CTX_MODEM_RESPONSE_TIMEOUT kNtErrorCtxModemResponseTimeout
+#define ERROR_CTX_MODEM_RESPONSE_NO_CARRIER kNtErrorCtxModemResponseNoCarrier
+#define ERROR_CTX_MODEM_RESPONSE_NO_DIALTONE kNtErrorCtxModemResponseNoDialtone
+#define ERROR_CTX_MODEM_RESPONSE_BUSY kNtErrorCtxModemResponseBusy
+#define ERROR_CTX_MODEM_RESPONSE_VOICE kNtErrorCtxModemResponseVoice
+#define ERROR_CTX_TD_ERROR kNtErrorCtxTdError
+#define ERROR_CTX_WINSTATION_NOT_FOUND kNtErrorCtxWinstationNotFound
+#define ERROR_CTX_WINSTATION_ALREADY_EXISTS kNtErrorCtxWinstationAlreadyExists
+#define ERROR_CTX_WINSTATION_BUSY kNtErrorCtxWinstationBusy
+#define ERROR_CTX_BAD_VIDEO_MODE kNtErrorCtxBadVideoMode
+#define ERROR_CTX_GRAPHICS_INVALID kNtErrorCtxGraphicsInvalid
+#define ERROR_CTX_LOGON_DISABLED kNtErrorCtxLogonDisabled
+#define ERROR_CTX_NOT_CONSOLE kNtErrorCtxNotConsole
+#define ERROR_CTX_CLIENT_QUERY_TIMEOUT kNtErrorCtxClientQueryTimeout
+#define ERROR_CTX_CONSOLE_DISCONNECT kNtErrorCtxConsoleDisconnect
+#define ERROR_CTX_CONSOLE_CONNECT kNtErrorCtxConsoleConnect
+#define ERROR_CTX_SHADOW_DENIED kNtErrorCtxShadowDenied
+#define ERROR_CTX_WINSTATION_ACCESS_DENIED kNtErrorCtxWinstationAccessDenied
+#define ERROR_CTX_INVALID_WD kNtErrorCtxInvalidWd
+#define ERROR_CTX_SHADOW_INVALID kNtErrorCtxShadowInvalid
+#define ERROR_CTX_SHADOW_DISABLED kNtErrorCtxShadowDisabled
+#define ERROR_CTX_CLIENT_LICENSE_IN_USE kNtErrorCtxClientLicenseInUse
+#define ERROR_CTX_CLIENT_LICENSE_NOT_SET kNtErrorCtxClientLicenseNotSet
+#define ERROR_CTX_LICENSE_NOT_AVAILABLE kNtErrorCtxLicenseNotAvailable
+#define ERROR_CTX_LICENSE_CLIENT_INVALID kNtErrorCtxLicenseClientInvalid
+#define ERROR_CTX_LICENSE_EXPIRED kNtErrorCtxLicenseExpired
+#define ERROR_CTX_SHADOW_NOT_RUNNING kNtErrorCtxShadowNotRunning
+#define ERROR_CTX_SHADOW_ENDED_BY_MODE_CHANGE kNtErrorCtxShadowEndedByModeChange
+#define ERROR_ACTIVATION_COUNT_EXCEEDED kNtErrorActivationCountExceeded
+#define ERROR_CTX_WINSTATIONS_DISABLED kNtErrorCtxWinstationsDisabled
+#define ERROR_CTX_ENCRYPTION_LEVEL_REQUIRED kNtErrorCtxEncryptionLevelRequired
+#define ERROR_CTX_SESSION_IN_USE kNtErrorCtxSessionInUse
+#define ERROR_CTX_NO_FORCE_LOGOFF kNtErrorCtxNoForceLogoff
+#define ERROR_CTX_ACCOUNT_RESTRICTION kNtErrorCtxAccountRestriction
+#define ERROR_RDP_PROTOCOL_ERROR kNtErrorRdpProtocolError
+#define ERROR_CTX_CDM_CONNECT kNtErrorCtxCdmConnect
+#define ERROR_CTX_CDM_DISCONNECT kNtErrorCtxCdmDisconnect
+#define ERROR_CTX_SECURITY_LAYER_ERROR kNtErrorCtxSecurityLayerError
+#define ERROR_TS_INCOMPATIBLE_SESSIONS kNtErrorTsIncompatibleSessions
+#define ERROR_TS_VIDEO_SUBSYSTEM_ERROR kNtErrorTsVideoSubsystemError
+#define ERROR_DS_NOT_INSTALLED kNtErrorDsNotInstalled
+#define ERROR_DS_MEMBERSHIP_EVALUATED_LOCALLY kNtErrorDsMembershipEvaluatedLocally
+#define ERROR_DS_NO_ATTRIBUTE_OR_VALUE kNtErrorDsNoAttributeOrValue
+#define ERROR_DS_INVALID_ATTRIBUTE_SYNTAX kNtErrorDsInvalidAttributeSyntax
+#define ERROR_DS_ATTRIBUTE_TYPE_UNDEFINED kNtErrorDsAttributeTypeUndefined
+#define ERROR_DS_ATTRIBUTE_OR_VALUE_EXISTS kNtErrorDsAttributeOrValueExists
+#define ERROR_DS_BUSY kNtErrorDsBusy
+#define ERROR_DS_UNAVAILABLE kNtErrorDsUnavailable
+#define ERROR_DS_NO_RIDS_ALLOCATED kNtErrorDsNoRidsAllocated
+#define ERROR_DS_NO_MORE_RIDS kNtErrorDsNoMoreRids
+#define ERROR_DS_INCORRECT_ROLE_OWNER kNtErrorDsIncorrectRoleOwner
+#define ERROR_DS_RIDMGR_INIT_ERROR kNtErrorDsRidmgrInitError
+#define ERROR_DS_OBJ_CLASS_VIOLATION kNtErrorDsObjClassViolation
+#define ERROR_DS_CANT_ON_NON_LEAF kNtErrorDsCantOnNonLeaf
+#define ERROR_DS_CANT_ON_RDN kNtErrorDsCantOnRdn
+#define ERROR_DS_CANT_MOD_OBJ_CLASS kNtErrorDsCantModObjClass
+#define ERROR_DS_CROSS_DOM_MOVE_ERROR kNtErrorDsCrossDomMoveError
+#define ERROR_DS_GC_NOT_AVAILABLE kNtErrorDsGcNotAvailable
+#define ERROR_SHARED_POLICY kNtErrorSharedPolicy
+#define ERROR_POLICY_OBJECT_NOT_FOUND kNtErrorPolicyObjectNotFound
+#define ERROR_POLICY_ONLY_IN_DS kNtErrorPolicyOnlyInDs
+#define ERROR_PROMOTION_ACTIVE kNtErrorPromotionActive
+#define ERROR_NO_PROMOTION_ACTIVE kNtErrorNoPromotionActive
+#define ERROR_DS_OPERATIONS_ERROR kNtErrorDsOperationsError
+#define ERROR_DS_PROTOCOL_ERROR kNtErrorDsProtocolError
+#define ERROR_DS_TIMELIMIT_EXCEEDED kNtErrorDsTimelimitExceeded
+#define ERROR_DS_SIZELIMIT_EXCEEDED kNtErrorDsSizelimitExceeded
+#define ERROR_DS_ADMIN_LIMIT_EXCEEDED kNtErrorDsAdminLimitExceeded
+#define ERROR_DS_COMPARE_FALSE kNtErrorDsCompareFalse
+#define ERROR_DS_COMPARE_TRUE kNtErrorDsCompareTrue
+#define ERROR_DS_AUTH_METHOD_NOT_SUPPORTED kNtErrorDsAuthMethodNotSupported
+#define ERROR_DS_STRONG_AUTH_REQUIRED kNtErrorDsStrongAuthRequired
+#define ERROR_DS_INAPPROPRIATE_AUTH kNtErrorDsInappropriateAuth
+#define ERROR_DS_AUTH_UNKNOWN kNtErrorDsAuthUnknown
+#define ERROR_DS_REFERRAL kNtErrorDsReferral
+#define ERROR_DS_UNAVAILABLE_CRIT_EXTENSION kNtErrorDsUnavailableCritExtension
+#define ERROR_DS_CONFIDENTIALITY_REQUIRED kNtErrorDsConfidentialityRequired
+#define ERROR_DS_INAPPROPRIATE_MATCHING kNtErrorDsInappropriateMatching
+#define ERROR_DS_CONSTRAINT_VIOLATION kNtErrorDsConstraintViolation
+#define ERROR_DS_NO_SUCH_OBJECT kNtErrorDsNoSuchObject
+#define ERROR_DS_ALIAS_PROBLEM kNtErrorDsAliasProblem
+#define ERROR_DS_INVALID_DN_SYNTAX kNtErrorDsInvalidDnSyntax
+#define ERROR_DS_IS_LEAF kNtErrorDsIsLeaf
+#define ERROR_DS_ALIAS_DEREF_PROBLEM kNtErrorDsAliasDerefProblem
+#define ERROR_DS_UNWILLING_TO_PERFORM kNtErrorDsUnwillingToPerform
+#define ERROR_DS_LOOP_DETECT kNtErrorDsLoopDetect
+#define ERROR_DS_NAMING_VIOLATION kNtErrorDsNamingViolation
+#define ERROR_DS_OBJECT_RESULTS_TOO_LARGE kNtErrorDsObjectResultsTooLarge
+#define ERROR_DS_AFFECTS_MULTIPLE_DSAS kNtErrorDsAffectsMultipleDsas
+#define ERROR_DS_SERVER_DOWN kNtErrorDsServerDown
+#define ERROR_DS_LOCAL_ERROR kNtErrorDsLocalError
+#define ERROR_DS_ENCODING_ERROR kNtErrorDsEncodingError
+#define ERROR_DS_DECODING_ERROR kNtErrorDsDecodingError
+#define ERROR_DS_FILTER_UNKNOWN kNtErrorDsFilterUnknown
+#define ERROR_DS_PARAM_ERROR kNtErrorDsParamError
+#define ERROR_DS_NOT_SUPPORTED kNtErrorDsNotSupported
+#define ERROR_DS_NO_RESULTS_RETURNED kNtErrorDsNoResultsReturned
+#define ERROR_DS_CONTROL_NOT_FOUND kNtErrorDsControlNotFound
+#define ERROR_DS_CLIENT_LOOP kNtErrorDsClientLoop
+#define ERROR_DS_REFERRAL_LIMIT_EXCEEDED kNtErrorDsReferralLimitExceeded
+#define ERROR_DS_SORT_CONTROL_MISSING kNtErrorDsSortControlMissing
+#define ERROR_DS_OFFSET_RANGE_ERROR kNtErrorDsOffsetRangeError
+#define ERROR_DS_RIDMGR_DISABLED kNtErrorDsRidmgrDisabled
+#define ERROR_DS_ROOT_MUST_BE_NC kNtErrorDsRootMustBeNc
+#define ERROR_DS_ADD_REPLICA_INHIBITED kNtErrorDsAddReplicaInhibited
+#define ERROR_DS_ATT_NOT_DEF_IN_SCHEMA kNtErrorDsAttNotDefInSchema
+#define ERROR_DS_MAX_OBJ_SIZE_EXCEEDED kNtErrorDsMaxObjSizeExceeded
+#define ERROR_DS_OBJ_STRING_NAME_EXISTS kNtErrorDsObjStringNameExists
+#define ERROR_DS_NO_RDN_DEFINED_IN_SCHEMA kNtErrorDsNoRdnDefinedInSchema
+#define ERROR_DS_RDN_DOESNT_MATCH_SCHEMA kNtErrorDsRdnDoesntMatchSchema
+#define ERROR_DS_NO_REQUESTED_ATTS_FOUND kNtErrorDsNoRequestedAttsFound
+#define ERROR_DS_USER_BUFFER_TO_SMALL kNtErrorDsUserBufferToSmall
+#define ERROR_DS_ATT_IS_NOT_ON_OBJ kNtErrorDsAttIsNotOnObj
+#define ERROR_DS_ILLEGAL_MOD_OPERATION kNtErrorDsIllegalModOperation
+#define ERROR_DS_OBJ_TOO_LARGE kNtErrorDsObjTooLarge
+#define ERROR_DS_BAD_INSTANCE_TYPE kNtErrorDsBadInstanceType
+#define ERROR_DS_MASTERDSA_REQUIRED kNtErrorDsMasterdsaRequired
+#define ERROR_DS_OBJECT_CLASS_REQUIRED kNtErrorDsObjectClassRequired
+#define ERROR_DS_MISSING_REQUIRED_ATT kNtErrorDsMissingRequiredAtt
+#define ERROR_DS_ATT_NOT_DEF_FOR_CLASS kNtErrorDsAttNotDefForClass
+#define ERROR_DS_ATT_ALREADY_EXISTS kNtErrorDsAttAlreadyExists
+#define ERROR_DS_CANT_ADD_ATT_VALUES kNtErrorDsCantAddAttValues
+#define ERROR_DS_SINGLE_VALUE_CONSTRAINT kNtErrorDsSingleValueConstraint
+#define ERROR_DS_RANGE_CONSTRAINT kNtErrorDsRangeConstraint
+#define ERROR_DS_ATT_VAL_ALREADY_EXISTS kNtErrorDsAttValAlreadyExists
+#define ERROR_DS_CANT_REM_MISSING_ATT kNtErrorDsCantRemMissingAtt
+#define ERROR_DS_CANT_REM_MISSING_ATT_VAL kNtErrorDsCantRemMissingAttVal
+#define ERROR_DS_ROOT_CANT_BE_SUBREF kNtErrorDsRootCantBeSubref
+#define ERROR_DS_NO_CHAINING kNtErrorDsNoChaining
+#define ERROR_DS_NO_CHAINED_EVAL kNtErrorDsNoChainedEval
+#define ERROR_DS_NO_PARENT_OBJECT kNtErrorDsNoParentObject
+#define ERROR_DS_PARENT_IS_AN_ALIAS kNtErrorDsParentIsAnAlias
+#define ERROR_DS_CANT_MIX_MASTER_AND_REPS kNtErrorDsCantMixMasterAndReps
+#define ERROR_DS_CHILDREN_EXIST kNtErrorDsChildrenExist
+#define ERROR_DS_OBJ_NOT_FOUND kNtErrorDsObjNotFound
+#define ERROR_DS_ALIASED_OBJ_MISSING kNtErrorDsAliasedObjMissing
+#define ERROR_DS_BAD_NAME_SYNTAX kNtErrorDsBadNameSyntax
+#define ERROR_DS_ALIAS_POINTS_TO_ALIAS kNtErrorDsAliasPointsToAlias
+#define ERROR_DS_CANT_DEREF_ALIAS kNtErrorDsCantDerefAlias
+#define ERROR_DS_OUT_OF_SCOPE kNtErrorDsOutOfScope
+#define ERROR_DS_OBJECT_BEING_REMOVED kNtErrorDsObjectBeingRemoved
+#define ERROR_DS_CANT_DELETE_DSA_OBJ kNtErrorDsCantDeleteDsaObj
+#define ERROR_DS_GENERIC_ERROR kNtErrorDsGenericError
+#define ERROR_DS_DSA_MUST_BE_INT_MASTER kNtErrorDsDsaMustBeIntMaster
+#define ERROR_DS_CLASS_NOT_DSA kNtErrorDsClassNotDsa
+#define ERROR_DS_INSUFF_ACCESS_RIGHTS kNtErrorDsInsuffAccessRights
+#define ERROR_DS_ILLEGAL_SUPERIOR kNtErrorDsIllegalSuperior
+#define ERROR_DS_ATTRIBUTE_OWNED_BY_SAM kNtErrorDsAttributeOwnedBySam
+#define ERROR_DS_NAME_TOO_MANY_PARTS kNtErrorDsNameTooManyParts
+#define ERROR_DS_NAME_TOO_LONG kNtErrorDsNameTooLong
+#define ERROR_DS_NAME_VALUE_TOO_LONG kNtErrorDsNameValueTooLong
+#define ERROR_DS_NAME_UNPARSEABLE kNtErrorDsNameUnparseable
+#define ERROR_DS_NAME_TYPE_UNKNOWN kNtErrorDsNameTypeUnknown
+#define ERROR_DS_NOT_AN_OBJECT kNtErrorDsNotAnObject
+#define ERROR_DS_SEC_DESC_TOO_SHORT kNtErrorDsSecDescTooShort
+#define ERROR_DS_SEC_DESC_INVALID kNtErrorDsSecDescInvalid
+#define ERROR_DS_NO_DELETED_NAME kNtErrorDsNoDeletedName
+#define ERROR_DS_SUBREF_MUST_HAVE_PARENT kNtErrorDsSubrefMustHaveParent
+#define ERROR_DS_NCNAME_MUST_BE_NC kNtErrorDsNcnameMustBeNc
+#define ERROR_DS_CANT_ADD_SYSTEM_ONLY kNtErrorDsCantAddSystemOnly
+#define ERROR_DS_CLASS_MUST_BE_CONCRETE kNtErrorDsClassMustBeConcrete
+#define ERROR_DS_INVALID_DMD kNtErrorDsInvalidDmd
+#define ERROR_DS_OBJ_GUID_EXISTS kNtErrorDsObjGuidExists
+#define ERROR_DS_NOT_ON_BACKLINK kNtErrorDsNotOnBacklink
+#define ERROR_DS_NO_CROSSREF_FOR_NC kNtErrorDsNoCrossrefForNc
+#define ERROR_DS_SHUTTING_DOWN kNtErrorDsShuttingDown
+#define ERROR_DS_UNKNOWN_OPERATION kNtErrorDsUnknownOperation
+#define ERROR_DS_INVALID_ROLE_OWNER kNtErrorDsInvalidRoleOwner
+#define ERROR_DS_COULDNT_CONTACT_FSMO kNtErrorDsCouldntContactFsmo
+#define ERROR_DS_CROSS_NC_DN_RENAME kNtErrorDsCrossNcDnRename
+#define ERROR_DS_CANT_MOD_SYSTEM_ONLY kNtErrorDsCantModSystemOnly
+#define ERROR_DS_REPLICATOR_ONLY kNtErrorDsReplicatorOnly
+#define ERROR_DS_OBJ_CLASS_NOT_DEFINED kNtErrorDsObjClassNotDefined
+#define ERROR_DS_OBJ_CLASS_NOT_SUBCLASS kNtErrorDsObjClassNotSubclass
+#define ERROR_DS_NAME_REFERENCE_INVALID kNtErrorDsNameReferenceInvalid
+#define ERROR_DS_CROSS_REF_EXISTS kNtErrorDsCrossRefExists
+#define ERROR_DS_CANT_DEL_MASTER_CROSSREF kNtErrorDsCantDelMasterCrossref
+#define ERROR_DS_SUBTREE_NOTIFY_NOT_NC_HEAD kNtErrorDsSubtreeNotifyNotNcHead
+#define ERROR_DS_NOTIFY_FILTER_TOO_COMPLEX kNtErrorDsNotifyFilterTooComplex
+#define ERROR_DS_DUP_RDN kNtErrorDsDupRdn
+#define ERROR_DS_DUP_OID kNtErrorDsDupOid
+#define ERROR_DS_DUP_MAPI_ID kNtErrorDsDupMapiId
+#define ERROR_DS_DUP_SCHEMA_ID_GUID kNtErrorDsDupSchemaIdGuid
+#define ERROR_DS_DUP_LDAP_DISPLAY_NAME kNtErrorDsDupLdapDisplayName
+#define ERROR_DS_SEMANTIC_ATT_TEST kNtErrorDsSemanticAttTest
+#define ERROR_DS_SYNTAX_MISMATCH kNtErrorDsSyntaxMismatch
+#define ERROR_DS_EXISTS_IN_MUST_HAVE kNtErrorDsExistsInMustHave
+#define ERROR_DS_EXISTS_IN_MAY_HAVE kNtErrorDsExistsInMayHave
+#define ERROR_DS_NONEXISTENT_MAY_HAVE kNtErrorDsNonexistentMayHave
+#define ERROR_DS_NONEXISTENT_MUST_HAVE kNtErrorDsNonexistentMustHave
+#define ERROR_DS_AUX_CLS_TEST_FAIL kNtErrorDsAuxClsTestFail
+#define ERROR_DS_NONEXISTENT_POSS_SUP kNtErrorDsNonexistentPossSup
+#define ERROR_DS_SUB_CLS_TEST_FAIL kNtErrorDsSubClsTestFail
+#define ERROR_DS_BAD_RDN_ATT_ID_SYNTAX kNtErrorDsBadRdnAttIdSyntax
+#define ERROR_DS_EXISTS_IN_AUX_CLS kNtErrorDsExistsInAuxCls
+#define ERROR_DS_EXISTS_IN_SUB_CLS kNtErrorDsExistsInSubCls
+#define ERROR_DS_EXISTS_IN_POSS_SUP kNtErrorDsExistsInPossSup
+#define ERROR_DS_RECALCSCHEMA_FAILED kNtErrorDsRecalcschemaFailed
+#define ERROR_DS_TREE_DELETE_NOT_FINISHED kNtErrorDsTreeDeleteNotFinished
+#define ERROR_DS_CANT_DELETE kNtErrorDsCantDelete
+#define ERROR_DS_ATT_SCHEMA_REQ_ID kNtErrorDsAttSchemaReqId
+#define ERROR_DS_BAD_ATT_SCHEMA_SYNTAX kNtErrorDsBadAttSchemaSyntax
+#define ERROR_DS_CANT_CACHE_ATT kNtErrorDsCantCacheAtt
+#define ERROR_DS_CANT_CACHE_CLASS kNtErrorDsCantCacheClass
+#define ERROR_DS_CANT_REMOVE_ATT_CACHE kNtErrorDsCantRemoveAttCache
+#define ERROR_DS_CANT_REMOVE_CLASS_CACHE kNtErrorDsCantRemoveClassCache
+#define ERROR_DS_CANT_RETRIEVE_DN kNtErrorDsCantRetrieveDn
+#define ERROR_DS_MISSING_SUPREF kNtErrorDsMissingSupref
+#define ERROR_DS_CANT_RETRIEVE_INSTANCE kNtErrorDsCantRetrieveInstance
+#define ERROR_DS_CODE_INCONSISTENCY kNtErrorDsCodeInconsistency
+#define ERROR_DS_DATABASE_ERROR kNtErrorDsDatabaseError
+#define ERROR_DS_GOVERNSID_MISSING kNtErrorDsGovernsidMissing
+#define ERROR_DS_MISSING_EXPECTED_ATT kNtErrorDsMissingExpectedAtt
+#define ERROR_DS_NCNAME_MISSING_CR_REF kNtErrorDsNcnameMissingCrRef
+#define ERROR_DS_SECURITY_CHECKING_ERROR kNtErrorDsSecurityCheckingError
+#define ERROR_DS_SCHEMA_NOT_LOADED kNtErrorDsSchemaNotLoaded
+#define ERROR_DS_SCHEMA_ALLOC_FAILED kNtErrorDsSchemaAllocFailed
+#define ERROR_DS_ATT_SCHEMA_REQ_SYNTAX kNtErrorDsAttSchemaReqSyntax
+#define ERROR_DS_GCVERIFY_ERROR kNtErrorDsGcverifyError
+#define ERROR_DS_DRA_SCHEMA_MISMATCH kNtErrorDsDraSchemaMismatch
+#define ERROR_DS_CANT_FIND_DSA_OBJ kNtErrorDsCantFindDsaObj
+#define ERROR_DS_CANT_FIND_EXPECTED_NC kNtErrorDsCantFindExpectedNc
+#define ERROR_DS_CANT_FIND_NC_IN_CACHE kNtErrorDsCantFindNcInCache
+#define ERROR_DS_CANT_RETRIEVE_CHILD kNtErrorDsCantRetrieveChild
+#define ERROR_DS_SECURITY_ILLEGAL_MODIFY kNtErrorDsSecurityIllegalModify
+#define ERROR_DS_CANT_REPLACE_HIDDEN_REC kNtErrorDsCantReplaceHiddenRec
+#define ERROR_DS_BAD_HIERARCHY_FILE kNtErrorDsBadHierarchyFile
+#define ERROR_DS_BUILD_HIERARCHY_TABLE_FAILED kNtErrorDsBuildHierarchyTableFailed
+#define ERROR_DS_CONFIG_PARAM_MISSING kNtErrorDsConfigParamMissing
+#define ERROR_DS_COUNTING_AB_INDICES_FAILED kNtErrorDsCountingAbIndicesFailed
+#define ERROR_DS_HIERARCHY_TABLE_MALLOC_FAILED kNtErrorDsHierarchyTableMallocFailed
+#define ERROR_DS_INTERNAL_FAILURE kNtErrorDsInternalFailure
+#define ERROR_DS_UNKNOWN_ERROR kNtErrorDsUnknownError
+#define ERROR_DS_ROOT_REQUIRES_CLASS_TOP kNtErrorDsRootRequiresClassTop
+#define ERROR_DS_REFUSING_FSMO_ROLES kNtErrorDsRefusingFsmoRoles
+#define ERROR_DS_MISSING_FSMO_SETTINGS kNtErrorDsMissingFsmoSettings
+#define ERROR_DS_UNABLE_TO_SURRENDER_ROLES kNtErrorDsUnableToSurrenderRoles
+#define ERROR_DS_DRA_GENERIC kNtErrorDsDraGeneric
+#define ERROR_DS_DRA_INVALID_PARAMETER kNtErrorDsDraInvalidParameter
+#define ERROR_DS_DRA_BUSY kNtErrorDsDraBusy
+#define ERROR_DS_DRA_BAD_DN kNtErrorDsDraBadDn
+#define ERROR_DS_DRA_BAD_NC kNtErrorDsDraBadNc
+#define ERROR_DS_DRA_DN_EXISTS kNtErrorDsDraDnExists
+#define ERROR_DS_DRA_INTERNAL_ERROR kNtErrorDsDraInternalError
+#define ERROR_DS_DRA_INCONSISTENT_DIT kNtErrorDsDraInconsistentDit
+#define ERROR_DS_DRA_CONNECTION_FAILED kNtErrorDsDraConnectionFailed
+#define ERROR_DS_DRA_BAD_INSTANCE_TYPE kNtErrorDsDraBadInstanceType
+#define ERROR_DS_DRA_OUT_OF_MEM kNtErrorDsDraOutOfMem
+#define ERROR_DS_DRA_MAIL_PROBLEM kNtErrorDsDraMailProblem
+#define ERROR_DS_DRA_REF_ALREADY_EXISTS kNtErrorDsDraRefAlreadyExists
+#define ERROR_DS_DRA_REF_NOT_FOUND kNtErrorDsDraRefNotFound
+#define ERROR_DS_DRA_OBJ_IS_REP_SOURCE kNtErrorDsDraObjIsRepSource
+#define ERROR_DS_DRA_DB_ERROR kNtErrorDsDraDbError
+#define ERROR_DS_DRA_NO_REPLICA kNtErrorDsDraNoReplica
+#define ERROR_DS_DRA_ACCESS_DENIED kNtErrorDsDraAccessDenied
+#define ERROR_DS_DRA_NOT_SUPPORTED kNtErrorDsDraNotSupported
+#define ERROR_DS_DRA_RPC_CANCELLED kNtErrorDsDraRpcCancelled
+#define ERROR_DS_DRA_SOURCE_DISABLED kNtErrorDsDraSourceDisabled
+#define ERROR_DS_DRA_SINK_DISABLED kNtErrorDsDraSinkDisabled
+#define ERROR_DS_DRA_NAME_COLLISION kNtErrorDsDraNameCollision
+#define ERROR_DS_DRA_SOURCE_REINSTALLED kNtErrorDsDraSourceReinstalled
+#define ERROR_DS_DRA_MISSING_PARENT kNtErrorDsDraMissingParent
+#define ERROR_DS_DRA_PREEMPTED kNtErrorDsDraPreempted
+#define ERROR_DS_DRA_ABANDON_SYNC kNtErrorDsDraAbandonSync
+#define ERROR_DS_DRA_SHUTDOWN kNtErrorDsDraShutdown
+#define ERROR_DS_DRA_INCOMPATIBLE_PARTIAL_SET kNtErrorDsDraIncompatiblePartialSet
+#define ERROR_DS_DRA_SOURCE_IS_PARTIAL_REPLICA kNtErrorDsDraSourceIsPartialReplica
+#define ERROR_DS_DRA_EXTN_CONNECTION_FAILED kNtErrorDsDraExtnConnectionFailed
+#define ERROR_DS_INSTALL_SCHEMA_MISMATCH kNtErrorDsInstallSchemaMismatch
+#define ERROR_DS_DUP_LINK_ID kNtErrorDsDupLinkId
+#define ERROR_DS_NAME_ERROR_RESOLVING kNtErrorDsNameErrorResolving
+#define ERROR_DS_NAME_ERROR_NOT_FOUND kNtErrorDsNameErrorNotFound
+#define ERROR_DS_NAME_ERROR_NOT_UNIQUE kNtErrorDsNameErrorNotUnique
+#define ERROR_DS_NAME_ERROR_NO_MAPPING kNtErrorDsNameErrorNoMapping
+#define ERROR_DS_NAME_ERROR_DOMAIN_ONLY kNtErrorDsNameErrorDomainOnly
+#define ERROR_DS_NAME_ERROR_NO_SYNTACTICAL_MAPPING kNtErrorDsNameErrorNoSyntacticalMapping
+#define ERROR_DS_CONSTRUCTED_ATT_MOD kNtErrorDsConstructedAttMod
+#define ERROR_DS_WRONG_OM_OBJ_CLASS kNtErrorDsWrongOmObjClass
+#define ERROR_DS_DRA_REPL_PENDING kNtErrorDsDraReplPending
+#define ERROR_DS_DS_REQUIRED kNtErrorDsDsRequired
+#define ERROR_DS_INVALID_LDAP_DISPLAY_NAME kNtErrorDsInvalidLdapDisplayName
+#define ERROR_DS_NON_BASE_SEARCH kNtErrorDsNonBaseSearch
+#define ERROR_DS_CANT_RETRIEVE_ATTS kNtErrorDsCantRetrieveAtts
+#define ERROR_DS_BACKLINK_WITHOUT_LINK kNtErrorDsBacklinkWithoutLink
+#define ERROR_DS_EPOCH_MISMATCH kNtErrorDsEpochMismatch
+#define ERROR_DS_SRC_NAME_MISMATCH kNtErrorDsSrcNameMismatch
+#define ERROR_DS_SRC_AND_DST_NC_IDENTICAL kNtErrorDsSrcAndDstNcIdentical
+#define ERROR_DS_DST_NC_MISMATCH kNtErrorDsDstNcMismatch
+#define ERROR_DS_NOT_AUTHORITIVE_FOR_DST_NC kNtErrorDsNotAuthoritiveForDstNc
+#define ERROR_DS_SRC_GUID_MISMATCH kNtErrorDsSrcGuidMismatch
+#define ERROR_DS_CANT_MOVE_DELETED_OBJECT kNtErrorDsCantMoveDeletedObject
+#define ERROR_DS_PDC_OPERATION_IN_PROGRESS kNtErrorDsPdcOperationInProgress
+#define ERROR_DS_CROSS_DOMAIN_CLEANUP_REQD kNtErrorDsCrossDomainCleanupReqd
+#define ERROR_DS_ILLEGAL_XDOM_MOVE_OPERATION kNtErrorDsIllegalXdomMoveOperation
+#define ERROR_DS_CANT_WITH_ACCT_GROUP_MEMBERSHPS kNtErrorDsCantWithAcctGroupMembershps
+#define ERROR_DS_NC_MUST_HAVE_NC_PARENT kNtErrorDsNcMustHaveNcParent
+#define ERROR_DS_CR_IMPOSSIBLE_TO_VALIDATE kNtErrorDsCrImpossibleToValidate
+#define ERROR_DS_DST_DOMAIN_NOT_NATIVE kNtErrorDsDstDomainNotNative
+#define ERROR_DS_MISSING_INFRASTRUCTURE_CONTAINER kNtErrorDsMissingInfrastructureContainer
+#define ERROR_DS_CANT_MOVE_ACCOUNT_GROUP kNtErrorDsCantMoveAccountGroup
+#define ERROR_DS_CANT_MOVE_RESOURCE_GROUP kNtErrorDsCantMoveResourceGroup
+#define ERROR_DS_INVALID_SEARCH_FLAG kNtErrorDsInvalidSearchFlag
+#define ERROR_DS_NO_TREE_DELETE_ABOVE_NC kNtErrorDsNoTreeDeleteAboveNc
+#define ERROR_DS_COULDNT_LOCK_TREE_FOR_DELETE kNtErrorDsCouldntLockTreeForDelete
+#define ERROR_DS_COULDNT_IDENTIFY_OBJECTS_FOR_TREE_DELETE kNtErrorDsCouldntIdentifyObjectsForTreeDelete
+#define ERROR_DS_SAM_INIT_FAILURE kNtErrorDsSamInitFailure
+#define ERROR_DS_SENSITIVE_GROUP_VIOLATION kNtErrorDsSensitiveGroupViolation
+#define ERROR_DS_CANT_MOD_PRIMARYGROUPID kNtErrorDsCantModPrimarygroupid
+#define ERROR_DS_ILLEGAL_BASE_SCHEMA_MOD kNtErrorDsIllegalBaseSchemaMod
+#define ERROR_DS_NONSAFE_SCHEMA_CHANGE kNtErrorDsNonsafeSchemaChange
+#define ERROR_DS_SCHEMA_UPDATE_DISALLOWED kNtErrorDsSchemaUpdateDisallowed
+#define ERROR_DS_CANT_CREATE_UNDER_SCHEMA kNtErrorDsCantCreateUnderSchema
+#define ERROR_DS_INSTALL_NO_SRC_SCH_VERSION kNtErrorDsInstallNoSrcSchVersion
+#define ERROR_DS_INSTALL_NO_SCH_VERSION_IN_INIFILE kNtErrorDsInstallNoSchVersionInInifile
+#define ERROR_DS_INVALID_GROUP_TYPE kNtErrorDsInvalidGroupType
+#define ERROR_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN kNtErrorDsNoNestGlobalgroupInMixeddomain
+#define ERROR_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN kNtErrorDsNoNestLocalgroupInMixeddomain
+#define ERROR_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER kNtErrorDsGlobalCantHaveLocalMember
+#define ERROR_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER kNtErrorDsGlobalCantHaveUniversalMember
+#define ERROR_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER kNtErrorDsUniversalCantHaveLocalMember
+#define ERROR_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER kNtErrorDsGlobalCantHaveCrossdomainMember
+#define ERROR_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER kNtErrorDsLocalCantHaveCrossdomainLocalMember
+#define ERROR_DS_HAVE_PRIMARY_MEMBERS kNtErrorDsHavePrimaryMembers
+#define ERROR_DS_STRING_SD_CONVERSION_FAILED kNtErrorDsStringSdConversionFailed
+#define ERROR_DS_NAMING_MASTER_GC kNtErrorDsNamingMasterGc
+#define ERROR_DS_DNS_LOOKUP_FAILURE kNtErrorDsDnsLookupFailure
+#define ERROR_DS_COULDNT_UPDATE_SPNS kNtErrorDsCouldntUpdateSpns
+#define ERROR_DS_CANT_RETRIEVE_SD kNtErrorDsCantRetrieveSd
+#define ERROR_DS_KEY_NOT_UNIQUE kNtErrorDsKeyNotUnique
+#define ERROR_DS_WRONG_LINKED_ATT_SYNTAX kNtErrorDsWrongLinkedAttSyntax
+#define ERROR_DS_SAM_NEED_BOOTKEY_PASSWORD kNtErrorDsSamNeedBootkeyPassword
+#define ERROR_DS_SAM_NEED_BOOTKEY_FLOPPY kNtErrorDsSamNeedBootkeyFloppy
+#define ERROR_DS_CANT_START kNtErrorDsCantStart
+#define ERROR_DS_INIT_FAILURE kNtErrorDsInitFailure
+#define ERROR_DS_NO_PKT_PRIVACY_ON_CONNECTION kNtErrorDsNoPktPrivacyOnConnection
+#define ERROR_DS_SOURCE_DOMAIN_IN_FOREST kNtErrorDsSourceDomainInForest
+#define ERROR_DS_DESTINATION_DOMAIN_NOT_IN_FOREST kNtErrorDsDestinationDomainNotInForest
+#define ERROR_DS_DESTINATION_AUDITING_NOT_ENABLED kNtErrorDsDestinationAuditingNotEnabled
+#define ERROR_DS_CANT_FIND_DC_FOR_SRC_DOMAIN kNtErrorDsCantFindDcForSrcDomain
+#define ERROR_DS_SRC_OBJ_NOT_GROUP_OR_USER kNtErrorDsSrcObjNotGroupOrUser
+#define ERROR_DS_SRC_SID_EXISTS_IN_FOREST kNtErrorDsSrcSidExistsInForest
+#define ERROR_DS_SRC_AND_DST_OBJECT_CLASS_MISMATCH kNtErrorDsSrcAndDstObjectClassMismatch
+#define ERROR_SAM_INIT_FAILURE kNtErrorSamInitFailure
+#define ERROR_DS_DRA_SCHEMA_INFO_SHIP kNtErrorDsDraSchemaInfoShip
+#define ERROR_DS_DRA_SCHEMA_CONFLICT kNtErrorDsDraSchemaConflict
+#define ERROR_DS_DRA_EARLIER_SCHEMA_CONFLICT kNtErrorDsDraEarlierSchemaConflict
+#define ERROR_DS_DRA_OBJ_NC_MISMATCH kNtErrorDsDraObjNcMismatch
+#define ERROR_DS_NC_STILL_HAS_DSAS kNtErrorDsNcStillHasDsas
+#define ERROR_DS_GC_REQUIRED kNtErrorDsGcRequired
+#define ERROR_DS_LOCAL_MEMBER_OF_LOCAL_ONLY kNtErrorDsLocalMemberOfLocalOnly
+#define ERROR_DS_NO_FPO_IN_UNIVERSAL_GROUPS kNtErrorDsNoFpoInUniversalGroups
+#define ERROR_DS_CANT_ADD_TO_GC kNtErrorDsCantAddToGc
+#define ERROR_DS_NO_CHECKPOINT_WITH_PDC kNtErrorDsNoCheckpointWithPdc
+#define ERROR_DS_SOURCE_AUDITING_NOT_ENABLED kNtErrorDsSourceAuditingNotEnabled
+#define ERROR_DS_CANT_CREATE_IN_NONDOMAIN_NC kNtErrorDsCantCreateInNondomainNc
+#define ERROR_DS_INVALID_NAME_FOR_SPN kNtErrorDsInvalidNameForSpn
+#define ERROR_DS_FILTER_USES_CONTRUCTED_ATTRS kNtErrorDsFilterUsesContructedAttrs
+#define ERROR_DS_UNICODEPWD_NOT_IN_QUOTES kNtErrorDsUnicodepwdNotInQuotes
+#define ERROR_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED kNtErrorDsMachineAccountQuotaExceeded
+#define ERROR_DS_MUST_BE_RUN_ON_DST_DC kNtErrorDsMustBeRunOnDstDc
+#define ERROR_DS_SRC_DC_MUST_BE_SP4_OR_GREATER kNtErrorDsSrcDcMustBeSp4OrGreater
+#define ERROR_DS_CANT_TREE_DELETE_CRITICAL_OBJ kNtErrorDsCantTreeDeleteCriticalObj
+#define ERROR_DS_INIT_FAILURE_CONSOLE kNtErrorDsInitFailureConsole
+#define ERROR_DS_SAM_INIT_FAILURE_CONSOLE kNtErrorDsSamInitFailureConsole
+#define ERROR_DS_FOREST_VERSION_TOO_HIGH kNtErrorDsForestVersionTooHigh
+#define ERROR_DS_DOMAIN_VERSION_TOO_HIGH kNtErrorDsDomainVersionTooHigh
+#define ERROR_DS_FOREST_VERSION_TOO_LOW kNtErrorDsForestVersionTooLow
+#define ERROR_DS_DOMAIN_VERSION_TOO_LOW kNtErrorDsDomainVersionTooLow
+#define ERROR_DS_INCOMPATIBLE_VERSION kNtErrorDsIncompatibleVersion
+#define ERROR_DS_LOW_DSA_VERSION kNtErrorDsLowDsaVersion
+#define ERROR_DS_NO_BEHAVIOR_VERSION_IN_MIXEDDOMAIN kNtErrorDsNoBehaviorVersionInMixeddomain
+#define ERROR_DS_NOT_SUPPORTED_SORT_ORDER kNtErrorDsNotSupportedSortOrder
+#define ERROR_DS_NAME_NOT_UNIQUE kNtErrorDsNameNotUnique
+#define ERROR_DS_MACHINE_ACCOUNT_CREATED_PRENT4 kNtErrorDsMachineAccountCreatedPrent4
+#define ERROR_DS_OUT_OF_VERSION_STORE kNtErrorDsOutOfVersionStore
+#define ERROR_DS_INCOMPATIBLE_CONTROLS_USED kNtErrorDsIncompatibleControlsUsed
+#define ERROR_DS_NO_REF_DOMAIN kNtErrorDsNoRefDomain
+#define ERROR_DS_RESERVED_LINK_ID kNtErrorDsReservedLinkId
+#define ERROR_DS_LINK_ID_NOT_AVAILABLE kNtErrorDsLinkIdNotAvailable
+#define ERROR_DS_AG_CANT_HAVE_UNIVERSAL_MEMBER kNtErrorDsAgCantHaveUniversalMember
+#define ERROR_DS_MODIFYDN_DISALLOWED_BY_INSTANCE_TYPE kNtErrorDsModifydnDisallowedByInstanceType
+#define ERROR_DS_NO_OBJECT_MOVE_IN_SCHEMA_NC kNtErrorDsNoObjectMoveInSchemaNc
+#define ERROR_DS_MODIFYDN_DISALLOWED_BY_FLAG kNtErrorDsModifydnDisallowedByFlag
+#define ERROR_DS_MODIFYDN_WRONG_GRANDPARENT kNtErrorDsModifydnWrongGrandparent
+#define ERROR_DS_NAME_ERROR_TRUST_REFERRAL kNtErrorDsNameErrorTrustReferral
+#define ERROR_NOT_SUPPORTED_ON_STANDARD_SERVER kNtErrorNotSupportedOnStandardServer
+#define ERROR_DS_CANT_ACCESS_REMOTE_PART_OF_AD kNtErrorDsCantAccessRemotePartOfAd
+#define ERROR_DS_CR_IMPOSSIBLE_TO_VALIDATE_V2 kNtErrorDsCrImpossibleToValidateV2
+#define ERROR_DS_THREAD_LIMIT_EXCEEDED kNtErrorDsThreadLimitExceeded
+#define ERROR_DS_NOT_CLOSEST kNtErrorDsNotClosest
+#define ERROR_DS_CANT_DERIVE_SPN_WITHOUT_SERVER_REF kNtErrorDsCantDeriveSpnWithoutServerRef
+#define ERROR_DS_SINGLE_USER_MODE_FAILED kNtErrorDsSingleUserModeFailed
+#define ERROR_DS_NTDSCRIPT_SYNTAX_ERROR kNtErrorDsNtdscriptSyntaxError
+#define ERROR_DS_NTDSCRIPT_PROCESS_ERROR kNtErrorDsNtdscriptProcessError
+#define ERROR_DS_DIFFERENT_REPL_EPOCHS kNtErrorDsDifferentReplEpochs
+#define ERROR_DS_DRS_EXTENSIONS_CHANGED kNtErrorDsDrsExtensionsChanged
+#define ERROR_DS_REPLICA_SET_CHANGE_NOT_ALLOWED_ON_DISABLED_CR kNtErrorDsReplicaSetChangeNotAllowedOnDisabledCr
+#define ERROR_DS_NO_MSDS_INTID kNtErrorDsNoMsdsIntid
+#define ERROR_DS_DUP_MSDS_INTID kNtErrorDsDupMsdsIntid
+#define ERROR_DS_EXISTS_IN_RDNATTID kNtErrorDsExistsInRdnattid
+#define ERROR_DS_AUTHORIZATION_FAILED kNtErrorDsAuthorizationFailed
+#define ERROR_DS_INVALID_SCRIPT kNtErrorDsInvalidScript
+#define ERROR_DS_REMOTE_CROSSREF_OP_FAILED kNtErrorDsRemoteCrossrefOpFailed
+#define ERROR_DS_CROSS_REF_BUSY kNtErrorDsCrossRefBusy
+#define ERROR_DS_CANT_DERIVE_SPN_FOR_DELETED_DOMAIN kNtErrorDsCantDeriveSpnForDeletedDomain
+#define ERROR_DS_CANT_DEMOTE_WITH_WRITEABLE_NC kNtErrorDsCantDemoteWithWriteableNc
+#define ERROR_DS_DUPLICATE_ID_FOUND kNtErrorDsDuplicateIdFound
+#define ERROR_DS_INSUFFICIENT_ATTR_TO_CREATE_OBJECT kNtErrorDsInsufficientAttrToCreateObject
+#define ERROR_DS_GROUP_CONVERSION_ERROR kNtErrorDsGroupConversionError
+#define ERROR_DS_CANT_MOVE_APP_BASIC_GROUP kNtErrorDsCantMoveAppBasicGroup
+#define ERROR_DS_CANT_MOVE_APP_QUERY_GROUP kNtErrorDsCantMoveAppQueryGroup
+#define ERROR_DS_ROLE_NOT_VERIFIED kNtErrorDsRoleNotVerified
+#define ERROR_DS_WKO_CONTAINER_CANNOT_BE_SPECIAL kNtErrorDsWkoContainerCannotBeSpecial
+#define ERROR_DS_DOMAIN_RENAME_IN_PROGRESS kNtErrorDsDomainRenameInProgress
+#define ERROR_DS_EXISTING_AD_CHILD_NC kNtErrorDsExistingAdChildNc
+#define ERROR_DS_REPL_LIFETIME_EXCEEDED kNtErrorDsReplLifetimeExceeded
+#define ERROR_DS_DISALLOWED_IN_SYSTEM_CONTAINER kNtErrorDsDisallowedInSystemContainer
+#define ERROR_DS_LDAP_SEND_QUEUE_FULL kNtErrorDsLdapSendQueueFull
+#define ERROR_DS_DRA_OUT_SCHEDULE_WINDOW kNtErrorDsDraOutScheduleWindow
+#define ERROR_DS_POLICY_NOT_KNOWN kNtErrorDsPolicyNotKnown
+#define ERROR_NO_SITE_SETTINGS_OBJECT kNtErrorNoSiteSettingsObject
+#define ERROR_NO_SECRETS kNtErrorNoSecrets
+#define ERROR_NO_WRITABLE_DC_FOUND kNtErrorNoWritableDcFound
+#define ERROR_DS_NO_SERVER_OBJECT kNtErrorDsNoServerObject
+#define ERROR_DS_NO_NTDSA_OBJECT kNtErrorDsNoNtdsaObject
+#define ERROR_DS_NON_ASQ_SEARCH kNtErrorDsNonAsqSearch
+#define ERROR_DS_AUDIT_FAILURE kNtErrorDsAuditFailure
+#define ERROR_DS_INVALID_SEARCH_FLAG_SUBTREE kNtErrorDsInvalidSearchFlagSubtree
+#define ERROR_DS_INVALID_SEARCH_FLAG_TUPLE kNtErrorDsInvalidSearchFlagTuple
+#define ERROR_DS_HIERARCHY_TABLE_TOO_DEEP kNtErrorDsHierarchyTableTooDeep
+#define ERROR_DS_DRA_CORRUPT_UTD_VECTOR kNtErrorDsDraCorruptUtdVector
+#define ERROR_DS_DRA_SECRETS_DENIED kNtErrorDsDraSecretsDenied
+#define ERROR_DS_RESERVED_MAPI_ID kNtErrorDsReservedMapiId
+#define ERROR_DS_MAPI_ID_NOT_AVAILABLE kNtErrorDsMapiIdNotAvailable
+#define ERROR_DS_DRA_MISSING_KRBTGT_SECRET kNtErrorDsDraMissingKrbtgtSecret
+#define ERROR_DS_DOMAIN_NAME_EXISTS_IN_FOREST kNtErrorDsDomainNameExistsInForest
+#define ERROR_DS_FLAT_NAME_EXISTS_IN_FOREST kNtErrorDsFlatNameExistsInForest
+#define ERROR_INVALID_USER_PRINCIPAL_NAME kNtErrorInvalidUserPrincipalName
+#define ERROR_DS_OID_MAPPED_GROUP_CANT_HAVE_MEMBERS kNtErrorDsOidMappedGroupCantHaveMembers
+#define ERROR_DS_OID_NOT_FOUND kNtErrorDsOidNotFound
+#define ERROR_DS_DRA_RECYCLED_TARGET kNtErrorDsDraRecycledTarget
+#define ERROR_DS_DISALLOWED_NC_REDIRECT kNtErrorDsDisallowedNcRedirect
+#define ERROR_DS_HIGH_ADLDS_FFL kNtErrorDsHighAdldsFfl
+#define ERROR_DS_HIGH_DSA_VERSION kNtErrorDsHighDsaVersion
+#define ERROR_DS_LOW_ADLDS_FFL kNtErrorDsLowAdldsFfl
+#define ERROR_DOMAIN_SID_SAME_AS_LOCAL_WORKSTATION kNtErrorDomainSidSameAsLocalWorkstation
+#define ERROR_DS_UNDELETE_SAM_VALIDATION_FAILED kNtErrorDsUndeleteSamValidationFailed
+#define ERROR_INCORRECT_ACCOUNT_TYPE kNtErrorIncorrectAccountType
+#define ERROR_DS_SPN_VALUE_NOT_UNIQUE_IN_FOREST kNtErrorDsSpnValueNotUniqueInForest
+#define ERROR_DS_UPN_VALUE_NOT_UNIQUE_IN_FOREST kNtErrorDsUpnValueNotUniqueInForest
+#define ERROR_DS_MISSING_FOREST_TRUST kNtErrorDsMissingForestTrust
+#define ERROR_DS_VALUE_KEY_NOT_UNIQUE kNtErrorDsValueKeyNotUnique
+#define ERROR_IPSEC_QM_POLICY_EXISTS kNtErrorIpsecQmPolicyExists
+#define ERROR_IPSEC_QM_POLICY_NOT_FOUND kNtErrorIpsecQmPolicyNotFound
+#define ERROR_IPSEC_QM_POLICY_IN_USE kNtErrorIpsecQmPolicyInUse
+#define ERROR_IPSEC_MM_POLICY_EXISTS kNtErrorIpsecMmPolicyExists
+#define ERROR_IPSEC_MM_POLICY_NOT_FOUND kNtErrorIpsecMmPolicyNotFound
+#define ERROR_IPSEC_MM_POLICY_IN_USE kNtErrorIpsecMmPolicyInUse
+#define ERROR_IPSEC_MM_FILTER_EXISTS kNtErrorIpsecMmFilterExists
+#define ERROR_IPSEC_MM_FILTER_NOT_FOUND kNtErrorIpsecMmFilterNotFound
+#define ERROR_IPSEC_TRANSPORT_FILTER_EXISTS kNtErrorIpsecTransportFilterExists
+#define ERROR_IPSEC_TRANSPORT_FILTER_NOT_FOUND kNtErrorIpsecTransportFilterNotFound
+#define ERROR_IPSEC_MM_AUTH_EXISTS kNtErrorIpsecMmAuthExists
+#define ERROR_IPSEC_MM_AUTH_NOT_FOUND kNtErrorIpsecMmAuthNotFound
+#define ERROR_IPSEC_MM_AUTH_IN_USE kNtErrorIpsecMmAuthInUse
+#define ERROR_IPSEC_DEFAULT_MM_POLICY_NOT_FOUND kNtErrorIpsecDefaultMmPolicyNotFound
+#define ERROR_IPSEC_DEFAULT_MM_AUTH_NOT_FOUND kNtErrorIpsecDefaultMmAuthNotFound
+#define ERROR_IPSEC_DEFAULT_QM_POLICY_NOT_FOUND kNtErrorIpsecDefaultQmPolicyNotFound
+#define ERROR_IPSEC_TUNNEL_FILTER_EXISTS kNtErrorIpsecTunnelFilterExists
+#define ERROR_IPSEC_TUNNEL_FILTER_NOT_FOUND kNtErrorIpsecTunnelFilterNotFound
+#define ERROR_IPSEC_MM_FILTER_PENDING_DELETION kNtErrorIpsecMmFilterPendingDeletion
+#define ERROR_IPSEC_TRANSPORT_FILTER_PENDING_DELETION kNtErrorIpsecTransportFilterPendingDeletion
+#define ERROR_IPSEC_TUNNEL_FILTER_PENDING_DELETION kNtErrorIpsecTunnelFilterPendingDeletion
+#define ERROR_IPSEC_MM_POLICY_PENDING_DELETION kNtErrorIpsecMmPolicyPendingDeletion
+#define ERROR_IPSEC_MM_AUTH_PENDING_DELETION kNtErrorIpsecMmAuthPendingDeletion
+#define ERROR_IPSEC_QM_POLICY_PENDING_DELETION kNtErrorIpsecQmPolicyPendingDeletion
+#define ERROR_IPSEC_IKE_NEG_STATUS_BEGIN kNtErrorIpsecIkeNegStatusBegin
+#define ERROR_IPSEC_IKE_AUTH_FAIL kNtErrorIpsecIkeAuthFail
+#define ERROR_IPSEC_IKE_ATTRIB_FAIL kNtErrorIpsecIkeAttribFail
+#define ERROR_IPSEC_IKE_NEGOTIATION_PENDING kNtErrorIpsecIkeNegotiationPending
+#define ERROR_IPSEC_IKE_GENERAL_PROCESSING_ERROR kNtErrorIpsecIkeGeneralProcessingError
+#define ERROR_IPSEC_IKE_TIMED_OUT kNtErrorIpsecIkeTimedOut
+#define ERROR_IPSEC_IKE_NO_CERT kNtErrorIpsecIkeNoCert
+#define ERROR_IPSEC_IKE_SA_DELETED kNtErrorIpsecIkeSaDeleted
+#define ERROR_IPSEC_IKE_SA_REAPED kNtErrorIpsecIkeSaReaped
+#define ERROR_IPSEC_IKE_MM_ACQUIRE_DROP kNtErrorIpsecIkeMmAcquireDrop
+#define ERROR_IPSEC_IKE_QM_ACQUIRE_DROP kNtErrorIpsecIkeQmAcquireDrop
+#define ERROR_IPSEC_IKE_QUEUE_DROP_MM kNtErrorIpsecIkeQueueDropMm
+#define ERROR_IPSEC_IKE_QUEUE_DROP_NO_MM kNtErrorIpsecIkeQueueDropNoMm
+#define ERROR_IPSEC_IKE_DROP_NO_RESPONSE kNtErrorIpsecIkeDropNoResponse
+#define ERROR_IPSEC_IKE_MM_DELAY_DROP kNtErrorIpsecIkeMmDelayDrop
+#define ERROR_IPSEC_IKE_QM_DELAY_DROP kNtErrorIpsecIkeQmDelayDrop
+#define ERROR_IPSEC_IKE_ERROR kNtErrorIpsecIkeError
+#define ERROR_IPSEC_IKE_CRL_FAILED kNtErrorIpsecIkeCrlFailed
+#define ERROR_IPSEC_IKE_INVALID_KEY_USAGE kNtErrorIpsecIkeInvalidKeyUsage
+#define ERROR_IPSEC_IKE_INVALID_CERT_TYPE kNtErrorIpsecIkeInvalidCertType
+#define ERROR_IPSEC_IKE_NO_PRIVATE_KEY kNtErrorIpsecIkeNoPrivateKey
+#define ERROR_IPSEC_IKE_SIMULTANEOUS_REKEY kNtErrorIpsecIkeSimultaneousRekey
+#define ERROR_IPSEC_IKE_DH_FAIL kNtErrorIpsecIkeDhFail
+#define ERROR_IPSEC_IKE_CRITICAL_PAYLOAD_NOT_RECOGNIZED kNtErrorIpsecIkeCriticalPayloadNotRecognized
+#define ERROR_IPSEC_IKE_INVALID_HEADER kNtErrorIpsecIkeInvalidHeader
+#define ERROR_IPSEC_IKE_NO_POLICY kNtErrorIpsecIkeNoPolicy
+#define ERROR_IPSEC_IKE_INVALID_SIGNATURE kNtErrorIpsecIkeInvalidSignature
+#define ERROR_IPSEC_IKE_KERBEROS_ERROR kNtErrorIpsecIkeKerberosError
+#define ERROR_IPSEC_IKE_NO_PUBLIC_KEY kNtErrorIpsecIkeNoPublicKey
+#define ERROR_IPSEC_IKE_PROCESS_ERR kNtErrorIpsecIkeProcessErr
+#define ERROR_IPSEC_IKE_PROCESS_ERR_SA kNtErrorIpsecIkeProcessErrSa
+#define ERROR_IPSEC_IKE_PROCESS_ERR_PROP kNtErrorIpsecIkeProcessErrProp
+#define ERROR_IPSEC_IKE_PROCESS_ERR_TRANS kNtErrorIpsecIkeProcessErrTrans
+#define ERROR_IPSEC_IKE_PROCESS_ERR_KE kNtErrorIpsecIkeProcessErrKe
+#define ERROR_IPSEC_IKE_PROCESS_ERR_ID kNtErrorIpsecIkeProcessErrId
+#define ERROR_IPSEC_IKE_PROCESS_ERR_CERT kNtErrorIpsecIkeProcessErrCert
+#define ERROR_IPSEC_IKE_PROCESS_ERR_CERT_REQ kNtErrorIpsecIkeProcessErrCertReq
+#define ERROR_IPSEC_IKE_PROCESS_ERR_HASH kNtErrorIpsecIkeProcessErrHash
+#define ERROR_IPSEC_IKE_PROCESS_ERR_SIG kNtErrorIpsecIkeProcessErrSig
+#define ERROR_IPSEC_IKE_PROCESS_ERR_NONCE kNtErrorIpsecIkeProcessErrNonce
+#define ERROR_IPSEC_IKE_PROCESS_ERR_NOTIFY kNtErrorIpsecIkeProcessErrNotify
+#define ERROR_IPSEC_IKE_PROCESS_ERR_DELETE kNtErrorIpsecIkeProcessErrDelete
+#define ERROR_IPSEC_IKE_PROCESS_ERR_VENDOR kNtErrorIpsecIkeProcessErrVendor
+#define ERROR_IPSEC_IKE_INVALID_PAYLOAD kNtErrorIpsecIkeInvalidPayload
+#define ERROR_IPSEC_IKE_LOAD_SOFT_SA kNtErrorIpsecIkeLoadSoftSa
+#define ERROR_IPSEC_IKE_SOFT_SA_TORN_DOWN kNtErrorIpsecIkeSoftSaTornDown
+#define ERROR_IPSEC_IKE_INVALID_COOKIE kNtErrorIpsecIkeInvalidCookie
+#define ERROR_IPSEC_IKE_NO_PEER_CERT kNtErrorIpsecIkeNoPeerCert
+#define ERROR_IPSEC_IKE_PEER_CRL_FAILED kNtErrorIpsecIkePeerCrlFailed
+#define ERROR_IPSEC_IKE_POLICY_CHANGE kNtErrorIpsecIkePolicyChange
+#define ERROR_IPSEC_IKE_NO_MM_POLICY kNtErrorIpsecIkeNoMmPolicy
+#define ERROR_IPSEC_IKE_NOTCBPRIV kNtErrorIpsecIkeNotcbpriv
+#define ERROR_IPSEC_IKE_SECLOADFAIL kNtErrorIpsecIkeSecloadfail
+#define ERROR_IPSEC_IKE_FAILSSPINIT kNtErrorIpsecIkeFailsspinit
+#define ERROR_IPSEC_IKE_FAILQUERYSSP kNtErrorIpsecIkeFailqueryssp
+#define ERROR_IPSEC_IKE_SRVACQFAIL kNtErrorIpsecIkeSrvacqfail
+#define ERROR_IPSEC_IKE_SRVQUERYCRED kNtErrorIpsecIkeSrvquerycred
+#define ERROR_IPSEC_IKE_GETSPIFAIL kNtErrorIpsecIkeGetspifail
+#define ERROR_IPSEC_IKE_INVALID_FILTER kNtErrorIpsecIkeInvalidFilter
+#define ERROR_IPSEC_IKE_OUT_OF_MEMORY kNtErrorIpsecIkeOutOfMemory
+#define ERROR_IPSEC_IKE_ADD_UPDATE_KEY_FAILED kNtErrorIpsecIkeAddUpdateKeyFailed
+#define ERROR_IPSEC_IKE_INVALID_POLICY kNtErrorIpsecIkeInvalidPolicy
+#define ERROR_IPSEC_IKE_UNKNOWN_DOI kNtErrorIpsecIkeUnknownDoi
+#define ERROR_IPSEC_IKE_INVALID_SITUATION kNtErrorIpsecIkeInvalidSituation
+#define ERROR_IPSEC_IKE_DH_FAILURE kNtErrorIpsecIkeDhFailure
+#define ERROR_IPSEC_IKE_INVALID_GROUP kNtErrorIpsecIkeInvalidGroup
+#define ERROR_IPSEC_IKE_ENCRYPT kNtErrorIpsecIkeEncrypt
+#define ERROR_IPSEC_IKE_DECRYPT kNtErrorIpsecIkeDecrypt
+#define ERROR_IPSEC_IKE_POLICY_MATCH kNtErrorIpsecIkePolicyMatch
+#define ERROR_IPSEC_IKE_UNSUPPORTED_ID kNtErrorIpsecIkeUnsupportedId
+#define ERROR_IPSEC_IKE_INVALID_HASH kNtErrorIpsecIkeInvalidHash
+#define ERROR_IPSEC_IKE_INVALID_HASH_ALG kNtErrorIpsecIkeInvalidHashAlg
+#define ERROR_IPSEC_IKE_INVALID_HASH_SIZE kNtErrorIpsecIkeInvalidHashSize
+#define ERROR_IPSEC_IKE_INVALID_ENCRYPT_ALG kNtErrorIpsecIkeInvalidEncryptAlg
+#define ERROR_IPSEC_IKE_INVALID_AUTH_ALG kNtErrorIpsecIkeInvalidAuthAlg
+#define ERROR_IPSEC_IKE_INVALID_SIG kNtErrorIpsecIkeInvalidSig
+#define ERROR_IPSEC_IKE_LOAD_FAILED kNtErrorIpsecIkeLoadFailed
+#define ERROR_IPSEC_IKE_RPC_DELETE kNtErrorIpsecIkeRpcDelete
+#define ERROR_IPSEC_IKE_BENIGN_REINIT kNtErrorIpsecIkeBenignReinit
+#define ERROR_IPSEC_IKE_INVALID_RESPONDER_LIFETIME_NOTIFY kNtErrorIpsecIkeInvalidResponderLifetimeNotify
+#define ERROR_IPSEC_IKE_INVALID_MAJOR_VERSION kNtErrorIpsecIkeInvalidMajorVersion
+#define ERROR_IPSEC_IKE_INVALID_CERT_KEYLEN kNtErrorIpsecIkeInvalidCertKeylen
+#define ERROR_IPSEC_IKE_MM_LIMIT kNtErrorIpsecIkeMmLimit
+#define ERROR_IPSEC_IKE_NEGOTIATION_DISABLED kNtErrorIpsecIkeNegotiationDisabled
+#define ERROR_IPSEC_IKE_QM_LIMIT kNtErrorIpsecIkeQmLimit
+#define ERROR_IPSEC_IKE_MM_EXPIRED kNtErrorIpsecIkeMmExpired
+#define ERROR_IPSEC_IKE_PEER_MM_ASSUMED_INVALID kNtErrorIpsecIkePeerMmAssumedInvalid
+#define ERROR_IPSEC_IKE_CERT_CHAIN_POLICY_MISMATCH kNtErrorIpsecIkeCertChainPolicyMismatch
+#define ERROR_IPSEC_IKE_UNEXPECTED_MESSAGE_ID kNtErrorIpsecIkeUnexpectedMessageId
+#define ERROR_IPSEC_IKE_INVALID_AUTH_PAYLOAD kNtErrorIpsecIkeInvalidAuthPayload
+#define ERROR_IPSEC_IKE_DOS_COOKIE_SENT kNtErrorIpsecIkeDosCookieSent
+#define ERROR_IPSEC_IKE_SHUTTING_DOWN kNtErrorIpsecIkeShuttingDown
+#define ERROR_IPSEC_IKE_CGA_AUTH_FAILED kNtErrorIpsecIkeCgaAuthFailed
+#define ERROR_IPSEC_IKE_PROCESS_ERR_NATOA kNtErrorIpsecIkeProcessErrNatoa
+#define ERROR_IPSEC_IKE_INVALID_MM_FOR_QM kNtErrorIpsecIkeInvalidMmForQm
+#define ERROR_IPSEC_IKE_QM_EXPIRED kNtErrorIpsecIkeQmExpired
+#define ERROR_IPSEC_IKE_TOO_MANY_FILTERS kNtErrorIpsecIkeTooManyFilters
+#define ERROR_IPSEC_IKE_NEG_STATUS_END kNtErrorIpsecIkeNegStatusEnd
+#define ERROR_IPSEC_IKE_KILL_DUMMY_NAP_TUNNEL kNtErrorIpsecIkeKillDummyNapTunnel
+#define ERROR_IPSEC_IKE_INNER_IP_ASSIGNMENT_FAILURE kNtErrorIpsecIkeInnerIpAssignmentFailure
+#define ERROR_IPSEC_IKE_REQUIRE_CP_PAYLOAD_MISSING kNtErrorIpsecIkeRequireCpPayloadMissing
+#define ERROR_IPSEC_KEY_MODULE_IMPERSONATION_NEGOTIATION_PENDING kNtErrorIpsecKeyModuleImpersonationNegotiationPending
+#define ERROR_IPSEC_IKE_COEXISTENCE_SUPPRESS kNtErrorIpsecIkeCoexistenceSuppress
+#define ERROR_IPSEC_IKE_RATELIMIT_DROP kNtErrorIpsecIkeRatelimitDrop
+#define ERROR_IPSEC_IKE_PEER_DOESNT_SUPPORT_MOBIKE kNtErrorIpsecIkePeerDoesntSupportMobike
+#define ERROR_IPSEC_IKE_AUTHORIZATION_FAILURE kNtErrorIpsecIkeAuthorizationFailure
+#define ERROR_IPSEC_IKE_STRONG_CRED_AUTHORIZATION_FAILURE kNtErrorIpsecIkeStrongCredAuthorizationFailure
+#define ERROR_IPSEC_IKE_AUTHORIZATION_FAILURE_WITH_OPTIONAL_RETRY kNtErrorIpsecIkeAuthorizationFailureWithOptionalRetry
+#define ERROR_IPSEC_IKE_STRONG_CRED_AUTHORIZATION_AND_CERTMAP_FAILURE kNtErrorIpsecIkeStrongCredAuthorizationAndCertmapFailure
+#define ERROR_IPSEC_IKE_NEG_STATUS_EXTENDED_END kNtErrorIpsecIkeNegStatusExtendedEnd
+#define ERROR_IPSEC_BAD_SPI kNtErrorIpsecBadSpi
+#define ERROR_IPSEC_SA_LIFETIME_EXPIRED kNtErrorIpsecSaLifetimeExpired
+#define ERROR_IPSEC_WRONG_SA kNtErrorIpsecWrongSa
+#define ERROR_IPSEC_REPLAY_CHECK_FAILED kNtErrorIpsecReplayCheckFailed
+#define ERROR_IPSEC_INVALID_PACKET kNtErrorIpsecInvalidPacket
+#define ERROR_IPSEC_INTEGRITY_CHECK_FAILED kNtErrorIpsecIntegrityCheckFailed
+#define ERROR_IPSEC_CLEAR_TEXT_DROP kNtErrorIpsecClearTextDrop
+#define ERROR_IPSEC_AUTH_FIREWALL_DROP kNtErrorIpsecAuthFirewallDrop
+#define ERROR_IPSEC_THROTTLE_DROP kNtErrorIpsecThrottleDrop
+#define ERROR_IPSEC_DOSP_BLOCK kNtErrorIpsecDospBlock
+#define ERROR_IPSEC_DOSP_RECEIVED_MULTICAST kNtErrorIpsecDospReceivedMulticast
+#define ERROR_IPSEC_DOSP_INVALID_PACKET kNtErrorIpsecDospInvalidPacket
+#define ERROR_IPSEC_DOSP_STATE_LOOKUP_FAILED kNtErrorIpsecDospStateLookupFailed
+#define ERROR_IPSEC_DOSP_MAX_ENTRIES kNtErrorIpsecDospMaxEntries
+#define ERROR_IPSEC_DOSP_KEYMOD_NOT_ALLOWED kNtErrorIpsecDospKeymodNotAllowed
+#define ERROR_IPSEC_DOSP_NOT_INSTALLED kNtErrorIpsecDospNotInstalled
+#define ERROR_IPSEC_DOSP_MAX_PER_IP_RATELIMIT_QUEUES kNtErrorIpsecDospMaxPerIpRatelimitQueues
+#define ERROR_SXS_SECTION_NOT_FOUND kNtErrorSxsSectionNotFound
+#define ERROR_SXS_CANT_GEN_ACTCTX kNtErrorSxsCantGenActctx
+#define ERROR_SXS_INVALID_ACTCTXDATA_FORMAT kNtErrorSxsInvalidActctxdataFormat
+#define ERROR_SXS_ASSEMBLY_NOT_FOUND kNtErrorSxsAssemblyNotFound
+#define ERROR_SXS_MANIFEST_FORMAT_ERROR kNtErrorSxsManifestFormatError
+#define ERROR_SXS_MANIFEST_PARSE_ERROR kNtErrorSxsManifestParseError
+#define ERROR_SXS_ACTIVATION_CONTEXT_DISABLED kNtErrorSxsActivationContextDisabled
+#define ERROR_SXS_KEY_NOT_FOUND kNtErrorSxsKeyNotFound
+#define ERROR_SXS_VERSION_CONFLICT kNtErrorSxsVersionConflict
+#define ERROR_SXS_WRONG_SECTION_TYPE kNtErrorSxsWrongSectionType
+#define ERROR_SXS_THREAD_QUERIES_DISABLED kNtErrorSxsThreadQueriesDisabled
+#define ERROR_SXS_PROCESS_DEFAULT_ALREADY_SET kNtErrorSxsProcessDefaultAlreadySet
+#define ERROR_SXS_UNKNOWN_ENCODING_GROUP kNtErrorSxsUnknownEncodingGroup
+#define ERROR_SXS_UNKNOWN_ENCODING kNtErrorSxsUnknownEncoding
+#define ERROR_SXS_INVALID_XML_NAMESPACE_URI kNtErrorSxsInvalidXmlNamespaceUri
+#define ERROR_SXS_ROOT_MANIFEST_DEPENDENCY_NOT_INSTALLED kNtErrorSxsRootManifestDependencyNotInstalled
+#define ERROR_SXS_LEAF_MANIFEST_DEPENDENCY_NOT_INSTALLED kNtErrorSxsLeafManifestDependencyNotInstalled
+#define ERROR_SXS_INVALID_ASSEMBLY_IDENTITY_ATTRIBUTE kNtErrorSxsInvalidAssemblyIdentityAttribute
+#define ERROR_SXS_MANIFEST_MISSING_REQUIRED_DEFAULT_NAMESPACE kNtErrorSxsManifestMissingRequiredDefaultNamespace
+#define ERROR_SXS_MANIFEST_INVALID_REQUIRED_DEFAULT_NAMESPACE kNtErrorSxsManifestInvalidRequiredDefaultNamespace
+#define ERROR_SXS_PRIVATE_MANIFEST_CROSS_PATH_WITH_REPARSE_POINT kNtErrorSxsPrivateManifestCrossPathWithReparsePoint
+#define ERROR_SXS_DUPLICATE_DLL_NAME kNtErrorSxsDuplicateDllName
+#define ERROR_SXS_DUPLICATE_WINDOWCLASS_NAME kNtErrorSxsDuplicateWindowclassName
+#define ERROR_SXS_DUPLICATE_CLSID kNtErrorSxsDuplicateClsid
+#define ERROR_SXS_DUPLICATE_IID kNtErrorSxsDuplicateIid
+#define ERROR_SXS_DUPLICATE_TLBID kNtErrorSxsDuplicateTlbid
+#define ERROR_SXS_DUPLICATE_PROGID kNtErrorSxsDuplicateProgid
+#define ERROR_SXS_DUPLICATE_ASSEMBLY_NAME kNtErrorSxsDuplicateAssemblyName
+#define ERROR_SXS_FILE_HASH_MISMATCH kNtErrorSxsFileHashMismatch
+#define ERROR_SXS_POLICY_PARSE_ERROR kNtErrorSxsPolicyParseError
+#define ERROR_SXS_XML_EMISSINGQUOTE kNtErrorSxsXmlEMissingquote
+#define ERROR_SXS_XML_ECOMMENTSYNTAX kNtErrorSxsXmlECommentsyntax
+#define ERROR_SXS_XML_EBADSTARTNAMECHAR kNtErrorSxsXmlEBadstartnamechar
+#define ERROR_SXS_XML_EBADNAMECHAR kNtErrorSxsXmlEBadnamechar
+#define ERROR_SXS_XML_EBADCHARINSTRING kNtErrorSxsXmlEBadcharinstring
+#define ERROR_SXS_XML_EXMLDECLSYNTAX kNtErrorSxsXmlEXmldeclsyntax
+#define ERROR_SXS_XML_EBADCHARDATA kNtErrorSxsXmlEBadchardata
+#define ERROR_SXS_XML_EMISSINGWHITESPACE kNtErrorSxsXmlEMissingwhitespace
+#define ERROR_SXS_XML_EEXPECTINGTAGEND kNtErrorSxsXmlEExpectingtagend
+#define ERROR_SXS_XML_EMISSINGSEMICOLON kNtErrorSxsXmlEMissingsemicolon
+#define ERROR_SXS_XML_EUNBALANCEDPAREN kNtErrorSxsXmlEUnbalancedparen
+#define ERROR_SXS_XML_EINTERNALERROR kNtErrorSxsXmlEInternalerror
+#define ERROR_SXS_XML_EUNEXPECTED_WHITESPACE kNtErrorSxsXmlEUnexpectedWhitespace
+#define ERROR_SXS_XML_EINCOMPLETE_ENCODING kNtErrorSxsXmlEIncompleteEncoding
+#define ERROR_SXS_XML_EMISSING_PAREN kNtErrorSxsXmlEMissingParen
+#define ERROR_SXS_XML_EEXPECTINGCLOSEQUOTE kNtErrorSxsXmlEExpectingclosequote
+#define ERROR_SXS_XML_EMULTIPLE_COLONS kNtErrorSxsXmlEMultipleColons
+#define ERROR_SXS_XML_EINVALID_DECIMAL kNtErrorSxsXmlEInvalidDecimal
+#define ERROR_SXS_XML_EINVALID_HEXIDECIMAL kNtErrorSxsXmlEInvalidHexidecimal
+#define ERROR_SXS_XML_EINVALID_UNICODE kNtErrorSxsXmlEInvalidUnicode
+#define ERROR_SXS_XML_EWHITESPACEORQUESTIONMARK kNtErrorSxsXmlEWhitespaceorquestionmark
+#define ERROR_SXS_XML_EUNEXPECTEDENDTAG kNtErrorSxsXmlEUnexpectedendtag
+#define ERROR_SXS_XML_EUNCLOSEDTAG kNtErrorSxsXmlEUnclosedtag
+#define ERROR_SXS_XML_EDUPLICATEATTRIBUTE kNtErrorSxsXmlEDuplicateattribute
+#define ERROR_SXS_XML_EMULTIPLEROOTS kNtErrorSxsXmlEMultipleroots
+#define ERROR_SXS_XML_EINVALIDATROOTLEVEL kNtErrorSxsXmlEInvalidatrootlevel
+#define ERROR_SXS_XML_EBADXMLDECL kNtErrorSxsXmlEBadxmldecl
+#define ERROR_SXS_XML_EMISSINGROOT kNtErrorSxsXmlEMissingroot
+#define ERROR_SXS_XML_EUNEXPECTEDEOF kNtErrorSxsXmlEUnexpectedeof
+#define ERROR_SXS_XML_EBADPEREFINSUBSET kNtErrorSxsXmlEBadperefinsubset
+#define ERROR_SXS_XML_EUNCLOSEDSTARTTAG kNtErrorSxsXmlEUnclosedstarttag
+#define ERROR_SXS_XML_EUNCLOSEDENDTAG kNtErrorSxsXmlEUnclosedendtag
+#define ERROR_SXS_XML_EUNCLOSEDSTRING kNtErrorSxsXmlEUnclosedstring
+#define ERROR_SXS_XML_EUNCLOSEDCOMMENT kNtErrorSxsXmlEUnclosedcomment
+#define ERROR_SXS_XML_EUNCLOSEDDECL kNtErrorSxsXmlEUncloseddecl
+#define ERROR_SXS_XML_EUNCLOSEDCDATA kNtErrorSxsXmlEUnclosedcdata
+#define ERROR_SXS_XML_ERESERVEDNAMESPACE kNtErrorSxsXmlEReservednamespace
+#define ERROR_SXS_XML_EINVALIDENCODING kNtErrorSxsXmlEInvalidencoding
+#define ERROR_SXS_XML_EINVALIDSWITCH kNtErrorSxsXmlEInvalidswitch
+#define ERROR_SXS_XML_EBADXMLCASE kNtErrorSxsXmlEBadxmlcase
+#define ERROR_SXS_XML_EINVALID_STANDALONE kNtErrorSxsXmlEInvalidStandalone
+#define ERROR_SXS_XML_EUNEXPECTED_STANDALONE kNtErrorSxsXmlEUnexpectedStandalone
+#define ERROR_SXS_XML_EINVALID_VERSION kNtErrorSxsXmlEInvalidVersion
+#define ERROR_SXS_XML_EMISSINGEQUALS kNtErrorSxsXmlEMissingequals
+#define ERROR_SXS_PROTECTION_RECOVERY_FAILED kNtErrorSxsProtectionRecoveryFailed
+#define ERROR_SXS_PROTECTION_PUBLIC_KEY_TOO_SHORT kNtErrorSxsProtectionPublicKeyTooShort
+#define ERROR_SXS_PROTECTION_CATALOG_NOT_VALID kNtErrorSxsProtectionCatalogNotValid
+#define ERROR_SXS_UNTRANSLATABLE_HRESULT kNtErrorSxsUntranslatableHresult
+#define ERROR_SXS_PROTECTION_CATALOG_FILE_MISSING kNtErrorSxsProtectionCatalogFileMissing
+#define ERROR_SXS_MISSING_ASSEMBLY_IDENTITY_ATTRIBUTE kNtErrorSxsMissingAssemblyIdentityAttribute
+#define ERROR_SXS_INVALID_ASSEMBLY_IDENTITY_ATTRIBUTE_NAME kNtErrorSxsInvalidAssemblyIdentityAttributeName
+#define ERROR_SXS_ASSEMBLY_MISSING kNtErrorSxsAssemblyMissing
+#define ERROR_SXS_CORRUPT_ACTIVATION_STACK kNtErrorSxsCorruptActivationStack
+#define ERROR_SXS_CORRUPTION kNtErrorSxsCorruption
+#define ERROR_SXS_EARLY_DEACTIVATION kNtErrorSxsEarlyDeactivation
+#define ERROR_SXS_INVALID_DEACTIVATION kNtErrorSxsInvalidDeactivation
+#define ERROR_SXS_MULTIPLE_DEACTIVATION kNtErrorSxsMultipleDeactivation
+#define ERROR_SXS_PROCESS_TERMINATION_REQUESTED kNtErrorSxsProcessTerminationRequested
+#define ERROR_SXS_RELEASE_ACTIVATION_CONTEXT kNtErrorSxsReleaseActivationContext
+#define ERROR_SXS_SYSTEM_DEFAULT_ACTIVATION_CONTEXT_EMPTY kNtErrorSxsSystemDefaultActivationContextEmpty
+#define ERROR_SXS_INVALID_IDENTITY_ATTRIBUTE_VALUE kNtErrorSxsInvalidIdentityAttributeValue
+#define ERROR_SXS_INVALID_IDENTITY_ATTRIBUTE_NAME kNtErrorSxsInvalidIdentityAttributeName
+#define ERROR_SXS_IDENTITY_DUPLICATE_ATTRIBUTE kNtErrorSxsIdentityDuplicateAttribute
+#define ERROR_SXS_IDENTITY_PARSE_ERROR kNtErrorSxsIdentityParseError
+#define ERROR_MALFORMED_SUBSTITUTION_STRING kNtErrorMalformedSubstitutionString
+#define ERROR_SXS_INCORRECT_PUBLIC_KEY_TOKEN kNtErrorSxsIncorrectPublicKeyToken
+#define ERROR_UNMAPPED_SUBSTITUTION_STRING kNtErrorUnmappedSubstitutionString
+#define ERROR_SXS_ASSEMBLY_NOT_LOCKED kNtErrorSxsAssemblyNotLocked
+#define ERROR_SXS_COMPONENT_STORE_CORRUPT kNtErrorSxsComponentStoreCorrupt
+#define ERROR_ADVANCED_INSTALLER_FAILED kNtErrorAdvancedInstallerFailed
+#define ERROR_XML_ENCODING_MISMATCH kNtErrorXmlEncodingMismatch
+#define ERROR_SXS_MANIFEST_IDENTITY_SAME_BUT_CONTENTS_DIFFERENT kNtErrorSxsManifestIdentitySameButContentsDifferent
+#define ERROR_SXS_IDENTITIES_DIFFERENT kNtErrorSxsIdentitiesDifferent
+#define ERROR_SXS_ASSEMBLY_IS_NOT_ADEPLOYMENT kNtErrorSxsAssemblyIsNotADeployment
+#define ERROR_SXS_FILE_NOT_PART_OF_ASSEMBLY kNtErrorSxsFileNotPartOfAssembly
+#define ERROR_SXS_MANIFEST_TOO_BIG kNtErrorSxsManifestTooBig
+#define ERROR_SXS_SETTING_NOT_REGISTERED kNtErrorSxsSettingNotRegistered
+#define ERROR_SXS_TRANSACTION_CLOSURE_INCOMPLETE kNtErrorSxsTransactionClosureIncomplete
+#define ERROR_SMI_PRIMITIVE_INSTALLER_FAILED kNtErrorSmiPrimitiveInstallerFailed
+#define ERROR_GENERIC_COMMAND_FAILED kNtErrorGenericCommandFailed
+#define ERROR_SXS_FILE_HASH_MISSING kNtErrorSxsFileHashMissing
+#define ERROR_EVT_INVALID_CHANNEL_PATH kNtErrorEvtInvalidChannelPath
+#define ERROR_EVT_INVALID_QUERY kNtErrorEvtInvalidQuery
+#define ERROR_EVT_PUBLISHER_METADATA_NOT_FOUND kNtErrorEvtPublisherMetadataNotFound
+#define ERROR_EVT_EVENT_TEMPLATE_NOT_FOUND kNtErrorEvtEventTemplateNotFound
+#define ERROR_EVT_INVALID_PUBLISHER_NAME kNtErrorEvtInvalidPublisherName
+#define ERROR_EVT_INVALID_EVENT_DATA kNtErrorEvtInvalidEventData
+#define ERROR_EVT_CHANNEL_NOT_FOUND kNtErrorEvtChannelNotFound
+#define ERROR_EVT_MALFORMED_XML_TEXT kNtErrorEvtMalformedXmlText
+#define ERROR_EVT_SUBSCRIPTION_TO_DIRECT_CHANNEL kNtErrorEvtSubscriptionToDirectChannel
+#define ERROR_EVT_CONFIGURATION_ERROR kNtErrorEvtConfigurationError
+#define ERROR_EVT_QUERY_RESULT_STALE kNtErrorEvtQueryResultStale
+#define ERROR_EVT_QUERY_RESULT_INVALID_POSITION kNtErrorEvtQueryResultInvalidPosition
+#define ERROR_EVT_NON_VALIDATING_MSXML kNtErrorEvtNonValidatingMsxml
+#define ERROR_EVT_FILTER_ALREADYSCOPED kNtErrorEvtFilterAlreadyscoped
+#define ERROR_EVT_FILTER_NOTELTSET kNtErrorEvtFilterNoteltset
+#define ERROR_EVT_FILTER_INVARG kNtErrorEvtFilterInvarg
+#define ERROR_EVT_FILTER_INVTEST kNtErrorEvtFilterInvtest
+#define ERROR_EVT_FILTER_INVTYPE kNtErrorEvtFilterInvtype
+#define ERROR_EVT_FILTER_PARSEERR kNtErrorEvtFilterParseerr
+#define ERROR_EVT_FILTER_UNSUPPORTEDOP kNtErrorEvtFilterUnsupportedop
+#define ERROR_EVT_FILTER_UNEXPECTEDTOKEN kNtErrorEvtFilterUnexpectedtoken
+#define ERROR_EVT_INVALID_OPERATION_OVER_ENABLED_DIRECT_CHANNEL kNtErrorEvtInvalidOperationOverEnabledDirectChannel
+#define ERROR_EVT_INVALID_CHANNEL_PROPERTY_VALUE kNtErrorEvtInvalidChannelPropertyValue
+#define ERROR_EVT_INVALID_PUBLISHER_PROPERTY_VALUE kNtErrorEvtInvalidPublisherPropertyValue
+#define ERROR_EVT_CHANNEL_CANNOT_ACTIVATE kNtErrorEvtChannelCannotActivate
+#define ERROR_EVT_FILTER_TOO_COMPLEX kNtErrorEvtFilterTooComplex
+#define ERROR_EVT_MESSAGE_NOT_FOUND kNtErrorEvtMessageNotFound
+#define ERROR_EVT_MESSAGE_ID_NOT_FOUND kNtErrorEvtMessageIdNotFound
+#define ERROR_EVT_UNRESOLVED_VALUE_INSERT kNtErrorEvtUnresolvedValueInsert
+#define ERROR_EVT_UNRESOLVED_PARAMETER_INSERT kNtErrorEvtUnresolvedParameterInsert
+#define ERROR_EVT_MAX_INSERTS_REACHED kNtErrorEvtMaxInsertsReached
+#define ERROR_EVT_EVENT_DEFINITION_NOT_FOUND kNtErrorEvtEventDefinitionNotFound
+#define ERROR_EVT_MESSAGE_LOCALE_NOT_FOUND kNtErrorEvtMessageLocaleNotFound
+#define ERROR_EVT_VERSION_TOO_OLD kNtErrorEvtVersionTooOld
+#define ERROR_EVT_VERSION_TOO_NEW kNtErrorEvtVersionTooNew
+#define ERROR_EVT_CANNOT_OPEN_CHANNEL_OF_QUERY kNtErrorEvtCannotOpenChannelOfQuery
+#define ERROR_EVT_PUBLISHER_DISABLED kNtErrorEvtPublisherDisabled
+#define ERROR_EVT_FILTER_OUT_OF_RANGE kNtErrorEvtFilterOutOfRange
+#define ERROR_EC_SUBSCRIPTION_CANNOT_ACTIVATE kNtErrorEcSubscriptionCannotActivate
+#define ERROR_EC_LOG_DISABLED kNtErrorEcLogDisabled
+#define ERROR_EC_CIRCULAR_FORWARDING kNtErrorEcCircularForwarding
+#define ERROR_EC_CREDSTORE_FULL kNtErrorEcCredstoreFull
+#define ERROR_EC_CRED_NOT_FOUND kNtErrorEcCredNotFound
+#define ERROR_EC_NO_ACTIVE_CHANNEL kNtErrorEcNoActiveChannel
+#define ERROR_MUI_FILE_NOT_FOUND kNtErrorMuiFileNotFound
+#define ERROR_MUI_INVALID_FILE kNtErrorMuiInvalidFile
+#define ERROR_MUI_INVALID_RC_CONFIG kNtErrorMuiInvalidRcConfig
+#define ERROR_MUI_INVALID_LOCALE_NAME kNtErrorMuiInvalidLocaleName
+#define ERROR_MUI_INVALID_ULTIMATEFALLBACK_NAME kNtErrorMuiInvalidUltimatefallbackName
+#define ERROR_MUI_FILE_NOT_LOADED kNtErrorMuiFileNotLoaded
+#define ERROR_RESOURCE_ENUM_USER_STOP kNtErrorResourceEnumUserStop
+#define ERROR_MUI_INTLSETTINGS_UILANG_NOT_INSTALLED kNtErrorMuiIntlsettingsUilangNotInstalled
+#define ERROR_MUI_INTLSETTINGS_INVALID_LOCALE_NAME kNtErrorMuiIntlsettingsInvalidLocaleName
+#define ERROR_MRM_RUNTIME_NO_DEFAULT_OR_NEUTRAL_RESOURCE kNtErrorMrmRuntimeNoDefaultOrNeutralResource
+#define ERROR_MRM_INVALID_PRICONFIG kNtErrorMrmInvalidPriconfig
+#define ERROR_MRM_INVALID_FILE_TYPE kNtErrorMrmInvalidFileType
+#define ERROR_MRM_UNKNOWN_QUALIFIER kNtErrorMrmUnknownQualifier
+#define ERROR_MRM_INVALID_QUALIFIER_VALUE kNtErrorMrmInvalidQualifierValue
+#define ERROR_MRM_NO_CANDIDATE kNtErrorMrmNoCandidate
+#define ERROR_MRM_NO_MATCH_OR_DEFAULT_CANDIDATE kNtErrorMrmNoMatchOrDefaultCandidate
+#define ERROR_MRM_RESOURCE_TYPE_MISMATCH kNtErrorMrmResourceTypeMismatch
+#define ERROR_MRM_DUPLICATE_MAP_NAME kNtErrorMrmDuplicateMapName
+#define ERROR_MRM_DUPLICATE_ENTRY kNtErrorMrmDuplicateEntry
+#define ERROR_MRM_INVALID_RESOURCE_IDENTIFIER kNtErrorMrmInvalidResourceIdentifier
+#define ERROR_MRM_FILEPATH_TOO_LONG kNtErrorMrmFilepathTooLong
+#define ERROR_MRM_UNSUPPORTED_DIRECTORY_TYPE kNtErrorMrmUnsupportedDirectoryType
+#define ERROR_MRM_INVALID_PRI_FILE kNtErrorMrmInvalidPriFile
+#define ERROR_MRM_NAMED_RESOURCE_NOT_FOUND kNtErrorMrmNamedResourceNotFound
+#define ERROR_MRM_MAP_NOT_FOUND kNtErrorMrmMapNotFound
+#define ERROR_MRM_UNSUPPORTED_PROFILE_TYPE kNtErrorMrmUnsupportedProfileType
+#define ERROR_MRM_INVALID_QUALIFIER_OPERATOR kNtErrorMrmInvalidQualifierOperator
+#define ERROR_MRM_INDETERMINATE_QUALIFIER_VALUE kNtErrorMrmIndeterminateQualifierValue
+#define ERROR_MRM_AUTOMERGE_ENABLED kNtErrorMrmAutomergeEnabled
+#define ERROR_MRM_TOO_MANY_RESOURCES kNtErrorMrmTooManyResources
+#define ERROR_MRM_UNSUPPORTED_FILE_TYPE_FOR_MERGE kNtErrorMrmUnsupportedFileTypeForMerge
+#define ERROR_MRM_UNSUPPORTED_FILE_TYPE_FOR_LOAD_UNLOAD_PRI_FILE kNtErrorMrmUnsupportedFileTypeForLoadUnloadPriFile
+#define ERROR_MRM_NO_CURRENT_VIEW_ON_THREAD kNtErrorMrmNoCurrentViewOnThread
+#define ERROR_DIFFERENT_PROFILE_RESOURCE_MANAGER_EXIST kNtErrorDifferentProfileResourceManagerExist
+#define ERROR_OPERATION_NOT_ALLOWED_FROM_SYSTEM_COMPONENT kNtErrorOperationNotAllowedFromSystemComponent
+#define ERROR_MRM_DIRECT_REF_TO_NON_DEFAULT_RESOURCE kNtErrorMrmDirectRefToNonDefaultResource
+#define ERROR_MRM_GENERATION_COUNT_MISMATCH kNtErrorMrmGenerationCountMismatch
+#define ERROR_PRI_MERGE_VERSION_MISMATCH kNtErrorPriMergeVersionMismatch
+#define ERROR_PRI_MERGE_MISSING_SCHEMA kNtErrorPriMergeMissingSchema
+#define ERROR_PRI_MERGE_LOAD_FILE_FAILED kNtErrorPriMergeLoadFileFailed
+#define ERROR_PRI_MERGE_ADD_FILE_FAILED kNtErrorPriMergeAddFileFailed
+#define ERROR_PRI_MERGE_WRITE_FILE_FAILED kNtErrorPriMergeWriteFileFailed
+#define ERROR_PRI_MERGE_MULTIPLE_PACKAGE_FAMILIES_NOT_ALLOWED kNtErrorPriMergeMultiplePackageFamiliesNotAllowed
+#define ERROR_PRI_MERGE_MULTIPLE_MAIN_PACKAGES_NOT_ALLOWED kNtErrorPriMergeMultipleMainPackagesNotAllowed
+#define ERROR_PRI_MERGE_BUNDLE_PACKAGES_NOT_ALLOWED kNtErrorPriMergeBundlePackagesNotAllowed
+#define ERROR_PRI_MERGE_MAIN_PACKAGE_REQUIRED kNtErrorPriMergeMainPackageRequired
+#define ERROR_PRI_MERGE_RESOURCE_PACKAGE_REQUIRED kNtErrorPriMergeResourcePackageRequired
+#define ERROR_PRI_MERGE_INVALID_FILE_NAME kNtErrorPriMergeInvalidFileName
+#define ERROR_MCA_INVALID_CAPABILITIES_STRING kNtErrorMcaInvalidCapabilitiesString
+#define ERROR_MCA_INVALID_VCP_VERSION kNtErrorMcaInvalidVcpVersion
+#define ERROR_MCA_MONITOR_VIOLATES_MCCS_SPECIFICATION kNtErrorMcaMonitorViolatesMccsSpecification
+#define ERROR_MCA_MCCS_VERSION_MISMATCH kNtErrorMcaMccsVersionMismatch
+#define ERROR_MCA_UNSUPPORTED_MCCS_VERSION kNtErrorMcaUnsupportedMccsVersion
+#define ERROR_MCA_INTERNAL_ERROR kNtErrorMcaInternalError
+#define ERROR_MCA_INVALID_TECHNOLOGY_TYPE_RETURNED kNtErrorMcaInvalidTechnologyTypeReturned
+#define ERROR_MCA_UNSUPPORTED_COLOR_TEMPERATURE kNtErrorMcaUnsupportedColorTemperature
+#define ERROR_AMBIGUOUS_SYSTEM_DEVICE kNtErrorAmbiguousSystemDevice
+#define ERROR_SYSTEM_DEVICE_NOT_FOUND kNtErrorSystemDeviceNotFound
+#define ERROR_HASH_NOT_SUPPORTED kNtErrorHashNotSupported
+#define ERROR_HASH_NOT_PRESENT kNtErrorHashNotPresent
+#define ERROR_SECONDARY_IC_PROVIDER_NOT_REGISTERED kNtErrorSecondaryIcProviderNotRegistered
+#define ERROR_GPIO_CLIENT_INFORMATION_INVALID kNtErrorGpioClientInformationInvalid
+#define ERROR_GPIO_VERSION_NOT_SUPPORTED kNtErrorGpioVersionNotSupported
+#define ERROR_GPIO_INVALID_REGISTRATION_PACKET kNtErrorGpioInvalidRegistrationPacket
+#define ERROR_GPIO_OPERATION_DENIED kNtErrorGpioOperationDenied
+#define ERROR_GPIO_INCOMPATIBLE_CONNECT_MODE kNtErrorGpioIncompatibleConnectMode
+#define ERROR_GPIO_INTERRUPT_ALREADY_UNMASKED kNtErrorGpioInterruptAlreadyUnmasked
+#define ERROR_CANNOT_SWITCH_RUNLEVEL kNtErrorCannotSwitchRunlevel
+#define ERROR_INVALID_RUNLEVEL_SETTING kNtErrorInvalidRunlevelSetting
+#define ERROR_RUNLEVEL_SWITCH_TIMEOUT kNtErrorRunlevelSwitchTimeout
+#define ERROR_RUNLEVEL_SWITCH_AGENT_TIMEOUT kNtErrorRunlevelSwitchAgentTimeout
+#define ERROR_RUNLEVEL_SWITCH_IN_PROGRESS kNtErrorRunlevelSwitchInProgress
+#define ERROR_SERVICES_FAILED_AUTOSTART kNtErrorServicesFailedAutostart
+#define ERROR_COM_TASK_STOP_PENDING kNtErrorComTaskStopPending
+#define ERROR_INSTALL_OPEN_PACKAGE_FAILED kNtErrorInstallOpenPackageFailed
+#define ERROR_INSTALL_PACKAGE_NOT_FOUND kNtErrorInstallPackageNotFound
+#define ERROR_INSTALL_INVALID_PACKAGE kNtErrorInstallInvalidPackage
+#define ERROR_INSTALL_RESOLVE_DEPENDENCY_FAILED kNtErrorInstallResolveDependencyFailed
+#define ERROR_INSTALL_OUT_OF_DISK_SPACE kNtErrorInstallOutOfDiskSpace
+#define ERROR_INSTALL_NETWORK_FAILURE kNtErrorInstallNetworkFailure
+#define ERROR_INSTALL_REGISTRATION_FAILURE kNtErrorInstallRegistrationFailure
+#define ERROR_INSTALL_DEREGISTRATION_FAILURE kNtErrorInstallDeregistrationFailure
+#define ERROR_INSTALL_CANCEL kNtErrorInstallCancel
+#define ERROR_INSTALL_FAILED kNtErrorInstallFailed
+#define ERROR_REMOVE_FAILED kNtErrorRemoveFailed
+#define ERROR_PACKAGE_ALREADY_EXISTS kNtErrorPackageAlreadyExists
+#define ERROR_NEEDS_REMEDIATION kNtErrorNeedsRemediation
+#define ERROR_INSTALL_PREREQUISITE_FAILED kNtErrorInstallPrerequisiteFailed
+#define ERROR_PACKAGE_REPOSITORY_CORRUPTED kNtErrorPackageRepositoryCorrupted
+#define ERROR_INSTALL_POLICY_FAILURE kNtErrorInstallPolicyFailure
+#define ERROR_PACKAGE_UPDATING kNtErrorPackageUpdating
+#define ERROR_DEPLOYMENT_BLOCKED_BY_POLICY kNtErrorDeploymentBlockedByPolicy
+#define ERROR_PACKAGES_IN_USE kNtErrorPackagesInUse
+#define ERROR_RECOVERY_FILE_CORRUPT kNtErrorRecoveryFileCorrupt
+#define ERROR_INVALID_STAGED_SIGNATURE kNtErrorInvalidStagedSignature
+#define ERROR_DELETING_EXISTING_APPLICATIONDATA_STORE_FAILED kNtErrorDeletingExistingApplicationdataStoreFailed
+#define ERROR_INSTALL_PACKAGE_DOWNGRADE kNtErrorInstallPackageDowngrade
+#define ERROR_SYSTEM_NEEDS_REMEDIATION kNtErrorSystemNeedsRemediation
+#define ERROR_APPX_INTEGRITY_FAILURE_CLR_NGEN kNtErrorAppxIntegrityFailureClrNgen
+#define ERROR_RESILIENCY_FILE_CORRUPT kNtErrorResiliencyFileCorrupt
+#define ERROR_INSTALL_FIREWALL_SERVICE_NOT_RUNNING kNtErrorInstallFirewallServiceNotRunning
+#define ERROR_PACKAGE_MOVE_FAILED kNtErrorPackageMoveFailed
+#define ERROR_INSTALL_VOLUME_NOT_EMPTY kNtErrorInstallVolumeNotEmpty
+#define ERROR_INSTALL_VOLUME_OFFLINE kNtErrorInstallVolumeOffline
+#define ERROR_INSTALL_VOLUME_CORRUPT kNtErrorInstallVolumeCorrupt
+#define ERROR_NEEDS_REGISTRATION kNtErrorNeedsRegistration
+#define ERROR_INSTALL_WRONG_PROCESSOR_ARCHITECTURE kNtErrorInstallWrongProcessorArchitecture
+#define ERROR_DEV_SIDELOAD_LIMIT_EXCEEDED kNtErrorDevSideloadLimitExceeded
+#define ERROR_INSTALL_OPTIONAL_PACKAGE_REQUIRES_MAIN_PACKAGE kNtErrorInstallOptionalPackageRequiresMainPackage
+#define ERROR_PACKAGE_NOT_SUPPORTED_ON_FILESYSTEM kNtErrorPackageNotSupportedOnFilesystem
+#define ERROR_PACKAGE_MOVE_BLOCKED_BY_STREAMING kNtErrorPackageMoveBlockedByStreaming
+#define ERROR_INSTALL_OPTIONAL_PACKAGE_APPLICATIONID_NOT_UNIQUE kNtErrorInstallOptionalPackageApplicationidNotUnique
+#define ERROR_PACKAGE_STAGING_ONHOLD kNtErrorPackageStagingOnhold
+#define ERROR_INSTALL_INVALID_RELATED_SET_UPDATE kNtErrorInstallInvalidRelatedSetUpdate
+#define ERROR_PACKAGES_REPUTATION_CHECK_FAILED kNtErrorPackagesReputationCheckFailed
+#define ERROR_PACKAGES_REPUTATION_CHECK_TIMEDOUT kNtErrorPackagesReputationCheckTimedout
+#define ERROR_STATE_LOAD_STORE_FAILED kNtErrorStateLoadStoreFailed
+#define ERROR_STATE_GET_VERSION_FAILED kNtErrorStateGetVersionFailed
+#define ERROR_STATE_SET_VERSION_FAILED kNtErrorStateSetVersionFailed
+#define ERROR_STATE_STRUCTURED_RESET_FAILED kNtErrorStateStructuredResetFailed
+#define ERROR_STATE_OPEN_CONTAINER_FAILED kNtErrorStateOpenContainerFailed
+#define ERROR_STATE_CREATE_CONTAINER_FAILED kNtErrorStateCreateContainerFailed
+#define ERROR_STATE_DELETE_CONTAINER_FAILED kNtErrorStateDeleteContainerFailed
+#define ERROR_STATE_READ_SETTING_FAILED kNtErrorStateReadSettingFailed
+#define ERROR_STATE_WRITE_SETTING_FAILED kNtErrorStateWriteSettingFailed
+#define ERROR_STATE_DELETE_SETTING_FAILED kNtErrorStateDeleteSettingFailed
+#define ERROR_STATE_QUERY_SETTING_FAILED kNtErrorStateQuerySettingFailed
+#define ERROR_STATE_READ_COMPOSITE_SETTING_FAILED kNtErrorStateReadCompositeSettingFailed
+#define ERROR_STATE_WRITE_COMPOSITE_SETTING_FAILED kNtErrorStateWriteCompositeSettingFailed
+#define ERROR_STATE_ENUMERATE_CONTAINER_FAILED kNtErrorStateEnumerateContainerFailed
+#define ERROR_STATE_ENUMERATE_SETTINGS_FAILED kNtErrorStateEnumerateSettingsFailed
+#define ERROR_STATE_COMPOSITE_SETTING_VALUE_SIZE_LIMIT_EXCEEDED kNtErrorStateCompositeSettingValueSizeLimitExceeded
+#define ERROR_STATE_SETTING_VALUE_SIZE_LIMIT_EXCEEDED kNtErrorStateSettingValueSizeLimitExceeded
+#define ERROR_STATE_SETTING_NAME_SIZE_LIMIT_EXCEEDED kNtErrorStateSettingNameSizeLimitExceeded
+#define ERROR_STATE_CONTAINER_NAME_SIZE_LIMIT_EXCEEDED kNtErrorStateContainerNameSizeLimitExceeded
+#define ERROR_API_UNAVAILABLE kNtErrorApiUnavailable
+
+#define NOERROR 0
+#define S_OK ((HRESULT)0)
+#define S_FALSE ((HRESULT)1)
+#define E_UNEXPECTED ((HRESULT)0x8000FFFF)
+#define E_NOTIMPL ((HRESULT)0x80004001)
+#define E_OUTOFMEMORY ((HRESULT)0x8007000E)
+#define E_INVALIDARG ((HRESULT)0x80070057)
+#define E_NOINTERFACE ((HRESULT)0x80004002)
+#define E_POINTER ((HRESULT)0x80004003)
+#define E_HANDLE ((HRESULT)0x80070006)
+#define E_ABORT ((HRESULT)0x80004004)
+#define E_FAIL ((HRESULT)0x80004005)
+#define E_ACCESSDENIED ((HRESULT)0x80070005)
+#define E_NOTIMPL ((HRESULT)0x80000001)
+#define E_OUTOFMEMORY ((HRESULT)0x80000002)
+#define E_INVALIDARG ((HRESULT)0x80000003)
+#define E_NOINTERFACE ((HRESULT)0x80000004)
+#define E_POINTER ((HRESULT)0x80000005)
+#define E_HANDLE ((HRESULT)0x80000006)
+#define E_ABORT ((HRESULT)0x80000007)
+#define E_FAIL ((HRESULT)0x80000008)
+#define E_ACCESSDENIED ((HRESULT)0x80000009)
+#define E_PENDING ((HRESULT)0x8000000A)
+#define E_BOUNDS ((HRESULT)0x8000000B)
+#define E_CHANGED_STATE ((HRESULT)0x8000000C)
+#define E_ILLEGAL_STATE_CHANGE ((HRESULT)0x8000000D)
+#define E_ILLEGAL_METHOD_CALL ((HRESULT)0x8000000E)
+
+#define FACILITY_NULL 0
+#define FACILITY_RPC 1
+#define FACILITY_DISPATCH 2
+#define FACILITY_STORAGE 3
+#define FACILITY_ITF 4
+#define FACILITY_WIN32 7
+#define FACILITY_WINDOWS 8
+#define FACILITY_SSPI 9
+#define FACILITY_SECURITY 9
+#define FACILITY_CONTROL 10
+#define FACILITY_CERT 11
+#define FACILITY_INTERNET 12
+#define FACILITY_MEDIASERVER 13
+#define FACILITY_MSMQ 14
+#define FACILITY_SETUPAPI 15
+#define FACILITY_SCARD 16
+#define FACILITY_COMPLUS 17
+#define FACILITY_AAF 18
+#define FACILITY_URT 19
+#define FACILITY_ACS 20
+#define FACILITY_DPLAY 21
+#define FACILITY_UMI 22
+#define FACILITY_SXS 23
+#define FACILITY_WINDOWS_CE 24
+#define FACILITY_HTTP 25
+#define FACILITY_USERMODE_COMMONLOG 26
+#define FACILITY_WER 27
+#define FACILITY_USERMODE_FILTER_MANAGER 31
+#define FACILITY_BACKGROUNDCOPY 32
+#define FACILITY_CONFIGURATION 33
+#define FACILITY_WIA 33
+#define FACILITY_STATE_MANAGEMENT 34
+#define FACILITY_METADIRECTORY 35
+#define FACILITY_WINDOWSUPDATE 36
+#define FACILITY_DIRECTORYSERVICE 37
+#define FACILITY_GRAPHICS 38
+#define FACILITY_SHELL 39
+#define FACILITY_NAP 39
+#define FACILITY_TPM_SERVICES 40
+#define FACILITY_TPM_SOFTWARE 41
+#define FACILITY_UI 42
+#define FACILITY_XAML 43
+#define FACILITY_ACTION_QUEUE 44
+#define FACILITY_PLA 48
+#define FACILITY_WINDOWS_SETUP 48
+#define FACILITY_FVE 49
+#define FACILITY_FWP 50
+#define FACILITY_WINRM 51
+#define FACILITY_NDIS 52
+#define FACILITY_USERMODE_HYPERVISOR 53
+#define FACILITY_CMI 54
+#define FACILITY_USERMODE_VIRTUALIZATION 55
+#define FACILITY_USERMODE_VOLMGR 56
+#define FACILITY_BCD 57
+#define FACILITY_USERMODE_VHD 58
+#define FACILITY_USERMODE_HNS 59
+#define FACILITY_SDIAG 60
+#define FACILITY_WEBSERVICES 61
+#define FACILITY_WINPE 61
+#define FACILITY_WPN 62
+#define FACILITY_WINDOWS_STORE 63
+#define FACILITY_INPUT 64
+#define FACILITY_EAP 66
+#define FACILITY_WINDOWS_DEFENDER 80
+#define FACILITY_OPC 81
+#define FACILITY_XPS 82
+#define FACILITY_MBN 84
+#define FACILITY_POWERSHELL 84
+#define FACILITY_RAS 83
+#define FACILITY_P2P_INT 98
+#define FACILITY_P2P 99
+#define FACILITY_DAF 100
+#define FACILITY_BLUETOOTH_ATT 101
+#define FACILITY_AUDIO 102
+#define FACILITY_STATEREPOSITORY 103
+#define FACILITY_VISUALCPP 109
+#define FACILITY_SCRIPT 112
+#define FACILITY_PARSE 113
+#define FACILITY_BLB 120
+#define FACILITY_BLB_CLI 121
+#define FACILITY_WSBAPP 122
+#define FACILITY_BLBUI 128
+#define FACILITY_USN 129
+#define FACILITY_USERMODE_VOLSNAP 130
+#define FACILITY_TIERING 131
+#define FACILITY_WSB_ONLINE 133
+#define FACILITY_ONLINE_ID 134
+#define FACILITY_DEVICE_UPDATE_AGENT 135
+#define FACILITY_DRVSERVICING 136
+#define FACILITY_DLS 153
+#define FACILITY_DELIVERY_OPTIMIZATION 208
+#define FACILITY_USERMODE_SPACES 231
+#define FACILITY_USER_MODE_SECURITY_CORE 232
+#define FACILITY_USERMODE_LICENSING 234
+#define FACILITY_SOS 160
+#define FACILITY_DEBUGGERS 176
+#define FACILITY_SPP 256
+#define FACILITY_RESTORE 256
+#define FACILITY_DMSERVER 256
+#define FACILITY_DEPLOYMENT_SERVICES_SERVER 257
+#define FACILITY_DEPLOYMENT_SERVICES_IMAGING 258
+#define FACILITY_DEPLOYMENT_SERVICES_MANAGEMENT 259
+#define FACILITY_DEPLOYMENT_SERVICES_UTIL 260
+#define FACILITY_DEPLOYMENT_SERVICES_BINLSVC 261
+#define FACILITY_DEPLOYMENT_SERVICES_PXE 263
+#define FACILITY_DEPLOYMENT_SERVICES_TFTP 264
+#define FACILITY_DEPLOYMENT_SERVICES_TRANSPORT_MANAGEMENT 272
+#define FACILITY_DEPLOYMENT_SERVICES_DRIVER_PROVISIONING 278
+#define FACILITY_DEPLOYMENT_SERVICES_MULTICAST_SERVER 289
+#define FACILITY_DEPLOYMENT_SERVICES_MULTICAST_CLIENT 290
+#define FACILITY_DEPLOYMENT_SERVICES_CONTENT_PROVIDER 293
+#define FACILITY_LINGUISTIC_SERVICES 305
+#define FACILITY_AUDIOSTREAMING 1094
+#define FACILITY_ACCELERATOR 1536
+#define FACILITY_WMAAECMA 1996
+#define FACILITY_DIRECTMUSIC 2168
+#define FACILITY_DIRECT3D10 2169
+#define FACILITY_DXGI 2170
+#define FACILITY_DXGI_DDI 2171
+#define FACILITY_DIRECT3D11 2172
+#define FACILITY_DIRECT3D11_DEBUG 2173
+#define FACILITY_DIRECT3D12 2174
+#define FACILITY_DIRECT3D12_DEBUG 2175
+#define FACILITY_LEAP 2184
+#define FACILITY_AUDCLNT 2185
+#define FACILITY_WINCODEC_DWRITE_DWM 2200
+#define FACILITY_WINML 2192
+#define FACILITY_DIRECT2D 2201
+#define FACILITY_DEFRAG 2304
+#define FACILITY_USERMODE_SDBUS 2305
+#define FACILITY_JSCRIPT 2306
+#define FACILITY_PIDGENX 2561
+#define FACILITY_EAS 85
+#define FACILITY_WEB 885
+#define FACILITY_WEB_SOCKET 886
+#define FACILITY_MOBILE 1793
+#define FACILITY_SQLITE 1967
+#define FACILITY_UTC 1989
+#define FACILITY_WEP 2049
+#define FACILITY_SYNCENGINE 2050
+#define FACILITY_XBOX 2339
+#define FACILITY_GAME 2340
+#define FACILITY_PIX 2748
+
+#define SEVERITY_SUCCESS 0
+#define SEVERITY_ERROR 1
+
+#define SUCCEEDED(hr) (((HRESULT)(hr)) >= 0)
+#define FAILED(hr) (((HRESULT)(hr)) < 0)
+#define IS_ERROR(Status) (((unsigned long)(Status)) >> 31 == SEVERITY_ERROR)
+#define HRESULT_CODE(hr) ((hr) & 0xFFFF)
+#define SCODE_CODE(sc) ((sc) & 0xFFFF)
+#define HRESULT_FACILITY(hr) (((hr) >> 16) & 0x1fff)
+#define SCODE_FACILITY(sc) (((sc) >> 16) & 0x1fff)
+#define HRESULT_SEVERITY(hr) (((hr) >> 31) & 0x1)
+#define SCODE_SEVERITY(sc) (((sc) >> 31) & 0x1)
+#define MAKE_HRESULT(sev,fac,code) ((HRESULT) (((unsigned long)(sev)<<31) | ((unsigned long)(fac)<<16) | ((unsigned long)(code))) )
+#define MAKE_SCODE(sev,fac,code) ((SCODE) (((unsigned long)(sev)<<31) | ((unsigned long)(fac)<<16) | ((unsigned long)(code))) )
+
+#define CP_UTF8 65001
+
+#endif /* COSMOPOLITAN_LIBC_COMPAT_INCLUDE_WINDOWS_H_ */
diff --git a/libc/log/addr2linepath.c b/libc/log/addr2linepath.c
index 5493645c2..89c09d18b 100644
--- a/libc/log/addr2linepath.c
+++ b/libc/log/addr2linepath.c
@@ -36,7 +36,7 @@ static struct {
   char buf[PATH_MAX];
 } g_addr2line;
 
-const void GetAddr2linePathInit(void) {
+void GetAddr2linePathInit(void) {
   int e = errno;
   const char *path;
   if (!(path = getenv("ADDR2LINE"))) {
diff --git a/libc/log/countbranch_report.c b/libc/log/countbranch_report.c
index 80c5381ad..7616c4e7a 100644
--- a/libc/log/countbranch_report.c
+++ b/libc/log/countbranch_report.c
@@ -96,12 +96,9 @@ void countbranch_report(void) {
   }
 }
 
-static textstartup void countbranch_init() {
+__attribute__((__constructor__(90))) static textstartup void
+countbranch_init() {
   atexit(countbranch_report);
 }
 
-const void *const countbranch_ctor[] initarray = {
-    countbranch_init,
-};
-
 #endif /* __x86_64__ */
diff --git a/libc/log/countexpr_data.S b/libc/log/countexpr_data.S
index c06a2fe9e..72db2252f 100644
--- a/libc/log/countexpr_data.S
+++ b/libc/log/countexpr_data.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 	.yoink	countexpr_report
 
diff --git a/libc/log/countexpr_report.c b/libc/log/countexpr_report.c
index 2fe072624..3d1c50025 100644
--- a/libc/log/countexpr_report.c
+++ b/libc/log/countexpr_report.c
@@ -76,12 +76,8 @@ void countexpr_report(void) {
   }
 }
 
-static textstartup void countexpr_init() {
+__attribute__((__constructor__(90))) static textstartup void countexpr_init() {
   atexit(countexpr_report);
 }
 
-const void *const countexpr_ctor[] initarray = {
-    countexpr_init,
-};
-
 #endif /* __x86_64__ */
diff --git a/libc/log/cxaprintexits.c b/libc/log/cxaprintexits.c
index 08e7f2d4a..a9b6354f5 100644
--- a/libc/log/cxaprintexits.c
+++ b/libc/log/cxaprintexits.c
@@ -48,10 +48,10 @@ void __cxa_printexits(FILE *f, void *pred) {
           if (symbol) {
             snprintf(name, sizeof(name), "%s", symbol);
           } else {
-            snprintf(name, sizeof(name), "0x%016lx", b->p[i].fp);
+            snprintf(name, sizeof(name), "0x%016lx", (unsigned long)b->p[i].fp);
           }
-          fprintf(f, "%-22s 0x%016lx 0x%016lx\n", name, b->p[i].arg,
-                  b->p[i].pred);
+          fprintf(f, "%-22s 0x%016lx 0x%016lx\n", name,
+                  (unsigned long)b->p[i].arg, (unsigned long)b->p[i].pred);
         }
       }
     } while ((b = b->next));
diff --git a/libc/log/die.c b/libc/log/die.c
index d2fcd7948..30458102c 100644
--- a/libc/log/die.c
+++ b/libc/log/die.c
@@ -20,6 +20,7 @@
 #include "libc/errno.h"
 #include "libc/intrin/describebacktrace.internal.h"
 #include "libc/intrin/kprintf.h"
+#include "libc/log/backtrace.internal.h"
 #include "libc/log/internal.h"
 #include "libc/runtime/runtime.h"
 #include "libc/runtime/symbols.internal.h"
@@ -51,5 +52,6 @@ relegated wontreturn void __die(void) {
           __nocolor ? "" : "\e[1;31m", program_invocation_short_name, host,
           getpid(), gettid(), __nocolor ? "" : "\e[0m", FindDebugBinary(),
           DescribeBacktrace(__builtin_frame_address(0)));
+  ShowBacktrace(2, __builtin_frame_address(0));
   _Exit(77);
 }
diff --git a/libc/log/err.c b/libc/log/err.c
index 455e4ae40..83b459760 100644
--- a/libc/log/err.c
+++ b/libc/log/err.c
@@ -36,12 +36,9 @@
 #include "libc/str/str.h"
 // clang-format off
 
-asm(".ident\t\"\\n\\n\
-FreeBSD Err (BSD-3 License)\\n\
-Copyright (c) 1993\\n\
-\tThe Regents of the University of California.\\n\
-\tAll rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(freebsd_err_notice, "\
+FreeBSD Error Library (BSD-3 License)\n\
+Copyright 1993 The Regents of the University of California");
 
 static FILE *err_file; /* file to use for error output */
 static void (*err_exit)(int);
diff --git a/libc/log/gdbexec.c b/libc/log/gdbexec.c
index 3e0e035ba..f8d47f14e 100644
--- a/libc/log/gdbexec.c
+++ b/libc/log/gdbexec.c
@@ -41,7 +41,7 @@ relegated int(gdbexec)(const char *cmd) {
     elf = "-q";
   }
   bp = __builtin_frame_address(0);
-  sprintf(breakcmd, "%s *%#p", "break", bp->addr);
+  sprintf(breakcmd, "%s *%#lx", "break", (unsigned long)bp->addr);
   if (!(pid = vfork())) {
     execv(gdb, (char *const[]){
                    "gdb",
diff --git a/libc/log/libfatal.internal.h b/libc/log/libfatal.internal.h
index ec4847edb..fd0fee8c3 100644
--- a/libc/log/libfatal.internal.h
+++ b/libc/log/libfatal.internal.h
@@ -25,33 +25,33 @@ forceinline char *__stpcpy(char *d, const char *s) {
 }
 
 forceinline void *__repstosb(void *di, char al, size_t cx) {
-#if defined(__x86__) && defined(__GNUC__) && !defined(__STRICT_ANSI__)
-  asm("rep stosb"
-      : "=D"(di), "=c"(cx), "=m"(*(char(*)[cx])di)
-      : "0"(di), "1"(cx), "a"(al));
+#if defined(__x86__) && defined(__GNUC__)
+  __asm__("rep stosb"
+          : "=D"(di), "=c"(cx), "=m"(*(char(*)[cx])di)
+          : "0"(di), "1"(cx), "a"(al));
   return di;
 #else
   char *d = di;
   while (cx--) {
     *d++ = al;
-    asm volatile("" ::: "memory");
+    __asm__ volatile("" ::: "memory");
   }
   return (void *)d;
 #endif
 }
 
 forceinline void *__repmovsb(void *di, const void *si, size_t cx) {
-#if defined(__x86__) && defined(__GNUC__) && !defined(__STRICT_ANSI__)
-  asm("rep movsb"
-      : "=D"(di), "=S"(si), "=c"(cx), "=m"(*(char(*)[cx])di)
-      : "0"(di), "1"(si), "2"(cx), "m"(*(char(*)[cx])si));
+#if defined(__x86__) && defined(__GNUC__)
+  __asm__("rep movsb"
+          : "=D"(di), "=S"(si), "=c"(cx), "=m"(*(char(*)[cx])di)
+          : "0"(di), "1"(si), "2"(cx), "m"(*(char(*)[cx])si));
   return di;
 #else
   char *d = di;
   const char *s = si;
   while (cx--) {
     *d++ = *s++;
-    asm volatile("" ::: "memory");
+    __asm__ volatile("" ::: "memory");
   }
   return (void *)d;
 #endif
@@ -61,7 +61,7 @@ forceinline void *__mempcpy(void *d, const void *s, size_t n) {
   size_t i;
   for (i = 0; i < n; ++i) {
     ((char *)d)[i] = ((const char *)s)[i];
-    asm volatile("" ::: "memory");
+    __asm__ volatile("" ::: "memory");
   }
   return (char *)d + n;
 }
diff --git a/libc/log/log.h b/libc/log/log.h
index dbe66134f..8e88377e2 100644
--- a/libc/log/log.h
+++ b/libc/log/log.h
@@ -1,7 +1,7 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_LOG_LOG_H_
 #define COSMOPOLITAN_LIBC_LOG_LOG_H_
 #include "libc/stdio/stdio.h"
-#ifdef _COSMO_SOURCE
 
 #define kLogFatal   0
 #define kLogError   1
@@ -224,5 +224,5 @@ void vffatalf(ARGS, va_list) asm("vflogf") ATTRV relegated wontreturn libcesque;
 
 #endif /* __STRICT_ANSI__ */
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_LOG_LOG_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/log/logfile.c b/libc/log/logfile.c
index dc6221c8d..6d0231dbf 100644
--- a/libc/log/logfile.c
+++ b/libc/log/logfile.c
@@ -21,6 +21,6 @@
 
 FILE *__log_file;
 
-__attribute__((__constructor__)) static void init(void) {
+__attribute__((__constructor__(61))) static void loginit(void) {
   __log_file = stderr;
 }
diff --git a/libc/log/memlog.c b/libc/log/memlog.c
index 4c641bb41..62304ea7a 100644
--- a/libc/log/memlog.c
+++ b/libc/log/memlog.c
@@ -66,7 +66,7 @@ static struct Memlog {
     struct Alloc {
       void *addr;
       long size;
-    } * p;
+    } *p;
   } allocs;
   atomic_long usage;
 } __memlog;
@@ -270,7 +270,9 @@ static textexit void __memlog_destroy(void) {
   __memlog_unlock();
 }
 
-static textstartup void __memlog_init(void) {
+__attribute__((__constructor__(90)))  //
+static textstartup void
+__memlog_init(void) {
   GetSymbolTable();
   __memlog_lock();
   __memlog.free = hook_free;
@@ -290,7 +292,3 @@ static textstartup void __memlog_init(void) {
   atexit(__memlog_destroy);
   __memlog_unlock();
 }
-
-const void *const enable_memory_log[] initarray = {
-    __memlog_init,
-};
diff --git a/libc/log/oncrash_amd64.c b/libc/log/oncrash_amd64.c
index fe0949c62..06e155a15 100644
--- a/libc/log/oncrash_amd64.c
+++ b/libc/log/oncrash_amd64.c
@@ -252,16 +252,9 @@ static relegated void ShowCrashReport(int err, int sig, struct siginfo *si,
 }
 
 relegated void __oncrash(int sig, struct siginfo *si, void *arg) {
-  ucontext_t *ctx = arg;
-  int gdbpid, err;
-  err = errno;
-  if ((gdbpid = IsDebuggerPresent(true))) {
-    DebugBreak();
-  }
-  if (!(gdbpid > 0 && (sig == SIGTRAP || sig == SIGQUIT))) {
-    __restore_tty();
-    ShowCrashReport(err, sig, si, ctx);
-  }
+  int err = errno;
+  __restore_tty();
+  ShowCrashReport(err, sig, si, arg);
 }
 
 #endif /* __x86_64__ */
diff --git a/libc/macros.internal.h b/libc/macros.internal.h
index 396f2b749..ae6313384 100644
--- a/libc/macros.internal.h
+++ b/libc/macros.internal.h
@@ -21,8 +21,8 @@
 #define TRUE  1
 #define FALSE 0
 
-#define IS2POW(X)       (!((X) & ((X)-1)))
-#define ROUNDUP(X, K)   (((X) + (K)-1) & -(K))
+#define IS2POW(X)       (!((X) & ((X) - 1)))
+#define ROUNDUP(X, K)   (((X) + (K) - 1) & -(K))
 #define ROUNDDOWN(X, K) ((X) & -(K))
 #ifndef __ASSEMBLER__
 #define ABS(X)    ((X) >= 0 ? (X) : -(X))
@@ -40,7 +40,7 @@
 #define STRINGIFY(A)        __STRINGIFY(A)
 #define EQUIVALENT(X, Y)    (__builtin_constant_p((X) == (Y)) && ((X) == (Y)))
 #define TYPE_BIT(type)      (sizeof(type) * CHAR_BIT)
-#define TYPE_SIGNED(type)   (((type)-1) < 0)
+#define TYPE_SIGNED(type)   (((type) - 1) < 0)
 #define TYPE_INTEGRAL(type) (((type)0.5) != 0.5)
 
 #define ARRAYLEN(A) \
diff --git a/libc/math.h b/libc/math.h
index 0dd9fe063..0b63b361a 100644
--- a/libc/math.h
+++ b/libc/math.h
@@ -4,59 +4,101 @@
 │ cosmopolitan § mathematics                                                   │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 
-#define M_E        2.7182818284590452354 /* 𝑒 */
-#define M_LOG2_10  0xd.49a784bcd1b8afep-2 /* log₂10 ≈ 3.3219280948873623478 */
-#define M_LOG2E    0xb.8aa3b295c17f0bcp-3 /* log₂𝑒 ≈ 1.4426950408889634074 */
-#define M_LOG10E   0.43429448190325182765  /* log₁₀𝑒 */
-#define M_LN2      0xb.17217f7d1cf79acp-4  /* logₑ2 ≈ */
-#define M_LN10     2.30258509299404568402  /* logₑ10 */
-#define M_TAU      0x1.921fb54442d1846ap+2 /* τ = 2π */
-#define M_PI       0x1.921fb54442d1846ap+1 /* π ≈ 3.14159265358979323846 */
-#define M_PI_2     1.57079632679489661923  /* π/2 */
-#define M_PI_4     0.78539816339744830962  /* π/4 */
-#define M_1_PI     0.31830988618379067154  /* 1/π */
-#define M_2_PI     0.63661977236758134308  /* 2/π */
-#define M_2_SQRTPI 1.12837916709551257390  /* 2/sqrtπ */
-#define M_SQRT2    1.41421356237309504880  /* sqrt2 */
-#define M_SQRT1_2  0.70710678118654752440  /* 1/sqrt2 */
+#define M_E        2.7182818284590452354  /* 𝑒 */
+#define M_LOG2E    1.4426950408889634074  /* log₂𝑒 */
+#define M_LOG10E   0.43429448190325182765 /* log₁₀𝑒 */
+#define M_LN2      0.69314718055994530942 /* logₑ2 */
+#define M_LN10     2.30258509299404568402 /* logₑ10 */
+#define M_PI       3.14159265358979323846 /* 𝜋 */
+#define M_PI_2     1.57079632679489661923 /* 𝜋/2 */
+#define M_PI_4     0.78539816339744830962 /* 𝜋/4 */
+#define M_1_PI     0.31830988618379067154 /* 1/𝜋 */
+#define M_2_PI     0.63661977236758134308 /* 2/𝜋 */
+#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(𝜋) */
+#define M_SQRT2    1.41421356237309504880 /* sqrt(2) */
+#define M_SQRT1_2  0.70710678118654752440 /* 1/sqrt(2) */
 
-#define DBL_DECIMAL_DIG  __DBL_DECIMAL_DIG__
-#define DBL_DIG          __DBL_DIG__
-#define DBL_EPSILON      __DBL_EPSILON__
-#define DBL_MANT_DIG     __DBL_MANT_DIG__
-#define DBL_MANT_DIG     __DBL_MANT_DIG__
-#define DBL_MAX          __DBL_MAX__
-#define DBL_MAX_10_EXP   __DBL_MAX_10_EXP__
-#define DBL_MAX_EXP      __DBL_MAX_EXP__
-#define DBL_MIN          __DBL_MIN__ /* 2.23e–308 ↔ 1.79e308 */
-#define DBL_MIN_10_EXP   __DBL_MIN_10_EXP__
-#define DBL_MIN_EXP      __DBL_MIN_EXP__
-#define DECIMAL_DIG      __LDBL_DECIMAL_DIG__
-#define FLT_DECIMAL_DIG  __FLT_DECIMAL_DIG__
-#define FLT_RADIX        __FLT_RADIX__
-#define FLT_DIG          __FLT_DIG__
-#define FLT_EPSILON      __FLT_EPSILON__
-#define FLT_MANT_DIG     __FLT_MANT_DIG__
-#define FLT_MANT_DIG     __FLT_MANT_DIG__
-#define FLT_MAX          __FLT_MAX__
-#define FLT_MAX_10_EXP   __FLT_MAX_10_EXP__
-#define FLT_MAX_EXP      __FLT_MAX_EXP__
-#define FLT_MIN          __FLT_MIN__ /* 1.18e–38 ↔ 3.40e38 */
-#define FLT_MIN_10_EXP   __FLT_MIN_10_EXP__
-#define FLT_MIN_EXP      __FLT_MIN_EXP__
-#define HLF_MAX          6.50e4f
-#define HLF_MIN          3.10e-5f
-#define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
-#define LDBL_DIG         __LDBL_DIG__
-#define LDBL_EPSILON     __LDBL_EPSILON__
-#define LDBL_MANT_DIG    __LDBL_MANT_DIG__
-#define LDBL_MANT_DIG    __LDBL_MANT_DIG__
-#define LDBL_MAX         __LDBL_MAX__
-#define LDBL_MAX_10_EXP  __LDBL_MAX_10_EXP__
-#define LDBL_MAX_EXP     __LDBL_MAX_EXP__
-#define LDBL_MIN         __LDBL_MIN__ /* 3.37e–4932 ↔ 1.18e4932 */
-#define LDBL_MIN_10_EXP  __LDBL_MIN_10_EXP__
-#define LDBL_MIN_EXP     __LDBL_MIN_EXP__
+#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
+#define M_Ef        2.7182818284590452354f  /* 𝑒 */
+#define M_LOG2Ef    1.4426950408889634074f  /* log₂𝑒 */
+#define M_LOG10Ef   0.43429448190325182765f /* log₁₀𝑒 */
+#define M_LN2f      0.69314718055994530942f /* logₑ2 */
+#define M_LN10f     2.30258509299404568402f /* logₑ10 */
+#define M_PIf       3.14159265358979323846f /* 𝜋 */
+#define M_PI_2f     1.57079632679489661923f /* 𝜋/2 */
+#define M_PI_4f     0.78539816339744830962f /* 𝜋/4 */
+#define M_1_PIf     0.31830988618379067154f /* 1/𝜋 */
+#define M_2_PIf     0.63661977236758134308f /* 2/𝜋 */
+#define M_2_SQRTPIf 1.12837916709551257390f /* 2/sqrt(𝜋) */
+#define M_SQRT2f    1.41421356237309504880f /* sqrt(2) */
+#define M_SQRT1_2f  0.70710678118654752440f /* 1/sqrt(2) */
+#endif
+
+#if defined(_GNU_SOURCE) || defined(_COSMO_SOURCE)
+#define M_El        2.718281828459045235360287471352662498L /* 𝑒 */
+#define M_LOG2El    1.442695040888963407359924681001892137L /* log₂𝑒 */
+#define M_LOG10El   0.434294481903251827651128918916605082L /* log₁₀𝑒 */
+#define M_LN2l      0.693147180559945309417232121458176568L /* logₑ2 */
+#define M_LN10l     2.302585092994045684017991454684364208L /* logₑ10 */
+#define M_PIl       3.141592653589793238462643383279502884L /* 𝜋 */
+#define M_PI_2l     1.570796326794896619231321691639751442L /* 𝜋/2 */
+#define M_PI_4l     0.785398163397448309615660845819875721L /* 𝜋/4 */
+#define M_1_PIl     0.318309886183790671537767526745028724L /* 1/𝜋 */
+#define M_2_PIl     0.636619772367581343075535053490057448L /* 2/𝜋 */
+#define M_2_SQRTPIl 1.128379167095512573896158903121545172L /* 2/sqrt(𝜋) */
+#define M_SQRT2l    1.414213562373095048801688724209698079L /* sqrt(2) */
+#define M_SQRT1_2l  0.707106781186547524400844362104849039L /* 1/sqrt(2) */
+#endif
+
+#define DBL_DECIMAL_DIG   __DBL_DECIMAL_DIG__
+#define DBL_DIG           __DBL_DIG__
+#define DBL_EPSILON       __DBL_EPSILON__
+#define DBL_HAS_SUBNORM   __DBL_HAS_DENORM__
+#define DBL_IS_IEC_60559  __DBL_IS_IEC_60559__
+#define DBL_MANT_DIG      __DBL_MANT_DIG__
+#define DBL_MANT_DIG      __DBL_MANT_DIG__
+#define DBL_MAX           __DBL_MAX__
+#define DBL_MAX_10_EXP    __DBL_MAX_10_EXP__
+#define DBL_MAX_EXP       __DBL_MAX_EXP__
+#define DBL_MIN           __DBL_MIN__ /* 2.23e–308 ↔ 1.79e308 */
+#define DBL_MIN_10_EXP    __DBL_MIN_10_EXP__
+#define DBL_MIN_EXP       __DBL_MIN_EXP__
+#define DBL_NORM_MAX      __DBL_NORM_MAX__
+#define DBL_TRUE_MIN      __DBL_DENORM_MIN__
+#define DECIMAL_DIG       __LDBL_DECIMAL_DIG__
+#define FLT_DECIMAL_DIG   __FLT_DECIMAL_DIG__
+#define FLT_DIG           __FLT_DIG__
+#define FLT_EPSILON       __FLT_EPSILON__
+#define FLT_HAS_SUBNORM   __FLT_HAS_DENORM__
+#define FLT_IS_IEC_60559  __FLT_IS_IEC_60559__
+#define FLT_MANT_DIG      __FLT_MANT_DIG__
+#define FLT_MANT_DIG      __FLT_MANT_DIG__
+#define FLT_MAX           __FLT_MAX__
+#define FLT_MAX_10_EXP    __FLT_MAX_10_EXP__
+#define FLT_MAX_EXP       __FLT_MAX_EXP__
+#define FLT_MIN           __FLT_MIN__ /* 1.18e–38 ↔ 3.40e38 */
+#define FLT_MIN_10_EXP    __FLT_MIN_10_EXP__
+#define FLT_MIN_EXP       __FLT_MIN_EXP__
+#define FLT_NORM_MAX      __FLT_NORM_MAX__
+#define FLT_RADIX         __FLT_RADIX__
+#define FLT_TRUE_MIN      __FLT_DENORM_MIN__
+#define HLF_MAX           6.50e4f
+#define HLF_MIN           3.10e-5f
+#define LDBL_DECIMAL_DIG  __LDBL_DECIMAL_DIG__
+#define LDBL_DIG          __LDBL_DIG__
+#define LDBL_EPSILON      __LDBL_EPSILON__
+#define LDBL_HAS_SUBNORM  __LDBL_HAS_DENORM__
+#define LDBL_IS_IEC_60559 __LDBL_IS_IEC_60559__
+#define LDBL_MANT_DIG     __LDBL_MANT_DIG__
+#define LDBL_MANT_DIG     __LDBL_MANT_DIG__
+#define LDBL_MAX          __LDBL_MAX__
+#define LDBL_MAX_10_EXP   __LDBL_MAX_10_EXP__
+#define LDBL_MAX_EXP      __LDBL_MAX_EXP__
+#define LDBL_MIN          __LDBL_MIN__ /* 3.37e–4932 ↔ 1.18e4932 */
+#define LDBL_MIN_10_EXP   __LDBL_MIN_10_EXP__
+#define LDBL_MIN_EXP      __LDBL_MIN_EXP__
+#define LDBL_NORM_MAX     __LDBL_NORM_MAX__
+#define LDBL_TRUE_MIN     __LDBL_DENORM_MIN__
 
 #define FP_NAN       0
 #define FP_INFINITE  1
@@ -66,6 +108,27 @@
 #define FP_ILOGB0    (-2147483647 - 1)
 #define FP_ILOGBNAN  (-2147483647 - 1)
 
+#define MATH_ERRNO     1
+#define MATH_ERREXCEPT 2
+
+#ifdef __FAST_MATH__
+#define math_errhandling 0
+#elif defined(__NO_MATH_ERRNO__)
+#define math_errhandling (MATH_ERREXCEPT)
+#else
+#define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT)
+#endif
+
+#ifdef __FP_FAST_FMA
+#define FP_FAST_FMA 1
+#endif
+#ifdef __FP_FAST_FMAF
+#define FP_FAST_FMAF 1
+#endif
+#ifdef __FP_FAST_FMAL
+#define FP_FAST_FMAL 1
+#endif
+
 COSMOPOLITAN_C_START_
 
 #define NAN       __builtin_nanf("")
@@ -96,10 +159,10 @@ typedef double double_t;
 #define fpclassify(x) \
   __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
 
-#define signbit(x)                                      \
-  (sizeof(x) == sizeof(double)  ? __builtin_signbit(x)  \
-   : sizeof(x) == sizeof(float) ? __builtin_signbitf(x) \
-                                : __builtin_signbitl(x))
+#define signbit(x)                                          \
+  (sizeof(x) == sizeof(long double) ? __builtin_signbitl(x) \
+   : sizeof(x) == sizeof(float)     ? __builtin_signbitf(x) \
+                                    : __builtin_signbit(x))
 
 extern int signgam;
 
diff --git a/libc/mem/gc.c b/libc/mem/gc.c
index b977d0964..07957ba57 100644
--- a/libc/mem/gc.c
+++ b/libc/mem/gc.c
@@ -51,7 +51,7 @@ static void TeardownGc(void) {
   }
 }
 
-__attribute__((__constructor__)) static void InitializeGc(void) {
+__attribute__((__constructor__(51))) static textstartup void InitGc(void) {
   atexit(TeardownGc);
 }
 
diff --git a/libc/mem/gc.h b/libc/mem/gc.h
index 3ff3b17d9..263421c53 100644
--- a/libc/mem/gc.h
+++ b/libc/mem/gc.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_MEM_GC_H_
 #define COSMOPOLITAN_LIBC_MEM_GC_H_
-#ifdef _COSMO_SOURCE
 COSMOPOLITAN_C_START_
 
 libcesque void _gc_free(void *);
@@ -19,5 +19,5 @@ libcesque void gclongjmp(void *, int) wontreturn;
   })
 
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_MEM_GC_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/mem/heapsort.c b/libc/mem/heapsort.c
index 31645308a..9577c3891 100644
--- a/libc/mem/heapsort.c
+++ b/libc/mem/heapsort.c
@@ -31,6 +31,7 @@
 #include "libc/mem/alg.h"
 #include "libc/mem/mem.h"
 #include "libc/sysv/errfuns.h"
+__static_yoink("openbsd_sorting_notice");
 // clang-format off
 
 /*
diff --git a/libc/mem/mergesort.c b/libc/mem/mergesort.c
index 205a20ef4..400b6dfe3 100644
--- a/libc/mem/mergesort.c
+++ b/libc/mem/mergesort.c
@@ -33,13 +33,9 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 #include "libc/sysv/errfuns.h"
+__static_yoink("openbsd_sorting_notice");
 // clang-format off
 
-asm(".ident\t\"\\n\\n\
-OpenBSD Sorting (BSD-3)\\n\
-Copyright 1993 The Regents of the University of California\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /*
  * Hybrid exponential search/linear search merge sort with hybrid
  * natural/pairwise first pass.  Requires about .3% more comparisons
@@ -54,8 +50,8 @@ asm(".include \"libc/disclaimer.inc\"");
  * (The default is pairwise merging.)
  */
 
-static void setup(uint8_t *, uint8_t *, size_t, size_t, int (*)(), void *);
-static void insertionsort(uint8_t *, size_t, size_t, int (*)(), void *);
+static void setup(uint8_t *, uint8_t *, size_t, size_t, int (*)(const void *, const void *, void *), void *);
+static void insertionsort(uint8_t *, size_t, size_t, int (*)(const void *, const void *, void *), void *);
 
 #define ISIZE sizeof(int)
 #define PSIZE sizeof(uint8_t *)
diff --git a/libc/mem/opensort.c b/libc/mem/opensort.c
new file mode 100644
index 000000000..36f061dfd
--- /dev/null
+++ b/libc/mem/opensort.c
@@ -0,0 +1,2 @@
+__notice(openbsd_sorting_notice, "OpenBSD Sorting (BSD-3)\n\
+Copyright 1993 The Regents of the University of California");
diff --git a/libc/mem/qsort.c b/libc/mem/qsort.c
index ca96a532a..361f26a86 100644
--- a/libc/mem/qsort.c
+++ b/libc/mem/qsort.c
@@ -31,13 +31,9 @@
 #include "libc/macros.internal.h"
 #include "libc/mem/alg.h"
 #include "libc/str/str.h"
+__static_yoink("openbsd_sorting_notice");
 // clang-format off
 
-asm(".ident\t\"\\n\\n\
-OpenBSD Sorting (BSD-3)\\n\
-Copyright 1993 The Regents of the University of California\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 #define SWAPTYPE_BYTEV	1
 #define SWAPTYPE_INTV	2
 #define SWAPTYPE_LONGV	3
diff --git a/libc/mem/realpath.c b/libc/mem/realpath.c
index bba846b7d..3ea1432a5 100644
--- a/libc/mem/realpath.c
+++ b/libc/mem/realpath.c
@@ -28,20 +28,17 @@
 #include "libc/assert.h"
 #include "libc/calls/calls.h"
 #include "libc/errno.h"
-#include "libc/serialize.h"
 #include "libc/intrin/safemacros.internal.h"
 #include "libc/limits.h"
 #include "libc/log/backtrace.internal.h"
 #include "libc/mem/mem.h"
+#include "libc/serialize.h"
 #include "libc/str/str.h"
 #include "libc/sysv/errfuns.h"
+__static_yoink("musl_libc_notice");
 
 #define SYMLOOP_MAX 40
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 static size_t GetSlashLen(const char *s)
diff --git a/libc/nexgen32e/argc.S b/libc/nexgen32e/argc.S
index 3dc3852c2..9e85f4409 100644
--- a/libc/nexgen32e/argc.S
+++ b/libc/nexgen32e/argc.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 	.initbss 300,_init_argc
 //	Global variable holding _start(argc) parameter.
diff --git a/libc/nexgen32e/argv.S b/libc/nexgen32e/argv.S
index a3b95116b..9ee093476 100644
--- a/libc/nexgen32e/argv.S
+++ b/libc/nexgen32e/argv.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 	.initbss 300,_init_argv
 //	Global variable holding _start(argv) parameter.
diff --git a/libc/nexgen32e/auxv.S b/libc/nexgen32e/auxv.S
index bc81ef64c..0c921df7c 100644
--- a/libc/nexgen32e/auxv.S
+++ b/libc/nexgen32e/auxv.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 	.initbss 300,_init_auxv
 //	Global variable holding _start(auxv) parameter.
diff --git a/libc/nexgen32e/environ.S b/libc/nexgen32e/environ.S
index bf368d773..d1419a52c 100644
--- a/libc/nexgen32e/environ.S
+++ b/libc/nexgen32e/environ.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 //	Environment variable pointer list.
 	.bss
diff --git a/libc/nexgen32e/gc.S b/libc/nexgen32e/gc.S
index 037ce8b31..302dcd5a2 100644
--- a/libc/nexgen32e/gc.S
+++ b/libc/nexgen32e/gc.S
@@ -18,7 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
 #include "libc/dce.h"
-#include "libc/notice.inc"
 
 	nop
 
@@ -37,7 +36,7 @@ __gc:	.ftrace2
 
 #ifdef __x86_64__
 
-	mov	%fs:0,%rcx			// __get_tls()
+	mov	%gs:0x30,%rcx			// __get_tls()
 	mov	0x18(%rcx),%rcx			// tls::garbages
 	decl	(%rcx)				// --g->i
 	mov	(%rcx),%r8d			// r8 = g->i
diff --git a/libc/nexgen32e/gclongjmp.S b/libc/nexgen32e/gclongjmp.S
index 18f092000..1fb68131b 100644
--- a/libc/nexgen32e/gclongjmp.S
+++ b/libc/nexgen32e/gclongjmp.S
@@ -35,7 +35,7 @@ gclongjmp:
 #ifdef __x86_64__
 	push	%rbp
 	mov	%rsp,%rbp
-	mov	%fs:0,%r12			// __get_tls()
+	mov	%gs:0x30,%r12			// __get_tls()
 	mov	0x18(%r12),%r12			// Tls::garbages
 	test	%r12,%r12
 	jz	0f
diff --git a/libc/nexgen32e/identity.S b/libc/nexgen32e/identity.S
index af26136a2..7fc23e4d8 100644
--- a/libc/nexgen32e/identity.S
+++ b/libc/nexgen32e/identity.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 //	The identity() function.
 //	@return	first argument
diff --git a/libc/nexgen32e/rdtscp.h b/libc/nexgen32e/rdtscp.h
index 9c3502faf..5bebc8735 100644
--- a/libc/nexgen32e/rdtscp.h
+++ b/libc/nexgen32e/rdtscp.h
@@ -5,7 +5,7 @@
 #include "libc/nexgen32e/x86feature.h"
 COSMOPOLITAN_C_START_
 
-#define TSC_AUX_CORE(MSR) ((MSR)&0xfff)
+#define TSC_AUX_CORE(MSR) ((MSR) & 0xfff)
 #define TSC_AUX_NODE(MSR) (((MSR) >> 12) & 0xfff)
 
 /**
@@ -41,9 +41,10 @@ COSMOPOLITAN_C_START_
       asm volatile("rdpid\t%0" : "=r"(Msr) : /* no inputs */ : "memory"); \
       Ok = true;                                                          \
     } else if (IsLinux()) {                                               \
+      char *p = (char *)0x7b;                                             \
       asm volatile(ZFLAG_ASM("lsl\t%2,%1")                                \
                    : ZFLAG_CONSTRAINT(Ok), "=r"(Msr)                      \
-                   : "r"(0x7b)                                            \
+                   : "r"(p)                                               \
                    : "memory");                                           \
     }                                                                     \
     if (!Ok && X86_HAVE(RDTSCP)) {                                        \
diff --git a/libc/nexgen32e/sha1.S b/libc/nexgen32e/sha1.S
index 24fb18306..1016c0498 100644
--- a/libc/nexgen32e/sha1.S
+++ b/libc/nexgen32e/sha1.S
@@ -33,10 +33,11 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
 
-.ident "\n\
-AVX2 SHA-1 (BSD-3 License)\n\
-Copyright 2014 Intel Corporation\n"
-.include "libc/disclaimer.inc"
+	.section .notice,"aR",@progbits
+	.asciz	"\n\n\
+AVX2 SHA-1 (BSD-3 License)n\
+Copyright 2014 Intel Corporation"
+	.previous
 
 #define	CTX	%rdi	/* arg1 */
 #define BUF	%rsi	/* arg2 */
diff --git a/libc/nexgen32e/sha1ni.S b/libc/nexgen32e/sha1ni.S
index f1e577d22..223f5f25d 100644
--- a/libc/nexgen32e/sha1ni.S
+++ b/libc/nexgen32e/sha1ni.S
@@ -33,14 +33,15 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
 
-.text
-.balign 32
-.ident "\n\
+.section .notice,"aR",@progbits
+.asciz "\n\n\
 Intel SHA-NI (BSD-3 License)\n\
 Copyright 2015 Intel Corporation\n\
 Sean Gulley <sean.m.gulley@intel.com>\n\
-Tim Chen <tim.c.chen@linux.intel.com>\n"
-.include "libc/disclaimer.inc"
+Tim Chen <tim.c.chen@linux.intel.com>"
+
+.text
+.balign 32
 
 #define FRAME_SIZE	32
 #define DIGEST_PTR	%rdi
diff --git a/libc/nexgen32e/sha256.S b/libc/nexgen32e/sha256.S
index 3858101dd..df175bf5b 100644
--- a/libc/nexgen32e/sha256.S
+++ b/libc/nexgen32e/sha256.S
@@ -49,10 +49,11 @@
 /////////////////////////////////////////////////////////////////////////
 #include "libc/macros.internal.h"
 
-.ident "\n\
+.section .notice,"aR",@progbits
+.asciz "\n\n\
 AVX2 SHA2 (BSD-2 License)\n\
-Copyright 2013 Intel Corporation\n"
-.include "libc/disclaimer.inc"
+Copyright 2013 Intel Corporation"
+.previous
 
 ## assume buffers not aligned
 #define	VMOVDQ vmovdqu
diff --git a/libc/nexgen32e/sha256ni.S b/libc/nexgen32e/sha256ni.S
index d04857550..eb020d706 100644
--- a/libc/nexgen32e/sha256ni.S
+++ b/libc/nexgen32e/sha256ni.S
@@ -33,14 +33,15 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
 
-.text
-.balign 32
-.ident "\n\
+.section .notice,"aR",@progbits
+.asciz "\n\n\
 Intel SHA-NI (BSD-3 License)\n\
 Copyright 2015 Intel Corporation\n\
 Sean Gulley <sean.m.gulley@intel.com>\n\
-Tim Chen <tim.c.chen@linux.intel.com>\n"
-.include "libc/disclaimer.inc"
+Tim Chen <tim.c.chen@linux.intel.com>"
+
+.text
+.balign 32
 
 #define DIGEST_PTR	%rdi	/* 1st arg */
 #define DATA_PTR	%rsi	/* 2nd arg */
diff --git a/libc/nexgen32e/sha512.S b/libc/nexgen32e/sha512.S
index b858a44d9..6e36d6d1b 100644
--- a/libc/nexgen32e/sha512.S
+++ b/libc/nexgen32e/sha512.S
@@ -50,10 +50,11 @@
 /////////////////////////////////////////////////////////////////////////
 #include "libc/macros.internal.h"
 
-.ident "\n\
-AVX2 SHA2 (BSD-2 License)\n\
-Copyright 2013 Intel Corporation\n"
-.include "libc/disclaimer.inc"
+.section .notice,"aR",@progbits
+.asciz "\n\n\
+AVX2 SHA512 (BSD-2 License)\n\
+Copyright 2013 Intel Corporation"
+.previous
 
 # Virtual Registers
 Y_0 = %ymm4
diff --git a/libc/nexgen32e/stackframe.h b/libc/nexgen32e/stackframe.h
index 895a109bd..242758bb1 100644
--- a/libc/nexgen32e/stackframe.h
+++ b/libc/nexgen32e/stackframe.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_NEXGEN32E_STACKFRAME_H_
 #define COSMOPOLITAN_LIBC_NEXGEN32E_STACKFRAME_H_
-#ifdef _COSMO_SOURCE
 COSMOPOLITAN_C_START_
 
 struct StackFrame {
@@ -9,5 +9,5 @@ struct StackFrame {
 };
 
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_NEXGEN32E_STACKFRAME_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/notice.inc b/libc/notice.inc
deleted file mode 100644
index 5b9c68964..000000000
--- a/libc/notice.inc
+++ /dev/null
@@ -1,17 +0,0 @@
-.ident "\n\
-Cosmopolitan\n\
-Copyright 2020 Justine Alexandra Roberts Tunney\n\
-\n\
-Permission to use, copy, modify, and/or distribute this software for\n\
-any purpose with or without fee is hereby granted, provided that the\n\
-above copyright notice and this permission notice appear in all copies.\n\
-\n\
-THE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL\n\
-WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED\n\
-WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE\n\
-AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL\n\
-DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR\n\
-PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER\n\
-TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\n\
-PERFORMANCE OF THIS SOFTWARE.\
-"
diff --git a/libc/notice.internal.h b/libc/notice.internal.h
deleted file mode 100644
index 4bb9ab304..000000000
--- a/libc/notice.internal.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_INTERNAL_NOTICE_H_
-#define COSMOPOLITAN_LIBC_INTERNAL_NOTICE_H_
-#ifndef __STRICT_ANSI__
-
-#ifdef __ASSEMBLER__
-.include "libc/notice.inc"
-#else
-asm(".include \"libc/notice.inc\"");
-#endif
-
-#endif /* !ANSI */
-#endif /* COSMOPOLITAN_LIBC_INTERNAL_NOTICE_H_ */
diff --git a/libc/nt/advapi32/SystemFunction036.S b/libc/nt/BCryptPrimitives/ProcessPrng.S
similarity index 57%
rename from libc/nt/advapi32/SystemFunction036.S
rename to libc/nt/BCryptPrimitives/ProcessPrng.S
index 4380c47e3..d7eec4fbb 100644
--- a/libc/nt/advapi32/SystemFunction036.S
+++ b/libc/nt/BCryptPrimitives/ProcessPrng.S
@@ -1,18 +1,18 @@
 #include "libc/nt/codegen.h"
-.imp	advapi32,__imp_SystemFunction036,SystemFunction036
+.imp	BCryptPrimitives,__imp_ProcessPrng,ProcessPrng
 
 	.text.windows
 	.ftrace1
-RtlGenRandom:
+ProcessPrng:
 	.ftrace2
 #ifdef __x86_64__
 	push	%rbp
 	mov	%rsp,%rbp
-	mov	__imp_SystemFunction036(%rip),%rax
+	mov	__imp_ProcessPrng(%rip),%rax
 	jmp	__sysv2nt
 #elif defined(__aarch64__)
 	mov	x0,#0
 	ret
 #endif
-	.endfn	RtlGenRandom,globl
+	.endfn	ProcessPrng,globl
 	.previous
diff --git a/libc/nt/BUILD.mk b/libc/nt/BUILD.mk
index b49f2c41a..feaa79e24 100644
--- a/libc/nt/BUILD.mk
+++ b/libc/nt/BUILD.mk
@@ -297,6 +297,24 @@ $(LIBC_NT_PSAPI_A).pkg:					\
 
 #───────────────────────────────────────────────────────────────────────────────
 
+LIBC_NT_ARTIFACTS += LIBC_NT_BCRYPTPRIMITIVES_A
+LIBC_NT_BCRYPTPRIMITIVES = $(LIBC_NT_BCRYPTPRIMITIVES_A_DEPS) $(LIBC_NT_BCRYPTPRIMITIVES_A)
+LIBC_NT_BCRYPTPRIMITIVES_A = o/$(MODE)/libc/nt/BCryptPrimitives.a
+LIBC_NT_BCRYPTPRIMITIVES_A_SRCS := $(wildcard libc/nt/BCryptPrimitives/*.S)
+LIBC_NT_BCRYPTPRIMITIVES_A_OBJS = $(LIBC_NT_BCRYPTPRIMITIVES_A_SRCS:%.S=o/$(MODE)/%.o)
+LIBC_NT_BCRYPTPRIMITIVES_A_CHECKS = $(LIBC_NT_BCRYPTPRIMITIVES_A).pkg
+LIBC_NT_BCRYPTPRIMITIVES_A_DIRECTDEPS = LIBC_NT_KERNEL32
+LIBC_NT_BCRYPTPRIMITIVES_A_DEPS := $(call uniq,$(foreach x,$(LIBC_NT_BCRYPTPRIMITIVES_A_DIRECTDEPS),$($(x))))
+$(LIBC_NT_BCRYPTPRIMITIVES_A):				\
+		libc/nt/BCryptPrimitives/		\
+		$(LIBC_NT_BCRYPTPRIMITIVES_A).pkg	\
+		$(LIBC_NT_BCRYPTPRIMITIVES_A_OBJS)
+$(LIBC_NT_BCRYPTPRIMITIVES_A).pkg:			\
+		$(LIBC_NT_BCRYPTPRIMITIVES_A_OBJS)	\
+		$(foreach x,$(LIBC_NT_BCRYPTPRIMITIVES_A_DIRECTDEPS),$($(x)_A).pkg)
+
+#───────────────────────────────────────────────────────────────────────────────
+
 # let aarch64 compile these
 o/$(MODE)/libc/nt/%.o: libc/nt/%.S
 	@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) $<
diff --git a/libc/nt/dll.h b/libc/nt/dll.h
index 860b0bf05..186851f77 100644
--- a/libc/nt/dll.h
+++ b/libc/nt/dll.h
@@ -27,6 +27,7 @@ COSMOPOLITAN_C_START_
 ╚────────────────────────────────────────────────────────────────────────────│*/
 
 int64_t LoadLibrary(const char16_t *lpLibFileName);
+int64_t LoadLibraryA(const char *lpLibFileName);
 int64_t LoadLibraryEx(const char16_t *lpLibFileName, int64_t hFile,
                       uint32_t dwFlags);
 uint32_t GetModuleFileNameA(int64_t hModule, char *lpFilename, uint32_t nSize);
diff --git a/libc/nt/events.h b/libc/nt/events.h
index c57389f22..90dba5ac2 100644
--- a/libc/nt/events.h
+++ b/libc/nt/events.h
@@ -57,6 +57,9 @@ int32_t DeregisterEventSource(uint64_t handle);
 int64_t CreateEvent(const struct NtSecurityAttributes *opt_lpEventAttributes,
                     bool32 bManualReset, bool32 bInitialState,
                     const char16_t *opt_lpName);
+int64_t CreateEventA(const struct NtSecurityAttributes *opt_lpEventAttributes,
+                     bool32 bManualReset, bool32 bInitialState,
+                     const char *opt_lpName);
 int64_t CreateEventEx(const struct NtSecurityAttributes *lpEventAttributes,
                       const char16_t *lpName, uint32_t dwFlags,
                       uint32_t dwDesiredAccess);
diff --git a/libc/nt/kernel32/CreateEventA.S b/libc/nt/kernel32/CreateEventA.S
new file mode 100644
index 000000000..e874b0357
--- /dev/null
+++ b/libc/nt/kernel32/CreateEventA.S
@@ -0,0 +1,18 @@
+#include "libc/nt/codegen.h"
+.imp	kernel32,__imp_CreateEventA,CreateEventA
+
+	.text.windows
+	.ftrace1
+CreateEventA:
+	.ftrace2
+#ifdef __x86_64__
+	push	%rbp
+	mov	%rsp,%rbp
+	mov	__imp_CreateEventA(%rip),%rax
+	jmp	__sysv2nt
+#elif defined(__aarch64__)
+	mov	x0,#0
+	ret
+#endif
+	.endfn	CreateEventA,globl
+	.previous
diff --git a/libc/nt/kernel32/GetNumaProcessorNodeEx.S b/libc/nt/kernel32/GetNumaProcessorNodeEx.S
new file mode 100644
index 000000000..3d3a952a2
--- /dev/null
+++ b/libc/nt/kernel32/GetNumaProcessorNodeEx.S
@@ -0,0 +1,18 @@
+#include "libc/nt/codegen.h"
+.imp	kernel32,__imp_GetNumaProcessorNodeEx,GetNumaProcessorNodeEx
+
+	.text.windows
+	.ftrace1
+GetNumaProcessorNodeEx:
+	.ftrace2
+#ifdef __x86_64__
+	push	%rbp
+	mov	%rsp,%rbp
+	mov	__imp_GetNumaProcessorNodeEx(%rip),%rax
+	jmp	__sysv2nt
+#elif defined(__aarch64__)
+	mov	x0,#0
+	ret
+#endif
+	.endfn	GetNumaProcessorNodeEx,globl
+	.previous
diff --git a/libc/nt/kernel32/LoadLibraryA.S b/libc/nt/kernel32/LoadLibraryA.S
new file mode 100644
index 000000000..94637fa2b
--- /dev/null
+++ b/libc/nt/kernel32/LoadLibraryA.S
@@ -0,0 +1,20 @@
+#include "libc/nt/codegen.h"
+.imp	kernel32,__imp_LoadLibraryA,LoadLibraryA
+
+	.text.windows
+	.ftrace1
+LoadLibraryA:
+	.ftrace2
+#ifdef __x86_64__
+	push	%rbp
+	mov	%rsp,%rbp
+	mov	%rdi,%rcx
+	sub	$32,%rsp
+	call	*__imp_LoadLibraryA(%rip)
+	leave
+#elif defined(__aarch64__)
+	mov	x0,#0
+#endif
+	ret
+	.endfn	LoadLibraryA,globl
+	.previous
diff --git a/libc/nt/master.sh b/libc/nt/master.sh
index 11d9e4528..5529622ea 100755
--- a/libc/nt/master.sh
+++ b/libc/nt/master.sh
@@ -63,6 +63,7 @@ imp	'ConnectNamedPipe'					ConnectNamedPipe					kernel32	2
 imp	'ContinueDebugEvent'					ContinueDebugEvent					kernel32	3
 imp	'CopyFile'						CopyFileW						kernel32	3
 imp	'CreateEvent'						CreateEventW						kernel32	4
+imp	'CreateEventA'						CreateEventA						kernel32	4
 imp	'CreateEventEx'						CreateEventExW						kernel32	4
 imp	'CreateHardLink'					CreateHardLinkW						kernel32	3
 imp	'CreateIoCompletionPort'				CreateIoCompletionPort					kernel32	4
@@ -134,6 +135,7 @@ imp	'GetModuleHandle'					GetModuleHandleA					kernel32	1
 imp	'GetModuleHandleEx'					GetModuleHandleExW					kernel32	3
 imp	'GetModuleHandleW'					GetModuleHandleW					kernel32	1
 imp	'GetNamedPipeInfo'					GetNamedPipeInfo					kernel32	5
+imp	'GetNumaProcessorNodeEx'				GetNumaProcessorNodeEx					kernel32	2
 imp	'GetNumberOfConsoleInputEvents'				GetNumberOfConsoleInputEvents				kernel32	2
 imp	'GetNumberOfConsoleMouseButtons'			GetNumberOfConsoleMouseButtons				kernel32	1
 imp	'GetOverlappedResult'					GetOverlappedResult					kernel32	4
@@ -195,6 +197,7 @@ imp	'InitializeProcThreadAttributeList'			InitializeProcThreadAttributeList			ke
 imp	'InitializeSRWLock'					InitializeSRWLock					kernel32	1
 imp	'LeaveCriticalSection'					LeaveCriticalSection					kernel32	1
 imp	'LoadLibrary'						LoadLibraryW						kernel32	1
+imp	'LoadLibraryA'						LoadLibraryA						kernel32	1
 imp	'LoadLibraryEx'						LoadLibraryExW						kernel32	3
 imp	'LoadResource'						LoadResource						kernel32	2
 imp	'LocalFree'						LocalFree						kernel32	1
@@ -362,7 +365,6 @@ imp	'RegisterEventSource'					RegisterEventSourceW					advapi32	2
 imp	'ReportEvent'						ReportEventW						advapi32	9
 imp	'ReportEventA'						ReportEventA						advapi32	9
 imp	'RevertToSelf'						RevertToSelf						advapi32	0
-imp	'RtlGenRandom'						SystemFunction036					advapi32	2
 imp	'TraceSetInformation'					TraceSetInformation					advapi32 # Windows 7+
 
 # USER32.DLL
@@ -611,6 +613,11 @@ imp	'GetModuleBaseName'					GetModuleBaseNameW					psapi		4
 imp	'GetProcessImageFileName'				GetProcessImageFileNameW				psapi		3
 imp	'GetProcessMemoryInfo'					GetProcessMemoryInfo					psapi		3
 
+# BCryptPrimitives.dll
+#
+#	Name							Actual							DLL			Arity
+imp	'ProcessPrng'						ProcessPrng						BCryptPrimitives	2
+
 # API-MS-Win-Core-Synch-l1-2-0.dll (Windows 8+)
 #
 #	Name							Actual							DLL					Arity
diff --git a/libc/nt/privilege.h b/libc/nt/privilege.h
index 95088a34a..d281a330a 100644
--- a/libc/nt/privilege.h
+++ b/libc/nt/privilege.h
@@ -46,5 +46,8 @@ bool32 AdjustTokenPrivileges(int64_t TokenHandle, bool32 DisableAllPrivileges,
 bool32 ImpersonateSelf(int kNtSecurityImpersonationLevel);
 bool32 RevertToSelf(void);
 
+bool32 OpenThreadToken(intptr_t ThreadHandle, uint32_t DesiredAccess,
+                       bool32 OpenAsSelf, intptr_t *TokenHandle);
+
 COSMOPOLITAN_C_END_
 #endif /* COSMOPOLITAN_LIBC_NT_PRIVILEGE_H_ */
diff --git a/libc/nt/runtime.h b/libc/nt/runtime.h
index 5248816a3..953e77692 100644
--- a/libc/nt/runtime.h
+++ b/libc/nt/runtime.h
@@ -36,11 +36,11 @@ bool32 TerminateProcess(int64_t hProcess, uint32_t uExitCode);
 void TerminateThisProcess(uint32_t dwWaitStatus) wontreturn;
 void ExitProcess(uint32_t uExitCode) wontreturn;
 uint32_t GetLastError(void) nosideeffect;
-bool32 CloseHandle(int64_t hObject) dontthrow nocallback;
+bool32 CloseHandle(int64_t hObject) dontthrow dontcallback;
 intptr_t GetStdHandle(uint32_t nStdHandle) nosideeffect;
 bool32 SetStdHandle(uint32_t nStdHandle, int64_t hHandle);
 bool32 SetDefaultDllDirectories(unsigned dirflags);
-bool32 RtlGenRandom(void *RandomBuffer, uint32_t RandomBufferLength);
+bool32 ProcessPrng(void *RandomBuffer, uint32_t RandomBufferLength);
 uint32_t GetModuleFileName(int64_t hModule, char16_t *lpFilename,
                            uint32_t nSize);
 
diff --git a/libc/nt/struct/teb.h b/libc/nt/struct/teb.h
index a232b4f43..d7ee6f649 100644
--- a/libc/nt/struct/teb.h
+++ b/libc/nt/struct/teb.h
@@ -1,26 +1,25 @@
 #ifndef COSMOPOLITAN_LIBC_NT_TEB_H_
 #define COSMOPOLITAN_LIBC_NT_TEB_H_
-#include "libc/intrin/segmentation.h"
 #include "libc/nt/struct/peb.h"
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#ifdef __GNUC__
 
 /*
  * These macros address directly into NT's TEB a.k.a. TIB
  * Any function that does this needs the `dontasan` keyword
  */
-#define NtGetPeb()           gs((struct NtPeb **)(0x60ULL))
-#define NtGetTeb()           gs((void **)(0x30))    /* %gs:0 linear address */
-#define NtGetPid()           gs((uint32_t *)(0x40)) /* GetCurrentProcessId() */
-#define NtGetTid()           gs((uint32_t *)(0x48)) /* GetCurrentThreadId() */
-#define NtGetErr()           gs((int *)(0x68))
-#define _NtGetSeh()          gs((void **)(0x00))
-#define _NtGetStackHigh()    gs((void **)(0x08))
-#define _NtGetStackLow()     gs((void **)(0x10))
-#define _NtGetSubsystemTib() gs((void **)(0x18))
-#define _NtGetFib()          gs((void **)(0x20))
-#define _NtGetEnv()          gs((char16_t **)(0x38))
-#define _NtGetRpc()          gs((void **)(0x50))
-#define _NtGetTls()          gs((void **)(0x58)) /* cf. gs((long *)0x1480 + i0..64) */
+#define NtGetPeb()           ((__seg_gs struct NtPeb *)0x60)
+#define NtGetTeb()           ((void *)*(__seg_gs uintptr_t *)0x30)
+#define NtGetPid()           (*(__seg_gs uint32_t *)0x40)
+#define NtGetTid()           (*(__seg_gs uint32_t *)0x48)
+#define NtGetErr()           (*(__seg_gs int *)0x68)
+#define _NtGetSeh()          ((void *)*(__seg_gs uintptr_t *)0x00)
+#define _NtGetStackHigh()    ((void *)*(__seg_gs uintptr_t *)0x08)
+#define _NtGetStackLow()     ((void *)*(__seg_gs uintptr_t *)0x10)
+#define _NtGetSubsystemTib() ((void *)*(__seg_gs uintptr_t *)0x18)
+#define _NtGetFib()          ((void *)*(__seg_gs uintptr_t *)0x20)
+#define _NtGetEnv()          ((char16_t *)*(__seg_gs intptr_t *)0x38)
+#define _NtGetRpc()          ((void *)*(__seg_gs uintptr_t *)0x50)
+#define _NtGetTls()          ((void *)*(__seg_gs uintptr_t *)0x58)
 
-#endif /* __GNUC__ && !__STRICT_ANSI__ */
+#endif /* __GNUC__ */
 #endif /* COSMOPOLITAN_LIBC_NT_TEB_H_ */
diff --git a/libc/nt/synchronization.h b/libc/nt/synchronization.h
index d4cbef44f..dd365b33c 100644
--- a/libc/nt/synchronization.h
+++ b/libc/nt/synchronization.h
@@ -118,6 +118,9 @@ bool32 GetSystemTimeAdjustment(uint32_t *lpTimeAdjustment,
 
 void GetCurrentProcessorNumberEx(struct NtProcessorNumber *out_ProcNumber);
 
+bool32 GetNumaProcessorNodeEx(const struct NtProcessorNumber *Processor,
+                              unsigned short *out_NodeNumber);
+
 #if ShouldUseMsabiAttribute()
 #include "libc/nt/thunk/synchronization.inc"
 #endif /* ShouldUseMsabiAttribute() */
diff --git a/libc/nt/thunk/msabi.h b/libc/nt/thunk/msabi.h
index 24dc92c9d..beace506e 100644
--- a/libc/nt/thunk/msabi.h
+++ b/libc/nt/thunk/msabi.h
@@ -2,8 +2,7 @@
 #define COSMOPOLITAN_LIBC_NT_THUNK_MSABI_H_
 #ifdef __x86_64__
 
-#if !defined(__STRICT_ANSI__) &&               \
-    (__GNUC__ * 100 + __GNUC_MINOR__ >= 408 || \
+#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 408 || \
      (__has_attribute(__ms_abi__) || defined(__llvm__)))
 /**
  * Defines function as using Microsoft x64 calling convention.
diff --git a/libc/nt/version.h b/libc/nt/version.h
index 77572bd89..3d14e8f90 100644
--- a/libc/nt/version.h
+++ b/libc/nt/version.h
@@ -8,13 +8,13 @@ bool32 GetVersionEx(struct NtOsVersionInfo *lpVersionInformation);
 
 #if defined(__GNUC__) && !defined(__STRICT_ANSI__) && defined(__x86_64__)
 #define IsAtLeastWindows10() (GetNtMajorVersion() >= 10)
-#define GetNtMajorVersion()    \
-  ({                           \
-    uintptr_t __x;             \
-    asm("mov\t%%gs:96,%q0\r\n" \
-        "mov\t280(%q0),%b0"    \
-        : "=q"(__x));          \
-    (unsigned char)__x;        \
+#define GetNtMajorVersion()        \
+  __extension__({                  \
+    uintptr_t __x;                 \
+    __asm__("mov\t%%gs:96,%q0\r\n" \
+            "mov\t280(%q0),%b0"    \
+            : "=q"(__x));          \
+    (unsigned char)__x;            \
   })
 #endif
 
diff --git a/libc/proc/execve-sysv.c b/libc/proc/execve-sysv.c
index 04a40f861..aa638aac6 100644
--- a/libc/proc/execve-sysv.c
+++ b/libc/proc/execve-sysv.c
@@ -98,7 +98,7 @@ static void SetupExecve(void) {
   g_execve.tmpdir = getenv("TMPDIR");
 }
 
-__attribute__((__constructor__)) static void InitExecve(void) {
+__attribute__((__constructor__(10))) static textstartup void InitExecve(void) {
   cosmo_once(&g_execve.once, SetupExecve);
 }
 
diff --git a/libc/proc/fork-nt.c b/libc/proc/fork-nt.c
index a59fecaaa..dac6300cb 100644
--- a/libc/proc/fork-nt.c
+++ b/libc/proc/fork-nt.c
@@ -78,7 +78,9 @@ static textwindows char16_t *ParseInt(char16_t *p, int64_t *x) {
 }
 
 static inline textwindows ssize_t ForkIo(int64_t h, char *p, size_t n,
-                                         bool32 (*f)()) {
+                                         bool32 (*f)(int64_t, void *, uint32_t,
+                                                     uint32_t *,
+                                                     struct NtOverlapped *)) {
   size_t i;
   uint32_t x;
   for (i = 0; i < n; i += x) {
@@ -90,8 +92,10 @@ static inline textwindows ssize_t ForkIo(int64_t h, char *p, size_t n,
 }
 
 static dontinline textwindows bool ForkIo2(int64_t h, void *buf, size_t n,
-                                           bool32 (*fn)(), const char *sf,
-                                           bool ischild) {
+                                           bool32 (*fn)(int64_t, void *,
+                                                        uint32_t, uint32_t *,
+                                                        struct NtOverlapped *),
+                                           const char *sf, bool ischild) {
   ssize_t rc = ForkIo(h, buf, n, fn);
   if (ischild) __tls_enabled_set(false);  // prevent tls crash in kprintf
   NTTRACE("%s(%ld, %p, %'zu) → %'zd% m", sf, h, buf, n, rc);
@@ -100,9 +104,9 @@ static dontinline textwindows bool ForkIo2(int64_t h, void *buf, size_t n,
 
 static dontinline textwindows bool WriteAll(int64_t h, void *buf, size_t n) {
   bool ok;
-  ok = ForkIo2(h, buf, n, WriteFile, "WriteFile", false);
+  ok = ForkIo2(h, buf, n, (void *)WriteFile, "WriteFile", false);
 #ifndef NDEBUG
-  if (ok) ok = ForkIo2(h, &n, sizeof(n), WriteFile, "WriteFile", false);
+  if (ok) ok = ForkIo2(h, &n, sizeof(n), (void *)WriteFile, "WriteFile", false);
 #endif
 #if SYSDEBUG
   if (!ok) {
diff --git a/libc/proc/vfork.S b/libc/proc/vfork.S
index ff701c1c5..d43faf4cf 100644
--- a/libc/proc/vfork.S
+++ b/libc/proc/vfork.S
@@ -61,7 +61,7 @@ vfork:
 #endif
 	pop	%rbp
 #endif
-	mov	%fs:0,%r9		// get thread information block
+	mov	%gs:0x30,%r9		// get thread information block
 	mov	0x3c(%r9),%r8d		// avoid question of @vforksafe errno
 	pop	%rsi			// saves return address in a register
 	mov	__NR_vfork(%rip),%eax
diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c
index 85d3db9df..7a9913ccb 100644
--- a/libc/runtime/clone.c
+++ b/libc/runtime/clone.c
@@ -94,7 +94,7 @@ struct CloneArgs {
   void *arg;
 };
 
-int sys_set_tls();
+int sys_set_tls(uintptr_t, void *);
 int __stack_call(void *, int, long, long, int (*)(void *, int), void *);
 
 static struct CloneArgs *AllocateCloneArgs(char *stk, size_t stksz) {
@@ -400,9 +400,7 @@ static wontreturn void FreebsdThreadMain(void *p) {
 #ifdef __aarch64__
   asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls));
 #elif defined(__x86_64__)
-  if (__tls_morphed) {
-    sys_set_tls(AMD64_SET_GSBASE, wt->tls);
-  }
+  sys_set_tls(AMD64_SET_GSBASE, wt->tls);
 #endif
   *wt->ctid = wt->tid;
   wt->func(wt->arg, wt->tid);
@@ -575,7 +573,7 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
 #endif
   wt = (struct LinuxCloneArgs *)sp;
 #ifdef __x86_64__
-  if ((flags & CLONE_SETTLS) && __tls_morphed) {
+  if (flags & CLONE_SETTLS) {
     flags &= ~CLONE_SETTLS;
     wt->arg = arg;
     wt->tls = tls;
diff --git a/libc/runtime/cosmo.S b/libc/runtime/cosmo.S
index e8d1d9fb1..31ee018a9 100644
--- a/libc/runtime/cosmo.S
+++ b/libc/runtime/cosmo.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 #include "libc/sysv/consts/prot.h"
 #include "libc/sysv/consts/map.h"
 #include "libc/intrin/strace.internal.h"
@@ -85,19 +84,18 @@ cosmo:	push	%rbp
 	call	_init
 
 //	call constructors
-	ezlea	__init_array_end,ax		// static ctors in forward order
-	.weak	__init_array_end		// could be called multiple times
-	ezlea	__init_array_start,cx		// idempotency recommended
-	.weak	__init_array_start		// @see ape/ape.lds
-1:	cmp	%rax,%rcx
+	.weak	__init_array_end
+	.weak	__init_array_start
+	mov	$__init_array_start,%eax
+1:	cmp	$__init_array_end,%eax
 	je	2f
-	sub	$8,%rax
 	push	%rax
-	push	%rcx
+	push	%rax
 	call	.Largs
 	call	*(%rax)
-	pop	%rcx
 	pop	%rax
+	pop	%rax
+	add	$8,%eax
 	jmp	1b
 
 //	call main()
diff --git a/libc/runtime/cosmo2.c b/libc/runtime/cosmo2.c
index 841c37d23..8181bb9b3 100644
--- a/libc/runtime/cosmo2.c
+++ b/libc/runtime/cosmo2.c
@@ -59,8 +59,6 @@ extern char syscon_openbsd[];
 extern char syscon_netbsd[];
 extern char syscon_windows[];
 extern init_f __strace_init;
-extern init_f *__preinit_array_start[] __attribute__((__weak__));
-extern init_f *__preinit_array_end[] __attribute__((__weak__));
 extern init_f *__init_array_start[] __attribute__((__weak__));
 extern init_f *__init_array_end[] __attribute__((__weak__));
 extern char ape_stack_prot[] __attribute__((__weak__));
@@ -149,7 +147,7 @@ wontreturn textstartup void cosmo(long *sp, struct Syslib *m1, char *exename,
   }
 
   // check system call abi compatibility
-  if (IsXnu() && __syslib->__version < SYSLIB_VERSION) {
+  if (IsXnu() && __syslib->__version < SYSLIB_VERSION_MANDATORY) {
     sys_write(2, "need newer ape loader\n", 22);
     _Exit(127);
   }
@@ -189,7 +187,7 @@ wontreturn textstartup void cosmo(long *sp, struct Syslib *m1, char *exename,
 #if SYSDEBUG
   argc = __strace_init(argc, argv, envp, auxv);
 #endif
-  for (init_f **fp = __init_array_end; fp-- > __init_array_start;) {
+  for (init_f **fp = __init_array_start; fp < __init_array_end; ++fp) {
     (*fp)(argc, argv, envp, auxv);
   }
 #ifdef FTRACE
diff --git a/libc/runtime/dsohandle.S b/libc/runtime/dsohandle.S
index 6d344c37e..39cc3e989 100644
--- a/libc/runtime/dsohandle.S
+++ b/libc/runtime/dsohandle.S
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/macros.internal.h"
-#include "libc/notice.inc"
 
 	.underrun
 //	Uniquely identifies each artifact linked in an address space.
diff --git a/libc/runtime/efimain.greg.c b/libc/runtime/efimain.greg.c
index 50d5dee82..7861ef0bc 100644
--- a/libc/runtime/efimain.greg.c
+++ b/libc/runtime/efimain.greg.c
@@ -30,6 +30,9 @@
 #include "libc/runtime/runtime.h"
 #include "libc/str/str.h"
 
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+
 #ifdef __x86_64__
 
 /* TODO: Why can't we change CR3? Could it really need PML5T? */
@@ -162,7 +165,7 @@ static void EfiInitAcpi(struct mman *mm, EFI_SYSTEM_TABLE *SystemTable) {
  * @see libc/dce.h
  */
 __msabi EFI_STATUS EfiMain(EFI_HANDLE ImageHandle,
-                                    EFI_SYSTEM_TABLE *SystemTable) {
+                           EFI_SYSTEM_TABLE *SystemTable) {
   struct mman *mm;
   uint32_t DescVersion;
   uintptr_t i, j, MapSize;
@@ -215,9 +218,8 @@ __msabi EFI_STATUS EfiMain(EFI_HANDLE ImageHandle,
                                             &kEfiLoadedImageProtocol, &ImgInfo);
   CmdLine = (const char16_t *)ImgInfo->LoadOptions;
   if (!CmdLine || !CmdLine[0]) CmdLine = u"BOOTX64.EFI";
-  Args = GetDosArgv(CmdLine, ArgBlock->ArgBlock,
-                    sizeof(ArgBlock->ArgBlock), ArgBlock->Args,
-                    ARRAYLEN(ArgBlock->Args));
+  Args = GetDosArgv(CmdLine, ArgBlock->ArgBlock, sizeof(ArgBlock->ArgBlock),
+                    ArgBlock->Args, ARRAYLEN(ArgBlock->Args));
 
   /*
    * Gets information about our current video mode.  Clears the screen.
diff --git a/libc/runtime/enable_tls.c b/libc/runtime/enable_tls.c
index 5395b6d6f..045740baf 100644
--- a/libc/runtime/enable_tls.c
+++ b/libc/runtime/enable_tls.c
@@ -251,7 +251,7 @@ textstartup void __enable_tls(void) {
 
 #ifdef __x86_64__
   // rewrite the executable tls opcodes in memory
-  if (IsWindows() || IsXnu()) {
+  if (IsWindows() || IsOpenbsd() || IsNetbsd()) {
     __morph_tls();
   }
 #endif
diff --git a/libc/runtime/fenv.h b/libc/runtime/fenv.h
index ac9b33c47..4f25de55a 100644
--- a/libc/runtime/fenv.h
+++ b/libc/runtime/fenv.h
@@ -59,7 +59,9 @@ typedef double fenv_t;
 
 #endif /* __x86_64__ */
 
-#ifdef __FLT_EVAL_METHOD__
+#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
+#define FLT_EVAL_METHOD __FLT_EVAL_METHOD_TS_18661_3__
+#elif defined(__FLT_EVAL_METHOD__)
 #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
 #else
 #define FLT_EVAL_METHOD 0
diff --git a/libc/runtime/ftracer.c b/libc/runtime/ftracer.c
index a95ac420a..2d54afe92 100644
--- a/libc/runtime/ftracer.c
+++ b/libc/runtime/ftracer.c
@@ -51,7 +51,7 @@ static struct CosmoFtrace g_ftrace;
 
 __funline int GetNestingLevelImpl(struct StackFrame *frame) {
   int nesting = -2;
-  while (frame) {
+  while (frame && !kisdangerous(frame)) {
     ++nesting;
     frame = frame->next;
   }
diff --git a/libc/runtime/getargmax.c b/libc/runtime/getargmax.c
index 3a02203d2..37ce64c83 100644
--- a/libc/runtime/getargmax.c
+++ b/libc/runtime/getargmax.c
@@ -19,6 +19,8 @@
 #include "libc/dce.h"
 #include "libc/macros.internal.h"
 #include "libc/runtime/runtime.h"
+#include "libc/stdio/sysparam.h"
+#include "libc/sysv/consts/_posix.h"
 #include "libc/sysv/consts/limits.h"
 #include "libc/sysv/consts/rlimit.h"
 
@@ -29,6 +31,7 @@
  * Returns expensive but more correct version of `ARG_MAX`.
  */
 int __get_arg_max(void) {
+  int res;
   if (IsLinux()) {
     // You might think that just returning a constant 128KiB (ARG_MAX)
     // would make sense, as this guy did:
@@ -57,10 +60,11 @@ int __get_arg_max(void) {
     // does. Right now (2019, Linux 5.3) that amounts to:
     uint64_t stacksz;
     stacksz = __get_rlimit(RLIMIT_STACK);
-    return MAX(MIN(stacksz / 4, 3 * (8 * 1024 * 1024) / 4), _ARG_MAX);
+    res = MAX(MIN(stacksz / 4, 3 * (8 * 1024 * 1024) / 4), _ARG_MAX);
   } else if (IsBsd()) {
-    return __get_sysctl(CTL_KERN, KERN_ARGMAX);
+    res = __get_sysctl(CTL_KERN, KERN_ARGMAX);
   } else {
-    return _ARG_MAX;
+    res = _ARG_MAX;
   }
+  return MAX(res, _POSIX_ARG_MAX);
 }
diff --git a/libc/runtime/memtrack.internal.h b/libc/runtime/memtrack.internal.h
index 543a16c22..6607a036a 100644
--- a/libc/runtime/memtrack.internal.h
+++ b/libc/runtime/memtrack.internal.h
@@ -164,9 +164,9 @@ forceinline pureconst bool OverlapsImageSpace(const void *p, size_t n) {
   const unsigned char *BegA, *EndA, *BegB, *EndB;
   if (n) {
     BegA = p;
-    EndA = BegA + (n - 1);
+    EndA = BegA + n;
     BegB = __executable_start;
-    EndB = _end - 1;
+    EndB = _end;
     return MAX(BegA, BegB) < MIN(EndA, EndB);
   } else {
     return 0;
@@ -177,9 +177,9 @@ forceinline pureconst bool OverlapsShadowSpace(const void *p, size_t n) {
   intptr_t BegA, EndA, BegB, EndB;
   if (n) {
     BegA = (intptr_t)p;
-    EndA = BegA + (n - 1);
+    EndA = BegA + n;
     BegB = 0x7fff0000;
-    EndB = 0x10007fffffff;
+    EndB = 0x100080000000;
     return MAX(BegA, BegB) < MIN(EndA, EndB);
   } else {
     return 0;
diff --git a/libc/runtime/morph_tls.c b/libc/runtime/morph_tls.c
index b780533ed..aef561580 100644
--- a/libc/runtime/morph_tls.c
+++ b/libc/runtime/morph_tls.c
@@ -17,6 +17,7 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "ape/sections.internal.h"
+#include "libc/intrin/kprintf.h"
 #include "libc/runtime/internal.h"
 #include "libc/runtime/runtime.h"
 #include "libc/serialize.h"
@@ -49,68 +50,105 @@ privileged void __morph_tls(void) {
   unsigned char *p;
   __morph_begin();
 
-  if (IsXnu()) {
-    // Apple is quite straightforward to patch. We basically
-    // just change the segment register, and the linear slot
-    // address 0x30 was promised to us, according to Go team
-    // https://github.com/golang/go/issues/23617
-    dis = 0x30;
-  } else if (IsWindows()) {
+  if (IsWindows()) {
     // MSVC __declspec(thread) generates binary code for this
     // %gs:0x1480 abi. So long as TlsAlloc() isn't called >64
     // times we should be good.
     dis = 0x1480 + __tls_index * 8;
-  } else {
-    dis = 0;
-  }
 
-  // iterate over modifiable code looking for 9 byte instruction
-  // this used to take 30ms with xed to enable tls on python.com
-  for (p = _ereal; p + 9 <= __privileged_start; p += n) {
+    // iterate over modifiable code looking for 9 byte instruction
+    // this used to take 30ms with xed to enable tls on python.com
+    for (p = _ereal; p + 9 <= __privileged_start; p += n) {
 
-    // use sse to zoom zoom to fs register prefixes
-    // that way it'll take 1 ms to morph python.com
-    // we recompiled a 13mb binary in 1 millisecond
-    while (p + 9 + 16 <= __privileged_start) {
-      if ((m = __builtin_ia32_pmovmskb128(
-               *(xmm_t *)p == (xmm_t){0144, 0144, 0144, 0144, 0144, 0144, 0144,
-                                      0144, 0144, 0144, 0144, 0144, 0144, 0144,
-                                      0144, 0144}))) {
-        m = __builtin_ctzll(m);
-        p += m;
-        break;
+      // use sse to zoom zoom to fs register prefixes
+      // that way it'll take 1 ms to morph python.com
+      // we recompiled a 13mb binary in 1 millisecond
+      while (p + 9 + 16 <= __privileged_start) {
+        if ((m = __builtin_ia32_pmovmskb128(
+                 *(xmm_t *)p == (xmm_t){0145, 0145, 0145, 0145, 0145, 0145,
+                                        0145, 0145, 0145, 0145, 0145, 0145,
+                                        0145, 0145, 0145, 0145}))) {
+          m = __builtin_ctzll(m);
+          p += m;
+          break;
+        } else {
+          p += 16;
+        }
+      }
+
+      // we're checking for the following expression:
+      //   0145 == p[0] &&           // %gs
+      //   0110 == (p[1] & 0373) &&  // rex.w (and ignore rex.r)
+      //   (0213 == p[2] ||          // mov reg/mem → reg (word-sized)
+      //   0003 == p[2]) &&          // add reg/mem → reg (word-sized)
+      //   0004 == (p[3] & 0307) &&  // mod/rm (4,reg,0) means sib → reg
+      //   0x30 == p[4] &&           // sib (5,4,0) → (rbp,rsp,0) → disp32
+      //   0000 == p[5] &&           // displacement (von Neumann endian)
+      //   0000 == p[6] &&           // displacement
+      //   0000 == p[7] &&           // displacement
+      //   0000 == p[8]              // displacement
+      w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
+      if ((w == READ64LE("\145\110\213\004\045\060\000\000") ||
+           w == READ64LE("\145\110\003\004\045\060\000\000")) &&
+          !p[8]) {
+
+        // now change the code
+        p[5] = (dis & 0x000000ff) >> 000;  // displacement
+        p[6] = (dis & 0x0000ff00) >> 010;  // displacement
+        p[7] = (dis & 0x00ff0000) >> 020;  // displacement
+        p[8] = (dis & 0xff000000) >> 030;  // displacement
+
+        // advance to the next instruction
+        n = 9;
       } else {
-        p += 16;
+        n = 1;
       }
     }
+  } else {
+    // iterate over modifiable code looking for 9 byte instruction
+    // this used to take 30ms with xed to enable tls on python.com
+    for (p = _ereal; p + 9 <= __privileged_start; p += n) {
 
-    // we're checking for the following expression:
-    //   0144 == p[0] &&           // %fs
-    //   0110 == (p[1] & 0373) &&  // rex.w (and ignore rex.r)
-    //   (0213 == p[2] ||          // mov reg/mem → reg (word-sized)
-    //   0003 == p[2]) &&          // add reg/mem → reg (word-sized)
-    //   0004 == (p[3] & 0307) &&  // mod/rm (4,reg,0) means sib → reg
-    //   0045 == p[4] &&           // sib (5,4,0) → (rbp,rsp,0) → disp32
-    //   0000 == p[5] &&           // displacement (von Neumann endian)
-    //   0000 == p[6] &&           // displacement
-    //   0000 == p[7] &&           // displacement
-    //   0000 == p[8]              // displacement
-    w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
-    if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
-         w == READ64LE("\144\110\003\004\045\000\000\000")) &&
-        !p[8]) {
+      // use sse to zoom zoom to fs register prefixes
+      // that way it'll take 1 ms to morph python.com
+      // we recompiled a 13mb binary in 1 millisecond
+      while (p + 9 + 16 <= __privileged_start) {
+        if ((m = __builtin_ia32_pmovmskb128(
+                 *(xmm_t *)p == (xmm_t){0145, 0145, 0145, 0145, 0145, 0145,
+                                        0145, 0145, 0145, 0145, 0145, 0145,
+                                        0145, 0145, 0145, 0145}))) {
+          m = __builtin_ctzll(m);
+          p += m;
+          break;
+        } else {
+          p += 16;
+        }
+      }
 
-      // now change the code
-      p[0] = 0145;                       // change %fs to %gs
-      p[5] = (dis & 0x000000ff) >> 000;  // displacement
-      p[6] = (dis & 0x0000ff00) >> 010;  // displacement
-      p[7] = (dis & 0x00ff0000) >> 020;  // displacement
-      p[8] = (dis & 0xff000000) >> 030;  // displacement
+      // we're checking for the following expression:
+      //   0145 == p[0] &&           // %gs
+      //   0110 == (p[1] & 0373) &&  // rex.w (and ignore rex.r)
+      //   (0213 == p[2] ||          // mov reg/mem → reg (word-sized)
+      //   0003 == p[2]) &&          // add reg/mem → reg (word-sized)
+      //   0004 == (p[3] & 0307) &&  // mod/rm (4,reg,0) means sib → reg
+      //   0x30 == p[4] &&           // sib (5,4,0) → (rbp,rsp,0) → disp32
+      //   0000 == p[5] &&           // displacement (von Neumann endian)
+      //   0000 == p[6] &&           // displacement
+      //   0000 == p[7] &&           // displacement
+      //   0000 == p[8]              // displacement
+      w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
+      if ((w == READ64LE("\145\110\213\004\045\060\000\000") ||
+           w == READ64LE("\145\110\003\004\045\060\000\000")) &&
+          !p[8]) {
 
-      // advance to the next instruction
-      n = 9;
-    } else {
-      n = 1;
+        // now change the code
+        p[0] = 0144;  // change %gs to %fs
+
+        // advance to the next instruction
+        n = 9;
+      } else {
+        n = 1;
+      }
     }
   }
 
diff --git a/libc/runtime/runtime.h b/libc/runtime/runtime.h
index ff26dde66..db0dfb253 100644
--- a/libc/runtime/runtime.h
+++ b/libc/runtime/runtime.h
@@ -54,7 +54,7 @@ char *getlogin(void);
 int getlogin_r(char *, size_t);
 int login_tty(int);
 int getpagesize(void);
-int syncfs(int);
+int syncfs(int) dontthrow;
 int vhangup(void);
 int getdtablesize(void);
 int sethostname(const char *, size_t);
diff --git a/libc/runtime/set_tls.c b/libc/runtime/set_tls.c
index 0dd8049c5..0f54e8d12 100644
--- a/libc/runtime/set_tls.c
+++ b/libc/runtime/set_tls.c
@@ -29,7 +29,7 @@
 #define AMD64_SET_FSBASE 129
 #define AMD64_SET_GSBASE 131
 
-int sys_set_tls();
+int sys_set_tls(uintptr_t, void *);
 
 // we can't allow --ftrace here because cosmo_dlopen() calls this
 // function to fix the tls register, and ftrace needs it unbroken
@@ -39,20 +39,20 @@ dontinstrument textstartup void __set_tls(struct CosmoTib *tib) {
   // ask the operating system to change the x86 segment register
   if (IsWindows()) {
     asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
-  } else if (IsFreebsd()) {
-    sys_set_tls(__tls_morphed ? AMD64_SET_GSBASE : AMD64_SET_FSBASE, tib);
   } else if (IsLinux()) {
-    sys_set_tls(__tls_morphed ? ARCH_SET_GS : ARCH_SET_FS, tib);
+    sys_set_tls(ARCH_SET_GS, tib);
+  } else if (IsFreebsd()) {
+    sys_set_tls(AMD64_SET_GSBASE, tib);
   } else if (IsNetbsd()) {
     // netbsd has sysarch(X86_SET_FSBASE) but we can't use that because
     // signal handlers will cause it to be reset due to not setting the
     // _mc_tlsbase field in struct mcontext_netbsd.
-    sys_set_tls(tib);
+    sys_set_tls((uintptr_t)tib, 0);
   } else if (IsOpenbsd()) {
-    sys_set_tls(tib);
+    sys_set_tls((uintptr_t)tib, 0);
   } else if (IsXnu()) {
     // thread_fast_set_cthread_self has a weird ABI
-    sys_set_tls((intptr_t)tib - 0x30);
+    sys_set_tls((intptr_t)tib - 0x30, 0);
   } else {
     uint64_t val = (uint64_t)tib;
     asm volatile("wrmsr"
diff --git a/libc/runtime/stack.h b/libc/runtime/stack.h
index dd5270634..19be5d3ea 100644
--- a/libc/runtime/stack.h
+++ b/libc/runtime/stack.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_RUNTIME_STACK_H_
 #define COSMOPOLITAN_LIBC_RUNTIME_STACK_H_
-#ifdef _COSMO_SOURCE
 
 /**
  * Returns preferred size and alignment of thread stack.
@@ -56,7 +56,7 @@
 #define _STACK_EXTRA ""
 #endif
 
-#if defined(__GNUC__) && defined(__ELF__) && !defined(__STRICT_ANSI__)
+#if defined(__GNUC__) && defined(__ELF__)
 COSMOPOLITAN_C_START_
 
 extern char ape_stack_prot[] __attribute__((__weak__));
@@ -156,5 +156,5 @@ int FreeCosmoStack(void *) libcesque;
 
 COSMOPOLITAN_C_END_
 #endif /* GNU ELF */
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_RUNTIME_STACK_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/runtime/syslib.internal.h b/libc/runtime/syslib.internal.h
index 01e129464..ec6d87fe5 100644
--- a/libc/runtime/syslib.internal.h
+++ b/libc/runtime/syslib.internal.h
@@ -10,8 +10,14 @@ COSMOPOLITAN_C_START_
  * `-errno` convention, and hence should be wrapped with `_sysret()`.
  */
 
-#define SYSLIB_MAGIC   ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
-#define SYSLIB_VERSION 8
+#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
+
+#define SYSLIB_VERSION 9 /* sync with ape/ape-m1.c */
+
+/* if this number increases, then everyone on macos arm will need to
+   reinstall ape loader in order to run newer ape binaries so please
+   don't do this if it's sufficient to just check __syslib->version. */
+#define SYSLIB_VERSION_MANDATORY 8
 
 typedef uint64_t dispatch_time_t;
 typedef uint64_t dispatch_semaphore_t;
@@ -69,11 +75,13 @@ struct Syslib {
   long (*__sem_trywait)(int *);
   long (*__getrlimit)(int, void *);
   long (*__setrlimit)(int, const void *);
-  // v6 (2023-11-03)
+  /* v6 (2023-11-03) */
   void *(*__dlopen)(const char *, int);
   void *(*__dlsym)(void *, const char *);
   int (*__dlclose)(void *);
   char *(*__dlerror)(void);
+  /* v9 (2024-01-31) */
+  int (*__pthread_cpu_number_np)(size_t *);
 };
 
 extern struct Syslib *__syslib;
diff --git a/libc/runtime/winargs.internal.h b/libc/runtime/winargs.internal.h
index c123cd613..ccf0fc74d 100644
--- a/libc/runtime/winargs.internal.h
+++ b/libc/runtime/winargs.internal.h
@@ -4,12 +4,17 @@
 COSMOPOLITAN_C_START_
 
 struct WinArgs {
-  char *argv[8192];
-  char *envp[512];
-  intptr_t auxv[2][2];
-  char argv0buf[256];
-  char argblock[32767];
-  char envblock[32767];
+  union {
+    struct {
+      char *argv[8192];
+      char *envp[512];
+      intptr_t auxv[2][2];
+      char argv0buf[256];
+      char argblock[32767];
+      char envblock[32767];
+    };
+    char16_t tmp16[257];
+  };
 } forcealign(16);
 
 COSMOPOLITAN_C_END_
diff --git a/libc/runtime/winmain.greg.c b/libc/runtime/winmain.greg.c
index 118f7fbdf..3c0ae49ed 100644
--- a/libc/runtime/winmain.greg.c
+++ b/libc/runtime/winmain.greg.c
@@ -24,6 +24,7 @@
 #include "libc/limits.h"
 #include "libc/macros.internal.h"
 #include "libc/nexgen32e/rdtsc.h"
+#include "libc/nt/accounting.h"
 #include "libc/nt/console.h"
 #include "libc/nt/enum/consolemodeflags.h"
 #include "libc/nt/enum/filemapflags.h"
@@ -59,6 +60,7 @@ __msabi extern typeof(GetEnvironmentStrings) *const __imp_GetEnvironmentStringsW
 __msabi extern typeof(GetEnvironmentVariable) *const __imp_GetEnvironmentVariableW;
 __msabi extern typeof(GetFileAttributes) *const __imp_GetFileAttributesW;
 __msabi extern typeof(GetStdHandle) *const __imp_GetStdHandle;
+__msabi extern typeof(GetUserName) *const __imp_GetUserNameW;
 __msabi extern typeof(MapViewOfFileEx) *const __imp_MapViewOfFileEx;
 __msabi extern typeof(SetConsoleCP) *const __imp_SetConsoleCP;
 __msabi extern typeof(SetConsoleMode) *const __imp_SetConsoleMode;
@@ -142,6 +144,11 @@ static abi void DeduplicateStdioHandles(void) {
   }
 }
 
+static bool32 HasEnvironmentVariable(const char16_t *name) {
+  char16_t buf[4];
+  return __imp_GetEnvironmentVariableW(name, buf, ARRAYLEN(buf));
+}
+
 // main function of windows init process
 // i.e. first process spawned that isn't forked
 static abi wontreturn void WinInit(const char16_t *cmdline) {
@@ -168,12 +175,6 @@ static abi wontreturn void WinInit(const char16_t *cmdline) {
     }
   }
 
-  // avoid programs like emacs nagging the user to define this
-  char16_t var[8];
-  if (!__imp_GetEnvironmentVariableW(u"TERM", var, 8)) {
-    __imp_SetEnvironmentVariableW(u"TERM", u"xterm-256color");
-  }
-
   // allocate memory for stack and argument block
   _mmi.p = _mmi.s;
   _mmi.n = ARRAYLEN(_mmi.s);
@@ -200,6 +201,34 @@ static abi wontreturn void WinInit(const char16_t *cmdline) {
   struct WinArgs *wa =
       (struct WinArgs *)(stackaddr + (stacksize - sizeof(struct WinArgs)));
 
+  // define $TERM if it's not already present
+  // programs like emacs will stop the world and nag if it's not set
+  if (!HasEnvironmentVariable(u"TERM")) {
+    __imp_SetEnvironmentVariableW(u"TERM", u"xterm-256color");
+  }
+
+  // define $USER as GetUserName() if not set
+  // Windows doesn't define this environment variable by default
+  uint32_t vsize = ARRAYLEN(wa->tmp16);
+  if (!HasEnvironmentVariable(u"USER") &&
+      __imp_GetUserNameW(&wa->tmp16, &vsize)) {
+    __imp_SetEnvironmentVariableW(u"USER", wa->tmp16);
+  }
+
+  // define $HOME as $HOMEDRIVE$HOMEPATH if not set
+  // Windows doesn't define this environment variable by default
+  uint32_t vlen;
+  if (!HasEnvironmentVariable(u"HOME") &&
+      (vlen = __imp_GetEnvironmentVariableW(u"HOMEDRIVE", wa->tmp16,
+                                            ARRAYLEN(wa->tmp16))) <
+          ARRAYLEN(wa->tmp16) &&
+      (vlen += __imp_GetEnvironmentVariableW(u"HOMEPATH", wa->tmp16 + vlen,
+                                             ARRAYLEN(wa->tmp16) - vlen)) <
+          ARRAYLEN(wa->tmp16) &&
+      vlen) {
+    __imp_SetEnvironmentVariableW(u"HOME", wa->tmp16);
+  }
+
   // parse utf-16 command into utf-8 argv array in argument block
   int count = GetDosArgv(cmdline, wa->argblock, ARRAYLEN(wa->argblock),
                          wa->argv, ARRAYLEN(wa->argv));
diff --git a/libc/runtime/zipos-open.c b/libc/runtime/zipos-open.c
index da92c9d37..83b71ed76 100644
--- a/libc/runtime/zipos-open.c
+++ b/libc/runtime/zipos-open.c
@@ -302,7 +302,7 @@ int __zipos_open(struct ZiposUri *name, int flags) {
   return rc;
 }
 
-__attribute__((__constructor__)) static void __zipos_ctor(void) {
+__attribute__((__constructor__(60))) static textstartup void zipos_ctor(void) {
   __zipos_wipe();
   pthread_atfork(__zipos_lock, __zipos_unlock, __zipos_wipe);
 }
diff --git a/libc/sock/epoll.c b/libc/sock/epoll.c
index 73fcf66cd..4c86e35e1 100644
--- a/libc/sock/epoll.c
+++ b/libc/sock/epoll.c
@@ -91,11 +91,10 @@
  * TODO(jart): Polyfill kqueue for XNU/FreeBSD/OpenBSD.
  */
 
-asm(".ident\t\"\\n\\n\
-wepoll (BSD-2)\\n\
-Copyright 2012-2020 Bert Belder\\n\
-https://github.com/piscisaureus/wepoll\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(wepoll_notice, "\
+wepoll (BSD-2)\n\
+Copyright 2012-2020 Bert Belder\n\
+https://github.com/piscisaureus/wepoll");
 
 #define MAX_GROUP_SIZE 32
 
@@ -128,7 +127,7 @@ asm(".include \"libc/disclaimer.inc\"");
   } while (0)
 
 #define CONTAINOF(ptr, type, member) \
-  ((type *)((uintptr_t)(ptr)-offsetof(type, member)))
+  ((type *)((uintptr_t)(ptr) - offsetof(type, member)))
 
 #define TREE__ROTATE(cis, trans)       \
   struct TreeNode *p = node;           \
diff --git a/libc/sock/inet_aton.c b/libc/sock/inet_aton.c
index 98869e463..c826cb955 100644
--- a/libc/sock/inet_aton.c
+++ b/libc/sock/inet_aton.c
@@ -29,11 +29,7 @@
 #include "libc/sock/sock.h"
 #include "libc/sock/struct/sockaddr.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /**
  * Converts dotted IPv4 address string to network order binary.
diff --git a/libc/stdbool.h b/libc/stdbool.h
index 37173c92c..6e5002ee1 100644
--- a/libc/stdbool.h
+++ b/libc/stdbool.h
@@ -2,15 +2,22 @@
 #define COSMOPOLITAN_LIBC_STDBOOL_H_
 
 #ifndef __cplusplus
-#if __STDC_VERSION__ + 0 >= 201112
+
 #define bool _Bool
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L
+#define true  ((_Bool) + 1u)
+#define false ((_Bool) + 0u)
 #else
-#define bool unsigned char
-#endif
-#define true 1
+#define true  1
 #define false 0
+#endif
+
+#else /* __cplusplus */
+
+#define _Bool bool
+
 #endif /* __cplusplus */
 
-#define __bool_true_false_are_defined
+#define __bool_true_false_are_defined 1
 
 #endif /* COSMOPOLITAN_LIBC_STDBOOL_H_ */
diff --git a/libc/stdio/append.h b/libc/stdio/append.h
index fd248705d..4ff526a3a 100644
--- a/libc/stdio/append.h
+++ b/libc/stdio/append.h
@@ -1,6 +1,6 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_STDIO_APPEND_H_
 #define COSMOPOLITAN_LIBC_STDIO_APPEND_H_
-#ifdef _COSMO_SOURCE
 
 #define APPEND_COOKIE 21578
 
@@ -32,5 +32,5 @@ ssize_t kappendf(char **, const char *, ...) libcesque;
 ssize_t kvappendf(char **, const char *, va_list) libcesque;
 
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_STDIO_APPEND_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/stdio/confstr.c b/libc/stdio/confstr.c
index 16b88dcfd..735b9589d 100644
--- a/libc/stdio/confstr.c
+++ b/libc/stdio/confstr.c
@@ -20,6 +20,7 @@
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 #include "libc/sysv/errfuns.h"
+#include "libc/unistd.h"
 
 size_t confstr(int name, char *buf, size_t len) {
   if (name == _CS_PATH) {
diff --git a/libc/stdio/demangle.c b/libc/stdio/demangle.c
index 42b532d23..2950ad7ba 100644
--- a/libc/stdio/demangle.c
+++ b/libc/stdio/demangle.c
@@ -34,11 +34,10 @@
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-C++ Demangle (BSD-2)\\n\
-Copyright (c) 2007 Hyogeol Lee <hyogeollee@gmail.com>\\n\
-Copyright (c) 2015-2017 Kai Wang <kaiwang27@gmail.com>\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(demangle_notice, "\
+FreeBSD C++ Demangle (BSD-2)\n\
+Copyright (c) 2007 Hyogeol Lee <hyogeollee@gmail.com>\n\
+Copyright (c) 2015-2017 Kai Wang <kaiwang27@gmail.com>");
 // https://github.com/freebsd/freebsd-src/blob/2176c9ab71c85efd90a6c7af4a9e04fe8e3d49ca/contrib/libcxxrt/libelftc_dem_gnu3.c
 // clang-format off
 
diff --git a/libc/stdio/ecvt.c b/libc/stdio/ecvt.c
index cb3324fdc..ebb72075c 100644
--- a/libc/stdio/ecvt.c
+++ b/libc/stdio/ecvt.c
@@ -27,10 +27,9 @@
 #include "libc/str/str.h"
 #include "third_party/gdtoa/gdtoa.h"
 
-asm(".ident\t\"\\n\\n\
-OpenBSD ecvt/gcvt (MIT)\\n\
-Copyright (c) 2002, 2006, 2010 Todd C. Miller <millert@openbsd.org>\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(ecvt_notice, "\
+OpenBSD ecvt (MIT)\n\
+Copyright (c) 2002, 2006, 2010 Todd C. Miller <millert@openbsd.org>");
 // clang-format off
 
 static char *s;
@@ -42,7 +41,7 @@ __cvt_atexit(void)
 	s = 0;
 }
 
-static void __attribute__((__constructor__))
+static __attribute__((__constructor__(60))) textstartup void
 __cvt_init(void)
 {
 	atexit(__cvt_atexit);
diff --git a/libc/stdio/fflush_unlocked.c b/libc/stdio/fflush_unlocked.c
index ab889dec9..fd8887d79 100644
--- a/libc/stdio/fflush_unlocked.c
+++ b/libc/stdio/fflush_unlocked.c
@@ -69,7 +69,7 @@ static void __stdio_fork_child(void) {
   pthread_mutex_init(&__fflush_lock_obj, 0);
 }
 
-__attribute__((__constructor__)) static void __stdio_init(void) {
+__attribute__((__constructor__(60))) static textstartup void stdioinit(void) {
   pthread_atfork(__stdio_fork_prepare, __stdio_fork_parent, __stdio_fork_child);
 }
 
diff --git a/libc/stdio/fmt.c b/libc/stdio/fmt.c
index f35116953..12de10d52 100644
--- a/libc/stdio/fmt.c
+++ b/libc/stdio/fmt.c
@@ -43,7 +43,6 @@
 #include "libc/fmt/conv.h"
 #include "libc/fmt/divmod10.internal.h"
 #include "libc/fmt/itoa.h"
-#include "libc/serialize.h"
 #include "libc/intrin/bsr.h"
 #include "libc/intrin/nomultics.internal.h"
 #include "libc/intrin/safemacros.internal.h"
@@ -53,6 +52,7 @@
 #include "libc/mem/mem.h"
 #include "libc/mem/reverse.internal.h"
 #include "libc/runtime/internal.h"
+#include "libc/serialize.h"
 #include "libc/str/str.h"
 #include "libc/str/strwidth.h"
 #include "libc/str/tab.internal.h"
@@ -800,7 +800,7 @@ int __fmt(void *fn, void *arg, const char *format, va_list va) {
 
   x = 0;
   lasterr = errno;
-  out = fn ? fn : __fmt_noop;
+  out = fn ? fn : (void *)__fmt_noop;
 
   while (*format) {
     if (*format != '%') {
diff --git a/libc/stdio/ftw.c b/libc/stdio/ftw.c
index 9be3bb03e..ded9820ff 100644
--- a/libc/stdio/ftw.c
+++ b/libc/stdio/ftw.c
@@ -26,11 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/stdio/ftw.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 /**
@@ -50,5 +47,5 @@ int ftw(const char *dirpath,
 	/* The following cast assumes that calling a function with one
 	 * argument more than it needs behaves as expected. This is
 	 * actually undefined, but works on all real-world machines. */
-	return nftw(dirpath, (int (*)())fn, fd_limit, FTW_PHYS);
+	return nftw(dirpath, (void *)fn, fd_limit, FTW_PHYS);
 }
diff --git a/libc/stdio/gcvt.c b/libc/stdio/gcvt.c
index c0ecbe869..dfc824415 100644
--- a/libc/stdio/gcvt.c
+++ b/libc/stdio/gcvt.c
@@ -28,10 +28,9 @@
 #include "libc/str/unicode.h"
 #include "third_party/gdtoa/gdtoa.h"
 
-asm(".ident\t\"\\n\\n\
-OpenBSD ecvt/gcvt (MIT)\\n\
-Copyright (c) 2002, 2006, 2010 Todd C. Miller <millert@openbsd.org>\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(gcvt_notice, "\
+OpenBSD gcvt (MIT)\n\
+Copyright (c) 2002, 2006, 2010 Todd C. Miller <millert@openbsd.org>");
 // clang-format off
 
 #define DEFPREC	6
diff --git a/libc/stdio/iconv.c b/libc/stdio/iconv.c
index 791d73277..3b04b6b8d 100644
--- a/libc/stdio/iconv.c
+++ b/libc/stdio/iconv.c
@@ -32,11 +32,8 @@
 #include "libc/str/str.h"
 #include "libc/thread/tls.h"
 // clang-format off
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 
 #define UTF_32BE    0300
 #define UTF_16LE    0301
diff --git a/libc/stdio/mt19937.c b/libc/stdio/mt19937.c
index 3ef4e8362..8a4b8b8d2 100644
--- a/libc/stdio/mt19937.c
+++ b/libc/stdio/mt19937.c
@@ -38,10 +38,8 @@
 #include "libc/macros.internal.h"
 #include "libc/stdio/rand.h"
 
-asm(".ident\t\"\\n\\n\
-mt19937 (BSD-3)\\n\
-Copyright 1997-2004 Makoto Matsumoto and Takuji Nishimura\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(mt19937_notice, "mt19937 (BSD-3)\n\
+Copyright 1997-2004 Makoto Matsumoto and Takuji Nishimura");
 
 /*
  * A C-program for MT19937-64 (2004/9/29 version).
diff --git a/libc/stdio/nftw.c b/libc/stdio/nftw.c
index 35c84b0cb..53c5ca2da 100644
--- a/libc/stdio/nftw.c
+++ b/libc/stdio/nftw.c
@@ -36,11 +36,8 @@
 #include "libc/sysv/consts/o.h"
 #include "libc/sysv/consts/s.h"
 #include "libc/thread/thread.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 struct history
diff --git a/libc/stdio/printargs.c b/libc/stdio/printargs.c
index fa38e6ebf..eb7dbac63 100644
--- a/libc/stdio/printargs.c
+++ b/libc/stdio/printargs.c
@@ -84,6 +84,7 @@ static const char *FindNameById(const struct IdName *names, unsigned long id) {
 }
 
 static void PrintDependencies(const char *prologue) {
+#ifdef __x86_64__
   struct NtLinkedList *head = &NtGetPeb()->Ldr->InLoadOrderModuleList;
   struct NtLinkedList *ldr = head->Next;
   do {
@@ -92,6 +93,7 @@ static void PrintDependencies(const char *prologue) {
     PRINT(" ☼ %.*!hs (%'zukb @ %p)", dll->FullDllName.Length,
           dll->FullDllName.Data, dll->SizeOfImage / 1024, dll->DllBase);
   } while ((ldr = ldr->Next) && ldr != head);
+#endif
 }
 
 static void Print(const char *prologue) {
@@ -624,6 +626,7 @@ textstartup void __printargs(const char *prologue) {
     if (GetConsoleMode(GetStdHandle(kNtStdErrorHandle), &cm))
       PRINT("   %s", DescribeNtConsoleOutFlags(cm));
 
+#ifdef __x86_64__
     PRINT("");
     PRINT("TEB");
     PRINT(" ☼ gs:0x%02x %s = %p", 0x00, "NtGetSeh()", _NtGetSeh());
@@ -640,6 +643,7 @@ textstartup void __printargs(const char *prologue) {
     PRINT(" ☼ gs:0x%02x %s = %p", 0x58, "NtGetTls()", _NtGetTls());
     PRINT(" ☼ gs:0x%02x %s = %p", 0x60, "NtGetPeb()", NtGetPeb());
     PRINT(" ☼ gs:0x%02x %s = %p", 0x68, "NtGetErr()", NtGetErr());
+#endif
 
     PRINT("");
     PRINT("DEPENDENCIES");
diff --git a/libc/stdio/random.c b/libc/stdio/random.c
index 513300d76..aa78e31c9 100644
--- a/libc/stdio/random.c
+++ b/libc/stdio/random.c
@@ -26,11 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/stdio/rand.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /*
  * this code uses the same lagged fibonacci generator as the
diff --git a/libc/stdio/rdseed.c b/libc/stdio/rdseed.c
index 2b88bb22f..4cde60469 100644
--- a/libc/stdio/rdseed.c
+++ b/libc/stdio/rdseed.c
@@ -27,7 +27,7 @@
  *
  * If RDSEED isn't available, we'll try RDRAND (which we automatically
  * disable for microarchitectures where it's known to be slow or buggy).
- * If RDRAND isn't available then we try getrandom(), RtlGenRandom(), or
+ * If RDRAND isn't available then we try getrandom(), ProcessPrng(), or
  * sysctl(KERN_ARND). If those aren't available then we try /dev/urandom
  * and if that fails, we use RDTSC and getpid().
  *
diff --git a/libc/stdio/rngset.c b/libc/stdio/rngset.c
index 0c0823172..881eb9fa1 100644
--- a/libc/stdio/rngset.c
+++ b/libc/stdio/rngset.c
@@ -45,9 +45,6 @@ dontasan void *rngset(void *b, size_t n, uint64_t seed(void), size_t reseed) {
   size_t m;
   uint64_t x, t = 0;
   unsigned char *p = b;
-  if (IsAsan()) {
-    __asan_verify(b, n);
-  }
   if (!seed) {
     t = reseed;
     reseed = -1;
diff --git a/libc/stdio/scandir.c b/libc/stdio/scandir.c
index 07973bcc6..e9be6e664 100644
--- a/libc/stdio/scandir.c
+++ b/libc/stdio/scandir.c
@@ -31,11 +31,8 @@
 #include "libc/mem/alg.h"
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 int scandir(const char *path, struct dirent ***res,
diff --git a/libc/stdio/stderr.c b/libc/stdio/stderr.c
index 9065dd114..444a3ed0c 100644
--- a/libc/stdio/stderr.c
+++ b/libc/stdio/stderr.c
@@ -30,7 +30,7 @@ FILE *stderr;
 
 static FILE __stderr;
 
-__attribute__((__constructor__)) static void __stderr_init(void) {
+__attribute__((__constructor__(60))) static textstartup void errinit(void) {
   stderr = &__stderr;
   stderr->fd = STDERR_FILENO;
   stderr->bufmode = _IONBF;
diff --git a/libc/stdio/stdin.c b/libc/stdio/stdin.c
index 0c08324d8..2407187fe 100644
--- a/libc/stdio/stdin.c
+++ b/libc/stdio/stdin.c
@@ -30,7 +30,7 @@ FILE *stdin;
 
 static FILE __stdin;
 
-__attribute__((__constructor__)) static void __stdin_init(void) {
+__attribute__((__constructor__(60))) static textstartup void initin(void) {
   stdin = &__stdin;
   stdin->fd = STDIN_FILENO;
   stdin->iomode = O_RDONLY;
diff --git a/libc/stdio/stdio.h b/libc/stdio/stdio.h
index e8ec9473e..4d385e0ba 100644
--- a/libc/stdio/stdio.h
+++ b/libc/stdio/stdio.h
@@ -1,12 +1,11 @@
 #ifndef COSMOPOLITAN_LIBC_STDIO_H_
 #define COSMOPOLITAN_LIBC_STDIO_H_
 
-#define EOF      -1  /* end of file */
-#define WEOF     -1u /* end of file (multibyte) */
-#define _IOFBF   0   /* fully buffered */
-#define _IOLBF   1   /* line buffered */
-#define _IONBF   2   /* no buffering */
-#define _CS_PATH 0
+#define EOF    -1  /* end of file */
+#define WEOF   -1u /* end of file (multibyte) */
+#define _IOFBF 0   /* fully buffered */
+#define _IOLBF 1   /* line buffered */
+#define _IONBF 2   /* no buffering */
 
 #define L_tmpnam     20
 #define L_ctermid    20
@@ -90,7 +89,7 @@ int fsetpos(FILE *, const fpos_t *) libcesque paramsnonnull();
 FILE *tmpfile(void) libcesque __wur;
 char *tmpnam(char *) libcesque __wur;
 char *tmpnam_r(char *) libcesque __wur;
-int system(const char *) libcesque;
+
 FILE *popen(const char *, const char *) libcesque;
 
 /*───────────────────────────────────────────────────────────────────────────│─╗
diff --git a/libc/stdio/stdout.c b/libc/stdio/stdout.c
index 3de0d80f3..06eee4475 100644
--- a/libc/stdio/stdout.c
+++ b/libc/stdio/stdout.c
@@ -31,7 +31,7 @@ FILE *stdout;
 
 static FILE __stdout;
 
-__attribute__((__constructor__)) static void __stdout_init(void) {
+__attribute__((__constructor__(60))) static textstartup void outinit(void) {
   stdout = &__stdout;
 
   stdout->fd = STDOUT_FILENO;
diff --git a/libc/stdio/syscall.c b/libc/stdio/syscall.c
index 66113d2e7..b66b575e5 100644
--- a/libc/stdio/syscall.c
+++ b/libc/stdio/syscall.c
@@ -43,8 +43,15 @@ long syscall(long number, ...) {
       size_t buflen = va_arg(va, size_t);
       unsigned flags = va_arg(va, unsigned);
       va_end(va);
-      ssize_t rc = getrandom(buf, buflen, flags);
-      return rc;
+      return getrandom(buf, buflen, flags);
+    }
+    case SYS_getcpu: {
+      va_list va;
+      va_start(va, number);
+      unsigned *cpu = va_arg(va, unsigned *);
+      unsigned *node = va_arg(va, unsigned *);
+      va_end(va);
+      return getcpu(cpu, node);
     }
   }
 }
diff --git a/libc/stdio/syscall.h b/libc/stdio/syscall.h
index afd3d67ef..5cbb2802b 100644
--- a/libc/stdio/syscall.h
+++ b/libc/stdio/syscall.h
@@ -2,8 +2,9 @@
 #define COSMOPOLITAN_LIBC_STDIO_SYSCALL_H_
 COSMOPOLITAN_C_START_
 
-#define SYS_gettid    186
-#define SYS_getrandom 318
+#define SYS_gettid    1
+#define SYS_getrandom 2
+#define SYS_getcpu    3
 
 long syscall(long, ...) libcesque;
 
diff --git a/libc/stdio/tmpnam.c b/libc/stdio/tmpnam.c
index b15d42895..b0ab75b92 100644
--- a/libc/stdio/tmpnam.c
+++ b/libc/stdio/tmpnam.c
@@ -18,7 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/calls/calls.h"
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/stdio/rand.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
@@ -35,7 +34,6 @@ static char g_tmpnam[L_tmpnam];
  *     is only mutated on success
  */
 char *tmpnam(char *buf) {
-  if (IsAsan()) __asan_verify(buf, L_tmpnam);
   char path[] = P_tmpdir "/tmpnam_XXXXXX";
   for (int t = 0; t < 100; ++t) {
     int w = _rand64();
diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c
index cb890edb4..f58f21e74 100644
--- a/libc/stdio/vcscanf.c
+++ b/libc/stdio/vcscanf.c
@@ -50,6 +50,12 @@
     }                                         \
     c;                                        \
   })
+#define UNBUFFER                \
+  ({                            \
+    if (c != -1) {              \
+      fpbuf[--fpbufcur] = '\0'; \
+    }                           \
+  })
 
 /**
  * String / file / stream decoder.
@@ -369,10 +375,11 @@ int __vcscanf(int callback(void *),    //
                   }
                 } while ((c = BUFFER) != -1 && c != ')');
                 if (c == ')') {
-                  c = BUFFER;
+                  c = READ;
                 }
                 goto GotFloatingPointNumber;
               } else {
+                UNBUFFER;
                 goto GotFloatingPointNumber;
               }
             } else {
@@ -410,9 +417,7 @@ int __vcscanf(int callback(void *),    //
                   goto Done;
                 }
               } else {
-                if (c != -1 && unget) {
-                  unget(c, arg);
-                }
+                UNBUFFER;
                 goto GotFloatingPointNumber;
               }
             } else {
@@ -465,13 +470,24 @@ int __vcscanf(int callback(void *),    //
         Continue:
           continue;
         Break:
-          if (c != -1 && unget) {
-            unget(c, arg);
-          }
+          UNBUFFER;
           break;
         } while ((c = BUFFER) != -1);
       GotFloatingPointNumber:
-        fp = strtod((char *)fpbuf, NULL);
+        /* An empty buffer can't be a valid float; don't even bother parsing. */
+        bool valid = fpbufcur > 0;
+        if (valid) {
+          char *ep;
+          fp = strtod((char *)fpbuf, &ep);
+          /* We should have parsed the whole buffer. */
+          valid = ep == (char *)fpbuf + fpbufcur;
+        }
+        free(fpbuf);
+        fpbuf = NULL;
+        fpbufcur = fpbufsize = 0;
+        if (!valid) {
+          goto Done;
+        }
         if (!discard) {
           ++items;
           void *out = va_arg(va, void *);
@@ -481,9 +497,6 @@ int __vcscanf(int callback(void *),    //
             *(double *)out = (double)fp;
           }
         }
-        free(fpbuf);
-        fpbuf = NULL;
-        fpbufcur = fpbufsize = 0;
         continue;
       ReportConsumed:
         n_ptr = va_arg(va, int *);
@@ -537,6 +550,11 @@ int __vcscanf(int callback(void *),    //
               if (!j && c == -1 && !items) {
                 items = -1;
                 goto Done;
+              } else if (rawmode && j != width) {
+                /* The C standard says that %c "matches a sequence of characters of
+                 * **exactly** the number specified by the field width". If we have
+                 * fewer characters, what we've just read is invalid. */
+                goto Done;
               } else if (!rawmode && j < bufsize) {
                 if (charbytes == sizeof(char)) {
                   buf[j] = '\0';
diff --git a/libc/stdlib.h b/libc/stdlib.h
index 9c5bc3b1e..e7ef213f0 100644
--- a/libc/stdlib.h
+++ b/libc/stdlib.h
@@ -6,5 +6,10 @@ char *fcvt(double, int, int *, int *) libcesque;
 char *ecvt(double, int, int *, int *) libcesque;
 char *gcvt(double, int, char *) libcesque;
 
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || \
+    defined(_COSMO_SOURCE)
+void setkey(const char *) libcesque;
+#endif
+
 COSMOPOLITAN_C_END_
 #endif /* COSMOPOLITAN_LIBC_STDLIB_H_ */
diff --git a/libc/str/BUILD.mk b/libc/str/BUILD.mk
index 8a86f287a..1600b2f3a 100644
--- a/libc/str/BUILD.mk
+++ b/libc/str/BUILD.mk
@@ -88,7 +88,7 @@ o/$(MODE)/libc/str/windowstimetotimespec.o: private		\
 			-O2
 
 $(LIBC_STR_A_OBJS): private					\
-		COPTS +=					\
+		CFLAGS +=					\
 			-fno-sanitize=all			\
 			-Wframe-larger-than=4096		\
 			-Walloca-larger-than=4096
diff --git a/libc/str/blake2.c b/libc/str/blake2.c
index 44e0ebc3b..362a198da 100644
--- a/libc/str/blake2.c
+++ b/libc/str/blake2.c
@@ -23,10 +23,9 @@
 
 #define ROR(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
 
-asm(".ident\t\"\\n\\n\
-boringssl blake2b (ISC License)\\n\
-Copyright 2021 Google LLC\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(blake2b_notice, "\
+boringssl blake2b (ISC License)\n\
+Copyright 2021 Google LLC");
 
 // https://tools.ietf.org/html/rfc7693#section-2.6
 static const uint64_t kIV[8] = {
diff --git a/libc/str/btowc.c b/libc/str/btowc.c
index e0faaf8e0..4e3cb74ab 100644
--- a/libc/str/btowc.c
+++ b/libc/str/btowc.c
@@ -29,11 +29,7 @@
 #include "libc/stdio/stdio.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 wint_t btowc(int c) {
   int b = (unsigned char)c;
diff --git a/libc/str/c16rtomb.c b/libc/str/c16rtomb.c
index 3289411f6..34406401e 100644
--- a/libc/str/c16rtomb.c
+++ b/libc/str/c16rtomb.c
@@ -30,11 +30,7 @@
 #include "libc/limits.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps) {
   static unsigned internal_state;
diff --git a/libc/str/djbsort.c b/libc/str/djbsort.c
index abfc8f3a9..18299e517 100644
--- a/libc/str/djbsort.c
+++ b/libc/str/djbsort.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/nexgen32e/x86feature.h"
 #include "libc/runtime/runtime.h"
 #include "libc/stdckdint.h"
@@ -28,11 +27,6 @@ void djbsort_avx2(int32_t *, long);
  * D.J. Bernstein's outrageously fast integer sorting algorithm.
  */
 void djbsort(int32_t *a, size_t n) {
-  size_t m;
-  if (IsAsan()) {
-    if (ckd_mul(&m, n, 4)) m = -1;
-    __asan_verify(a, m);
-  }
   if (n > 1) {
 #if defined(__x86_64__) && !defined(__chibicc__)
     if (X86_HAVE(AVX2)) {
diff --git a/libc/str/highwayhash64.c b/libc/str/highwayhash64.c
index 7cb0af034..5cd7581a3 100644
--- a/libc/str/highwayhash64.c
+++ b/libc/str/highwayhash64.c
@@ -18,10 +18,9 @@
 #include "libc/str/highwayhash64.h"
 #include "libc/serialize.h"
 
-asm(".ident\t\"\\n\\n\
-HighwayHash (Apache 2.0)\\n\
-Copyright 2017 Google LLC\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(highwayhash_notice, "\
+HighwayHash (Apache 2.0)\n\
+Copyright 2017 Google LLC");
 
 typedef struct {
   uint64_t v0[4];
diff --git a/libc/str/isutf8.c b/libc/str/isutf8.c
index ed0fb918e..ddca5e330 100644
--- a/libc/str/isutf8.c
+++ b/libc/str/isutf8.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/intrin/likely.h"
 #include "libc/str/str.h"
 
@@ -53,7 +52,6 @@ bool32 isutf8(const void *data, size_t size) {
   long c;
   const char *p, *e;
   if (size == -1) size = data ? strlen(data) : 0;
-  if (IsAsan()) __asan_verify(data, size);
   p = data;
   e = p + size;
   while (p < e) {
diff --git a/libc/str/langinfo.c b/libc/str/langinfo.c
index d66386082..fe88cd54a 100644
--- a/libc/str/langinfo.c
+++ b/libc/str/langinfo.c
@@ -29,11 +29,8 @@
 #include "libc/str/locale.h"
 #include "libc/str/nltypes.h"
 #include "libc/thread/tls.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 static const char c_time[] =
diff --git a/libc/str/mb.c b/libc/str/mb.c
index 3838b676c..98cbf47dd 100644
--- a/libc/str/mb.c
+++ b/libc/str/mb.c
@@ -26,11 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/mb.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 #define C(x) (x < 2 ? -1 : (R(0x80, 0xc0) | x))
 #define D(x) C((x + 16))
diff --git a/libc/str/mbrtoc16.c b/libc/str/mbrtoc16.c
index 1a41aa9f7..40d366118 100644
--- a/libc/str/mbrtoc16.c
+++ b/libc/str/mbrtoc16.c
@@ -29,11 +29,7 @@
 #include "libc/limits.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps) {
   static unsigned internal_state;
diff --git a/libc/str/mbrtoc32.c b/libc/str/mbrtoc32.c
index 5cfca124b..00cc13ba7 100644
--- a/libc/str/mbrtoc32.c
+++ b/libc/str/mbrtoc32.c
@@ -29,11 +29,7 @@
 #include "libc/limits.h"
 #include "libc/macros.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t mbrtoc32(char32_t *pc32, const char *s, size_t n, mbstate_t *ps) {
   static unsigned internal_state;
diff --git a/libc/str/mbrtowc.c b/libc/str/mbrtowc.c
index 1d89eb7b9..8a0f996ef 100644
--- a/libc/str/mbrtowc.c
+++ b/libc/str/mbrtowc.c
@@ -30,11 +30,7 @@
 #include "libc/macros.internal.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t mbrtowc(wchar_t *wc, const char *src, size_t n, mbstate_t *st) {
   static unsigned internal_state;
diff --git a/libc/str/mbsnrtowcs.c b/libc/str/mbsnrtowcs.c
index d3dd1499a..504a81c45 100644
--- a/libc/str/mbsnrtowcs.c
+++ b/libc/str/mbsnrtowcs.c
@@ -30,11 +30,7 @@
 #include "libc/macros.internal.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t mbsnrtowcs(wchar_t *wcs, const char **src, size_t n, size_t wn,
                   mbstate_t *st) {
diff --git a/libc/str/mbsrtowcs.c b/libc/str/mbsrtowcs.c
index b3aa5f0ae..986172725 100644
--- a/libc/str/mbsrtowcs.c
+++ b/libc/str/mbsrtowcs.c
@@ -30,11 +30,7 @@
 #include "libc/macros.internal.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t mbsrtowcs(wchar_t *ws, const char **src, size_t wn, mbstate_t *st) {
   const unsigned char *s = (const void *)*src;
diff --git a/libc/str/mbtowc.c b/libc/str/mbtowc.c
index 669dcaf47..155270726 100644
--- a/libc/str/mbtowc.c
+++ b/libc/str/mbtowc.c
@@ -29,11 +29,7 @@
 #include "libc/limits.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) {
   unsigned c;
diff --git a/libc/str/memmem.c b/libc/str/memmem.c
index c9e43c8f0..51975a3ef 100644
--- a/libc/str/memmem.c
+++ b/libc/str/memmem.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/intrin/likely.h"
 #include "libc/str/str.h"
 
@@ -32,16 +31,14 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
  * @param needlelen is its character count
  * @return pointer to first result or NULL if not found
  */
-void *memmem(const void *haystack, size_t haystacklen,
-                      const void *needle, size_t needlelen) {
+__vex void *memmem(const void *haystack, size_t haystacklen, const void *needle,
+                   size_t needlelen) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   char c;
   xmm_t n;
   const xmm_t *v;
   unsigned i, k, m;
   const char *p, *q, *e;
-  if (IsAsan()) __asan_verify(needle, needlelen);
-  if (IsAsan()) __asan_verify(haystack, haystacklen);
   if (!needlelen) return (void *)haystack;
   if (UNLIKELY(needlelen > haystacklen)) return 0;
   q = needle;
diff --git a/libc/str/memrchr16.c b/libc/str/memrchr16.c
index 517f4a9a0..15c61ba46 100644
--- a/libc/str/memrchr16.c
+++ b/libc/str/memrchr16.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/limits.h"
 #include "libc/nexgen32e/x86feature.h"
 #include "libc/str/str.h"
@@ -36,8 +35,8 @@ static inline const char16_t *memrchr16_pure(const char16_t *s, char16_t c,
 }
 
 #if defined(__x86_64__) && !defined(__chibicc__)
-static inline const char16_t *memrchr16_sse(const char16_t *s,
-                                                     char16_t c, size_t n) {
+static inline const char16_t *memrchr16_sse(const char16_t *s, char16_t c,
+                                            size_t n) {
   size_t i;
   unsigned m;
   xmm_t v, t = {c, c, c, c, c, c, c, c};
@@ -67,11 +66,10 @@ static inline const char16_t *memrchr16_sse(const char16_t *s,
  * @return is pointer to first instance of c or NULL if not found
  * @asyncsignalsafe
  */
-void *memrchr16(const void *s, int c, size_t n) {
+__vex void *memrchr16(const void *s, int c, size_t n) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   const void *r;
   if (!IsTiny() && X86_HAVE(SSE)) {
-    if (IsAsan()) __asan_verify(s, n * 2);
     r = memrchr16_sse(s, c, n);
   } else {
     r = memrchr16_pure(s, c, n);
diff --git a/libc/str/rawmemchr.c b/libc/str/rawmemchr.c
index a6f089f6b..d3b4a5523 100644
--- a/libc/str/rawmemchr.c
+++ b/libc/str/rawmemchr.c
@@ -18,7 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/assert.h"
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/nexgen32e/x86feature.h"
 #include "libc/str/str.h"
 
@@ -33,8 +32,7 @@ static inline const unsigned char *rawmemchr_pure(const unsigned char *s,
 
 #if defined(__x86_64__) && !defined(__chibicc__)
 typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
-static inline const char *rawmemchr_sse(const char *s,
-                                                 unsigned char c) {
+static inline const char *rawmemchr_sse(const char *s, unsigned char c) {
   unsigned k;
   unsigned m;
   const xmm_t *p;
@@ -67,11 +65,10 @@ static inline uint64_t UncheckedAlignedRead64(const unsigned char *p) {
  * @param c is search byte which is masked with 255
  * @return is pointer to first instance of c
  */
-void *rawmemchr(const void *s, int c) {
+__vex void *rawmemchr(const void *s, int c) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   const void *r;
   if (X86_HAVE(SSE)) {
-    if (IsAsan()) __asan_verify(s, 1);
     r = rawmemchr_sse(s, c);
   } else {
     r = rawmemchr_pure(s, c);
diff --git a/libc/str/smoothsort.c b/libc/str/smoothsort.c
index e9cf02b26..ba84be2e4 100644
--- a/libc/str/smoothsort.c
+++ b/libc/str/smoothsort.c
@@ -26,11 +26,10 @@
 #include "libc/mem/alg.h"
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-Smoothsort (MIT License)\\n\
-Copyright 2011 Valentin Ochs\\n\
-Discovered by Edsger Dijkstra\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(smoothsort_notice, "\
+Smoothsort (MIT License)\n\
+Copyright 2011 Valentin Ochs\n\
+Discovered by Edsger Dijkstra");
 
 typedef int (*cmpfun)(const void *, const void *, void *);
 
diff --git a/libc/str/str.h b/libc/str/str.h
index 18a91bd20..fdc97b244 100644
--- a/libc/str/str.h
+++ b/libc/str/str.h
@@ -167,7 +167,7 @@ wint_t towctrans(wint_t, wctrans_t) libcesque;
 
 int getsubopt(char **, char *const *, char **) libcesque paramsnonnull();
 char *strsignal(int) returnsnonnull libcesque;
-char *strerror(int) returnsnonnull dontthrow nocallback;
+char *strerror(int) returnsnonnull dontthrow dontcallback;
 errno_t strerror_r(int, char *, size_t) libcesque;
 char *__xpg_strerror_r(int, char *, size_t) libcesque;
 
diff --git a/libc/str/strcasecmp.c b/libc/str/strcasecmp.c
index acc9d5a7a..edeb633fc 100644
--- a/libc/str/strcasecmp.c
+++ b/libc/str/strcasecmp.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/str/str.h"
 #include "libc/str/tab.internal.h"
 
@@ -34,8 +33,6 @@ int strcasecmp(const char *a, const char *b) {
   size_t i = 0;
   uint64_t v, w;
   if (a == b) return 0;
-  if (IsAsan()) __asan_verify_str(a);
-  if (IsAsan()) __asan_verify_str(b);
   if (((uintptr_t)a & 7) == ((uintptr_t)b & 7)) {
     for (; (uintptr_t)(a + i) & 7; ++i) {
     CheckEm:
diff --git a/libc/str/strcasestr.c b/libc/str/strcasestr.c
index 51bdcf138..cf46cb3f1 100644
--- a/libc/str/strcasestr.c
+++ b/libc/str/strcasestr.c
@@ -18,7 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/str.h"
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/str/tab.internal.h"
 
 typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
@@ -35,15 +34,13 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
  * @asyncsignalsafe
  * @see strstr()
  */
-char *strcasestr(const char *haystack, const char *needle) {
+__vex char *strcasestr(const char *haystack, const char *needle) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   char c;
   size_t i;
   unsigned k, m;
   const xmm_t *p;
   xmm_t v, n1, n2, z = {0};
-  if (IsAsan()) __asan_verify(needle, 1);
-  if (IsAsan()) __asan_verify(haystack, 1);
   if (haystack == needle || !*needle) return (char *)haystack;
   c = *needle;
   n1 = (xmm_t){c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
diff --git a/libc/str/strlcat.c b/libc/str/strlcat.c
index 445ec2742..087376cc7 100644
--- a/libc/str/strlcat.c
+++ b/libc/str/strlcat.c
@@ -19,11 +19,7 @@
 #include "libc/str/str.h"
 // clang-format off
 // $OpenBSD: strlcat.c,v 1.19 2019/01/25 00:19:25 millert Exp $
-
-asm(".ident\t\"\\n\\n\
-OpenBSD Strings (ISC)\\n\
-Copyright (c) 1998, 2015 Todd C. Miller <millert@openbsd.org>\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("openbsd_strings_notice");
 
 /**
  * Appends string, the BSD way.
diff --git a/libc/str/strlcpy.c b/libc/str/strlcpy.c
index 89bbbd8fa..785eaacaf 100644
--- a/libc/str/strlcpy.c
+++ b/libc/str/strlcpy.c
@@ -19,11 +19,7 @@
 #include "libc/str/str.h"
 // clang-format off
 // $OpenBSD: strlcpy.c,v 1.16 2019/01/25 00:19:25 millert Exp $
-
-asm(".ident\t\"\\n\\n\
-OpenBSD Strings (ISC)\\n\
-Copyright (c) 1998, 2015 Todd C. Miller <millert@openbsd.org>\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("openbsd_strings_notice");
 
 /**
  * Copies string, the BSD way.
diff --git a/libc/str/strlen16.c b/libc/str/strlen16.c
index 823d91d4d..cb9fe11c8 100644
--- a/libc/str/strlen16.c
+++ b/libc/str/strlen16.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/str/str.h"
 
 typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
@@ -29,17 +28,15 @@ typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
  * @return number of shorts (excluding NUL)
  * @asyncsignalsafe
  */
-size_t strlen16(const char16_t *s) {
+__vex size_t strlen16(const char16_t *s) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   size_t n;
   xmm_t z = {0};
   unsigned m, k = (uintptr_t)s & 15;
   const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16);
-  if (IsAsan()) __asan_verify(s, 2);
   m = __builtin_ia32_pmovmskb128(*p == z) >> k << k;
   while (!m) m = __builtin_ia32_pmovmskb128(*++p == z);
   n = (const char16_t *)p + (__builtin_ctzl(m) >> 1) - s;
-  if (IsAsan()) __asan_verify(s, n * 2);
   return n;
 #else
   size_t n = 0;
diff --git a/libc/str/strnlen_s.c b/libc/str/strnlen_s.c
index 26595cd4a..c95d88b41 100644
--- a/libc/str/strnlen_s.c
+++ b/libc/str/strnlen_s.c
@@ -18,7 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/assert.h"
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/str/str.h"
 
 static size_t strnlen_s_x64(const char *s, size_t n, size_t i) {
@@ -48,7 +47,6 @@ static size_t strnlen_s_x64(const char *s, size_t n, size_t i) {
 size_t strnlen_s(const char *s, size_t n) {
   size_t i;
   if (!s) return 0;
-  if (IsAsan()) __asan_verify(s, n);
   for (i = 0; (uintptr_t)(s + i) & 7; ++i) {
     if (i == n || !s[i]) return i;
   }
diff --git a/libc/str/strstr.c b/libc/str/strstr.c
index 8cc041d02..b428851b0 100644
--- a/libc/str/strstr.c
+++ b/libc/str/strstr.c
@@ -18,7 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/str.h"
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 
 typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
 
@@ -35,14 +34,12 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
  * @see strcasestr()
  * @see memmem()
  */
-char *strstr(const char *haystack, const char *needle) {
+__vex char *strstr(const char *haystack, const char *needle) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   size_t i;
   unsigned k, m;
   const xmm_t *p;
   xmm_t v, n, z = {0};
-  if (IsAsan()) __asan_verify(needle, 1);
-  if (IsAsan()) __asan_verify(haystack, 1);
   if (haystack == needle || !*needle) return (char *)haystack;
   n = (xmm_t){*needle, *needle, *needle, *needle, *needle, *needle,
               *needle, *needle, *needle, *needle, *needle, *needle,
diff --git a/libc/str/strverscmp.c b/libc/str/strverscmp.c
index 0652be54c..3cc3740ef 100644
--- a/libc/str/strverscmp.c
+++ b/libc/str/strverscmp.c
@@ -26,11 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/str.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
 // clang-format off
 
 /**
diff --git a/libc/str/timingsafe_memcmp.c b/libc/str/timingsafe_memcmp.c
index a209fd829..dff98b620 100644
--- a/libc/str/timingsafe_memcmp.c
+++ b/libc/str/timingsafe_memcmp.c
@@ -19,10 +19,9 @@
 #include "libc/limits.h"
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-timingsafe_memcmp (ISC License)\\n\
-Copyright 2014 Google Inc.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(timingsafe_memcmp_notice, "\
+timingsafe_memcmp (ISC License)\n\
+Copyright 2014 Google Inc.");
 
 /**
  * Lexicographically compares the first 𝑛 bytes in 𝑝 and 𝑞.
diff --git a/libc/str/todd.c b/libc/str/todd.c
new file mode 100644
index 000000000..133d948d7
--- /dev/null
+++ b/libc/str/todd.c
@@ -0,0 +1,2 @@
+__notice(openbsd_strings_notice, "OpenBSD Strings (ISC)\n\
+Copyright (c) 1998, 2015 Todd C. Miller <millert@openbsd.org>");
diff --git a/libc/str/wcrtomb.c b/libc/str/wcrtomb.c
index 23116b707..0a8f6b155 100644
--- a/libc/str/wcrtomb.c
+++ b/libc/str/wcrtomb.c
@@ -29,11 +29,7 @@
 #include "libc/limits.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t wcrtomb(char *s, wchar_t wc, mbstate_t *st) {
   if (!s) return 1;
diff --git a/libc/str/wcslen.c b/libc/str/wcslen.c
index 8cbd6ba35..9dbbfcbee 100644
--- a/libc/str/wcslen.c
+++ b/libc/str/wcslen.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/str/str.h"
 
 typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
@@ -29,17 +28,15 @@ typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
  * @return number of wide characters (excluding NUL)
  * @asyncsignalsafe
  */
-size_t wcslen(const wchar_t *s) {
+__vex size_t wcslen(const wchar_t *s) {
 #if defined(__x86_64__) && !defined(__chibicc__)
   size_t n;
   xmm_t z = {0};
   unsigned m, k = (uintptr_t)s & 15;
   const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16);
-  if (IsAsan()) __asan_verify(s, 4);
   m = __builtin_ia32_pmovmskb128(*p == z) >> k << k;
   while (!m) m = __builtin_ia32_pmovmskb128(*++p == z);
   n = (const wchar_t *)p + (__builtin_ctzl(m) >> 2) - s;
-  if (IsAsan()) __asan_verify(s, n);
   return n;
 #else
   size_t n = 0;
diff --git a/libc/str/wcsnrtombs.c b/libc/str/wcsnrtombs.c
index ef684d9b0..4486ce579 100644
--- a/libc/str/wcsnrtombs.c
+++ b/libc/str/wcsnrtombs.c
@@ -29,11 +29,7 @@
 #include "libc/limits.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t wcsnrtombs(char *dst, const wchar_t **wcs, size_t wn, size_t n,
                   mbstate_t *st) {
diff --git a/libc/str/wcsrtombs.c b/libc/str/wcsrtombs.c
index 013635e39..f1ad160e9 100644
--- a/libc/str/wcsrtombs.c
+++ b/libc/str/wcsrtombs.c
@@ -29,11 +29,7 @@
 #include "libc/limits.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 size_t wcsrtombs(char *s, const wchar_t **ws, size_t n, mbstate_t *st) {
   const wchar_t *ws2;
diff --git a/libc/str/wctob.c b/libc/str/wctob.c
index 62e3ed8a7..bd5ff66b6 100644
--- a/libc/str/wctob.c
+++ b/libc/str/wctob.c
@@ -29,11 +29,7 @@
 #include "libc/stdio/stdio.h"
 #include "libc/str/mb.internal.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int wctob(wint_t c) {
   if (c < 128U) return c;
diff --git a/libc/str/wmemrchr.c b/libc/str/wmemrchr.c
index 848e75ee5..acd2413a6 100644
--- a/libc/str/wmemrchr.c
+++ b/libc/str/wmemrchr.c
@@ -17,7 +17,6 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/dce.h"
-#include "libc/intrin/asan.internal.h"
 #include "libc/limits.h"
 #include "libc/nexgen32e/x86feature.h"
 #include "libc/stdckdint.h"
@@ -38,7 +37,7 @@ static inline const wchar_t *wmemrchr_pure(const wchar_t *s, wchar_t c,
 
 #if defined(__x86_64__) && !defined(__chibicc__)
 static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c,
-                                                   size_t n) {
+                                          size_t n) {
   size_t i;
   unsigned m;
   xmm_t v, t = {c, c, c, c};
@@ -68,16 +67,9 @@ static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c,
  * @return is pointer to first instance of c or NULL if not found
  * @asyncsignalsafe
  */
-void *wmemrchr(const wchar_t *s, wchar_t c, size_t n) {
+__vex void *wmemrchr(const wchar_t *s, wchar_t c, size_t n) {
 #if defined(__x86_64__) && !defined(__chibicc__)
-  size_t bytes;
-  const void *r;
-  if (IsAsan()) {
-    if (ckd_mul(&bytes, n, sizeof(wchar_t))) bytes = -1;
-    __asan_verify(s, bytes);
-  }
-  r = wmemrchr_sse(s, c, n);
-  return (void *)r;
+  return (void *)wmemrchr_sse(s, c, n);
 #else
   return (void *)wmemrchr_pure(s, c, n);
 #endif
diff --git a/libc/sysv/consts.sh b/libc/sysv/consts.sh
index 2a7a5cf22..1242f1e4a 100755
--- a/libc/sysv/consts.sh
+++ b/libc/sysv/consts.sh
@@ -234,7 +234,9 @@ syscon	mmap	MAP_INHERIT				-1			-1			-1			-1			-1			-1			0x00000080		-1			# make
 syscon	mmap	MAP_HASSEMAPHORE			0			0			0x00000200		0x00000200		0x00000200		0			0x00000200		0			# does it matter on x86?
 syscon	mmap	MAP_NOSYNC				0			0			0			0			0x00000800		0			0			0			# flush to physical media only when necessary rather than gratuitously; be sure to use write() rather than ftruncate() with this!
 syscon	mmap	MAP_CONCEAL				0			0			0			0			0x00020000		0x00008000		0x00008000		0			# omit from core dumps; MAP_NOCORE on FreeBSD
-syscon	mmap	MAP_JIT					0			0			0			0x00000800		0			0			0			0			# omit from core dumps; MAP_NOCORE on FreeBSD
+syscon	mmap	MAP_JIT					0			0			0			0x00000800		0			0			0			0			# allocate region used for just-in-time compilation
+syscon	mmap	MAP_NOCACHE				0			0			0x00000400		0x00000400		0			0			0			0			# don't cache pages for this mapping
+syscon	mmap	MAP_NOEXTEND				0			0			0x00000100		0x00000100		0			0			0			0			# for MAP_FILE, don't change file size
 syscon	compat	MAP_NOCORE				0			0			0			0			0x00020000		0x00008000		0x00008000		0			# use MAP_CONCEAL
 syscon	compat	MAP_ANON				0x00000020		0x00000020		0x00001000		0x00001000		0x00001000		0x00001000		0x00001000		0x00000020		# bsd consensus; faked nt
 syscon	compat	MAP_EXECUTABLE				0x00001000		0x00001000		0			0			0			0			0			0			# ignored
@@ -269,17 +271,18 @@ syscon	madv	MADV_WIPEONFORK				18			18			127			127			127			127			127			127			# T
 syscon	madv	MADV_KEEPONFORK				19			19			127			127			127			127			127			127			# TODO: add support ?
 syscon	madv	MADV_COLD				20			20			127			127			127			127			127			127			# TODO: add support ?
 syscon	madv	MADV_PAGEOUT				21			21			127			127			127			127			127			127			# TODO: add support ?
-syscon	madv	MADV_POPULATE_READ				22			22			127			127			127			127			127			127			# TODO: add support ?
-syscon	madv	MADV_POPULATE_WRITE				23			23			127			127			127			127			127			127			# TODO: add support ?
-syscon	madv	MADV_DONTNEED_LOCKED				24			24			127			127			127			127			127			127			# TODO: add support ?
+syscon	madv	MADV_POPULATE_READ			22			22			127			127			127			127			127			127			# TODO: add support ?
+syscon	madv	MADV_POPULATE_WRITE			23			23			127			127			127			127			127			127			# TODO: add support ?
+syscon	madv	MADV_DONTNEED_LOCKED			24			24			127			127			127			127			127			127			# TODO: add support ?
 syscon	madv	MADV_COLLAPSE				25			25			127			127			127			127			127			127			# TODO: add support ?
 syscon	madv	MADV_DOFORK				11			11			127			127			127			127			127			127			# TODO(jart): what is it?
 syscon	madv	MADV_DONTDUMP				16			16			127			127			127			127			127			127			# see MAP_CONCEAL in OpenBSD; TODO(jart): what is it?
 syscon	madv	MADV_DONTFORK				10			10			127			127			127			127			127			127			# TODO(jart): what is it?
 syscon	madv	MADV_HWPOISON				100			100			127			127			127			127			127			127			# TODO(jart): what is it?
-syscon	madv	MADV_SOFT_OFFLINE				101			101			127			127			127			127			127			127			# TODO: add support ?
+syscon	madv	MADV_SOFT_OFFLINE			101			101			127			127			127			127			127			127			# TODO: add support ?
 syscon	madv	MADV_REMOVE				9			9			127			127			127			127			127			127			# TODO(jart): what is it?
 syscon	fadv	POSIX_FADV_NOREUSE			5			5			127			127			5			127			5			127			# wut
+syscon	madv	MADV_REMOVE				9			9			127			127			127			127			127			127			# TODO(jart): what is it?
 
 #	mmap(), mprotect(), etc.
 #	digital restrictions management for the people
@@ -579,19 +582,19 @@ syscon	clock	CLOCK_REALTIME				0			0			0			0			0			0			0			0			# consensus
 syscon	clock	CLOCK_REALTIME_PRECISE			0			0			0			0			9			0			0			0			#
 syscon	clock	CLOCK_REALTIME_FAST			0			0			0			0			10			0			0			0			#
 syscon	clock	CLOCK_REALTIME_COARSE			5			5			0			0			10			0			0			2			# Linux 2.6.32+; bsd consensus; not available on RHEL5
-syscon	clock	CLOCK_MONOTONIC				1			1			1			6			4			3			3			1			# XNU/NT faked; could move backwards if NTP introduces negative leap second
-syscon	clock	CLOCK_MONOTONIC_PRECISE			1			1			1			6			11			3			3			1			#
-syscon	clock	CLOCK_MONOTONIC_FAST			1			1			1			6			12			3			3			1			#
-syscon	clock	CLOCK_MONOTONIC_COARSE			6			6			1			6			12			3			3			1			# Linux 2.6.32+; bsd consensus; not available on RHEL5
-syscon	clock	CLOCK_MONOTONIC_RAW			4			4			127			4			127			127			127			127			# actually monotonic; not subject to NTP adjustments; Linux 2.6.28+; XNU/NT/FreeBSD/OpenBSD faked; not available on RHEL5
-syscon	clock	CLOCK_PROCESS_CPUTIME_ID		2			2			127			12			15			2			0x40000000		127			# NetBSD lets you bitwise a PID into clockid_t
-syscon	clock	CLOCK_THREAD_CPUTIME_ID			3			3			127			16			14			4			0x20000000		127			#
+syscon	clock	CLOCK_MONOTONIC				1			1			6			6			4			3			3			1			# XNU/NT faked; could move backwards if NTP introduces negative leap second
+syscon	clock	CLOCK_MONOTONIC_PRECISE			1			1			6			6			11			3			3			1			#
+syscon	clock	CLOCK_MONOTONIC_FAST			1			1			6			6			12			3			3			1			#
+syscon	clock	CLOCK_MONOTONIC_COARSE			6			6			5			5			12			3			3			1			# Linux 2.6.32+; bsd consensus; not available on RHEL5
+syscon	clock	CLOCK_MONOTONIC_RAW			4			4			4			4			127			127			127			127			# actually monotonic; not subject to NTP adjustments; Linux 2.6.28+; XNU/NT/FreeBSD/OpenBSD faked; not available on RHEL5
+syscon	clock	CLOCK_PROCESS_CPUTIME_ID		2			2			12			12			15			2			0x40000000		4			# NetBSD lets you bitwise a PID into clockid_t
+syscon	clock	CLOCK_THREAD_CPUTIME_ID			3			3			16			16			14			4			0x20000000		5			#
 syscon	clock	CLOCK_PROF				127			127			127			127			2			127			2			127			#
 syscon	clock	CLOCK_BOOTTIME				7			7			7			127			127			6			127			3			#
 syscon	clock	CLOCK_REALTIME_ALARM			8			8			127			127			127			127			127			127			#
 syscon	clock	CLOCK_BOOTTIME_ALARM			9			9			127			127			127			127			127			127			#
 syscon	clock	CLOCK_TAI				11			11			127			127			127			127			127			127			#
-syscon	clock	CLOCK_UPTIME				127			127			127			127			5			5			127			127			#
+syscon	clock	CLOCK_UPTIME				127			127			8			8			5			5			127			127			#
 syscon	clock	CLOCK_UPTIME_PRECISE			127			127			127			127			7			127			127			127			#
 syscon	clock	CLOCK_UPTIME_FAST			127			127			127			127			8			127			127			127			#
 syscon	clock	CLOCK_SECOND				127			127			127			127			13			127			127			127			#
diff --git a/libc/sysv/consts/CLOCK_MONOTONIC.S b/libc/sysv/consts/CLOCK_MONOTONIC.S
index d4cf1ff24..2275c6cf1 100644
--- a/libc/sysv/consts/CLOCK_MONOTONIC.S
+++ b/libc/sysv/consts/CLOCK_MONOTONIC.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_MONOTONIC,1,1,1,6,4,3,3,1
+.syscon clock,CLOCK_MONOTONIC,1,1,6,6,4,3,3,1
diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S b/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S
index 27762d022..225972c1d 100644
--- a/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S
+++ b/libc/sysv/consts/CLOCK_MONOTONIC_COARSE.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_MONOTONIC_COARSE,6,6,1,6,12,3,3,1
+.syscon clock,CLOCK_MONOTONIC_COARSE,6,6,5,5,12,3,3,1
diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S b/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S
index 80bb43b66..0069c82cf 100644
--- a/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S
+++ b/libc/sysv/consts/CLOCK_MONOTONIC_FAST.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_MONOTONIC_FAST,1,1,1,6,12,3,3,1
+.syscon clock,CLOCK_MONOTONIC_FAST,1,1,6,6,12,3,3,1
diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S b/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S
index fdea24d20..e9e77f345 100644
--- a/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S
+++ b/libc/sysv/consts/CLOCK_MONOTONIC_PRECISE.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_MONOTONIC_PRECISE,1,1,1,6,11,3,3,1
+.syscon clock,CLOCK_MONOTONIC_PRECISE,1,1,6,6,11,3,3,1
diff --git a/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S b/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S
index 5704b2138..1c158565e 100644
--- a/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S
+++ b/libc/sysv/consts/CLOCK_MONOTONIC_RAW.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_MONOTONIC_RAW,4,4,127,4,127,127,127,127
+.syscon clock,CLOCK_MONOTONIC_RAW,4,4,4,4,127,127,127,127
diff --git a/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S b/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S
index 2b8c354db..b4b39f501 100644
--- a/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S
+++ b/libc/sysv/consts/CLOCK_PROCESS_CPUTIME_ID.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_PROCESS_CPUTIME_ID,2,2,127,12,15,2,0x40000000,127
+.syscon clock,CLOCK_PROCESS_CPUTIME_ID,2,2,12,12,15,2,0x40000000,4
diff --git a/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S b/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S
index 7d5893688..3f3529ab7 100644
--- a/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S
+++ b/libc/sysv/consts/CLOCK_THREAD_CPUTIME_ID.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_THREAD_CPUTIME_ID,3,3,127,16,14,4,0x20000000,127
+.syscon clock,CLOCK_THREAD_CPUTIME_ID,3,3,16,16,14,4,0x20000000,5
diff --git a/libc/sysv/consts/CLOCK_UPTIME.S b/libc/sysv/consts/CLOCK_UPTIME.S
index 15a0e414a..281eaa508 100644
--- a/libc/sysv/consts/CLOCK_UPTIME.S
+++ b/libc/sysv/consts/CLOCK_UPTIME.S
@@ -1,2 +1,2 @@
 #include "libc/sysv/consts/syscon.internal.h"
-.syscon clock,CLOCK_UPTIME,127,127,127,127,5,5,127,127
+.syscon clock,CLOCK_UPTIME,127,127,8,8,5,5,127,127
diff --git a/libc/sysv/consts/MAP_NOCACHE.S b/libc/sysv/consts/MAP_NOCACHE.S
new file mode 100644
index 000000000..23ee0d40c
--- /dev/null
+++ b/libc/sysv/consts/MAP_NOCACHE.S
@@ -0,0 +1,2 @@
+#include "libc/sysv/consts/syscon.internal.h"
+.syscon mmap,MAP_NOCACHE,0,0,0x00000400,0x00000400,0,0,0,0
diff --git a/libc/sysv/consts/MAP_NOEXTEND.S b/libc/sysv/consts/MAP_NOEXTEND.S
new file mode 100644
index 000000000..1d79cf695
--- /dev/null
+++ b/libc/sysv/consts/MAP_NOEXTEND.S
@@ -0,0 +1,2 @@
+#include "libc/sysv/consts/syscon.internal.h"
+.syscon mmap,MAP_NOEXTEND,0,0,0x00000100,0x00000100,0,0,0,0
diff --git a/libc/sysv/consts/map.h b/libc/sysv/consts/map.h
index 50657c9a8..04008f4cd 100644
--- a/libc/sysv/consts/map.h
+++ b/libc/sysv/consts/map.h
@@ -16,6 +16,8 @@ extern const int MAP_HASSEMAPHORE;
 extern const int MAP_INHERIT;
 extern const int MAP_JIT;
 extern const int MAP_LOCKED;
+extern const int MAP_NOCACHE;
+extern const int MAP_NOEXTEND;
 extern const int MAP_NONBLOCK;
 extern const int MAP_NORESERVE;
 extern const int MAP_NOSYNC;
@@ -40,6 +42,7 @@ COSMOPOLITAN_C_END_
 #define MAP_FIXED_NOREPLACE MAP_FIXED_NOREPLACE
 #define MAP_HASSEMAPHORE    MAP_HASSEMAPHORE
 #define MAP_POPULATE        MAP_POPULATE
+#define MAP_NORESERVE       MAP_NORESERVE
 
 #define MAP_ANON   MAP_ANONYMOUS
 #define MAP_NOCORE MAP_CONCEAL
diff --git a/libc/sysv/errno.c b/libc/sysv/errno.c
index 570f29d5b..438ee9508 100644
--- a/libc/sysv/errno.c
+++ b/libc/sysv/errno.c
@@ -17,7 +17,7 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/errno.h"
-#include "libc/thread/tls.h"
+#include "libc/thread/tls2.internal.h"
 
 /**
  * Global variable for last error.
diff --git a/libc/sysv/systemfive.S b/libc/sysv/systemfive.S
index 02b237817..e858edc54 100644
--- a/libc/sysv/systemfive.S
+++ b/libc/sysv/systemfive.S
@@ -108,7 +108,7 @@ systemfive_cp:
 systemfive_cancellable:			// our pthread_cancel() miracle code
 	cmpb	$0,__tls_enabled(%rip)	// inspired by the musl libc design!
 	je	1f			// we handle linux and bsd together!
-	mov	%fs:0,%r10		// CosmoTib::tib_self
+	mov	%gs:0x30,%r10		// CosmoTib::tib_self
 	mov	0x28(%r10),%r10		// CosmoTib::tib_pthread
 	test	%r10,%r10		// is it a posix thread?
 	jz	1f			// it's spawn() probably
@@ -135,7 +135,7 @@ systemfive_cancellable_end:		// i/o calls park here for long time
 	jne	systemfive_errno	// werent interrupted by OnSigCancel
 	cmpb	$0,__tls_enabled(%rip)	// make sure it's safe to grab %fs:0
 	je	systemfive_errno	// tls is disabled we can't continue
-	mov	%fs:0,%rcx		// CosmoTib::tib_self
+	mov	%gs:0x30,%rcx		// CosmoTib::tib_self
 	mov	0x28(%rcx),%rcx		// CosmoTib::tib_pthread
 	test	%rcx,%rcx		// is it a posix thread?
 	jz	systemfive_errno	// it's spawn() probably
diff --git a/libc/testlib/ezbenchcontrol.c b/libc/testlib/ezbenchcontrol.c
index 0763a228f..f64263fe8 100644
--- a/libc/testlib/ezbenchcontrol.c
+++ b/libc/testlib/ezbenchcontrol.c
@@ -42,7 +42,7 @@ double __testlib_ezbenchcontrol(void) {
     } while (++Tries < 10 && (__testlib_getcore() != Core &&
                               __testlib_getinterrupts() > Interrupts));
     if (Tries == 10) {
-      tinyprint(2, "warning: failed to accurately benchmark control\n");
+      tinyprint(2, "warning: failed to accurately benchmark control\n", NULL);
     }
     strcpy(host, "unknown");
     gethostname(host, 64);
diff --git a/libc/testlib/showerror.c b/libc/testlib/showerror.c
index 56238317d..713afbb67 100644
--- a/libc/testlib/showerror.c
+++ b/libc/testlib/showerror.c
@@ -101,7 +101,7 @@ static void testlib_showerror_(int line,              //
       _weaken(kvprintf)(fmt, va);
       tinyprint(2, "\n", NULL);
     } else {
-      tinyprint(2, "\t[missing kvprintf]\n");
+      tinyprint(2, "\t[missing kvprintf]\n", NULL);
     }
   }
   tinyprint(2, "\t", SUBTLE, strerror(e), RESET, "\n\t", SUBTLE,
diff --git a/libc/testlib/testlib.h b/libc/testlib/testlib.h
index e2dedd68f..50a0df919 100644
--- a/libc/testlib/testlib.h
+++ b/libc/testlib/testlib.h
@@ -167,22 +167,22 @@ void TearDownOnce(void);
 #define ASSERT_IN(NEEDLE, GOT) \
   assertContains(FILIFU sizeof(*(NEEDLE)), NEEDLE, GOT, #GOT, true)
 
-#define ASSERT_BINEQ(WANT, GOT)              \
-  _Generic((WANT)[0], char                   \
-           : assertBinaryEquals_hex, default \
-           : assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, true)
-#define ASSERT_BINNE(NOPE, GOT)                 \
-  _Generic((NOPE)[0], char                      \
-           : assertBinaryNotEquals_hex, default \
-           : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true)
-#define ASSERT_BINEQN(WANT, GOT, N)          \
-  _Generic((WANT)[0], char                   \
-           : assertBinaryEquals_hex, default \
-           : assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, true)
-#define ASSERT_BINNEN(NOPE, GOT, N)             \
-  _Generic((NOPE)[0], char                      \
-           : assertBinaryNotEquals_hex, default \
-           : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true)
+#define ASSERT_BINEQ(WANT, GOT)     \
+  _Generic((WANT)[0],               \
+      char: assertBinaryEquals_hex, \
+      default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, true)
+#define ASSERT_BINNE(NOPE, GOT)        \
+  _Generic((NOPE)[0],                  \
+      char: assertBinaryNotEquals_hex, \
+      default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true)
+#define ASSERT_BINEQN(WANT, GOT, N) \
+  _Generic((WANT)[0],               \
+      char: assertBinaryEquals_hex, \
+      default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, true)
+#define ASSERT_BINNEN(NOPE, GOT, N)    \
+  _Generic((NOPE)[0],                  \
+      char: assertBinaryNotEquals_hex, \
+      default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, true)
 
 #define ASSERT_FLOAT_EQ(WANT, GOT) \
   assertLongDoubleEquals(FILIFU WANT, GOT, #GOT, true)
@@ -243,22 +243,22 @@ void TearDownOnce(void);
 #define EXPECT_IN(NEEDLE, GOT) \
   assertContains(FILIFU sizeof(*(NEEDLE)), NEEDLE, GOT, #GOT, false)
 
-#define EXPECT_BINEQ(WANT, GOT)              \
-  _Generic((WANT)[0], char                   \
-           : assertBinaryEquals_hex, default \
-           : assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, false)
-#define EXPECT_BINNE(NOPE, GOT)                 \
-  _Generic((NOPE)[0], char                      \
-           : assertBinaryNotEquals_hex, default \
-           : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false)
-#define EXPECT_BINEQN(WANT, GOT, N)          \
-  _Generic((WANT)[0], char                   \
-           : assertBinaryEquals_hex, default \
-           : assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, false)
-#define EXPECT_BINNEN(NOPE, GOT, N)             \
-  _Generic((NOPE)[0], char                      \
-           : assertBinaryNotEquals_hex, default \
-           : assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false)
+#define EXPECT_BINEQ(WANT, GOT)     \
+  _Generic((WANT)[0],               \
+      char: assertBinaryEquals_hex, \
+      default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, -1, #GOT, false)
+#define EXPECT_BINNE(NOPE, GOT)        \
+  _Generic((NOPE)[0],                  \
+      char: assertBinaryNotEquals_hex, \
+      default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false)
+#define EXPECT_BINEQN(WANT, GOT, N) \
+  _Generic((WANT)[0],               \
+      char: assertBinaryEquals_hex, \
+      default: assertBinaryEquals_cp437)(FILIFU WANT, GOT, N, #GOT, false)
+#define EXPECT_BINNEN(NOPE, GOT, N)    \
+  _Generic((NOPE)[0],                  \
+      char: assertBinaryNotEquals_hex, \
+      default: assertBinaryNotEquals_cp437)(FILIFU NOPE, GOT, -1, #GOT, false)
 
 #define EXPECT_FLOAT_EQ(WANT, GOT) \
   assertLongDoubleEquals(FILIFU WANT, GOT, #GOT, false)
diff --git a/libc/thread/makecontext.c b/libc/thread/makecontext.c
index d3e93a85c..0108979f7 100644
--- a/libc/thread/makecontext.c
+++ b/libc/thread/makecontext.c
@@ -30,7 +30,8 @@
 typedef double vect __attribute__((__vector_size__(16), __aligned__(16)));
 
 struct Gadget {
-  void (*func)();
+  void (*func)(long, long, long, long, long, long,  //
+               vect, vect, vect, vect, vect, vect);
   long longs[6];
   vect vects[6];
 };
@@ -89,7 +90,7 @@ static void runcontext(struct Gadget *call, ucontext_t *link) {
  * @param argc is effectively ignored (see notes above)
  * @see setcontext(), getcontext(), swapcontext()
  */
-void makecontext(ucontext_t *uc, void func(), int argc, ...) {
+void makecontext(ucontext_t *uc, void *func, int argc, ...) {
   va_list va;
   long sp, sb;
   struct Gadget *call;
diff --git a/libc/thread/tls.h b/libc/thread/tls.h
index a9b689a3f..87c0b0bc5 100644
--- a/libc/thread/tls.h
+++ b/libc/thread/tls.h
@@ -39,7 +39,7 @@ struct CosmoTib {
   void **tib_keys;
   void *tib_nsync;
   void *tib_todo[7];
-};
+} __attribute__((__aligned__(64)));
 
 extern int __threaded;
 extern char __tls_morphed;
@@ -70,7 +70,7 @@ forceinline pureconst struct CosmoTib *__get_tls(void) {
   return 0;
 #elif __x86_64__
   struct CosmoTib *__tib;
-  __asm__("mov\t%%fs:0,%0" : "=r"(__tib));
+  __asm__("mov\t%%gs:0x30,%0" : "=r"(__tib));
   return __tib;
 #elif defined(__aarch64__)
   register struct CosmoTib *__tls __asm__("x28");
diff --git a/libc/thread/tls2.internal.h b/libc/thread/tls2.internal.h
index e91ce33d7..383d79cc2 100644
--- a/libc/thread/tls2.internal.h
+++ b/libc/thread/tls2.internal.h
@@ -3,7 +3,7 @@
 #include "libc/dce.h"
 #include "libc/thread/tls.h"
 COSMOPOLITAN_C_START_
-#if defined(__GNUC__) && defined(__x86_64__) && !defined(__STRICT_ANSI__)
+#if defined(__GNUC__) && defined(__x86_64__)
 
 /**
  * Returns location of thread information block.
@@ -13,14 +13,10 @@ COSMOPOLITAN_C_START_
  */
 forceinline struct CosmoTib *__get_tls_privileged(void) {
   char *tib, *lin = (char *)0x30;
-  if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd() || IsMetal()) {
-    if (!__tls_morphed) {
-      asm("mov\t%%fs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
-    } else {
-      asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
-    }
+  if (IsNetbsd() || IsOpenbsd()) {
+    __asm__("mov\t%%fs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
   } else {
-    asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
+    __asm__("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
     if (IsWindows()) {
       tib = *(char **)(tib + 0x1480 + __tls_index * 8);
     }
@@ -30,13 +26,13 @@ forceinline struct CosmoTib *__get_tls_privileged(void) {
 
 forceinline struct CosmoTib *__get_tls_win32(void) {
   char *tib, *lin = (char *)0x30;
-  asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
+  __asm__("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
   tib = *(char **)(tib + 0x1480 + __tls_index * 8);
   return (struct CosmoTib *)tib;
 }
 
 forceinline void __set_tls_win32(void *tls) {
-  asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tls));
+  __asm__("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tls));
 }
 
 #elif defined(__aarch64__)
diff --git a/libc/time/BUILD.mk b/libc/time/BUILD.mk
index 8705b3347..bb3b136f9 100644
--- a/libc/time/BUILD.mk
+++ b/libc/time/BUILD.mk
@@ -4,7 +4,6 @@
 PKGS += LIBC_TIME
 
 LIBC_TIME_ARTIFACTS += LIBC_TIME_A
-LIBC_TIME_ZONEINFOS = $(wildcard usr/share/zoneinfo/*)
 LIBC_TIME = $(LIBC_TIME_A_DEPS) $(LIBC_TIME_A)
 LIBC_TIME_A = o/$(MODE)/libc/time/time.a
 LIBC_TIME_A_FILES := $(wildcard libc/time/struct/*) $(wildcard libc/time/*)
@@ -12,6 +11,10 @@ LIBC_TIME_A_HDRS := $(filter %.h,$(LIBC_TIME_A_FILES))
 LIBC_TIME_A_SRCS_S = $(filter %.S,$(LIBC_TIME_A_FILES))
 LIBC_TIME_A_SRCS_C = $(filter %.c,$(LIBC_TIME_A_FILES))
 
+LIBC_TIME_ZONEINFOS :=					\
+	$(wildcard usr/share/zoneinfo/*)		\
+	$(wildcard usr/share/zoneinfo/US/*)
+
 LIBC_TIME_A_SRCS =					\
 	$(LIBC_TIME_A_SRCS_S)				\
 	$(LIBC_TIME_A_SRCS_C)
diff --git a/libc/time/localtime.c b/libc/time/localtime.c
index c90686b2c..3c5c567c8 100644
--- a/libc/time/localtime.c
+++ b/libc/time/localtime.c
@@ -5,9 +5,9 @@
 #include "libc/calls/blockcancel.internal.h"
 #include "libc/calls/calls.h"
 #include "libc/cxxabi.h"
-#include "libc/serialize.h"
 #include "libc/mem/gc.h"
 #include "libc/mem/mem.h"
+#include "libc/serialize.h"
 #include "libc/str/str.h"
 #include "libc/sysv/consts/o.h"
 #include "libc/thread/thread.h"
@@ -60,7 +60,8 @@ void localtime_unlock(void) {
 	pthread_mutex_unlock(&locallock);
 }
 
-__attribute__((__constructor__)) static void localtime_init(void) {
+__attribute__((__constructor__(80)))
+static textstartup void localtime_init(void) {
 	localtime_wipe();
 	pthread_atfork(localtime_lock,
 		       localtime_unlock,
diff --git a/libc/time/strftime.c b/libc/time/strftime.c
index 1906cf1db..1a607b35e 100644
--- a/libc/time/strftime.c
+++ b/libc/time/strftime.c
@@ -26,10 +26,8 @@
 
 #define DIVISOR	100
 
-asm(".ident\t\"\\n\\n\
-strftime (BSD-3)\\n\
-Copyright 1989 The Regents of the University of California\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(strftime_notice, "strftime (BSD-3)\n\
+Copyright 1989 The Regents of the University of California");
 
 /*
 ** Based on the UCB version with the copyright notice appearing above.
diff --git a/libc/time/strptime.c b/libc/time/strptime.c
index 74c558466..63d9020ac 100644
--- a/libc/time/strptime.c
+++ b/libc/time/strptime.c
@@ -30,11 +30,7 @@
 #include "libc/str/str.h"
 #include "libc/time/struct/tm.h"
 #include "libc/time/time.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2019 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 char *strptime(const char *s, const char *f, struct tm *tm) {
   int i, w, neg, adj, min, range, itemsize, *dest, dummy;
@@ -44,8 +40,7 @@ char *strptime(const char *s, const char *f, struct tm *tm) {
   while (*f) {
     if (*f != '%') {
       if (isspace(*f)) {
-        for (; *s && isspace(*s); s++)
-          ;
+        for (; *s && isspace(*s); s++);
       } else if (*s != *f) {
         return 0;
       } else {
@@ -138,8 +133,7 @@ char *strptime(const char *s, const char *f, struct tm *tm) {
         goto numeric_range;
       case 'n':
       case 't':
-        for (; *s && isspace(*s); s++)
-          ;
+        for (; *s && isspace(*s); s++);
         break;
       case 'p':
         ex = "AM";
diff --git a/libc/time/tzfile.internal.h b/libc/time/tzfile.internal.h
index 3c892b77f..abab9923d 100644
--- a/libc/time/tzfile.internal.h
+++ b/libc/time/tzfile.internal.h
@@ -29,7 +29,7 @@
 #endif /* !defined TZDEFAULT */
 
 #ifndef TZDEFRULES
-#define TZDEFRULES	"New_York"
+#define TZDEFRULES	"US/Pacific"
 #endif /* !defined TZDEFRULES */
 
 
diff --git a/libc/tinymath/.clang-format b/libc/tinymath/.clang-format
new file mode 100644
index 000000000..47a38a93f
--- /dev/null
+++ b/libc/tinymath/.clang-format
@@ -0,0 +1,2 @@
+DisableFormat: true
+SortIncludes: Never
diff --git a/libc/tinymath/BUILD.mk b/libc/tinymath/BUILD.mk
index df3806404..15493f253 100644
--- a/libc/tinymath/BUILD.mk
+++ b/libc/tinymath/BUILD.mk
@@ -54,6 +54,7 @@ o/$(MODE)/libc/tinymath/loglq.o: private		\
 
 $(LIBC_TINYMATH_A_OBJS): private			\
 		CFLAGS +=				\
+			-fmath-errno			\
 			-fsigned-zeros			\
 			-ftrapping-math			\
 			-frounding-math			\
diff --git a/libc/tinymath/LICENSE.optimized-routines b/libc/tinymath/LICENSE.optimized-routines
index 20a4b7717..c6fbf4a38 100644
--- a/libc/tinymath/LICENSE.optimized-routines
+++ b/libc/tinymath/LICENSE.optimized-routines
@@ -5,7 +5,7 @@ MIT OR Apache-2.0 WITH LLVM-exception
 MIT License
 -----------
 
-Copyright (c) 1999-2022, Arm Limited.
+Copyright (c) 2018-2024, Arm Limited.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/libc/tinymath/__cexp.c b/libc/tinymath/__cexp.c
index 788edab61..59d3e36fd 100644
--- a/libc/tinymath/__cexp.c
+++ b/libc/tinymath/__cexp.c
@@ -28,12 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_exp.c */
 /*-
diff --git a/libc/tinymath/__cexpf.c b/libc/tinymath/__cexpf.c
index 5bf24743f..1840c31e1 100644
--- a/libc/tinymath/__cexpf.c
+++ b/libc/tinymath/__cexpf.c
@@ -28,12 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_expf.c */
 /*-
diff --git a/libc/tinymath/__math_divzero.c b/libc/tinymath/__math_divzero.c
deleted file mode 100644
index 6c4fecba0..000000000
--- a/libc/tinymath/__math_divzero.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-double __math_divzero(uint32_t sign)
-{
-	return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
-}
diff --git a/libc/tinymath/__math_divzerof.c b/libc/tinymath/__math_divzerof.c
deleted file mode 100644
index aba79e70d..000000000
--- a/libc/tinymath/__math_divzerof.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-float __math_divzerof(uint32_t sign)
-{
-	return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
-}
diff --git a/libc/tinymath/__math_invalid.c b/libc/tinymath/__math_invalid.c
deleted file mode 100644
index 77a8a245e..000000000
--- a/libc/tinymath/__math_invalid.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-double __math_invalid(double x)
-{
-	return (x - x) / (x - x);
-}
diff --git a/libc/tinymath/__math_invalidf.c b/libc/tinymath/__math_invalidf.c
deleted file mode 100644
index 5c8bd6951..000000000
--- a/libc/tinymath/__math_invalidf.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-float __math_invalidf(float x)
-{
-	return (x - x) / (x - x);
-}
diff --git a/libc/tinymath/__math_oflow.c b/libc/tinymath/__math_oflow.c
deleted file mode 100644
index 3e8a756a0..000000000
--- a/libc/tinymath/__math_oflow.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-dontinstrument double __math_oflow(uint32_t sign)
-{
-	return __math_xflow(sign, 0x1p769);
-}
diff --git a/libc/tinymath/__math_oflowf.c b/libc/tinymath/__math_oflowf.c
deleted file mode 100644
index c289062d4..000000000
--- a/libc/tinymath/__math_oflowf.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-dontinstrument float __math_oflowf(uint32_t sign)
-{
-	return __math_xflowf(sign, 0x1p97f);
-}
diff --git a/libc/tinymath/__math_uflow.c b/libc/tinymath/__math_uflow.c
deleted file mode 100644
index c810da1c6..000000000
--- a/libc/tinymath/__math_uflow.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-dontinstrument double __math_uflow(uint32_t sign)
-{
-	return __math_xflow(sign, 0x1p-767);
-}
diff --git a/libc/tinymath/__math_uflowf.c b/libc/tinymath/__math_uflowf.c
deleted file mode 100644
index de7cf7c06..000000000
--- a/libc/tinymath/__math_uflowf.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-dontinstrument float __math_uflowf(uint32_t sign)
-{
-	return __math_xflowf(sign, 0x1p-95f);
-}
diff --git a/libc/tinymath/__math_xflow.c b/libc/tinymath/__math_xflow.c
deleted file mode 100644
index 7e87826dc..000000000
--- a/libc/tinymath/__math_xflow.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-dontinstrument double __math_xflow(uint32_t sign, double y)
-{
-	return eval_as_double(fp_barrier(sign ? -y : y) * y);
-}
diff --git a/libc/tinymath/__math_xflowf.c b/libc/tinymath/__math_xflowf.c
deleted file mode 100644
index 1ea47b7fe..000000000
--- a/libc/tinymath/__math_xflowf.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/internal.h"
-// clang-format off
-
-dontinstrument float __math_xflowf(uint32_t sign, float y)
-{
-	return eval_as_float(fp_barrierf(sign ? -y : y) * y);
-}
diff --git a/libc/tinymath/acos.c b/libc/tinymath/acos.c
index 7631f77ea..9bd36c7c8 100644
--- a/libc/tinymath/acos.c
+++ b/libc/tinymath/acos.c
@@ -27,11 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */
 /*
diff --git a/libc/tinymath/acosf.c b/libc/tinymath/acosf.c
index 969a464b4..ee451d43a 100644
--- a/libc/tinymath/acosf.c
+++ b/libc/tinymath/acosf.c
@@ -28,11 +28,9 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */
 /*
diff --git a/libc/tinymath/acosh.c b/libc/tinymath/acosh.c
index b1bb9a4fa..cc0c3f0f5 100644
--- a/libc/tinymath/acosh.c
+++ b/libc/tinymath/acosh.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns inverse hyperbolic cosine of 𝑥.
diff --git a/libc/tinymath/acoshf.c b/libc/tinymath/acoshf.c
index f0bc86b5c..d8d4be8e9 100644
--- a/libc/tinymath/acoshf.c
+++ b/libc/tinymath/acoshf.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns inverse hyperbolic cosine of 𝑥.
diff --git a/libc/tinymath/acoshl.c b/libc/tinymath/acoshl.c
index 019d6ae74..a0b16cc66 100644
--- a/libc/tinymath/acoshl.c
+++ b/libc/tinymath/acoshl.c
@@ -39,15 +39,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* EXP_LARGE is the threshold above which we use acosh(x) ~= log(2x). */
 #if LDBL_MANT_DIG == 64
diff --git a/libc/tinymath/acosl.c b/libc/tinymath/acosl.c
index d65ab0a27..eaa8ffb7f 100644
--- a/libc/tinymath/acosl.c
+++ b/libc/tinymath/acosl.c
@@ -29,15 +29,8 @@
 #include "libc/tinymath/invtrigl.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_acosl.c */
 /*
diff --git a/libc/tinymath/arm.internal.h b/libc/tinymath/arm.internal.h
new file mode 100644
index 000000000..de1337a4c
--- /dev/null
+++ b/libc/tinymath/arm.internal.h
@@ -0,0 +1,509 @@
+#ifndef COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
+#define COSMOPOLITAN_LIBC_TINYMATH_ARM_H_
+#include "libc/math.h"
+COSMOPOLITAN_C_START_
+
+#define USE_GLIBC_ABI 1
+
+/* If defined to 1, return correct results for special cases in non-nearest
+   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
+   This may be set to 0 if there is no fenv support or if math functions only
+   get called in round to nearest mode.  */
+#ifdef __ROUNDING_MATH__
+#define WANT_ROUNDING 1
+#else
+#define WANT_ROUNDING 0
+#endif
+
+/* If defined to 1, set errno in math functions according to ISO C.  Many math
+   libraries do not set errno, so this is 0 by default.  It may need to be
+   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
+#ifdef __NO_MATH_ERRNO__
+#define WANT_ERRNO 0
+#else
+#define WANT_ERRNO 1
+#endif
+
+/*------------------------------------------------------------------------------*/
+/* optimized-routines/math/math_config.h */
+
+#ifndef WANT_ROUNDING
+/* If defined to 1, return correct results for special cases in non-nearest
+   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
+   This may be set to 0 if there is no fenv support or if math functions only
+   get called in round to nearest mode.  */
+# define WANT_ROUNDING 1
+#endif
+#ifndef WANT_ERRNO
+/* If defined to 1, set errno in math functions according to ISO C.  Many math
+   libraries do not set errno, so this is 0 by default.  It may need to be
+   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
+# define WANT_ERRNO 0
+#endif
+#ifndef WANT_ERRNO_UFLOW
+/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).  */
+# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
+#endif
+
+/* Compiler can inline round as a single instruction.  */
+#ifndef HAVE_FAST_ROUND
+# if __aarch64__
+#   define HAVE_FAST_ROUND 1
+# else
+#   define HAVE_FAST_ROUND 0
+# endif
+#endif
+
+/* Compiler can inline lround, but not (long)round(x).  */
+#ifndef HAVE_FAST_LROUND
+# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
+#   define HAVE_FAST_LROUND 1
+# else
+#   define HAVE_FAST_LROUND 0
+# endif
+#endif
+
+/* Compiler can inline fma as a single instruction.  */
+#ifndef HAVE_FAST_FMA
+# if defined FP_FAST_FMA || __aarch64__
+#   define HAVE_FAST_FMA 1
+# else
+#   define HAVE_FAST_FMA 0
+# endif
+#endif
+
+/* Provide *_finite symbols and some of the glibc hidden symbols
+   so libmathlib can be used with binaries compiled against glibc
+   to interpose math functions with both static and dynamic linking.  */
+#ifndef USE_GLIBC_ABI
+# if __GNUC__
+#   define USE_GLIBC_ABI 1
+# else
+#   define USE_GLIBC_ABI 0
+# endif
+#endif
+
+/* Optionally used extensions.  */
+#ifdef __GNUC__
+# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
+# define NOINLINE __attribute__ ((noinline))
+# define UNUSED __attribute__ ((unused))
+# define likely(x) __builtin_expect (!!(x), 1)
+# define unlikely(x) __builtin_expect (x, 0)
+# if __GNUC__ >= 9
+#   define attribute_copy(f) __attribute__ ((copy (f)))
+# else
+#   define attribute_copy(f)
+# endif
+# define strong_alias(f, a) \
+  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
+# define hidden_alias(f, a) \
+  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
+  attribute_copy (f);
+#else
+# define HIDDEN
+# define NOINLINE
+# define UNUSED
+# define likely(x) (x)
+# define unlikely(x) (x)
+#endif
+
+/* Return ptr but hide its value from the compiler so accesses through it
+   cannot be optimized based on the contents.  */
+#define ptr_barrier(ptr)                                                      \
+  ({                                                                          \
+    __typeof (ptr) __ptr = (ptr);                                             \
+    __asm("" : "+r"(__ptr));                                                  \
+    __ptr;                                                                    \
+  })
+
+#if HAVE_FAST_ROUND
+/* When set, the roundtoint and converttoint functions are provided with
+   the semantics documented below.  */
+# define TOINT_INTRINSICS 1
+
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+   consistently with converttoint so the results match.  If the result
+   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
+static inline double_t
+roundtoint (double_t x)
+{
+  return round (x);
+}
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint.  If the result is not representible in an
+   int32_t then the semantics is unspecified.  */
+static inline int32_t
+converttoint (double_t x)
+{
+# if HAVE_FAST_LROUND
+  return lround (x);
+# else
+  return (long) round (x);
+# endif
+}
+#endif
+
+static inline uint32_t
+asuint (float f)
+{
+  union
+  {
+    float f;
+    uint32_t i;
+  } u = {f};
+  return u.i;
+}
+
+static inline float
+asfloat (uint32_t i)
+{
+  union
+  {
+    uint32_t i;
+    float f;
+  } u = {i};
+  return u.f;
+}
+
+static inline uint64_t
+asuint64 (double f)
+{
+  union
+  {
+    double f;
+    uint64_t i;
+  } u = {f};
+  return u.i;
+}
+
+static inline double
+asdouble (uint64_t i)
+{
+  union
+  {
+    uint64_t i;
+    double f;
+  } u = {i};
+  return u.f;
+}
+
+#ifndef IEEE_754_2008_SNAN
+# define IEEE_754_2008_SNAN 1
+#endif
+static inline int
+issignalingf_inline (float x)
+{
+  uint32_t ix = asuint (x);
+  if (!IEEE_754_2008_SNAN)
+    return (ix & 0x7fc00000) == 0x7fc00000;
+  return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
+}
+
+static inline int
+issignaling_inline (double x)
+{
+  uint64_t ix = asuint64 (x);
+  if (!IEEE_754_2008_SNAN)
+    return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
+  return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
+}
+
+#if __aarch64__ && __GNUC__
+/* Prevent the optimization of a floating-point expression.  */
+static inline float
+opt_barrier_float (float x)
+{
+  __asm__ __volatile__ ("" : "+w" (x));
+  return x;
+}
+static inline double
+opt_barrier_double (double x)
+{
+  __asm__ __volatile__ ("" : "+w" (x));
+  return x;
+}
+/* Force the evaluation of a floating-point expression for its side-effect.  */
+static inline void
+force_eval_float (float x)
+{
+  __asm__ __volatile__ ("" : "+w" (x));
+}
+static inline void
+force_eval_double (double x)
+{
+  __asm__ __volatile__ ("" : "+w" (x));
+}
+#else
+static inline float
+opt_barrier_float (float x)
+{
+  volatile float y = x;
+  return y;
+}
+static inline double
+opt_barrier_double (double x)
+{
+  volatile double y = x;
+  return y;
+}
+static inline void
+force_eval_float (float x)
+{
+  volatile float y UNUSED = x;
+}
+static inline void
+force_eval_double (double x)
+{
+  volatile double y UNUSED = x;
+}
+#endif
+
+/* Evaluate an expression as the specified type, normally a type
+   cast should be enough, but compilers implement non-standard
+   excess-precision handling, so when FLT_EVAL_METHOD != 0 then
+   these functions may need to be customized.  */
+static inline float
+eval_as_float (float x)
+{
+  return x;
+}
+static inline double
+eval_as_double (double x)
+{
+  return x;
+}
+
+/* Error handling tail calls for special cases, with a sign argument.
+   The sign of the return value is set if the argument is non-zero.  */
+
+/* The result overflows.  */
+HIDDEN float __math_oflowf (uint32_t);
+/* The result underflows to 0 in nearest rounding mode.  */
+HIDDEN float __math_uflowf (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only.  */
+HIDDEN float __math_may_uflowf (uint32_t);
+/* Division by zero.  */
+HIDDEN float __math_divzerof (uint32_t);
+/* The result overflows.  */
+HIDDEN double __math_oflow (uint32_t);
+/* The result underflows to 0 in nearest rounding mode.  */
+HIDDEN double __math_uflow (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only.  */
+HIDDEN double __math_may_uflow (uint32_t);
+/* Division by zero.  */
+HIDDEN double __math_divzero (uint32_t);
+
+/* Error handling using input checking.  */
+
+/* Invalid input unless it is a quiet NaN.  */
+HIDDEN float __math_invalidf (float);
+/* Invalid input unless it is a quiet NaN.  */
+HIDDEN double __math_invalid (double);
+/* Invalid input unless it is a quiet NaN.  */
+HIDDEN long double __math_invalidl (long double);
+
+/* Error handling using output checking, only for errno setting.  */
+
+/* Check if the result overflowed to infinity.  */
+HIDDEN double __math_check_oflow (double);
+/* Check if the result underflowed to 0.  */
+HIDDEN double __math_check_uflow (double);
+
+/* Check if the result overflowed to infinity.  */
+static inline double
+check_oflow (double x)
+{
+  return WANT_ERRNO ? __math_check_oflow (x) : x;
+}
+
+/* Check if the result underflowed to 0.  */
+static inline double
+check_uflow (double x)
+{
+  return WANT_ERRNO ? __math_check_uflow (x) : x;
+}
+
+/* Check if the result overflowed to infinity.  */
+HIDDEN float __math_check_oflowf (float);
+/* Check if the result underflowed to 0.  */
+HIDDEN float __math_check_uflowf (float);
+
+/* Check if the result overflowed to infinity.  */
+static inline float
+check_oflowf (float x)
+{
+  return WANT_ERRNO ? __math_check_oflowf (x) : x;
+}
+
+/* Check if the result underflowed to 0.  */
+static inline float
+check_uflowf (float x)
+{
+  return WANT_ERRNO ? __math_check_uflowf (x) : x;
+}
+
+/* Shared between expf, exp2f and powf.  */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern const struct exp2f_data
+{
+  uint64_t tab[1 << EXP2F_TABLE_BITS];
+  double shift_scaled;
+  double poly[EXP2F_POLY_ORDER];
+  double shift;
+  double invln2_scaled;
+  double poly_scaled[EXP2F_POLY_ORDER];
+} __exp2f_data HIDDEN;
+
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern const struct logf_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOGF_TABLE_BITS];
+  double ln2;
+  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
+} __logf_data HIDDEN;
+
+#define LOG2F_TABLE_BITS 4
+#define LOG2F_POLY_ORDER 4
+extern const struct log2f_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOG2F_TABLE_BITS];
+  double poly[LOG2F_POLY_ORDER];
+} __log2f_data HIDDEN;
+
+#define POWF_LOG2_TABLE_BITS 4
+#define POWF_LOG2_POLY_ORDER 5
+#if TOINT_INTRINSICS
+# define POWF_SCALE_BITS EXP2F_TABLE_BITS
+#else
+# define POWF_SCALE_BITS 0
+#endif
+#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
+extern const struct powf_log2_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << POWF_LOG2_TABLE_BITS];
+  double poly[POWF_LOG2_POLY_ORDER];
+} __powf_log2_data HIDDEN;
+
+
+#define EXP_TABLE_BITS 7
+#define EXP_POLY_ORDER 5
+/* Use polynomial that is optimized for a wider input range.  This may be
+   needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
+#define EXP_POLY_WIDE 0
+/* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
+   needed for good precision in non-nearest rouning and !EXP_POLY_WIDE.  */
+#define EXP_USE_TOINT_NARROW 0
+#define EXP2_POLY_ORDER 5
+#define EXP2_POLY_WIDE 0
+/* Wider exp10 polynomial necessary for good precision in non-nearest rounding
+   and !TOINT_INTRINSICS.  */
+#define EXP10_POLY_WIDE 0
+extern const struct exp_data
+{
+  double invln2N;
+  double invlog10_2N;
+  double shift;
+  double negln2hiN;
+  double negln2loN;
+  double neglog10_2hiN;
+  double neglog10_2loN;
+  double poly[4]; /* Last four coefficients.  */
+  double exp2_shift;
+  double exp2_poly[EXP2_POLY_ORDER];
+  double exp10_poly[5];
+  uint64_t tab[2*(1 << EXP_TABLE_BITS)];
+} __exp_data HIDDEN;
+
+#define LOG_TABLE_BITS 7
+#define LOG_POLY_ORDER 6
+#define LOG_POLY1_ORDER 12
+extern const struct log_data
+{
+  double ln2hi;
+  double ln2lo;
+  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
+  double poly1[LOG_POLY1_ORDER - 1];
+  struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
+#if !HAVE_FAST_FMA
+  struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
+#endif
+} __log_data HIDDEN;
+
+#define LOG2_TABLE_BITS 6
+#define LOG2_POLY_ORDER 7
+#define LOG2_POLY1_ORDER 11
+extern const struct log2_data
+{
+  double invln2hi;
+  double invln2lo;
+  double poly[LOG2_POLY_ORDER - 1];
+  double poly1[LOG2_POLY1_ORDER - 1];
+  struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
+#if !HAVE_FAST_FMA
+  struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
+#endif
+} __log2_data HIDDEN;
+
+#define POW_LOG_TABLE_BITS 7
+#define POW_LOG_POLY_ORDER 8
+extern const struct pow_log_data
+{
+  double ln2hi;
+  double ln2lo;
+  double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
+  /* Note: the pad field is unused, but allows slightly faster indexing.  */
+  struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
+} __pow_log_data HIDDEN;
+
+extern const struct erff_data
+{
+  float erff_poly_A[6];
+  float erff_poly_B[7];
+} __erff_data HIDDEN;
+
+#define ERF_POLY_A_ORDER 19
+#define ERF_POLY_A_NCOEFFS 10
+#define ERFC_POLY_C_NCOEFFS 16
+#define ERFC_POLY_D_NCOEFFS 18
+#define ERFC_POLY_E_NCOEFFS 14
+#define ERFC_POLY_F_NCOEFFS 17
+extern const struct erf_data
+{
+  double erf_poly_A[ERF_POLY_A_NCOEFFS];
+  double erf_ratio_N_A[5];
+  double erf_ratio_D_A[5];
+  double erf_ratio_N_B[7];
+  double erf_ratio_D_B[6];
+  double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
+  double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
+  double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
+  double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
+} __erf_data HIDDEN;
+
+#define V_EXP_TABLE_BITS 7
+extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
+
+#define V_LOG_TABLE_BITS 7
+extern const struct v_log_data
+{
+  struct
+  {
+    double invc, logc;
+  } table[1 << V_LOG_TABLE_BITS];
+} __v_log_data HIDDEN;
+
+COSMOPOLITAN_C_END_
+#endif /* COSMOPOLITAN_LIBC_TINYMATH_ARM_H_ */
diff --git a/libc/tinymath/asin.c b/libc/tinymath/asin.c
index 0d5afb86f..761f33636 100644
--- a/libc/tinymath/asin.c
+++ b/libc/tinymath/asin.c
@@ -26,12 +26,9 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */
 /*
diff --git a/libc/tinymath/asinf.c b/libc/tinymath/asinf.c
index 36e4320af..ff0eed85a 100644
--- a/libc/tinymath/asinf.c
+++ b/libc/tinymath/asinf.c
@@ -28,14 +28,9 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */
 /*
diff --git a/libc/tinymath/asinh.c b/libc/tinymath/asinh.c
index 62d70372b..fd9967fff 100644
--- a/libc/tinymath/asinh.c
+++ b/libc/tinymath/asinh.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
 #include "libc/tinymath/freebsd.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns inverse hyperbolic sine of 𝑥.
diff --git a/libc/tinymath/asinhf.c b/libc/tinymath/asinhf.c
index bd66d94cf..840e40852 100644
--- a/libc/tinymath/asinhf.c
+++ b/libc/tinymath/asinhf.c
@@ -27,12 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns inverse hyperbolic sine of 𝑥.
diff --git a/libc/tinymath/asinhl.c b/libc/tinymath/asinhl.c
index 16d2790e6..65651f9f4 100644
--- a/libc/tinymath/asinhl.c
+++ b/libc/tinymath/asinhl.c
@@ -5,13 +5,6 @@
 │ FreeBSD lib/msun/src/s_asinhl.c                                              │
 │ Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.     │
 │                                                                              │
-│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.            │
-│                                                                              │
-│ Developed at SunPro, a Sun Microsystems, Inc. business.                      │
-│ Permission to use, copy, modify, and distribute this                         │
-│ software is freely granted, provided that this notice                        │
-│ is preserved.                                                                │
-│                                                                              │
 │ Copyright (c) 1992-2023 The FreeBSD Project.                                 │
 │                                                                              │
 │ Redistribution and use in source and binary forms, with or without           │
@@ -35,19 +28,19 @@
 │ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
 │ SUCH DAMAGE.                                                                 │
 │                                                                              │
+│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.            │
+│                                                                              │
+│ Developed at SunPro, a Sun Microsystems, Inc. business.                      │
+│ Permission to use, copy, modify, and distribute this                         │
+│ software is freely granted, provided that this notice                        │
+│ is preserved.                                                                │
+│                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
-#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
 /* EXP_LARGE is the threshold above which we use asinh(x) ~= log(2x). */
 /* EXP_TINY is the threshold below which we use asinh(x) ~= x. */
diff --git a/libc/tinymath/asinl.c b/libc/tinymath/asinl.c
index 07ff3f32c..bec55a702 100644
--- a/libc/tinymath/asinl.c
+++ b/libc/tinymath/asinl.c
@@ -30,15 +30,8 @@
 #include "libc/tinymath/invtrigl.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_asinl.c */
 /*
diff --git a/libc/tinymath/atan.c b/libc/tinymath/atan.c
index 9314d1ba1..bfcf1fb67 100644
--- a/libc/tinymath/atan.c
+++ b/libc/tinymath/atan.c
@@ -27,15 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */
 /*
diff --git a/libc/tinymath/atan2.c b/libc/tinymath/atan2.c
index 663d979aa..04b79f109 100644
--- a/libc/tinymath/atan2.c
+++ b/libc/tinymath/atan2.c
@@ -38,14 +38,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* atan2(y,x)
  * Method :
diff --git a/libc/tinymath/atan2f.c b/libc/tinymath/atan2f.c
index e62ead380..fe60ca348 100644
--- a/libc/tinymath/atan2f.c
+++ b/libc/tinymath/atan2f.c
@@ -1,182 +1,120 @@
-/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8                               :vi │
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│ FreeBSD lib/msun/src/s_asinhl.c                                              │
+│ Converted to ldbl by David Schultz <das@FreeBSD.ORG> and Bruce D. Evans.     │
 │                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
+│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.            │
 │                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
+│ Developed at SunPro, a Sun Microsystems, Inc. business.                      │
+│ Permission to use, copy, modify, and distribute this                         │
+│ software is freely granted, provided that this notice                        │
+│ is preserved.                                                                │
 │                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
+│ Copyright (c) 1992-2023 The FreeBSD Project.                                 │
+│                                                                              │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
+│                                                                              │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/atanf_common.internal.h"
-#include "libc/tinymath/internal.h"
+#include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+static volatile float
+tiny  = 1.0e-30;
+static const float
+zero  = 0.0,
+pi_o_4  = 7.8539818525e-01, /* 0x3f490fdb */
+pi_o_2  = 1.5707963705e+00, /* 0x3fc90fdb */
+pi      = 3.1415927410e+00; /* 0x40490fdb */
+static volatile float
+pi_lo   = -8.7422776573e-08; /* 0xb3bbbd2e */
 
-#define Pi (0x1.921fb6p+1f)
-#define PiOver2 (0x1.921fb6p+0f)
-#define PiOver4 (0x1.921fb6p-1f)
-#define SignMask (0x80000000)
-
-/* We calculate atan2f by P(n/d), where n and d are similar to the input
-   arguments, and P is a polynomial. The polynomial may underflow.
-   POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
-   for which P underflows, and is used to special-case such inputs.  */
-#define POLY_UFLOW_BOUND 24
-
-static inline int32_t
-biased_exponent (float f)
-{
-  uint32_t fi = asuint (f);
-  int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
-  if (UNLIKELY (ex == 0))
-    {
-      /* Subnormal case - we still need to get the exponent right for subnormal
-	 numbers as division may take us back inside the normal range.  */
-      return ex - __builtin_clz (fi << 9);
-    }
-  return ex;
-}
-
-/* Fast implementation of scalar atan2f. Largest observed error is
-   2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
-   atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
-				       want 0x1.964640p-1.  */
+/**
+ * Returns arc tangent of 𝑦/𝑥.
+ */
 float
-atan2f (float y, float x)
+atan2f(float y, float x)
 {
-  uint32_t ix = asuint (x);
-  uint32_t iy = asuint (y);
+	float z;
+	int32_t k,m,hx,hy,ix,iy;
 
-  uint32_t sign_x = ix & SignMask;
-  uint32_t sign_y = iy & SignMask;
+	GET_FLOAT_WORD(hx,x);
+	ix = hx&0x7fffffff;
+	GET_FLOAT_WORD(hy,y);
+	iy = hy&0x7fffffff;
+	if((ix>0x7f800000)||
+	   (iy>0x7f800000))	/* x or y is NaN */
+	    return nan_mix(x, y);
+	if(hx==0x3f800000) return atanf(y);   /* x=1.0 */
+	m = ((hy>>31)&1)|((hx>>30)&2);	/* 2*sign(x)+sign(y) */
 
-  uint32_t iax = ix & ~SignMask;
-  uint32_t iay = iy & ~SignMask;
-
-  /* x or y is NaN.  */
-  if ((iax > 0x7f800000) || (iay > 0x7f800000))
-    return x + y;
-
-  /* m = 2 * sign(x) + sign(y).  */
-  uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
-
-  /* The following follows glibc ieee754 implementation, except
-     that we do not use +-tiny shifts (non-nearest rounding mode).  */
-
-  int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
-
-  /* Special case for (x, y) either on or very close to the x axis. Either y =
-     0, or y is tiny and x is huge (difference in exponents >=
-     POLY_UFLOW_BOUND). In the second case, we only want to use this special
-     case when x is negative (i.e. quadrants 2 or 3).  */
-  if (UNLIKELY (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
-    {
-      switch (m)
-	{
-	case 0:
-	case 1:
-	  return y; /* atan(+-0,+anything)=+-0.  */
-	case 2:
-	  return Pi; /* atan(+0,-anything) = pi.  */
-	case 3:
-	  return -Pi; /* atan(-0,-anything) =-pi.  */
-	}
-    }
-  /* Special case for (x, y) either on or very close to the y axis. Either x =
-     0, or x is tiny and y is huge (difference in exponents >=
-     POLY_UFLOW_BOUND).  */
-  if (UNLIKELY (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
-    return sign_y ? -PiOver2 : PiOver2;
-
-  /* x is INF.  */
-  if (iax == 0x7f800000)
-    {
-      if (iay == 0x7f800000)
-	{
-	  switch (m)
-	    {
-	    case 0:
-	      return PiOver4; /* atan(+INF,+INF).  */
-	    case 1:
-	      return -PiOver4; /* atan(-INF,+INF).  */
-	    case 2:
-	      return 3.0f * PiOver4; /* atan(+INF,-INF).  */
-	    case 3:
-	      return -3.0f * PiOver4; /* atan(-INF,-INF).  */
+    /* when y = 0 */
+	if(iy==0) {
+	    switch(m) {
+		case 0:
+		case 1: return y; 	/* atan(+-0,+anything)=+-0 */
+		case 2: return  pi+tiny;/* atan(+0,-anything) = pi */
+		case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */
 	    }
 	}
-      else
-	{
-	  switch (m)
-	    {
-	    case 0:
-	      return 0.0f; /* atan(+...,+INF).  */
-	    case 1:
-	      return -0.0f; /* atan(-...,+INF).  */
-	    case 2:
-	      return Pi; /* atan(+...,-INF).  */
-	    case 3:
-	      return -Pi; /* atan(-...,-INF).  */
+    /* when x = 0 */
+	if(ix==0) return (hy<0)?  -pi_o_2-tiny: pi_o_2+tiny;
+
+    /* when x is INF */
+	if(ix==0x7f800000) {
+	    if(iy==0x7f800000) {
+		switch(m) {
+		    case 0: return  pi_o_4+tiny;/* atan(+INF,+INF) */
+		    case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */
+		    case 2: return  (float)3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/
+		    case 3: return (float)-3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/
+		}
+	    } else {
+		switch(m) {
+		    case 0: return  zero  ;	/* atan(+...,+INF) */
+		    case 1: return -zero  ;	/* atan(-...,+INF) */
+		    case 2: return  pi+tiny  ;	/* atan(+...,-INF) */
+		    case 3: return -pi-tiny  ;	/* atan(-...,-INF) */
+		}
 	    }
 	}
-    }
-  /* y is INF.  */
-  if (iay == 0x7f800000)
-    return sign_y ? -PiOver2 : PiOver2;
+    /* when y is INF */
+	if(iy==0x7f800000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
 
-  uint32_t sign_xy = sign_x ^ sign_y;
-
-  float ax = asfloat (iax);
-  float ay = asfloat (iay);
-
-  bool pred_aygtax = (ay > ax);
-
-  /* Set up z for call to atanf.  */
-  float n = pred_aygtax ? -ax : ay;
-  float d = pred_aygtax ? ay : ax;
-  float z = n / d;
-
-  float ret;
-  if (UNLIKELY (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
-    {
-      /* If (x, y) is very close to x axis and x is positive, the polynomial
-	 will underflow and evaluate to z.  */
-      ret = z;
-    }
-  else
-    {
-      /* Work out the correct shift.  */
-      float shift = sign_x ? -2.0f : 0.0f;
-      shift = pred_aygtax ? shift + 1.0f : shift;
-      shift *= PiOver2;
-
-      ret = eval_poly (z, z, shift);
-    }
-
-  /* Account for the sign of x and y.  */
-  return asfloat (asuint (ret) ^ sign_xy);
+    /* compute y/x */
+	k = (iy-ix)>>23;
+	if(k > 26) {			/* |y/x| >  2**26 */
+	    z=pi_o_2+(float)0.5*pi_lo;
+	    m&=1;
+	}
+	else if(k<-26&&hx<0) z=0.0; 	/* 0 > |y|/x > -2**-26 */
+	else z=atanf(fabsf(y/x));	/* safe to do y/x */
+	switch (m) {
+	    case 0: return       z  ;	/* atan(+,+) */
+	    case 1: return      -z  ;	/* atan(-,+) */
+	    case 2: return  pi-(z-pi_lo);/* atan(+,-) */
+	    default: /* case 3 */
+	    	    return  (z-pi_lo)-pi;/* atan(-,-) */
+	}
 }
diff --git a/libc/tinymath/atan2l.c b/libc/tinymath/atan2l.c
index f2f1cac8c..f8b61f6d8 100644
--- a/libc/tinymath/atan2l.c
+++ b/libc/tinymath/atan2l.c
@@ -30,15 +30,9 @@
 #include "libc/tinymath/invtrigl.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_atan2l.c */
 /*
diff --git a/libc/tinymath/atan_common.internal.h b/libc/tinymath/atan_common.internal.h
deleted file mode 100644
index 57c6aaf01..000000000
--- a/libc/tinymath/atan_common.internal.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_
-#include "libc/tinymath/atan_data.internal.h"
-#include "libc/tinymath/estrin_wrap.internal.h"
-#include "libc/tinymath/horner.internal.h"
-COSMOPOLITAN_C_START_
-// clang-format off
-
-/*
- * Double-precision polynomial evaluation function for scalar and vector atan(x)
- * and atan2(y,x).
- *
- * Copyright (c) 2021-2023, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
-
-#if WANT_VMATH
-
-#define DBL_T float64x2_t
-#define P(i) v_f64 (__atan_poly_data.poly[i])
-
-#else
-
-#define DBL_T double
-#define P(i) __atan_poly_data.poly[i]
-
-#endif
-
-/* Polynomial used in fast atan(x) and atan2(y,x) implementations
-   The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
-static inline DBL_T
-eval_poly (DBL_T z, DBL_T az, DBL_T shift)
-{
-  /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
-     full scheme to avoid underflow in x^16.  */
-  DBL_T z2 = z * z;
-  DBL_T x2 = z2 * z2;
-  DBL_T x4 = x2 * x2;
-  DBL_T x8 = x4 * x4;
-  DBL_T y
-    = FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P));
-
-  /* Finalize. y = shift + z + z^3 * P(z^2).  */
-  y = FMA (y, z2 * az, az);
-  y = y + shift;
-
-  return y;
-}
-
-#undef DBL_T
-#undef FMA
-#undef P
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_COMMON_H_ */
diff --git a/libc/tinymath/atan_data.internal.h b/libc/tinymath/atan_data.internal.h
deleted file mode 100644
index 5f3858465..000000000
--- a/libc/tinymath/atan_data.internal.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_
-COSMOPOLITAN_C_START_
-
-#define ATAN_POLY_NCOEFFS 20
-extern const struct atan_poly_data {
-  double poly[ATAN_POLY_NCOEFFS];
-} __atan_poly_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATAN_DATA_H_ */
diff --git a/libc/tinymath/atanf.c b/libc/tinymath/atanf.c
index 59e773020..e2a687bf8 100644
--- a/libc/tinymath/atanf.c
+++ b/libc/tinymath/atanf.c
@@ -27,15 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */
 /*
diff --git a/libc/tinymath/atanf_common.internal.h b/libc/tinymath/atanf_common.internal.h
deleted file mode 100644
index 13368d3a1..000000000
--- a/libc/tinymath/atanf_common.internal.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_
-#include "libc/tinymath/atanf_data.internal.h"
-#include "libc/tinymath/estrin_wrap.internal.h"
-#include "libc/tinymath/hornerf.internal.h"
-COSMOPOLITAN_C_START_
-// clang-format off
-
-#if WANT_VMATH
-
-#define FLT_T float32x4_t
-#define P(i) v_f32 (__atanf_poly_data.poly[i])
-
-#else
-
-#define FLT_T float
-#define P(i) __atanf_poly_data.poly[i]
-
-#endif
-
-/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations
-   The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
-static inline FLT_T
-eval_poly (FLT_T z, FLT_T az, FLT_T shift)
-{
-  /* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
-     a standard implementation using z8 creates spurious underflow
-     in the very last fma (when z^8 is small enough).
-     Therefore, we split the last fma into a mul and and an fma.
-     Horner and single-level Estrin have higher errors that exceed
-     threshold.  */
-  FLT_T z2 = z * z;
-  FLT_T z4 = z2 * z2;
-
-  /* Then assemble polynomial.  */
-  FLT_T y = FMA (z4, z4 * ESTRIN_3_ (z2, z4, P, 4), ESTRIN_3 (z2, z4, P));
-
-  /* Finalize:
-     y = shift + z * P(z^2).  */
-  return FMA (y, z2 * az, az) + shift;
-}
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_COMMON_H_ */
diff --git a/libc/tinymath/atanf_data.internal.h b/libc/tinymath/atanf_data.internal.h
deleted file mode 100644
index ade0fd877..000000000
--- a/libc/tinymath/atanf_data.internal.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_
-COSMOPOLITAN_C_START_
-
-#define ATANF_POLY_NCOEFFS 8
-extern const struct atanf_poly_data {
-  float poly[ATANF_POLY_NCOEFFS];
-} __atanf_poly_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_ATANF_DATA_H_ */
diff --git a/libc/tinymath/atanh.c b/libc/tinymath/atanh.c
index 36bd107dc..f05f0466d 100644
--- a/libc/tinymath/atanh.c
+++ b/libc/tinymath/atanh.c
@@ -27,12 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns inverse hyperbolic tangent of 𝑥.
diff --git a/libc/tinymath/atanhf.c b/libc/tinymath/atanhf.c
index fb00d8452..f2431cc90 100644
--- a/libc/tinymath/atanhf.c
+++ b/libc/tinymath/atanhf.c
@@ -27,12 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns inverse hyperbolic tangent of 𝑥.
diff --git a/libc/tinymath/atanhl.c b/libc/tinymath/atanhl.c
index 208ff9b9c..e10b33c6a 100644
--- a/libc/tinymath/atanhl.c
+++ b/libc/tinymath/atanhl.c
@@ -29,12 +29,7 @@
 #include "libc/tinymath/feval.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns inverse hyperbolic tangent of 𝑥.
diff --git a/libc/tinymath/atanl.c b/libc/tinymath/atanl.c
index ced707ff5..6f3a29942 100644
--- a/libc/tinymath/atanl.c
+++ b/libc/tinymath/atanl.c
@@ -29,15 +29,8 @@
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_atanl.c */
 /*
diff --git a/libc/tinymath/cacos.c b/libc/tinymath/cacos.c
deleted file mode 100644
index 9ad032fd7..000000000
--- a/libc/tinymath/cacos.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-// FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997
-
-/* acos(z) = pi/2 - asin(z) */
-
-double complex cacos(double complex z)
-{
-	z = casin(z);
-	return CMPLX(M_PI_2 - creal(z), -cimag(z));
-}
diff --git a/libc/tinymath/cacosf.c b/libc/tinymath/cacosf.c
deleted file mode 100644
index 1b6bfcd3e..000000000
--- a/libc/tinymath/cacosf.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-// FIXME
-
-float complex cacosf(float complex z)
-{
-	z = casinf(z);
-	return CMPLXF((float)M_PI_2 - crealf(z), -cimagf(z));
-}
diff --git a/libc/tinymath/cacosh.c b/libc/tinymath/cacosh.c
deleted file mode 100644
index 4eccd729c..000000000
--- a/libc/tinymath/cacosh.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/* acosh(z) = i acos(z) */
-double complex cacosh(double complex z)
-{
-	int zineg = signbit(cimag(z));
-	z = cacos(z);
-	if (zineg) return CMPLX(cimag(z), -creal(z));
-	else       return CMPLX(-cimag(z), creal(z));
-}
-
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(cacosh, cacoshl);
-#endif
diff --git a/libc/tinymath/cacoshf.c b/libc/tinymath/cacoshf.c
deleted file mode 100644
index a029f08da..000000000
--- a/libc/tinymath/cacoshf.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-float complex cacoshf(float complex z)
-{
-	int zineg = signbit(cimagf(z));
-	z = cacosf(z);
-	if (zineg) return CMPLXF(cimagf(z), -crealf(z));
-	else       return CMPLXF(-cimagf(z), crealf(z));
-}
diff --git a/libc/tinymath/cacoshl.c b/libc/tinymath/cacoshl.c
index 1c5e96cb4..5e129462a 100644
--- a/libc/tinymath/cacoshl.c
+++ b/libc/tinymath/cacoshl.c
@@ -28,12 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 long double complex cacoshl(long double complex z)
 {
diff --git a/libc/tinymath/cacosl.c b/libc/tinymath/cacosl.c
index 0e09d4753..adfff0b5d 100644
--- a/libc/tinymath/cacosl.c
+++ b/libc/tinymath/cacosl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 long double complex cacosl(long double complex z) {
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
diff --git a/libc/tinymath/casinf.c b/libc/tinymath/casinf.c
deleted file mode 100644
index 1d0b35414..000000000
--- a/libc/tinymath/casinf.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
-
-// FIXME
-
-float complex casinf(float complex z)
-{
-	float complex w;
-	float x, y;
-
-	x = crealf(z);
-	y = cimagf(z);
-	w = CMPLXF(1.0 - (x - y)*(x + y), -2.0*x*y);
-	float complex r = clogf(CMPLXF(-y, x) + csqrtf(w));
-	return CMPLXF(cimagf(r), -crealf(r));
-}
diff --git a/libc/tinymath/casinh.c b/libc/tinymath/casinh.c
deleted file mode 100644
index 5a11878a7..000000000
--- a/libc/tinymath/casinh.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
-
-/* asinh(z) = -i asin(i z) */
-
-double complex casinh(double complex z)
-{
-	z = casin(CMPLX(-cimag(z), creal(z)));
-	return CMPLX(cimag(z), -creal(z));
-}
diff --git a/libc/tinymath/casinhf.c b/libc/tinymath/casinhf.c
deleted file mode 100644
index 503eb3437..000000000
--- a/libc/tinymath/casinhf.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
-
-float complex casinhf(float complex z)
-{
-	z = casinf(CMPLXF(-cimagf(z), crealf(z)));
-	return CMPLXF(cimagf(z), -crealf(z));
-}
diff --git a/libc/tinymath/casinhl.c b/libc/tinymath/casinhl.c
index 5766ff6d9..193d8d53a 100644
--- a/libc/tinymath/casinhl.c
+++ b/libc/tinymath/casinhl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 long double complex casinhl(long double complex z) {
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
diff --git a/libc/tinymath/casinl.c b/libc/tinymath/casinl.c
index 618d236be..78913278d 100644
--- a/libc/tinymath/casinl.c
+++ b/libc/tinymath/casinl.c
@@ -27,12 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 long double complex casinl(long double complex z)
 {
diff --git a/libc/tinymath/catan.c b/libc/tinymath/catan.c
deleted file mode 100644
index b9c6b7faa..000000000
--- a/libc/tinymath/catan.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-
-/* origin: OpenBSD /usr/src/lib/libm/src/s_catan.c */
-/*
- * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-/*
- *      Complex circular arc tangent
- *
- *
- * SYNOPSIS:
- *
- * double complex catan();
- * double complex z, w;
- *
- * w = catan (z);
- *
- *
- * DESCRIPTION:
- *
- * If
- *     z = x + iy,
- *
- * then
- *          1       (    2x     )
- * Re w  =  - arctan(-----------)  +  k PI
- *          2       (     2    2)
- *                  (1 - x  - y )
- *
- *               ( 2         2)
- *          1    (x  +  (y+1) )
- * Im w  =  - log(------------)
- *          4    ( 2         2)
- *               (x  +  (y-1) )
- *
- * Where k is an arbitrary integer.
- *
- * catan(z) = -i catanh(iz).
- *
- * ACCURACY:
- *
- *                      Relative error:
- * arithmetic   domain     # trials      peak         rms
- *    DEC       -10,+10      5900       1.3e-16     7.8e-18
- *    IEEE      -10,+10     30000       2.3e-15     8.5e-17
- * The check catan( ctan(z) )  =  z, with |x| and |y| < PI/2,
- * had peak relative error 1.5e-16, rms relative error
- * 2.9e-17.  See also clog().
- */
-
-
-#define MAXNUM 1.0e308
-
-static const double DP1 = 3.14159265160560607910E0;
-static const double DP2 = 1.98418714791870343106E-9;
-static const double DP3 = 1.14423774522196636802E-17;
-
-static double _redupi(double x)
-{
-	double t;
-	long i;
-
-	t = x/M_PI;
-	if (t >= 0.0)
-		t += 0.5;
-	else
-		t -= 0.5;
-
-	i = t;  /* the multiple */
-	t = i;
-	t = ((x - t * DP1) - t * DP2) - t * DP3;
-	return t;
-}
-
-double complex catan(double complex z)
-{
-	double complex w;
-	double a, t, x, x2, y;
-
-	x = creal(z);
-	y = cimag(z);
-
-	x2 = x * x;
-	a = 1.0 - x2 - (y * y);
-
-	t = 0.5 * atan2(2.0 * x, a);
-	w = _redupi(t);
-
-	t = y - 1.0;
-	a = x2 + (t * t);
-
-	t = y + 1.0;
-	a = (x2 + t * t)/a;
-	w = CMPLX(w, 0.25 * log(a));
-	return w;
-}
-
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(catan, catanl);
-#endif
diff --git a/libc/tinymath/catanf.c b/libc/tinymath/catanf.c
deleted file mode 100644
index ce5f2cdb0..000000000
--- a/libc/tinymath/catanf.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
-/* origin: OpenBSD /usr/src/lib/libm/src/s_catanf.c */
-/*
- * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-/*
- *      Complex circular arc tangent
- *
- *
- * SYNOPSIS:
- *
- * float complex catanf();
- * float complex z, w;
- *
- * w = catanf( z );
- *
- *
- * DESCRIPTION:
- *
- * If
- *     z = x + iy,
- *
- * then
- *          1       (    2x     )
- * Re w  =  - arctan(-----------)  +  k PI
- *          2       (     2    2)
- *                  (1 - x  - y )
- *
- *               ( 2         2)
- *          1    (x  +  (y+1) )
- * Im w  =  - log(------------)
- *          4    ( 2         2)
- *               (x  +  (y-1) )
- *
- * Where k is an arbitrary integer.
- *
- *
- * ACCURACY:
- *
- *                      Relative error:
- * arithmetic   domain     # trials      peak         rms
- *    IEEE      -10,+10     30000        2.3e-6      5.2e-8
- */
-
-
-#define MAXNUMF 1.0e38F
-
-static const double DP1 = 3.140625;
-static const double DP2 = 9.67502593994140625E-4;
-static const double DP3 = 1.509957990978376432E-7;
-
-static float _redupif(float xx)
-{
-	float x, t;
-	long i;
-
-	x = xx;
-	t = x/(float)M_PI;
-	if (t >= 0.0f)
-		t += 0.5f;
-	else
-		t -= 0.5f;
-
-	i = t;  /* the multiple */
-	t = i;
-	t = ((x - t * DP1) - t * DP2) - t * DP3;
-	return t;
-}
-
-float complex catanf(float complex z)
-{
-	float complex w;
-	float a, t, x, x2, y;
-
-	x = crealf(z);
-	y = cimagf(z);
-
-	x2 = x * x;
-	a = 1.0f - x2 - (y * y);
-
-	t = 0.5f * atan2f(2.0f * x, a);
-	w = _redupif(t);
-
-	t = y - 1.0f;
-	a = x2 + (t * t);
-
-	t = y + 1.0f;
-	a = (x2 + (t * t))/a;
-	w = CMPLXF(w, 0.25f * logf(a));
-	return w;
-}
diff --git a/libc/tinymath/catanh.c b/libc/tinymath/catanh.c
deleted file mode 100644
index 087239547..000000000
--- a/libc/tinymath/catanh.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
-
-/* atanh = -i atan(i z) */
-
-double complex catanh(double complex z)
-{
-	z = catan(CMPLX(-cimag(z), creal(z)));
-	return CMPLX(cimag(z), -creal(z));
-}
diff --git a/libc/tinymath/catanhf.c b/libc/tinymath/catanhf.c
deleted file mode 100644
index 90f491207..000000000
--- a/libc/tinymath/catanhf.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
-╚──────────────────────────────────────────────────────────────────────────────╝
-│                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
-│                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
-│                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
-│                                                                              │
-╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
-
-float complex catanhf(float complex z)
-{
-	z = catanf(CMPLXF(-cimagf(z), crealf(z)));
-	return CMPLXF(cimagf(z), -crealf(z));
-}
diff --git a/libc/tinymath/catanhl.c b/libc/tinymath/catanhl.c
index 299aadece..59c2a88fc 100644
--- a/libc/tinymath/catanhl.c
+++ b/libc/tinymath/catanhl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 long double complex catanhl(long double complex z) {
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
diff --git a/libc/tinymath/catanl.c b/libc/tinymath/catanl.c
index 3f87d3a3b..633d155ea 100644
--- a/libc/tinymath/catanl.c
+++ b/libc/tinymath/catanl.c
@@ -22,15 +22,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("openbsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /*
  *      Complex circular arc tangent
diff --git a/libc/tinymath/catrig.c b/libc/tinymath/catrig.c
new file mode 100644
index 000000000..84d301fe3
--- /dev/null
+++ b/libc/tinymath/catrig.c
@@ -0,0 +1,651 @@
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│ FreeBSD lib/msun/src/catrig.c                                                │
+│ Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>            │
+│                                                                              │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
+│                                                                              │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_complex_notice");
+
+#undef isinf
+#define isinf(x)	(fabs(x) == INFINITY)
+#undef isnan
+#define isnan(x)	((x) != (x))
+#define	raise_inexact()	do { volatile float _j = 1 + tiny; (void)_j; } while(0)
+#undef signbit
+#define signbit(x)	(__builtin_signbit(x))
+
+/* We need that DBL_EPSILON^2/128 is larger than FOUR_SQRT_MIN. */
+static const double
+A_crossover =		10, /* Hull et al suggest 1.5, but 10 works better */
+B_crossover =		0.6417,			/* suggested by Hull et al */
+FOUR_SQRT_MIN =		0x1p-509,		/* >= 4 * sqrt(DBL_MIN) */
+QUARTER_SQRT_MAX =	0x1p509,		/* <= sqrt(DBL_MAX) / 4 */
+m_e =			2.7182818284590452e0,	/*  0x15bf0a8b145769.0p-51 */
+m_ln2 =			6.9314718055994531e-1,	/*  0x162e42fefa39ef.0p-53 */
+pio2_hi =		1.5707963267948966e0,	/*  0x1921fb54442d18.0p-52 */
+RECIP_EPSILON =		1 / DBL_EPSILON,
+SQRT_3_EPSILON =	2.5809568279517849e-8,	/*  0x1bb67ae8584caa.0p-78 */
+SQRT_6_EPSILON =	3.6500241499888571e-8,	/*  0x13988e1409212e.0p-77 */
+SQRT_MIN =		0x1p-511;		/* >= sqrt(DBL_MIN) */
+
+static const volatile double
+pio2_lo =		6.1232339957367659e-17;	/*  0x11a62633145c07.0p-106 */
+static const volatile float
+tiny =			0x1p-100;
+
+static double complex clog_for_large_values(double complex z);
+
+/*
+ * Testing indicates that all these functions are accurate up to 4 ULP.
+ * The functions casin(h) and cacos(h) are about 2.5 times slower than asinh.
+ * The functions catan(h) are a little under 2 times slower than atanh.
+ *
+ * The code for casinh, casin, cacos, and cacosh comes first.  The code is
+ * rather complicated, and the four functions are highly interdependent.
+ *
+ * The code for catanh and catan comes at the end.  It is much simpler than
+ * the other functions, and the code for these can be disconnected from the
+ * rest of the code.
+ */
+
+/*
+ *			================================
+ *			| casinh, casin, cacos, cacosh |
+ *			================================
+ */
+
+/*
+ * The algorithm is very close to that in "Implementing the complex arcsine
+ * and arccosine functions using exception handling" by T. E. Hull, Thomas F.
+ * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on
+ * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335,
+ * http://dl.acm.org/citation.cfm?id=275324.
+ *
+ * Throughout we use the convention z = x + I*y.
+ *
+ * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B)
+ * where
+ * A = (|z+I| + |z-I|) / 2
+ * B = (|z+I| - |z-I|) / 2 = y/A
+ *
+ * These formulas become numerically unstable:
+ *   (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that
+ *       is, Re(casinh(z)) is close to 0);
+ *   (b) for Im(casinh(z)) when z is close to either of the intervals
+ *       [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is
+ *       close to PI/2).
+ *
+ * These numerical problems are overcome by defining
+ * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2
+ * Then if A < A_crossover, we use
+ *   log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1)))
+ *   A-1 = f(x, 1+y) + f(x, 1-y)
+ * and if B > B_crossover, we use
+ *   asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y)))
+ *   A-y = f(x, y+1) + f(x, y-1)
+ * where without loss of generality we have assumed that x and y are
+ * non-negative.
+ *
+ * Much of the difficulty comes because the intermediate computations may
+ * produce overflows or underflows.  This is dealt with in the paper by Hull
+ * et al by using exception handling.  We do this by detecting when
+ * computations risk underflow or overflow.  The hardest part is handling the
+ * underflows when computing f(a, b).
+ *
+ * Note that the function f(a, b) does not appear explicitly in the paper by
+ * Hull et al, but the idea may be found on pages 308 and 309.  Introducing the
+ * function f(a, b) allows us to concentrate many of the clever tricks in this
+ * paper into one function.
+ */
+
+/*
+ * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2.
+ * Pass hypot(a, b) as the third argument.
+ */
+static inline double
+f(double a, double b, double hypot_a_b)
+{
+	if (b < 0)
+		return ((hypot_a_b - b) / 2);
+	if (b == 0)
+		return (a / 2);
+	return (a * a / (hypot_a_b + b) / 2);
+}
+
+/*
+ * All the hard work is contained in this function.
+ * x and y are assumed positive or zero, and less than RECIP_EPSILON.
+ * Upon return:
+ * rx = Re(casinh(z)) = -Im(cacos(y + I*x)).
+ * B_is_usable is set to 1 if the value of B is usable.
+ * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y.
+ * If returning sqrt_A2my2 has potential to result in an underflow, it is
+ * rescaled, and new_y is similarly rescaled.
+ */
+static inline void
+do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B,
+    double *sqrt_A2my2, double *new_y)
+{
+	double R, S, A; /* A, B, R, and S are as in Hull et al. */
+	double Am1, Amy; /* A-1, A-y. */
+
+	R = hypot(x, y + 1);		/* |z+I| */
+	S = hypot(x, y - 1);		/* |z-I| */
+
+	/* A = (|z+I| + |z-I|) / 2 */
+	A = (R + S) / 2;
+	/*
+	 * Mathematically A >= 1.  There is a small chance that this will not
+	 * be so because of rounding errors.  So we will make certain it is
+	 * so.
+	 */
+	if (A < 1)
+		A = 1;
+
+	if (A < A_crossover) {
+		/*
+		 * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y).
+		 * rx = log1p(Am1 + sqrt(Am1*(A+1)))
+		 */
+		if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) {
+			/*
+			 * fp is of order x^2, and fm = x/2.
+			 * A = 1 (inexactly).
+			 */
+			*rx = sqrt(x);
+		} else if (x >= DBL_EPSILON * fabs(y - 1)) {
+			/*
+			 * Underflow will not occur because
+			 * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN
+			 */
+			Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
+			*rx = log1p(Am1 + sqrt(Am1 * (A + 1)));
+		} else if (y < 1) {
+			/*
+			 * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and
+			 * A = 1 (inexactly).
+			 */
+			*rx = x / sqrt((1 - y) * (1 + y));
+		} else {		/* if (y > 1) */
+			/*
+			 * A-1 = y-1 (inexactly).
+			 */
+			*rx = log1p((y - 1) + sqrt((y - 1) * (y + 1)));
+		}
+	} else {
+		*rx = log(A + sqrt(A * A - 1));
+	}
+
+	*new_y = y;
+
+	if (y < FOUR_SQRT_MIN) {
+		/*
+		 * Avoid a possible underflow caused by y/A.  For casinh this
+		 * would be legitimate, but will be picked up by invoking atan2
+		 * later on.  For cacos this would not be legitimate.
+		 */
+		*B_is_usable = 0;
+		*sqrt_A2my2 = A * (2 / DBL_EPSILON);
+		*new_y = y * (2 / DBL_EPSILON);
+		return;
+	}
+
+	/* B = (|z+I| - |z-I|) / 2 = y/A */
+	*B = y / A;
+	*B_is_usable = 1;
+
+	if (*B > B_crossover) {
+		*B_is_usable = 0;
+		/*
+		 * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1).
+		 * sqrt_A2my2 = sqrt(Amy*(A+y))
+		 */
+		if (y == 1 && x < DBL_EPSILON / 128) {
+			/*
+			 * fp is of order x^2, and fm = x/2.
+			 * A = 1 (inexactly).
+			 */
+			*sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2);
+		} else if (x >= DBL_EPSILON * fabs(y - 1)) {
+			/*
+			 * Underflow will not occur because
+			 * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN
+			 * and
+			 * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN
+			 */
+			Amy = f(x, y + 1, R) + f(x, y - 1, S);
+			*sqrt_A2my2 = sqrt(Amy * (A + y));
+		} else if (y > 1) {
+			/*
+			 * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and
+			 * A = y (inexactly).
+			 *
+			 * y < RECIP_EPSILON.  So the following
+			 * scaling should avoid any underflow problems.
+			 */
+			*sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y /
+			    sqrt((y + 1) * (y - 1));
+			*new_y = y * (4 / DBL_EPSILON / DBL_EPSILON);
+		} else {		/* if (y < 1) */
+			/*
+			 * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and
+			 * A = 1 (inexactly).
+			 */
+			*sqrt_A2my2 = sqrt((1 - y) * (1 + y));
+		}
+	}
+}
+
+/*
+ * casinh(z) = z + O(z^3)   as z -> 0
+ *
+ * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2)   as z -> infinity
+ * The above formula works for the imaginary part as well, because
+ * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3)
+ *    as z -> infinity, uniformly in y
+ */
+double complex
+casinh(double complex z)
+{
+	double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
+	int B_is_usable;
+	double complex w;
+
+	x = creal(z);
+	y = cimag(z);
+	ax = fabs(x);
+	ay = fabs(y);
+
+	if (isnan(x) || isnan(y)) {
+		/* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */
+		if (isinf(x))
+			return (CMPLX(x, y + y));
+		/* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */
+		if (isinf(y))
+			return (CMPLX(y, x + x));
+		/* casinh(NaN + I*0) = NaN + I*0 */
+		if (y == 0)
+			return (CMPLX(x + x, y));
+		/*
+		 * All other cases involving NaN return NaN + I*NaN.
+		 * C99 leaves it optional whether to raise invalid if one of
+		 * the arguments is not NaN, so we opt not to raise it.
+		 */
+		return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
+	}
+
+	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
+		/* clog...() will raise inexact unless x or y is infinite. */
+		if (signbit(x) == 0)
+			w = clog_for_large_values(z) + m_ln2;
+		else
+			w = clog_for_large_values(-z) + m_ln2;
+		return (CMPLX(copysign(creal(w), x), copysign(cimag(w), y)));
+	}
+
+	/* Avoid spuriously raising inexact for z = 0. */
+	if (x == 0 && y == 0)
+		return (z);
+
+	/* All remaining cases are inexact. */
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
+		return (z);
+
+	do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
+	if (B_is_usable)
+		ry = asin(B);
+	else
+		ry = atan2(new_y, sqrt_A2my2);
+	return (CMPLX(copysign(rx, x), copysign(ry, y)));
+}
+
+/*
+ * casin(z) = reverse(casinh(reverse(z)))
+ * where reverse(x + I*y) = y + I*x = I*conj(z).
+ */
+double complex
+casin(double complex z)
+{
+	double complex w = casinh(CMPLX(cimag(z), creal(z)));
+
+	return (CMPLX(cimag(w), creal(w)));
+}
+
+/*
+ * cacos(z) = PI/2 - casin(z)
+ * but do the computation carefully so cacos(z) is accurate when z is
+ * close to 1.
+ *
+ * cacos(z) = PI/2 - z + O(z^3)   as z -> 0
+ *
+ * cacos(z) = -sign(y)*I*clog(z) + O(1/z^2)   as z -> infinity
+ * The above formula works for the real part as well, because
+ * Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3)
+ *    as z -> infinity, uniformly in y
+ */
+double complex
+cacos(double complex z)
+{
+	double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
+	int sx, sy;
+	int B_is_usable;
+	double complex w;
+
+	x = creal(z);
+	y = cimag(z);
+	sx = signbit(x);
+	sy = signbit(y);
+	ax = fabs(x);
+	ay = fabs(y);
+
+	if (isnan(x) || isnan(y)) {
+		/* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */
+		if (isinf(x))
+			return (CMPLX(y + y, -INFINITY));
+		/* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */
+		if (isinf(y))
+			return (CMPLX(x + x, -y));
+		/* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */
+		if (x == 0)
+			return (CMPLX(pio2_hi + pio2_lo, y + y));
+		/*
+		 * All other cases involving NaN return NaN + I*NaN.
+		 * C99 leaves it optional whether to raise invalid if one of
+		 * the arguments is not NaN, so we opt not to raise it.
+		 */
+		return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
+	}
+
+	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
+		/* clog...() will raise inexact unless x or y is infinite. */
+		w = clog_for_large_values(z);
+		rx = fabs(cimag(w));
+		ry = creal(w) + m_ln2;
+		if (sy == 0)
+			ry = -ry;
+		return (CMPLX(rx, ry));
+	}
+
+	/* Avoid spuriously raising inexact for z = 1. */
+	if (x == 1 && y == 0)
+		return (CMPLX(0, -y));
+
+	/* All remaining cases are inexact. */
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
+		return (CMPLX(pio2_hi - (x - pio2_lo), -y));
+
+	do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
+	if (B_is_usable) {
+		if (sx == 0)
+			rx = acos(B);
+		else
+			rx = acos(-B);
+	} else {
+		if (sx == 0)
+			rx = atan2(sqrt_A2mx2, new_x);
+		else
+			rx = atan2(sqrt_A2mx2, -new_x);
+	}
+	if (sy == 0)
+		ry = -ry;
+	return (CMPLX(rx, ry));
+}
+
+/*
+ * cacosh(z) = I*cacos(z) or -I*cacos(z)
+ * where the sign is chosen so Re(cacosh(z)) >= 0.
+ */
+double complex
+cacosh(double complex z)
+{
+	double complex w;
+	double rx, ry;
+
+	w = cacos(z);
+	rx = creal(w);
+	ry = cimag(w);
+	/* cacosh(NaN + I*NaN) = NaN + I*NaN */
+	if (isnan(rx) && isnan(ry))
+		return (CMPLX(ry, rx));
+	/* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */
+	/* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */
+	if (isnan(rx))
+		return (CMPLX(fabs(ry), rx));
+	/* cacosh(0 + I*NaN) = NaN + I*NaN */
+	if (isnan(ry))
+		return (CMPLX(ry, ry));
+	return (CMPLX(fabs(ry), copysign(rx, cimag(z))));
+}
+
+/*
+ * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON.
+ */
+static double complex
+clog_for_large_values(double complex z)
+{
+	double x, y;
+	double ax, ay, t;
+
+	x = creal(z);
+	y = cimag(z);
+	ax = fabs(x);
+	ay = fabs(y);
+	if (ax < ay) {
+		t = ax;
+		ax = ay;
+		ay = t;
+	}
+
+	/*
+	 * Avoid overflow in hypot() when x and y are both very large.
+	 * Divide x and y by E, and then add 1 to the logarithm.  This
+	 * depends on E being larger than sqrt(2), since the return value of
+	 * hypot cannot overflow if neither argument is greater in magnitude
+	 * than 1/sqrt(2) of the maximum value of the return type.  Likewise
+	 * this determines the necessary threshold for using this method
+	 * (however, actually use 1/2 instead as it is simpler).
+	 *
+	 * Dividing by E causes an insignificant loss of accuracy; however
+	 * this method is still poor since it is uneccessarily slow.
+	 */
+	if (ax > DBL_MAX / 2)
+		return (CMPLX(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x)));
+
+	/*
+	 * Avoid overflow when x or y is large.  Avoid underflow when x or
+	 * y is small.
+	 */
+	if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
+		return (CMPLX(log(hypot(x, y)), atan2(y, x)));
+
+	return (CMPLX(log(ax * ax + ay * ay) / 2, atan2(y, x)));
+}
+
+/*
+ *				=================
+ *				| catanh, catan |
+ *				=================
+ */
+
+/*
+ * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow).
+ * Assumes x*x and y*y will not overflow.
+ * Assumes x and y are finite.
+ * Assumes y is non-negative.
+ * Assumes fabs(x) >= DBL_EPSILON.
+ */
+static inline double
+sum_squares(double x, double y)
+{
+
+	/* Avoid underflow when y is small. */
+	if (y < SQRT_MIN)
+		return (x * x);
+
+	return (x * x + y * y);
+}
+
+/*
+ * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y).
+ * Assumes x and y are not NaN, and one of x and y is larger than
+ * RECIP_EPSILON.  We avoid unwarranted underflow.  It is important to not use
+ * the code creal(1/z), because the imaginary part may produce an unwanted
+ * underflow.
+ * This is only called in a context where inexact is always raised before
+ * the call, so no effort is made to avoid or force inexact.
+ */
+static inline double
+real_part_reciprocal(double x, double y)
+{
+	double scale;
+	uint32_t hx, hy;
+	int32_t ix, iy;
+
+	/*
+	 * This code is inspired by the C99 document n1124.pdf, Section G.5.1,
+	 * example 2.
+	 */
+	GET_HIGH_WORD(hx, x);
+	ix = hx & 0x7ff00000;
+	GET_HIGH_WORD(hy, y);
+	iy = hy & 0x7ff00000;
+#undef BIAS
+#define	BIAS	(DBL_MAX_EXP - 1)
+/* XXX more guard digits are useful iff there is extra precision. */
+#define	CUTOFF	(DBL_MANT_DIG / 2 + 1)	/* just half or 1 guard digit */
+	if (ix - iy >= CUTOFF << 20 || isinf(x))
+		return (1 / x);		/* +-Inf -> +-0 is special */
+	if (iy - ix >= CUTOFF << 20)
+		return (x / y / y);	/* should avoid double div, but hard */
+	if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20)
+		return (x / (x * x + y * y));
+	scale = 1;
+	SET_HIGH_WORD(scale, 0x7ff00000 - ix);	/* 2**(1-ilogb(x)) */
+	x *= scale;
+	y *= scale;
+	return (x / (x * x + y * y) * scale);
+}
+
+/*
+ * catanh(z) = log((1+z)/(1-z)) / 2
+ *           = log1p(4*x / |z-1|^2) / 4
+ *             + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2
+ *
+ * catanh(z) = z + O(z^3)   as z -> 0
+ *
+ * catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3)   as z -> infinity
+ * The above formula works for the real part as well, because
+ * Re(catanh(z)) = x/|z|^2 + O(x/z^4)
+ *    as z -> infinity, uniformly in x
+ */
+double complex
+catanh(double complex z)
+{
+	double x, y, ax, ay, rx, ry;
+
+	x = creal(z);
+	y = cimag(z);
+	ax = fabs(x);
+	ay = fabs(y);
+
+	/* This helps handle many cases. */
+	if (y == 0 && ax <= 1)
+		return (CMPLX(atanh(x), y));
+
+	/* To ensure the same accuracy as atan(), and to filter out z = 0. */
+	if (x == 0)
+		return (CMPLX(x, atan(y)));
+
+	if (isnan(x) || isnan(y)) {
+		/* catanh(+-Inf + I*NaN) = +-0 + I*NaN */
+		if (isinf(x))
+			return (CMPLX(copysign(0, x), y + y));
+		/* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
+		if (isinf(y))
+			return (CMPLX(copysign(0, x),
+			    copysign(pio2_hi + pio2_lo, y)));
+		/*
+		 * All other cases involving NaN return NaN + I*NaN.
+		 * C99 leaves it optional whether to raise invalid if one of
+		 * the arguments is not NaN, so we opt not to raise it.
+		 */
+		return (CMPLX(nan_mix(x, y), nan_mix(x, y)));
+	}
+
+	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
+		return (CMPLX(real_part_reciprocal(x, y),
+		    copysign(pio2_hi + pio2_lo, y)));
+
+	if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
+		/*
+		 * z = 0 was filtered out above.  All other cases must raise
+		 * inexact, but this is the only case that needs to do it
+		 * explicitly.
+		 */
+		raise_inexact();
+		return (z);
+	}
+
+	if (ax == 1 && ay < DBL_EPSILON)
+		rx = (m_ln2 - log(ay)) / 2;
+	else
+		rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4;
+
+	if (ax == 1)
+		ry = atan2(2, -ay) / 2;
+	else if (ay < DBL_EPSILON)
+		ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2;
+	else
+		ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
+
+	return (CMPLX(copysign(rx, x), copysign(ry, y)));
+}
+
+/*
+ * catan(z) = reverse(catanh(reverse(z)))
+ * where reverse(x + I*y) = y + I*x = I*conj(z).
+ */
+double complex
+catan(double complex z)
+{
+	double complex w = catanh(CMPLX(cimag(z), creal(z)));
+
+	return (CMPLX(cimag(w), creal(w)));
+}
+
+#if LDBL_MANT_DIG == 53
+__weak_reference(cacosh, cacoshl);
+__weak_reference(cacos, cacosl);
+__weak_reference(casinh, casinhl);
+__weak_reference(casin, casinl);
+__weak_reference(catanh, catanhl);
+__weak_reference(catan, catanl);
+#endif
diff --git a/libc/tinymath/catrigf.c b/libc/tinymath/catrigf.c
new file mode 100644
index 000000000..0e21adfe4
--- /dev/null
+++ b/libc/tinymath/catrigf.c
@@ -0,0 +1,377 @@
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│ FreeBSD lib/msun/src/catrigf.c                                               │
+│ Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG>            │
+│                                                                              │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
+│                                                                              │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_complex_notice");
+
+#undef isinf
+#define isinf(x)	(fabsf(x) == INFINITY)
+#undef isnan
+#define isnan(x)	((x) != (x))
+#define	raise_inexact()	do { volatile float _j = 1 + tiny; (void)_j; } while(0)
+#undef signbit
+#define signbit(x)	(__builtin_signbitf(x))
+
+static const float
+A_crossover =		10,
+B_crossover =		0.6417,
+FOUR_SQRT_MIN =		0x1p-61,
+QUARTER_SQRT_MAX =	0x1p61,
+m_e =			2.7182818285e0,		/*  0xadf854.0p-22 */
+m_ln2 =			6.9314718056e-1,	/*  0xb17218.0p-24 */
+pio2_hi =		1.5707962513e0,		/*  0xc90fda.0p-23 */
+RECIP_EPSILON =		1 / FLT_EPSILON,
+SQRT_3_EPSILON =	5.9801995673e-4,	/*  0x9cc471.0p-34 */
+SQRT_6_EPSILON =	8.4572793338e-4,	/*  0xddb3d7.0p-34 */
+SQRT_MIN =		0x1p-63;
+
+static const volatile float
+pio2_lo =		7.5497899549e-8,	/*  0xa22169.0p-47 */
+tiny =			0x1p-100;
+
+static float complex clog_for_large_values(float complex z);
+
+static inline float
+f(float a, float b, float hypot_a_b)
+{
+	if (b < 0)
+		return ((hypot_a_b - b) / 2);
+	if (b == 0)
+		return (a / 2);
+	return (a * a / (hypot_a_b + b) / 2);
+}
+
+static inline void
+do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B,
+    float *sqrt_A2my2, float *new_y)
+{
+	float R, S, A;
+	float Am1, Amy;
+
+	R = hypotf(x, y + 1);
+	S = hypotf(x, y - 1);
+
+	A = (R + S) / 2;
+	if (A < 1)
+		A = 1;
+
+	if (A < A_crossover) {
+		if (y == 1 && x < FLT_EPSILON * FLT_EPSILON / 128) {
+			*rx = sqrtf(x);
+		} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
+			Am1 = f(x, 1 + y, R) + f(x, 1 - y, S);
+			*rx = log1pf(Am1 + sqrtf(Am1 * (A + 1)));
+		} else if (y < 1) {
+			*rx = x / sqrtf((1 - y) * (1 + y));
+		} else {
+			*rx = log1pf((y - 1) + sqrtf((y - 1) * (y + 1)));
+		}
+	} else {
+		*rx = logf(A + sqrtf(A * A - 1));
+	}
+
+	*new_y = y;
+
+	if (y < FOUR_SQRT_MIN) {
+		*B_is_usable = 0;
+		*sqrt_A2my2 = A * (2 / FLT_EPSILON);
+		*new_y = y * (2 / FLT_EPSILON);
+		return;
+	}
+
+	*B = y / A;
+	*B_is_usable = 1;
+
+	if (*B > B_crossover) {
+		*B_is_usable = 0;
+		if (y == 1 && x < FLT_EPSILON / 128) {
+			*sqrt_A2my2 = sqrtf(x) * sqrtf((A + y) / 2);
+		} else if (x >= FLT_EPSILON * fabsf(y - 1)) {
+			Amy = f(x, y + 1, R) + f(x, y - 1, S);
+			*sqrt_A2my2 = sqrtf(Amy * (A + y));
+		} else if (y > 1) {
+			*sqrt_A2my2 = x * (4 / FLT_EPSILON / FLT_EPSILON) * y /
+			    sqrtf((y + 1) * (y - 1));
+			*new_y = y * (4 / FLT_EPSILON / FLT_EPSILON);
+		} else {
+			*sqrt_A2my2 = sqrtf((1 - y) * (1 + y));
+		}
+	}
+}
+
+float complex
+casinhf(float complex z)
+{
+	float x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y;
+	int B_is_usable;
+	float complex w;
+
+	x = crealf(z);
+	y = cimagf(z);
+	ax = fabsf(x);
+	ay = fabsf(y);
+
+	if (isnan(x) || isnan(y)) {
+		if (isinf(x))
+			return (CMPLXF(x, y + y));
+		if (isinf(y))
+			return (CMPLXF(y, x + x));
+		if (y == 0)
+			return (CMPLXF(x + x, y));
+		return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
+	}
+
+	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
+		if (signbit(x) == 0)
+			w = clog_for_large_values(z) + m_ln2;
+		else
+			w = clog_for_large_values(-z) + m_ln2;
+		return (CMPLXF(copysignf(crealf(w), x),
+		    copysignf(cimagf(w), y)));
+	}
+
+	if (x == 0 && y == 0)
+		return (z);
+
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
+		return (z);
+
+	do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y);
+	if (B_is_usable)
+		ry = asinf(B);
+	else
+		ry = atan2f(new_y, sqrt_A2my2);
+	return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
+}
+
+float complex
+casinf(float complex z)
+{
+	float complex w = casinhf(CMPLXF(cimagf(z), crealf(z)));
+
+	return (CMPLXF(cimagf(w), crealf(w)));
+}
+
+float complex
+cacosf(float complex z)
+{
+	float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x;
+	int sx, sy;
+	int B_is_usable;
+	float complex w;
+
+	x = crealf(z);
+	y = cimagf(z);
+	sx = signbit(x);
+	sy = signbit(y);
+	ax = fabsf(x);
+	ay = fabsf(y);
+
+	if (isnan(x) || isnan(y)) {
+		if (isinf(x))
+			return (CMPLXF(y + y, -INFINITY));
+		if (isinf(y))
+			return (CMPLXF(x + x, -y));
+		if (x == 0)
+			return (CMPLXF(pio2_hi + pio2_lo, y + y));
+		return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
+	}
+
+	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
+		w = clog_for_large_values(z);
+		rx = fabsf(cimagf(w));
+		ry = crealf(w) + m_ln2;
+		if (sy == 0)
+			ry = -ry;
+		return (CMPLXF(rx, ry));
+	}
+
+	if (x == 1 && y == 0)
+		return (CMPLXF(0, -y));
+
+	raise_inexact();
+
+	if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4)
+		return (CMPLXF(pio2_hi - (x - pio2_lo), -y));
+
+	do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x);
+	if (B_is_usable) {
+		if (sx == 0)
+			rx = acosf(B);
+		else
+			rx = acosf(-B);
+	} else {
+		if (sx == 0)
+			rx = atan2f(sqrt_A2mx2, new_x);
+		else
+			rx = atan2f(sqrt_A2mx2, -new_x);
+	}
+	if (sy == 0)
+		ry = -ry;
+	return (CMPLXF(rx, ry));
+}
+
+float complex
+cacoshf(float complex z)
+{
+	float complex w;
+	float rx, ry;
+
+	w = cacosf(z);
+	rx = crealf(w);
+	ry = cimagf(w);
+	if (isnan(rx) && isnan(ry))
+		return (CMPLXF(ry, rx));
+	if (isnan(rx))
+		return (CMPLXF(fabsf(ry), rx));
+	if (isnan(ry))
+		return (CMPLXF(ry, ry));
+	return (CMPLXF(fabsf(ry), copysignf(rx, cimagf(z))));
+}
+
+static float complex
+clog_for_large_values(float complex z)
+{
+	float x, y;
+	float ax, ay, t;
+
+	x = crealf(z);
+	y = cimagf(z);
+	ax = fabsf(x);
+	ay = fabsf(y);
+	if (ax < ay) {
+		t = ax;
+		ax = ay;
+		ay = t;
+	}
+
+	if (ax > FLT_MAX / 2)
+		return (CMPLXF(logf(hypotf(x / m_e, y / m_e)) + 1,
+		    atan2f(y, x)));
+
+	if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN)
+		return (CMPLXF(logf(hypotf(x, y)), atan2f(y, x)));
+
+	return (CMPLXF(logf(ax * ax + ay * ay) / 2, atan2f(y, x)));
+}
+
+static inline float
+sum_squares(float x, float y)
+{
+
+	if (y < SQRT_MIN)
+		return (x * x);
+
+	return (x * x + y * y);
+}
+
+static inline float
+real_part_reciprocal(float x, float y)
+{
+	float scale;
+	uint32_t hx, hy;
+	int32_t ix, iy;
+
+	GET_FLOAT_WORD(hx, x);
+	ix = hx & 0x7f800000;
+	GET_FLOAT_WORD(hy, y);
+	iy = hy & 0x7f800000;
+#undef BIAS
+#define	BIAS	(FLT_MAX_EXP - 1)
+#define	CUTOFF	(FLT_MANT_DIG / 2 + 1)
+	if (ix - iy >= CUTOFF << 23 || isinf(x))
+		return (1 / x);
+	if (iy - ix >= CUTOFF << 23)
+		return (x / y / y);
+	if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23)
+		return (x / (x * x + y * y));
+	SET_FLOAT_WORD(scale, 0x7f800000 - ix);
+	x *= scale;
+	y *= scale;
+	return (x / (x * x + y * y) * scale);
+}
+
+float complex
+catanhf(float complex z)
+{
+	float x, y, ax, ay, rx, ry;
+
+	x = crealf(z);
+	y = cimagf(z);
+	ax = fabsf(x);
+	ay = fabsf(y);
+
+	if (y == 0 && ax <= 1)
+		return (CMPLXF(atanhf(x), y));
+
+	if (x == 0)
+		return (CMPLXF(x, atanf(y)));
+
+	if (isnan(x) || isnan(y)) {
+		if (isinf(x))
+			return (CMPLXF(copysignf(0, x), y + y));
+		if (isinf(y))
+			return (CMPLXF(copysignf(0, x),
+			    copysignf(pio2_hi + pio2_lo, y)));
+		return (CMPLXF(nan_mix(x, y), nan_mix(x, y)));
+	}
+
+	if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
+		return (CMPLXF(real_part_reciprocal(x, y),
+		    copysignf(pio2_hi + pio2_lo, y)));
+
+	if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
+		raise_inexact();
+		return (z);
+	}
+
+	if (ax == 1 && ay < FLT_EPSILON)
+		rx = (m_ln2 - logf(ay)) / 2;
+	else
+		rx = log1pf(4 * ax / sum_squares(ax - 1, ay)) / 4;
+
+	if (ax == 1)
+		ry = atan2f(2, -ay) / 2;
+	else if (ay < FLT_EPSILON)
+		ry = atan2f(2 * ay, (1 - ax) * (1 + ax)) / 2;
+	else
+		ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
+
+	return (CMPLXF(copysignf(rx, x), copysignf(ry, y)));
+}
+
+float complex
+catanf(float complex z)
+{
+	float complex w = catanhf(CMPLXF(cimagf(z), crealf(z)));
+
+	return (CMPLXF(cimagf(w), crealf(w)));
+}
diff --git a/libc/tinymath/cbrt.c b/libc/tinymath/cbrt.c
index b88b7e297..55e324164 100644
--- a/libc/tinymath/cbrt.c
+++ b/libc/tinymath/cbrt.c
@@ -26,15 +26,9 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
 /*
diff --git a/libc/tinymath/cbrtf.c b/libc/tinymath/cbrtf.c
index 0ec14e758..088ac83a7 100644
--- a/libc/tinymath/cbrtf.c
+++ b/libc/tinymath/cbrtf.c
@@ -26,15 +26,9 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
 /*
diff --git a/libc/tinymath/cbrtl.c b/libc/tinymath/cbrtl.c
index ccd1040f1..4e1e2863c 100644
--- a/libc/tinymath/cbrtl.c
+++ b/libc/tinymath/cbrtl.c
@@ -28,17 +28,9 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtl.c */
 /*-
diff --git a/libc/tinymath/ccos.c b/libc/tinymath/ccos.c
index 35d3f137f..ccfddd586 100644
--- a/libc/tinymath/ccos.c
+++ b/libc/tinymath/ccos.c
@@ -17,8 +17,11 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-#include "libc/math.h"
 
 double complex ccos(double complex z) {
   return ccosh(CMPLX(-cimag(z), creal(z)));
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__weak_reference(ccos, ccosl);
+#endif
diff --git a/libc/tinymath/ccosf.c b/libc/tinymath/ccosf.c
index 5613099ed..6356d54cb 100644
--- a/libc/tinymath/ccosf.c
+++ b/libc/tinymath/ccosf.c
@@ -28,14 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+__static_yoink("musl_libc_notice");
 
 float complex ccosf(float complex z)
 {
diff --git a/libc/tinymath/ccosh.c b/libc/tinymath/ccosh.c
index 28bb3cbc2..c45f39ee3 100644
--- a/libc/tinymath/ccosh.c
+++ b/libc/tinymath/ccosh.c
@@ -29,14 +29,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_ccosh.c */
 /*-
diff --git a/libc/tinymath/ccoshf.c b/libc/tinymath/ccoshf.c
index d2e137044..d7e02c489 100644
--- a/libc/tinymath/ccoshf.c
+++ b/libc/tinymath/ccoshf.c
@@ -29,14 +29,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_ccoshf.c */
 /*-
diff --git a/libc/tinymath/ccoshl.c b/libc/tinymath/ccoshl.c
index 7d22ca69d..bc388ffc3 100644
--- a/libc/tinymath/ccoshl.c
+++ b/libc/tinymath/ccoshl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 //FIXME
 long double complex ccoshl(long double complex z)
diff --git a/libc/tinymath/ccosl.c b/libc/tinymath/ccosl.c
index 76548b9d6..a16144507 100644
--- a/libc/tinymath/ccosl.c
+++ b/libc/tinymath/ccosl.c
@@ -26,17 +26,11 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
 long double complex ccosl(long double complex z) {
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-	return ccos(z);
-#else
 	return ccoshl(CMPLXL(-cimagl(z), creall(z)));
-#endif
 }
+
+#endif
diff --git a/libc/tinymath/ceil.c b/libc/tinymath/ceil.c
index 4ba6ec6da..dd5c4f6eb 100644
--- a/libc/tinymath/ceil.c
+++ b/libc/tinymath/ceil.c
@@ -31,12 +31,7 @@
 #ifndef __llvm__
 #include "third_party/intel/smmintrin.internal.h"
 #endif
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
diff --git a/libc/tinymath/ceilf.c b/libc/tinymath/ceilf.c
index 2dad43065..ab34911fd 100644
--- a/libc/tinymath/ceilf.c
+++ b/libc/tinymath/ceilf.c
@@ -30,12 +30,7 @@
 #ifndef __llvm__
 #include "third_party/intel/smmintrin.internal.h"
 #endif
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns smallest integral value not less than 𝑥.
diff --git a/libc/tinymath/ceill.c b/libc/tinymath/ceill.c
index 921a93836..42c733662 100644
--- a/libc/tinymath/ceill.c
+++ b/libc/tinymath/ceill.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns smallest integral value not less than 𝑥.
diff --git a/libc/tinymath/cexp.c b/libc/tinymath/cexp.c
index c6fd6e086..5f09c24e8 100644
--- a/libc/tinymath/cexp.c
+++ b/libc/tinymath/cexp.c
@@ -28,13 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_cexp.c */
 /*-
diff --git a/libc/tinymath/cexpf.c b/libc/tinymath/cexpf.c
index 200b9de4f..a26b30feb 100644
--- a/libc/tinymath/cexpf.c
+++ b/libc/tinymath/cexpf.c
@@ -28,13 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_cexpf.c */
 /*-
diff --git a/libc/tinymath/cexpl.c b/libc/tinymath/cexpl.c
index a58b94d2e..1800be2d5 100644
--- a/libc/tinymath/cexpl.c
+++ b/libc/tinymath/cexpl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 //FIXME
 long double complex cexpl(long double complex z)
diff --git a/libc/tinymath/clog.c b/libc/tinymath/clog.c
index 60c48fb9c..77b4cb284 100644
--- a/libc/tinymath/clog.c
+++ b/libc/tinymath/clog.c
@@ -28,14 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+__static_yoink("musl_libc_notice");
 
 // FIXME
 
@@ -49,3 +42,7 @@ double complex clog(double complex z)
 	phi = carg(z);
 	return CMPLX(log(r), phi);
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__weak_reference(clog, clogl);
+#endif
diff --git a/libc/tinymath/clogf.c b/libc/tinymath/clogf.c
index dcc3a4b29..e658c58f3 100644
--- a/libc/tinymath/clogf.c
+++ b/libc/tinymath/clogf.c
@@ -28,14 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+__static_yoink("musl_libc_notice");
 
 // FIXME
 
diff --git a/libc/tinymath/clogl.c b/libc/tinymath/clogl.c
index 868f44eb5..d55a153b8 100644
--- a/libc/tinymath/clogl.c
+++ b/libc/tinymath/clogl.c
@@ -27,21 +27,10 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
 #include "libc/math.h"
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 #include "libc/tinymath/complex.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-long double complex clogl(long double complex z)
-{
-	return clog(z);
-}
-#else
-// FIXME
 long double complex clogl(long double complex z)
 {
 	long double r, phi;
@@ -49,4 +38,5 @@ long double complex clogl(long double complex z)
 	phi = cargl(z);
 	return CMPLXL(logl(r), phi);
 }
+
 #endif
diff --git a/libc/tinymath/cos.c b/libc/tinymath/cos.c
index 00776746a..6933ebac5 100644
--- a/libc/tinymath/cos.c
+++ b/libc/tinymath/cos.c
@@ -28,15 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
 #include "libc/tinymath/kernel.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */
 /*
@@ -85,7 +78,6 @@ asm(".include \"libc/disclaimer.inc\"");
 
 /**
  * Returns cosine of 𝑥.
- * @note should take ~5ns
  */
 double cos(double x)
 {
diff --git a/libc/tinymath/cosdf.c b/libc/tinymath/cosdf.c
index 0fb5e3da1..3d35a6436 100644
--- a/libc/tinymath/cosdf.c
+++ b/libc/tinymath/cosdf.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */
 /*
diff --git a/libc/tinymath/cosf.c b/libc/tinymath/cosf.c
index 6e12a7b14..0e26909f7 100644
--- a/libc/tinymath/cosf.c
+++ b/libc/tinymath/cosf.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,97 +25,63 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-#include "libc/tinymath/feval.internal.h"
-#include "libc/tinymath/kernel.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- * Optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-/* Small multiples of pi/2 rounded to double precision. */
-static const double
-c1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
-c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
-c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
-c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
+#include "libc/tinymath/sincosf.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /**
- * Returns cosine of 𝑥.
- * @note should take about 5ns
+ * Returns cosine of y.
+ *
+ * This is a fast cosf implementation. The worst-case ULP is 0.5607, and
+ * the maximum relative error is 0.5303 * 2^-23. A single-step range
+ * reduction is used for small values. Large inputs have their range
+ * reduced using fast integer arithmetic.
+ *
+ * @raise EDOM and FE_INVALID if y is an infinity
  */
-float cosf(float x)
+float
+cosf (float y)
 {
-	double y;
-	uint32_t ix;
-	unsigned n, sign;
+  double x = y;
+  double s;
+  int n;
+  const sincos_t *p = &__sincosf_table[0];
 
-	GET_FLOAT_WORD(ix, x);
-	sign = ix >> 31;
-	ix &= 0x7fffffff;
+  if (abstop12 (y) < abstop12 (pio4f))
+    {
+      double x2 = x * x;
 
-	if (ix <= 0x3f490fda) {  /* |x| ~<= pi/4 */
-		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			/* raise inexact if x != 0 */
-			FORCE_EVAL(x + 0x1p120f);
-			return 1.0f;
-		}
-		return __cosdf(x);
-	}
-	if (ix <= 0x407b53d1) {  /* |x| ~<= 5*pi/4 */
-		if (ix > 0x4016cbe3)  /* |x|  ~> 3*pi/4 */
-			return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
-		else {
-			if (sign)
-				return __sindf(x + c1pio2);
-			else
-				return __sindf(c1pio2 - x);
-		}
-	}
-	if (ix <= 0x40e231d5) {  /* |x| ~<= 9*pi/4 */
-		if (ix > 0x40afeddf)  /* |x| ~> 7*pi/4 */
-			return __cosdf(sign ? x+c4pio2 : x-c4pio2);
-		else {
-			if (sign)
-				return __sindf(-x - c3pio2);
-			else
-				return __sindf(x - c3pio2);
-		}
-	}
+      if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+	return 1.0f;
 
-	/* cos(Inf or NaN) is NaN */
-	if (ix >= 0x7f800000)
-		return x-x;
+      return sinf_poly (x, x2, p, 1);
+    }
+  else if (likely (abstop12 (y) < abstop12 (120.0f)))
+    {
+      x = reduce_fast (x, p, &n);
 
-	/* general argument reduction needed */
-	n = __rem_pio2f(x,&y);
-	switch (n&3) {
-	case 0: return  __cosdf(y);
-	case 1: return  __sindf(-y);
-	case 2: return -__cosdf(y);
-	default:
-		return  __sindf(y);
-	}
+      /* Setup the signs for sin and cos.  */
+      s = p->sign[n & 3];
+
+      if (n & 2)
+	p = &__sincosf_table[1];
+
+      return sinf_poly (x * s, x * x, p, n ^ 1);
+    }
+  else if (abstop12 (y) < abstop12 (INFINITY))
+    {
+      uint32_t xi = asuint (y);
+      int sign = xi >> 31;
+
+      x = reduce_large (xi, &n);
+
+      /* Setup signs for sin and cos - include original sign.  */
+      s = p->sign[(n + sign) & 3];
+
+      if ((n + sign) & 2)
+	p = &__sincosf_table[1];
+
+      return sinf_poly (x * s, x * x, p, n ^ 1);
+    }
+  else
+    return __math_invalidf (y);
 }
diff --git a/libc/tinymath/cosh.c b/libc/tinymath/cosh.c
index 3c98b9253..4db136b38 100644
--- a/libc/tinymath/cosh.c
+++ b/libc/tinymath/cosh.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/expo.internal.h"
 #include "libc/tinymath/feval.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns hyperbolic cosine of 𝑥.
diff --git a/libc/tinymath/coshf.c b/libc/tinymath/coshf.c
index 1e352d1c4..e0bef68dc 100644
--- a/libc/tinymath/coshf.c
+++ b/libc/tinymath/coshf.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/expo.internal.h"
 #include "libc/tinymath/feval.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns hyperbolic cosine of 𝑥.
diff --git a/libc/tinymath/coshl.c b/libc/tinymath/coshl.c
index f7cfb1ed3..c2cb21c6e 100644
--- a/libc/tinymath/coshl.c
+++ b/libc/tinymath/coshl.c
@@ -38,16 +38,11 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
-#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
+
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
 #if LDBL_MAX_EXP != 0x4000
 /* We also require the usual expsign encoding. */
diff --git a/libc/tinymath/cosl.c b/libc/tinymath/cosl.c
index 1368cc825..6642af10e 100644
--- a/libc/tinymath/cosl.c
+++ b/libc/tinymath/cosl.c
@@ -30,12 +30,7 @@
 #include "libc/tinymath/kernel.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns cosine of 𝑥.
diff --git a/libc/tinymath/cpow.c b/libc/tinymath/cpow.c
index 4a0aa4b10..5d58e14d9 100644
--- a/libc/tinymath/cpow.c
+++ b/libc/tinymath/cpow.c
@@ -26,18 +26,15 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-#include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /* pow(z, c) = exp(c log(z)), See C99 G.6.4.1 */
-
 double complex cpow(double complex z, double complex c)
 {
 	return cexp(c * clog(z));
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__weak_reference(cpow, cpowl);
+#endif
diff --git a/libc/tinymath/cpowf.c b/libc/tinymath/cpowf.c
index 671bad57e..9d3656faa 100644
--- a/libc/tinymath/cpowf.c
+++ b/libc/tinymath/cpowf.c
@@ -28,14 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+__static_yoink("musl_libc_notice");
 
 float complex cpowf(float complex z, float complex c)
 {
diff --git a/libc/tinymath/cpowl.c b/libc/tinymath/cpowl.c
index 389642031..1fa76aee6 100644
--- a/libc/tinymath/cpowl.c
+++ b/libc/tinymath/cpowl.c
@@ -26,21 +26,12 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-long double complex cpowl(long double complex z, long double complex c)
-{
-	return cpow(z, c);
-}
-#else
 long double complex cpowl(long double complex z, long double complex c)
 {
 	return cexpl(c * clogl(z));
 }
+
 #endif
diff --git a/libc/tinymath/csin.c b/libc/tinymath/csin.c
index 23d0f27c2..eeb1a30f4 100644
--- a/libc/tinymath/csin.c
+++ b/libc/tinymath/csin.c
@@ -28,19 +28,15 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+__static_yoink("musl_libc_notice");
 
 /* sin(z) = -i sinh(i z) */
-
 double complex csin(double complex z)
 {
 	z = csinh(CMPLX(-cimag(z), creal(z)));
 	return CMPLX(cimag(z), -creal(z));
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__weak_reference(csin, csinl);
+#endif
diff --git a/libc/tinymath/csinf.c b/libc/tinymath/csinf.c
index 24511f17b..0883e26cd 100644
--- a/libc/tinymath/csinf.c
+++ b/libc/tinymath/csinf.c
@@ -28,12 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
 
 
 
diff --git a/libc/tinymath/csinh.c b/libc/tinymath/csinh.c
index 8f59253da..22da546e0 100644
--- a/libc/tinymath/csinh.c
+++ b/libc/tinymath/csinh.c
@@ -29,15 +29,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_csinh.c */
 /*-
diff --git a/libc/tinymath/csinhf.c b/libc/tinymath/csinhf.c
index 07471ce81..d75ac0c07 100644
--- a/libc/tinymath/csinhf.c
+++ b/libc/tinymath/csinhf.c
@@ -29,14 +29,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_csinhf.c */
 /*-
diff --git a/libc/tinymath/csinhl.c b/libc/tinymath/csinhl.c
index 1a83181fc..f85b8b26a 100644
--- a/libc/tinymath/csinhl.c
+++ b/libc/tinymath/csinhl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 //FIXME
 long double complex csinhl(long double complex z)
diff --git a/libc/tinymath/csinl.c b/libc/tinymath/csinl.c
index ddf599d90..02b5d0178 100644
--- a/libc/tinymath/csinl.c
+++ b/libc/tinymath/csinl.c
@@ -26,22 +26,13 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-long double complex csinl(long double complex z)
-{
-	return csin(z);
-}
-#else
 long double complex csinl(long double complex z)
 {
 	z = csinhl(CMPLXL(-cimagl(z), creall(z)));
 	return CMPLXL(cimagl(z), -creall(z));
 }
+
 #endif
diff --git a/libc/tinymath/csqrt.c b/libc/tinymath/csqrt.c
index 70583e7a2..d2bccdc38 100644
--- a/libc/tinymath/csqrt.c
+++ b/libc/tinymath/csqrt.c
@@ -28,12 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_csqrt.c */
 /*-
diff --git a/libc/tinymath/csqrtf.c b/libc/tinymath/csqrtf.c
index 27c1e6889..61d142f11 100644
--- a/libc/tinymath/csqrtf.c
+++ b/libc/tinymath/csqrtf.c
@@ -28,13 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_csqrtf.c */
 /*-
diff --git a/libc/tinymath/csqrtl.c b/libc/tinymath/csqrtl.c
index c6b1e7524..df534fd9f 100644
--- a/libc/tinymath/csqrtl.c
+++ b/libc/tinymath/csqrtl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 //FIXME
 long double complex csqrtl(long double complex z)
diff --git a/libc/tinymath/ctan.c b/libc/tinymath/ctan.c
index b3e2de71d..8e20f8757 100644
--- a/libc/tinymath/ctan.c
+++ b/libc/tinymath/ctan.c
@@ -28,14 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+__static_yoink("musl_libc_notice");
 
 /* tan(z) = -i tanh(i z) */
 
diff --git a/libc/tinymath/ctanf.c b/libc/tinymath/ctanf.c
index f584c5ea9..fe65ba01b 100644
--- a/libc/tinymath/ctanf.c
+++ b/libc/tinymath/ctanf.c
@@ -28,14 +28,7 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+__static_yoink("musl_libc_notice");
 
 float complex ctanf(float complex z)
 {
diff --git a/libc/tinymath/ctanh.c b/libc/tinymath/ctanh.c
index 94a1776d8..721e303da 100644
--- a/libc/tinymath/ctanh.c
+++ b/libc/tinymath/ctanh.c
@@ -28,16 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-ctahnh (BSD-2 License)\\n\
-Copyright (c) 2011 David Schultz <das@FreeBSD.ORG>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_ctanh.c */
 /*-
diff --git a/libc/tinymath/ctanhf.c b/libc/tinymath/ctanhf.c
index 29d7e3f1b..b806dcf58 100644
--- a/libc/tinymath/ctanhf.c
+++ b/libc/tinymath/ctanhf.c
@@ -28,13 +28,8 @@
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
+__static_yoink("freebsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_ctanhf.c */
 /*-
diff --git a/libc/tinymath/ctanhl.c b/libc/tinymath/ctanhl.c
index 6aa599c3c..43061dcf9 100644
--- a/libc/tinymath/ctanhl.c
+++ b/libc/tinymath/ctanhl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 //FIXME
 long double complex ctanhl(long double complex z)
diff --git a/libc/tinymath/ctanl.c b/libc/tinymath/ctanl.c
index d8602d9b7..27f0d94ca 100644
--- a/libc/tinymath/ctanl.c
+++ b/libc/tinymath/ctanl.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("musl_libc_notice");
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex ctanl(long double complex z)
diff --git a/libc/tinymath/erf.c b/libc/tinymath/erf.c
index 029a41840..cd2948c0a 100644
--- a/libc/tinymath/erf.c
+++ b/libc/tinymath/erf.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,317 +25,247 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/math.h"
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
+#define C 0x1.b0ac16p-1
+#define PA __erf_data.erf_poly_A
+#define NA __erf_data.erf_ratio_N_A
+#define DA __erf_data.erf_ratio_D_A
+#define NB __erf_data.erf_ratio_N_B
+#define DB __erf_data.erf_ratio_D_B
+#define PC __erf_data.erfc_poly_C
+#define PD __erf_data.erfc_poly_D
+#define PE __erf_data.erfc_poly_E
+#define PF __erf_data.erfc_poly_F
 
-/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* double erf(double x)
- * double erfc(double x)
- *                           x
- *                    2      |\
- *     erf(x)  =  ---------  | exp(-t*t)dt
- *                 sqrt(pi) \|
- *                           0
- *
- *     erfc(x) =  1-erf(x)
- *  Note that
- *              erf(-x) = -erf(x)
- *              erfc(-x) = 2 - erfc(x)
- *
- * Method:
- *      1. For |x| in [0, 0.84375]
- *          erf(x)  = x + x*R(x^2)
- *          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
- *                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
- *         where R = P/Q where P is an odd poly of degree 8 and
- *         Q is an odd poly of degree 10.
- *                                               -57.90
- *                      | R - (erf(x)-x)/x | <= 2
- *
- *
- *         Remark. The formula is derived by noting
- *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
- *         and that
- *          2/sqrt(pi) = 1.128379167095512573896158903121545171688
- *         is close to one. The interval is chosen because the fix
- *         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
- *         near 0.6174), and by some experiment, 0.84375 is chosen to
- *         guarantee the error is less than one ulp for erf.
- *
- *      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
- *         c = 0.84506291151 rounded to single (24 bits)
- *              erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
- *              erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
- *                        1+(c+P1(s)/Q1(s))    if x < 0
- *              |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
- *         Remark: here we use the taylor series expansion at x=1.
- *              erf(1+s) = erf(1) + s*Poly(s)
- *                       = 0.845.. + P1(s)/Q1(s)
- *         That is, we use rational approximation to approximate
- *                      erf(1+s) - (c = (single)0.84506291151)
- *         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
- *         where
- *              P1(s) = degree 6 poly in s
- *              Q1(s) = degree 6 poly in s
- *
- *      3. For x in [1.25,1/0.35(~2.857143)],
- *              erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
- *              erf(x)  = 1 - erfc(x)
- *         where
- *              R1(z) = degree 7 poly in z, (z=1/x^2)
- *              S1(z) = degree 8 poly in z
- *
- *      4. For x in [1/0.35,28]
- *              erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
- *                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
- *                      = 2.0 - tiny            (if x <= -6)
- *              erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
- *              erf(x)  = sign(x)*(1.0 - tiny)
- *         where
- *              R2(z) = degree 6 poly in z, (z=1/x^2)
- *              S2(z) = degree 7 poly in z
- *
- *      Note1:
- *         To compute exp(-x*x-0.5625+R/S), let s be a single
- *         precision number and s := x; then
- *              -x*x = -s*s + (s-x)*(s+x)
- *              exp(-x*x-0.5626+R/S) =
- *                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
- *      Note2:
- *         Here 4 and 5 make use of the asymptotic series
- *                        exp(-x*x)
- *              erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
- *                        x*sqrt(pi)
- *         We use rational approximation to approximate
- *              g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
- *         Here is the error bound for R1/S1 and R2/S2
- *              |R1/S1 - f(x)|  < 2**(-62.57)
- *              |R2/S2 - f(x)|  < 2**(-61.52)
- *
- *      5. For inf > x >= 28
- *              erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
- *              erfc(x) = tiny*tiny (raise underflow) if x > 0
- *                      = 2 - tiny if x<0
- *
- *      7. Special case:
- *              erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
- *              erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
- *              erfc/erf(NaN) is NaN
- */
-
-static const double
-erx  = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
-/*
- * Coefficients for approximation to  erf on [0,0.84375]
- */
-efx8 =  1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
-pp0  =  1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
-pp1  = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
-pp2  = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
-pp3  = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
-pp4  = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
-qq1  =  3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
-qq2  =  6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
-qq3  =  5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
-qq4  =  1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
-qq5  = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
-/*
- * Coefficients for approximation to  erf  in [0.84375,1.25]
- */
-pa0  = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
-pa1  =  4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
-pa2  = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
-pa3  =  3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
-pa4  = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
-pa5  =  3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
-pa6  = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
-qa1  =  1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
-qa2  =  5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
-qa3  =  7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
-qa4  =  1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
-qa5  =  1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
-qa6  =  1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
-/*
- * Coefficients for approximation to  erfc in [1.25,1/0.35]
- */
-ra0  = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
-ra1  = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
-ra2  = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
-ra3  = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
-ra4  = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
-ra5  = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
-ra6  = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
-ra7  = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
-sa1  =  1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
-sa2  =  1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
-sa3  =  4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
-sa4  =  6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
-sa5  =  4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
-sa6  =  1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
-sa7  =  6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
-sa8  = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
-/*
- * Coefficients for approximation to  erfc in [1/.35,28]
- */
-rb0  = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
-rb1  = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
-rb2  = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
-rb3  = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
-rb4  = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
-rb5  = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
-rb6  = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
-sb1  =  3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
-sb2  =  3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
-sb3  =  1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
-sb4  =  3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
-sb5  =  2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
-sb6  =  4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
-sb7  = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
-
-#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
-#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
-#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
-#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
-#define INSERT_WORDS(d,hi,lo)                     \
-do {                                              \
-  (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
-} while (0)
-#define GET_HIGH_WORD(hi,d)                       \
-do {                                              \
-  (hi) = asuint64(d) >> 32;                       \
-} while (0)
-#define GET_LOW_WORD(lo,d)                        \
-do {                                              \
-  (lo) = (uint32_t)asuint64(d);                   \
-} while (0)
-#define SET_HIGH_WORD(d,hi)                       \
-  INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
-#define SET_LOW_WORD(d,lo)                        \
-  INSERT_WORDS(d, asuint64(d)>>32, lo)
-
-static double erfc1(double x)
+/* Top 32 bits of a double.  */
+static inline uint32_t
+top32 (double x)
 {
-	double_t s,P,Q;
-
-	s = fabs(x) - 1;
-	P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
-	Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
-	return 1 - erx - P/Q;
-}
-
-static double erfc2(uint32_t ix, double x)
-{
-	double_t s,R,S;
-	double z;
-
-	if (ix < 0x3ff40000)  /* |x| < 1.25 */
-		return erfc1(x);
-
-	x = fabs(x);
-	s = 1/(x*x);
-	if (ix < 0x4006db6d) {  /* |x| < 1/.35 ~ 2.85714 */
-		R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
-		     ra5+s*(ra6+s*ra7))))));
-		S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
-		     sa5+s*(sa6+s*(sa7+s*sa8)))))));
-	} else {                /* |x| > 1/.35 */
-		R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
-		     rb5+s*rb6)))));
-		S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
-		     sb5+s*(sb6+s*sb7))))));
-	}
-	z = x;
-	SET_LOW_WORD(z,0);
-	return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x;
+  return asuint64 (x) >> 32;
 }
 
 /**
- * Returns error function of 𝑥.
+ * Returns error function of x.
+ *
+ * Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0.
+ *
+ * @raise ERANGE on underflow
  */
-double erf(double x)
+double
+erf (double x)
 {
-	double r,s,z,y;
-	uint32_t ix;
-	int sign;
+  /* Get top word and sign.  */
+  uint32_t ix = top32 (x);
+  uint32_t ia = ix & 0x7fffffff;
+  uint32_t sign = ix >> 31;
 
-	GET_HIGH_WORD(ix, x);
-	sign = ix>>31;
-	ix &= 0x7fffffff;
-	if (ix >= 0x7ff00000) {
-		/* erf(nan)=nan, erf(+-inf)=+-1 */
-		return 1-2*sign + 1/x;
+  /* Normalized and subnormal cases */
+  if (ia < 0x3feb0000)
+    { /* a = |x| < 0.84375.  */
+
+      if (ia < 0x3e300000)
+	{ /* a < 2^(-28).  */
+	  if (ia < 0x00800000)
+	    { /* a < 2^(-1015).  */
+	      double y =  fma (TwoOverSqrtPiMinusOne, x, x);
+	      return check_uflow (y);
+	    }
+	  return x + TwoOverSqrtPiMinusOne * x;
 	}
-	if (ix < 0x3feb0000) {  /* |x| < 0.84375 */
-		if (ix < 0x3e300000) {  /* |x| < 2**-28 */
-			/* avoid underflow */
-			return 0.125*(8*x + efx8*x);
-		}
-		z = x*x;
-		r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
-		s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-		y = r/s;
-		return x + x*y;
+
+      double x2 = x * x;
+
+      if (ia < 0x3fe00000)
+	{ /* a < 0.5  - Use polynomial approximation.  */
+	  double r1 = fma (x2, PA[1], PA[0]);
+	  double r2 = fma (x2, PA[3], PA[2]);
+	  double r3 = fma (x2, PA[5], PA[4]);
+	  double r4 = fma (x2, PA[7], PA[6]);
+	  double r5 = fma (x2, PA[9], PA[8]);
+	  double x4 = x2 * x2;
+	  double r = r5;
+	  r = fma (x4, r, r4);
+	  r = fma (x4, r, r3);
+	  r = fma (x4, r, r2);
+	  r = fma (x4, r, r1);
+	  return fma (r, x, x); /* This fma is crucial for accuracy.  */
 	}
-	if (ix < 0x40180000)  /* 0.84375 <= |x| < 6 */
-		y = 1 - erfc2(ix,x);
-	else
-		y = 1 - 0x1p-1022;
-	return sign ? -y : y;
+      else
+	{ /* 0.5 <= a < 0.84375 - Use rational approximation.  */
+	  double x4, x8, r1n, r2n, r1d, r2d, r3d;
+
+	  r1n = fma (x2, NA[1], NA[0]);
+	  x4 = x2 * x2;
+	  r2n = fma (x2, NA[3], NA[2]);
+	  x8 = x4 * x4;
+	  r1d = fma (x2, DA[0], 1.0);
+	  r2d = fma (x2, DA[2], DA[1]);
+	  r3d = fma (x2, DA[4], DA[3]);
+	  double P = r1n + x4 * r2n + x8 * NA[4];
+	  double Q = r1d + x4 * r2d + x8 * r3d;
+	  return fma (P / Q, x, x);
+	}
+    }
+  else if (ia < 0x3ff40000)
+    { /* 0.84375 <= |x| < 1.25.  */
+      double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
+      double a = fabs (x) - 1.0;
+      r1n = fma (a, NB[1], NB[0]);
+      a2 = a * a;
+      r1d = fma (a, DB[0], 1.0);
+      a4 = a2 * a2;
+      r2n = fma (a, NB[3], NB[2]);
+      a6 = a4 * a2;
+      r2d = fma (a, DB[2], DB[1]);
+      r3n = fma (a, NB[5], NB[4]);
+      r3d = fma (a, DB[4], DB[3]);
+      r4n = NB[6];
+      r4d = DB[5];
+      double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
+      double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
+      if (sign)
+	return -C - P / Q;
+      else
+	return C + P / Q;
+    }
+  else if (ia < 0x40000000)
+    { /* 1.25 <= |x| < 2.0.  */
+      double a = fabs (x);
+      a = a - 1.25;
+
+      double r1 = fma (a, PC[1], PC[0]);
+      double r2 = fma (a, PC[3], PC[2]);
+      double r3 = fma (a, PC[5], PC[4]);
+      double r4 = fma (a, PC[7], PC[6]);
+      double r5 = fma (a, PC[9], PC[8]);
+      double r6 = fma (a, PC[11], PC[10]);
+      double r7 = fma (a, PC[13], PC[12]);
+      double r8 = fma (a, PC[15], PC[14]);
+
+      double a2 = a * a;
+
+      double r = r8;
+      r = fma (a2, r, r7);
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else if (ia < 0x400a0000)
+    { /* 2 <= |x| < 3.25.  */
+      double a = fabs (x);
+      a = fma (0.5, a, -1.0);
+
+      double r1 = fma (a, PD[1], PD[0]);
+      double r2 = fma (a, PD[3], PD[2]);
+      double r3 = fma (a, PD[5], PD[4]);
+      double r4 = fma (a, PD[7], PD[6]);
+      double r5 = fma (a, PD[9], PD[8]);
+      double r6 = fma (a, PD[11], PD[10]);
+      double r7 = fma (a, PD[13], PD[12]);
+      double r8 = fma (a, PD[15], PD[14]);
+      double r9 = fma (a, PD[17], PD[16]);
+
+      double a2 = a * a;
+
+      double r = r9;
+      r = fma (a2, r, r8);
+      r = fma (a2, r, r7);
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else if (ia < 0x40100000)
+    { /* 3.25 <= |x| < 4.0.  */
+      double a = fabs (x);
+      a = a - 3.25;
+
+      double r1 = fma (a, PE[1], PE[0]);
+      double r2 = fma (a, PE[3], PE[2]);
+      double r3 = fma (a, PE[5], PE[4]);
+      double r4 = fma (a, PE[7], PE[6]);
+      double r5 = fma (a, PE[9], PE[8]);
+      double r6 = fma (a, PE[11], PE[10]);
+      double r7 = fma (a, PE[13], PE[12]);
+
+      double a2 = a * a;
+
+      double r = r7;
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else if (ia < 0x4017a000)
+    { /* 4 <= |x| < 5.90625.  */
+      double a = fabs (x);
+      a = fma (0.5, a, -2.0);
+
+      double r1 = fma (a, PF[1], PF[0]);
+      double r2 = fma (a, PF[3], PF[2]);
+      double r3 = fma (a, PF[5], PF[4]);
+      double r4 = fma (a, PF[7], PF[6]);
+      double r5 = fma (a, PF[9], PF[8]);
+      double r6 = fma (a, PF[11], PF[10]);
+      double r7 = fma (a, PF[13], PF[12]);
+      double r8 = fma (a, PF[15], PF[14]);
+      double r9 = PF[16];
+
+      double a2 = a * a;
+
+      double r = r9;
+      r = fma (a2, r, r8);
+      r = fma (a2, r, r7);
+      r = fma (a2, r, r6);
+      r = fma (a2, r, r5);
+      r = fma (a2, r, r4);
+      r = fma (a2, r, r3);
+      r = fma (a2, r, r2);
+      r = fma (a2, r, r1);
+
+      if (sign)
+	return -1.0 + r;
+      else
+	return 1.0 - r;
+    }
+  else
+    {
+      /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1.  */
+      if (unlikely (ia >= 0x7ff00000))
+	return (double) (1.0 - (sign << 1)) + 1.0 / x;
+
+      if (sign)
+	return -1.0;
+      else
+	return 1.0;
+    }
 }
 
-/**
- * Returns complementary error function of 𝑥.
- */
-double erfc(double x)
-{
-	double r,s,z,y;
-	uint32_t ix;
-	int sign;
-
-	GET_HIGH_WORD(ix, x);
-	sign = ix>>31;
-	ix &= 0x7fffffff;
-	if (ix >= 0x7ff00000) {
-		/* erfc(nan)=nan, erfc(+-inf)=0,2 */
-		return 2*sign + 1/x;
-	}
-	if (ix < 0x3feb0000) {  /* |x| < 0.84375 */
-		if (ix < 0x3c700000)  /* |x| < 2**-56 */
-			return 1.0 - x;
-		z = x*x;
-		r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
-		s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-		y = r/s;
-		if (sign || ix < 0x3fd00000) {  /* x < 1/4 */
-			return 1.0 - (x+x*y);
-		}
-		return 0.5 - (x - 0.5 + x*y);
-	}
-	if (ix < 0x403c0000) {  /* 0.84375 <= |x| < 28 */
-		return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
-	}
-	return sign ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022;
-}
-
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+#if LDBL_MANT_DIG == 53
 __weak_reference(erf, erfl);
-__weak_reference(erfc, erfcl);
 #endif
diff --git a/libc/tinymath/erf_data.c b/libc/tinymath/erf_data.c
new file mode 100644
index 000000000..bfe7e84db
--- /dev/null
+++ b/libc/tinymath/erf_data.c
@@ -0,0 +1,105 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
+│                                                                              │
+│  Permission is hereby granted, free of charge, to any person obtaining       │
+│  a copy of this software and associated documentation files (the             │
+│  "Software"), to deal in the Software without restriction, including         │
+│  without limitation the rights to use, copy, modify, merge, publish,         │
+│  distribute, sublicense, and/or sell copies of the Software, and to          │
+│  permit persons to whom the Software is furnished to do so, subject to       │
+│  the following conditions:                                                   │
+│                                                                              │
+│  The above copyright notice and this permission notice shall be              │
+│  included in all copies or substantial portions of the Software.             │
+│                                                                              │
+│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
+│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
+│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
+│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
+│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
+│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
+│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
+
+/*
+Minimax approximation of erf
+*/
+const struct erf_data __erf_data = {
+.erf_poly_A = {
+#if ERF_POLY_A_NCOEFFS == 10
+0x1.06eba8214db68p-3, -0x1.812746b037948p-2, 0x1.ce2f21a03872p-4,
+-0x1.b82ce30e6548p-6, 0x1.565bcc360a2f2p-8, -0x1.c02d812bc979ap-11,
+0x1.f99bddfc1ebe9p-14, -0x1.f42c457cee912p-17, 0x1.b0e414ec20ee9p-20,
+-0x1.18c47fd143c5ep-23
+#endif
+},
+/* Rational approximation on [0x1p-28, 0.84375] */
+.erf_ratio_N_A = {
+0x1.06eba8214db68p-3, -0x1.4cd7d691cb913p-2, -0x1.d2a51dbd7194fp-6,
+-0x1.7a291236668e4p-8, -0x1.8ead6120016acp-16
+},
+.erf_ratio_D_A = {
+0x1.97779cddadc09p-2, 0x1.0a54c5536cebap-4, 0x1.4d022c4d36b0fp-8,
+0x1.15dc9221c1a1p-13, -0x1.09c4342a2612p-18
+},
+/* Rational approximation on [0.84375, 1.25] */
+.erf_ratio_N_B = {
+-0x1.359b8bef77538p-9, 0x1.a8d00ad92b34dp-2, -0x1.7d240fbb8c3f1p-2,
+0x1.45fca805120e4p-2, -0x1.c63983d3e28ecp-4, 0x1.22a36599795ebp-5,
+-0x1.1bf380a96073fp-9
+},
+.erf_ratio_D_B = {
+0x1.b3e6618eee323p-4, 0x1.14af092eb6f33p-1, 0x1.2635cd99fe9a7p-4,
+0x1.02660e763351fp-3, 0x1.bedc26b51dd1cp-7, 0x1.88b545735151dp-7
+},
+.erfc_poly_C = {
+#if ERFC_POLY_C_NCOEFFS == 16
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=15 a=1.25 b=2 c=1 d=1.25 */
+0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb702p-3, 0x1.2ebf3dcca0446p-2,
+-0x1.571d01c62d66p-3, 0x1.93a9a8f5b3413p-8, 0x1.8281cbcc2cd52p-5,
+-0x1.5cffd86b4de16p-6, -0x1.db4ccf595053ep-9, 0x1.757cbf8684edap-8,
+-0x1.ce7dfd2a9e56ap-11, -0x1.99ee3bc5a3263p-11, 0x1.3c57cf9213f5fp-12,
+0x1.60692996bf254p-14, -0x1.6e44cb7c1fa2ap-14, 0x1.9d4484ac482b2p-16,
+-0x1.578c9e375d37p-19
+#endif
+},
+.erfc_poly_D = {
+#if ERFC_POLY_D_NCOEFFS == 18
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=17 a=2 b=3.25 c=2 d=2 */
+0x1.328f5ec350e5p-8, -0x1.529b9e8cf8e99p-5, 0x1.529b9e8cd9e71p-3,
+-0x1.8b0ae3a023bf2p-2, 0x1.1a2c592599d82p-1, -0x1.ace732477e494p-2,
+-0x1.e1a06a27920ffp-6, 0x1.bae92a6d27af6p-2, -0x1.a15470fcf5ce7p-2,
+0x1.bafe45d18e213p-6, 0x1.0d950680d199ap-2, -0x1.8c9481e8f22e3p-3,
+-0x1.158450ed5c899p-4, 0x1.c01f2973b44p-3, -0x1.73ed2827546a7p-3,
+0x1.47733687d1ff7p-4, -0x1.2dec70d00b8e1p-6, 0x1.a947ab83cd4fp-10
+#endif
+},
+.erfc_poly_E = {
+#if ERFC_POLY_E_NCOEFFS == 14
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=13 a=3.25 b=4 c=1 d=3.25 */
+0x1.20c13035539e4p-18, -0x1.e9b5e8d16df7ep-16, 0x1.8de3cd4733bf9p-14,
+-0x1.9aa48beb8382fp-13, 0x1.2c7d713370a9fp-12, -0x1.490b12110b9e2p-12,
+0x1.1459c5d989d23p-12, -0x1.64b28e9f1269p-13, 0x1.57c76d9d05cf8p-14,
+-0x1.bf271d9951cf8p-16, 0x1.db7ea4d4535c9p-19, 0x1.91c2e102d5e49p-20,
+-0x1.e9f0826c2149ep-21, 0x1.60eebaea236e1p-23
+#endif
+},
+.erfc_poly_F = {
+#if ERFC_POLY_F_NCOEFFS == 17
+/* Generated using Sollya::remez(f(c*x+d), deg, [(a-d)/c;(b-d)/c], 1, 1e-16), [|D ...|] with deg=16 a=4 b=5.90625 c=2 d=4 */
+0x1.08ddd130d1fa6p-26, -0x1.10b146f59ff06p-22, 0x1.10b135328b7b2p-19,
+-0x1.6039988e7575fp-17, 0x1.497d365e19367p-15, -0x1.da48d9afac83ep-14,
+0x1.1024c9b1fbb48p-12, -0x1.fc962e7066272p-12, 0x1.87297282d4651p-11,
+-0x1.f057b255f8c59p-11, 0x1.0228d0eee063p-10, -0x1.b1b21b84ec41cp-11,
+0x1.1ead8ae9e1253p-11, -0x1.1e708fba37fccp-12, 0x1.9559363991edap-14,
+-0x1.68c827b783d9cp-16, 0x1.2ec4adeccf4a2p-19
+#endif
+}
+};
diff --git a/libc/tinymath/erfc.c b/libc/tinymath/erfc.c
new file mode 100644
index 000000000..29a87f891
--- /dev/null
+++ b/libc/tinymath/erfc.c
@@ -0,0 +1,279 @@
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│ Copyright (c) 1992-2024 The FreeBSD Project                                  │
+│                                                                              │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
+│                                                                              │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
+│                                                                              │
+│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.            │
+│                                                                              │
+│ Developed at SunPro, a Sun Microsystems, Inc. business.                      │
+│ Permission to use, copy, modify, and distribute this                         │
+│ software is freely granted, provided that this notice                        │
+│ is preserved.                                                                │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
+
+/* double erf(double x)
+ * double erfc(double x)
+ *			     x
+ *		      2      |\
+ *     erf(x)  =  ---------  | exp(-t*t)dt
+ *	 	   sqrt(pi) \|
+ *			     0
+ *
+ *     erfc(x) =  1-erf(x)
+ *  Note that
+ *		erf(-x) = -erf(x)
+ *		erfc(-x) = 2 - erfc(x)
+ *
+ * Method:
+ *	1. For |x| in [0, 0.84375]
+ *	    erf(x)  = x + x*R(x^2)
+ *          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
+ *                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
+ *	   where R = P/Q where P is an odd poly of degree 8 and
+ *	   Q is an odd poly of degree 10.
+ *						 -57.90
+ *			| R - (erf(x)-x)/x | <= 2
+ *
+ *
+ *	   Remark. The formula is derived by noting
+ *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
+ *	   and that
+ *          2/sqrt(pi) = 1.128379167095512573896158903121545171688
+ *	   is close to one. The interval is chosen because the fix
+ *	   point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
+ *	   near 0.6174), and by some experiment, 0.84375 is chosen to
+ * 	   guarantee the error is less than one ulp for erf.
+ *
+ *      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
+ *         c = 0.84506291151 rounded to single (24 bits)
+ *         	erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
+ *         	erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
+ *			  1+(c+P1(s)/Q1(s))    if x < 0
+ *         	|P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
+ *	   Remark: here we use the taylor series expansion at x=1.
+ *		erf(1+s) = erf(1) + s*Poly(s)
+ *			 = 0.845.. + P1(s)/Q1(s)
+ *	   That is, we use rational approximation to approximate
+ *			erf(1+s) - (c = (single)0.84506291151)
+ *	   Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
+ *	   where
+ *		P1(s) = degree 6 poly in s
+ *		Q1(s) = degree 6 poly in s
+ *
+ *      3. For x in [1.25,1/0.35(~2.857143)],
+ *         	erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
+ *         	erf(x)  = 1 - erfc(x)
+ *	   where
+ *		R1(z) = degree 7 poly in z, (z=1/x^2)
+ *		S1(z) = degree 8 poly in z
+ *
+ *      4. For x in [1/0.35,28]
+ *         	erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
+ *			= 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
+ *			= 2.0 - tiny		(if x <= -6)
+ *         	erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
+ *         	erf(x)  = sign(x)*(1.0 - tiny)
+ *	   where
+ *		R2(z) = degree 6 poly in z, (z=1/x^2)
+ *		S2(z) = degree 7 poly in z
+ *
+ *      Note1:
+ *	   To compute exp(-x*x-0.5625+R/S), let s be a single
+ *	   precision number and s := x; then
+ *		-x*x = -s*s + (s-x)*(s+x)
+ *	        exp(-x*x-0.5626+R/S) =
+ *			exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
+ *      Note2:
+ *	   Here 4 and 5 make use of the asymptotic series
+ *			  exp(-x*x)
+ *		erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
+ *			  x*sqrt(pi)
+ *	   We use rational approximation to approximate
+ *      	g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
+ *	   Here is the error bound for R1/S1 and R2/S2
+ *      	|R1/S1 - f(x)|  < 2**(-62.57)
+ *      	|R2/S2 - f(x)|  < 2**(-61.52)
+ *
+ *      5. For inf > x >= 28
+ *         	erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
+ *         	erfc(x) = tiny*tiny (raise underflow) if x > 0
+ *			= 2 - tiny if x<0
+ *
+ *      7. Special case:
+ *         	erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
+ *         	erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
+ *	   	erfc/erf(NaN) is NaN
+ */
+
+/* XXX Prevent compilers from erroneously constant folding: */
+static const volatile double tiny= 1e-300;
+
+static const double
+half= 0.5,
+one = 1,
+two = 2,
+/* c = (float)0.84506291151 */
+erx =  8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
+/*
+ * In the domain [0, 2**-28], only the first term in the power series
+ * expansion of erf(x) is used.  The magnitude of the first neglected
+ * terms is less than 2**-84.
+ */
+efx =  1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
+efx8=  1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
+/*
+ * Coefficients for approximation to erf on [0,0.84375]
+ */
+pp0  =  1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
+pp1  = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
+pp2  = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
+pp3  = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
+pp4  = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
+qq1  =  3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
+qq2  =  6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
+qq3  =  5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
+qq4  =  1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
+qq5  = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
+/*
+ * Coefficients for approximation to erf in [0.84375,1.25]
+ */
+pa0  = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
+pa1  =  4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
+pa2  = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
+pa3  =  3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
+pa4  = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
+pa5  =  3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
+pa6  = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
+qa1  =  1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
+qa2  =  5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
+qa3  =  7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
+qa4  =  1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
+qa5  =  1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
+qa6  =  1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
+/*
+ * Coefficients for approximation to erfc in [1.25,1/0.35]
+ */
+ra0  = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
+ra1  = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
+ra2  = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
+ra3  = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
+ra4  = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
+ra5  = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
+ra6  = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
+ra7  = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
+sa1  =  1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
+sa2  =  1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
+sa3  =  4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
+sa4  =  6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
+sa5  =  4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
+sa6  =  1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
+sa7  =  6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
+sa8  = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
+/*
+ * Coefficients for approximation to erfc in [1/.35,28]
+ */
+rb0  = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
+rb1  = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
+rb2  = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
+rb3  = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
+rb4  = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
+rb5  = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
+rb6  = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
+sb1  =  3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
+sb2  =  3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
+sb3  =  1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
+sb4  =  3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
+sb5  =  2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
+sb6  =  4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
+sb7  = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
+
+/**
+ * Returns complementary error function of x, i.e. 1.0 - erf(x).
+ */
+double
+erfc(double x)
+{
+	int32_t hx,ix;
+	double R,S,P,Q,s,y,z,r;
+	GET_HIGH_WORD(hx,x);
+	ix = hx&0x7fffffff;
+	if(ix>=0x7ff00000) {			/* erfc(nan)=nan */
+						/* erfc(+-inf)=0,2 */
+	    return (double)(((uint32_t)hx>>31)<<1)+one/x;
+	}
+
+	if(ix < 0x3feb0000) {		/* |x|<0.84375 */
+	    if(ix < 0x3c700000)  	/* |x|<2**-56 */
+		return one-x;
+	    z = x*x;
+	    r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
+	    s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
+	    y = r/s;
+	    if(hx < 0x3fd00000) {  	/* x<1/4 */
+		return one-(x+x*y);
+	    } else {
+		r = x*y;
+		r += (x-half);
+	        return half - r ;
+	    }
+	}
+	if(ix < 0x3ff40000) {		/* 0.84375 <= |x| < 1.25 */
+	    s = fabs(x)-one;
+	    P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
+	    Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
+	    if(hx>=0) {
+	        z  = one-erx; return z - P/Q;
+	    } else {
+		z = erx+P/Q; return one+z;
+	    }
+	}
+	if (ix < 0x403c0000) {		/* |x|<28 */
+	    x = fabs(x);
+ 	    s = one/(x*x);
+	    if(ix< 0x4006DB6D) {	/* |x| < 1/.35 ~ 2.857143*/
+		R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))));
+		S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+
+		    s*sa8)))))));
+	    } else {			/* |x| >= 1/.35 ~ 2.857143 */
+		if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
+		R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))));
+		S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))));
+	    }
+	    z  = x;
+	    SET_LOW_WORD(z,0);
+	    r  =  exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S);
+	    if(hx>0) return r/x; else return two-r/x;
+	} else {
+	    if(hx>0) return tiny*tiny; else return two-tiny;
+	}
+}
+
+#if LDBL_MANT_DIG == 53
+__weak_reference(erfc, erfcl);
+#endif
diff --git a/libc/tinymath/erff.c b/libc/tinymath/erff.c
index cea72f1a4..ae29ebef1 100644
--- a/libc/tinymath/erff.c
+++ b/libc/tinymath/erff.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2020 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,195 +25,99 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/math.h"
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
+#define A __erff_data.erff_poly_A
+#define B __erff_data.erff_poly_B
 
-/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
-#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
-
-static const float
-erx  =  8.4506291151e-01, /* 0x3f58560b */
-/*
- * Coefficients for approximation to  erf on [0,0.84375]
- */
-efx8 =  1.0270333290e+00, /* 0x3f8375d4 */
-pp0  =  1.2837916613e-01, /* 0x3e0375d4 */
-pp1  = -3.2504209876e-01, /* 0xbea66beb */
-pp2  = -2.8481749818e-02, /* 0xbce9528f */
-pp3  = -5.7702702470e-03, /* 0xbbbd1489 */
-pp4  = -2.3763017452e-05, /* 0xb7c756b1 */
-qq1  =  3.9791721106e-01, /* 0x3ecbbbce */
-qq2  =  6.5022252500e-02, /* 0x3d852a63 */
-qq3  =  5.0813062117e-03, /* 0x3ba68116 */
-qq4  =  1.3249473704e-04, /* 0x390aee49 */
-qq5  = -3.9602282413e-06, /* 0xb684e21a */
-/*
- * Coefficients for approximation to  erf  in [0.84375,1.25]
- */
-pa0  = -2.3621185683e-03, /* 0xbb1acdc6 */
-pa1  =  4.1485610604e-01, /* 0x3ed46805 */
-pa2  = -3.7220788002e-01, /* 0xbebe9208 */
-pa3  =  3.1834661961e-01, /* 0x3ea2fe54 */
-pa4  = -1.1089469492e-01, /* 0xbde31cc2 */
-pa5  =  3.5478305072e-02, /* 0x3d1151b3 */
-pa6  = -2.1663755178e-03, /* 0xbb0df9c0 */
-qa1  =  1.0642088205e-01, /* 0x3dd9f331 */
-qa2  =  5.4039794207e-01, /* 0x3f0a5785 */
-qa3  =  7.1828655899e-02, /* 0x3d931ae7 */
-qa4  =  1.2617121637e-01, /* 0x3e013307 */
-qa5  =  1.3637083583e-02, /* 0x3c5f6e13 */
-qa6  =  1.1984500103e-02, /* 0x3c445aa3 */
-/*
- * Coefficients for approximation to  erfc in [1.25,1/0.35]
- */
-ra0  = -9.8649440333e-03, /* 0xbc21a093 */
-ra1  = -6.9385856390e-01, /* 0xbf31a0b7 */
-ra2  = -1.0558626175e+01, /* 0xc128f022 */
-ra3  = -6.2375331879e+01, /* 0xc2798057 */
-ra4  = -1.6239666748e+02, /* 0xc322658c */
-ra5  = -1.8460508728e+02, /* 0xc3389ae7 */
-ra6  = -8.1287437439e+01, /* 0xc2a2932b */
-ra7  = -9.8143291473e+00, /* 0xc11d077e */
-sa1  =  1.9651271820e+01, /* 0x419d35ce */
-sa2  =  1.3765776062e+02, /* 0x4309a863 */
-sa3  =  4.3456588745e+02, /* 0x43d9486f */
-sa4  =  6.4538726807e+02, /* 0x442158c9 */
-sa5  =  4.2900814819e+02, /* 0x43d6810b */
-sa6  =  1.0863500214e+02, /* 0x42d9451f */
-sa7  =  6.5702495575e+00, /* 0x40d23f7c */
-sa8  = -6.0424413532e-02, /* 0xbd777f97 */
-/*
- * Coefficients for approximation to  erfc in [1/.35,28]
- */
-rb0  = -9.8649431020e-03, /* 0xbc21a092 */
-rb1  = -7.9928326607e-01, /* 0xbf4c9dd4 */
-rb2  = -1.7757955551e+01, /* 0xc18e104b */
-rb3  = -1.6063638306e+02, /* 0xc320a2ea */
-rb4  = -6.3756646729e+02, /* 0xc41f6441 */
-rb5  = -1.0250950928e+03, /* 0xc480230b */
-rb6  = -4.8351919556e+02, /* 0xc3f1c275 */
-sb1  =  3.0338060379e+01, /* 0x41f2b459 */
-sb2  =  3.2579251099e+02, /* 0x43a2e571 */
-sb3  =  1.5367296143e+03, /* 0x44c01759 */
-sb4  =  3.1998581543e+03, /* 0x4547fdbb */
-sb5  =  2.5530502930e+03, /* 0x451f90ce */
-sb6  =  4.7452853394e+02, /* 0x43ed43a7 */
-sb7  = -2.2440952301e+01; /* 0xc1b38712 */
-
-static float erfc1(float x)
+/* Top 12 bits of a float.  */
+static inline uint32_t
+top12 (float x)
 {
-	float_t s,P,Q;
-
-	s = fabsf(x) - 1;
-	P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
-	Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
-	return 1 - erx - P/Q;
+  return asuint (x) >> 20;
 }
 
-static float erfc2(uint32_t ix, float x)
+/* Efficient implementation of erff
+   using either a pure polynomial approximation or
+   the exponential of a polynomial.
+   Worst-case error is 1.09ulps at 0x1.c111acp-1.  */
+float
+erff (float x)
 {
-	float_t s,R,S;
-	float z;
+  float r, x2, u;
 
-	if (ix < 0x3fa00000)  /* |x| < 1.25 */
-		return erfc1(x);
+  /* Get top word.  */
+  uint32_t ix = asuint (x);
+  uint32_t sign = ix >> 31;
+  uint32_t ia12 = top12 (x) & 0x7ff;
 
-	x = fabsf(x);
-	s = 1/(x*x);
-	if (ix < 0x4036db6d) {   /* |x| < 1/0.35 */
-		R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
-		     ra5+s*(ra6+s*ra7))))));
-		S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
-		     sa5+s*(sa6+s*(sa7+s*sa8)))))));
-	} else {                 /* |x| >= 1/0.35 */
-		R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
-		     rb5+s*rb6)))));
-		S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
-		     sb5+s*(sb6+s*sb7))))));
+  /* Limit of both intervals is 0.875 for performance reasons but coefficients
+     computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy
+     from 0.94 to 1.1ulps.  */
+  if (ia12 < 0x3f6)
+    { /* a = |x| < 0.875.  */
+
+      /* Tiny and subnormal cases.  */
+      if (unlikely (ia12 < 0x318))
+	{ /* |x| < 2^(-28).  */
+	  if (unlikely (ia12 < 0x040))
+	    { /* |x| < 2^(-119).  */
+	      float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
+	      return check_uflowf (y);
+	    }
+	  return x + TwoOverSqrtPiMinusOne * x;
 	}
-	ix = asuint(x);
-	z = asfloat(ix&0xffffe000);
-	return expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S)/x;
-}
-
-float erff(float x)
-{
-	float r,s,z,y;
-	uint32_t ix;
-	int sign;
-
-	ix = asuint(x);
-	sign = ix>>31;
-	ix &= 0x7fffffff;
-	if (ix >= 0x7f800000) {
-		/* erf(nan)=nan, erf(+-inf)=+-1 */
-		return 1-2*sign + 1/x;
-	}
-	if (ix < 0x3f580000) {  /* |x| < 0.84375 */
-		if (ix < 0x31800000) {  /* |x| < 2**-28 */
-			/*avoid underflow */
-			return 0.125f*(8*x + efx8*x);
-		}
-		z = x*x;
-		r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
-		s = 1+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-		y = r/s;
-		return x + x*y;
-	}
-	if (ix < 0x40c00000)  /* |x| < 6 */
-		y = 1 - erfc2(ix,x);
-	else
-		y = 1 - 0x1p-120f;
-	return sign ? -y : y;
-}
-
-float erfcf(float x)
-{
-	float r,s,z,y;
-	uint32_t ix;
-	int sign;
-
-	ix = asuint(x);
-	sign = ix>>31;
-	ix &= 0x7fffffff;
-	if (ix >= 0x7f800000) {
-		/* erfc(nan)=nan, erfc(+-inf)=0,2 */
-		return 2*sign + 1/x;
-	}
-
-	if (ix < 0x3f580000) {  /* |x| < 0.84375 */
-		if (ix < 0x23800000)  /* |x| < 2**-56 */
-			return 1.0f - x;
-		z = x*x;
-		r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
-		s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-		y = r/s;
-		if (sign || ix < 0x3e800000)  /* x < 1/4 */
-			return 1.0f - (x+x*y);
-		return 0.5f - (x - 0.5f + x*y);
-	}
-	if (ix < 0x41e00000) {  /* |x| < 28 */
-		return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
-	}
-	return sign ? 2 - 0x1p-120f : 0x1p-120f*0x1p-120f;
+
+      x2 = x * x;
+
+      /* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2).  */
+      r = A[5];
+      r = fmaf (r, x2, A[4]);
+      r = fmaf (r, x2, A[3]);
+      r = fmaf (r, x2, A[2]);
+      r = fmaf (r, x2, A[1]);
+      r = fmaf (r, x2, A[0]);
+      r = fmaf (r, x, x);
+    }
+  else if (ia12 < 0x408)
+    { /* |x| < 4.0 - Use a custom Estrin scheme.  */
+
+      float a = fabsf (x);
+      /* Start with Estrin scheme on high order (small magnitude) coefficients.  */
+      r = fmaf (B[6], a, B[5]);
+      u = fmaf (B[4], a, B[3]);
+      x2 = x * x;
+      r = fmaf (r, x2, u);
+      /* Then switch to pure Horner scheme.  */
+      r = fmaf (r, a, B[2]);
+      r = fmaf (r, a, B[1]);
+      r = fmaf (r, a, B[0]);
+      r = fmaf (r, a, a);
+      /* Single precision exponential with ~0.5ulps,
+	 ensures erff has max. rel. error
+	 < 1ulp on [0.921875, 4.0],
+	 < 1.1ulps on [0.875, 4.0].  */
+      r = expf (-r);
+      /* Explicit copysign (calling copysignf increases latency).  */
+      if (sign)
+	r = -1.0f + r;
+      else
+	r = 1.0f - r;
+    }
+  else
+    { /* |x| >= 4.0.  */
+
+      /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1.  */
+      if (unlikely (ia12 >= 0x7f8))
+	return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
+
+      /* Explicit copysign (calling copysignf increases latency).  */
+      if (sign)
+	r = -1.0f;
+      else
+	r = 1.0f;
+    }
+  return r;
 }
diff --git a/libc/tinymath/log1pf_data.c b/libc/tinymath/erff_data.c
similarity index 79%
rename from libc/tinymath/log1pf_data.c
rename to libc/tinymath/erff_data.c
index eea85b405..6ae973f28 100644
--- a/libc/tinymath/log1pf_data.c
+++ b/libc/tinymath/erff_data.c
@@ -1,9 +1,9 @@
 /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8                               :vi │
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,17 +25,19 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/log1pf_data.internal.h"
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+/* Minimax approximation of erff. */
+const struct erff_data __erff_data = {
+.erff_poly_A = {
+0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
+-0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f
+},
+.erff_poly_B = {
+0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f,
+-0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f,
+0x1.222900p-16f
+}
+};
 
-/* Polynomial coefficients generated using floating-point minimax
-   algorithm, see tools/log1pf.sollya for details.  */
-const struct log1pf_data __log1pf_data
-  = {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
-		-0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
-		-0x1.6f0d5ep-5f}};
diff --git a/libc/tinymath/erfl.c b/libc/tinymath/erfl.c
index 6ac746b30..60bb0ef2a 100644
--- a/libc/tinymath/erfl.c
+++ b/libc/tinymath/erfl.c
@@ -28,15 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("openbsd_libm_notice");
+__static_yoink("musl_libc_notice");
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_erfl.c */
 /*
diff --git a/libc/tinymath/estrin_wrap.internal.h b/libc/tinymath/estrin_wrap.internal.h
index c71015fa0..ee77c108e 100644
--- a/libc/tinymath/estrin_wrap.internal.h
+++ b/libc/tinymath/estrin_wrap.internal.h
@@ -8,7 +8,6 @@
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-// clang-format off
 #define  ESTRIN_1_(x,                  c, i) FMA(x,   c(1 + i),                        c(i))
 #define  ESTRIN_2_(x, x2,              c, i) FMA(x2,  c(2 + i),                        ESTRIN_1_(x,              c, i))
 #define  ESTRIN_3_(x, x2,              c, i) FMA(x2,  ESTRIN_1_(x,         c,  2 + i), ESTRIN_1_(x,              c, i))
diff --git a/libc/tinymath/exp.c b/libc/tinymath/exp.c
index e9ea6fddb..d0956ec2d 100644
--- a/libc/tinymath/exp.c
+++ b/libc/tinymath/exp.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,23 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/exp_data.internal.h"
-#include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Double-precision e^x function.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define N (1 << EXP_TABLE_BITS)
 #define InvLn2N __exp_data.invln2N
@@ -53,6 +38,7 @@ asm(".include \"libc/disclaimer.inc\"");
 #define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
 #define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
 #define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
 
 /* Handle cases that may overflow or underflow when computing the result that
    is scale*(1+TMP) without intermediate rounding.  The bit representation of
@@ -61,114 +47,154 @@ asm(".include \"libc/disclaimer.inc\"");
    a double.  (int32_t)KI is the k used in the argument reduction and exponent
    adjustment of scale, positive k here means the result may overflow and
    negative k means the result may underflow.  */
-static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double_t scale, y;
+  double_t scale, y;
 
-	if ((ki & 0x80000000) == 0) {
-		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
-		sbits -= 1009ull << 52;
-		scale = asdouble(sbits);
-		y = 0x1p1009 * (scale + scale * tmp);
-		return eval_as_double(y);
-	}
-	/* k < 0, need special care in the subnormal range.  */
-	sbits += 1022ull << 52;
-	scale = asdouble(sbits);
-	y = scale + scale * tmp;
-	if (y < 1.0) {
-		/* Round y to the right precision before scaling it into the subnormal
-		 range to avoid double rounding that can cause 0.5+E/2 ulp error where
-		 E is the worst-case ulp error outside the subnormal range.  So this
-		 is only useful if the goal is better than 1 ulp worst-case error.  */
-		double_t hi, lo;
-		lo = scale - y + scale * tmp;
-		hi = 1.0 + y;
-		lo = 1.0 - hi + y + lo;
-		y = eval_as_double(hi + lo) - 1.0;
-		/* Avoid -0.0 with downward rounding.  */
-		if (WANT_ROUNDING && y == 0.0)
-			y = 0.0;
-		/* The underflow exception needs to be signaled explicitly.  */
-		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
-	}
-	y = 0x1p-1022 * y;
-	return eval_as_double(y);
+  if ((ki & 0x80000000) == 0)
+    {
+      /* k > 0, the exponent of scale might have overflowed by <= 460.  */
+      sbits -= 1009ull << 52;
+      scale = asdouble (sbits);
+      y = 0x1p1009 * (scale + scale * tmp);
+      return check_oflow (eval_as_double (y));
+    }
+  /* k < 0, need special care in the subnormal range.  */
+  sbits += 1022ull << 52;
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+  if (y < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.  */
+      double_t hi, lo;
+      lo = scale - y + scale * tmp;
+      hi = 1.0 + y;
+      lo = 1.0 - hi + y + lo;
+      y = eval_as_double (hi + lo) - 1.0;
+      /* Avoid -0.0 with downward rounding.  */
+      if (WANT_ROUNDING && y == 0.0)
+	y = 0.0;
+      /* The underflow exception needs to be signaled explicitly.  */
+      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+  return check_uflow (eval_as_double (y));
 }
 
 /* Top 12 bits of a double (sign and exponent bits).  */
-static inline uint32_t top12(double x)
+static inline uint32_t
+top12 (double x)
 {
-	return asuint64(x) >> 52;
+  return asuint64 (x) >> 52;
+}
+
+/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+   If hastail is 0 then xtail is assumed to be 0 too.  */
+static inline double
+exp_inline (double x, double xtail, int hastail)
+{
+  uint32_t abstop;
+  uint64_t ki, idx, top, sbits;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, z, r, r2, scale, tail, tmp;
+
+  abstop = top12 (x) & 0x7ff;
+  if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+    {
+      if (abstop - top12 (0x1p-54) >= 0x80000000)
+	/* Avoid spurious underflow for tiny x.  */
+	/* Note: 0 is common input.  */
+	return WANT_ROUNDING ? 1.0 + x : 1.0;
+      if (abstop >= top12 (1024.0))
+	{
+	  if (asuint64 (x) == asuint64 (-INFINITY))
+	    return 0.0;
+	  if (abstop >= top12 (INFINITY))
+	    return 1.0 + x;
+	  if (asuint64 (x) >> 63)
+	    return __math_uflow (0);
+	  else
+	    return __math_oflow (0);
+	}
+      /* Large x is special cased below.  */
+      abstop = 0;
+    }
+
+  /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
+  /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
+  z = InvLn2N * x;
+#if TOINT_INTRINSICS
+  kd = roundtoint (z);
+  ki = converttoint (z);
+#elif EXP_USE_TOINT_NARROW
+  /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd) >> 16;
+  kd = (double_t) (int32_t) ki;
+#else
+  /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd);
+  kd -= Shift;
+#endif
+  r = x + kd * NegLn2hiN + kd * NegLn2loN;
+  /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
+  if (hastail)
+    r += xtail;
+  /* 2^(k/N) ~= scale * (1 + tail).  */
+  idx = 2 * (ki % N);
+  top = ki << (52 - EXP_TABLE_BITS);
+  tail = asdouble (T[idx]);
+  /* This is only a valid scale when -1023*N < k < 1024*N.  */
+  sbits = T[idx + 1] + top;
+  /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  r2 = r * r;
+  /* Without fma the worst case error is 0.25/N ulp larger.  */
+  /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
+#if EXP_POLY_ORDER == 4
+  tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
+#elif EXP_POLY_ORDER == 5
+  tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+#elif EXP_POLY_ORDER == 6
+  tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
+#endif
+  if (unlikely (abstop == 0))
+    return specialcase (tmp, sbits, ki);
+  scale = asdouble (sbits);
+  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+     is no spurious underflow here even without fma.  */
+  return eval_as_double (scale + scale * tmp);
 }
 
 /**
  * Returns 𝑒^x.
+ *
+ * @raise ERANGE on overflow or underflow
  */
-double exp(double x)
+double
+exp (double x)
 {
-	uint32_t abstop;
-	uint64_t ki, idx, top, sbits;
-	double_t kd, z, r, r2, scale, tail, tmp;
-
-	abstop = top12(x) & 0x7ff;
-	if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
-		if (abstop - top12(0x1p-54) >= 0x80000000)
-			/* Avoid spurious underflow for tiny x.  */
-			/* Note: 0 is common input.  */
-			return WANT_ROUNDING ? 1.0 + x : 1.0;
-		if (abstop >= top12(1024.0)) {
-			if (asuint64(x) == asuint64(-INFINITY))
-				return 0.0;
-			if (abstop >= top12(INFINITY))
-				return 1.0 + x;
-			if (asuint64(x) >> 63)
-				return __math_uflow(0);
-			else
-				return __math_oflow(0);
-		}
-		/* Large x is special cased below.  */
-		abstop = 0;
-	}
-
-	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
-	/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
-	z = InvLn2N * x;
-#if TOINT_INTRINSICS
-	kd = roundtoint(z);
-	ki = converttoint(z);
-#elif EXP_USE_TOINT_NARROW
-	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
-	kd = eval_as_double(z + Shift);
-	ki = asuint64(kd) >> 16;
-	kd = (double_t)(int32_t)ki;
-#else
-	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
-	kd = eval_as_double(z + Shift);
-	ki = asuint64(kd);
-	kd -= Shift;
-#endif
-	r = x + kd * NegLn2hiN + kd * NegLn2loN;
-	/* 2^(k/N) ~= scale * (1 + tail).  */
-	idx = 2 * (ki % N);
-	top = ki << (52 - EXP_TABLE_BITS);
-	tail = asdouble(T[idx]);
-	/* This is only a valid scale when -1023*N < k < 1024*N.  */
-	sbits = T[idx + 1] + top;
-	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
-	/* Evaluation is optimized assuming superscalar pipelined execution.  */
-	r2 = r * r;
-	/* Without fma the worst case error is 0.25/N ulp larger.  */
-	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
-	tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
-	if (UNLIKELY(abstop == 0))
-		return specialcase(tmp, sbits, ki);
-	scale = asdouble(sbits);
-	/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
-	   is no spurious underflow here even without fma.  */
-	return eval_as_double(scale + scale * tmp);
+  return exp_inline (x, 0, 0);
 }
 
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(exp, expl);
+/* May be useful for implementing pow where more than double
+   precision input is needed.  */
+double
+__exp_dd (double x, double xtail)
+{
+  return exp_inline (x, xtail, 1);
+}
+
+#if USE_GLIBC_ABI
+strong_alias (exp, __exp_finite)
+hidden_alias (exp, __ieee754_exp)
+hidden_alias (__exp_dd, __exp1)
+# if LDBL_MANT_DIG == 53
+long double expl (long double x) { return exp (x); }
+# endif
 #endif
diff --git a/libc/tinymath/exp10.c b/libc/tinymath/exp10.c
index 27baff1cd..f80d13963 100644
--- a/libc/tinymath/exp10.c
+++ b/libc/tinymath/exp10.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,38 +25,135 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/math.h"
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+#define N (1 << EXP_TABLE_BITS)
+#define IndexMask (N - 1)
+#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX).  */
+#define UFlowBound -0x1.5ep+8 /* -350.  */
+#define SmallTop 0x3c6 /* top12(0x1p-57).  */
+#define BigTop 0x407   /* top12(0x1p8).  */
+#define Thresh 0x41    /* BigTop - SmallTop.  */
+#define Shift __exp_data.shift
+#define C(i) __exp_data.exp10_poly[i]
+
+static double
+special_case (uint64_t sbits, double_t tmp, uint64_t ki)
+{
+  double_t scale, y;
+
+  if (ki - (1ull << 16) < 0x80000000)
+    {
+      /* The exponent of scale might have overflowed by 1.  */
+      sbits -= 1ull << 52;
+      scale = asdouble (sbits);
+      y = 2 * (scale + scale * tmp);
+      return check_oflow (eval_as_double (y));
+    }
+
+  /* n < 0, need special care in the subnormal range.  */
+  sbits += 1022ull << 52;
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+
+  if (y < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.  */
+      double_t lo = scale - y + scale * tmp;
+      double_t hi = 1.0 + y;
+      lo = 1.0 - hi + y + lo;
+      y = eval_as_double (hi + lo) - 1.0;
+      /* Avoid -0.0 with downward rounding.  */
+      if (WANT_ROUNDING && y == 0.0)
+	y = 0.0;
+      /* The underflow exception needs to be signaled explicitly.  */
+      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+
+  return check_uflow (y);
+}
 
 /**
  * Returns 10ˣ.
+ *
+ * The largest observed error is ~0.513 ULP.
  */
-double exp10(double x)
+double
+exp10 (double x)
 {
-	static const double p10[] = {
-		1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
-		1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
-		1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
-		1e10, 1e11, 1e12, 1e13, 1e14, 1e15
-	};
-	double n, y = modf(x, &n);
-	union {double f; uint64_t i;} u = {n};
-	/* fabs(n) < 16 without raising invalid on nan */
-	if ((u.i>>52 & 0x7ff) < 0x3ff+4) {
-		if (!y) return p10[(int)n+15];
-		y = exp2(3.32192809488736234787031942948939 * y);
-		return y * p10[(int)n+15];
-	}
-	return pow(10.0, x);
+  uint64_t ix = asuint64 (x);
+  uint32_t abstop = (ix >> 52) & 0x7ff;
+
+  if (unlikely (abstop - SmallTop >= Thresh))
+    {
+      if (abstop - SmallTop >= 0x80000000)
+	/* Avoid spurious underflow for tiny x.
+	   Note: 0 is common input.  */
+	return x + 1;
+      if (abstop == 0x7ff)
+	return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
+      if (x >= OFlowBound)
+	return __math_oflow (0);
+      if (x < UFlowBound)
+	return __math_uflow (0);
+
+      /* Large x is special-cased below.  */
+      abstop = 0;
+    }
+
+  /* Reduce x: z = x * N / log10(2), k = round(z).  */
+  double_t z = __exp_data.invlog10_2N * x;
+  double_t kd;
+  int64_t ki;
+#if TOINT_INTRINSICS
+  kd = roundtoint (z);
+  ki = converttoint (z);
+#else
+  kd = eval_as_double (z + Shift);
+  kd -= Shift;
+  ki = kd;
+#endif
+
+  /* r = x - k * log10(2), r in [-0.5, 0.5].  */
+  double_t r = x;
+  r = __exp_data.neglog10_2hiN * kd + r;
+  r = __exp_data.neglog10_2loN * kd + r;
+
+  /* exp10(x) = 2^(k/N) * 2^(r/N).
+     Approximate the two components separately.  */
+
+  /* s = 2^(k/N), using lookup table.  */
+  uint64_t e = ki << (52 - EXP_TABLE_BITS);
+  uint64_t i = (ki & IndexMask) * 2;
+  uint64_t u = __exp_data.tab[i + 1];
+  uint64_t sbits = u + e;
+
+  double_t tail = asdouble (__exp_data.tab[i]);
+
+  /* 2^(r/N) ~= 1 + r * Poly(r).  */
+  double_t r2 = r * r;
+  double_t p = C (0) + r * C (1);
+  double_t y = C (2) + r * C (3);
+  y = y + r2 * C (4);
+  y = p + r2 * y;
+  y = tail + y * r;
+
+  if (unlikely (abstop == 0))
+    return special_case (sbits, y, ki);
+
+  /* Assemble components:
+     y  = 2^(r/N) * 2^(k/N)
+       ~= (y + 1) * s.  */
+  double_t s = asdouble (sbits);
+  return eval_as_double (s * y + s);
 }
 
 __strong_reference(exp10, pow10);
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 __weak_reference(exp10, exp10l);
-__weak_reference(exp10, pow10l);
 #endif
diff --git a/libc/tinymath/exp10f.c b/libc/tinymath/exp10f.c
index 16dc2071a..9bbae5350 100644
--- a/libc/tinymath/exp10f.c
+++ b/libc/tinymath/exp10f.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns 10ˣ.
diff --git a/libc/tinymath/exp10l.c b/libc/tinymath/exp10l.c
index e108701d7..1111bde6e 100644
--- a/libc/tinymath/exp10l.c
+++ b/libc/tinymath/exp10l.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns 10ˣ.
diff --git a/libc/tinymath/exp2.c b/libc/tinymath/exp2.c
index 1dc2c9a43..c62be798c 100644
--- a/libc/tinymath/exp2.c
+++ b/libc/tinymath/exp2.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,23 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/exp_data.internal.h"
-#include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Double-precision 2^x function.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define N (1 << EXP_TABLE_BITS)
 #define Shift __exp_data.exp2_shift
@@ -51,6 +36,7 @@ asm(".include \"libc/disclaimer.inc\"");
 #define C3 __exp_data.exp2_poly[2]
 #define C4 __exp_data.exp2_poly[3]
 #define C5 __exp_data.exp2_poly[4]
+#define C6 __exp_data.exp2_poly[5]
 
 /* Handle cases that may overflow or underflow when computing the result that
    is scale*(1+TMP) without intermediate rounding.  The bit representation of
@@ -59,103 +45,121 @@ asm(".include \"libc/disclaimer.inc\"");
    a double.  (int32_t)KI is the k used in the argument reduction and exponent
    adjustment of scale, positive k here means the result may overflow and
    negative k means the result may underflow.  */
-static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double_t scale, y;
+  double_t scale, y;
 
-	if ((ki & 0x80000000) == 0) {
-		/* k > 0, the exponent of scale might have overflowed by 1.  */
-		sbits -= 1ull << 52;
-		scale = asdouble(sbits);
-		y = 2 * (scale + scale * tmp);
-		return eval_as_double(y);
-	}
-	/* k < 0, need special care in the subnormal range.  */
-	sbits += 1022ull << 52;
-	scale = asdouble(sbits);
-	y = scale + scale * tmp;
-	if (y < 1.0) {
-		/* Round y to the right precision before scaling it into the subnormal
-		   range to avoid double rounding that can cause 0.5+E/2 ulp error where
-		   E is the worst-case ulp error outside the subnormal range.  So this
-		   is only useful if the goal is better than 1 ulp worst-case error.  */
-		double_t hi, lo;
-		lo = scale - y + scale * tmp;
-		hi = 1.0 + y;
-		lo = 1.0 - hi + y + lo;
-		y = eval_as_double(hi + lo) - 1.0;
-		/* Avoid -0.0 with downward rounding.  */
-		if (WANT_ROUNDING && y == 0.0)
-			y = 0.0;
-		/* The underflow exception needs to be signaled explicitly.  */
-		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
-	}
-	y = 0x1p-1022 * y;
-	return eval_as_double(y);
+  if ((ki & 0x80000000) == 0)
+    {
+      /* k > 0, the exponent of scale might have overflowed by 1.  */
+      sbits -= 1ull << 52;
+      scale = asdouble (sbits);
+      y = 2 * (scale + scale * tmp);
+      return check_oflow (eval_as_double (y));
+    }
+  /* k < 0, need special care in the subnormal range.  */
+  sbits += 1022ull << 52;
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+  if (y < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.  */
+      double_t hi, lo;
+      lo = scale - y + scale * tmp;
+      hi = 1.0 + y;
+      lo = 1.0 - hi + y + lo;
+      y = eval_as_double (hi + lo) - 1.0;
+      /* Avoid -0.0 with downward rounding.  */
+      if (WANT_ROUNDING && y == 0.0)
+	y = 0.0;
+      /* The underflow exception needs to be signaled explicitly.  */
+      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+  return check_uflow (eval_as_double (y));
 }
 
 /* Top 12 bits of a double (sign and exponent bits).  */
-static inline uint32_t top12(double x)
+static inline uint32_t
+top12 (double x)
 {
-	return asuint64(x) >> 52;
+  return asuint64 (x) >> 52;
 }
 
 /**
  * Returns 2^𝑥.
  */
-double exp2(double x)
+double
+exp2 (double x)
 {
-	uint32_t abstop;
-	uint64_t ki, idx, top, sbits;
-	double_t kd, r, r2, scale, tail, tmp;
+  uint32_t abstop;
+  uint64_t ki, idx, top, sbits;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, r, r2, scale, tail, tmp;
 
-	abstop = top12(x) & 0x7ff;
-	if (UNLIKELY(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
-		if (abstop - top12(0x1p-54) >= 0x80000000)
-			/* Avoid spurious underflow for tiny x.  */
-			/* Note: 0 is common input.  */
-			return WANT_ROUNDING ? 1.0 + x : 1.0;
-		if (abstop >= top12(1024.0)) {
-			if (asuint64(x) == asuint64(-INFINITY))
-				return 0.0;
-			if (abstop >= top12(INFINITY))
-				return 1.0 + x;
-			if (!(asuint64(x) >> 63))
-				return __math_oflow(0);
-			else if (asuint64(x) >= asuint64(-1075.0))
-				return __math_uflow(0);
-		}
-		if (2 * asuint64(x) > 2 * asuint64(928.0))
-			/* Large x is special cased below.  */
-			abstop = 0;
+  abstop = top12 (x) & 0x7ff;
+  if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+    {
+      if (abstop - top12 (0x1p-54) >= 0x80000000)
+	/* Avoid spurious underflow for tiny x.  */
+	/* Note: 0 is common input.  */
+	return WANT_ROUNDING ? 1.0 + x : 1.0;
+      if (abstop >= top12 (1024.0))
+	{
+	  if (asuint64 (x) == asuint64 (-INFINITY))
+	    return 0.0;
+	  if (abstop >= top12 (INFINITY))
+	    return 1.0 + x;
+	  if (!(asuint64 (x) >> 63))
+	    return __math_oflow (0);
+	  else if (asuint64 (x) >= asuint64 (-1075.0))
+	    return __math_uflow (0);
 	}
+      if (2 * asuint64 (x) > 2 * asuint64 (928.0))
+	/* Large x is special cased below.  */
+	abstop = 0;
+    }
 
-	/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].  */
-	/* x = k/N + r, with int k and r in [-1/2N, 1/2N].  */
-	kd = eval_as_double(x + Shift);
-	ki = asuint64(kd); /* k.  */
-	kd -= Shift; /* k/N for int k.  */
-	r = x - kd;
-	/* 2^(k/N) ~= scale * (1 + tail).  */
-	idx = 2 * (ki % N);
-	top = ki << (52 - EXP_TABLE_BITS);
-	tail = asdouble(T[idx]);
-	/* This is only a valid scale when -1023*N < k < 1024*N.  */
-	sbits = T[idx + 1] + top;
-	/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */
-	/* Evaluation is optimized assuming superscalar pipelined execution.  */
-	r2 = r * r;
-	/* Without fma the worst case error is 0.5/N ulp larger.  */
-	/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
-	tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
-	if (UNLIKELY(abstop == 0))
-		return specialcase(tmp, sbits, ki);
-	scale = asdouble(sbits);
-	/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
-	   is no spurious underflow here even without fma.  */
-	return eval_as_double(scale + scale * tmp);
+  /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].  */
+  /* x = k/N + r, with int k and r in [-1/2N, 1/2N].  */
+  kd = eval_as_double (x + Shift);
+  ki = asuint64 (kd); /* k.  */
+  kd -= Shift; /* k/N for int k.  */
+  r = x - kd;
+  /* 2^(k/N) ~= scale * (1 + tail).  */
+  idx = 2 * (ki % N);
+  top = ki << (52 - EXP_TABLE_BITS);
+  tail = asdouble (T[idx]);
+  /* This is only a valid scale when -1023*N < k < 1024*N.  */
+  sbits = T[idx + 1] + top;
+  /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  r2 = r * r;
+  /* Without fma the worst case error is 0.5/N ulp larger.  */
+  /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
+#if EXP2_POLY_ORDER == 4
+  tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
+#elif EXP2_POLY_ORDER == 5
+  tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+#elif EXP2_POLY_ORDER == 6
+  tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
+#endif
+  if (unlikely (abstop == 0))
+    return specialcase (tmp, sbits, ki);
+  scale = asdouble (sbits);
+  /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
+     is no spurious underflow here even without fma.  */
+  return eval_as_double (scale + scale * tmp);
 }
 
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(exp2, exp2l);
+#if USE_GLIBC_ABI
+strong_alias (exp2, __exp2_finite)
+hidden_alias (exp2, __ieee754_exp2)
+# if LDBL_MANT_DIG == 53
+long double exp2l (long double x) { return exp2 (x); }
+# endif
 #endif
diff --git a/libc/tinymath/exp2f.c b/libc/tinymath/exp2f.c
index 1687b8b30..5b134235d 100644
--- a/libc/tinymath/exp2f.c
+++ b/libc/tinymath/exp2f.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,23 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/exp2f_data.internal.h"
-#include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Single-precision 2^x function.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /*
 EXP2F_TABLE_BITS = 5
@@ -58,48 +43,66 @@ Non-nearest ULP error: 1 (rounded ULP error)
 #define C __exp2f_data.poly
 #define SHIFT __exp2f_data.shift_scaled
 
-static inline uint32_t top12(float x)
+static inline uint32_t
+top12 (float x)
 {
-	return asuint(x) >> 20;
+  return asuint (x) >> 20;
 }
 
 /**
  * Returns 2^𝑥.
+ *
+ * - ULP error: 0.502 (nearest rounding.)
+ * - Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
+ * - Wrong count: 168353 (all nearest rounding wrong results with fma.)
+ * - Non-nearest ULP error: 1 (rounded ULP error)
  */
-float exp2f(float x)
+float
+exp2f (float x)
 {
-	uint32_t abstop;
-	uint64_t ki, t;
-	double_t kd, xd, z, r, r2, y, s;
+  uint32_t abstop;
+  uint64_t ki, t;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, xd, z, r, r2, y, s;
 
-	xd = (double_t)x;
-	abstop = top12(x) & 0x7ff;
-	if (UNLIKELY(abstop >= top12(128.0f))) {
-		/* |x| >= 128 or x is nan.  */
-		if (asuint(x) == asuint(-INFINITY))
-			return 0.0f;
-		if (abstop >= top12(INFINITY))
-			return x + x;
-		if (x > 0.0f)
-			return __math_oflowf(0);
-		if (x <= -150.0f)
-			return __math_uflowf(0);
-	}
+  xd = (double_t) x;
+  abstop = top12 (x) & 0x7ff;
+  if (unlikely (abstop >= top12 (128.0f)))
+    {
+      /* |x| >= 128 or x is nan.  */
+      if (asuint (x) == asuint (-INFINITY))
+	return 0.0f;
+      if (abstop >= top12 (INFINITY))
+	return x + x;
+      if (x > 0.0f)
+	return __math_oflowf (0);
+      if (x <= -150.0f)
+	return __math_uflowf (0);
+#if WANT_ERRNO_UFLOW
+      if (x < -149.0f)
+	return __math_may_uflowf (0);
+#endif
+    }
 
-	/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.  */
-	kd = eval_as_double(xd + SHIFT);
-	ki = asuint64(kd);
-	kd -= SHIFT; /* k/N for int k.  */
-	r = xd - kd;
+  /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.  */
+  kd = eval_as_double (xd + SHIFT);
+  ki = asuint64 (kd);
+  kd -= SHIFT; /* k/N for int k.  */
+  r = xd - kd;
 
-	/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
-	t = T[ki % N];
-	t += ki << (52 - EXP2F_TABLE_BITS);
-	s = asdouble(t);
-	z = C[0] * r + C[1];
-	r2 = r * r;
-	y = C[2] * r + 1;
-	y = z * r2 + y;
-	y = y * s;
-	return eval_as_float(y);
+  /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+  t = T[ki % N];
+  t += ki << (52 - EXP2F_TABLE_BITS);
+  s = asdouble (t);
+  z = C[0] * r + C[1];
+  r2 = r * r;
+  y = C[2] * r + 1;
+  y = z * r2 + y;
+  y = y * s;
+  return eval_as_float (y);
 }
+
+#if USE_GLIBC_ABI
+strong_alias (exp2f, __exp2f_finite)
+hidden_alias (exp2f, __ieee754_exp2f)
+#endif
diff --git a/libc/tinymath/exp2f_data.c b/libc/tinymath/exp2f_data.c
index 94442cc73..6e21620d3 100644
--- a/libc/tinymath/exp2f_data.c
+++ b/libc/tinymath/exp2f_data.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set et ft=c ts=8 sw=8 fenc=utf-8                                     :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/exp2f_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Shared data between expf, exp2f and powf.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define N (1 << EXP2F_TABLE_BITS)
 
@@ -47,6 +35,15 @@ const struct exp2f_data __exp2f_data = {
      used for computing 2^(k/N) for an int |k| < 150 N as
      double(tab[k%N] + (k << 52-BITS)) */
   .tab = {
+#if N == 8
+0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
+0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
+#elif N == 16
+0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
+0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
+0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
+0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
+#elif N == 32
 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
@@ -55,14 +52,48 @@ const struct exp2f_data __exp2f_data = {
 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+#elif N == 64
+0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
+0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
+0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
+0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
+0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
+0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
+0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
+0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
+0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
+0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
+0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
+0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
+0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
+0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
+0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
+0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
+#endif
   },
   .shift_scaled = 0x1.8p+52 / N,
   .poly = {
+#if N == 8
+  0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
+#elif N == 16
+  0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
+#elif N == 32
   0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
+#elif N == 64
+  0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
+#endif
   },
   .shift = 0x1.8p+52,
   .invln2_scaled = 0x1.71547652b82fep+0 * N,
   .poly_scaled = {
+#if N == 8
+  0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
+#elif N == 16
+  0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
+#elif N == 32
   0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
+#elif N == 64
+  0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
+#endif
   },
 };
diff --git a/libc/tinymath/exp2f_data.internal.h b/libc/tinymath/exp2f_data.internal.h
deleted file mode 100644
index af157b4e8..000000000
--- a/libc/tinymath/exp2f_data.internal.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_
-
-#define EXP2F_TABLE_BITS 5
-#define EXP2F_POLY_ORDER 3
-
-COSMOPOLITAN_C_START_
-
-extern const struct exp2f_data {
-  uint64_t tab[1 << EXP2F_TABLE_BITS];
-  double shift_scaled;
-  double poly[EXP2F_POLY_ORDER];
-  double shift;
-  double invln2_scaled;
-  double poly_scaled[EXP2F_POLY_ORDER];
-} __exp2f_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP2F_DATA_INTERNAL_H_ */
diff --git a/libc/tinymath/exp2l.c b/libc/tinymath/exp2l.c
index 28553aed2..a4c8f9a7f 100644
--- a/libc/tinymath/exp2l.c
+++ b/libc/tinymath/exp2l.c
@@ -28,16 +28,11 @@
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
-#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
+
+#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 
 /* origin: FreeBSD /usr/src/lib/msun/ld80/s_exp2l.c and /usr/src/lib/msun/ld128/s_exp2l.c */
 /*-
diff --git a/libc/tinymath/exp_data.c b/libc/tinymath/exp_data.c
index 0d4b880a8..2bc52bc62 100644
--- a/libc/tinymath/exp_data.c
+++ b/libc/tinymath/exp_data.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,29 +25,31 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/exp_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Shared data between exp, exp2 and pow.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define N (1 << EXP_TABLE_BITS)
 
 const struct exp_data __exp_data = {
 // N/ln2
 .invln2N = 0x1.71547652b82fep0 * N,
+.invlog10_2N = 0x1.a934f0979a371p1 * N,
 // -ln2/N
+#if N == 64
+.negln2hiN = -0x1.62e42fefa0000p-7,
+.negln2loN = -0x1.cf79abc9e3b3ap-46,
+#elif N == 128
 .negln2hiN = -0x1.62e42fefa0000p-8,
 .negln2loN = -0x1.cf79abc9e3b3ap-47,
+#elif N == 256
+.negln2hiN = -0x1.62e42fefc0000p-9,
+.negln2loN = 0x1.c610ca86c3899p-45,
+#elif N == 512
+.negln2hiN = -0x1.62e42fef80000p-10,
+.negln2loN = -0x1.1cf79abc9e3b4p-45,
+#endif
+.neglog10_2hiN = -0x1.3441350ap-2 / N,
+.neglog10_2loN = 0x1.0c0219dc1da99p-39 / N,
 // Used for rounding when !TOINT_INTRINSICS
 #if EXP_USE_TOINT_NARROW
 .shift = 0x1800000000.8p0,
@@ -56,6 +58,24 @@ const struct exp_data __exp_data = {
 #endif
 // exp polynomial coefficients.
 .poly = {
+#if N == 64 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
+// abs error: 1.5543*2^-60
+// ulp error: 0.529 (0.533 without fma)
+// if |x| < ln2/128+eps
+// abs error if |x| < ln2/64: 1.7157*2^-50
+0x1.fffffffffdbcdp-2,
+0x1.555555555444cp-3,
+0x1.555573c6a9f7dp-5,
+0x1.1111266d28935p-7,
+#elif N == 64 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
+// abs error: 1.6735*2^-64
+// ulp error: 0.518 (0.522 without fma)
+// if |x| < ln2/64
+0x1.5555555548f9ap-3,
+0x1.555555554bf5dp-5,
+0x1.11115b75f0f4dp-7,
+0x1.6c171a6b6303ep-10,
+#elif N == 128 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
 // abs error: 1.555*2^-66
 // ulp error: 0.509 (0.511 without fma)
 // if |x| < ln2/256+eps
@@ -65,10 +85,63 @@ const struct exp_data __exp_data = {
 0x1.555555555543cp-3,
 0x1.55555cf172b91p-5,
 0x1.1111167a4d017p-7,
+#elif N == 128 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
+// abs error: 1.5542*2^-60
+// ulp error: 0.521 (0.523 without fma)
+// if |x| < ln2/128
+0x1.fffffffffdbcep-2,
+0x1.55555555543c2p-3,
+0x1.555573c64f2e3p-5,
+0x1.111126b4eff73p-7,
+#elif N == 128 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
+// abs error: 1.6861*2^-71
+// ulp error: 0.509 (0.511 without fma)
+// if |x| < ln2/128
+0x1.55555555548fdp-3,
+0x1.555555555658fp-5,
+0x1.111123a859bb6p-7,
+0x1.6c16ba6920cabp-10,
+#elif N == 256 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
+// abs error: 1.43*2^-58
+// ulp error: 0.549 (0.550 without fma)
+// if |x| < ln2/512
+0x1p0, // unused
+0x1.fffffffffffd4p-2,
+0x1.5555571d6ef9p-3,
+0x1.5555576a5adcep-5,
+#elif N == 256 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
+// abs error: 1.5547*2^-66
+// ulp error: 0.505 (0.506 without fma)
+// if |x| < ln2/256
+0x1.ffffffffffdbdp-2,
+0x1.555555555543cp-3,
+0x1.55555cf16e1edp-5,
+0x1.1111167a4b553p-7,
+#elif N == 512 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
+// abs error: 1.4300*2^-63
+// ulp error: 0.504
+// if |x| < ln2/1024
+// abs error if |x| < ln2/512: 1.0689*2^-55
+0x1p0, // unused
+0x1.ffffffffffffdp-2,
+0x1.555555c75bb6p-3,
+0x1.555555dec04a8p-5,
+#endif
 },
 .exp2_shift = 0x1.8p52 / N,
 // exp2 polynomial coefficients.
 .exp2_poly = {
+#if N == 64 && EXP2_POLY_ORDER == 6 && EXP2_POLY_WIDE
+// abs error: 1.3054*2^-63
+// ulp error: 0.515
+// if |x| < 1/64
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c58fp-3,
+0x1.c6b08d7045cf1p-5,
+0x1.3b2ab6fb8fd0ep-7,
+0x1.5d884afec48d7p-10,
+0x1.43097dc684ae1p-13,
+#elif N == 128 && EXP2_POLY_ORDER == 5 && !EXP2_POLY_WIDE
 // abs error: 1.2195*2^-65
 // ulp error: 0.507 (0.511 without fma)
 // if |x| < 1/256
@@ -78,11 +151,114 @@ const struct exp_data __exp_data = {
 0x1.c6b08d70cf4b5p-5,
 0x1.3b2abd24650ccp-7,
 0x1.5d7e09b4e3a84p-10,
+#elif N == 256 && EXP2_POLY_ORDER == 5 && EXP2_POLY_WIDE
+// abs error: 1.2195*2^-65
+// ulp error: 0.504 (0.508 without fma)
+// if |x| < 1/256
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c424p-3,
+0x1.c6b08d70cf4b5p-5,
+0x1.3b2abd24650ccp-7,
+0x1.5d7e09b4e3a84p-10,
+#elif N == 512 && EXP2_POLY_ORDER == 4 && !EXP2_POLY_WIDE
+// abs error: 1.4411*2^-64
+// ulp error: 0.5024 (0.5063 without fma)
+// if |x| < 1/1024
+// abs error if |x| < 1/512: 1.9430*2^-56
+0x1.62e42fefa39ecp-1,
+0x1.ebfbdff82c58bp-3,
+0x1.c6b08e46de41fp-5,
+0x1.3b2ab786ee1dap-7,
+#endif
+},
+.exp10_poly = {
+#if EXP10_POLY_WIDE
+/* Range is wider if using shift-based reduction: coeffs generated
+   using Remez in [-log10(2)/128, log10(2)/128 ].  */
+0x1.26bb1bbb55515p1,
+0x1.53524c73cd32bp1,
+0x1.0470591e1a108p1,
+0x1.2bd77b12fe9a8p0,
+0x1.14289fef24b78p-1
+#else
+/* Coeffs generated using Remez in [-log10(2)/256, log10(2)/256 ].  */
+0x1.26bb1bbb55516p1,
+0x1.53524c73ce9fep1,
+0x1.0470591ce4b26p1,
+0x1.2bd76577fe684p0,
+0x1.1446eeccd0efbp-1
+#endif
 },
 // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
 // tab[2*k] = asuint64(T[k])
 // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
 .tab = {
+#if N == 64
+0x0, 0x3ff0000000000000,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0xbc93cedd78565858, 0x3feea23882552225,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+#elif N == 128
 0x0, 0x3ff0000000000000,
 0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
 0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
@@ -211,5 +387,776 @@ const struct exp_data __exp_data = {
 0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
 0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
 0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+#elif N == 256
+0x0, 0x3ff0000000000000,
+0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0xbc82985dd8521d32, 0x3feff168143b0281,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c845fad437fa426, 0x3fefde5f72f654b1,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0xbc954529642b232f, 0x3fefd50a0e3c1f89,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c9407fb30d06420, 0x3fefb0f145e46c85,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc65704e90c9f860, 0x3fef86a814f204ab,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc897cea57e46280, 0x3fef7e95934f312e,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c8cdc1873af2155, 0x3fef582f95281c6b,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc9493684653a131, 0x3fef50e75eb44027,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0xbc98e2899077520a, 0x3fef49c18438ce4d,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c9120fcd4f59273, 0x3fef42be3578a819,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c877afbca90ef84, 0x3fef351ffb82140a,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c91512f082876ee, 0x3fef2e85711ece75,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0xbc803297e78260bf, 0x3fef21ba7591bb70,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0xbc91e75c40b4251e, 0x3fef157e39771b2f,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c98a911f1f7785a, 0x3fef0f961f641589,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0xbc979517a03e2847, 0x3feefeb83ba8ea32,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0xbc87430803972b34, 0x3feef431a2de883b,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc954de30ae02d94, 0x3feeef26231e754a,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c79c3bba5562a2f, 0x3feee0e544ede173,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc85a71612e21658, 0x3feedc70df1c5175,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0x3c86421f6f1d24d6, 0x3feed822c367a024,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc9348a6815fce65, 0x3feed3fb2709468a,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0x3c835c43984d9871, 0x3feecffa3f84b9d4,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0xbc632afc8d9473a0, 0x3feecc2042a7d232,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0xbc95fc5e44de020e, 0x3feec86d668b3237,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c892ca3bf144e63, 0x3feebe41b817c114,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0x3c73e34f67e67118, 0x3feeb8417f4531ee,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0xbc943a3540d1898a, 0x3feeaa11fba87a03,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0xbc951f58ddaa8090, 0x3feea84590998b93,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc82e1648e50a17c, 0x3feea6a320dceb71,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
+0xbc93cedd78565858, 0x3feea23882552225,
+0xbc85c33fdf910406, 0x3feea1c1c70833f6,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0x3c81079ab5789604, 0x3feea0f4b19e9538,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0x3c727df161cd7778, 0x3feea052fa75173e,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0x3c93f9924a05b767, 0x3fee9fdcddd47645,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc88e67a9006c909, 0x3fee9f8286ead08a,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc760a3629969871, 0x3feea3878491c491,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c7c88549b958471, 0x3feea9cad931a436,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c931143962f7877, 0x3feeabd0a478580f,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c93e9e96f112479, 0x3feeae05bad61778,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0xbc51669428996971, 0x3feebbdd9a7670b3,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0xbc79740b58a20091, 0x3feeca5e8d07f29e,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0xbc903d5cbe27874b, 0x3feed2c980460ad8,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c5986178980fce0, 0x3feed74a8af46052,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc7274aedac8ff80, 0x3feef68415b749b1,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc302899507554e5, 0x3fef0f69c3f3a207,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0xbc80dda2d4c0010c, 0x3fef16286141b33d,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0x3c836909391181d3, 0x3fef244778fafb22,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0xbc7ac28b7bef6621, 0x3fef33405751c4db,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0xbc8cc734592af7fc, 0x3fef43155b5bab74,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c87752a44f587e8, 0x3fef4b532b08c968,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0x3c8c254d16117a68, 0x3fef655d71ff6075,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc776caa4c2ff1cf, 0x3fef953924676d76,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c901f3a75ee0efe, 0x3fefd632798844f8,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0xbc699c7db2effc76, 0x3fefedba3692d514,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+0x3c64b458677f9840, 0x3feff9d96b2a23d9,
+#elif N == 512
+0x0, 0x3ff0000000000000,
+0xbc75d87ade1f60d5, 0x3feffd8c86da1c0a,
+0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
+0x3c9bffdaa7ac4bac, 0x3feff8ab5b2cbd11,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0x3c75c18e5ae0563a, 0x3feff3d1e77170b4,
+0xbc82985dd8521d32, 0x3feff168143b0281,
+0xbc705b1125cf49a5, 0x3fefef003103b10e,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c9f879abbff3f87, 0x3fefea363d42b027,
+0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
+0x3c9b14003824712a, 0x3fefe57411915a8a,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c61cbf0f38af658, 0x3fefe0b9b35659d8,
+0x3c845fad437fa426, 0x3fefde5f72f654b1,
+0xbc9a3316383dcbc5, 0x3fefdc0727fc1762,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0x3c9901c9e0e797fd, 0x3fefd75c74f0bec2,
+0xbc954529642b232f, 0x3fefd50a0e3c1f89,
+0xbc89b3236d111646, 0x3fefd2b99fa6407c,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0xbc8cb191be99b1b0, 0x3fefce1ead925493,
+0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
+0xbc9acb71e83765b7, 0x3fefc98ba42e7d30,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c5cd3e58b03697e, 0x3fefc50088f8093f,
+0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
+0xbc8bfb07d4755452, 0x3fefc07d61701716,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c8aedeb3e7b14cd, 0x3fefbc02331b9715,
+0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
+0x3c9a8eb1f3d914b4, 0x3fefb78f03834e52,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0xbc85b9eb0402507b, 0x3fefb323d833d93f,
+0x3c9407fb30d06420, 0x3fefb0f145e46c85,
+0xbc93f0f225bbf3ee, 0x3fefaec0b6bdae53,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0xbc9c3fe7282d1784, 0x3fefaa65a4b520ba,
+0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
+0x3c9c8be44bf4cde8, 0x3fefa612a7b26300,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0x3c820c5444c93c44, 0x3fefa1c7c55189c6,
+0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
+0xbc84c6baeb580d7a, 0x3fef9d8503328e6d,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0x3c8657aa1b0d9f83, 0x3fef994a66f951ce,
+0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
+0x3c62f2c7fd6ee145, 0x3fef9517f64d9ef1,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc6b0b2789925e90, 0x3fef90edb6db2dc1,
+0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
+0xbc93aad17d197fae, 0x3fef8ccbae51a5c8,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc989c464a07ad70, 0x3fef88b1e264a0e9,
+0xbc65704e90c9f860, 0x3fef86a814f204ab,
+0xbc72c338fce197f4, 0x3fef84a058cbae1e,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc6dca724cea0eb6, 0x3fef809717425438,
+0xbc897cea57e46280, 0x3fef7e95934f312e,
+0x3c464770b955d34d, 0x3fef7c962388149e,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0xbc962811c114424f, 0x3fef789d83606e12,
+0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
+0x3c8ec58e74904dd4, 0x3fef74ad3c92df73,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c8d63b0ab2d5bbf, 0x3fef70c554eaea89,
+0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
+0xbc9ca9effbeeac92, 0x3fef6ce5d23816c9,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c8bda920de0f6e2, 0x3fef690eba4df41f,
+0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
+0xbc9a597f9a5ff71c, 0x3fef654013041dc2,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c50835b125aa573, 0x3fef6179e2363cf8,
+0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
+0x3c8aaa13d61aec1f, 0x3fef5dbc2dc40bf0,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c8a4f81aa7110bd, 0x3fef5a06fb91588f,
+0x3c8cdc1873af2155, 0x3fef582f95281c6b,
+0xbc6817fd6a313e3e, 0x3fef565a51860746,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc96236af85fd26a, 0x3fef52b6358e15e8,
+0xbc9493684653a131, 0x3fef50e75eb44027,
+0x3c7795eb4523abe7, 0x3fef4f1aad999e82,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0x3c8fe58b91b40095, 0x3fef4b87bf9cda38,
+0xbc98e2899077520a, 0x3fef49c18438ce4d,
+0x3c91ecaa860c614a, 0x3fef47fd7190241e,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0xbc3e45c83ba0bbcb, 0x3fef447bc96ffc18,
+0x3c9120fcd4f59273, 0x3fef42be3578a819,
+0xbc29fd3bea07b4ee, 0x3fef4102cd3d09b9,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c87f1c7350e256d, 0x3fef3d9282fc1f27,
+0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
+0x3c420dac6c124f4f, 0x3fef3a2af0b63bff,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0xbc99501d09bc09fd, 0x3fef36cc1c78903a,
+0x3c877afbca90ef84, 0x3fef351ffb82140a,
+0x3c73baf864dc8675, 0x3fef33760c547f15,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c91b0575c1eaf54, 0x3fef3028c65fa1ff,
+0x3c91512f082876ee, 0x3fef2e85711ece75,
+0xbc90364bc9ce33ab, 0x3fef2ce450b3cb82,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0xbc7548165d85ed32, 0x3fef29a8b16f0a30,
+0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
+0x3c7c3b977a68e32c, 0x3fef2675eeb3ab98,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0xbc93a255f697ecfe, 0x3fef234c0ea83f36,
+0xbc803297e78260bf, 0x3fef21ba7591bb70,
+0x3c8d2d19edc1e550, 0x3fef202b17779965,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc76b2173113dd8c, 0x3fef1d130f50d65c,
+0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
+0x3c811aa5f853590b, 0x3fef1a03fc675d1f,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0x3c61d61a34c8aa02, 0x3fef16fde4f2e280,
+0xbc91e75c40b4251e, 0x3fef157e39771b2f,
+0xbc91f892bf6b286d, 0x3fef1400cf2f6c18,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c7590c65c20e680, 0x3fef110cc15d5346,
+0x3c98a911f1f7785a, 0x3fef0f961f641589,
+0x3c86fe320b5c1e9d, 0x3fef0e21c1c14833,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0xbc903cd8b2f25790, 0x3fef0b3fd6a454d2,
+0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
+0x3c7b3bf786a54a87, 0x3fef08670653dfe4,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c74bb6c41732885, 0x3fef05975721b004,
+0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
+0xbc99d7399abb9a8b, 0x3fef02d0cf63eeac,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0xbc5451d60c6ac9eb, 0x3fef001375752b40,
+0xbc979517a03e2847, 0x3feefeb83ba8ea32,
+0x3c8787a210ceafd9, 0x3feefd5f4fb45e20,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc888d1e4629943d, 0x3feefab46484ebb4,
+0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
+0xbc93369c544088b6, 0x3feef812ba4ea77d,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0x3c85373ce4eb6dfb, 0x3feef57a577dd72b,
+0xbc87430803972b34, 0x3feef431a2de883b,
+0x3c83adec8265a67f, 0x3feef2eb428335b4,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc835388bcac6bc5, 0x3feef06581d3f669,
+0xbc954de30ae02d94, 0x3feeef26231e754a,
+0x3c727cdb4e4b6640, 0x3feeede91be9c811,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0x3c86c2696a26af35, 0x3feeeb761742d808,
+0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
+0x3c888f6ff06b979a, 0x3feee90c7a61d55b,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0xbc89d5efaabc2030, 0x3feee6ac4bcdf3ea,
+0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
+0xbc76b8867f91c9d6, 0x3feee4559212ef89,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c94c9c0b5157fe6, 0x3feee20853c10f28,
+0x3c79c3bba5562a2f, 0x3feee0e544ede173,
+0xbc62455345b51c8e, 0x3feedfc4976d27fa,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc93331de45477d0, 0x3feedd8a63b0a09b,
+0xbc85a71612e21658, 0x3feedc70df1c5175,
+0xbc95f84d39b39b16, 0x3feedb59bf29743f,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0xbc72ba4dc7c4d562, 0x3feed932b07a35df,
+0x3c86421f6f1d24d6, 0x3feed822c367a024,
+0xbc844f25dc02691f, 0x3feed7153e4a136a,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc888d328eb9b501, 0x3feed5016f44d8f5,
+0xbc9348a6815fce65, 0x3feed3fb2709468a,
+0x3c7f0bec42ddb15a, 0x3feed2f74a1af3f1,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0xbc615f0a2b9cd452, 0x3feed0f6d5817663,
+0x3c835c43984d9871, 0x3feecffa3f84b9d4,
+0xbc8c2e465a919e1d, 0x3feecf0018321a1a,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0xbc865dfd02bd08f1, 0x3feecd1318eb43ec,
+0xbc632afc8d9473a0, 0x3feecc2042a7d232,
+0xbc8e68cec89b1762, 0x3feecb2fde7006f4,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0xbc48ae858eb682ca, 0x3feec9566f8827d0,
+0xbc95fc5e44de020e, 0x3feec86d668b3237,
+0x3c5dd71277c0915f, 0x3feec786d3001fe5,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c92001325ecd7fb, 0x3feec5c10fa920a1,
+0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
+0x3c65ace6e2870332, 0x3feec4052c5916c4,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0xbc9595c55690ffaf, 0x3feec2532feaada6,
+0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
+0xbc8b401ba9fb5199, 0x3feec0ab213d5283,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c6df82bf324cc57, 0x3feebf0d073537ca,
+0x3c892ca3bf144e63, 0x3feebe41b817c114,
+0x3c97cae38641c7bb, 0x3feebd78e8bb586b,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0x3c62d80c5c4a2b67, 0x3feebbeeccbd7b2a,
+0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
+0x3c8f39c10d12eaf0, 0x3feeba6eba2e35f0,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc80b582d74a55d9, 0x3feeb8f8b804f127,
+0x3c73e34f67e67118, 0x3feeb8417f4531ee,
+0xbc6b4e327ff434ca, 0x3feeb78ccd3deb0d,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc592dca38593e20, 0x3feeb62b00da3b14,
+0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
+0xbc85daca9994833e, 0x3feeb4d359dfd53d,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0xbc980b4321bc6dae, 0x3feeb385df598d78,
+0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
+0xbc8390afec5241c5, 0x3feeb24298571b06,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0x3c8f15cdafe7d586, 0x3feeb1098bed1bdf,
+0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
+0xbc910aa91ae9b67f, 0x3feeafdac1351819,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c957e1b67462375, 0x3feeaeb63f4d854c,
+0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
+0x3c8124d5051552a7, 0x3feead9c0d59ca07,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0xbc3ca103952ecf1f, 0x3feeac8c32824135,
+0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
+0x3c773345c02a4fd6, 0x3feeab86b5f43d92,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0xbc909d2a0fce20f2, 0x3feeaa8b9ee20d1e,
+0xbc943a3540d1898a, 0x3feeaa11fba87a03,
+0xbc924f2cb4f81746, 0x3feea99af482fc8f,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0xbc943592a0a9846b, 0x3feea8b4be135acc,
+0xbc951f58ddaa8090, 0x3feea84590998b93,
+0xbc956bc85d444f4f, 0x3feea7d902d47c65,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0x3c914d1e4218319f, 0x3feea707ca0cbf0f,
+0xbc82e1648e50a17c, 0x3feea6a320dceb71,
+0x3c971c93709313f4, 0x3feea6411b078d26,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0x3c7f88303b60d222, 0x3feea584fd15612a,
+0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
+0x3c70125ca18d4b5b, 0x3feea4d3778bc944,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0x3c9592ea73798b11, 0x3feea42c91c56acd,
+0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
+0xbc9371d6d7d75739, 0x3feea390532205d8,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc8ac05fd996f807, 0x3feea2fec30678b7,
+0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
+0xbc91f5067d03653a, 0x3feea277e8dcc390,
+0xbc93cedd78565858, 0x3feea23882552225,
+0x3c917339c86ce3ad, 0x3feea1fbcc140be7,
+0xbc85c33fdf910406, 0x3feea1c1c70833f6,
+0xbc77e66065ba2500, 0x3feea18a7420a036,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0x3c964c827ee6b49a, 0x3feea123e87bfb7a,
+0x3c81079ab5789604, 0x3feea0f4b19e9538,
+0xbc928311a3c73480, 0x3feea0c830a4c8d4,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0x3c882c79e185e981, 0x3feea077541ee718,
+0x3c727df161cd7778, 0x3feea052fa75173e,
+0xbc8b48cea80b043b, 0x3feea0315a736c75,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0xbc4f4863bc8e5180, 0x3fee9ff64b30aa09,
+0x3c93f9924a05b767, 0x3fee9fdcddd47645,
+0x3c954835dd4b7548, 0x3fee9fc62dea2f8a,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc8bf41f59b59f8a, 0x3fee9fa10a38cee8,
+0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
+0xbc8f652fde52775c, 0x3fee9f86e7ba9fef,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0xbc7b0300defbcf98, 0x3fee9f77ce1303f6,
+0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
+0xbc89dab646035dc0, 0x3fee9f73c4eaa988,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc91f0c230588dde, 0x3fee9f7ad3ef9011,
+0xbc88e67a9006c909, 0x3fee9f8286ead08a,
+0x3c9106450507a28c, 0x3fee9f8d02d50b8f,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0xbc9129729a10f3a0, 0x3fee9faa5953c849,
+0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
+0x3c781a70a5124f67, 0x3fee9fd2df29ce7c,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c941626ea62646d, 0x3feea0069c1a861d,
+0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
+0xbc940b9f54365b7c, 0x3feea04597eeba8f,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0x3c873455e0e826c1, 0x3feea08fda749e5d,
+0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
+0x3c94f006ad874e3e, 0x3feea0e56b7fcf03,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0xbc8f6d693d0973bb, 0x3feea14652e958aa,
+0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
+0x3c58c5ee2b7e7848, 0x3feea1b2988fb9ec,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0xbc88b25e045d207b, 0x3feea22a4456e7a3,
+0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
+0xbc69cb3314060ca7, 0x3feea2ad5e2850ac,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0x3c87a0b15d19e0bb, 0x3feea33bedf2e1b9,
+0xbc760a3629969871, 0x3feea3878491c491,
+0x3c94aa7212bfa73c, 0x3feea3d5fbab091f,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0xbc81e688272a8a12, 0x3feea47b8f4abaa9,
+0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
+0x3c4ab7b7112ec9d5, 0x3feea52cb0d1736a,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0x3c8a1e274eed4476, 0x3feea5e968443d9a,
+0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
+0x3c94a533a59324da, 0x3feea6b1bdadb46d,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0x3c7a56d2760d087d, 0x3feea785b91e07f1,
+0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
+0x3c91682c1c6e8b05, 0x3feea86562ab00ec,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c89ea99cf7a9591, 0x3feea950c27004c2,
+0x3c7c88549b958471, 0x3feea9cad931a436,
+0xbc59e57d8f92ff8e, 0x3feeaa47e08e1957,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c909b176e05a9cd, 0x3feeab4ac52be8f7,
+0x3c931143962f7877, 0x3feeabd0a478580f,
+0x3c711607f1952c95, 0x3feeac597875c644,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c869608f0f86431, 0x3feead74029db01e,
+0x3c93e9e96f112479, 0x3feeae05bad61778,
+0xbc7f1ced15c5c5c0, 0x3feeae9a6bdb5598,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0x3c614b97be3f7b4e, 0x3feeafccbc6c19e6,
+0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
+0x3c81c1701c359530, 0x3feeb10afc931857,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc8edb1bf6809287, 0x3feeb2553499284b,
+0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
+0xbc8ba58ce7a736d3, 0x3feeb3ab6ccce12c,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc93fc025e1db9ce, 0x3feeb50dad829e70,
+0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
+0xbc8d737c7d71382e, 0x3feeb67bff148396,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0x3c6ae88c43905293, 0x3feeb7f669e2802b,
+0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
+0xbc93d1f7661fe51b, 0x3feeb97cf65253d1,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0x3c651b68797ffc1c, 0x3feebb0faccf9243,
+0xbc51669428996971, 0x3feebbdd9a7670b3,
+0x3c54579c5ceed70b, 0x3feebcae95cba768,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c87298413381667, 0x3feebe59b9bddb5b,
+0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
+0xbc905000be64e965, 0x3feec01121235681,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0xbc89fb12e3454b73, 0x3feec1d4d47f2598,
+0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
+0x3c7be2a03697693b, 0x3feec3a4dc5a3dd3,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0x3c90622b15810eea, 0x3feec581414380f2,
+0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
+0x3be9a5ecc875d327, 0x3feec76a0bcfc15e,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0x3c88ea486a3350ef, 0x3feec95f4499c647,
+0xbc79740b58a20091, 0x3feeca5e8d07f29e,
+0xbc7a2ee551d4c40f, 0x3feecb60f4424fcb,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c89c31f7e38028b, 0x3feecd6f23701b15,
+0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
+0xbc5fac13f4e005a3, 0x3feecf89dacfe68c,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0x3c7d8aced7162e89, 0x3feed1b1231475f7,
+0xbc903d5cbe27874b, 0x3feed2c980460ad8,
+0xbc848f50cea7269f, 0x3feed3e504f696b1,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c821eb9a08a0542, 0x3feed625893523d4,
+0x3c5986178980fce0, 0x3feed74a8af46052,
+0xbc6133a953131cfd, 0x3feed872b8950a73,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0x3c89e95e6f4a0ae4, 0x3feedacc9be14dca,
+0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
+0xbc90260cf07cb311, 0x3feedd333beb0b7e,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c1bca400a7b939d, 0x3feedfa6a1897fd2,
+0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
+0x3c9140bc34dfc19f, 0x3feee226d59a09ee,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0xbc8c9b1da461ab87, 0x3feee4b3e100301e,
+0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
+0x3c8c115f23ebea8e, 0x3feee74dcca5a413,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc6dcab99f23f84e, 0x3feee9f4a17a4735,
+0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
+0x3c60a43e8b7e4bfe, 0x3feeeca868742ee4,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0x3c915b1397075f04, 0x3feeef692a8fa8cd,
+0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
+0xbc839f7a1f04d2b0, 0x3feef236f0cf3f3a,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc86a510f31e13e6, 0x3feef511c43bbd62,
+0xbc7274aedac8ff80, 0x3feef68415b749b1,
+0xbc92887ea88e7340, 0x3feef7f9ade433c6,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0xbc6e57ac604759ba, 0x3feefaeeb6ddfc87,
+0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
+0x3c8e6c6db4f83226, 0x3feefdf0e844bfc6,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc8d1bf10460dba0, 0x3fef01004b3a7804,
+0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
+0x3c8e5d80813dddfc, 0x3fef041ce8e77680,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0x3c8caff9640f2dcb, 0x3fef0746ca7a67a7,
+0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
+0x3c7a77557fd62db3, 0x3fef0a7df9285775,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc651ba6128db749, 0x3fef0dc27e2cb5e5,
+0xbc302899507554e5, 0x3fef0f69c3f3a207,
+0xbc7c0ffefdc5e251, 0x3fef111462c95b60,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0xbc8b6cd058bfd6fa, 0x3fef1473b0468d30,
+0xbc80dda2d4c0010c, 0x3fef16286141b33d,
+0x3c923759b8aca76d, 0x3fef17e06ff301f4,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0xbc895498a73dac7d, 0x3fef1b5aab23e61e,
+0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
+0x3c851de924583108, 0x3fef1ee26b34e065,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0xbc8c5fe4051ba06c, 0x3fef2277b9881650,
+0x3c836909391181d3, 0x3fef244778fafb22,
+0xbc6d1816c0a9ac07, 0x3fef261a9f8630ad,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0xbc7af5c67c4e8235, 0x3fef29cb269e601f,
+0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
+0xbc8304ef0045d575, 0x3fef2d89584661a1,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0x3c8725f94f910375, 0x3fef31553dfa8313,
+0xbc7ac28b7bef6621, 0x3fef33405751c4db,
+0x3c7b53e99f9191e8, 0x3fef352ee13da7cb,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc810a79e6d7e2b8, 0x3fef39164b994d23,
+0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
+0x3c840635f6d2a9c0, 0x3fef3d0b869d8f0f,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0x3c549eeef9ec910c, 0x3fef410e9be12cb9,
+0xbc8cc734592af7fc, 0x3fef43155b5bab74,
+0xbc8335827ffb9dce, 0x3fef451f95018d17,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c645563980ef762, 0x3fef493e7ba2c38c,
+0x3c87752a44f587e8, 0x3fef4b532b08c968,
+0xbc8cd0205eb2aab2, 0x3fef4d6b596f948c,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0xbc8aab80ceab2b4a, 0x3fef51a638197a3c,
+0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
+0xbc8f870f40a8ba1b, 0x3fef55ef2158a91f,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c83c119f18464c5, 0x3fef5a461eec14be,
+0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
+0xbc5a628c2be4e7c7, 0x3fef5eab3a99745b,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0xbc72550d76be719a, 0x3fef631e7e2d479d,
+0x3c8c254d16117a68, 0x3fef655d71ff6075,
+0xbc82090274667d12, 0x3fef679ff37adb4a,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0x3c75f7d28150cac4, 0x3fef6c2fa45c4dfd,
+0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
+0x3c890de9296f4cd1, 0x3fef70cd9ab294e4,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0x3c832ff9978b34bc, 0x3fef7579e065807d,
+0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
+0xbc7303b63dda1980, 0x3fef7a347f63c159,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc81f2ba385f2f95, 0x3fef7efd81a2ece1,
+0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
+0x3c768d9144ae12fc, 0x3fef83d4f11f8220,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0x3c853687f542403b, 0x3fef88bad7dcee90,
+0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
+0xbc736ed2de40b407, 0x3fef8daf3fe592e8,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc614ef56c770f3b, 0x3fef92b2334ac7ee,
+0xbc776caa4c2ff1cf, 0x3fef953924676d76,
+0x3c8df7d1353d8e88, 0x3fef97c3bc24e350,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc850bed64091b8a, 0x3fef9ce3e4933c7e,
+0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
+0x3c89d852381c317f, 0x3fefa212b6bc3181,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c68a00e3cca04c4, 0x3fefa7503ccd2be5,
+0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
+0xbc5a1f25ce94cae7, 0x3fefac9c80faa594,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0xbc6fb5f3ee307976, 0x3fefb1f78d802dc2,
+0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
+0x3c737e8ae802b851, 0x3fefb7616ca06dd6,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c875119560e34af, 0x3fefbcda28a52e59,
+0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
+0xbc7431c3840929c6, 0x3fefc261cbdf5be7,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0xbc8cb472d2e86b99, 0x3fefc7f860a70c22,
+0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
+0x3c83f5df2fde16a8, 0x3fefcd9df15b82ac,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c8eef18336b62e3, 0x3fefd35288633625,
+0x3c901f3a75ee0efe, 0x3fefd632798844f8,
+0x3c80d23f87b50a2a, 0x3fefd916302bd526,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0x3c8302dee657c8e6, 0x3fefdee8f32a4b45,
+0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
+0xbc7b0caa080df170, 0x3fefe4cadbdac61d,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0x3c7617a9f2fd24e5, 0x3fefeabbf4c0ba54,
+0xbc699c7db2effc76, 0x3fefedba3692d514,
+0x3c75f103b8fd5ca7, 0x3feff0bc4866e8ad,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+0x3c8e70b094fa075a, 0x3feff6cbe15f6314,
+0x3c64b458677f9840, 0x3feff9d96b2a23d9,
+0xbc72ec9a3e5d680a, 0x3feffceaca4391b6,
+#endif
 },
 };
diff --git a/libc/tinymath/exp_data.internal.h b/libc/tinymath/exp_data.internal.h
deleted file mode 100644
index a0713d691..000000000
--- a/libc/tinymath/exp_data.internal.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_
-
-#define EXP_TABLE_BITS       7
-#define EXP_POLY_ORDER       5
-#define EXP_USE_TOINT_NARROW 0
-#define EXP2_POLY_ORDER      5
-
-COSMOPOLITAN_C_START_
-
-extern const struct exp_data {
-  double invln2N;
-  double shift;
-  double negln2hiN;
-  double negln2loN;
-  double poly[4]; /* Last four coefficients.  */
-  double exp2_shift;
-  double exp2_poly[EXP2_POLY_ORDER];
-  uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
-} __exp_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_EXP_DATA_H_ */
diff --git a/libc/tinymath/expf.c b/libc/tinymath/expf.c
index 4879566ab..625133bac 100644
--- a/libc/tinymath/expf.c
+++ b/libc/tinymath/expf.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,23 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/exp2f_data.internal.h"
-#include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Single-precision e^x function.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /*
 EXP2F_TABLE_BITS = 5
@@ -58,59 +43,79 @@ Non-nearest ULP error: 1 (rounded ULP error)
 #define T __exp2f_data.tab
 #define C __exp2f_data.poly_scaled
 
-static inline uint32_t top12(float x)
+static inline uint32_t
+top12 (float x)
 {
-	return asuint(x) >> 20;
+  return asuint (x) >> 20;
 }
 
 /**
  * Returns 𝑒^x.
+ *
+ * - ULP error: 0.502 (nearest rounding.)
+ * - Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+ * - Wrong count: 170635 (all nearest rounding wrong results with fma.)
+ * - Non-nearest ULP error: 1 (rounded ULP error)
+ *
+ * @raise ERANGE on overflow or underflow
  */
-float expf(float x)
+float
+expf (float x)
 {
-	uint32_t abstop;
-	uint64_t ki, t;
-	double_t kd, xd, z, r, r2, y, s;
+  uint32_t abstop;
+  uint64_t ki, t;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, xd, z, r, r2, y, s;
 
-	xd = (double_t)x;
-	abstop = top12(x) & 0x7ff;
-	if (UNLIKELY(abstop >= top12(88.0f))) {
-		/* |x| >= 88 or x is nan.  */
-		if (asuint(x) == asuint(-INFINITY))
-			return 0.0f;
-		if (abstop >= top12(INFINITY))
-			return x + x;
-		if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
-			return __math_oflowf(0);
-		if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
-			return __math_uflowf(0);
-	}
+  xd = (double_t) x;
+  abstop = top12 (x) & 0x7ff;
+  if (unlikely (abstop >= top12 (88.0f)))
+    {
+      /* |x| >= 88 or x is nan.  */
+      if (asuint (x) == asuint (-INFINITY))
+	return 0.0f;
+      if (abstop >= top12 (INFINITY))
+	return x + x;
+      if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+	return __math_oflowf (0);
+      if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+	return __math_uflowf (0);
+#if WANT_ERRNO_UFLOW
+      if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
+	return __math_may_uflowf (0);
+#endif
+    }
 
-	/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
-	z = InvLn2N * xd;
+  /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
+  z = InvLn2N * xd;
 
-	/* Round and convert z to int, the result is in [-150*N, 128*N] and
-	   ideally ties-to-even rule is used, otherwise the magnitude of r
-	   can be bigger which gives larger approximation error.  */
+  /* Round and convert z to int, the result is in [-150*N, 128*N] and
+     ideally nearest int is used, otherwise the magnitude of r can be
+     bigger which gives larger approximation error.  */
 #if TOINT_INTRINSICS
-	kd = roundtoint(z);
-	ki = converttoint(z);
+  kd = roundtoint (z);
+  ki = converttoint (z);
 #else
 # define SHIFT __exp2f_data.shift
-	kd = eval_as_double(z + SHIFT);
-	ki = asuint64(kd);
-	kd -= SHIFT;
+  kd = eval_as_double (z + SHIFT);
+  ki = asuint64 (kd);
+  kd -= SHIFT;
 #endif
-	r = z - kd;
+  r = z - kd;
 
-	/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
-	t = T[ki % N];
-	t += ki << (52 - EXP2F_TABLE_BITS);
-	s = asdouble(t);
-	z = C[0] * r + C[1];
-	r2 = r * r;
-	y = C[2] * r + 1;
-	y = z * r2 + y;
-	y = y * s;
-	return eval_as_float(y);
+  /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+  t = T[ki % N];
+  t += ki << (52 - EXP2F_TABLE_BITS);
+  s = asdouble (t);
+  z = C[0] * r + C[1];
+  r2 = r * r;
+  y = C[2] * r + 1;
+  y = z * r2 + y;
+  y = y * s;
+  return eval_as_float (y);
 }
+
+#if USE_GLIBC_ABI
+strong_alias (expf, __expf_finite)
+hidden_alias (expf, __ieee754_expf)
+#endif
diff --git a/libc/tinymath/expl.c b/libc/tinymath/expl.c
index 5ff1010c3..52dffa2f4 100644
--- a/libc/tinymath/expl.c
+++ b/libc/tinymath/expl.c
@@ -17,18 +17,11 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 #include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("openbsd_libm_notice");
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_expl.c */
 /*
@@ -149,15 +142,7 @@ long double expl(long double x)
 
 #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
 #include "libc/tinymath/freebsd.internal.h"
-
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
diff --git a/libc/tinymath/expm1.c b/libc/tinymath/expm1.c
index 12600b69b..2fc2d66aa 100644
--- a/libc/tinymath/expm1.c
+++ b/libc/tinymath/expm1.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */
 /*
diff --git a/libc/tinymath/expm1f.c b/libc/tinymath/expm1f.c
index 6500aec27..097aa40e4 100644
--- a/libc/tinymath/expm1f.c
+++ b/libc/tinymath/expm1f.c
@@ -38,14 +38,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
 
 static const float
 one		= 1.0,
diff --git a/libc/tinymath/expm1l.c b/libc/tinymath/expm1l.c
index bd585bca8..45dae3426 100644
--- a/libc/tinymath/expm1l.c
+++ b/libc/tinymath/expm1l.c
@@ -1,12 +1,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
-// clang-format off
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("openbsd_libm_notice");
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_expm1l.c */
 /*
diff --git a/libc/tinymath/expo2.c b/libc/tinymath/expo2.c
index 3f8eda928..befac99ad 100644
--- a/libc/tinymath/expo2.c
+++ b/libc/tinymath/expo2.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/expo.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 #define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
 #define INSERT_WORDS(d,hi,lo)                     \
diff --git a/libc/tinymath/expo2f.c b/libc/tinymath/expo2f.c
index 6d149db93..fc5c66245 100644
--- a/libc/tinymath/expo2f.c
+++ b/libc/tinymath/expo2f.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 #define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
 #define SET_FLOAT_WORD(d,w)                       \
diff --git a/libc/tinymath/floor.c b/libc/tinymath/floor.c
index c492fc05c..a19b2e8f4 100644
--- a/libc/tinymath/floor.c
+++ b/libc/tinymath/floor.c
@@ -31,12 +31,8 @@
 #ifndef __llvm__
 #include "third_party/intel/smmintrin.internal.h"
 #endif
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
diff --git a/libc/tinymath/floorf.c b/libc/tinymath/floorf.c
index de575ee27..c83aae1a7 100644
--- a/libc/tinymath/floorf.c
+++ b/libc/tinymath/floorf.c
@@ -30,12 +30,8 @@
 #ifndef __llvm__
 #include "third_party/intel/smmintrin.internal.h"
 #endif
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns largest integral value not greater than 𝑥.
diff --git a/libc/tinymath/floorl.c b/libc/tinymath/floorl.c
index 499012d8f..37d17d9f1 100644
--- a/libc/tinymath/floorl.c
+++ b/libc/tinymath/floorl.c
@@ -29,12 +29,8 @@
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns largest integral value not greater than 𝑥.
diff --git a/libc/tinymath/fma.c b/libc/tinymath/fma.c
index 2eaf00cb0..308e31a77 100644
--- a/libc/tinymath/fma.c
+++ b/libc/tinymath/fma.c
@@ -26,12 +26,9 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+#include "libc/nexgen32e/x86feature.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 #define ASUINT64(x) ((union {double f; uint64_t i;}){x}).i
 #define ZEROINFNAN (0x7ff-0x3ff-52-1)
@@ -92,41 +89,51 @@ static void mul(uint64_t *hi, uint64_t *lo, uint64_t x, uint64_t y)
  */
 double fma(double x, double y, double z)
 {
-#if defined(__x86_64__) && defined(__FMA__) && defined(__FAST_MATH__)
+#if defined(__x86_64__) && defined(__FMA__)
 
 	// Intel Haswell+ (c. 2013)
 	// AMD Piledriver+ (c. 2011)
 	asm("vfmadd132sd\t%1,%2,%0" : "+x"(x) : "x"(y), "x"(z));
 	return x;
 
-#elif defined(__x86_64__) && defined(__FMA4__) && defined(__FAST_MATH__)
+#elif defined(__x86_64__) && defined(__FMA4__)
 
 	// AMD Bulldozer+ (c. 2011)
 	asm("vfmaddsd\t%3,%2,%1,%0" : "=x"(x) : "x"(x), "x"(y), "x"(z));
 	return x;
 
-#elif defined(__aarch64__) && defined(__FAST_MATH__)
+#elif defined(__aarch64__)
 
 	asm("fmadd\t%d0,%d1,%d2,%d3" : "=w"(x) : "w"(x), "w"(y), "w"(z));
 	return x;
 
-#elif defined(__powerpc64__) && defined(__FAST_MATH__)
+#elif defined(__powerpc64__)
 
 	asm("fmadd\t%0,%1,%2,%3" : "=d"(x) : "d"(x), "d"(y), "d"(z));
 	return x;
 
-#elif defined(__riscv) && __riscv_flen >= 64 && defined(__FAST_MATH__)
+#elif defined(__riscv) && __riscv_flen >= 64
 
 	asm("fmadd.d\t%0,%1,%2,%3" : "=f"(x) : "f"(x), "f"(y), "f"(z));
 	return x;
 
-#elif defined(__s390x__) && defined(__FAST_MATH__)
+#elif defined(__s390x__)
 
 	asm("madbr\t%0,\t%1,\t%2" : "+f"(z) : "f"(x), "f"(y));
 	return z;
 
 #else
-// #pragma STDC FENV_ACCESS ON
+/* #pragma STDC FENV_ACCESS ON */
+
+#ifdef __x86_64__
+	if (X86_HAVE(FMA)) {
+		asm("vfmadd132sd\t%1,%2,%0" : "+x"(x) : "x"(y), "x"(z));
+		return x;
+	} else if (X86_HAVE(FMA4)) {
+		asm("vfmaddsd\t%3,%2,%1,%0" : "=x"(x) : "x"(x), "x"(y), "x"(z));
+		return x;
+	}
+#endif
 
 	/* normalize so top 10bits and last bit are 0 */
 	struct num nx, ny, nz;
@@ -268,3 +275,7 @@ double fma(double x, double y, double z)
 
 #endif /* __x86_64__ */
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__weak_reference(fma, fmal);
+#endif
diff --git a/libc/tinymath/fmaf.c b/libc/tinymath/fmaf.c
index 7d651b650..544301ca5 100644
--- a/libc/tinymath/fmaf.c
+++ b/libc/tinymath/fmaf.c
@@ -26,16 +26,10 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+#include "libc/nexgen32e/x86feature.h"
 #include "libc/runtime/fenv.h"
-
-asm(".ident\t\"\\n\\n\
-Fused Multiply Add (MIT License)\\n\
-Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
 /*-
@@ -110,7 +104,7 @@ float fmaf(float x, float y, float z)
 	   so direct double-precision arithmetic suffices, except where
 	   double rounding occurs. */
 
-	/* #pragma STDC FENV_ACCESS ON */
+/* #pragma STDC FENV_ACCESS ON */
 	double xy, result;
 	union {double f; uint64_t i;} u;
 	int e;
diff --git a/libc/tinymath/fmal.c b/libc/tinymath/fmal.c
new file mode 100644
index 000000000..53b4db60f
--- /dev/null
+++ b/libc/tinymath/fmal.c
@@ -0,0 +1,284 @@
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│ Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>                      │
+│ All rights reserved.                                                         │
+│                                                                              │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
+│                                                                              │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/math.h"
+#include "libc/runtime/fenv.h"
+#include "libc/tinymath/freebsd.internal.h"
+#include "libc/tinymath/ldshape.internal.h"
+
+__static_yoink("freebsd_libm_notice");
+
+#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
+
+#if LDBL_MANT_DIG == 64
+#define LASTBIT(u) (u.i.m & 1)
+#define SPLIT      (0x1p32L + 1)
+#elif LDBL_MANT_DIG == 113
+#define LASTBIT(u) (u.i.lo & 1)
+#define SPLIT      (0x1p57L + 1)
+#endif
+
+/*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a long double.  We maintain the invariant that "hi" stores the high-order
+ * bits of the result.
+ */
+struct dd {
+  long double hi;
+  long double lo;
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd dd_add(long double a, long double b) {
+  struct dd ret;
+  long double s;
+  ret.hi = a + b;
+  s = ret.hi - a;
+  ret.lo = (a - (ret.hi - s)) + (b - s);
+  return (ret);
+}
+
+/*
+ * Compute a+b, with a small tweak:  The least significant bit of the
+ * result is adjusted into a sticky bit summarizing all the bits that
+ * were lost to rounding.  This adjustment negates the effects of double
+ * rounding when the result is added to another number with a higher
+ * exponent.  For an explanation of round and sticky bits, see any reference
+ * on FPU design, e.g.,
+ *
+ *     J. Coonen.  An Implementation Guide to a Proposed Standard for
+ *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
+ */
+static inline long double add_adjusted(long double a, long double b) {
+  struct dd sum;
+  union ldshape u;
+  sum = dd_add(a, b);
+  if (sum.lo != 0) {
+    u.f = sum.hi;
+    if (!LASTBIT(u)) sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
+  }
+  return (sum.hi);
+}
+
+/*
+ * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
+ * that the result will be subnormal, and care is taken to ensure that
+ * double rounding does not occur.
+ */
+static inline long double add_and_denormalize(long double a, long double b,
+                                              int scale) {
+  struct dd sum;
+  int bits_lost;
+  union ldshape u;
+
+  sum = dd_add(a, b);
+
+  /*
+   * If we are losing at least two bits of accuracy to denormalization,
+   * then the first lost bit becomes a round bit, and we adjust the
+   * lowest bit of sum.hi to make it a sticky bit summarizing all the
+   * bits in sum.lo. With the sticky bit adjusted, the hardware will
+   * break any ties in the correct direction.
+   *
+   * If we are losing only one bit to denormalization, however, we must
+   * break the ties manually.
+   */
+  if (sum.lo != 0) {
+    u.f = sum.hi;
+    bits_lost = -u.i.se - scale + 1;
+    if ((bits_lost != 1) ^ LASTBIT(u))
+      sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
+  }
+  return scalbnl(sum.hi, scale);
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd dd_mul(long double a, long double b) {
+  struct dd ret;
+  long double ha, hb, la, lb, p, q;
+
+  p = a * SPLIT;
+  ha = a - p;
+  ha += p;
+  la = a - ha;
+
+  p = b * SPLIT;
+  hb = b - p;
+  hb += p;
+  lb = b - hb;
+
+  p = ha * hb;
+  q = ha * lb + la * hb;
+
+  ret.hi = p + q;
+  ret.lo = p - ret.hi + q + la * lb;
+  return (ret);
+}
+
+/*
+ * Fused multiply-add: Compute x * y + z with a single rounding error.
+ *
+ * We use scaling to avoid overflow/underflow, along with the
+ * canonical precision-doubling technique adapted from:
+ *
+ *      Dekker, T.  A Floating-Point Technique for Extending the
+ *      Available Precision.  Numer. Math. 18, 224-242 (1971).
+ */
+long double fmal(long double x, long double y, long double z) {
+/* #pragma STDC FENV_ACCESS ON */
+  long double xs, ys, zs, adj;
+  struct dd xy, r;
+  int oround;
+  int ex, ey, ez;
+  int spread;
+
+  /*
+   * Handle special cases. The order of operations and the particular
+   * return values here are crucial in handling special cases involving
+   * infinities, NaNs, overflows, and signed zeroes correctly.
+   */
+  if (!isfinite(x) || !isfinite(y)) return x * y + z;
+  if (!isfinite(z)) return z;
+  if (x == 0.0 || y == 0.0) return x * y + z;
+  if (z == 0.0) return x * y;
+
+  xs = frexpl(x, &ex);
+  ys = frexpl(y, &ey);
+  zs = frexpl(z, &ez);
+  oround = fegetround();
+  spread = ex + ey - ez;
+
+  /*
+   * If x * y and z are many orders of magnitude apart, the scaling
+   * will overflow, so we handle these cases specially.  Rounding
+   * modes other than FE_TONEAREST are painful.
+   */
+  if (spread < -LDBL_MANT_DIG) {
+#ifdef FE_INEXACT
+    feraiseexcept(FE_INEXACT);
+#endif
+#ifdef FE_UNDERFLOW
+    if (!isnormal(z)) feraiseexcept(FE_UNDERFLOW);
+#endif
+    switch (oround) {
+      default: /* FE_TONEAREST */
+        return z;
+#ifdef FE_TOWARDZERO
+      case FE_TOWARDZERO:
+        if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0))
+          return z;
+        else
+          return nextafterl(z, 0);
+#endif
+#ifdef FE_DOWNWARD
+      case FE_DOWNWARD:
+        if ((x > 0.0) ^ (y < 0.0))
+          return (z);
+        else
+          return nextafterl(z, -INFINITY);
+#endif
+#ifdef FE_UPWARD
+      case FE_UPWARD:
+        if ((x > 0.0) ^ (y < 0.0))
+          return nextafterl(z, INFINITY);
+        else
+          return (z);
+#endif
+    }
+  }
+  if (spread <= LDBL_MANT_DIG * 2)
+    zs = scalbnl(zs, -spread);
+  else
+    zs = copysignl(LDBL_MIN, zs);
+
+  fesetround(FE_TONEAREST);
+
+  /*
+   * Basic approach for round-to-nearest:
+   *
+   *     (xy.hi, xy.lo) = x * y           (exact)
+   *     (r.hi, r.lo)   = xy.hi + z       (exact)
+   *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
+   *     result = r.hi + adj              (correctly rounded)
+   */
+  xy = dd_mul(xs, ys);
+  r = dd_add(xy.hi, zs);
+
+  spread = ex + ey;
+
+  if (r.hi == 0.0) {
+    /*
+     * When the addends cancel to 0, ensure that the result has
+     * the correct sign.
+     */
+    fesetround(oround);
+    volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
+    return xy.hi + vzs + scalbnl(xy.lo, spread);
+  }
+
+  if (oround != FE_TONEAREST) {
+    /*
+     * There is no need to worry about double rounding in directed
+     * rounding modes.
+     * But underflow may not be raised correctly, example in downward rounding:
+     * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L)
+     */
+    long double ret;
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+    int e = fetestexcept(FE_INEXACT);
+    feclearexcept(FE_INEXACT);
+#endif
+    fesetround(oround);
+    adj = r.lo + xy.lo;
+    ret = scalbnl(r.hi + adj, spread);
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+    if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT))
+      feraiseexcept(FE_UNDERFLOW);
+    else if (e)
+      feraiseexcept(FE_INEXACT);
+#endif
+    return ret;
+  }
+
+  adj = add_adjusted(r.lo, xy.lo);
+  if (spread + ilogbl(r.hi) > -16383)
+    return scalbnl(r.hi + adj, spread);
+  else
+    return add_and_denormalize(r.hi, adj, spread);
+}
+
+#endif
diff --git a/libc/tinymath/fmod.c b/libc/tinymath/fmod.c
index e08205299..b9956e9ef 100644
--- a/libc/tinymath/fmod.c
+++ b/libc/tinymath/fmod.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Does (𝑥 rem 𝑦) w/ round()-style rounding.
diff --git a/libc/tinymath/fmodf.c b/libc/tinymath/fmodf.c
index 6e01ef28c..5e74153ef 100644
--- a/libc/tinymath/fmodf.c
+++ b/libc/tinymath/fmodf.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 float fmodf(float x, float y)
 {
diff --git a/libc/tinymath/fmodl.c b/libc/tinymath/fmodl.c
index 2b7ecf532..ffe9fdb66 100644
--- a/libc/tinymath/fmodl.c
+++ b/libc/tinymath/fmodl.c
@@ -28,12 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Does (𝑥 rem 𝑦) w/ round()-style rounding.
diff --git a/libc/tinymath/freebsd.internal.h b/libc/tinymath/freebsd.internal.h
index 0aca0a1ac..dff035568 100644
--- a/libc/tinymath/freebsd.internal.h
+++ b/libc/tinymath/freebsd.internal.h
@@ -5,7 +5,6 @@
 #include "libc/math.h"
 #include "libc/runtime/fenv.h"
 COSMOPOLITAN_C_START_
-// clang-format off
 
 #define	__CONCAT1(x,y)	x ## y
 #define	__CONCAT(x,y)	__CONCAT1(x,y)
diff --git a/libc/tinymath/frexp.c b/libc/tinymath/frexp.c
index 4ba2e1b7a..78a8e80b3 100644
--- a/libc/tinymath/frexp.c
+++ b/libc/tinymath/frexp.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Splits number normalized fraction and exponent.
diff --git a/libc/tinymath/frexpf.c b/libc/tinymath/frexpf.c
index 4666ec4e5..759a87006 100644
--- a/libc/tinymath/frexpf.c
+++ b/libc/tinymath/frexpf.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
 
 /**
  * Splits number normalized fraction and exponent.
diff --git a/libc/tinymath/frexpl.c b/libc/tinymath/frexpl.c
index 3d4fd6b41..619442c45 100644
--- a/libc/tinymath/frexpl.c
+++ b/libc/tinymath/frexpl.c
@@ -29,13 +29,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
-#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("freebsd_libm_notice");
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
 /**
  * Splits number normalized fraction and exponent.
diff --git a/libc/tinymath/horner_wrap.internal.h b/libc/tinymath/horner_wrap.internal.h
index 984c728c7..0c213a4a0 100644
--- a/libc/tinymath/horner_wrap.internal.h
+++ b/libc/tinymath/horner_wrap.internal.h
@@ -8,7 +8,6 @@
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-// clang-format off
 #define  HORNER_1_(x, c, i) FMA(c(i + 1), x, c(i))
 #define  HORNER_2_(x, c, i) FMA(HORNER_1_ (x, c, i + 1), x, c(i))
 #define  HORNER_3_(x, c, i) FMA(HORNER_2_ (x, c, i + 1), x, c(i))
diff --git a/libc/tinymath/hypot.c b/libc/tinymath/hypot.c
index cd8cac22b..fbc6b7737 100644
--- a/libc/tinymath/hypot.c
+++ b/libc/tinymath/hypot.c
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64
 #define SPLIT (0x1p32 + 1)
diff --git a/libc/tinymath/hypotf.c b/libc/tinymath/hypotf.c
index 5dd9c8eef..22ab375a3 100644
--- a/libc/tinymath/hypotf.c
+++ b/libc/tinymath/hypotf.c
@@ -26,13 +26,13 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
+/**
+ * Returns euclidean distance.
+ *
+ * Max observed error is 1 ulp.
+ */
 float hypotf(float x, float y)
 {
 	union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;
diff --git a/libc/tinymath/hypotf2.c b/libc/tinymath/hypotf2.c
new file mode 100644
index 000000000..734884e43
--- /dev/null
+++ b/libc/tinymath/hypotf2.c
@@ -0,0 +1,175 @@
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+╚──────────────────────────────────────────────────────────────────────────────╝
+│                                                                              │
+│ FreeBSD lib/msun/src/e_hypotf.c                                              │
+│ Copyright (c) 1992-2023 The FreeBSD Project.                                 │
+│                                                                              │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
+│                                                                              │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
+│                                                                              │
+│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.            │
+│                                                                              │
+│ Developed at SunPro, a Sun Microsystems, Inc. business.                      │
+│ Permission to use, copy, modify, and distribute this                         │
+│ software is freely granted, provided that this notice                        │
+│ is preserved.                                                                │
+│                                                                              │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
+
+static	const float	one	= 1.0, tiny=1.0e-30;
+
+float
+sqrtf2(float x)
+{
+	float z;
+	int32_t sign = (int)0x80000000;
+	int32_t ix,s,q,m,t,i;
+	uint32_t r;
+
+	GET_FLOAT_WORD(ix,x);
+
+    /* take care of Inf and NaN */
+	if((ix&0x7f800000)==0x7f800000) {
+	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
+					   sqrt(-inf)=sNaN */
+	}
+    /* take care of zero */
+	if(ix<=0) {
+	    if((ix&(~sign))==0) return x;/* sqrt(+-0) = +-0 */
+	    else if(ix<0)
+		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
+	}
+    /* normalize x */
+	m = (ix>>23);
+	if(m==0) {				/* subnormal x */
+	    for(i=0;(ix&0x00800000)==0;i++) ix<<=1;
+	    m -= i-1;
+	}
+	m -= 127;	/* unbias exponent */
+	ix = (ix&0x007fffff)|0x00800000;
+	if(m&1)	/* odd m, double x to make it even */
+	    ix += ix;
+	m >>= 1;	/* m = [m/2] */
+
+    /* generate sqrt(x) bit by bit */
+	ix += ix;
+	q = s = 0;		/* q = sqrt(x) */
+	r = 0x01000000;		/* r = moving bit from right to left */
+
+	while(r!=0) {
+	    t = s+r;
+	    if(t<=ix) {
+		s    = t+r;
+		ix  -= t;
+		q   += r;
+	    }
+	    ix += ix;
+	    r>>=1;
+	}
+
+    /* use floating add to find out rounding direction */
+	if(ix!=0) {
+	    z = one-tiny; /* trigger inexact flag */
+	    if (z>=one) {
+	        z = one+tiny;
+		if (z>one)
+		    q += 2;
+		else
+		    q += (q&1);
+	    }
+	}
+	ix = (q>>1)+0x3f000000;
+	ix += ((uint32_t)m <<23);
+	SET_FLOAT_WORD(z,ix);
+	return z;
+}
+
+/**
+ * Returns euclidean distance.
+ *
+ * Error is less than 1 ULP.
+ */
+float
+hypotf2(float x, float y)
+{
+	float a,b,t1,t2,y1,y2,w;
+	int32_t j,k,ha,hb;
+
+	GET_FLOAT_WORD(ha,x);
+	ha &= 0x7fffffff;
+	GET_FLOAT_WORD(hb,y);
+	hb &= 0x7fffffff;
+	if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
+	a = fabsf(a);
+	b = fabsf(b);
+	if((ha-hb)>0xf000000) {return a+b;} /* x/y > 2**30 */
+	k=0;
+	if(ha > 0x58800000) {	/* a>2**50 */
+	   if(ha >= 0x7f800000) {	/* Inf or NaN */
+	       /* Use original arg order iff result is NaN; quieten sNaNs. */
+	       w = fabsl(x+0.0L)-fabsf(y+0);
+	       if(ha == 0x7f800000) w = a;
+	       if(hb == 0x7f800000) w = b;
+	       return w;
+	   }
+	   /* scale a and b by 2**-68 */
+	   ha -= 0x22000000; hb -= 0x22000000;	k += 68;
+	   SET_FLOAT_WORD(a,ha);
+	   SET_FLOAT_WORD(b,hb);
+	}
+	if(hb < 0x26800000) {	/* b < 2**-50 */
+	    if(hb <= 0x007fffff) {	/* subnormal b or 0 */
+	        if(hb==0) return a;
+		SET_FLOAT_WORD(t1,0x7e800000);	/* t1=2^126 */
+		b *= t1;
+		a *= t1;
+		k -= 126;
+	    } else {		/* scale a and b by 2^68 */
+	        ha += 0x22000000; 	/* a *= 2^68 */
+		hb += 0x22000000;	/* b *= 2^68 */
+		k -= 68;
+		SET_FLOAT_WORD(a,ha);
+		SET_FLOAT_WORD(b,hb);
+	    }
+	}
+    /* medium size a and b */
+	w = a-b;
+	if (w>b) {
+	    SET_FLOAT_WORD(t1,ha&0xfffff000);
+	    t2 = a-t1;
+	    w  = sqrtf2(t1*t1-(b*(-b)-t2*(a+t1)));
+	} else {
+	    a  = a+a;
+	    SET_FLOAT_WORD(y1,hb&0xfffff000);
+	    y2 = b - y1;
+	    SET_FLOAT_WORD(t1,(ha+0x00800000)&0xfffff000);
+	    t2 = a - t1;
+	    w  = sqrtf2(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+	}
+	if(k!=0) {
+	    SET_FLOAT_WORD(t1,(127+k)<<23);
+	    return t1*w;
+	} else return w;
+}
diff --git a/libc/tinymath/hypotl.c b/libc/tinymath/hypotl.c
index b524fd52b..a569d3239 100644
--- a/libc/tinymath/hypotl.c
+++ b/libc/tinymath/hypotl.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #if LDBL_MANT_DIG == 64
 #define SPLIT (0x1p32L+1)
diff --git a/libc/tinymath/ilogb.c b/libc/tinymath/ilogb.c
index 429d17a27..595da8c28 100644
--- a/libc/tinymath/ilogb.c
+++ b/libc/tinymath/ilogb.c
@@ -28,19 +28,15 @@
 #include "libc/limits.h"
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns log₂𝑥 exponent part of double.
  */
 int ilogb(double x)
 {
-	// #pragma STDC FENV_ACCESS ON
+/* #pragma STDC FENV_ACCESS ON */
 	union {double f; uint64_t i;} u = {x};
 	uint64_t i = u.i;
 	int e = i>>52 & 0x7ff;
diff --git a/libc/tinymath/ilogbf.c b/libc/tinymath/ilogbf.c
index 0d91f0cbc..744594eb1 100644
--- a/libc/tinymath/ilogbf.c
+++ b/libc/tinymath/ilogbf.c
@@ -28,19 +28,15 @@
 #include "libc/limits.h"
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns log₂𝑥 exponent part of double.
  */
 int ilogbf(float x)
 {
-	// #pragma STDC FENV_ACCESS ON
+/* #pragma STDC FENV_ACCESS ON */
 	union {float f; uint32_t i;} u = {x};
 	uint32_t i = u.i;
 	int e = i>>23 & 0xff;
diff --git a/libc/tinymath/internal.h b/libc/tinymath/internal.h
index cc121fed0..5d4b097b1 100644
--- a/libc/tinymath/internal.h
+++ b/libc/tinymath/internal.h
@@ -8,7 +8,6 @@ COSMOPOLITAN_C_START_
 #define issignalingf_inline(x) 0
 #define issignaling_inline(x)  0
 
-// clang-format off
 #define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
 #define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
 #define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
diff --git a/libc/tinymath/invtrigl.c b/libc/tinymath/invtrigl.c
index 67d984089..c03ff89da 100644
--- a/libc/tinymath/invtrigl.c
+++ b/libc/tinymath/invtrigl.c
@@ -31,15 +31,9 @@
 #include "libc/tinymath/invtrigl.internal.h"
 #include "libc/tinymath/kernel.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
 
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 static const long double
diff --git a/libc/tinymath/j0.c b/libc/tinymath/j0.c
index 6d6eda6d5..2c4746213 100644
--- a/libc/tinymath/j0.c
+++ b/libc/tinymath/j0.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */
 /*
diff --git a/libc/tinymath/j0f.c b/libc/tinymath/j0f.c
index 6723ab68b..80c663da0 100644
--- a/libc/tinymath/j0f.c
+++ b/libc/tinymath/j0f.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */
 /*
diff --git a/libc/tinymath/j1.c b/libc/tinymath/j1.c
index bd3240687..25b04ce1c 100644
--- a/libc/tinymath/j1.c
+++ b/libc/tinymath/j1.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */
 /*
diff --git a/libc/tinymath/j1f.c b/libc/tinymath/j1f.c
index 8b86031f3..0b016b683 100644
--- a/libc/tinymath/j1f.c
+++ b/libc/tinymath/j1f.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */
 /*
diff --git a/libc/tinymath/jn.c b/libc/tinymath/jn.c
index 5df41b18b..2b5213e21 100644
--- a/libc/tinymath/jn.c
+++ b/libc/tinymath/jn.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */
 /*
diff --git a/libc/tinymath/jnf.c b/libc/tinymath/jnf.c
index ff33eb8ac..09f67338d 100644
--- a/libc/tinymath/jnf.c
+++ b/libc/tinymath/jnf.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */
 /*
diff --git a/libc/tinymath/kcos.c b/libc/tinymath/kcos.c
index f2467a0b7..a80529aec 100644
--- a/libc/tinymath/kcos.c
+++ b/libc/tinymath/kcos.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */
 /*
diff --git a/libc/tinymath/kcosl.c b/libc/tinymath/kcosl.c
index 8d54f5fcb..6563de59b 100644
--- a/libc/tinymath/kcosl.c
+++ b/libc/tinymath/kcosl.c
@@ -27,18 +27,10 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/ld80/k_cosl.c */
 /* origin: FreeBSD /usr/src/lib/msun/ld128/k_cosl.c */
diff --git a/libc/tinymath/kexpl.c b/libc/tinymath/kexpl.c
index 5dc19b613..d02bf2aaa 100644
--- a/libc/tinymath/kexpl.c
+++ b/libc/tinymath/kexpl.c
@@ -37,11 +37,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_libm_notice");
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-// clang-format off
 
 /*
  * ld128 version of k_expl.h.  See ../ld80/s_expl.c for most comments.
diff --git a/libc/tinymath/ksin.c b/libc/tinymath/ksin.c
index 2a61bcbac..080382c19 100644
--- a/libc/tinymath/ksin.c
+++ b/libc/tinymath/ksin.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
 /*
diff --git a/libc/tinymath/ksinl.c b/libc/tinymath/ksinl.c
index c430c0322..bce105867 100644
--- a/libc/tinymath/ksinl.c
+++ b/libc/tinymath/ksinl.c
@@ -28,17 +28,9 @@
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/ld80/k_sinl.c */
 /* origin: FreeBSD /usr/src/lib/msun/ld128/k_sinl.c */
diff --git a/libc/tinymath/ktan.c b/libc/tinymath/ktan.c
index 695c6b70e..02ca7daa6 100644
--- a/libc/tinymath/ktan.c
+++ b/libc/tinymath/ktan.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */
 /*
diff --git a/libc/tinymath/ktanl.c b/libc/tinymath/ktanl.c
index cbe9228b8..f7b618bc2 100644
--- a/libc/tinymath/ktanl.c
+++ b/libc/tinymath/ktanl.c
@@ -27,19 +27,12 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
-#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
+
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
 /* origin: FreeBSD /usr/src/lib/msun/ld80/k_tanl.c */
 /* origin: FreeBSD /usr/src/lib/msun/ld128/k_tanl.c */
diff --git a/libc/tinymath/lgamma_r.c b/libc/tinymath/lgamma_r.c
index 356155bca..4632b144c 100644
--- a/libc/tinymath/lgamma_r.c
+++ b/libc/tinymath/lgamma_r.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */
 /*
diff --git a/libc/tinymath/lgammaf_r.c b/libc/tinymath/lgammaf_r.c
index f51eeea00..ee6b7a286 100644
--- a/libc/tinymath/lgammaf_r.c
+++ b/libc/tinymath/lgammaf_r.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */
 /*
diff --git a/libc/tinymath/lgammal.c b/libc/tinymath/lgammal.c
index de54194b7..ec3309acb 100644
--- a/libc/tinymath/lgammal.c
+++ b/libc/tinymath/lgammal.c
@@ -29,15 +29,9 @@
 #include "libc/tinymath/kernel.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
+__static_yoink("musl_libc_notice");
+__static_yoink("openbsd_libm_notice");
 
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_lgammal.c */
 /*
diff --git a/libc/tinymath/log.c b/libc/tinymath/log.c
index 61b13ef62..662d1e427 100644
--- a/libc/tinymath/log.c
+++ b/libc/tinymath/log.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,23 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/log_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Double-precision log(x) function.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define T __log_data.tab
 #define T2 __log_data.tab2
@@ -52,95 +37,151 @@ asm(".include \"libc/disclaimer.inc\"");
 #define N (1 << LOG_TABLE_BITS)
 #define OFF 0x3fe6000000000000
 
-/**
- * Returns natural logarithm of 𝑥.
- */
-double log(double x)
+/* Top 16 bits of a double.  */
+static inline uint32_t
+top16 (double x)
 {
-	double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
-	uint64_t ix, iz, tmp;
-	uint32_t top;
-	int k, i;
-
-	ix = asuint64(x);
-	top = ix >> 48;
-#define LO asuint64(1.0 - 0x1p-4)
-#define HI asuint64(1.0 + 0x1.09p-4)
-	if (UNLIKELY(ix - LO < HI - LO)) {
-		/* Handle close to 1.0 inputs separately.  */
-		/* Fix sign of zero with downward rounding when x==1.  */
-		if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
-			return 0;
-		r = x - 1.0;
-		r2 = r * r;
-		r3 = r * r2;
-		y = r3 *
-		    (B[1] + r * B[2] + r2 * B[3] +
-		     r3 * (B[4] + r * B[5] + r2 * B[6] +
-			   r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
-		/* Worst-case error is around 0.507 ULP.  */
-		w = r * 0x1p27;
-		double_t rhi = r + w - w;
-		double_t rlo = r - rhi;
-		w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
-		hi = r + w;
-		lo = r - hi + w;
-		lo += B[0] * rlo * (rhi + r);
-		y += lo;
-		y += hi;
-		return eval_as_double(y);
-	}
-	if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
-		/* x < 0x1p-1022 or inf or nan.  */
-		if (ix * 2 == 0)
-			return __math_divzero(1);
-		if (ix == asuint64(INFINITY)) /* log(inf) == inf.  */
-			return x;
-		if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
-			return __math_invalid(x);
-		/* x is subnormal, normalize it.  */
-		ix = asuint64(x * 0x1p52);
-		ix -= 52ULL << 52;
-	}
-
-	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
-	   The range is split into N subintervals.
-	   The ith subinterval contains z and c is near its center.  */
-	tmp = ix - OFF;
-	i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
-	k = (int64_t)tmp >> 52; /* arithmetic shift */
-	iz = ix - (tmp & 0xfffULL << 52);
-	invc = T[i].invc;
-	logc = T[i].logc;
-	z = asdouble(iz);
-
-	/* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
-	/* r ~= z/c - 1, |r| < 1/(2*N).  */
-#if __FP_FAST_FMA
-	/* rounding error: 0x1p-55/N.  */
-	r = __builtin_fma(z, invc, -1.0);
-#else
-	/* rounding error: 0x1p-55/N + 0x1p-66.  */
-	r = (z - T2[i].chi - T2[i].clo) * invc;
-#endif
-	kd = (double_t)k;
-
-	/* hi + lo = r + log(c) + k*Ln2.  */
-	w = kd * Ln2hi + logc;
-	hi = w + r;
-	lo = w - hi + r + kd * Ln2lo;
-
-	/* log(x) = lo + (log1p(r) - r) + hi.  */
-	r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
-	/* Worst case error if |y| > 0x1p-5:
-	   0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
-	   Worst case error if |y| > 0x1p-4:
-	   0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).  */
-	y = lo + r2 * A[0] +
-	    r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
-	return eval_as_double(y);
+  return asuint64 (x) >> 48;
 }
 
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(log, logl);
+/**
+ * Returns natural logarithm of 𝑥.
+ *
+ * @raise EDOM and FE_INVALID if x is negative
+ * @raise ERANGE and FE_DIVBYZERO if x is zero
+ */
+double
+log (double x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+  uint64_t ix, iz, tmp;
+  uint32_t top;
+  int k, i;
+
+  ix = asuint64 (x);
+  top = top16 (x);
+
+#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
+# define LO asuint64 (1.0 - 0x1p-5)
+# define HI asuint64 (1.0 + 0x1.1p-5)
+#elif LOG_POLY1_ORDER == 12
+# define LO asuint64 (1.0 - 0x1p-4)
+# define HI asuint64 (1.0 + 0x1.09p-4)
+#endif
+  if (unlikely (ix - LO < HI - LO))
+    {
+      /* Handle close to 1.0 inputs separately.  */
+      /* Fix sign of zero with downward rounding when x==1.  */
+      if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+	return 0;
+      r = x - 1.0;
+      r2 = r * r;
+      r3 = r * r2;
+#if LOG_POLY1_ORDER == 10
+      /* Worst-case error is around 0.516 ULP.  */
+      y = r3 * (B[1] + r * B[2] + r2 * B[3]
+		+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
+      w = B[0] * r2; /* B[0] == -0.5.  */
+      hi = r + w;
+      y += r - hi + w;
+      y += hi;
+#elif LOG_POLY1_ORDER == 11
+      /* Worst-case error is around 0.516 ULP.  */
+      y = r3 * (B[1] + r * B[2]
+		+ r2 * (B[3] + r * B[4] + r2 * B[5]
+			+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
+      w = B[0] * r2; /* B[0] == -0.5.  */
+      hi = r + w;
+      y += r - hi + w;
+      y += hi;
+#elif LOG_POLY1_ORDER == 12
+      y = r3 * (B[1] + r * B[2] + r2 * B[3]
+		+ r3 * (B[4] + r * B[5] + r2 * B[6]
+			+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+# if N <= 64
+      /* Worst-case error is around 0.532 ULP.  */
+      w = B[0] * r2; /* B[0] == -0.5.  */
+      hi = r + w;
+      y += r - hi + w;
+      y += hi;
+# else
+      /* Worst-case error is around 0.507 ULP.  */
+      w = r * 0x1p27;
+      double_t rhi = r + w - w;
+      double_t rlo = r - rhi;
+      w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
+      hi = r + w;
+      lo = r - hi + w;
+      lo += B[0] * rlo * (rhi + r);
+      y += lo;
+      y += hi;
+# endif
+#endif
+      return eval_as_double (y);
+    }
+  if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+    {
+      /* x < 0x1p-1022 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzero (1);
+      if (ix == asuint64 (INFINITY)) /* log(inf) == inf.  */
+	return x;
+      if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+	return __math_invalid (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint64 (x * 0x1p52);
+      ix -= 52ULL << 52;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
+  k = (int64_t) tmp >> 52; /* arithmetic shift */
+  iz = ix - (tmp & 0xfffULL << 52);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = asdouble (iz);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
+  /* r ~= z/c - 1, |r| < 1/(2*N).  */
+#if HAVE_FAST_FMA
+  /* rounding error: 0x1p-55/N.  */
+  r = fma (z, invc, -1.0);
+#else
+  /* rounding error: 0x1p-55/N + 0x1p-66.  */
+  r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+  kd = (double_t) k;
+
+  /* hi + lo = r + log(c) + k*Ln2.  */
+  w = kd * Ln2hi + logc;
+  hi = w + r;
+  lo = w - hi + r + kd * Ln2lo;
+
+  /* log(x) = lo + (log1p(r) - r) + hi.  */
+  r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
+  /* Worst case error if |y| > 0x1p-5:
+     0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
+     Worst case error if |y| > 0x1p-4:
+     0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).  */
+#if LOG_POLY_ORDER == 6
+  y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+#elif LOG_POLY_ORDER == 7
+  y = lo
+      + r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+	      + r2 * r2 * (A[4] + r * A[5]))
+      + hi;
+#endif
+  return eval_as_double (y);
+}
+
+#if USE_GLIBC_ABI
+strong_alias (log, __log_finite)
+hidden_alias (log, __ieee754_log)
+# if LDBL_MANT_DIG == 53
+long double logl (long double x) { return log (x); }
+# endif
 #endif
diff --git a/libc/tinymath/log10.c b/libc/tinymath/log10.c
index 3c3f19650..79d14e8b8 100644
--- a/libc/tinymath/log10.c
+++ b/libc/tinymath/log10.c
@@ -29,16 +29,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 #include "libc/tinymath/internal.h"
-#include "libc/tinymath/log2_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
 /*
diff --git a/libc/tinymath/log10f.c b/libc/tinymath/log10f.c
index e078f8982..90ecb8cea 100644
--- a/libc/tinymath/log10f.c
+++ b/libc/tinymath/log10f.c
@@ -26,15 +26,9 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */
 /*
diff --git a/libc/tinymath/log10l.c b/libc/tinymath/log10l.c
index 04f658b86..35de4cc8b 100644
--- a/libc/tinymath/log10l.c
+++ b/libc/tinymath/log10l.c
@@ -28,15 +28,9 @@
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
+__static_yoink("musl_libc_notice");
+__static_yoink("openbsd_libm_notice");
 
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_log10l.c */
 /*
diff --git a/libc/tinymath/log1p.c b/libc/tinymath/log1p.c
index 3d6883422..c965de88b 100644
--- a/libc/tinymath/log1p.c
+++ b/libc/tinymath/log1p.c
@@ -2,78 +2,84 @@
 │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│ Copyright (c) 1992-2024 The FreeBSD Project                                  │
+│ Copyright (c) 1993 Sun Microsystems, Inc.                                    │
+│ All rights reserved.                                                         │
 │                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
 │                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/log_data.internal.h"
+#include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
 /* double log1p(double x)
- * Return the natural logarithm of 1+x.
  *
  * Method :
  *   1. Argument Reduction: find k and f such that
- *                      1+x = 2^k * (1+f),
- *         where  sqrt(2)/2 < 1+f < sqrt(2) .
+ *			1+x = 2^k * (1+f),
+ *	   where  sqrt(2)/2 < 1+f < sqrt(2) .
  *
  *      Note. If k=0, then f=x is exact. However, if k!=0, then f
- *      may not be representable exactly. In that case, a correction
- *      term is need. Let u=1+x rounded. Let c = (1+x)-u, then
- *      log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
- *      and add back the correction term c/u.
- *      (Note: when x > 2**53, one can simply return log(x))
+ *	may not be representable exactly. In that case, a correction
+ *	term is need. Let u=1+x rounded. Let c = (1+x)-u, then
+ *	log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
+ *	and add back the correction term c/u.
+ *	(Note: when x > 2**53, one can simply return log(x))
  *
- *   2. Approximation of log(1+f): See log.c
+ *   2. Approximation of log1p(f).
+ *	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+ *		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+ *	     	 = 2s + s*R
+ *      We use a special Reme algorithm on [0,0.1716] to generate
+ * 	a polynomial of degree 14 to approximate R The maximum error
+ *	of this polynomial approximation is bounded by 2**-58.45. In
+ *	other words,
+ *		        2      4      6      8      10      12      14
+ *	    R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
+ *  	(the values of Lp1 to Lp7 are listed in the program)
+ *	and
+ *	    |      2          14          |     -58.45
+ *	    | Lp1*s +...+Lp7*s    -  R(z) | <= 2
+ *	    |                             |
+ *	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+ *	In order to guarantee error in log below 1ulp, we compute log
+ *	by
+ *		log1p(f) = f - (hfsq - s*(hfsq+R)).
  *
- *   3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c
+ *	3. Finally, log1p(x) = k*ln2 + log1p(f).
+ *		 	     = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
+ *	   Here ln2 is split into two floating point number:
+ *			ln2_hi + ln2_lo,
+ *	   where n*ln2_hi is always exact for |n| < 2000.
  *
  * Special cases:
- *      log1p(x) is NaN with signal if x < -1 (including -INF) ;
- *      log1p(+INF) is +INF; log1p(-1) is -INF with signal;
- *      log1p(NaN) is that NaN with no signal.
+ *	log1p(x) is NaN with signal if x < -1 (including -INF) ;
+ *	log1p(+INF) is +INF; log1p(-1) is -INF with signal;
+ *	log1p(NaN) is that NaN with no signal.
  *
  * Accuracy:
- *      according to an error analysis, the error is always less than
- *      1 ulp (unit in the last place).
+ *	according to an error analysis, the error is always less than
+ *	1 ulp (unit in the last place).
  *
  * Constants:
  * The hexadecimal values are the intended ones for the following
@@ -82,84 +88,110 @@ asm(".include \"libc/disclaimer.inc\"");
  * to produce the hexadecimal values shown.
  *
  * Note: Assuming log() return accurate answer, the following
- *       algorithm can be used to compute log1p(x) to within a few ULP:
+ * 	 algorithm can be used to compute log1p(x) to within a few ULP:
  *
- *              u = 1+x;
- *              if(u==1.0) return x ; else
- *                         return log(u)*(x/(u-1.0));
+ *		u = 1+x;
+ *		if(u==1.0) return x ; else
+ *			   return log(u)*(x/(u-1.0));
  *
- *       See HP-15C Advanced Functions Handbook, p.193.
+ *	 See HP-15C Advanced Functions Handbook, p.193.
  */
 
 static const double
-ln2_hi = 6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */
-ln2_lo = 1.90821492927058770002e-10,  /* 3dea39ef 35793c76 */
-Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+ln2_hi  =  6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
+ln2_lo  =  1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
+two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
+Lp1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+Lp2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+Lp3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+Lp4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+Lp5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+Lp6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+Lp7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+
+static const double zero = 0.0;
+static volatile double vzero = 0.0;
 
 /**
- * Returns log(𝟷+𝑥).
+ * Returns log(1 + x).
  */
-double log1p(double x)
+double
+log1p(double x)
 {
-	union {double f; uint64_t i;} u = {x};
-	double_t hfsq,f,c,s,z,R,w,t1,t2,dk;
-	uint32_t hx,hu;
-	int k;
+	double hfsq,f,c,s,z,R,u;
+	int32_t k,hx,hu,ax;
+
+	GET_HIGH_WORD(hx,x);
+	ax = hx&0x7fffffff;
 
-	hx = u.i>>32;
 	k = 1;
-	if (hx < 0x3fda827a || hx>>31) {  /* 1+x < sqrt(2)+ */
-		if (hx >= 0xbff00000) {  /* x <= -1.0 */
-			if (x == -1)
-				return x/0.0; /* log1p(-1) = -inf */
-			return (x-x)/0.0;     /* log1p(x<-1) = NaN */
-		}
-		if (hx<<1 < 0x3ca00000<<1) {  /* |x| < 2**-53 */
-			/* underflow if subnormal */
-			if ((hx&0x7ff00000) == 0)
-				FORCE_EVAL((float)x);
-			return x;
-		}
-		if (hx <= 0xbfd2bec4) {  /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
-			k = 0;
-			c = 0;
-			f = x;
-		}
-	} else if (hx >= 0x7ff00000)
-		return x;
-	if (k) {
-		u.f = 1 + x;
-		hu = u.i>>32;
-		hu += 0x3ff00000 - 0x3fe6a09e;
-		k = (int)(hu>>20) - 0x3ff;
-		/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
-		if (k < 54) {
-			c = k >= 2 ? 1-(u.f-x) : x-(u.f-1);
-			c /= u.f;
-		} else
-			c = 0;
-		/* reduce u into [sqrt(2)/2, sqrt(2)] */
-		hu = (hu&0x000fffff) + 0x3fe6a09e;
-		u.i = (uint64_t)hu<<32 | (u.i&0xffffffff);
-		f = u.f - 1;
+	if (hx < 0x3FDA827A) {			/* 1+x < sqrt(2)+ */
+	    if(ax>=0x3ff00000) {		/* x <= -1.0 */
+		if(x==-1.0) return -two54/vzero; /* log1p(-1)=+inf */
+		else return (x-x)/(x-x);	/* log1p(x<-1)=NaN */
+	    }
+	    if(ax<0x3e200000) {			/* |x| < 2**-29 */
+		if(two54+x>zero			/* raise inexact */
+	            &&ax<0x3c900000) 		/* |x| < 2**-54 */
+		    return x;
+		else
+		    return x - x*x*0.5;
+	    }
+	    if(hx>0||hx<=((int32_t)0xbfd2bec4)) {
+		k=0;f=x;hu=1;}		/* sqrt(2)/2- <= 1+x < sqrt(2)+ */
 	}
-	hfsq = 0.5*f*f;
-	s = f/(2.0+f);
+	if (hx >= 0x7ff00000) return x+x;
+	if(k!=0) {
+	    if(hx<0x43400000) {
+		STRICT_ASSIGN(double,u,1.0+x);
+		GET_HIGH_WORD(hu,u);
+	        k  = (hu>>20)-1023;
+	        c  = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
+		c /= u;
+	    } else {
+		u  = x;
+		GET_HIGH_WORD(hu,u);
+	        k  = (hu>>20)-1023;
+		c  = 0;
+	    }
+	    hu &= 0x000fffff;
+	    /*
+	     * The approximation to sqrt(2) used in thresholds is not
+	     * critical.  However, the ones used above must give less
+	     * strict bounds than the one here so that the k==0 case is
+	     * never reached from here, since here we have committed to
+	     * using the correction term but don't use it if k==0.
+	     */
+	    if(hu<0x6a09e) {			/* u ~< sqrt(2) */
+	        SET_HIGH_WORD(u,hu|0x3ff00000);	/* normalize u */
+	    } else {
+	        k += 1;
+		SET_HIGH_WORD(u,hu|0x3fe00000);	/* normalize u/2 */
+	        hu = (0x00100000-hu)>>2;
+	    }
+	    f = u-1.0;
+	}
+	hfsq=0.5*f*f;
+	if(hu==0) {	/* |f| < 2**-20 */
+	    if(f==zero) {
+		if(k==0) {
+		    return zero;
+		} else {
+		    c += k*ln2_lo;
+		    return k*ln2_hi+c;
+		}
+	    }
+	    R = hfsq*(1.0-0.66666666666666666*f);
+	    if(k==0) return f-R; else
+	    	     return k*ln2_hi-((R-(k*ln2_lo+c))-f);
+	}
+ 	s = f/(2.0+f);
 	z = s*s;
-	w = z*z;
-	t1 = w*(Lg2+w*(Lg4+w*Lg6));
-	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	R = t2 + t1;
-	dk = k;
-	return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;
+	R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
+	if(k==0) return f-(hfsq-s*(hfsq+R)); else
+		 return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
 }
 
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+#if (LDBL_MANT_DIG == 53)
 __weak_reference(log1p, log1pl);
 #endif
diff --git a/libc/tinymath/log1pf.c b/libc/tinymath/log1pf.c
index 0f1b867f9..04e388154 100644
--- a/libc/tinymath/log1pf.c
+++ b/libc/tinymath/log1pf.c
@@ -1,180 +1,133 @@
-/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8                               :vi │
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│ Copyright (c) 1992-2024 The FreeBSD Project                                  │
+│ Copyright (c) 1993 Sun Microsystems, Inc.                                    │
+│ All rights reserved.                                                         │
 │                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
+│ Redistribution and use in source and binary forms, with or without           │
+│ modification, are permitted provided that the following conditions           │
+│ are met:                                                                     │
+│ 1. Redistributions of source code must retain the above copyright            │
+│    notice, this list of conditions and the following disclaimer.             │
+│ 2. Redistributions in binary form must reproduce the above copyright         │
+│    notice, this list of conditions and the following disclaimer in the       │
+│    documentation and/or other materials provided with the distribution.      │
 │                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
-│                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
+│ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND       │
+│ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE        │
+│ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   │
+│ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE      │
+│ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL   │
+│ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS      │
+│ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        │
+│ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT   │
+│ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    │
+│ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF       │
+│ SUCH DAMAGE.                                                                 │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/log1pf_data.internal.h"
+#include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_libm_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+/* s_log1pf.c -- float version of s_log1p.c.
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ */
 
-#define Ln2 (0x1.62e43p-1f)
-#define SignMask (0x80000000)
+static const float
+ln2_hi =   6.9313812256e-01,	/* 0x3f317180 */
+ln2_lo =   9.0580006145e-06,	/* 0x3717f7d1 */
+two25 =    3.355443200e+07,	/* 0x4c000000 */
+Lp1 = 6.6666668653e-01,	/* 3F2AAAAB */
+Lp2 = 4.0000000596e-01,	/* 3ECCCCCD */
+Lp3 = 2.8571429849e-01, /* 3E924925 */
+Lp4 = 2.2222198546e-01, /* 3E638E29 */
+Lp5 = 1.8183572590e-01, /* 3E3A3325 */
+Lp6 = 1.5313838422e-01, /* 3E1CD04F */
+Lp7 = 1.4798198640e-01; /* 3E178897 */
 
-/* Biased exponent of the largest float m for which m^8 underflows.  */
-#define M8UFLOW_BOUND_BEXP 112
-/* Biased exponent of the largest float for which we just return x.  */
-#define TINY_BOUND_BEXP 103
+static const float zero = 0.0;
+static volatile float vzero = 0.0;
 
-#define C(i) __log1pf_data.coeffs[i]
-
-static inline float
-eval_poly (float m, uint32_t e)
-{
-#ifdef LOG1PF_2U5
-
-  /* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
-     slightly modified Estrin scheme (no x^0 term, and x term is just x).  */
-  float p_12 = fmaf (m, C (1), C (0));
-  float p_34 = fmaf (m, C (3), C (2));
-  float p_56 = fmaf (m, C (5), C (4));
-  float p_78 = fmaf (m, C (7), C (6));
-
-  float m2 = m * m;
-  float p_02 = fmaf (m2, p_12, m);
-  float p_36 = fmaf (m2, p_56, p_34);
-  float p_79 = fmaf (m2, C (8), p_78);
-
-  float m4 = m2 * m2;
-  float p_06 = fmaf (m4, p_36, p_02);
-
-  if (UNLIKELY (e < M8UFLOW_BOUND_BEXP))
-    return p_06;
-
-  float m8 = m4 * m4;
-  return fmaf (m8, p_79, p_06);
-
-#elif defined(LOG1PF_1U3)
-
-  /* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
-     scheme. Our polynomial approximation for log1p has the form
-     x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
-     Hence approximation has the form m + m^2 * P(m)
-       where P(x) = C1 + C2 * x + C3 * x^2 + ... .  */
-  return fmaf (m, m * HORNER_8 (m, C), m);
-
-#else
-#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
-#endif
-}
-
-static inline uint32_t
-biased_exponent (uint32_t ix)
-{
-  return (ix & 0x7f800000) >> 23;
-}
-
-/* log1pf approximation using polynomial on reduced interval. Worst-case error
-   when using Estrin is roughly 2.02 ULP:
-   log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3.  */
+/**
+ * Returns log(1 + x).
+ */
 float
-log1pf (float x)
+log1pf(float x)
 {
-  uint32_t ix = asuint (x);
-  uint32_t ia = ix & ~SignMask;
-  uint32_t ia12 = ia >> 20;
-  uint32_t e = biased_exponent (ix);
+	float hfsq,f,c,s,z,R,u;
+	int32_t k,hx,hu,ax;
 
-  /* Handle special cases first.  */
-  if (UNLIKELY (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000
-		|| e <= TINY_BOUND_BEXP))
-    {
-      if (ix == 0xff800000)
-	{
-	  /* x == -Inf => log1pf(x) =  NaN.  */
-	  return NAN;
+	GET_FLOAT_WORD(hx,x);
+	ax = hx&0x7fffffff;
+
+	k = 1;
+	if (hx < 0x3ed413d0) {			/* 1+x < sqrt(2)+  */
+	    if(ax>=0x3f800000) {		/* x <= -1.0 */
+		if(x==(float)-1.0) return -two25/vzero; /* log1p(-1)=+inf */
+		else return (x-x)/(x-x);	/* log1p(x<-1)=NaN */
+	    }
+	    if(ax<0x38000000) {			/* |x| < 2**-15 */
+		if(two25+x>zero			/* raise inexact */
+	            &&ax<0x33800000) 		/* |x| < 2**-24 */
+		    return x;
+		else
+		    return x - x*x*(float)0.5;
+	    }
+	    if(hx>0||hx<=((int32_t)0xbe95f619)) {
+		k=0;f=x;hu=1;}		/* sqrt(2)/2- <= 1+x < sqrt(2)+ */
 	}
-      if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8)
-	{
-	  /* |x| < TinyBound => log1p(x)  =  x.
-	      x ==       Inf => log1pf(x) = Inf.  */
-	  return x;
+	if (hx >= 0x7f800000) return x+x;
+	if(k!=0) {
+	    if(hx<0x5a000000) {
+		STRICT_ASSIGN(float,u,(float)1.0+x);
+		GET_FLOAT_WORD(hu,u);
+	        k  = (hu>>23)-127;
+		/* correction term */
+	        c  = (k>0)? (float)1.0-(u-x):x-(u-(float)1.0);
+		c /= u;
+	    } else {
+		u  = x;
+		GET_FLOAT_WORD(hu,u);
+	        k  = (hu>>23)-127;
+		c  = 0;
+	    }
+	    hu &= 0x007fffff;
+	    /*
+	     * The approximation to sqrt(2) used in thresholds is not
+	     * critical.  However, the ones used above must give less
+	     * strict bounds than the one here so that the k==0 case is
+	     * never reached from here, since here we have committed to
+	     * using the correction term but don't use it if k==0.
+	     */
+	    if(hu<0x3504f4) {			/* u < sqrt(2) */
+	        SET_FLOAT_WORD(u,hu|0x3f800000);/* normalize u */
+	    } else {
+	        k += 1;
+		SET_FLOAT_WORD(u,hu|0x3f000000);	/* normalize u/2 */
+	        hu = (0x00800000-hu)>>2;
+	    }
+	    f = u-(float)1.0;
 	}
-      if (ix == 0xbf800000)
-	{
-	  /* x == -1.0 => log1pf(x) = -Inf.  */
-	  return __math_divzerof (-1);
+	hfsq=(float)0.5*f*f;
+	if(hu==0) {	/* |f| < 2**-20 */
+	    if(f==zero) {
+		if(k==0) {
+		    return zero;
+		} else {
+		    c += k*ln2_lo;
+		    return k*ln2_hi+c;
+		}
+	    }
+	    R = hfsq*((float)1.0-(float)0.66666666666666666*f);
+	    if(k==0) return f-R; else
+	    	     return k*ln2_hi-((R-(k*ln2_lo+c))-f);
 	}
-      if (ia12 >= 0x7f8)
-	{
-	  /* x == +/-NaN => log1pf(x) = NaN.  */
-	  return __math_invalidf (asfloat (ia));
-	}
-      /* x <    -1.0 => log1pf(x) = NaN.  */
-      return __math_invalidf (x);
-    }
-
-  /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
-			   is in [-0.25, 0.5]):
-     log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
-
-     We approximate log1p(m) with a polynomial, then scale by
-     k*log(2). Instead of doing this directly, we use an intermediate
-     scale factor s = 4*k*log(2) to ensure the scale is representable
-     as a normalised fp32 number.  */
-
-  if (ix <= 0x3f000000 || ia <= 0x3e800000)
-    {
-      /* If x is in [-0.25, 0.5] then we can shortcut all the logic
-	 below, as k = 0 and m = x.  All we need is to return the
-	 polynomial.  */
-      return eval_poly (x, e);
-    }
-
-  float m = x + 1.0f;
-
-  /* k is used scale the input. 0x3f400000 is chosen as we are trying to
-     reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
-     Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
-	 let k = sign * 2^p      where sign = -1 if x < 0
-					       1 otherwise
-	 and p is a negative integer whose magnitude increases with the
-	 magnitude of x.  */
-  int k = (asuint (m) - 0x3f400000) & 0xff800000;
-
-  /* By using integer arithmetic, we obtain the necessary scaling by
-     subtracting the unbiased exponent of k from the exponent of x.  */
-  float m_scale = asfloat (asuint (x) - k);
-
-  /* Scale up to ensure that the scale factor is representable as normalised
-     fp32 number (s in [2**-126,2**26]), and scale m down accordingly.  */
-  float s = asfloat (asuint (4.0f) - k);
-  m_scale = m_scale + fmaf (0.25f, s, -1.0f);
-
-  float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
-
-  /* The scale factor to be applied back at the end - by multiplying float(k)
-     by 2^-23 we get the unbiased exponent of k.  */
-  float scale_back = (float) k * 0x1.0p-23f;
-
-  /* Apply the scaling back.  */
-  return fmaf (scale_back, Ln2, p);
+ 	s = f/((float)2.0+f);
+	z = s*s;
+	R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
+	if(k==0) return f-(hfsq-s*(hfsq+R)); else
+		 return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
 }
diff --git a/libc/tinymath/log1pf_data.internal.h b/libc/tinymath/log1pf_data.internal.h
deleted file mode 100644
index 867f79fd3..000000000
--- a/libc/tinymath/log1pf_data.internal.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_
-COSMOPOLITAN_C_START_
-
-#define LOG1PF_2U5
-#define V_LOG1PF_2U5
-#define LOG1PF_NCOEFFS 9
-extern const struct log1pf_data {
-  float coeffs[LOG1PF_NCOEFFS];  //
-} __log1pf_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG1PF_DATA_H_ */
diff --git a/libc/tinymath/log1pl.c b/libc/tinymath/log1pl.c
index 42f5a3b17..784feab56 100644
--- a/libc/tinymath/log1pl.c
+++ b/libc/tinymath/log1pl.c
@@ -29,15 +29,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("openbsd_libm_notice");
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/s_log1pl.c */
 /*
diff --git a/libc/tinymath/log2.c b/libc/tinymath/log2.c
index d1d4d40f9..a1f802383 100644
--- a/libc/tinymath/log2.c
+++ b/libc/tinymath/log2.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,24 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/log2_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Double-precision log2(x) function.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define T __log2_data.tab
 #define T2 __log2_data.tab2
@@ -54,110 +38,126 @@ asm(".include \"libc/disclaimer.inc\"");
 #define OFF 0x3fe6000000000000
 
 /* Top 16 bits of a double.  */
-static inline uint32_t top16(double x)
+static inline uint32_t
+top16 (double x)
 {
-	return asuint64(x) >> 48;
+  return asuint64 (x) >> 48;
 }
 
 /**
- * Calculates log₂𝑥.
+ * Returns base 2 logarithm of x.
  */
-double log2(double x)
+double
+log2 (double x)
 {
-	double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
-	uint64_t ix, iz, tmp;
-	uint32_t top;
-	int k, i;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
+  uint64_t ix, iz, tmp;
+  uint32_t top;
+  int k, i;
 
-	ix = asuint64(x);
-	top = top16(x);
-#define LO asuint64(1.0 - 0x1.5b51p-5)
-#define HI asuint64(1.0 + 0x1.6ab2p-5)
-	if (UNLIKELY(ix - LO < HI - LO)) {
-		/* Handle close to 1.0 inputs separately.  */
-		/* Fix sign of zero with downward rounding when x==1.  */
-		if (WANT_ROUNDING && UNLIKELY(ix == asuint64(1.0)))
-			return 0;
-		r = x - 1.0;
-#if __FP_FAST_FMA
-		hi = r * InvLn2hi;
-		lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
-#else
-		double_t rhi, rlo;
-		rhi = asdouble(asuint64(r) & -1ULL << 32);
-		rlo = r - rhi;
-		hi = rhi * InvLn2hi;
-		lo = rlo * InvLn2hi + r * InvLn2lo;
+  ix = asuint64 (x);
+  top = top16 (x);
+
+#if LOG2_POLY1_ORDER == 11
+# define LO asuint64 (1.0 - 0x1.5b51p-5)
+# define HI asuint64 (1.0 + 0x1.6ab2p-5)
 #endif
-		r2 = r * r; /* rounding error: 0x1p-62.  */
-		r4 = r2 * r2;
-		/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
-		p = r2 * (B[0] + r * B[1]);
-		y = hi + p;
-		lo += hi - y + p;
-		lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
-			    r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
-		y += lo;
-		return eval_as_double(y);
-	}
-	if (UNLIKELY(top - 0x0010 >= 0x7ff0 - 0x0010)) {
-		/* x < 0x1p-1022 or inf or nan.  */
-		if (ix * 2 == 0)
-			return __math_divzero(1);
-		if (ix == asuint64(INFINITY)) /* log(inf) == inf.  */
-			return x;
-		if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
-			return __math_invalid(x);
-		/* x is subnormal, normalize it.  */
-		ix = asuint64(x * 0x1p52);
-		ix -= 52ULL << 52;
-	}
-
-	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
-	   The range is split into N subintervals.
-	   The ith subinterval contains z and c is near its center.  */
-	tmp = ix - OFF;
-	i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
-	k = (int64_t)tmp >> 52; /* arithmetic shift */
-	iz = ix - (tmp & 0xfffULL << 52);
-	invc = T[i].invc;
-	logc = T[i].logc;
-	z = asdouble(iz);
-	kd = (double_t)k;
-
-	/* log2(x) = log2(z/c) + log2(c) + k.  */
-	/* r ~= z/c - 1, |r| < 1/(2*N).  */
-#if __FP_FAST_FMA
-	/* rounding error: 0x1p-55/N.  */
-	r = __builtin_fma(z, invc, -1.0);
-	t1 = r * InvLn2hi;
-	t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
+  if (unlikely (ix - LO < HI - LO))
+    {
+      /* Handle close to 1.0 inputs separately.  */
+      /* Fix sign of zero with downward rounding when x==1.  */
+      if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+	return 0;
+      r = x - 1.0;
+#if HAVE_FAST_FMA
+      hi = r * InvLn2hi;
+      lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
 #else
-	double_t rhi, rlo;
-	/* rounding error: 0x1p-55/N + 0x1p-65.  */
-	r = (z - T2[i].chi - T2[i].clo) * invc;
-	rhi = asdouble(asuint64(r) & -1ULL << 32);
-	rlo = r - rhi;
-	t1 = rhi * InvLn2hi;
-	t2 = rlo * InvLn2hi + r * InvLn2lo;
+      double_t rhi, rlo;
+      rhi = asdouble (asuint64 (r) & -1ULL << 32);
+      rlo = r - rhi;
+      hi = rhi * InvLn2hi;
+      lo = rlo * InvLn2hi + r * InvLn2lo;
+#endif
+      r2 = r * r; /* rounding error: 0x1p-62.  */
+      r4 = r2 * r2;
+#if LOG2_POLY1_ORDER == 11
+      /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
+      p = r2 * (B[0] + r * B[1]);
+      y = hi + p;
+      lo += hi - y + p;
+      lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+		  + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
+      y += lo;
+#endif
+      return eval_as_double (y);
+    }
+  if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+    {
+      /* x < 0x1p-1022 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzero (1);
+      if (ix == asuint64 (INFINITY)) /* log(inf) == inf.  */
+	return x;
+      if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+	return __math_invalid (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint64 (x * 0x1p52);
+      ix -= 52ULL << 52;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
+  k = (int64_t) tmp >> 52; /* arithmetic shift */
+  iz = ix - (tmp & 0xfffULL << 52);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = asdouble (iz);
+  kd = (double_t) k;
+
+  /* log2(x) = log2(z/c) + log2(c) + k.  */
+  /* r ~= z/c - 1, |r| < 1/(2*N).  */
+#if HAVE_FAST_FMA
+  /* rounding error: 0x1p-55/N.  */
+  r = fma (z, invc, -1.0);
+  t1 = r * InvLn2hi;
+  t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
+#else
+  double_t rhi, rlo;
+  /* rounding error: 0x1p-55/N + 0x1p-65.  */
+  r = (z - T2[i].chi - T2[i].clo) * invc;
+  rhi = asdouble (asuint64 (r) & -1ULL << 32);
+  rlo = r - rhi;
+  t1 = rhi * InvLn2hi;
+  t2 = rlo * InvLn2hi + r * InvLn2lo;
 #endif
 
-	/* hi + lo = r/ln2 + log2(c) + k.  */
-	t3 = kd + logc;
-	hi = t3 + t1;
-	lo = t3 - hi + t1 + t2;
+  /* hi + lo = r/ln2 + log2(c) + k.  */
+  t3 = kd + logc;
+  hi = t3 + t1;
+  lo = t3 - hi + t1 + t2;
 
-	/* log2(r+1) = r/ln2 + r^2*poly(r).  */
-	/* Evaluation is optimized assuming superscalar pipelined execution.  */
-	r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
-	r4 = r2 * r2;
-	/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
-	   ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
-	p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
-	y = lo + r2 * p + hi;
-	return eval_as_double(y);
+  /* log2(r+1) = r/ln2 + r^2*poly(r).  */
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
+  r4 = r2 * r2;
+#if LOG2_POLY_ORDER == 7
+  /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
+     ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
+  p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
+  y = lo + r2 * p + hi;
+#endif
+  return eval_as_double (y);
 }
 
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(log2, log2l);
+#if USE_GLIBC_ABI
+strong_alias (log2, __log2_finite)
+hidden_alias (log2, __ieee754_log2)
+# if LDBL_MANT_DIG == 53
+long double log2l (long double x) { return log2 (x); }
+# endif
 #endif
diff --git a/libc/tinymath/log2_data.c b/libc/tinymath/log2_data.c
index 259ffd0db..bd0658073 100644
--- a/libc/tinymath/log2_data.c
+++ b/libc/tinymath/log2_data.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/log2_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Data for log2.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define N (1 << LOG2_TABLE_BITS)
 
@@ -47,6 +35,7 @@ const struct log2_data __log2_data = {
 .invln2hi = 0x1.7154765200000p+0,
 .invln2lo = 0x1.705fc2eefa200p-33,
 .poly1 = {
+#if LOG2_POLY1_ORDER == 11
 // relative error: 0x1.2fad8188p-63
 // in -0x1.5b51p-5 0x1.6ab2p-5
 -0x1.71547652b82fep-1,
@@ -59,8 +48,10 @@ const struct log2_data __log2_data = {
 0x1.484d154f01b4ap-3,
 -0x1.289e4a72c383cp-3,
 0x1.0b32f285aee66p-3,
+#endif
 },
 .poly = {
+#if N == 64 && LOG2_POLY_ORDER == 7
 // relative error: 0x1.a72c2bf8p-58
 // abs error: 0x1.67a552c8p-66
 // in -0x1.f45p-8 0x1.f45p-8
@@ -70,6 +61,7 @@ const struct log2_data __log2_data = {
 0x1.2776c50034c48p-2,
 -0x1.ec7b328ea92bcp-3,
 0x1.a6225e117f92ep-3,
+#endif
 },
 /* Algorithm:
 
@@ -98,6 +90,7 @@ single rounding error when there is no fast fma for z*invc - 1, 3) ensures
 that logc + poly(z/c - 1) has small error, however near x == 1 when
 |log2(x)| < 0x1p-4, this is not enough so that is special cased.  */
 .tab = {
+#if N == 64
 {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
 {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
 {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
@@ -162,9 +155,11 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
 {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
 {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
 {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
+#endif
 },
-#if !__FP_FAST_FMA
+#if !HAVE_FAST_FMA
 .tab2 = {
+# if N == 64
 {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
 {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
 {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
@@ -229,6 +224,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
 {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
 {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
 {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
+# endif
 },
-#endif
+#endif /* !HAVE_FAST_FMA */
 };
diff --git a/libc/tinymath/log2_data.internal.h b/libc/tinymath/log2_data.internal.h
deleted file mode 100644
index d838672f6..000000000
--- a/libc/tinymath/log2_data.internal.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_
-
-#define LOG2_TABLE_BITS  6
-#define LOG2_POLY_ORDER  7
-#define LOG2_POLY1_ORDER 11
-
-COSMOPOLITAN_C_START_
-
-extern const struct log2_data {
-  double invln2hi;
-  double invln2lo;
-  double poly[LOG2_POLY_ORDER - 1];
-  double poly1[LOG2_POLY1_ORDER - 1];
-  struct {
-    double invc, logc;
-  } tab[1 << LOG2_TABLE_BITS];
-#if !__FP_FAST_FMA
-  struct {
-    double chi, clo;
-  } tab2[1 << LOG2_TABLE_BITS];
-#endif
-} __log2_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2_DATA_INTERNAL_H_ */
diff --git a/libc/tinymath/log2f.c b/libc/tinymath/log2f.c
index 118a02d29..044bd4004 100644
--- a/libc/tinymath/log2f.c
+++ b/libc/tinymath/log2f.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,24 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/log2f_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Single-precision log2 function.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /*
 LOG2F_TABLE_BITS = 4
@@ -58,52 +42,65 @@ Relative error: 1.9 * 2^-26 (before rounding.)
 #define OFF 0x3f330000
 
 /**
- * Calculates log₂𝑥.
+ * Returns base-2 logarithm of x.
+ *
+ * - ULP error: 0.752 (nearest rounding.)
+ * - Relative error: 1.9 * 2^-26 (before rounding.)
  */
-float log2f(float x)
+float
+log2f (float x)
 {
-	double_t z, r, r2, p, y, y0, invc, logc;
-	uint32_t ix, iz, top, tmp;
-	int k, i;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, p, y, y0, invc, logc;
+  uint32_t ix, iz, top, tmp;
+  int k, i;
 
-	ix = asuint(x);
-	/* Fix sign of zero with downward rounding when x==1.  */
-	if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000))
-		return 0;
-	if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
-		/* x < 0x1p-126 or inf or nan.  */
-		if (ix * 2 == 0)
-			return __math_divzerof(1);
-		if (ix == 0x7f800000) /* log2(inf) == inf.  */
-			return x;
-		if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
-			return __math_invalidf(x);
-		/* x is subnormal, normalize it.  */
-		ix = asuint(x * 0x1p23f);
-		ix -= 23 << 23;
-	}
+  ix = asuint (x);
+#if WANT_ROUNDING
+  /* Fix sign of zero with downward rounding when x==1.  */
+  if (unlikely (ix == 0x3f800000))
+    return 0;
+#endif
+  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+    {
+      /* x < 0x1p-126 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzerof (1);
+      if (ix == 0x7f800000) /* log2(inf) == inf.  */
+	return x;
+      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+	return __math_invalidf (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint (x * 0x1p23f);
+      ix -= 23 << 23;
+    }
 
-	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
-	   The range is split into N subintervals.
-	   The ith subinterval contains z and c is near its center.  */
-	tmp = ix - OFF;
-	i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
-	top = tmp & 0xff800000;
-	iz = ix - top;
-	k = (int32_t)tmp >> 23; /* arithmetic shift */
-	invc = T[i].invc;
-	logc = T[i].logc;
-	z = (double_t)asfloat(iz);
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
+  top = tmp & 0xff800000;
+  iz = ix - top;
+  k = (int32_t) tmp >> 23; /* arithmetic shift */
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
 
-	/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
-	r = z * invc - 1;
-	y0 = logc + (double_t)k;
+  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k;
 
-	/* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
-	r2 = r * r;
-	y = A[1] * r + A[2];
-	y = A[0] * r2 + y;
-	p = A[3] * r + y0;
-	y = y * r2 + p;
-	return eval_as_float(y);
+  /* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  p = A[3] * r + y0;
+  y = y * r2 + p;
+  return eval_as_float (y);
 }
+
+#if USE_GLIBC_ABI
+strong_alias (log2f, __log2f_finite)
+hidden_alias (log2f, __ieee754_log2f)
+#endif
diff --git a/libc/tinymath/log2f_data.c b/libc/tinymath/log2f_data.c
index 783213a91..f34e7a8b4 100644
--- a/libc/tinymath/log2f_data.c
+++ b/libc/tinymath/log2f_data.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/log2f_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Data definition for log2f.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 const struct log2f_data __log2f_data = {
   .tab = {
diff --git a/libc/tinymath/log2f_data.internal.h b/libc/tinymath/log2f_data.internal.h
deleted file mode 100644
index 835274890..000000000
--- a/libc/tinymath/log2f_data.internal.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_
-
-#define LOG2F_TABLE_BITS 4
-#define LOG2F_POLY_ORDER 4
-
-COSMOPOLITAN_C_START_
-
-extern const struct log2f_data {
-  struct {
-    double invc, logc;
-  } tab[1 << LOG2F_TABLE_BITS];
-  double poly[LOG2F_POLY_ORDER];
-} __log2f_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG2F_DATA_INTERNAL_H_ */
diff --git a/libc/tinymath/log2l.c b/libc/tinymath/log2l.c
index fd1489688..6ad25ef6c 100644
--- a/libc/tinymath/log2l.c
+++ b/libc/tinymath/log2l.c
@@ -29,15 +29,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("openbsd_libm_notice");
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_log2l.c */
 /*
diff --git a/libc/tinymath/log_data.c b/libc/tinymath/log_data.c
index be87c4105..5c96ed1e0 100644
--- a/libc/tinymath/log_data.c
+++ b/libc/tinymath/log_data.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/log_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Data for log.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define N (1 << LOG_TABLE_BITS)
 
@@ -46,6 +34,32 @@ const struct log_data __log_data = {
 .ln2hi = 0x1.62e42fefa3800p-1,
 .ln2lo = 0x1.ef35793c76730p-45,
 .poly1 = {
+#if LOG_POLY1_ORDER == 10
+// relative error: 0x1.32eccc6p-62
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.55555555554e5p-2,
+-0x1.0000000000af2p-2,
+0x1.9999999bbe436p-3,
+-0x1.55555537f9cdep-3,
+0x1.24922fc8127cfp-3,
+-0x1.0000b7d6bb612p-3,
+0x1.c806ee1ddbcafp-4,
+-0x1.972335a9c2d6ep-4,
+#elif LOG_POLY1_ORDER == 11
+// relative error: 0x1.52c8b708p-68
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.5555555555555p-2,
+-0x1.ffffffffffea9p-3,
+0x1.999999999c4d4p-3,
+-0x1.55555557f5541p-3,
+0x1.249248fbe33e4p-3,
+-0x1.ffffc9a3c825bp-4,
+0x1.c71e1f204435dp-4,
+-0x1.9a7f26377d06ep-4,
+0x1.71c30cf8f7364p-4,
+#elif LOG_POLY1_ORDER == 12
 // relative error: 0x1.c04d76cp-63
 // in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
 -0x1p-1,
@@ -59,8 +73,20 @@ const struct log_data __log_data = {
 -0x1.999eb43b068ffp-4,
 0x1.78182f7afd085p-4,
 -0x1.5521375d145cdp-4,
+#endif
 },
 .poly = {
+#if N == 64 && LOG_POLY_ORDER == 7
+// relative error: 0x1.906eb8ap-58
+// abs error: 0x1.d2cad5a8p-67
+// in -0x1.fp-8 0x1.fp-8
+-0x1.0000000000027p-1,
+0x1.555555555556ap-2,
+-0x1.fffffff0440bap-3,
+0x1.99999991906c3p-3,
+-0x1.555c8d7e8201ep-3,
+0x1.24978c59151fap-3,
+#elif N == 128 && LOG_POLY_ORDER == 6
 // relative error: 0x1.926199e8p-56
 // abs error: 0x1.882ff33p-65
 // in -0x1.fp-9 0x1.fp-9
@@ -69,6 +95,17 @@ const struct log_data __log_data = {
 -0x1.fffffffeb459p-3,
 0x1.999b324f10111p-3,
 -0x1.55575e506c89fp-3,
+#elif N == 128 && LOG_POLY_ORDER == 7
+// relative error: 0x1.649fc4bp-64
+// abs error: 0x1.c3b5769p-74
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.5555555555556p-2,
+-0x1.fffffffea1a8p-3,
+0x1.99999998e9139p-3,
+-0x1.555776801b968p-3,
+0x1.2493c29331a5cp-3,
+#endif
 },
 /* Algorithm:
 
@@ -97,6 +134,72 @@ a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
 that logc + poly(z/c - 1) has small error, however near x == 1 when
 |log(x)| < 0x1p-4, this is not enough so that is special cased.  */
 .tab = {
+#if N == 64
+{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
+{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
+{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
+{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
+{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
+{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
+{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
+{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
+{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
+{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
+{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
+{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
+{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
+{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
+{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
+{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
+{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
+{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
+{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
+{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
+{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
+{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
+{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
+{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
+{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
+{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
+{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
+{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
+{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
+{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
+{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
+{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
+{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
+{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
+{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
+{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
+{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
+{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
+{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
+{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
+{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
+{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
+{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
+{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
+{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
+{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
+{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
+{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
+{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
+{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
+{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
+{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
+{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
+{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
+{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
+{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
+{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
+{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
+{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
+{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
+{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
+{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
+{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
+{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
+#elif N == 128
 {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
 {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
 {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
@@ -225,9 +328,76 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
 {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
 {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
 {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+#endif
 },
-#if !__FP_FAST_FMA
+#if !HAVE_FAST_FMA
 .tab2 = {
+# if N == 64
+{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
+{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
+{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
+{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
+{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
+{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
+{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
+{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
+{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
+{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
+{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
+{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
+{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
+{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
+{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
+{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
+{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
+{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
+{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
+{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
+{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
+{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
+{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
+{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
+{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
+{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
+{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
+{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
+{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
+{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
+{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
+{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
+{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
+{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
+{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
+{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
+{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
+{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
+{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
+{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
+{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
+{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
+{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
+{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
+{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
+{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
+{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
+{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
+{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
+{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
+{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
+{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
+{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
+{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
+{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
+{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
+{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
+{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
+{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
+{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
+{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
+{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
+{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
+{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
+# elif N == 128
 {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
 {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
 {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
@@ -356,6 +526,7 @@ that logc + poly(z/c - 1) has small error, however near x == 1 when
 {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
 {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
 {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
-},
 #endif
+},
+#endif /* !HAVE_FAST_FMA */
 };
diff --git a/libc/tinymath/log_data.internal.h b/libc/tinymath/log_data.internal.h
deleted file mode 100644
index 72f87b410..000000000
--- a/libc/tinymath/log_data.internal.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_
-
-#define LOG_TABLE_BITS  7
-#define LOG_POLY_ORDER  6
-#define LOG_POLY1_ORDER 12
-
-COSMOPOLITAN_C_START_
-
-extern const struct log_data {
-  double ln2hi;
-  double ln2lo;
-  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
-  double poly1[LOG_POLY1_ORDER - 1];
-  struct {
-    double invc, logc;
-  } tab[1 << LOG_TABLE_BITS];
-#if !__FP_FAST_FMA
-  struct {
-    double chi, clo;
-  } tab2[1 << LOG_TABLE_BITS];
-#endif
-} __log_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOG_DATA_H_ */
diff --git a/libc/tinymath/logf.c b/libc/tinymath/logf.c
index 2bce5c3a3..3b9b85c19 100644
--- a/libc/tinymath/logf.c
+++ b/libc/tinymath/logf.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,23 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/logf_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Single-precision log function.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /*
 LOGF_TABLE_BITS = 4
@@ -58,50 +43,63 @@ Relative error: 1.957 * 2^-26 (before rounding.)
 #define OFF 0x3f330000
 
 /**
- * Returns natural logarithm of 𝑥.
+ * Returns natural logarithm of x.
+ *
+ * - ULP error: 0.818 (nearest rounding.)
+ * - Relative error: 1.957 * 2^-26 (before rounding.)
  */
-float logf(float x)
+float
+logf (float x)
 {
-	double_t z, r, r2, y, y0, invc, logc;
-	uint32_t ix, iz, tmp;
-	int k, i;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, y, y0, invc, logc;
+  uint32_t ix, iz, tmp;
+  int k, i;
 
-	ix = asuint(x);
-	/* Fix sign of zero with downward rounding when x==1.  */
-	if (WANT_ROUNDING && UNLIKELY(ix == 0x3f800000))
-		return 0;
-	if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
-		/* x < 0x1p-126 or inf or nan.  */
-		if (ix * 2 == 0)
-			return __math_divzerof(1);
-		if (ix == 0x7f800000) /* log(inf) == inf.  */
-			return x;
-		if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
-			return __math_invalidf(x);
-		/* x is subnormal, normalize it.  */
-		ix = asuint(x * 0x1p23f);
-		ix -= 23 << 23;
-	}
+  ix = asuint (x);
+#if WANT_ROUNDING
+  /* Fix sign of zero with downward rounding when x==1.  */
+  if (unlikely (ix == 0x3f800000))
+    return 0;
+#endif
+  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+    {
+      /* x < 0x1p-126 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzerof (1);
+      if (ix == 0x7f800000) /* log(inf) == inf.  */
+	return x;
+      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+	return __math_invalidf (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint (x * 0x1p23f);
+      ix -= 23 << 23;
+    }
 
-	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
-	   The range is split into N subintervals.
-	   The ith subinterval contains z and c is near its center.  */
-	tmp = ix - OFF;
-	i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
-	k = (int32_t)tmp >> 23; /* arithmetic shift */
-	iz = ix - (tmp & 0xff800000);
-	invc = T[i].invc;
-	logc = T[i].logc;
-	z = (double_t)asfloat(iz);
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+  k = (int32_t) tmp >> 23; /* arithmetic shift */
+  iz = ix - (tmp & 0xff800000);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
 
-	/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
-	r = z * invc - 1;
-	y0 = logc + (double_t)k * Ln2;
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k * Ln2;
 
-	/* Pipelined polynomial evaluation to approximate log1p(r).  */
-	r2 = r * r;
-	y = A[1] * r + A[2];
-	y = A[0] * r2 + y;
-	y = y * r2 + (y0 + r);
-	return eval_as_float(y);
+  /* Pipelined polynomial evaluation to approximate log1p(r).  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  y = y * r2 + (y0 + r);
+  return eval_as_float (y);
 }
+
+#if USE_GLIBC_ABI
+strong_alias (logf, __logf_finite)
+hidden_alias (logf, __ieee754_logf)
+#endif
diff --git a/libc/tinymath/logf_data.c b/libc/tinymath/logf_data.c
index 504d50a26..30885b7eb 100644
--- a/libc/tinymath/logf_data.c
+++ b/libc/tinymath/logf_data.c
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/logf_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Data definition for logf.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 const struct logf_data __logf_data = {
   .tab = {
diff --git a/libc/tinymath/logf_data.internal.h b/libc/tinymath/logf_data.internal.h
deleted file mode 100644
index caa51da52..000000000
--- a/libc/tinymath/logf_data.internal.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_
-
-#define LOGF_TABLE_BITS 4
-#define LOGF_POLY_ORDER 4
-
-COSMOPOLITAN_C_START_
-
-extern const struct logf_data {
-  struct {
-    double invc, logc;
-  } tab[1 << LOGF_TABLE_BITS];
-  double ln2;
-  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
-} __logf_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_LOGF_DATA_INTERNAL_H_ */
diff --git a/libc/tinymath/logl.c b/libc/tinymath/logl.c
index b6f269c45..6f4c36d16 100644
--- a/libc/tinymath/logl.c
+++ b/libc/tinymath/logl.c
@@ -28,15 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("openbsd_libm_notice");
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_logl.c */
 /*
diff --git a/libc/tinymath/loglq.c b/libc/tinymath/loglq.c
index 0d2dbdcfb..5af871c9c 100644
--- a/libc/tinymath/loglq.c
+++ b/libc/tinymath/loglq.c
@@ -29,13 +29,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
+__static_yoink("freebsd_libm_notice");
 #if LDBL_MANT_DIG == 113
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Implementation of the natural logarithm of x for 128-bit format.
diff --git a/libc/tinymath/atan_data.c b/libc/tinymath/math_err.c
similarity index 61%
rename from libc/tinymath/atan_data.c
rename to libc/tinymath/math_err.c
index e28e2d5b0..c5a7fa08c 100644
--- a/libc/tinymath/atan_data.c
+++ b/libc/tinymath/math_err.c
@@ -1,9 +1,9 @@
 /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8                               :vi │
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,22 +25,76 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/atan_data.internal.h"
+#include "libc/errno.h"
+#include "libc/tinymath/arm.internal.h"
 
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+#if WANT_ERRNO
+/* dontinline reduces code size and avoids making math functions non-leaf
+   when the error handling is inlined.  */
+dontinline static double
+with_errno (double y, int e)
+{
+  errno = e;
+  return y;
+}
+#else
+#define with_errno(x, e) (x)
+#endif
 
-const struct atan_poly_data __atan_poly_data = {
-  .poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
-	      [2**-1022, 1.0]. See atan.sollya for details of how these were
-	      generated.  */
-	   -0x1.5555555555555p-2,  0x1.99999999996c1p-3,  -0x1.2492492478f88p-3,
-	   0x1.c71c71bc3951cp-4,   -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-	   -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5,  -0x1.aebfe7b418581p-5,
-	   0x1.842dbe9b0d916p-5,   -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-	   -0x1.00e6eece7de8p-5,   0x1.860897b29e5efp-6,  -0x1.0051381722a59p-6,
-	   0x1.14e9dc19a4a4ep-7,   -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-	   -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};
+/* dontinline reduces code size.  */
+dontinline static double
+xflow (uint32_t sign, double y)
+{
+  y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
+  return with_errno (y, ERANGE);
+}
+
+double
+__math_uflow (uint32_t sign)
+{
+  return xflow (sign, 0x1p-767);
+}
+
+#if WANT_ERRNO_UFLOW
+/* Underflows to zero in some non-nearest rounding mode, setting errno
+   is valid even if the result is non-zero, but in the subnormal range.  */
+double
+__math_may_uflow (uint32_t sign)
+{
+  return xflow (sign, 0x1.8p-538);
+}
+#endif
+
+double
+__math_oflow (uint32_t sign)
+{
+  return xflow (sign, 0x1p769);
+}
+
+double
+__math_divzero (uint32_t sign)
+{
+  double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
+  return with_errno (y, ERANGE);
+}
+
+dontinstrument double
+__math_invalid (double x)
+{
+  double y = (x - x) / (x - x);
+  return isnan (x) ? y : with_errno (y, EDOM);
+}
+
+/* Check result and set errno if necessary.  */
+
+dontinstrument double
+__math_check_uflow (double y)
+{
+  return y == 0.0 ? with_errno (y, ERANGE) : y;
+}
+
+dontinstrument double
+__math_check_oflow (double y)
+{
+  return isinf (y) ? with_errno (y, ERANGE) : y;
+}
diff --git a/libc/tinymath/casin.c b/libc/tinymath/math_errf.c
similarity index 57%
rename from libc/tinymath/casin.c
rename to libc/tinymath/math_errf.c
index b0e85ca2a..2927b565d 100644
--- a/libc/tinymath/casin.c
+++ b/libc/tinymath/math_errf.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,32 +25,76 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/complex.h"
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
+#include "libc/errno.h"
+#include "libc/tinymath/arm.internal.h"
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-// FIXME
-
-/* asin(z) = -i log(i z + sqrt(1 - z*z)) */
-
-double complex casin(double complex z)
+#if WANT_ERRNO
+/* dontinline reduces code size and avoids making math functions non-leaf
+   when the error handling is inlined.  */
+dontinline static float
+with_errnof (float y, int e)
 {
-	double complex w;
-	double x, y;
+  errno = e;
+  return y;
+}
+#else
+#define with_errnof(x, e) (x)
+#endif
 
-	x = creal(z);
-	y = cimag(z);
-	w = CMPLX(1.0 - (x - y)*(x + y), -2.0*x*y);
-	double complex r = clog(CMPLX(-y, x) + csqrt(w));
-	return CMPLX(cimag(r), -creal(r));
+/* dontinline reduces code size.  */
+dontinline static float
+xflowf (uint32_t sign, float y)
+{
+  y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
+  return with_errnof (y, ERANGE);
 }
 
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(casin, casinl);
+float
+__math_uflowf (uint32_t sign)
+{
+  return xflowf (sign, 0x1p-95f);
+}
+
+#if WANT_ERRNO_UFLOW
+/* Underflows to zero in some non-nearest rounding mode, setting errno
+   is valid even if the result is non-zero, but in the subnormal range.  */
+float
+__math_may_uflowf (uint32_t sign)
+{
+  return xflowf (sign, 0x1.4p-75f);
+}
 #endif
+
+float
+__math_oflowf (uint32_t sign)
+{
+  return xflowf (sign, 0x1p97f);
+}
+
+float
+__math_divzerof (uint32_t sign)
+{
+  float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
+  return with_errnof (y, ERANGE);
+}
+
+dontinstrument float
+__math_invalidf (float x)
+{
+  float y = (x - x) / (x - x);
+  return isnan (x) ? y : with_errnof (y, EDOM);
+}
+
+/* Check result and set errno if necessary.  */
+
+dontinstrument float
+__math_check_uflowf (float y)
+{
+  return y == 0.0f ? with_errnof (y, ERANGE) : y;
+}
+
+dontinstrument float
+__math_check_oflowf (float y)
+{
+  return isinf (y) ? with_errnof (y, ERANGE) : y;
+}
diff --git a/libc/tinymath/atanf_data.c b/libc/tinymath/math_errl.c
similarity index 78%
rename from libc/tinymath/atanf_data.c
rename to libc/tinymath/math_errl.c
index cf22b0506..d776fb349 100644
--- a/libc/tinymath/atanf_data.c
+++ b/libc/tinymath/math_errl.c
@@ -1,9 +1,9 @@
 /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8                               :vi │
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,17 +25,25 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/atanf_data.internal.h"
+#include "libc/errno.h"
+#include "libc/tinymath/arm.internal.h"
 
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+#if WANT_ERRNO
+/* dontinline reduces code size and avoids making math functions non-leaf
+   when the error handling is inlined.  */
+dontinline static long double
+with_errnol (long double y, int e)
+{
+  errno = e;
+  return y;
+}
+#else
+#define with_errnol(x, e) (x)
+#endif
 
-/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
- */
-const struct atanf_poly_data __atanf_poly_data = {
-  .poly = {/* See atanf.sollya for details of how these were generated.  */
-	   -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-	   -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};
+dontinstrument long double
+__math_invalidl (long double x)
+{
+  long double y = (x - x) / (x - x);
+  return isnan (x) ? y : with_errnol (y, EDOM);
+}
diff --git a/libc/tinymath/modf.c b/libc/tinymath/modf.c
index 3047590c4..2701093c1 100644
--- a/libc/tinymath/modf.c
+++ b/libc/tinymath/modf.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns fractional part of 𝑥.
diff --git a/libc/tinymath/modff.c b/libc/tinymath/modff.c
index 99e60840c..720f05e80 100644
--- a/libc/tinymath/modff.c
+++ b/libc/tinymath/modff.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 float modff(float x, float *iptr)
 {
diff --git a/libc/tinymath/modfl.c b/libc/tinymath/modfl.c
index 9b430c646..0d3f585ec 100644
--- a/libc/tinymath/modfl.c
+++ b/libc/tinymath/modfl.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 static const long double toint = 1/LDBL_EPSILON;
 
diff --git a/libc/tinymath/nearbyint.c b/libc/tinymath/nearbyint.c
index ee8ae70b3..c8b853ce5 100644
--- a/libc/tinymath/nearbyint.c
+++ b/libc/tinymath/nearbyint.c
@@ -29,7 +29,7 @@ double nearbyint(double x) {
   asm("fidbra\t%0,0,%1,4" : "=f"(x) : "f"(x));
 #else
 #ifdef FE_INEXACT
-  // #pragma STDC FENV_ACCESS ON
+  /* #pragma STDC FENV_ACCESS ON */
   int e;
   e = fetestexcept(FE_INEXACT);
 #endif
diff --git a/libc/tinymath/nearbyintf.c b/libc/tinymath/nearbyintf.c
index e9b50cf29..332e15a65 100644
--- a/libc/tinymath/nearbyintf.c
+++ b/libc/tinymath/nearbyintf.c
@@ -24,7 +24,7 @@
  */
 float nearbyintf(float x) {
 #ifdef FE_INEXACT
-  // #pragma STDC FENV_ACCESS ON
+  /* #pragma STDC FENV_ACCESS ON */
   int e;
   e = fetestexcept(FE_INEXACT);
 #endif
diff --git a/libc/tinymath/nearbyintl.c b/libc/tinymath/nearbyintl.c
index 353b3d905..e020916ae 100644
--- a/libc/tinymath/nearbyintl.c
+++ b/libc/tinymath/nearbyintl.c
@@ -25,7 +25,7 @@
  */
 long double nearbyintl(long double x) {
 #ifdef FE_INEXACT
-  // #pragma STDC FENV_ACCESS ON
+  /* #pragma STDC FENV_ACCESS ON */
   int e;
   e = fetestexcept(FE_INEXACT);
 #endif
diff --git a/libc/tinymath/nextafter.c b/libc/tinymath/nextafter.c
index f041bf0f3..a1da2b40a 100644
--- a/libc/tinymath/nextafter.c
+++ b/libc/tinymath/nextafter.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 double nextafter(double x, double y)
 {
diff --git a/libc/tinymath/nextafterf.c b/libc/tinymath/nextafterf.c
index 80e884413..82bb5bd48 100644
--- a/libc/tinymath/nextafterf.c
+++ b/libc/tinymath/nextafterf.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 float nextafterf(float x, float y)
 {
diff --git a/libc/tinymath/nextafterl.c b/libc/tinymath/nextafterl.c
index e85824851..c213e93ff 100644
--- a/libc/tinymath/nextafterl.c
+++ b/libc/tinymath/nextafterl.c
@@ -30,12 +30,8 @@
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 long double nextafterl(long double x, long double y)
 {
diff --git a/libc/tinymath/nexttoward.c b/libc/tinymath/nexttoward.c
index c7a16bd50..d451122de 100644
--- a/libc/tinymath/nexttoward.c
+++ b/libc/tinymath/nexttoward.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 double nexttoward(double x, long double y)
 {
diff --git a/libc/tinymath/nexttowardf.c b/libc/tinymath/nexttowardf.c
index da5eb59ba..470c27edd 100644
--- a/libc/tinymath/nexttowardf.c
+++ b/libc/tinymath/nexttowardf.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 float nexttowardf(float x, long double y)
 {
diff --git a/libc/tinymath/polevll.c b/libc/tinymath/polevll.c
index aac4505d8..92356ca36 100644
--- a/libc/tinymath/polevll.c
+++ b/libc/tinymath/polevll.c
@@ -26,15 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("openbsd_libm_notice");
 
 /* origin: OpenBSD /usr/src/lib/libm/src/polevll.c */
 /*
diff --git a/libc/tinymath/pow.c b/libc/tinymath/pow.c
index 94195c97f..86e36fd40 100644
--- a/libc/tinymath/pow.c
+++ b/libc/tinymath/pow.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,24 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/exp_data.internal.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/pow_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Double-precision x^y function.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /*
 Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
@@ -58,79 +42,83 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
 #define OFF 0x3fe6955500000000
 
 /* Top 12 bits of a double (sign and exponent bits).  */
-static inline uint32_t top12(double x)
+static inline uint32_t
+top12 (double x)
 {
-	return asuint64(x) >> 52;
+  return asuint64 (x) >> 52;
 }
 
 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
    additional 15 bits precision.  IX is the bit representation of x, but
    normalized in the subnormal range using the sign bit for the exponent.  */
-static inline double_t log_inline(uint64_t ix, double_t *tail)
+static inline double_t
+log_inline (uint64_t ix, double_t *tail)
 {
-	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-	double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
-	uint64_t iz, tmp;
-	int k, i;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
+  uint64_t iz, tmp;
+  int k, i;
 
-	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
-	   The range is split into N subintervals.
-	   The ith subinterval contains z and c is near its center.  */
-	tmp = ix - OFF;
-	i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
-	k = (int64_t)tmp >> 52; /* arithmetic shift */
-	iz = ix - (tmp & 0xfffULL << 52);
-	z = asdouble(iz);
-	kd = (double_t)k;
+  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
+  k = (int64_t) tmp >> 52; /* arithmetic shift */
+  iz = ix - (tmp & 0xfffULL << 52);
+  z = asdouble (iz);
+  kd = (double_t) k;
 
-	/* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
-	invc = T[i].invc;
-	logc = T[i].logc;
-	logctail = T[i].logctail;
+  /* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
+  invc = T[i].invc;
+  logc = T[i].logc;
+  logctail = T[i].logctail;
 
-	/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+  /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
      |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.  */
-#if __FP_FAST_FMA
-	r = __builtin_fma(z, invc, -1.0);
+#if HAVE_FAST_FMA
+  r = fma (z, invc, -1.0);
 #else
-	/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
-	double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
-	double_t zlo = z - zhi;
-	double_t rhi = zhi * invc - 1.0;
-	double_t rlo = zlo * invc;
-	r = rhi + rlo;
+  /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
+  double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
+  double_t zlo = z - zhi;
+  double_t rhi = zhi * invc - 1.0;
+  double_t rlo = zlo * invc;
+  r = rhi + rlo;
 #endif
 
-	/* k*Ln2 + log(c) + r.  */
-	t1 = kd * Ln2hi + logc;
-	t2 = t1 + r;
-	lo1 = kd * Ln2lo + logctail;
-	lo2 = t1 - t2 + r;
+  /* k*Ln2 + log(c) + r.  */
+  t1 = kd * Ln2hi + logc;
+  t2 = t1 + r;
+  lo1 = kd * Ln2lo + logctail;
+  lo2 = t1 - t2 + r;
 
-	/* Evaluation is optimized assuming superscalar pipelined execution.  */
-	double_t ar, ar2, ar3, lo3, lo4;
-	ar = A[0] * r; /* A[0] = -0.5.  */
-	ar2 = r * ar;
-	ar3 = r * ar2;
-	/* k*Ln2 + log(c) + r + A[0]*r*r.  */
-#if __FP_FAST_FMA
-	hi = t2 + ar2;
-	lo3 = __builtin_fma(ar, r, -ar2);
-	lo4 = t2 - hi + ar2;
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  double_t ar, ar2, ar3, lo3, lo4;
+  ar = A[0] * r; /* A[0] = -0.5.  */
+  ar2 = r * ar;
+  ar3 = r * ar2;
+  /* k*Ln2 + log(c) + r + A[0]*r*r.  */
+#if HAVE_FAST_FMA
+  hi = t2 + ar2;
+  lo3 = fma (ar, r, -ar2);
+  lo4 = t2 - hi + ar2;
 #else
-	double_t arhi = A[0] * rhi;
-	double_t arhi2 = rhi * arhi;
-	hi = t2 + arhi2;
-	lo3 = rlo * (ar + arhi);
-	lo4 = t2 - hi + arhi2;
+  double_t arhi = A[0] * rhi;
+  double_t arhi2 = rhi * arhi;
+  hi = t2 + arhi2;
+  lo3 = rlo * (ar + arhi);
+  lo4 = t2 - hi + arhi2;
 #endif
-	/* p = log1p(r) - r - A[0]*r*r.  */
-	p = (ar3 * (A[1] + r * A[2] +
-		    ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
-	lo = lo1 + lo2 + lo3 + lo4 + p;
-	y = hi + lo;
-	*tail = hi - y + lo;
-	return y;
+  /* p = log1p(r) - r - A[0]*r*r.  */
+#if POW_LOG_POLY_ORDER == 8
+  p = (ar3
+       * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
+#endif
+  lo = lo1 + lo2 + lo3 + lo4 + p;
+  y = hi + lo;
+  *tail = hi - y + lo;
+  return y;
 }
 
 #undef N
@@ -154,232 +142,268 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
    a double.  (int32_t)KI is the k used in the argument reduction and exponent
    adjustment of scale, positive k here means the result may overflow and
    negative k means the result may underflow.  */
-static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double_t scale, y;
+  double_t scale, y;
 
-	if ((ki & 0x80000000) == 0) {
-		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
-		sbits -= 1009ull << 52;
-		scale = asdouble(sbits);
-		y = 0x1p1009 * (scale + scale * tmp);
-		return eval_as_double(y);
-	}
-	/* k < 0, need special care in the subnormal range.  */
-	sbits += 1022ull << 52;
-	/* Note: sbits is signed scale.  */
-	scale = asdouble(sbits);
-	y = scale + scale * tmp;
-	if (fabs(y) < 1.0) {
-		/* Round y to the right precision before scaling it into the subnormal
-		   range to avoid double rounding that can cause 0.5+E/2 ulp error where
-		   E is the worst-case ulp error outside the subnormal range.  So this
-		   is only useful if the goal is better than 1 ulp worst-case error.  */
-		double_t hi, lo, one = 1.0;
-		if (y < 0.0)
-			one = -1.0;
-		lo = scale - y + scale * tmp;
-		hi = one + y;
-		lo = one - hi + y + lo;
-		y = eval_as_double(hi + lo) - one;
-		/* Fix the sign of 0.  */
-		if (y == 0.0)
-			y = asdouble(sbits & 0x8000000000000000);
-		/* The underflow exception needs to be signaled explicitly.  */
-		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
-	}
-	y = 0x1p-1022 * y;
-	return eval_as_double(y);
+  if ((ki & 0x80000000) == 0)
+    {
+      /* k > 0, the exponent of scale might have overflowed by <= 460.  */
+      sbits -= 1009ull << 52;
+      scale = asdouble (sbits);
+      y = 0x1p1009 * (scale + scale * tmp);
+      return check_oflow (eval_as_double (y));
+    }
+  /* k < 0, need special care in the subnormal range.  */
+  sbits += 1022ull << 52;
+  /* Note: sbits is signed scale.  */
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+  if (fabs (y) < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.  */
+      double_t hi, lo, one = 1.0;
+      if (y < 0.0)
+	one = -1.0;
+      lo = scale - y + scale * tmp;
+      hi = one + y;
+      lo = one - hi + y + lo;
+      y = eval_as_double (hi + lo) - one;
+      /* Fix the sign of 0.  */
+      if (y == 0.0)
+	y = asdouble (sbits & 0x8000000000000000);
+      /* The underflow exception needs to be signaled explicitly.  */
+      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+  return check_uflow (eval_as_double (y));
 }
 
 #define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
 
 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
    The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.  */
-static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
+static inline double
+exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
 {
-	uint32_t abstop;
-	uint64_t ki, idx, top, sbits;
-	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-	double_t kd, z, r, r2, scale, tail, tmp;
+  uint32_t abstop;
+  uint64_t ki, idx, top, sbits;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, z, r, r2, scale, tail, tmp;
 
-	abstop = top12(x) & 0x7ff;
-	if (UNLIKELY(abstop - top12(0x1p-54) >=
-                     top12(512.0) - top12(0x1p-54))) {
-		if (abstop - top12(0x1p-54) >= 0x80000000) {
-			/* Avoid spurious underflow for tiny x.  */
-			/* Note: 0 is common input.  */
-			double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
-			return sign_bias ? -one : one;
-		}
-		if (abstop >= top12(1024.0)) {
-			/* Note: inf and nan are already handled.  */
-			if (asuint64(x) >> 63)
-				return __math_uflow(sign_bias);
-			else
-				return __math_oflow(sign_bias);
-		}
-		/* Large x is special cased below.  */
-		abstop = 0;
+  abstop = top12 (x) & 0x7ff;
+  if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+    {
+      if (abstop - top12 (0x1p-54) >= 0x80000000)
+	{
+	  /* Avoid spurious underflow for tiny x.  */
+	  /* Note: 0 is common input.  */
+	  double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
+	  return sign_bias ? -one : one;
 	}
+      if (abstop >= top12 (1024.0))
+	{
+	  /* Note: inf and nan are already handled.  */
+	  if (asuint64 (x) >> 63)
+	    return __math_uflow (sign_bias);
+	  else
+	    return __math_oflow (sign_bias);
+	}
+      /* Large x is special cased below.  */
+      abstop = 0;
+    }
 
-	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
-	/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
-	z = InvLn2N * x;
+  /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */
+  /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */
+  z = InvLn2N * x;
 #if TOINT_INTRINSICS
-	kd = roundtoint(z);
-	ki = converttoint(z);
+  kd = roundtoint (z);
+  ki = converttoint (z);
 #elif EXP_USE_TOINT_NARROW
-	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
-	kd = eval_as_double(z + Shift);
-	ki = asuint64(kd) >> 16;
-	kd = (double_t)(int32_t)ki;
+  /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd) >> 16;
+  kd = (double_t) (int32_t) ki;
 #else
-	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
-	kd = eval_as_double(z + Shift);
-	ki = asuint64(kd);
-	kd -= Shift;
+  /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
+  kd = eval_as_double (z + Shift);
+  ki = asuint64 (kd);
+  kd -= Shift;
 #endif
-	r = x + kd * NegLn2hiN + kd * NegLn2loN;
-	/* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
-	r += xtail;
-	/* 2^(k/N) ~= scale * (1 + tail).  */
-	idx = 2 * (ki % N);
-	top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
-	tail = asdouble(T[idx]);
-	/* This is only a valid scale when -1023*N < k < 1024*N.  */
-	sbits = T[idx + 1] + top;
-	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
-	/* Evaluation is optimized assuming superscalar pipelined execution.  */
-	r2 = r * r;
-	/* Without fma the worst case error is 0.25/N ulp larger.  */
-	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
-	tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
-	if (UNLIKELY(abstop == 0))
-		return specialcase(tmp, sbits, ki);
-	scale = asdouble(sbits);
-	/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
-	   is no spurious underflow here even without fma.  */
-	return eval_as_double(scale + scale * tmp);
+  r = x + kd * NegLn2hiN + kd * NegLn2loN;
+  /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
+  r += xtail;
+  /* 2^(k/N) ~= scale * (1 + tail).  */
+  idx = 2 * (ki % N);
+  top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
+  tail = asdouble (T[idx]);
+  /* This is only a valid scale when -1023*N < k < 1024*N.  */
+  sbits = T[idx + 1] + top;
+  /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
+  /* Evaluation is optimized assuming superscalar pipelined execution.  */
+  r2 = r * r;
+  /* Without fma the worst case error is 0.25/N ulp larger.  */
+  /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
+#if EXP_POLY_ORDER == 4
+  tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
+#elif EXP_POLY_ORDER == 5
+  tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+#elif EXP_POLY_ORDER == 6
+  tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
+#endif
+  if (unlikely (abstop == 0))
+    return specialcase (tmp, sbits, ki);
+  scale = asdouble (sbits);
+  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+     is no spurious underflow here even without fma.  */
+  return eval_as_double (scale + scale * tmp);
 }
 
 /* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
    the bit representation of a non-zero finite floating-point value.  */
-static inline int checkint(uint64_t iy)
+static inline int
+checkint (uint64_t iy)
 {
-	int e = iy >> 52 & 0x7ff;
-	if (e < 0x3ff)
-		return 0;
-	if (e > 0x3ff + 52)
-		return 2;
-	if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
-		return 0;
-	if (iy & (1ULL << (0x3ff + 52 - e)))
-		return 1;
-	return 2;
+  int e = iy >> 52 & 0x7ff;
+  if (e < 0x3ff)
+    return 0;
+  if (e > 0x3ff + 52)
+    return 2;
+  if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
+    return 0;
+  if (iy & (1ULL << (0x3ff + 52 - e)))
+    return 1;
+  return 2;
 }
 
 /* Returns 1 if input is the bit representation of 0, infinity or nan.  */
-static inline int zeroinfnan(uint64_t i)
+static inline int
+zeroinfnan (uint64_t i)
 {
-	return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
+  return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
 }
 
 /**
  * Returns 𝑥^𝑦.
- * @note should take ~18ns
+ *
+ * - Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
+ * - relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
+ * - ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
+ *
+ * @raise ERANGE on overflow or underflow
+ * @raise EDOM if x is negative and y is a finite non-integer
  */
-double pow(double x, double y)
+double
+pow (double x, double y)
 {
-	uint32_t sign_bias = 0;
-	uint64_t ix, iy;
-	uint32_t topx, topy;
+  uint32_t sign_bias = 0;
+  uint64_t ix, iy;
+  uint32_t topx, topy;
 
-	ix = asuint64(x);
-	iy = asuint64(y);
-	topx = top12(x);
-	topy = top12(y);
-	if (UNLIKELY(topx - 0x001 >= 0x7ff - 0x001 ||
-                     (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
-		/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
-		   and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1.  */
-		/* Special cases: (x < 0x1p-126 or inf or nan) or
-		   (|y| < 0x1p-65 or |y| >= 0x1p63 or nan).  */
-		if (UNLIKELY(zeroinfnan(iy))) {
-			if (2 * iy == 0)
-				return issignaling_inline(x) ? x + y : 1.0;
-			if (ix == asuint64(1.0))
-				return issignaling_inline(y) ? x + y : 1.0;
-			if (2 * ix > 2 * asuint64(INFINITY) ||
-			    2 * iy > 2 * asuint64(INFINITY))
-				return x + y;
-			if (2 * ix == 2 * asuint64(1.0))
-				return 1.0;
-			if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
-				return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
-			return y * y;
-		}
-		if (UNLIKELY(zeroinfnan(ix))) {
-			double_t x2 = x * x;
-			if (ix >> 63 && checkint(iy) == 1)
-				x2 = -x2;
-			/* Without the barrier some versions of clang hoist the 1/x2 and
-			   thus division by zero exception can be signaled spuriously.  */
-			return iy >> 63 ? fp_barrier(1 / x2) : x2;
-		}
-		/* Here x and y are non-zero finite.  */
-		if (ix >> 63) {
-			/* Finite x < 0.  */
-			int yint = checkint(iy);
-			if (yint == 0)
-				return __math_invalid(x);
-			if (yint == 1)
-				sign_bias = SIGN_BIAS;
-			ix &= 0x7fffffffffffffff;
-			topx &= 0x7ff;
-		}
-		if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
-			/* Note: sign_bias == 0 here because y is not odd.  */
-			if (ix == asuint64(1.0))
-				return 1.0;
-			if ((topy & 0x7ff) < 0x3be) {
-				/* |y| < 2^-65, x^y ~= 1 + y*log(x).  */
-				if (WANT_ROUNDING)
-					return ix > asuint64(1.0) ? 1.0 + y :
-								    1.0 - y;
-				else
-					return 1.0;
-			}
-			return (ix > asuint64(1.0)) == (topy < 0x800) ?
-				       __math_oflow(0) :
-				       __math_uflow(0);
-		}
-		if (topx == 0) {
-			/* Normalize subnormal x so exponent becomes negative.  */
-			ix = asuint64(x * 0x1p52);
-			ix &= 0x7fffffffffffffff;
-			ix -= 52ULL << 52;
-		}
+  ix = asuint64 (x);
+  iy = asuint64 (y);
+  topx = top12 (x);
+  topy = top12 (y);
+  if (unlikely (topx - 0x001 >= 0x7ff - 0x001
+		|| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
+    {
+      /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
+	 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1.  */
+      /* Special cases: (x < 0x1p-126 or inf or nan) or
+	 (|y| < 0x1p-65 or |y| >= 0x1p63 or nan).  */
+      if (unlikely (zeroinfnan (iy)))
+	{
+	  if (2 * iy == 0)
+	    return issignaling_inline (x) ? x + y : 1.0;
+	  if (ix == asuint64 (1.0))
+	    return issignaling_inline (y) ? x + y : 1.0;
+	  if (2 * ix > 2 * asuint64 (INFINITY)
+	      || 2 * iy > 2 * asuint64 (INFINITY))
+	    return x + y;
+	  if (2 * ix == 2 * asuint64 (1.0))
+	    return 1.0;
+	  if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
+	    return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
+	  return y * y;
 	}
+      if (unlikely (zeroinfnan (ix)))
+	{
+	  double_t x2 = x * x;
+	  if (ix >> 63 && checkint (iy) == 1)
+	    {
+	      x2 = -x2;
+	      sign_bias = 1;
+	    }
+	  if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
+	    return __math_divzero (sign_bias);
+	  /* Without the barrier some versions of clang hoist the 1/x2 and
+	     thus division by zero exception can be signaled spuriously.  */
+	  return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
+	}
+      /* Here x and y are non-zero finite.  */
+      if (ix >> 63)
+	{
+	  /* Finite x < 0.  */
+	  int yint = checkint (iy);
+	  if (yint == 0)
+	    return __math_invalid (x);
+	  if (yint == 1)
+	    sign_bias = SIGN_BIAS;
+	  ix &= 0x7fffffffffffffff;
+	  topx &= 0x7ff;
+	}
+      if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
+	{
+	  /* Note: sign_bias == 0 here because y is not odd.  */
+	  if (ix == asuint64 (1.0))
+	    return 1.0;
+	  if ((topy & 0x7ff) < 0x3be)
+	    {
+	      /* |y| < 2^-65, x^y ~= 1 + y*log(x).  */
+	      if (WANT_ROUNDING)
+		return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
+	      else
+		return 1.0;
+	    }
+	  return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
+							 : __math_uflow (0);
+	}
+      if (topx == 0)
+	{
+	  /* Normalize subnormal x so exponent becomes negative.  */
+	  /* Without the barrier some versions of clang evalutate the mul
+	     unconditionally causing spurious overflow exceptions.  */
+	  ix = asuint64 (opt_barrier_double (x) * 0x1p52);
+	  ix &= 0x7fffffffffffffff;
+	  ix -= 52ULL << 52;
+	}
+    }
 
-	double_t lo;
-	double_t hi = log_inline(ix, &lo);
-	double_t ehi, elo;
-#if __FP_FAST_FMA
-	ehi = y * hi;
-	elo = y * lo + __builtin_fma(y, hi, -ehi);
+  double_t lo;
+  double_t hi = log_inline (ix, &lo);
+  double_t ehi, elo;
+#if HAVE_FAST_FMA
+  ehi = y * hi;
+  elo = y * lo + fma (y, hi, -ehi);
 #else
-	double_t yhi = asdouble(iy & -1ULL << 27);
-	double_t ylo = y - yhi;
-	double_t lhi = asdouble(asuint64(hi) & -1ULL << 27);
-	double_t llo = hi - lhi + lo;
-	ehi = yhi * lhi;
-	elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25.  */
+  double_t yhi = asdouble (iy & -1ULL << 27);
+  double_t ylo = y - yhi;
+  double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
+  double_t llo = hi - lhi + lo;
+  ehi = yhi * lhi;
+  elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25.  */
 #endif
-	return exp_inline(ehi, elo, sign_bias);
+  return exp_inline (ehi, elo, sign_bias);
 }
 
-__weak_reference(pow, __pow_finite);
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-__weak_reference(pow, powl);
+#if USE_GLIBC_ABI
+strong_alias (pow, __pow_finite)
+hidden_alias (pow, __ieee754_pow)
+# if LDBL_MANT_DIG == 53
+long double powl (long double x, long double y) { return pow (x, y); }
+# endif
 #endif
diff --git a/libc/tinymath/pow_data.c b/libc/tinymath/pow_data.c
index 43dc5d008..c1da89cb3 100644
--- a/libc/tinymath/pow_data.c
+++ b/libc/tinymath/pow_data.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/pow_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Data for the log part of pow.
- *
- * Copyright (c) 2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 #define N (1 << POW_LOG_TABLE_BITS)
 
@@ -46,6 +34,7 @@ const struct pow_log_data __pow_log_data = {
 .ln2hi = 0x1.62e42fefa3800p-1,
 .ln2lo = 0x1.ef35793c76730p-45,
 .poly = {
+#if N == 128 && POW_LOG_POLY_ORDER == 8
 // relative error: 0x1.11922ap-70
 // in -0x1.6bp-8 0x1.6bp-8
 // Coefficients are scaled to match the scaling during evaluation.
@@ -56,6 +45,7 @@ const struct pow_log_data __pow_log_data = {
 -0x1.555555529a47ap-3 * 4,
 0x1.2495b9b4845e9p-3 * -8,
 -0x1.0002b8b263fc3p-3 * -8,
+#endif
 },
 /* Algorithm:
 
@@ -80,6 +70,7 @@ the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
 error and the interval for z is selected such that near x == 1, where log(x)
 is tiny, large cancellation error is avoided in logc + poly(z/c - 1).  */
 .tab = {
+#if N == 128
 #define A(a, b, c) {a, 0, b, c},
 A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
 A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
@@ -209,5 +200,6 @@ A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
 A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
 A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
 A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
+#endif
 },
 };
diff --git a/libc/tinymath/pow_data.internal.h b/libc/tinymath/pow_data.internal.h
deleted file mode 100644
index ac394a416..000000000
--- a/libc/tinymath/pow_data.internal.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_
-
-#define POW_LOG_TABLE_BITS 7
-#define POW_LOG_POLY_ORDER 8
-
-COSMOPOLITAN_C_START_
-
-extern const struct pow_log_data {
-  double ln2hi;
-  double ln2lo;
-  double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
-  /* Note: the pad field is unused, but allows slightly faster indexing.  */
-  struct {
-    double invc, pad, logc, logctail;
-  } tab[1 << POW_LOG_TABLE_BITS];
-} __pow_log_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_POW_DATA_INTERNAL_H_ */
diff --git a/libc/tinymath/powf.c b/libc/tinymath/powf.c
index 2ad8f7f63..c116f3931 100644
--- a/libc/tinymath/powf.c
+++ b/libc/tinymath/powf.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,23 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
-#include "libc/tinymath/exp2f_data.internal.h"
-#include "libc/tinymath/exp_data.internal.h"
-#include "libc/tinymath/internal.h"
-#include "libc/tinymath/powf_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/*
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /*
 POWF_LOG2_POLY_ORDER = 5
@@ -60,37 +45,39 @@ relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
 
 /* Subnormal input is normalized so ix has negative biased exponent.
    Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set.  */
-static inline double_t log2_inline(uint32_t ix)
+static inline double_t
+log2_inline (uint32_t ix)
 {
-	double_t z, r, r2, r4, p, q, y, y0, invc, logc;
-	uint32_t iz, top, tmp;
-	int k, i;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, r4, p, q, y, y0, invc, logc;
+  uint32_t iz, top, tmp;
+  int k, i;
 
-	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
-	   The range is split into N subintervals.
-	   The ith subinterval contains z and c is near its center.  */
-	tmp = ix - OFF;
-	i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
-	top = tmp & 0xff800000;
-	iz = ix - top;
-	k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
-	invc = T[i].invc;
-	logc = T[i].logc;
-	z = (double_t)asfloat(iz);
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
+  top = tmp & 0xff800000;
+  iz = ix - top;
+  k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
 
-	/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
-	r = z * invc - 1;
-	y0 = logc + (double_t)k;
+  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k;
 
-	/* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
-	r2 = r * r;
-	y = A[0] * r + A[1];
-	p = A[2] * r + A[3];
-	r4 = r2 * r2;
-	q = A[4] * r + y0;
-	q = p * r2 + q;
-	y = y * r4 + q;
-	return y;
+  /* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+  r2 = r * r;
+  y = A[0] * r + A[1];
+  p = A[2] * r + A[3];
+  r4 = r2 * r2;
+  q = A[4] * r + y0;
+  q = p * r2 + q;
+  y = y * r4 + q;
+  return y;
 }
 
 #undef N
@@ -102,124 +89,164 @@ static inline double_t log2_inline(uint32_t ix)
 /* The output of log2 and thus the input of exp2 is either scaled by N
    (in case of fast toint intrinsics) or not.  The unscaled xd must be
    in [-1021,1023], sign_bias sets the sign of the result.  */
-static inline float exp2_inline(double_t xd, uint32_t sign_bias)
+static inline float
+exp2_inline (double_t xd, uint32_t sign_bias)
 {
-	uint64_t ki, ski, t;
-	double_t kd, z, r, r2, y, s;
+  uint64_t ki, ski, t;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t kd, z, r, r2, y, s;
 
 #if TOINT_INTRINSICS
-#define C __exp2f_data.poly_scaled
-	/* N*x = k + r with r in [-1/2, 1/2] */
-	kd = roundtoint(xd); /* k */
-	ki = converttoint(xd);
+# define C __exp2f_data.poly_scaled
+  /* N*x = k + r with r in [-1/2, 1/2] */
+  kd = roundtoint (xd); /* k */
+  ki = converttoint (xd);
 #else
-#define C __exp2f_data.poly
-#define SHIFT __exp2f_data.shift_scaled
-	/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
-	kd = eval_as_double(xd + SHIFT);
-	ki = asuint64(kd);
-	kd -= SHIFT; /* k/N */
+# define C __exp2f_data.poly
+# define SHIFT __exp2f_data.shift_scaled
+  /* x = k/N + r with r in [-1/(2N), 1/(2N)] */
+  kd = eval_as_double (xd + SHIFT);
+  ki = asuint64 (kd);
+  kd -= SHIFT; /* k/N */
 #endif
-	r = xd - kd;
+  r = xd - kd;
 
-	/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
-	t = T[ki % N];
-	ski = ki + sign_bias;
-	t += ski << (52 - EXP2F_TABLE_BITS);
-	s = asdouble(t);
-	z = C[0] * r + C[1];
-	r2 = r * r;
-	y = C[2] * r + 1;
-	y = z * r2 + y;
-	y = y * s;
-	return eval_as_float(y);
+  /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+  t = T[ki % N];
+  ski = ki + sign_bias;
+  t += ski << (52 - EXP2F_TABLE_BITS);
+  s = asdouble (t);
+  z = C[0] * r + C[1];
+  r2 = r * r;
+  y = C[2] * r + 1;
+  y = z * r2 + y;
+  y = y * s;
+  return eval_as_float (y);
 }
 
 /* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
    the bit representation of a non-zero finite floating-point value.  */
-static inline int checkint(uint32_t iy)
+static inline int
+checkint (uint32_t iy)
 {
-	int e = iy >> 23 & 0xff;
-	if (e < 0x7f)
-		return 0;
-	if (e > 0x7f + 23)
-		return 2;
-	if (iy & ((1 << (0x7f + 23 - e)) - 1))
-		return 0;
-	if (iy & (1 << (0x7f + 23 - e)))
-		return 1;
-	return 2;
+  int e = iy >> 23 & 0xff;
+  if (e < 0x7f)
+    return 0;
+  if (e > 0x7f + 23)
+    return 2;
+  if (iy & ((1 << (0x7f + 23 - e)) - 1))
+    return 0;
+  if (iy & (1 << (0x7f + 23 - e)))
+    return 1;
+  return 2;
 }
 
-static inline int zeroinfnan(uint32_t ix)
+static inline int
+zeroinfnan (uint32_t ix)
 {
-	return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
+  return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
 }
 
 /**
  * Returns 𝑥^𝑦.
- * @note should take ~16ns
+ *
+ * - ULP error: 0.82 (~ 0.5 + relerr*2^24)
+ * - relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
+ * - relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
+ * - relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
+ *
+ * @raise ERANGE on overflow or underflow
+ * @raise EDOM if x is negative and y is a finite non-integer
  */
-float powf(float x, float y)
+float
+powf (float x, float y)
 {
-	uint32_t sign_bias = 0;
-	uint32_t ix, iy;
+  uint32_t sign_bias = 0;
+  uint32_t ix, iy;
 
-	ix = asuint(x);
-	iy = asuint(y);
-	if (UNLIKELY(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
-                     zeroinfnan(iy))) {
-		/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan).  */
-		if (UNLIKELY(zeroinfnan(iy))) {
-			if (2 * iy == 0)
-				return issignalingf_inline(x) ? x + y : 1.0f;
-			if (ix == 0x3f800000)
-				return issignalingf_inline(y) ? x + y : 1.0f;
-			if (2 * ix > 2u * 0x7f800000 ||
-			    2 * iy > 2u * 0x7f800000)
-				return x + y;
-			if (2 * ix == 2 * 0x3f800000)
-				return 1.0f;
-			if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
-				return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
-			return y * y;
-		}
-		if (UNLIKELY(zeroinfnan(ix))) {
-			float_t x2 = x * x;
-			if (ix & 0x80000000 && checkint(iy) == 1)
-				x2 = -x2;
-			/* Without the barrier some versions of clang hoist the 1/x2 and
-			   thus division by zero exception can be signaled spuriously.  */
-			return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
-		}
-		/* x and y are non-zero finite.  */
-		if (ix & 0x80000000) {
-			/* Finite x < 0.  */
-			int yint = checkint(iy);
-			if (yint == 0)
-				return __math_invalidf(x);
-			if (yint == 1)
-				sign_bias = SIGN_BIAS;
-			ix &= 0x7fffffff;
-		}
-		if (ix < 0x00800000) {
-			/* Normalize subnormal x so exponent becomes negative.  */
-			ix = asuint(x * 0x1p23f);
-			ix &= 0x7fffffff;
-			ix -= 23 << 23;
-		}
+  ix = asuint (x);
+  iy = asuint (y);
+  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
+    {
+      /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan).  */
+      if (unlikely (zeroinfnan (iy)))
+	{
+	  if (2 * iy == 0)
+	    return issignalingf_inline (x) ? x + y : 1.0f;
+	  if (ix == 0x3f800000)
+	    return issignalingf_inline (y) ? x + y : 1.0f;
+	  if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
+	    return x + y;
+	  if (2 * ix == 2 * 0x3f800000)
+	    return 1.0f;
+	  if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
+	    return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
+	  return y * y;
 	}
-	double_t logx = log2_inline(ix);
-	double_t ylogx = y * logx; /* cannot overflow, y is single prec.  */
-	if (UNLIKELY((asuint64(ylogx) >> 47 & 0xffff) >=
-                     asuint64(126.0 * POWF_SCALE) >> 47)) {
-		/* |y*log(x)| >= 126.  */
-		if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
-			return __math_oflowf(sign_bias);
-		if (ylogx <= -150.0 * POWF_SCALE)
-			return __math_uflowf(sign_bias);
+      if (unlikely (zeroinfnan (ix)))
+	{
+	  float_t x2 = x * x;
+	  if (ix & 0x80000000 && checkint (iy) == 1)
+	    {
+	      x2 = -x2;
+	      sign_bias = 1;
+	    }
+#if WANT_ERRNO
+	  if (2 * ix == 0 && iy & 0x80000000)
+	    return __math_divzerof (sign_bias);
+#endif
+	  /* Without the barrier some versions of clang hoist the 1/x2 and
+	     thus division by zero exception can be signaled spuriously.  */
+	  return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
 	}
-	return exp2_inline(ylogx, sign_bias);
+      /* x and y are non-zero finite.  */
+      if (ix & 0x80000000)
+	{
+	  /* Finite x < 0.  */
+	  int yint = checkint (iy);
+	  if (yint == 0)
+	    return __math_invalidf (x);
+	  if (yint == 1)
+	    sign_bias = SIGN_BIAS;
+	  ix &= 0x7fffffff;
+	}
+      if (ix < 0x00800000)
+	{
+	  /* Normalize subnormal x so exponent becomes negative.  */
+	  ix = asuint (x * 0x1p23f);
+	  ix &= 0x7fffffff;
+	  ix -= 23 << 23;
+	}
+    }
+  double_t logx = log2_inline (ix);
+  double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec.  */
+  if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
+		 >= asuint64 (126.0 * POWF_SCALE) >> 47))
+    {
+      /* |y*log(x)| >= 126.  */
+      if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
+	/* |x^y| > 0x1.ffffffp127.  */
+	return __math_oflowf (sign_bias);
+      if (WANT_ROUNDING && WANT_ERRNO
+	  && ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
+	/* |x^y| > 0x1.fffffep127, check if we round away from 0.  */
+	if ((!sign_bias
+	     && eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
+	    || (sign_bias
+		&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
+		     != -1.0f))
+	  return __math_oflowf (sign_bias);
+      if (ylogx <= -150.0 * POWF_SCALE)
+	return __math_uflowf (sign_bias);
+#if WANT_ERRNO_UFLOW
+      if (ylogx < -149.0 * POWF_SCALE)
+	return __math_may_uflowf (sign_bias);
+#endif
+    }
+  return exp2_inline (ylogx, sign_bias);
 }
 
-__weak_reference(powf, __powf_finite);
+#if USE_GLIBC_ABI
+strong_alias (powf, __powf_finite)
+hidden_alias (powf, __ieee754_powf)
+#endif
diff --git a/libc/tinymath/powf_data.c b/libc/tinymath/powf_data.c
index 0ef109ad6..24cb324af 100644
--- a/libc/tinymath/powf_data.c
+++ b/libc/tinymath/powf_data.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,8 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/tinymath/powf_data.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Double-precision math functions (MIT License)\\n\
-Copyright 2018 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-/*
- * Data definition for powf.
- *
- * Copyright (c) 2017-2018, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
+#include "libc/tinymath/arm.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 const struct powf_log2_data __powf_log2_data = {
   .tab = {
diff --git a/libc/tinymath/powf_data.internal.h b/libc/tinymath/powf_data.internal.h
deleted file mode 100644
index 498abf299..000000000
--- a/libc/tinymath/powf_data.internal.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
-#define COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_
-
-#define POWF_LOG2_TABLE_BITS 4
-#define POWF_LOG2_POLY_ORDER 5
-#if TOINT_INTRINSICS
-#define POWF_SCALE_BITS EXP2F_TABLE_BITS
-#else
-#define POWF_SCALE_BITS 0
-#endif
-#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
-
-COSMOPOLITAN_C_START_
-
-extern const struct powf_log2_data {
-  struct {
-    double invc, logc;
-  } tab[1 << POWF_LOG2_TABLE_BITS];
-  double poly[POWF_LOG2_POLY_ORDER];
-} __powf_log2_data;
-
-COSMOPOLITAN_C_END_
-#endif /* COSMOPOLITAN_LIBC_TINYMATH_POWF_DATA_INTERNAL_H_ */
diff --git a/libc/tinymath/powl.c b/libc/tinymath/powl.c
index cb53b0e2c..3ed4cd9e4 100644
--- a/libc/tinymath/powl.c
+++ b/libc/tinymath/powl.c
@@ -1,127 +1,42 @@
-/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8                               :vi │
-╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
+/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
+│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│ Permission to use, copy, modify, and/or distribute this software for         │
-│ any purpose with or without fee is hereby granted, provided that the         │
-│ above copyright notice and this permission notice appear in all copies.      │
+│ OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c                                  │
+│         /usr/src/lib/libm/src/ld128/e_powl.c                                 │
+│                                                                              │
+│ Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>                    │
+│                                                                              │
+│ Permission to use, copy, modify, and distribute this software for any        │
+│ purpose with or without fee is hereby granted, provided that the above       │
+│ copyright notice and this permission notice appear in all copies.            │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES     │
+│ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF             │
+│ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR      │
+│ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES       │
+│ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN        │
+│ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF      │
+│                                                                              │
+│ Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.            │
+│                                                                              │
+│ Developed at SunPro, a Sun Microsystems, Inc. business.                      │
+│ Permission to use, copy, modify, and distribute this                         │
+│ software is freely granted, provided that this notice                        │
+│ is preserved.                                                                │
 │                                                                              │
-│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
-│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
-│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
-│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
-│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
-│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
-│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
-│ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/errno.h"
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
-#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+#include "libc/tinymath/freebsd.internal.h"
 
-#ifdef __x86_64__
+__static_yoink("openbsd_libm_notice");
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-/**
- * Returns 𝑥^𝑦.
- * @note should take ~56ns
- */
-long double powl(long double x, long double y) {
-  long double t, u;
-  if (!isunordered(x, y)) {
-    if (!isinf(y)) {
-      if (!isinf(x)) {
-        if (x) {
-          if (y) {
-            if (x < 0 && y != truncl(y)) {
-#ifndef __NO_MATH_ERRNO__
-              errno = EDOM;
-#endif
-              return NAN;
-            }
-            asm("fyl2x" : "=t"(u) : "0"(fabsl(x)), "u"(y) : "st(1)");
-            asm("fprem" : "=t"(t) : "0"(u), "u"(1.L));
-            asm("f2xm1" : "=t"(t) : "0"(t));
-            asm("fscale" : "=t"(t) : "0"(t + 1), "u"(u));
-            if (signbit(x)) {
-              if (y != truncl(y)) return -NAN;
-              if ((int64_t)y & 1) t = -t;
-            }
-            return t;
-          } else {
-            return 1;
-          }
-        } else if (y > 0) {
-          if (signbit(x) && y == truncl(y) && ((int64_t)y & 1)) {
-            return -0.;
-          } else {
-            return 0;
-          }
-        } else if (!y) {
-          return 1;
-        } else {
-#ifndef __NO_MATH_ERRNO__
-          errno = ERANGE;
-#endif
-          if (y == truncl(y) && ((int64_t)y & 1)) {
-            return copysignl(INFINITY, x);
-          } else {
-            return INFINITY;
-          }
-        }
-      } else if (signbit(x)) {
-        if (!y) return 1;
-        x = y < 0 ? 0 : INFINITY;
-        if (y == truncl(y) && ((int64_t)y & 1)) x = -x;
-        return x;
-      } else if (y < 0) {
-        return 0;
-      } else if (y > 0) {
-        return INFINITY;
-      } else {
-        return 1;
-      }
-    } else {
-      x = fabsl(x);
-      if (x < 1) return signbit(y) ? INFINITY : 0;
-      if (x > 1) return signbit(y) ? 0 : INFINITY;
-      return 1;
-    }
-  } else if (!y || x == 1) {
-    return 1;
-  } else {
-    return NAN;
-  }
-}
+#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 
-#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
-/*
- * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
 /*                                                      powl.c
  *
  *      Power function, long double precision
@@ -613,39 +528,9 @@ static long double powil(long double x, int nn)
 	return y;
 }
 
+__weak_reference(powl, __powl_finite);
+
 #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
-#include "libc/tinymath/freebsd.internal.h"
-
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-/*
- * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
 
 /* powl(x,y) return x**y
  *
@@ -1056,8 +941,6 @@ powl(long double x, long double y)
   return s * z;
 }
 
-#endif /* __x86_64__ */
-
 __weak_reference(powl, __powl_finite);
 
-#endif /* long double is long */
+#endif /* __x86_64__ */
diff --git a/libc/tinymath/poz.c b/libc/tinymath/poz.c
index 4599ab71a..09c4a0476 100644
--- a/libc/tinymath/poz.c
+++ b/libc/tinymath/poz.c
@@ -1,7 +1,6 @@
 /*-*- mode:c;indent-tabs-mode:t;c-basic-offset:4;tab-width:4;coding:utf-8   -*-│
 │ vi: set noet ft=c ts=4 sts=4 sw=4 fenc=utf-8                             :vi │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-/* clang-format off */
 /*
 
     Compute probability of measured Chi Square value.
diff --git a/libc/tinymath/randtest.c b/libc/tinymath/randtest.c
index ae96db067..10dc8d5c4 100644
--- a/libc/tinymath/randtest.c
+++ b/libc/tinymath/randtest.c
@@ -1,4 +1,3 @@
-/* clang-format off */
 /*
 
 	 Apply various randomness tests to a stream of bytes
diff --git a/libc/tinymath/remainderf.c b/libc/tinymath/remainderf.c
index b02a9da9c..f403db37e 100644
--- a/libc/tinymath/remainderf.c
+++ b/libc/tinymath/remainderf.c
@@ -19,7 +19,6 @@
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
 
-// clang-format off
 
 /**
  * Returns remainder of dividing 𝑥 by 𝑦.
diff --git a/libc/tinymath/rempio2.c b/libc/tinymath/rempio2.c
index 7337b0684..09565bcd7 100644
--- a/libc/tinymath/rempio2.c
+++ b/libc/tinymath/rempio2.c
@@ -29,15 +29,9 @@
 #include "libc/math.h"
 #include "libc/runtime/fenv.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */
 /*
diff --git a/libc/tinymath/rempio2f.c b/libc/tinymath/rempio2f.c
index 86ed07f49..b7ea0658b 100644
--- a/libc/tinymath/rempio2f.c
+++ b/libc/tinymath/rempio2f.c
@@ -29,15 +29,9 @@
 #include "libc/math.h"
 #include "libc/runtime/fenv.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */
 /*
diff --git a/libc/tinymath/rempio2l.c b/libc/tinymath/rempio2l.c
index 5648b89aa..818989d80 100644
--- a/libc/tinymath/rempio2l.c
+++ b/libc/tinymath/rempio2l.c
@@ -29,19 +29,13 @@
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
+
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
+
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/ld80/e_rem_pio2.c */
 /*
diff --git a/libc/tinymath/rempio2large.c b/libc/tinymath/rempio2large.c
index cc6e643f5..7dce93dab 100644
--- a/libc/tinymath/rempio2large.c
+++ b/libc/tinymath/rempio2large.c
@@ -27,15 +27,11 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
+
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
 /*
diff --git a/libc/tinymath/remquo.c b/libc/tinymath/remquo.c
index 975451a21..156a1e320 100644
--- a/libc/tinymath/remquo.c
+++ b/libc/tinymath/remquo.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Computes remainder and part of quotient.
diff --git a/libc/tinymath/remquof.c b/libc/tinymath/remquof.c
index ca4fabcf4..a5eddfdb3 100644
--- a/libc/tinymath/remquof.c
+++ b/libc/tinymath/remquof.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Computes remainder and part of quotient.
diff --git a/libc/tinymath/remquol.c b/libc/tinymath/remquol.c
index e5c196fa4..4e86521ee 100644
--- a/libc/tinymath/remquol.c
+++ b/libc/tinymath/remquol.c
@@ -28,12 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Computes remainder and part of quotient.
diff --git a/libc/tinymath/rint.c b/libc/tinymath/rint.c
index dd1732d18..f8cfc91f5 100644
--- a/libc/tinymath/rint.c
+++ b/libc/tinymath/rint.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/runtime/fenv.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
diff --git a/libc/tinymath/rintf.c b/libc/tinymath/rintf.c
index d9270e6c4..abef412d7 100644
--- a/libc/tinymath/rintf.c
+++ b/libc/tinymath/rintf.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/runtime/fenv.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 #if FLT_EVAL_METHOD==0
 #define EPS FLT_EPSILON
diff --git a/libc/tinymath/rintl.c b/libc/tinymath/rintl.c
index b9c2e9bce..ba6324740 100644
--- a/libc/tinymath/rintl.c
+++ b/libc/tinymath/rintl.c
@@ -28,12 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Rounds to integer in current rounding mode.
diff --git a/libc/tinymath/round.c b/libc/tinymath/round.c
index d5966f9eb..86670d97d 100644
--- a/libc/tinymath/round.c
+++ b/libc/tinymath/round.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/runtime/fenv.h"
 #include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
diff --git a/libc/tinymath/roundf.c b/libc/tinymath/roundf.c
index e84568cee..4eac941c3 100644
--- a/libc/tinymath/roundf.c
+++ b/libc/tinymath/roundf.c
@@ -28,12 +28,7 @@
 #include "libc/math.h"
 #include "libc/runtime/fenv.h"
 #include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #if FLT_EVAL_METHOD==0
 #define EPS FLT_EPSILON
diff --git a/libc/tinymath/roundl.c b/libc/tinymath/roundl.c
index 2eb2bd7d0..3f2518945 100644
--- a/libc/tinymath/roundl.c
+++ b/libc/tinymath/roundl.c
@@ -29,12 +29,8 @@
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Rounds 𝑥 to nearest integer, away from zero.
diff --git a/libc/tinymath/scalb.c b/libc/tinymath/scalb.c
index f8137e3a2..73048c389 100644
--- a/libc/tinymath/scalb.c
+++ b/libc/tinymath/scalb.c
@@ -26,15 +26,9 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/e_scalb.c */
 /*
diff --git a/libc/tinymath/scalbf.c b/libc/tinymath/scalbf.c
index e71d1e719..e3f0eb8f9 100644
--- a/libc/tinymath/scalbf.c
+++ b/libc/tinymath/scalbf.c
@@ -26,15 +26,9 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 float scalbf(float x, float fn)
 {
diff --git a/libc/tinymath/sin.c b/libc/tinymath/sin.c
index 362e1a5bb..468e93128 100644
--- a/libc/tinymath/sin.c
+++ b/libc/tinymath/sin.c
@@ -28,15 +28,9 @@
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */
 /*
diff --git a/libc/tinymath/sincos.c b/libc/tinymath/sincos.c
index 69fcec219..934123dc2 100644
--- a/libc/tinymath/sincos.c
+++ b/libc/tinymath/sincos.c
@@ -29,15 +29,9 @@
 #include "libc/runtime/runtime.h"
 #include "libc/tinymath/feval.internal.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */
 /*
diff --git a/libc/tinymath/sincosf.c b/libc/tinymath/sincosf.c
index 33371598d..a95801bfc 100644
--- a/libc/tinymath/sincosf.c
+++ b/libc/tinymath/sincosf.c
@@ -1,9 +1,9 @@
 /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
-│ vi: set et ft=c ts=8 sts=2 sw=2 fenc=utf-8                               :vi │
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,20 +25,19 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/intrin/likely.h"
-#include "libc/math.h"
 #include "libc/tinymath/sincosf.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/* Fast sincosf implementation.  Worst-case ULP is 0.5607, maximum relative
-   error is 0.5303 * 2^-23.  A single-step range reduction is used for
-   small values.  Large inputs have their range reduced using fast integer
-   arithmetic.  */
+/**
+ * Returns sine and cosine of y.
+ *
+ * This is a fast sincosf implementation. Worst-case ULP is 0.5607,
+ * maximum relative error is 0.5303 * 2^-23. A single-step range
+ * reduction is used for small values. Large inputs have their range
+ * reduced using fast integer arithmetic.
+ *
+ * @raise EDOM if y is an infinity
+ */
 void
 sincosf (float y, float *sinp, float *cosp)
 {
@@ -51,11 +50,11 @@ sincosf (float y, float *sinp, float *cosp)
     {
       double x2 = x * x;
 
-      if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-12f)))
+      if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
 	{
-	  if (UNLIKELY (abstop12 (y) < abstop12 (0x1p-126f)))
+	  if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
 	    /* Force underflow for tiny y.  */
-	    FORCE_EVAL (x2);
+	    force_eval_float (x2);
 	  *sinp = y;
 	  *cosp = 1.0f;
 	  return;
@@ -75,7 +74,7 @@ sincosf (float y, float *sinp, float *cosp)
 
       sincosf_poly (x * s, x * x, p, n, sinp, cosp);
     }
-  else if (LIKELY (abstop12 (y) < abstop12 (INFINITY)))
+  else if (likely (abstop12 (y) < abstop12 (INFINITY)))
     {
       uint32_t xi = asuint (y);
       int sign = xi >> 31;
diff --git a/libc/tinymath/sincosf.internal.h b/libc/tinymath/sincosf.internal.h
index e07239828..e8c055e25 100644
--- a/libc/tinymath/sincosf.internal.h
+++ b/libc/tinymath/sincosf.internal.h
@@ -1,15 +1,7 @@
 #ifndef COSMOPOLITAN_LIBC_TINYMATH_SINCOSF_INTERNAL_H_
 #define COSMOPOLITAN_LIBC_TINYMATH_SINCOSF_INTERNAL_H_
-#include "libc/tinymath/internal.h"
+#include "libc/tinymath/arm.internal.h"
 COSMOPOLITAN_C_START_
-// clang-format off
-
-/*
- * Header for sinf, cosf and sincosf.
- *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
- */
 
 /* 2PI * 2^-64.  */
 static const double pi63 = 0x1.921FB54442D18p-62;
@@ -27,10 +19,10 @@ typedef struct
 } sincos_t;
 
 /* Polynomial data (the cosine polynomial is negated in the 2nd entry).  */
-extern const sincos_t __sincosf_table[2] ;
+extern const sincos_t __sincosf_table[2] HIDDEN;
 
 /* Table with 4/PI to 192 bit precision.  */
-extern const uint32_t __inv_pio4[] ;
+extern const uint32_t __inv_pio4[] HIDDEN;
 
 /* Top 12 bits of the float representation with the sign bit cleared.  */
 static inline uint32_t
diff --git a/libc/tinymath/sincosf_data.c b/libc/tinymath/sincosf_data.c
index d90de5455..03d181202 100644
--- a/libc/tinymath/sincosf_data.c
+++ b/libc/tinymath/sincosf_data.c
@@ -3,7 +3,7 @@
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
 │  Optimized Routines                                                          │
-│  Copyright (c) 1999-2022, Arm Limited.                                       │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -26,12 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/tinymath/sincosf.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Optimized Routines (MIT License)\\n\
-Copyright 2022 ARM Limited\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+__static_yoink("arm_optimized_routines_notice");
 
 /* The constants and polynomials for sine and cosine.  The 2nd entry
    computes -cos (x) rather than cos (x) to get negation for free.  */
diff --git a/libc/tinymath/sincosl.c b/libc/tinymath/sincosl.c
index 8cc1c3d2b..cb4d03a49 100644
--- a/libc/tinymath/sincosl.c
+++ b/libc/tinymath/sincosl.c
@@ -30,12 +30,8 @@
 #include "libc/tinymath/kernel.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns sine and cosine of 𝑥.
diff --git a/libc/tinymath/sindf.c b/libc/tinymath/sindf.c
index 06a4196f1..60608e0c1 100644
--- a/libc/tinymath/sindf.c
+++ b/libc/tinymath/sindf.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */
 /*
diff --git a/libc/tinymath/sinf.c b/libc/tinymath/sinf.c
index 81d3218d2..812d67521 100644
--- a/libc/tinymath/sinf.c
+++ b/libc/tinymath/sinf.c
@@ -1,9 +1,9 @@
-/*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8   -*-│
-│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8                                   :vi │
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│  Optimized Routines                                                          │
+│  Copyright (c) 2018-2024, Arm Limited.                                       │
 │                                                                              │
 │  Permission is hereby granted, free of charge, to any person obtaining       │
 │  a copy of this software and associated documentation files (the             │
@@ -25,95 +25,68 @@
 │  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/math.h"
-#include "libc/tinymath/complex.internal.h"
-#include "libc/tinymath/feval.internal.h"
-#include "libc/tinymath/kernel.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
-
-/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- * Optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-/* Small multiples of pi/2 rounded to double precision. */
-static const double
-s1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
-s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
-s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
-s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
+#include "libc/tinymath/sincosf.internal.h"
+__static_yoink("arm_optimized_routines_notice");
 
 /**
- * Returns sine of 𝑥.
- * @note should take about 5ns
+ * Returns sine of y.
+ *
+ * This is a fast sinf implementation. The worst-case ULP is 0.5607 and
+ * the maximum relative error is 0.5303 * 2^-23. A single-step range
+ * reduction is used for small values. Large inputs have their range
+ * reduced using fast integer arithmetic.
+ *
+ * @raise EDOM and FE_INVALID if y is an infinity
  */
-float sinf(float x)
+float
+sinf (float y)
 {
-	double y;
-	uint32_t ix;
-	int n, sign;
+  double x = y;
+  double s;
+  int n;
+  const sincos_t *p = &__sincosf_table[0];
 
-	GET_FLOAT_WORD(ix, x);
-	sign = ix >> 31;
-	ix &= 0x7fffffff;
+  if (abstop12 (y) < abstop12 (pio4f))
+    {
+      s = x * x;
 
-	if (ix <= 0x3f490fda) {  /* |x| ~<= pi/4 */
-		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			/* raise inexact if x!=0 and underflow if subnormal */
-			FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f);
-			return x;
-		}
-		return __sindf(x);
-	}
-	if (ix <= 0x407b53d1) {  /* |x| ~<= 5*pi/4 */
-		if (ix <= 0x4016cbe3) {  /* |x| ~<= 3pi/4 */
-			if (sign)
-				return -__cosdf(x + s1pio2);
-			else
-				return __cosdf(x - s1pio2);
-		}
-		return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
-	}
-	if (ix <= 0x40e231d5) {  /* |x| ~<= 9*pi/4 */
-		if (ix <= 0x40afeddf) {  /* |x| ~<= 7*pi/4 */
-			if (sign)
-				return __cosdf(x + s3pio2);
-			else
-				return -__cosdf(x - s3pio2);
-		}
-		return __sindf(sign ? x + s4pio2 : x - s4pio2);
+      if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+	{
+	  if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
+	    /* Force underflow for tiny y.  */
+	    force_eval_float (s);
+	  return y;
 	}
 
-	/* sin(Inf or NaN) is NaN */
-	if (ix >= 0x7f800000)
-		return x - x;
+      return sinf_poly (x, s, p, 0);
+    }
+  else if (likely (abstop12 (y) < abstop12 (120.0f)))
+    {
+      x = reduce_fast (x, p, &n);
 
-	/* general argument reduction needed */
-	n = __rem_pio2f(x, &y);
-	switch (n&3) {
-	case 0: return  __sindf(y);
-	case 1: return  __cosdf(y);
-	case 2: return  __sindf(-y);
-	default:
-		return -__cosdf(y);
-	}
+      /* Setup the signs for sin and cos.  */
+      s = p->sign[n & 3];
+
+      if (n & 2)
+	p = &__sincosf_table[1];
+
+      return sinf_poly (x * s, x * x, p, n);
+    }
+  else if (abstop12 (y) < abstop12 (INFINITY))
+    {
+      uint32_t xi = asuint (y);
+      int sign = xi >> 31;
+
+      x = reduce_large (xi, &n);
+
+      /* Setup signs for sin and cos - include original sign.  */
+      s = p->sign[(n + sign) & 3];
+
+      if ((n + sign) & 2)
+	p = &__sincosf_table[1];
+
+      return sinf_poly (x * s, x * x, p, n);
+    }
+  else
+    return __math_invalidf (y);
 }
diff --git a/libc/tinymath/sinh.c b/libc/tinymath/sinh.c
index d740da683..6c3b9975c 100644
--- a/libc/tinymath/sinh.c
+++ b/libc/tinymath/sinh.c
@@ -28,12 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/expo.internal.h"
 #include "libc/tinymath/freebsd.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns hyperbolic sine of 𝑥.
diff --git a/libc/tinymath/sinhf.c b/libc/tinymath/sinhf.c
index 75fcb49c6..1f1e359f3 100644
--- a/libc/tinymath/sinhf.c
+++ b/libc/tinymath/sinhf.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/expo.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns hyperbolic sine of 𝑥.
diff --git a/libc/tinymath/sinhl.c b/libc/tinymath/sinhl.c
index 372038ada..bb642a6df 100644
--- a/libc/tinymath/sinhl.c
+++ b/libc/tinymath/sinhl.c
@@ -39,16 +39,11 @@
 #include "libc/intrin/likely.h"
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
-#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
+
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
 #if LDBL_MAX_EXP != 0x4000
 /* We also require the usual expsign encoding. */
diff --git a/libc/tinymath/sinl.c b/libc/tinymath/sinl.c
index b65e21e4b..7f6241c8e 100644
--- a/libc/tinymath/sinl.c
+++ b/libc/tinymath/sinl.c
@@ -30,12 +30,7 @@
 #include "libc/tinymath/kernel.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns sine of 𝑥.
diff --git a/libc/tinymath/sqrt.c b/libc/tinymath/sqrt.c
index b6e1cd215..f8802df1e 100644
--- a/libc/tinymath/sqrt.c
+++ b/libc/tinymath/sqrt.c
@@ -28,12 +28,7 @@
 #include "libc/intrin/likely.h"
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #define FENV_SUPPORT 1
 
@@ -58,7 +53,7 @@ static inline uint64_t mul64(uint64_t a, uint64_t b)
  */
 double sqrt(double x)
 {
-#if defined(__x86_64__) && defined(__SSE2__)
+#if defined(__x86_64__)
 
 	asm("sqrtsd\t%1,%0" : "=x"(x) : "x"(x));
 	return x;
diff --git a/libc/tinymath/sqrt_data.c b/libc/tinymath/sqrt_data.c
index 9850440cd..c0e9e6f75 100644
--- a/libc/tinymath/sqrt_data.c
+++ b/libc/tinymath/sqrt_data.c
@@ -26,12 +26,8 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/tinymath/internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 const uint16_t __rsqrt_tab[128] = {
 0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
diff --git a/libc/tinymath/sqrtf.c b/libc/tinymath/sqrtf.c
index 21374d03f..8df10f33c 100644
--- a/libc/tinymath/sqrtf.c
+++ b/libc/tinymath/sqrtf.c
@@ -28,12 +28,7 @@
 #include "libc/intrin/likely.h"
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #define FENV_SUPPORT 1
 
@@ -42,14 +37,12 @@ static inline uint32_t mul32(uint32_t a, uint32_t b)
 	return (uint64_t)a*b >> 32;
 }
 
-/* see sqrt.c for more detailed comments.  */
-
 /**
  * Returns square root of 𝑥.
  */
 float sqrtf(float x)
 {
-#ifdef __SSE2__
+#if defined(__x86_64__)
 
 	asm("sqrtss\t%1,%0" : "=x"(x) : "x"(x));
 	return x;
diff --git a/libc/tinymath/sqrtl.c b/libc/tinymath/sqrtl.c
index 4bf9d7df2..fa3c2c41c 100644
--- a/libc/tinymath/sqrtl.c
+++ b/libc/tinymath/sqrtl.c
@@ -30,12 +30,7 @@
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 #define FENV_SUPPORT 1
 
@@ -199,14 +194,12 @@ static inline u128 mul128_tail(u128 a, u128 b)
 	return lo;
 }
 
-/* see sqrt.c for detailed comments.  */
-
 /**
  * Returns square root of 𝑥.
  */
 long double sqrtl(long double x)
 {
-#ifdef __x86__
+#if defined(__x86__)
 
 	asm("fsqrt" : "+t"(x));
 	return x;
diff --git a/libc/tinymath/tan.c b/libc/tinymath/tan.c
index 0d5a45f73..475328ea7 100644
--- a/libc/tinymath/tan.c
+++ b/libc/tinymath/tan.c
@@ -28,15 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
 #include "libc/tinymath/kernel.internal.h"
-
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */
 /*
diff --git a/libc/tinymath/tandf.c b/libc/tinymath/tandf.c
index 93b79289b..bff4e4adb 100644
--- a/libc/tinymath/tandf.c
+++ b/libc/tinymath/tandf.c
@@ -27,15 +27,9 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
+__static_yoink("fdlibm_notice");
 
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
 
 /* origin: FreeBSD /usr/src/lib/msun/src/k_tanf.c */
 /*
diff --git a/libc/tinymath/tanf.c b/libc/tinymath/tanf.c
index 5fb7c4453..508fec260 100644
--- a/libc/tinymath/tanf.c
+++ b/libc/tinymath/tanf.c
@@ -28,12 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: FreeBSD /usr/src/lib/msun/src/s_tanf.c */
 /*
diff --git a/libc/tinymath/tanh.c b/libc/tinymath/tanh.c
index 043d788f7..760440a1f 100644
--- a/libc/tinymath/tanh.c
+++ b/libc/tinymath/tanh.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns hyperbolic tangent of 𝑥.
diff --git a/libc/tinymath/tanhf.c b/libc/tinymath/tanhf.c
index 2b735ff2d..628a4860e 100644
--- a/libc/tinymath/tanhf.c
+++ b/libc/tinymath/tanhf.c
@@ -27,12 +27,8 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns hyperbolic tangent of 𝑥.
diff --git a/libc/tinymath/tanhl.c b/libc/tinymath/tanhl.c
index 295f72de7..d1e8ffb78 100644
--- a/libc/tinymath/tanhl.c
+++ b/libc/tinymath/tanhl.c
@@ -39,16 +39,11 @@
 #include "libc/intrin/likely.h"
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
-#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
-asm(".ident\t\"\\n\\n\
-FreeBSD libm (BSD-2 License)\\n\
-Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\"");
-asm(".ident\t\"\\n\\n\
-fdlibm (fdlibm license)\\n\
-Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("fdlibm_notice");
+__static_yoink("freebsd_libm_notice");
+
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
 
 #if LDBL_MAX_EXP != 0x4000
 /* We also require the usual expsign encoding. */
diff --git a/libc/tinymath/tanl.c b/libc/tinymath/tanl.c
index 9ef8475a7..90b6d69e4 100644
--- a/libc/tinymath/tanl.c
+++ b/libc/tinymath/tanl.c
@@ -30,12 +30,8 @@
 #include "libc/tinymath/kernel.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Returns tangent of x.
diff --git a/libc/tinymath/tgamma.c b/libc/tinymath/tgamma.c
index 35369f448..822f43249 100644
--- a/libc/tinymath/tgamma.c
+++ b/libc/tinymath/tgamma.c
@@ -28,12 +28,8 @@
 #include "libc/math.h"
 #include "libc/tinymath/feval.internal.h"
 #include "libc/tinymath/kernel.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /*
 "A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964)
diff --git a/libc/tinymath/tgammal.c b/libc/tinymath/tgammal.c
index ae73f31dd..ee5ffafd1 100644
--- a/libc/tinymath/tgammal.c
+++ b/libc/tinymath/tgammal.c
@@ -28,15 +28,9 @@
 #include "libc/math.h"
 #include "libc/tinymath/internal.h"
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
+__static_yoink("musl_libc_notice");
+__static_yoink("openbsd_libm_notice");
 
-asm(".ident\t\"\\n\\n\
-OpenBSD libm (ISC License)\\n\
-Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>\"");
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_tgammal.c */
 /*
diff --git a/libc/tinymath/trunc.c b/libc/tinymath/trunc.c
index 0262f4028..a849fcc6d 100644
--- a/libc/tinymath/trunc.c
+++ b/libc/tinymath/trunc.c
@@ -30,12 +30,7 @@
 #ifndef __llvm__
 #include "third_party/intel/smmintrin.internal.h"
 #endif
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
+__static_yoink("musl_libc_notice");
 
 /**
  * Rounds to integer, towards zero.
diff --git a/libc/tinymath/truncf.c b/libc/tinymath/truncf.c
index 3f85aef61..906e00b0a 100644
--- a/libc/tinymath/truncf.c
+++ b/libc/tinymath/truncf.c
@@ -30,12 +30,8 @@
 #ifndef __llvm__
 #include "third_party/intel/smmintrin.internal.h"
 #endif
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Rounds to integer, towards zero.
diff --git a/libc/tinymath/truncl.c b/libc/tinymath/truncl.c
index 52f8586ca..55b305294 100644
--- a/libc/tinymath/truncl.c
+++ b/libc/tinymath/truncl.c
@@ -29,12 +29,8 @@
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-// clang-format off
 
 /**
  * Rounds to integer, towards zero.
diff --git a/libc/unistd.h b/libc/unistd.h
new file mode 100644
index 000000000..04bf9c95a
--- /dev/null
+++ b/libc/unistd.h
@@ -0,0 +1,14 @@
+#ifndef COSMOPOLITAN_LIBC_UNISTD_H_
+#define COSMOPOLITAN_LIBC_UNISTD_H_
+COSMOPOLITAN_C_START_
+
+#define _CS_PATH 0
+
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || \
+    defined(_COSMO_SOURCE)
+void encrypt(char *, int) libcesque;
+char *crypt(const char *, const char *) libcesque;
+#endif
+
+COSMOPOLITAN_C_END_
+#endif /* COSMOPOLITAN_LIBC_UNISTD_H_ */
diff --git a/libc/x/x.h b/libc/x/x.h
index 1be0a30bf..dca4b877a 100644
--- a/libc/x/x.h
+++ b/libc/x/x.h
@@ -1,7 +1,7 @@
+#ifdef _COSMO_SOURCE
 #ifndef COSMOPOLITAN_LIBC_X_H_
 #define COSMOPOLITAN_LIBC_X_H_
 
-#ifdef _COSMO_SOURCE
 #define xwrite        __xwrite
 #define xdie          __xdie
 #define xmalloc       __xmalloc
@@ -32,47 +32,47 @@
 #define xfixpath      __xfixpath
 #define xslurp        __xslurp
 #define xbarf         __xbarf
-#endif /* _COSMO_SOURCE */
 
-#ifdef _COSMO_SOURCE
 COSMOPOLITAN_C_START_
 
 int xwrite(int, const void *, uint64_t);
 void xdie(void) wontreturn;
 char *xdtoa(double)
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xdtoaf(float)
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xdtoal(long double)
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 void *xmalloc(size_t) attributeallocsize((1))
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 void *xrealloc(void *, size_t)
-    attributeallocsize((2)) dontthrow nocallback __wur;
+    attributeallocsize((2)) dontthrow dontcallback __wur;
 void *xcalloc(size_t, size_t) attributeallocsize((1, 2))
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 void *xvalloc(size_t) attributeallocsize((1)) returnsaligned((65536))
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 void *xmemalign(size_t, size_t) attributeallocalign((1)) attributeallocsize((2))
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
-void *xmemalignzero(size_t, size_t) attributeallocalign((1)) attributeallocsize(
-    (2)) returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
+void *xmemalignzero(size_t, size_t) attributeallocalign((1))
+    attributeallocsize((2))
+        returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xstrdup(const char *) paramsnonnull()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xstrndup(const char *, size_t) paramsnonnull()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xstrcat(const char *, ...) paramsnonnull((1)) nullterminated()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 #define xstrcat(...) (xstrcat)(__VA_ARGS__, NULL)
 char *xstrmul(const char *, size_t) paramsnonnull((1))
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xinet_ntop(int, const void *) paramsnonnull()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 void *xunbinga(size_t, const char16_t *)
-    attributeallocalign((1)) returnspointerwithnoaliases dontthrow
-    nocallback __wur returnsnonnull dontthrow nocallback __wur returnsnonnull;
-void *xunbing(const char16_t *) returnspointerwithnoaliases dontthrow
-    nocallback __wur returnsnonnull dontthrow nocallback __wur returnsnonnull;
+    attributeallocalign((1)) returnspointerwithnoaliases dontthrow dontcallback
+    __wur returnsnonnull dontthrow dontcallback __wur returnsnonnull;
+void *xunbing(const char16_t *)
+    returnspointerwithnoaliases dontthrow dontcallback __wur
+    returnsnonnull dontthrow dontcallback __wur returnsnonnull;
 char16_t *utf8to16(const char *, size_t, size_t *) __wur;
 char *utf16to8(const char16_t *, size_t, size_t *) __wur;
 wchar_t *utf8to32(const char *, size_t, size_t *) __wur;
@@ -84,16 +84,16 @@ char *xstripexts(const char *) __wur;
 void *xload(_Atomic(void *) *, const void *, size_t, size_t);
 int rmrf(const char *);
 char *xbasename(const char *) paramsnonnull()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xdirname(const char *) paramsnonnull()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xjoinpaths(const char *, const char *) paramsnonnull()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 void xfixpath(void);
 void *xslurp(const char *, size_t *)
     paramsnonnull((1)) returnspointerwithnoaliases returnsaligned((4096)) __wur;
 int xbarf(const char *, const void *, size_t);
 
 COSMOPOLITAN_C_END_
-#endif /* _COSMO_SOURCE */
 #endif /* COSMOPOLITAN_LIBC_X_H_ */
+#endif /* _COSMO_SOURCE */
diff --git a/libc/x/xasprintf.h b/libc/x/xasprintf.h
index 7e4049bae..4d92a3db3 100644
--- a/libc/x/xasprintf.h
+++ b/libc/x/xasprintf.h
@@ -3,9 +3,9 @@
 COSMOPOLITAN_C_START_
 
 char *xasprintf(const char *, ...) paramsnonnull((1))
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 char *xvasprintf(const char *, va_list) paramsnonnull()
-    returnspointerwithnoaliases dontthrow nocallback __wur returnsnonnull;
+    returnspointerwithnoaliases dontthrow dontcallback __wur returnsnonnull;
 
 COSMOPOLITAN_C_END_
 #endif /* COSMOPOLITAN_LIBC_X_XASPRINTF_H_ */
diff --git a/net/http/base32.c b/net/http/base32.c
index 56c56c9ef..6e7963d38 100644
--- a/net/http/base32.c
+++ b/net/http/base32.c
@@ -21,15 +21,11 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-Apache License, Version 2.0\\n\
-Copyright 2010 Google Inc.\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 const char base32def[] = "0123456789abcdefghjkmnpqrstvwxyz";
 
 int tobits(int b) {
-  int bits = 0; while (b && (b >>= 1)) bits++;
+  int bits = 0;
+  while (b && (b >>= 1)) bits++;
   return bits;
 }
 
@@ -48,20 +44,19 @@ int tobits(int b) {
  * @param ol if non-NULL receives output length
  * @return allocated NUL-terminated buffer, or NULL w/ errno
  */
-char* EncodeBase32(const char *s, size_t sl,
-                   const char *a, size_t al,
+char *EncodeBase32(const char *s, size_t sl, const char *a, size_t al,
                    size_t *ol) {
   size_t count = 0;
   char *r = NULL;
   if (sl == -1) sl = s ? strlen(s) : 0;
   if (al == 0) {
     a = base32def;
-    al = sizeof(base32def)/sizeof(a[0]);
+    al = sizeof(base32def) / sizeof(a[0]);
   }
   unassert(2 <= al && al <= 128);
   int bl = tobits(al);
   int mask = (1 << bl) - 1;
-  size_t n = (sl * 8 + bl - 1) / bl; // calculate output length
+  size_t n = (sl * 8 + bl - 1) / bl;  // calculate output length
   if ((r = malloc(n + 1))) {
     int buffer = s[0];
     size_t next = 1;
@@ -92,11 +87,11 @@ static signed char kBase32[256] = {
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,  // 0x00
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
     -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1,  // 0x20
-     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,  // 0x30
-    -1, 10, 11, 12, 13, 14, 15, 16, 17,  1, 18, 19,  1, 20, 21, -1,  // 0x40
-    22, 23, 24, 25, 26,  0, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1,  // 0x50
-    -1, 10, 11, 12, 13, 14, 15, 16, 17,  1, 18, 19,  1, 20, 21, -1,  // 0x60
-    22, 23, 24, 25, 26,  0, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1,  // 0x70
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  -1, -1, -1, -1, -1, -1,  // 0x30
+    -1, 10, 11, 12, 13, 14, 15, 16, 17, 1,  18, 19, 1,  20, 21, -1,  // 0x40
+    22, 23, 24, 25, 26, 0,  27, 28, 29, 30, 31, -1, -1, -1, -1, -1,  // 0x50
+    -1, 10, 11, 12, 13, 14, 15, 16, 17, 1,  18, 19, 1,  20, 21, -1,  // 0x60
+    22, 23, 24, 25, 26, 0,  27, 28, 29, 30, 31, -1, -1, -1, -1, -1,  // 0x70
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xa0
@@ -120,19 +115,18 @@ static signed char kBase32[256] = {
  * @param ol if non-NULL receives output length
  * @return allocated NUL-terminated buffer, or NULL w/ errno
  */
-char* DecodeBase32(const char *s, size_t sl,
-                   const char *a, size_t al,
+char *DecodeBase32(const char *s, size_t sl, const char *a, size_t al,
                    size_t *ol) {
   size_t count = 0;
   char *r = NULL;
   if (sl == -1) sl = s ? strlen(s) : 0;
   if (al == 0) {
     a = base32def;
-    al = sizeof(base32def)/sizeof(a[0]);
+    al = sizeof(base32def) / sizeof(a[0]);
   }
   unassert(2 <= al && al <= 128);
   int bl = tobits(al);
-  size_t n = (sl * bl + 1) / 8 + 1; // calculate output length
+  size_t n = (sl * bl + 1) / 8 + 1;  // calculate output length
   // process input
   if ((r = malloc(n + 1))) {
     unsigned int buffer = 0;
diff --git a/net/http/gethttpmethod.gperf b/net/http/gethttpmethod.gperf
deleted file mode 100644
index bc7e90a3f..000000000
--- a/net/http/gethttpmethod.gperf
+++ /dev/null
@@ -1,30 +0,0 @@
-%{
-#include "libc/str/str.h"
-#include "net/http/http.h"
-#define GPERF_DOWNCASE
-%}
-%compare-strncmp
-%ignore-case
-%language=ANSI-C
-%readonly-tables
-%struct-type
-%define lookup-function-name LookupHttpMethod
-struct HttpMethodSlot { char name[8]; char code; };
-%%
-DELETE,      kHttpDelete
-GET,         kHttpGet
-HEAD,        kHttpHead
-POST,        kHttpPost
-PUT,         kHttpPut
-OPTIONS,     kHttpOptions
-CONNECT,     kHttpConnect
-TRACE,       kHttpTrace
-COPY,        kHttpCopy
-LOCK,        kHttpLock
-MERGE,       kHttpMerge
-MKCOL,       kHttpMkcol
-MOVE,        kHttpMove
-NOTIFY,      kHttpNotify
-PATCH,       kHttpPatch
-REPORT,      kHttpReport
-UNLOCK,      kHttpUnlock
diff --git a/net/http/gethttpmethod.inc b/net/http/gethttpmethod.inc
deleted file mode 100644
index 2c475fef2..000000000
--- a/net/http/gethttpmethod.inc
+++ /dev/null
@@ -1,196 +0,0 @@
-/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: gperf gethttpmethod.gperf  */
-/* Computed positions: -k'1-2' */
-/* clang-format off */
-
-#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
-      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
-      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
-      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
-      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
-      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
-      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
-      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
-      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
-      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
-      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
-      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
-      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
-      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
-      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
-      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
-      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
-      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
-      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
-      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
-      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
-      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
-      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
-/* The character set is not based on ISO-646.  */
-#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
-#endif
-
-#line 1 "gethttpmethod.gperf"
-
-#include "libc/str/str.h"
-#include "libc/str/tab.internal.h"
-#include "net/http/http.h"
-#define GPERF_DOWNCASE
-#line 12 "gethttpmethod.gperf"
-struct HttpMethodSlot { char name[8]; char code; };
-
-#define TOTAL_KEYWORDS 17
-#define MIN_WORD_LENGTH 3
-#define MAX_WORD_LENGTH 7
-#define MIN_HASH_VALUE 3
-#define MAX_HASH_VALUE 25
-/* maximum key range = 23, duplicates = 0 */
-
-#ifndef GPERF_DOWNCASE
-#define GPERF_DOWNCASE 1
-static unsigned char gperf_downcase[256] =
-  {
-      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
-     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
-     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
-     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
-     60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
-    107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
-    122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
-    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
-    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
-    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
-    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
-    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
-    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
-    210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
-    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
-    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
-    255
-  };
-#endif
-
-#ifndef GPERF_CASE_STRNCMP
-#define GPERF_CASE_STRNCMP 1
-static inline int
-gperf_case_strncmp (register const char *s1, register const char *s2, register size_t n)
-{
-  for (; n > 0;)
-    {
-      unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
-      unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
-      if (c1 != 0 && c1 == c2)
-        {
-          n--;
-          continue;
-        }
-      return (int)c1 - (int)c2;
-    }
-  return 0;
-}
-#endif
-
-#ifdef __GNUC__
-__inline
-#else
-#ifdef __cplusplus
-inline
-#endif
-#endif
-static unsigned int
-hash (register const char *str, register size_t len)
-{
-  static const unsigned char asso_values[] =
-    {
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26,  0, 26,  5, 15,  0,
-      26,  5,  0, 26, 26, 10, 15, 10,  0,  5,
-       0, 26, 10, 26,  5,  0, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26,  0, 26,  5,
-      15,  0, 26,  5,  0, 26, 26, 10, 15, 10,
-       0,  5,  0, 26, 10, 26,  5,  0, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-      26, 26, 26, 26, 26, 26
-    };
-  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
-}
-
-static inline const struct HttpMethodSlot *
-LookupHttpMethod (register const char *str, register size_t len)
-{
-  static const struct HttpMethodSlot wordlist[] =
-    {
-      {""}, {""}, {""},
-#line 18 "gethttpmethod.gperf"
-      {"PUT",         kHttpPut},
-#line 16 "gethttpmethod.gperf"
-      {"HEAD",        kHttpHead},
-#line 28 "gethttpmethod.gperf"
-      {"PATCH",       kHttpPatch},
-#line 30 "gethttpmethod.gperf"
-      {"UNLOCK",      kHttpUnlock},
-      {""},
-#line 15 "gethttpmethod.gperf"
-      {"GET",         kHttpGet},
-#line 17 "gethttpmethod.gperf"
-      {"POST",        kHttpPost},
-      {""},
-#line 27 "gethttpmethod.gperf"
-      {"NOTIFY",      kHttpNotify},
-#line 19 "gethttpmethod.gperf"
-      {"OPTIONS",     kHttpOptions},
-      {""},
-#line 22 "gethttpmethod.gperf"
-      {"COPY",        kHttpCopy},
-#line 24 "gethttpmethod.gperf"
-      {"MERGE",       kHttpMerge},
-#line 29 "gethttpmethod.gperf"
-      {"REPORT",      kHttpReport},
-#line 20 "gethttpmethod.gperf"
-      {"CONNECT",     kHttpConnect},
-      {""},
-#line 26 "gethttpmethod.gperf"
-      {"MOVE",        kHttpMove},
-#line 21 "gethttpmethod.gperf"
-      {"TRACE",       kHttpTrace},
-#line 14 "gethttpmethod.gperf"
-      {"DELETE",      kHttpDelete},
-      {""}, {""},
-#line 23 "gethttpmethod.gperf"
-      {"LOCK",        kHttpLock},
-#line 25 "gethttpmethod.gperf"
-      {"MKCOL",       kHttpMkcol}
-    };
-
-  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
-    {
-      register unsigned int key = hash (str, len);
-
-      if (key <= MAX_HASH_VALUE)
-        {
-          register const char *s = wordlist[key].name;
-
-          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
-            return &wordlist[key];
-        }
-    }
-  return 0;
-}
diff --git a/net/http/http.h b/net/http/http.h
index 0e09b4a15..a673a4c3a 100644
--- a/net/http/http.h
+++ b/net/http/http.h
@@ -1,27 +1,19 @@
 #ifndef COSMOPOLITAN_LIBC_HTTP_HTTP_H_
 #define COSMOPOLITAN_LIBC_HTTP_HTTP_H_
+#include "libc/serialize.h"
 #include "libc/time/struct/tm.h"
 
 #define kHttpRequest  0
 #define kHttpResponse 1
 
-#define kHttpGet     1
-#define kHttpHead    2
-#define kHttpPost    3
-#define kHttpPut     4
-#define kHttpDelete  5
-#define kHttpOptions 6
-#define kHttpConnect 7
-#define kHttpTrace   8
-#define kHttpCopy    9
-#define kHttpLock    10
-#define kHttpMerge   11
-#define kHttpMkcol   12
-#define kHttpMove    13
-#define kHttpNotify  14
-#define kHttpPatch   15
-#define kHttpReport  16
-#define kHttpUnlock  17
+#define kHttpGet     READ32LE("GET")
+#define kHttpHead    READ32LE("HEAD")
+#define kHttpPost    READ32LE("POST")
+#define kHttpPut     READ32LE("PUT")
+#define kHttpDelete  READ64LE("DELETE\0")
+#define kHttpOptions READ64LE("OPTIONS")
+#define kHttpConnect READ64LE("CONNECT")
+#define kHttpTrace   READ64LE("TRACE\0\0")
 
 #define kHttpStateStart   0
 #define kHttpStateMethod  1
@@ -168,14 +160,13 @@ struct HttpMessage {
   int i, a, status;
   unsigned char t;
   unsigned char type;
-  unsigned char method;
   unsigned char version;
+  uint64_t method;
   struct HttpSlice k;
   struct HttpSlice uri;
   struct HttpSlice scratch;
   struct HttpSlice message;
   struct HttpSlice headers[kHttpHeadersMax];
-  struct HttpSlice xmethod;
   struct HttpHeaders xheaders;
 };
 
@@ -187,13 +178,11 @@ struct HttpUnchunker {
 };
 
 extern const char kHttpToken[256];
-extern const char kHttpMethod[18][8];
 extern const bool kHttpRepeatable[kHttpHeadersMax];
 
 const char *GetHttpReason(int);
 const char *GetHttpHeaderName(int);
 int GetHttpHeader(const char *, size_t);
-int GetHttpMethod(const char *, size_t);
 void InitHttpMessage(struct HttpMessage *, int);
 void DestroyHttpMessage(struct HttpMessage *);
 int ParseHttpMessage(struct HttpMessage *, const char *, size_t);
@@ -202,6 +191,7 @@ int64_t ParseContentLength(const char *, size_t);
 char *FormatHttpDateTime(char[hasatleast 30], struct tm *);
 bool ParseHttpRange(const char *, size_t, long, long *, long *);
 int64_t ParseHttpDateTime(const char *, size_t);
+uint64_t ParseHttpMethod(const char *, size_t);
 bool IsValidHttpToken(const char *, size_t);
 bool IsValidCookieValue(const char *, size_t);
 bool IsAcceptablePath(const char *, size_t);
diff --git a/net/http/parsehttpmessage.c b/net/http/parsehttpmessage.c
index 56eb5d342..b95869d69 100644
--- a/net/http/parsehttpmessage.c
+++ b/net/http/parsehttpmessage.c
@@ -17,14 +17,15 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/assert.h"
-#include "libc/serialize.h"
 #include "libc/limits.h"
 #include "libc/macros.internal.h"
 #include "libc/mem/alg.h"
 #include "libc/mem/arraylist.internal.h"
 #include "libc/mem/mem.h"
+#include "libc/serialize.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
+#include "libc/str/tab.internal.h"
 #include "libc/sysv/errfuns.h"
 #include "libc/x/x.h"
 #include "net/http/http.h"
@@ -90,23 +91,29 @@ int ParseHttpMessage(struct HttpMessage *r, const char *p, size_t n) {
     c = p[r->i] & 0xff;
     switch (r->t) {
       case kHttpStateStart:
-        if (c == '\r' || c == '\n') break; /* RFC7230 § 3.5 */
+        if (c == '\r' || c == '\n') break;  // RFC7230 § 3.5
         if (!kHttpToken[c]) return ebadmsg();
-        r->t = r->type == kHttpRequest ? kHttpStateMethod : kHttpStateVersion;
-        r->a = r->i;
+        if (r->type == kHttpRequest) {
+          r->t = kHttpStateMethod;
+          r->method = kToUpper[c];
+          r->a = 8;
+        } else {
+          r->t = kHttpStateVersion;
+          r->a = r->i;
+        }
         break;
       case kHttpStateMethod:
         for (;;) {
           if (c == ' ') {
-            r->method = GetHttpMethod(p + r->a, r->i - r->a);
-            r->xmethod.a = r->a;
-            r->xmethod.b = r->i;
             r->a = r->i + 1;
             r->t = kHttpStateUri;
             break;
-          } else if (!kHttpToken[c]) {
+          } else if (r->a == 64 || !kHttpToken[c]) {
             return ebadmsg();
           }
+          c = kToUpper[c];
+          r->method |= (uint64_t)c << r->a;
+          r->a += 8;
           if (++r->i == n) break;
           c = p[r->i] & 0xff;
         }
@@ -195,10 +202,8 @@ int ParseHttpMessage(struct HttpMessage *r, const char *p, size_t n) {
         } else if (c == '\n') {
           return ++r->i;
         } else if (!kHttpToken[c]) {
-          /*
-           * 1. Forbid empty header name (RFC2616 §2.2)
-           * 2. Forbid line folding (RFC7230 §3.2.4)
-           */
+          // 1. Forbid empty header name (RFC2616 §2.2)
+          // 2. Forbid line folding (RFC7230 §3.2.4)
           return ebadmsg();
         }
         r->k.a = r->i;
@@ -221,7 +226,7 @@ int ParseHttpMessage(struct HttpMessage *r, const char *p, size_t n) {
         if (c == ' ' || c == '\t') break;
         r->a = r->i;
         r->t = kHttpStateValue;
-        /* fallthrough */
+        // fallthrough
       case kHttpStateValue:
         for (;;) {
           if (c == '\r' || c == '\n') {
diff --git a/net/http/gethttpmethod.c b/net/http/parsehttpmethod.c
similarity index 79%
rename from net/http/gethttpmethod.c
rename to net/http/parsehttpmethod.c
index 4fd5d362b..6354eb397 100644
--- a/net/http/gethttpmethod.c
+++ b/net/http/parsehttpmethod.c
@@ -16,21 +16,28 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "net/http/gethttpmethod.inc"
+#include "libc/str/str.h"
+#include "libc/str/tab.internal.h"
 #include "net/http/http.h"
 
 /**
- * Converts HTTP method string into internal index
+ * Converts HTTP method to word encoding.
+ *
+ * For example, `ParseHttpMethod("GET", -1)` will return `kHttpGet`.
  *
  * @param len if -1 implies strlen
- * @return small number for HTTP method, or 0 if not found.
+ * @return word encoded method, or 0 if invalid
  */
-int GetHttpMethod(const char *str, size_t len) {
-  const struct HttpMethodSlot *slot;
+uint64_t ParseHttpMethod(const char *str, size_t len) {
+  int s = 0;
+  uint64_t w = 0;
   if (len == -1) len = str ? strlen(str) : 0;
-  if ((slot = LookupHttpMethod(str, len))) {
-    return slot->code;
-  } else {
-    return 0;
+  for (size_t i = 0; i < len; ++i) {
+    int c = kToUpper[str[i] & 255];
+    if (!kHttpToken[c]) return 0;
+    if (s == 64) return 0;
+    w |= (uint64_t)c << s;
+    s += 8;
   }
+  return w;
 }
diff --git a/net/turfwar/turfwar.c b/net/turfwar/turfwar.c
index f50da64b4..41e7d3b30 100644
--- a/net/turfwar/turfwar.c
+++ b/net/turfwar/turfwar.c
@@ -936,8 +936,9 @@ void *HttpWorker(void *arg) {
       }
 
       // access log
-      LOG("%6P %16s %.*s %.*s %.*s %.*s %#.*s\n", ipbuf,
-          msg->xmethod.b - msg->xmethod.a, inbuf + msg->xmethod.a,
+      char method[9] = {0};
+      WRITE64LE(method, msg->method);
+      LOG("%6P %16s %s %.*s %.*s %.*s %#.*s\n", ipbuf, method,
           msg->uri.b - msg->uri.a, inbuf + msg->uri.a,
           HeaderLength(kHttpCfIpcountry), HeaderData(kHttpCfIpcountry),
           HeaderLength(kHttpSecChUaPlatform), HeaderData(kHttpSecChUaPlatform),
diff --git a/test/BUILD.mk b/test/BUILD.mk
index db75515cc..63d89de46 100644
--- a/test/BUILD.mk
+++ b/test/BUILD.mk
@@ -4,7 +4,8 @@
 .PHONY:		o/$(MODE)/test
 o/$(MODE)/test:	o/$(MODE)/test/dsp	\
 		o/$(MODE)/test/libc	\
-		o/$(MODE)/test/net	\
 		o/$(MODE)/test/libcxx	\
+		o/$(MODE)/test/math	\
+		o/$(MODE)/test/net	\
 		o/$(MODE)/test/posix	\
 		o/$(MODE)/test/tool
diff --git a/test/libc/calls/dup_test.c b/test/libc/calls/dup_test.c
index c8cb7098a..cad66f18e 100644
--- a/test/libc/calls/dup_test.c
+++ b/test/libc/calls/dup_test.c
@@ -40,7 +40,8 @@ void SetUpOnce(void) {
   testlib_enable_tmp_setup_teardown();
 }
 
-static textstartup void TestInit(int argc, char **argv) {
+__attribute__((__constructor__)) static textstartup void TestInit(int argc,
+                                                                  char **argv) {
   int fd;
   if (argc == 2 && !strcmp(argv[1], "boop")) {
     if ((fd = open("/dev/null", O_RDWR | O_CLOEXEC)) == 3) {
@@ -51,8 +52,6 @@ static textstartup void TestInit(int argc, char **argv) {
   }
 }
 
-const void *const TestCtor[] initarray = {TestInit};
-
 TEST(dup, ebadf) {
   ASSERT_SYS(EBADF, -1, dup(-1));
   ASSERT_SYS(EBADF, -1, dup2(-1, 0));
diff --git a/test/libc/calls/madvise_test.c b/test/libc/calls/madvise_test.c
index a5de04344..d1eb03b5e 100644
--- a/test/libc/calls/madvise_test.c
+++ b/test/libc/calls/madvise_test.c
@@ -20,6 +20,7 @@
 #include "libc/dce.h"
 #include "libc/errno.h"
 #include "libc/runtime/runtime.h"
+#include "libc/sysv/consts/auxv.h"
 #include "libc/sysv/consts/madv.h"
 #include "libc/sysv/consts/map.h"
 #include "libc/sysv/consts/o.h"
@@ -63,7 +64,9 @@ TEST(madvise, subPages) {
   char *p;
   ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
-  ASSERT_SYS(0, 0, madvise(p + 4096, FRAMESIZE - 4096, MADV_WILLNEED));
+  ASSERT_SYS(0, 0,
+             madvise(p + getauxval(AT_PAGESZ), FRAMESIZE - getauxval(AT_PAGESZ),
+                     MADV_WILLNEED));
   ASSERT_SYS(0, 0, munmap(p, FRAMESIZE));
 }
 
diff --git a/test/libc/calls/pledge_test.c b/test/libc/calls/pledge_test.c
index 36cdfe0e7..a9a160392 100644
--- a/test/libc/calls/pledge_test.c
+++ b/test/libc/calls/pledge_test.c
@@ -550,6 +550,7 @@ TEST(pledge, execpromises_reducesAtExecOnLinux) {
 }
 
 TEST(pledge_openbsd, execpromisesIsNull_letsItDoAnything) {
+  if (IsOpenbsd()) return;  // mimmutable() ugh
   if (!IsOpenbsd()) return;
   int ws, pid;
   ASSERT_NE(-1, (pid = fork()));
@@ -566,6 +567,7 @@ TEST(pledge_openbsd, execpromisesIsNull_letsItDoAnything) {
 }
 
 TEST(pledge_openbsd, execpromisesIsSuperset_letsItDoAnything) {
+  if (IsOpenbsd()) return;  // mimmutable() ugh
   if (!IsOpenbsd()) return;
   int ws, pid;
   ASSERT_NE(-1, (pid = fork()));
@@ -585,6 +587,7 @@ TEST(pledge_linux, execpromisesIsSuperset_notPossible) {
 }
 
 TEST(pledge_openbsd, execpromises_notok) {
+  if (IsOpenbsd()) return;  // mimmutable() ugh
   int ws, pid;
   ASSERT_NE(-1, (pid = fork()));
   if (!pid) {
diff --git a/test/libc/calls/readlinkat_test.c b/test/libc/calls/readlinkat_test.c
index 8da142e5f..0d3d5a4a3 100644
--- a/test/libc/calls/readlinkat_test.c
+++ b/test/libc/calls/readlinkat_test.c
@@ -23,7 +23,6 @@
 #include "libc/limits.h"
 #include "libc/log/log.h"
 #include "libc/mem/gc.h"
-#include "libc/mem/gc.h"
 #include "libc/runtime/symbols.internal.h"
 #include "libc/str/str.h"
 #include "libc/sysv/consts/at.h"
diff --git a/test/libc/calls/unveil_test.c b/test/libc/calls/unveil_test.c
index 1c5767a79..b252fd8ce 100644
--- a/test/libc/calls/unveil_test.c
+++ b/test/libc/calls/unveil_test.c
@@ -94,6 +94,7 @@ TEST(unveil, api_differences) {
 }
 
 TEST(unveil, rx_readOnlyPreexistingExecutable_worksFine) {
+  if (IsOpenbsd()) return;  // TOOD(jart): why pledge violation?
   SPAWN(fork);
   ASSERT_SYS(0, 0, mkdir("folder", 0755));
   testlib_extract("/zip/life.elf", "folder/life.elf", 0755);
@@ -150,6 +151,7 @@ TEST(unveil, rwc_createExecutableFile_isAllowedButCantBeRun) {
 }
 
 TEST(unveil, rwcx_createExecutableFile_canAlsoBeRun) {
+  if (IsOpenbsd()) return;  // TOOD(jart): why pledge violation?
   SPAWN(fork);
   ASSERT_SYS(0, 0, mkdir("folder", 0755));
   ASSERT_SYS(0, 0, unveil("folder", "rwcx"));
diff --git a/test/libc/fmt/atoi_test.c b/test/libc/fmt/atoi_test.c
index d345c8c22..429a077d3 100644
--- a/test/libc/fmt/atoi_test.c
+++ b/test/libc/fmt/atoi_test.c
@@ -46,12 +46,8 @@ TEST(atoi, test) {
   EXPECT_EQ(-1, atoi("-1"));
   EXPECT_EQ(-9, atoi("-9"));
   EXPECT_EQ(-31337, atoi("-31337"));
-  EXPECT_EQ(INT_MIN, atoi("-2147483648"));
-  EXPECT_EQ(INT_MAX, atoi("2147483647"));
-  EXPECT_EQ(INT_MIN, atoi("-2147483649"));
-  EXPECT_EQ(INT_MAX, atoi("2147483648"));
-  EXPECT_EQ(INT_MIN, atoi("-2147483658"));
-  EXPECT_EQ(INT_MAX, atoi("2147483657"));
+  EXPECT_EQ(-2147483648, atoi("-2147483648"));
+  EXPECT_EQ(2147483647, atoi("2147483647"));
   EXPECT_EQ(123, atoi(" 123"));
   EXPECT_EQ(123, atoi(" \t123"));
   EXPECT_EQ(+123, atoi(" +123"));
@@ -63,21 +59,21 @@ TEST(atoi, test) {
 }
 
 TEST(atoi, testWithinLimit_doesntChangeErrno) {
-  errno = 7;
+  errno = 666;
   EXPECT_EQ(INT_MAX, atoi("2147483647"));
-  EXPECT_EQ(7, errno);
-  errno = 7;
+  EXPECT_EQ(666, errno);
+  errno = 666;
   EXPECT_EQ(INT_MIN, atoi("-2147483648"));
-  EXPECT_EQ(7, errno);
+  EXPECT_EQ(666, errno);
 }
 
-TEST(atoi, testOutsideLimit_saturatesAndSetsErangeErrno) {
-  errno = 0;
-  EXPECT_EQ(INT_MAX, atoi("2147483648"));
-  EXPECT_EQ(ERANGE, errno);
-  errno = 0;
-  EXPECT_EQ(INT_MIN, atoi("-2147483649"));
-  EXPECT_EQ(ERANGE, errno);
+TEST(atol, testWithinLimit_doesntChangeErrno) {
+  errno = 666;
+  EXPECT_EQ(INT_MAX, atol("2147483647"));
+  EXPECT_EQ(666, errno);
+  errno = 666;
+  EXPECT_EQ(INT_MIN, atol("-2147483648"));
+  EXPECT_EQ(666, errno);
 }
 
 TEST(atol, test) {
@@ -97,10 +93,6 @@ TEST(atol, test) {
   EXPECT_EQ(-31337, atol("-31337"));
   EXPECT_EQ(LONG_MIN, atol("-9223372036854775808"));
   EXPECT_EQ(LONG_MAX, atol("9223372036854775807"));
-  EXPECT_EQ(LONG_MIN, atol("-9223372036854775809"));
-  EXPECT_EQ(LONG_MAX, atol("9223372036854775808"));
-  EXPECT_EQ(LONG_MIN, atol("-9223372036854775818"));
-  EXPECT_EQ(LONG_MAX, atol("9223372036854775817"));
   EXPECT_EQ(123, atol(" 123"));
   EXPECT_EQ(123, atol(" \t123"));
   EXPECT_EQ(-123, atol(" -123"));
@@ -571,6 +563,8 @@ TEST(strtol, invalidBin2) {
 BENCH(atoi, bench) {
   EZBENCH2("atoi 10⁸", donothing,
            __expropriate(atoi(__veil("r", "100000000"))));
+  EZBENCH2("atol 10⁸", donothing,
+           __expropriate(atol(__veil("r", "100000000"))));
   EZBENCH2("strtol 10⁸", donothing,
            __expropriate(strtol(__veil("r", "100000000"), 0, 10)));
   EZBENCH2("strtoul 10⁸", donothing,
diff --git a/test/libc/intrin/kprintf_test.c b/test/libc/intrin/kprintf_test.c
index a096c888d..f091d48b0 100644
--- a/test/libc/intrin/kprintf_test.c
+++ b/test/libc/intrin/kprintf_test.c
@@ -20,13 +20,13 @@
 #include "libc/calls/calls.h"
 #include "libc/dce.h"
 #include "libc/errno.h"
-#include "libc/serialize.h"
 #include "libc/limits.h"
 #include "libc/log/libfatal.internal.h"
 #include "libc/macros.internal.h"
 #include "libc/runtime/memtrack.internal.h"
 #include "libc/runtime/runtime.h"
 #include "libc/runtime/symbols.internal.h"
+#include "libc/serialize.h"
 #include "libc/stdio/rand.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
diff --git a/test/libc/mem/realpath_test.c b/test/libc/mem/realpath_test.c
index e24060045..a4796d091 100644
--- a/test/libc/mem/realpath_test.c
+++ b/test/libc/mem/realpath_test.c
@@ -19,6 +19,7 @@
 #include "libc/calls/calls.h"
 #include "libc/dce.h"
 #include "libc/errno.h"
+#include "libc/intrin/strace.internal.h"
 #include "libc/mem/gc.h"
 #include "libc/mem/mem.h"
 #include "libc/runtime/runtime.h"
@@ -45,6 +46,8 @@ TEST(realpath, test2) {
 }
 
 TEST(realpath, test3) {
+  strace_enabled(+1);
+  ASSERT_TRUE(fileexists("conftest.a"));
   char *name = gc(realpath("conftest.l/../conftest.a", NULL));
   if (IsWindows()) {
     // WIN32 acts as a flat namespace, rather than linear inode crawl.
@@ -54,6 +57,7 @@ TEST(realpath, test3) {
     // Every other OS FS is a UNIX inode crawl.
     ASSERT_SYS(ENOTDIR, NULL, name);
   }
+  strace_enabled(-1);
 }
 
 TEST(realpath, test4) {
diff --git a/test/libc/nexgen32e/gclongjmp_test.c b/test/libc/nexgen32e/gclongjmp_test.c
index a31f4a786..c567dda51 100644
--- a/test/libc/nexgen32e/gclongjmp_test.c
+++ b/test/libc/nexgen32e/gclongjmp_test.c
@@ -96,6 +96,10 @@ TEST(gc, torture) {
   for (i = 0; i < n; ++i) EXPECT_SYS(0, 0, pthread_join(t[i], 0));
 }
 
+#if defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic ignored "-Winfinite-recursion"
+#endif
+
 void crawl2(jmp_buf jb, const char *path) {
   if (!strcmp(path, "/") || !strcmp(path, ".")) gclongjmp(jb, 1);
   crawl2(jb, gc(xdirname(path)));
diff --git a/test/libc/proc/posix_spawn_test.c b/test/libc/proc/posix_spawn_test.c
index d4fe8aa7f..046710146 100644
--- a/test/libc/proc/posix_spawn_test.c
+++ b/test/libc/proc/posix_spawn_test.c
@@ -36,7 +36,6 @@
 #include "libc/intrin/safemacros.internal.h"
 #include "libc/limits.h"
 #include "libc/mem/gc.h"
-#include "libc/mem/gc.h"
 #include "libc/mem/mem.h"
 #include "libc/proc/proc.internal.h"
 #include "libc/runtime/internal.h"
@@ -130,6 +129,7 @@ TEST(posix_spawn, ape) {
 }
 
 TEST(posix_spawn, elf) {
+  if (IsOpenbsd()) return;  // mimmutable() ugh
   if (IsXnu() || IsWindows() || IsMetal()) return;
   int ws, pid;
   char *prog = "./life.elf";  // assimilate -bcef
diff --git a/test/libc/release/BUILD.mk b/test/libc/release/BUILD.mk
index e98174d41..85da0a377 100644
--- a/test/libc/release/BUILD.mk
+++ b/test/libc/release/BUILD.mk
@@ -38,10 +38,12 @@ o/$(MODE)/test/libc/release/smoke.o:					\
 		-nostdinc						\
 		-D_COSMO_SOURCE						\
 		-Wl,--gc-sections					\
+		-z noexecstack						\
 		-fno-omit-frame-pointer					\
 		-include o/cosmopolitan.h				\
 		-Wl,-z,max-page-size=0x1000				\
 		-Wl,-z,common-page-size=0x1000				\
+		-Wl,-z,noexecstack					\
 		$<
 
 o/$(MODE)/test/libc/release/smoke.com.dbg:				\
@@ -55,6 +57,7 @@ o/$(MODE)/test/libc/release/smoke.com.dbg:				\
 		-no-pie							\
 		-nostdlib						\
 		--gc-sections						\
+		-z noexecstack						\
 		-z max-page-size=0x1000					\
 		-z common-page-size=0x1000				\
 		-T o/$(MODE)/ape/ape.lds				\
@@ -75,6 +78,7 @@ o/$(MODE)/test/libc/release/smoke-nms.com.dbg:				\
 		-no-pie							\
 		-nostdlib						\
 		--gc-sections						\
+		-z noexecstack						\
 		-z max-page-size=0x1000					\
 		-z common-page-size=0x1000				\
 		-T o/$(MODE)/ape/ape.lds				\
@@ -96,6 +100,7 @@ o/$(MODE)/test/libc/release/smoke-chibicc.com.dbg:			\
 		-no-pie							\
 		-nostdlib						\
 		--gc-sections						\
+		-z noexecstack						\
 		-z max-page-size=0x1000					\
 		-z common-page-size=0x1000				\
 		-T o/$(MODE)/ape/ape.lds				\
@@ -138,6 +143,7 @@ o/$(MODE)/test/libc/release/smokecxx.com.dbg:				\
 		-no-pie							\
 		-nostdlib						\
 		--gc-sections						\
+		-z noexecstack						\
 		-z max-page-size=0x1000					\
 		-z common-page-size=0x1000				\
 		-T o/$(MODE)/ape/ape.lds				\
@@ -158,6 +164,7 @@ o/$(MODE)/test/libc/release/smokecxx.o:					\
 		-fno-pie						\
 		-nostdinc						\
 		-Wl,--gc-sections					\
+		-Wl,-z,noexecstack					\
 		-fno-omit-frame-pointer					\
 		-z max-page-size=0x1000					\
 		-z common-page-size=0x1000				\
@@ -175,6 +182,7 @@ o/$(MODE)/test/libc/release/smokeansi.com.dbg:				\
 		-no-pie							\
 		-nostdlib						\
 		--gc-sections						\
+		-z noexecstack						\
 		-z max-page-size=0x1000					\
 		-z common-page-size=0x1000				\
 		-T o/$(MODE)/ape/ape.lds				\
@@ -198,6 +206,7 @@ o/$(MODE)/test/libc/release/smokeansi.o:				\
 		-nostdinc						\
 		-D_COSMO_SOURCE						\
 		-Wl,--gc-sections					\
+		-Wl,-z,noexecstack					\
 		-fno-omit-frame-pointer					\
 		-include o/cosmopolitan.h				\
 		-Wl,-z,max-page-size=0x1000				\
diff --git a/test/libc/runtime/initorder_test.c b/test/libc/runtime/initorder_test.c
new file mode 100644
index 000000000..d8f5a70a5
--- /dev/null
+++ b/test/libc/runtime/initorder_test.c
@@ -0,0 +1,260 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/stdio/stdio.h"
+
+// orders of events
+//  -4         pc() bios
+//  -4  ApeLoader() unix
+//  -3    WinMain() win32
+//  -3     _start() unix
+//  -2      cosmo() x86
+//  -2     cosmo2() aarch64
+//  -1      _init() x86
+//   0   ASMPREINIT ok
+//   1   ASMINIT105 ok
+//   2      CTOR110 ok
+//   3   ASMINIT115 ok
+//   4      CTOR120 ok
+//   5     CTOR1000 ok
+//   6    CTOR10000 ok
+//   7    CTOR40000 ok
+//   8    CTOR60000 ok
+//   9   ASMCTOR113 ok
+//  10   ASMCTOR103 ok
+//  11    CTOR65500 ok
+//  12    CTOR65534 ok
+//  13         CTOR ok
+//  14    CTOR65535 ok
+//  15      ASMINIT ok
+//  16      ASMCTOR ok
+//   n       main()
+
+#define N 18
+
+#define COSMOINIT -1
+
+#define CTOR      1
+#define CTOR110   110
+#define CTOR120   120
+#define CTOR1000  1000
+#define CTOR10000 10000
+#define CTOR40000 40000
+#define CTOR60000 60000
+#define CTOR65500 65500
+#define CTOR65534 65534
+#define CTOR65535 65535
+
+#define ASMINIT    2
+#define ASMINIT105 105
+#define ASMINIT115 115
+
+#define ASMCTOR    3
+#define ASMCTOR103 103
+#define ASMCTOR113 113
+
+#define ASMPREINIT    4
+#define ASMPREINIT107 107
+#define ASMPREINIT117 117
+
+#ifdef __x86__
+#define GUFF " #"
+#else
+#define GUFF " //"
+#endif
+
+int event;
+int events[N];
+
+const char *getname(int x) {
+  switch (x) {
+    case 0:
+      return "0";
+    case CTOR:
+      return "CTOR";
+    case CTOR110:
+      return "CTOR110";
+    case CTOR120:
+      return "CTOR120";
+    case CTOR1000:
+      return "CTOR1000";
+    case CTOR10000:
+      return "CTOR10000";
+    case CTOR40000:
+      return "CTOR40000";
+    case CTOR60000:
+      return "CTOR60000";
+    case CTOR65500:
+      return "CTOR65500";
+    case CTOR65534:
+      return "CTOR65534";
+    case CTOR65535:
+      return "CTOR65535";
+    case ASMINIT:
+      return "ASMINIT";
+    case ASMINIT105:
+      return "ASMINIT105";
+    case ASMINIT115:
+      return "ASMINIT115";
+    case ASMCTOR:
+      return "ASMCTOR";
+    case ASMCTOR103:
+      return "ASMCTOR103";
+    case ASMCTOR113:
+      return "ASMCTOR113";
+    case ASMPREINIT:
+      return "ASMPREINIT";
+    case ASMPREINIT107:
+      return "ASMPREINIT107";
+    case ASMPREINIT117:
+      return "ASMPREINIT117";
+    default:
+      return "???";
+  }
+}
+
+__attribute__((__constructor__)) void ctor(void) {
+  events[event++] = CTOR;
+}
+
+__attribute__((__constructor__(110))) void ctor110(void) {
+  events[event++] = CTOR110;
+}
+
+__attribute__((__constructor__(120))) void ctor120(void) {
+  events[event++] = CTOR120;
+}
+
+__attribute__((__constructor__(1000))) void ctor1000(void) {
+  events[event++] = CTOR1000;
+}
+
+__attribute__((__constructor__(10000))) void ctor10000(void) {
+  events[event++] = CTOR10000;
+}
+
+__attribute__((__constructor__(40000))) void ctor40000(void) {
+  events[event++] = CTOR40000;
+}
+
+__attribute__((__constructor__(60000))) void ctor60000(void) {
+  events[event++] = CTOR60000;
+}
+
+__attribute__((__constructor__(65500))) void ctor65500(void) {
+  events[event++] = CTOR65500;
+}
+
+__attribute__((__constructor__(65534))) void ctor65534(void) {
+  events[event++] = CTOR65534;
+}
+
+__attribute__((__constructor__(65535))) void ctor65535(void) {
+  events[event++] = CTOR65535;
+}
+
+void asminit(void) {
+  events[event++] = ASMINIT;
+}
+__attribute__((__section__(
+    ".init_array,\"aw\",@init_array" GUFF))) void *const kAsminit[] = {asminit};
+
+void asminit105(void) {
+  events[event++] = ASMINIT105;
+}
+__attribute__((__section__(
+    ".init_array.105,\"aw\",@init_array" GUFF))) void *const kAsminit105[] = {
+    asminit105};
+
+void asminit115(void) {
+  events[event++] = ASMINIT115;
+}
+__attribute__((__section__(
+    ".init_array.115,\"aw\",@init_array" GUFF))) void *const kAsminit115[] = {
+    asminit115};
+
+void asmpreinit(void) {
+  events[event++] = ASMPREINIT;
+}
+__attribute__((__section__(
+    ".preinit_array,\"a\",@preinit_array" GUFF))) void *const kAsmpreinit[] = {
+    asmpreinit};
+
+void asmpreinit107(void) {
+  events[event++] = ASMPREINIT107;
+}
+__attribute__((
+    __section__(".preinit_array.107,\"a\",@preinit_array" GUFF))) void
+    *const kAsmpreinit107[] = {asmpreinit107};
+
+void asmctor(void) {
+  events[event++] = ASMCTOR;
+}
+__attribute__((__section__(
+    ".ctors,\"aw\",@init_array" GUFF))) void *const kAsmctor[] = {asmctor};
+
+void asmctor103(void) {
+  events[event++] = ASMCTOR103;
+}
+__attribute__((__section__(
+    ".ctors.103,\"aw\",@init_array" GUFF))) void *const kAsmctor103[] = {
+    asmctor103};
+
+void asmctor113(void) {
+  events[event++] = ASMCTOR113;
+}
+__attribute__((__section__(
+    ".ctors.113,\"aw\",@init_array" GUFF))) void *const kAsmctor113[] = {
+    asmctor113};
+
+const int want[N] = {
+    ASMPREINIT,  //
+    ASMINIT105,  //
+    CTOR110,     //
+    ASMINIT115,  //
+    CTOR120,     //
+    CTOR1000,    //
+    CTOR10000,   //
+    CTOR40000,   //
+    CTOR60000,   //
+    ASMCTOR113,  //
+    ASMCTOR103,  //
+    CTOR65500,   //
+    CTOR65534,   //
+    CTOR,        //
+    CTOR65535,   //
+    ASMINIT,     //
+    ASMCTOR,     //
+};
+
+int main() {
+  int fails = 0;
+  printf("\nevents:\n");
+  for (int i = 0; i < N; ++i) {
+    printf("%3d %12s ", i, getname(events[i]));
+    if (events[i] == want[i]) {
+      printf("ok");
+    } else {
+      printf("should be %s", getname(want[i]));
+      ++fails;
+    }
+    printf("\n");
+  }
+  printf("\n");
+  return fails;
+}
diff --git a/test/libc/runtime/tls_test.c b/test/libc/runtime/tls_test.c
index 66fbf9679..ef8f086cc 100644
--- a/test/libc/runtime/tls_test.c
+++ b/test/libc/runtime/tls_test.c
@@ -31,6 +31,8 @@ _Thread_local long y[1] = {40};
 _Alignas(A) _Thread_local long a;
 
 dontubsan void *Worker(void *arg) {
+  ASSERT_EQ(A, _Alignof(a));
+  ASSERT_EQ(0, (uintptr_t)&a & (_Alignof(a) - 1));
   ASSERT_EQ(42, x + y[0] + z);
   ASSERT_EQ(0, (intptr_t)&a & (A - 1));
   if (IsAsan()) {
@@ -41,6 +43,7 @@ dontubsan void *Worker(void *arg) {
 
 TEST(tls, test) {
   ASSERT_EQ(A, _Alignof(a));
+  ASSERT_EQ(0, (uintptr_t)&a & (_Alignof(a) - 1));
   ASSERT_EQ(0, sizeof(struct CosmoTib) % A);
   ASSERT_EQ(0, (intptr_t)__get_tls() & (A - 1));
   EXPECT_EQ(2, z);
diff --git a/test/libc/runtime/zipos_test.c b/test/libc/runtime/zipos_test.c
index c172a737d..621c3bbea 100644
--- a/test/libc/runtime/zipos_test.c
+++ b/test/libc/runtime/zipos_test.c
@@ -21,7 +21,6 @@
 #include "libc/errno.h"
 #include "libc/limits.h"
 #include "libc/mem/gc.h"
-#include "libc/mem/gc.h"
 #include "libc/mem/mem.h"
 #include "libc/runtime/runtime.h"
 #include "libc/runtime/zipos.internal.h"
diff --git a/libc/tinymath/__math_invalidl.c b/test/libc/stdio/fscanf_test.c
similarity index 79%
rename from libc/tinymath/__math_invalidl.c
rename to test/libc/stdio/fscanf_test.c
index 9c4239581..accfb4bdb 100644
--- a/libc/tinymath/__math_invalidl.c
+++ b/test/libc/stdio/fscanf_test.c
@@ -1,7 +1,7 @@
 /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│ Copyright 2024 Ivan Komarov                                                  │
 │                                                                              │
 │ Permission to use, copy, modify, and/or distribute this software for         │
 │ any purpose with or without fee is hereby granted, provided that the         │
@@ -17,10 +17,17 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
-#include "libc/tinymath/internal.h"
+#include "libc/stdio/stdio.h"
+#include "libc/testlib/testlib.h"
 
-#if LDBL_MANT_DIG != DBL_MANT_DIG
-long double __math_invalidl(long double x) {
-  return (x - x) / (x - x);
+TEST(fscanf, test_readAfterFloat) {
+  FILE *f = fmemopen("infDEAD-.125e-2BEEF", 19, "r");
+  float f1 = 666.666f, f2 = f1;
+  int i1 = 666, i2 = i1;
+  EXPECT_EQ(4, fscanf(f, "%f%x%f%x", &f1, &i1, &f2, &i2));
+  EXPECT_TRUE(isinf(f1));
+  EXPECT_EQ(0xDEAD, i1);
+  EXPECT_EQ(-0.125e-2f, f2);
+  EXPECT_EQ(0xBEEF, i2);
+  fclose(f);
 }
-#endif
diff --git a/test/libc/stdio/sscanf_test.c b/test/libc/stdio/sscanf_test.c
index 571617715..40e35f1c4 100644
--- a/test/libc/stdio/sscanf_test.c
+++ b/test/libc/stdio/sscanf_test.c
@@ -69,9 +69,17 @@ TEST(sscanf, testNonDirectiveCharacterMatching) {
 }
 
 TEST(sscanf, testCharacter) {
-  char c = 0;
-  EXPECT_EQ(1, sscanf("a", "%c", &c));
-  EXPECT_EQ('a', c);
+  char c1 = 0, c2 = c1, c3 = c2, c4 = c3;
+  char s1[32] = {0}, s2[32] = {0};
+  EXPECT_EQ(1, sscanf("a", "%c", &c1));
+  EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4));
+  EXPECT_EQ(1, sscanf("abcde", "%5c", s1));
+  EXPECT_EQ(0, sscanf("abcd", "%5c", s2));
+
+  EXPECT_EQ('a', c1);
+  EXPECT_EQ('a', c2);
+  EXPECT_EQ('b', c3);
+  EXPECT_STREQ("abcde", &s1[0]);
 }
 
 TEST(sscanf, testStringBuffer) {
@@ -394,6 +402,20 @@ TEST(sscanf, floating_point_infinity_double_precision) {
   EXPECT_TRUE(isinf(g));
 }
 
+TEST(sscanf, floating_point_invalid) {
+  float dummy;
+  EXPECT_EQ(0, sscanf("junk", "%f", &dummy));
+  EXPECT_EQ(0, sscanf("e9", "%f", &dummy));
+  EXPECT_EQ(0, sscanf("-e9", "%f", &dummy));
+}
+
+TEST(sscanf, floating_point_invalid_double_precision) {
+  double dummy;
+  EXPECT_EQ(0, sscanf("junk", "%lf", &dummy));
+  EXPECT_EQ(0, sscanf("e9", "%lf", &dummy));
+  EXPECT_EQ(0, sscanf("-e9", "%lf", &dummy));
+}
+
 TEST(sscanf, floating_point_documentation_examples) {
   float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h,
         j = i;
@@ -401,7 +423,7 @@ TEST(sscanf, floating_point_documentation_examples) {
   EXPECT_EQ(2, sscanf("111.11 -2.22", "%f %f", &a, &b));
   EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%f %f %f", &c, &d, &e));
   EXPECT_EQ(
-      5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
+      2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
                 "%f %f %f %f %f", &f, &g, &h, &i, &j));
 
   EXPECT_EQ(111.11f, a);
@@ -411,9 +433,6 @@ TEST(sscanf, floating_point_documentation_examples) {
   EXPECT_TRUE(isinf(e));
   EXPECT_EQ(0X1.BC70A3D70A3D7P+6f, f);
   EXPECT_TRUE(isinf(g));
-  EXPECT_EQ(-0.0000000123f, h);
-  EXPECT_EQ(.0f, i);
-  EXPECT_EQ(.0f, j);
 }
 
 TEST(sscanf, floating_point_documentation_examples_double_precision) {
@@ -423,7 +442,7 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
   EXPECT_EQ(2, sscanf("111.11 -2.22", "%lf %lf", &a, &b));
   EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%lf %lf %lf", &c, &d, &e));
   EXPECT_EQ(
-      5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
+      2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
                 "%lf %lf %lf %lf %lf", &f, &g, &h, &i, &j));
 
   EXPECT_EQ(111.11, a);
@@ -433,9 +452,6 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
   EXPECT_TRUE(isinf(e));
   EXPECT_EQ(0X1.BC70A3D70A3D7P+6, f);
   EXPECT_TRUE(isinf(g));
-  EXPECT_EQ(-0.0000000123, h);
-  EXPECT_EQ(.0, i);
-  EXPECT_EQ(.0, j);
 }
 
 TEST(sscanf, luplus) {
diff --git a/test/libc/str/blake2_test.c b/test/libc/str/blake2_test.c
index 43ee42aa8..0b31b16a1 100644
--- a/test/libc/str/blake2_test.c
+++ b/test/libc/str/blake2_test.c
@@ -16,10 +16,10 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/str/blake2.h"
 #include "libc/assert.h"
 #include "libc/mem/mem.h"
 #include "libc/stdio/rand.h"
-#include "libc/str/blake2.h"
 #include "libc/str/str.h"
 #include "libc/str/tab.internal.h"
 #include "libc/testlib/ezbench.h"
@@ -40,7 +40,7 @@ uint8_t *HEXBLAKE2B256(const char *s) {
   n = strlen(s);
   assert(!(n & 1));
   n /= 2;
-  p = malloc(n);
+  p = malloc(n + 1);
   for (i = 0; i < n; ++i) {
     a = kHexToInt[s[i * 2 + 0] & 255];
     b = kHexToInt[s[i * 2 + 1] & 255];
diff --git a/test/libc/time/strftime_test.c b/test/libc/time/strftime_test.c
index 2e37463c0..275c0fb7c 100644
--- a/test/libc/time/strftime_test.c
+++ b/test/libc/time/strftime_test.c
@@ -23,10 +23,9 @@
 #include "libc/time/struct/tm.h"
 #include "libc/time/time.h"
 
-textstartup static void strftime_test_init(void) {
+__attribute__((__constructor__)) void init(void) {
   setenv("TZ", "GST", true);
 }
-const void *const strftime_test_ctor[] initarray = {strftime_test_init};
 
 char *FormatTime(const char *fmt, struct tm *tm) {
   static char buf[64];
diff --git a/test/libc/x/xstrcat_test.c b/test/libc/x/xstrcat_test.c
index 3821a60d6..733736de4 100644
--- a/test/libc/x/xstrcat_test.c
+++ b/test/libc/x/xstrcat_test.c
@@ -35,6 +35,10 @@ TEST(xstrcat, pointerAbuse) {
   EXPECT_STREQ("hi there\n", gc(xstrcat("hi", ' ', "there", '\n')));
 }
 
+#if defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic ignored "-Wuse-after-free"
+#endif
+
 int hard_static(void) {
   char *b, *p;
   p = b = malloc(16);
diff --git a/test/libcxx/BUILD.mk b/test/libcxx/BUILD.mk
index c646ddd56..836e6da8c 100644
--- a/test/libcxx/BUILD.mk
+++ b/test/libcxx/BUILD.mk
@@ -14,9 +14,13 @@ TEST_LIBCXX_TESTS = $(TEST_LIBCXX_COMS:%=%.ok)
 TEST_LIBCXX_DIRECTDEPS =				\
 	LIBC_CALLS					\
 	LIBC_INTRIN					\
+	LIBC_LOG					\
 	LIBC_NEXGEN32E					\
 	LIBC_RUNTIME					\
 	LIBC_STDIO					\
+	LIBC_SYSV					\
+	LIBC_THREAD					\
+	LIBC_TINYMATH					\
 	THIRD_PARTY_LIBCXX				\
 	THIRD_PARTY_OPENMP
 
@@ -37,7 +41,8 @@ o/$(MODE)/test/libcxx/%.com.dbg:			\
 
 $(TEST_LIBCXX_OBJS): private CCFLAGS += -fexceptions -frtti
 
-o/$(MODE)/test/libcxx/openmp_test.o: private CXXFLAGS += -fopenmp -O3
+o/$(MODE)/test/libcxx/openmp_test.o: private CXXFLAGS += -fopenmp
+o/$(MODE)/test/libcxx/openmp_test.com.runs: private QUOTA += -C100
 
 .PHONY: o/$(MODE)/test/libcxx
 o/$(MODE)/test/libcxx:					\
diff --git a/test/libcxx/openmp_test.cc b/test/libcxx/openmp_test.cc
index ecf5ef3c2..ecea814ea 100644
--- a/test/libcxx/openmp_test.cc
+++ b/test/libcxx/openmp_test.cc
@@ -16,143 +16,331 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
-#include "libc/calls/calls.h"
-#include "libc/calls/struct/timespec.h"
-#include "libc/fmt/itoa.h"
-#include "libc/inttypes.h"
-#include "libc/runtime/runtime.h"
-#include "libc/stdio/stdio.h"
-#include "libc/str/str.h"
-#include "libc/sysv/consts/clock.h"
-#include "third_party/double-conversion/double-to-string.h"
-#include "third_party/double-conversion/utils.h"
-#include "third_party/openmp/omp.h"
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <ctime>
+#include "libc/stdio/rand.h"
 
-#ifndef __FAST_MATH__
-#define FLAWLESS 0
-#else
-#define FLAWLESS 1e-05
-#endif
+#define PRECISION 2e-6
+#define LV1DCACHE 49152
+#define THRESHOLD 3000000
 
 #if defined(__OPTIMIZE__) && !defined(__SANITIZE_ADDRESS__)
-#define ITERATIONS 10
+#define ITERATIONS 5
 #else
 #define ITERATIONS 1
 #endif
 
-// m×n → (m×n)ᵀ
-template <typename T>
-void transpose(long m, long n, const T *A, long sa, T *B, long sb) {
-#pragma omp parallel for collapse(2)
-  for (long i = 0; i < m; ++i) {
+#define OPTIMIZED __attribute__((__optimize__("-O3,-ffast-math")))
+#define PORTABLE                         \
+  __target_clones("arch=znver4,"         \
+                  "arch=znver3,"         \
+                  "arch=sapphirerapids," \
+                  "arch=alderlake,"      \
+                  "arch=rocketlake,"     \
+                  "arch=cooperlake,"     \
+                  "arch=tigerlake,"      \
+                  "arch=cascadelake,"    \
+                  "arch=skylake-avx512," \
+                  "arch=skylake,"        \
+                  "arch=znver1,"         \
+                  "arch=tremont,"        \
+                  "fma,"                 \
+                  "avx")
+
+static bool is_self_testing;
+
+// m×n → n×m
+template <typename TA, typename TB>
+void transpose(long m, long n, const TA *A, long lda, TB *B, long ldb) {
+#pragma omp parallel for collapse(2) if (m * n > THRESHOLD)
+  for (long i = 0; i < m; ++i)
     for (long j = 0; j < n; ++j) {
-      B[sb * j + i] = A[sa * i + j];
+      B[ldb * j + i] = A[lda * i + j];
     }
-  }
 }
 
 // m×k * k×n → m×n
-template <typename T>
-void matmul(long m, long n, long k, const T *A, long sa, const T *B, long sb,
-            T *C, long sc) {
-#pragma omp parallel for collapse(2)
-  for (long i = 0; i < m; ++i) {
+// k×m * k×n → m×n if aᵀ
+// m×k * n×k → m×n if bᵀ
+// k×m * n×k → m×n if aᵀ and bᵀ
+template <typename TC, typename TA, typename TB>
+void dgemm(bool aᵀ, bool bᵀ, long m, long n, long k, float α, const TA *A,
+           long lda, const TB *B, long ldb, float β, TC *C, long ldc) {
+#pragma omp parallel for collapse(2) if (m * n * k > THRESHOLD)
+  for (long i = 0; i < m; ++i)
     for (long j = 0; j < n; ++j) {
-      T sum = 0;
-      for (long l = 0; l < k; ++l) {
-        sum += A[sa * i + l] * B[sb * l + j];
-      }
-      C[sc * i + j] = sum;
+      double sum = 0;
+      for (long l = 0; l < k; ++l)
+        sum = std::fma((aᵀ ? A[lda * l + i] : A[lda * i + l]) * α,
+                       (bᵀ ? B[ldb * j + l] : B[ldb * l + j]), sum);
+      C[ldc * i + j] = C[ldc * i + j] * β + sum;
     }
-  }
 }
 
-template <long BM, long BN, typename T>
-void gemmk(long k, const T *A, long sa, const T *B, long sb, T *C, long sc) {
-  T S[BM][BN] = {0};
-  for (long l = 0; l < k; ++l) {
-    for (long i = 0; i < BM; ++i) {
-      for (long j = 0; j < BN; ++j) {
-        S[i][j] += A[sa * l + i] * B[sb * l + j];
+template <typename T, typename TC, typename TA, typename TB>
+struct Gemmlin {
+ public:
+  Gemmlin(bool aT, bool bT, float α, const TA *A, long lda, const TB *B,
+          long ldb, float β, TC *C, long ldc)
+      : aT(aT),
+        bT(bT),
+        α(α),
+        A(A),
+        lda(lda),
+        B(B),
+        ldb(ldb),
+        β(β),
+        C(C),
+        ldc(ldc) {
+  }
+
+  void gemm(long m, long n, long k) {
+    if (!m || !n) return;
+    for (long i = 0; i < m; ++i)
+      for (long j = 0; j < n; ++j) {
+        C[ldc * i + j] *= β;
+      }
+    if (!k) return;
+    cub = sqrt(LV1DCACHE) / sqrt(sizeof(T) * 3);
+    mnpack(0, m, 0, n, 0, k);
+  }
+
+ private:
+  void mnpack(long m0, long m,  //
+              long n0, long n,  //
+              long k0, long k) {
+    long mc = rounddown(std::min(m - m0, cub), 4);
+    long mp = m0 + (m - m0) / mc * mc;
+    long nc = rounddown(std::min(n - n0, cub), 4);
+    long np = n0 + (n - n0) / nc * nc;
+    long kc = rounddown(std::min(k - k0, cub), 4);
+    long kp = k0 + (k - k0) / kc * kc;
+    kpack(m0, mc, mp, n0, nc, np, k0, kc, k, kp);
+    if (m - mp) mnpack(mp, m, n0, np, k0, k);
+    if (n - np) mnpack(m0, mp, np, n, k0, k);
+    if (m - mp && n - np) mnpack(mp, m, np, n, k0, k);
+  }
+
+  void kpack(long m0, long mc, long m,  //
+             long n0, long nc, long n,  //
+             long k0, long kc, long k,  //
+             long kp) {
+    rpack(m0, mc, m, n0, nc, n, k0, kc, kp);
+    if (k - kp) rpack(m0, mc, m, n0, nc, n, kp, k - kp, k);
+  }
+
+  void rpack(long m0, long mc, long m,  //
+             long n0, long nc, long n,  //
+             long k0, long kc, long k) {
+    if (!(mc % 4) && !(nc % 4))
+      bgemm<4, 4>(m0, mc, m, n0, nc, n, k0, kc, k);
+    else
+      bgemm<1, 1>(m0, mc, m, n0, nc, n, k0, kc, k);
+  }
+
+  template <int mr, int nr>
+  void bgemm(long m0, long mc, long m,  //
+             long n0, long nc, long n,  //
+             long k0, long kc, long k) {
+    ops = (m - m0) * (n - n0) * (k - k0);
+    ml = (m - m0) / mc;
+    nl = (n - n0) / nc;
+    locks = new lock[ml * nl];
+    there_will_be_blocks<mr, nr>(m0, mc, m, n0, nc, n, k0, kc, k);
+    delete[] locks;
+  }
+
+  template <int mr, int nr>
+  void there_will_be_blocks(long m0, volatile long mc, long m, long n0, long nc,
+                            long n, long k0, long kc, long k) {
+#pragma omp parallel for collapse(2) if (ops > THRESHOLD && mc * kc > 16)
+    for (long ic = m0; ic < m; ic += mc)
+      for (long pc = k0; pc < k; pc += kc)
+        gizmo<mr, nr>(m0, mc, ic, n0, nc, k0, kc, pc, n);
+  }
+
+  template <int mr, int nr>
+  PORTABLE OPTIMIZED void gizmo(long m0, long mc, long ic, long n0, long nc,
+                                long k0, long kc, long pc, long n) {
+    T Ac[mc / mr][kc][mr];
+    for (long i = 0; i < mc; ++i)
+      for (long j = 0; j < kc; ++j)
+        Ac[i / mr][j][i % mr] = α * (aT ? A[lda * (pc + j) + (ic + i)]
+                                        : A[lda * (ic + i) + (pc + j)]);
+    for (long jc = n0; jc < n; jc += nc) {
+      T Bc[nc / nr][nr][kc];
+      for (long j = 0; j < nc; ++j)
+        for (long i = 0; i < kc; ++i)
+          Bc[j / nr][j % nr][i] =
+              bT ? B[ldb * (jc + j) + (pc + i)] : B[ldb * (pc + i) + (jc + j)];
+      T Cc[nc / nr][mc / mr][nr][mr];
+      memset(Cc, 0, nc * mc * sizeof(float));
+      for (long jr = 0; jr < nc / nr; ++jr)
+        for (long ir = 0; ir < mc / mr; ++ir)
+          for (long pr = 0; pr < kc; ++pr)
+            for (long j = 0; j < nr; ++j)
+              for (long i = 0; i < mr; ++i)
+                Cc[jr][ir][j][i] += Ac[ir][pr][i] * Bc[jr][j][pr];
+      const long lk = nl * ((ic - m0) / mc) + ((jc - n0) / nc);
+      locks[lk].acquire();
+      for (long ir = 0; ir < mc; ir += mr)
+        for (long jr = 0; jr < nc; jr += nr)
+          for (long i = 0; i < mr; ++i)
+            for (long j = 0; j < nr; ++j)
+              C[ldc * (ic + ir + i) + (jc + jr + j)] +=
+                  Cc[jr / nr][ir / mr][j][i];
+      locks[lk].release();
+    }
+  }
+
+  inline long rounddown(long x, long r) {
+    if (x < r)
+      return x;
+    else
+      return x & -r;
+  }
+
+  class lock {
+   public:
+    lock() = default;
+    void acquire() {
+      while (lock_.exchange(true, std::memory_order_acquire)) {
       }
     }
-  }
-  for (long i = 0; i < BM; ++i) {
-    for (long j = 0; j < BN; ++j) {
-      C[sc * i + j] = S[i][j];
+    void release() {
+      lock_.store(false, std::memory_order_release);
     }
-  }
+
+   private:
+    std::atomic_bool lock_ = false;
+  };
+
+  bool aT;
+  bool bT;
+  float α;
+  const TA *A;
+  long lda;
+  const TB *B;
+  long ldb;
+  float β;
+  TC *C;
+  long ldc;
+  long ops;
+  long nl;
+  long ml;
+  lock *locks;
+  long cub;
+};
+
+template <typename TC, typename TA, typename TB>
+void sgemm(bool aT, bool bT, long m, long n, long k, float α, const TA *A,
+           long lda, const TB *B, long ldb, float β, TC *C, long ldc) {
+  Gemmlin<float, TC, TA, TB> g{aT, bT, α, A, lda, B, ldb, β, C, ldc};
+  g.gemm(m, n, k);
 }
 
-// (m×k)ᵀ * k×n → m×n
-template <long BM, long BN, typename T>
-void gemm(long m, long n, long k, const T *A, long sa, const T *B, long sb,
-          T *C, long sc) {
-#pragma omp parallel for collapse(2)
-  for (long i = 0; i < m; i += BM) {
-    for (long j = 0; j < n; j += BN) {
-      gemmk<BM, BN>(k, A + i, sa, B + j, sb, C + sc * i + j, sc);
-    }
+template <typename TA, typename TB>
+void show(FILE *f, long max, long m, long n, const TA *A, long lda, const TB *B,
+          long ldb) {
+  flockfile(f);
+  fprintf(f, "      ");
+  for (long j = 0; j < n; ++j) {
+    fprintf(f, "%13ld", j);
   }
-}
-
-template <typename T>
-void show(long m, long n, const T *A, long sa) {
-  long max = 4;
-  printf("{");
+  fprintf(f, "\n");
   for (long i = 0; i < m; ++i) {
-    if (i) {
-      if (i == max) {
-        printf(", ...");
+    if (i == max) {
+      fprintf(f, "...\n");
+      break;
+    }
+    fprintf(f, "%5ld ", i);
+    for (long j = 0; j < n; ++j) {
+      if (j == max) {
+        fprintf(f, " ...");
         break;
-      } else {
-        printf(", ");
+      }
+      char ba[16], bb[16];
+      sprintf(ba, "%13.7f", static_cast<double>(A[lda * i + j]));
+      sprintf(bb, "%13.7f", static_cast<double>(B[ldb * i + j]));
+      for (long k = 0; ba[k] && bb[k]; ++k) {
+        if (ba[k] != bb[k]) fputs_unlocked("\33[31m", f);
+        fputc_unlocked(ba[k], f);
+        if (ba[k] != bb[k]) fputs_unlocked("\33[0m", f);
       }
     }
-    printf("{");
-    for (long j = 0; j < n; ++j) {
-      if (j) {
-        if (j == max) {
-          printf(", ...");
-          break;
-        } else {
-          printf(", ");
-        }
-      }
-      printf("%g", static_cast<double>(A[j + i * sa]));
-    }
-    printf("}");
+    fprintf(f, "\n");
   }
-  printf("}");
+  funlockfile(f);
 }
 
-template <typename T>
-double diff(long m, long n, const T *A, long sa, const T *B, long sb) {
+inline unsigned long GetDoubleBits(double f) {
+  union {
+    double f;
+    unsigned long i;
+  } u;
+  u.f = f;
+  return u.i;
+}
+
+inline bool IsNan(double x) {
+  return (GetDoubleBits(x) & (-1ull >> 1)) > (0x7ffull << 52);
+}
+
+template <typename TA, typename TB>
+double diff(long m, long n, const TA *Want, long lda, const TB *Got, long ldb) {
   double s = 0;
-  for (long i = 0; i < m; ++i) {
-    for (long j = 0; j < n; ++j) {
-      s += fabs(A[sa * i + j] - B[sb * i + j]);
-    }
-  }
-  return s / m / n;
+  int got_nans = 0;
+  int want_nans = 0;
+  for (long i = 0; i < m; ++i)
+    for (long j = 0; j < n; ++j)
+      if (IsNan(Want[ldb * i + j]))
+        ++want_nans;
+      else if (IsNan(Got[ldb * i + j]))
+        ++got_nans;
+      else
+        s += std::fabs(Want[lda * i + j] - Got[ldb * i + j]);
+  if (got_nans) printf("WARNING: got %d NaNs!\n", got_nans);
+  if (want_nans) printf("WARNING: want array has %d NaNs!\n", want_nans);
+  return s / (m * n);
 }
 
-template <typename T>
-void check(double tol, long m, long n, const T *A, long sa, const T *B, long sb,
-           const char *file, long line) {
-  double sad = diff(m, n, A, sa, B, sb);
-  if (sad > tol) {
-    printf("%s:%d: sad %g exceeds %g\n\twant ", file, line, sad, tol);
-    show(m, n, A, sa);
-    printf("\n\t got ");
-    show(m, n, B, sb);
-    printf("\n");
+template <typename TA, typename TB>
+void show_error(FILE *f, long max, long m, long n, const TA *A, long lda,
+                const TB *B, long ldb, const char *file, int line, double sad,
+                double tol) {
+  fprintf(f, "%s:%d: sad %.17g exceeds %g\nwant\n", file, line, sad, tol);
+  show(f, max, m, n, A, lda, B, ldb);
+  fprintf(f, "got\n");
+  show(f, max, m, n, B, ldb, A, lda);
+  fprintf(f, "\n");
+}
+
+template <typename TA, typename TB>
+void check(double tol, long m, long n, const TA *A, long lda, const TB *B,
+           long ldb, const char *file, int line) {
+  double sad = diff(m, n, A, lda, B, ldb);
+  if (sad <= tol) {
+    if (!is_self_testing) {
+      printf("         %g error\n", sad);
+    }
+  } else {
+    show_error(stderr, 16, m, n, A, lda, B, ldb, file, line, sad, tol);
+    const char *path = "/tmp/openmp_test.log";
+    FILE *f = fopen(path, "w");
+    if (f) {
+      show_error(f, 10000, m, n, A, lda, B, ldb, file, line, sad, tol);
+      printf("see also %s\n", path);
+    }
     exit(1);
   }
 }
 
-#define check(tol, m, n, A, sa, B, sb) \
-  check(tol, m, n, A, sa, B, sb, __FILE__, __LINE__)
+#define check(tol, m, n, A, lda, B, ldb) \
+  check(tol, m, n, A, lda, B, ldb, __FILE__, __LINE__)
 
 long micros(void) {
   struct timespec ts;
@@ -173,20 +361,12 @@ long micros(void) {
            #x);                                                             \
   } while (0)
 
-unsigned long rando(void) {
-  static unsigned long s;
-  unsigned long z = (s += 0x9e3779b97f4a7c15);
-  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
-  z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
-  return z ^ (z >> 31);
-}
-
 double real01(unsigned long x) {  // (0,1)
   return 1. / 4503599627370496. * ((x >> 12) + .5);
 }
 
 double numba(void) {  // (-1,1)
-  return real01(rando()) * 2 - 1;
+  return real01(lemur64()) * 2 - 1;
 }
 
 template <typename T>
@@ -196,41 +376,91 @@ void fill(T *A, long n) {
   }
 }
 
-void check_reference_gemm_is_ok(void) {
-  constexpr long m = 2;
-  constexpr long n = 2;
-  constexpr long k = 2;
-  float A[m][k] = {{1, 2}, {3, 4}};
-  float B[k][n] = {{5, 6}, {7, 8}};
-  float C[m][n] = {{666, 666}, {666, 666}};
-  float G[m][n] = {{19, 22}, {43, 50}};
-  bench(matmul(m, n, k, (float *)A, k, (float *)B, n, (float *)C, n));
-  check(FLAWLESS, m, n, (float *)G, n, (float *)C, n);
-}
-
-void check_transposed_blocking_gemm_is_ok(void) {
-  long m = 1024;
-  long k = 512;
-  long n = 80;
+void test_gemm(long m, long n, long k) {
   float *A = new float[m * k];
+  float *At = new float[k * m];
   float *B = new float[k * n];
+  float *Bt = new float[n * k];
   float *C = new float[m * n];
-  float *D = new float[m * n];
+  float *GOLD = new float[m * n];
+  float α = 1;
+  float β = 0;
   fill(A, m * k);
   fill(B, k * n);
-  bench(matmul(m, n, k, A, k, B, n, C, n));
-  float *At = new float[k * m];
-  bench(transpose(m, k, A, k, At, m));
-  bench((gemm<8, 4>(m, n, k, At, m, B, n, D, n)));
-  check(FLAWLESS, m, n, C, n, D, n);
-  delete[] At;
-  delete[] D;
+  dgemm(0, 0, m, n, k, 1, A, k, B, n, 0, GOLD, n);
+  transpose(m, k, A, k, At, m);
+  transpose(k, n, B, n, Bt, k);
+  sgemm(0, 0, m, n, k, α, A, k, B, n, β, C, n);
+  check(PRECISION, m, n, GOLD, n, C, n);
+  sgemm(1, 0, m, n, k, α, At, m, B, n, β, C, n);
+  check(PRECISION, m, n, GOLD, n, C, n);
+  sgemm(0, 1, m, n, k, α, A, k, Bt, k, β, C, n);
+  check(PRECISION, m, n, GOLD, n, C, n);
+  sgemm(1, 1, m, n, k, α, At, m, Bt, k, β, C, n);
+  check(PRECISION, m, n, GOLD, n, C, n);
+  delete[] GOLD;
   delete[] C;
+  delete[] Bt;
   delete[] B;
+  delete[] At;
+  delete[] A;
+}
+
+void check_gemm_works(void) {
+  static long kSizes[] = {1, 2, 3, 4, 5, 6, 7, 17, 31, 33, 63, 128, 129};
+  is_self_testing = true;
+  long c = 0;
+  long N = sizeof(kSizes) / sizeof(kSizes[0]);
+  for (long i = 0; i < N; ++i) {
+    long m = kSizes[i];
+    for (long j = 0; j < N; ++j) {
+      long n = kSizes[N - 1 - i];
+      for (long k = 0; k < N; ++k) {
+        long K = kSizes[i];
+        if (c++ % 13 == 0) {
+          printf("testing %2ld %2ld %2ld\r", m, n, K);
+        }
+        test_gemm(m, n, K);
+      }
+    }
+  }
+  printf("\r");
+  is_self_testing = false;
+}
+
+long m = 2333 / 3;
+long k = 577 / 3;
+long n = 713 / 3;
+
+void check_sgemm(void) {
+  float *A = new float[m * k];
+  float *At = new float[k * m];
+  float *B = new float[k * n];
+  float *Bt = new float[n * k];
+  float *C = new float[m * n];
+  double *GOLD = new double[m * n];
+  fill(A, m * k);
+  fill(B, k * n);
+  transpose(m, k, A, k, At, m);
+  transpose(k, n, B, n, Bt, k);
+  bench(dgemm(0, 0, m, n, k, 1, A, k, B, n, 0, GOLD, n));
+  bench(sgemm(0, 0, m, n, k, 1, A, k, B, n, 0, C, n));
+  check(PRECISION, m, n, GOLD, n, C, n);
+  bench(sgemm(1, 0, m, n, k, 1, At, m, B, n, 0, C, n));
+  check(PRECISION, m, n, GOLD, n, C, n);
+  bench(sgemm(0, 1, m, n, k, 1, A, k, Bt, k, 0, C, n));
+  check(PRECISION, m, n, GOLD, n, C, n);
+  bench(sgemm(1, 1, m, n, k, 1, At, m, Bt, k, 0, C, n));
+  check(PRECISION, m, n, GOLD, n, C, n);
+  delete[] GOLD;
+  delete[] C;
+  delete[] Bt;
+  delete[] B;
+  delete[] At;
   delete[] A;
 }
 
 int main(int argc, char *argv[]) {
-  check_reference_gemm_is_ok();
-  check_transposed_blocking_gemm_is_ok();
+  check_gemm_works();
+  check_sgemm();
 }
diff --git a/test/math/BUILD.mk b/test/math/BUILD.mk
new file mode 100644
index 000000000..e0a241b20
--- /dev/null
+++ b/test/math/BUILD.mk
@@ -0,0 +1,41 @@
+#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
+#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
+
+PKGS += TEST_MATH
+
+TEST_MATH_SRCS := $(wildcard test/math/*.c)
+TEST_MATH_SRCS_TEST = $(filter %_test.c,$(TEST_MATH_SRCS))
+TEST_MATH_OBJS = $(TEST_MATH_SRCS:%.c=o/$(MODE)/%.o)
+TEST_MATH_COMS = $(TEST_MATH_SRCS_TEST:%.c=o/$(MODE)/%.com)
+TEST_MATH_BINS = $(TEST_MATH_COMS) $(TEST_MATH_COMS:%=%.dbg)
+TEST_MATH_TESTS = $(TEST_MATH_SRCS_TEST:%.c=o/$(MODE)/%.com.ok)
+TEST_MATH_CHECKS = $(TEST_MATH_SRCS_TEST:%.c=o/$(MODE)/%.com.runs)
+
+TEST_MATH_DIRECTDEPS =				\
+	LIBC_INTRIN				\
+	LIBC_RUNTIME				\
+	LIBC_SYSV				\
+	LIBC_TINYMATH				\
+	THIRD_PARTY_COMPILER_RT
+
+TEST_MATH_DEPS :=				\
+	$(call uniq,$(foreach x,$(TEST_MATH_DIRECTDEPS),$($(x))))
+
+o/$(MODE)/test/math/math.pkg:			\
+		$(TEST_MATH_OBJS)		\
+		$(foreach x,$(TEST_MATH_DIRECTDEPS),$($(x)_A).pkg)
+
+o/$(MODE)/test/math/%.com.dbg:			\
+		$(TEST_MATH_DEPS)		\
+		o/$(MODE)/test/math/%.o	\
+		o/$(MODE)/test/math/math.pkg	\
+		$(CRT)				\
+		$(APE_NO_MODIFY_SELF)
+	@$(APELINK)
+
+$(TEST_MATH_OBJS): private CFLAGS += -fno-builtin
+
+.PHONY: o/$(MODE)/test/math
+o/$(MODE)/test/math:				\
+		$(TEST_MATH_BINS)		\
+		$(TEST_MATH_CHECKS)
diff --git a/test/math/float16_test.c b/test/math/float16_test.c
new file mode 100644
index 000000000..f550b1c27
--- /dev/null
+++ b/test/math/float16_test.c
@@ -0,0 +1,107 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/math.h"
+
+#define CHECK(x) \
+  if (!(x)) return __LINE__
+#define FALSE(x)             \
+  {                          \
+    volatile bool x_ = x;    \
+    if (x_) return __LINE__; \
+  }
+#define TRUE(x)               \
+  {                           \
+    volatile bool x_ = x;     \
+    if (!x_) return __LINE__; \
+  }
+
+_Float16 identity(_Float16 x) {
+  return x;
+}
+_Float16 (*half)(_Float16) = identity;
+
+int main() {
+  volatile float f;
+  volatile double d;
+  volatile _Float16 pi = 3.141;
+
+  // half → float → half
+  f = pi;
+  pi = f;
+
+  // half → float
+  float __extendhfsf2(_Float16);
+  CHECK(0.f == __extendhfsf2(0));
+  CHECK(3.140625f == __extendhfsf2(pi));
+  CHECK(3.140625f == pi);
+
+  // half → double → half
+  d = pi;
+  pi = d;
+
+  // half → double
+  double __extendhfdf2(_Float16);
+  CHECK(0. == __extendhfdf2(0));
+  CHECK(3.140625 == __extendhfdf2(pi));
+
+  // float → half
+  _Float16 __truncsfhf2(float);
+  CHECK(0 == (float)__truncsfhf2(0));
+  CHECK(pi == (float)__truncsfhf2(3.141f));
+  CHECK(3.140625f == (float)__truncsfhf2(3.141f));
+
+  // double → half
+  _Float16 __truncdfhf2(double);
+  CHECK(0 == (double)__truncdfhf2(0));
+  CHECK(3.140625 == (double)__truncdfhf2(3.141));
+
+  // specials
+  volatile _Float16 nan = NAN;
+  volatile _Float16 positive_infinity = +INFINITY;
+  volatile _Float16 negative_infinity = -INFINITY;
+  CHECK(isnan(nan));
+  CHECK(!isinf(pi));
+  CHECK(!isnan(pi));
+  CHECK(isinf(positive_infinity));
+  CHECK(isinf(negative_infinity));
+  CHECK(!isnan(positive_infinity));
+  CHECK(!isnan(negative_infinity));
+  CHECK(!signbit(pi));
+  CHECK(signbit(half(-pi)));
+  CHECK(!signbit(half(+0.)));
+  CHECK(signbit(half(-0.)));
+
+  // arithmetic
+  CHECK(half(-3) == -half(3));
+  CHECK(half(9) == half(3) * half(3));
+  CHECK(half(0) == half(pi) - half(pi));
+  CHECK(half(6.28125) == half(pi) + half(pi));
+
+  // comparisons
+  CHECK(half(3) > half(2));
+  CHECK(half(3) < half(4));
+  CHECK(half(3) <= half(3));
+  CHECK(half(3) >= half(3));
+  TRUE(half(NAN) != half(NAN));
+  FALSE(half(NAN) == half(NAN));
+  TRUE(half(3) != half(NAN));
+  FALSE(half(3) == half(NAN));
+  TRUE(half(NAN) != half(3));
+  FALSE(half(NAN) == half(3));
+}
diff --git a/test/math/hypot_test.c b/test/math/hypot_test.c
new file mode 100644
index 000000000..dbe345edc
--- /dev/null
+++ b/test/math/hypot_test.c
@@ -0,0 +1,92 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+
+#define OK 0
+
+#define MAX_ERROR_ULP 1
+
+#define CHECK(x) \
+  if (!(x)) return __LINE__
+
+#define TEST(e, x) \
+  errno = 0;       \
+  CHECK(x);        \
+  CHECK(errno == e)
+
+long lemur(void) {
+  static unsigned __int128 s = 2131259787901769494;
+  return (s *= 15750249268501108917ul) >> 64;
+}
+
+int main() {
+
+  // test base cases
+  TEST(OK, hypot(3, 4) == 5);
+  TEST(OK, hypot(0, 0) == 0);
+  TEST(OK, hypot(5, 12) == 13);
+  TEST(OK, hypot(-5, -12) == 13);
+  TEST(OK, hypot(-3, 4) == 5);
+
+  // test with zeros
+  TEST(OK, hypot(0, 0) == 0);
+  TEST(OK, hypot(0, 3) == 3);
+  TEST(OK, hypot(3, 0) == 3);
+
+  // test with NAN
+  TEST(OK, isnan(hypot(NAN, 1)));
+  TEST(OK, isnan(hypot(1, NAN)));
+  TEST(OK, isnan(hypot(NAN, NAN)));
+
+  // test with INFINITY
+  TEST(OK, hypot(INFINITY, 1) == INFINITY);
+  TEST(OK, hypot(1, INFINITY) == INFINITY);
+  TEST(OK, hypot(-INFINITY, -INFINITY) == INFINITY);
+
+  // test underflow avoidance
+  TEST(OK, hypot(2e-308, 3e-308) > 0);
+
+  // test what happens on overflow
+  // TODO(jart): This should raise ERANGE.
+  TEST(OK, hypot(DBL_MAX, DBL_MAX) == INFINITY);
+
+  // test accuracy assuming hypotl() is correct
+  union {
+    long i;
+    double f;
+  } x, y, a, b;
+  int n = 1000;
+  for (int i = 0; i < n; ++i) {
+    for (int j = 0; j < n; ++j) {
+      x.i = lemur();
+      y.i = lemur();
+      a.f = hypotl(x.f, y.f);
+      b.f = hypot(x.f, y.f);
+      if (isnan(a.f) || isnan(b.f)) {
+        CHECK(isnan(a.f) == isnan(b.f));
+        continue;
+      }
+      long e = b.i - a.i;
+      if (e < 0) e = -e;
+      CHECK(e <= MAX_ERROR_ULP);
+    }
+  }
+}
diff --git a/test/math/hypotf_test.c b/test/math/hypotf_test.c
new file mode 100644
index 000000000..961f3c6d6
--- /dev/null
+++ b/test/math/hypotf_test.c
@@ -0,0 +1,94 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+
+#define OK 0
+
+#define MAX_ERROR_ULP 1
+
+#define CHECK(x) \
+  if (!(x)) return __LINE__
+
+#define TEST(e, x) \
+  errno = 0;       \
+  CHECK(x);        \
+  CHECK(errno == e)
+
+int rando(void) {
+  static unsigned long s;
+  s *= 6364136223846793005;
+  s += 1442695040888963407;
+  return s >> 32;
+}
+
+int main() {
+
+  // test base cases
+  TEST(OK, hypotf(3, 4) == 5);
+  TEST(OK, hypotf(0, 0) == 0);
+  TEST(OK, hypotf(5, 12) == 13);
+  TEST(OK, hypotf(-5, -12) == 13);
+  TEST(OK, hypotf(-3, 4) == 5);
+
+  // test with zeros
+  TEST(OK, hypotf(0, 0) == 0);
+  TEST(OK, hypotf(0, 3) == 3);
+  TEST(OK, hypotf(3, 0) == 3);
+
+  // test with NAN
+  TEST(OK, isnan(hypotf(NAN, 1)));
+  TEST(OK, isnan(hypotf(1, NAN)));
+  TEST(OK, isnan(hypotf(NAN, NAN)));
+
+  // test underflow avoidance
+  TEST(OK, hypotf(2e-38, 3e-38) > 0);
+
+  // test what happens on overflow
+  // TODO(jart): This should raise ERANGE.
+  TEST(OK, hypotf(FLT_MAX, FLT_MAX) == INFINITY);
+
+  // test with INFINITY
+  TEST(OK, hypotf(INFINITY, 1) == INFINITY);
+  TEST(OK, hypotf(1, INFINITY) == INFINITY);
+  TEST(OK, hypotf(-INFINITY, -INFINITY) == INFINITY);
+
+  // test accuracy assuming hypotl() is correct
+  union {
+    int i;
+    float f;
+  } x, y, a, b;
+  int n = 1000;
+  for (int i = 0; i < n; ++i) {
+    for (int j = 0; j < n; ++j) {
+      x.i = rando();
+      y.i = rando();
+      a.f = hypotl(x.f, y.f);
+      b.f = hypotf(x.f, y.f);
+      if (isnan(a.f) || isnan(b.f)) {
+        CHECK(isnan(a.f) == isnan(b.f));
+        continue;
+      }
+      long e = b.i - a.i;
+      if (e < 0) e = -e;
+      CHECK(e <= MAX_ERROR_ULP);
+    }
+  }
+}
diff --git a/test/math/powf_test.c b/test/math/powf_test.c
new file mode 100644
index 000000000..357299826
--- /dev/null
+++ b/test/math/powf_test.c
@@ -0,0 +1,104 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include <errno.h>
+#include <float.h>
+#include <math.h>
+
+#define OK 0
+
+#define MAX_ERROR_ULP 1
+
+#define CHECK(x) \
+  if (!(x)) return __LINE__
+
+#define TEST(e, x) \
+  errno = 0;       \
+  CHECK(x);        \
+  CHECK(errno == e)
+
+int rando(void) {
+  static unsigned long s;
+  s *= 6364136223846793005;
+  s += 1442695040888963407;
+  return s >> 32;
+}
+
+int main() {
+
+  // test base cases
+  TEST(OK, !powf(0, 5));
+  TEST(OK, powf(0, 0) == 1);
+  TEST(OK, powf(1, 0) == 1);
+  TEST(OK, powf(2, 0) == 1);
+  TEST(OK, powf(0, 2) == 0);
+  TEST(OK, powf(5, 0) == 1);
+  TEST(OK, powf(1, 5) == 1);
+  TEST(OK, powf(2, 3) == 8);
+  TEST(OK, powf(-2, 3) == -8);
+  TEST(OK, powf(-2, 2) == 4);
+  TEST(OK, powf(2, -2) == 0.25);
+
+  // test edge cases
+  TEST(OK, powf(-2, -2) == 0.25);
+  TEST(OK, powf(2, .5) == sqrtf(2));
+  TEST(OK, powf(2, -.5) == M_SQRT1_2f);
+
+  // test special values
+  TEST(OK, powf(NAN, 0) == 1);
+  TEST(OK, !powf(INFINITY, -2));
+  TEST(OK, isnan(powf(NAN, 2)));
+  TEST(OK, isnan(powf(2, NAN)));
+  TEST(OK, powf(INFINITY, -1) == 0);
+  TEST(OK, powf(INFINITY, 2) == INFINITY);
+  TEST(OK, powf(-INFINITY, 2) == INFINITY);
+  TEST(OK, powf(-INFINITY, 3) == -INFINITY);
+
+  // test domain errors
+  TEST(EDOM, isnan(powf(-1, 0.5)));
+
+  // test pole errors
+  TEST(ERANGE, isinf(powf(0, -1)));
+  TEST(ERANGE, isinf(powf(0, -.5)));
+
+  // test underflow and overflow
+  TEST(ERANGE, powf(1e-38, 2) == 0);
+  TEST(ERANGE, powf(FLT_MAX, 2) == INFINITY);
+
+  // test accuracy assuming pow() is correct
+  union {
+    int i;
+    float f;
+  } x, y, a, b;
+  int n = 1000;
+  for (int i = 0; i < n; ++i) {
+    for (int j = 0; j < n; ++j) {
+      x.i = rando();
+      y.i = rando();
+      a.f = pow(x.f, y.f);
+      b.f = powf(x.f, y.f);
+      if (isnan(a.f) || isnan(b.f)) {
+        CHECK(isnan(a.f) == isnan(b.f));
+        continue;
+      }
+      int e = b.i - a.i;
+      if (e < 0) e = -e;
+      CHECK(e <= MAX_ERROR_ULP);
+    }
+  }
+}
diff --git a/test/net/http/parsehttpmessage_test.c b/test/net/http/parsehttpmessage_test.c
index f5b9e3449..8a308a150 100644
--- a/test/net/http/parsehttpmessage_test.c
+++ b/test/net/http/parsehttpmessage_test.c
@@ -20,6 +20,7 @@
 #include "libc/log/check.h"
 #include "libc/mem/gc.h"
 #include "libc/mem/mem.h"
+#include "libc/serialize.h"
 #include "libc/str/str.h"
 #include "libc/testlib/ezbench.h"
 #include "libc/testlib/testlib.h"
@@ -40,6 +41,20 @@ void TearDown(void) {
   DestroyHttpMessage(req);
 }
 
+char *method(void) {
+  static char s[9];
+  WRITE64LE(s, req->method);
+  return s;
+}
+
+TEST(ParseHttpMethod, test) {
+  ASSERT_EQ(0, ParseHttpMethod(" ", -1));
+  ASSERT_EQ(0, ParseHttpMethod("aaaaaaaaa", -1));
+  ASSERT_EQ(kHttpGet, ParseHttpMethod("get", -1));
+  ASSERT_EQ(kHttpGet, ParseHttpMethod("GET", -1));
+  ASSERT_EQ(kHttpDelete, ParseHttpMethod("DELETE", -1));
+}
+
 TEST(ParseHttpMessage, soLittleState) {
   InitHttpMessage(req, kHttpRequest);
   ASSERT_LE(sizeof(struct HttpMessage), 512);
@@ -59,7 +74,7 @@ TEST(ParseHttpMessage, testNoHeaders) {
   static const char m[] = "GET /foo HTTP/1.0\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_EQ(kHttpGet, req->method);
+  EXPECT_STREQ("GET", method());
   EXPECT_STREQ("/foo", gc(slice(m, req->uri)));
   EXPECT_EQ(10, req->version);
 }
@@ -72,7 +87,7 @@ Content-Length: 0\r\n\
 \r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_EQ(kHttpPost, req->method);
+  EXPECT_STREQ("POST", method());
   EXPECT_STREQ("/foo?bar%20hi", gc(slice(m, req->uri)));
   EXPECT_EQ(10, req->version);
   EXPECT_STREQ("foo.example", gc(slice(m, req->headers[kHttpHost])));
@@ -84,7 +99,7 @@ TEST(ParseHttpMessage, testHttp101) {
   static const char m[] = "GET / HTTP/1.1\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_EQ(kHttpGet, req->method);
+  EXPECT_STREQ("GET", method());
   EXPECT_STREQ("/", gc(slice(m, req->uri)));
   EXPECT_EQ(11, req->version);
 }
@@ -93,7 +108,7 @@ TEST(ParseHttpMessage, testHttp100) {
   static const char m[] = "GET / HTTP/1.0\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_EQ(kHttpGet, req->method);
+  EXPECT_STREQ("GET", method());
   EXPECT_STREQ("/", gc(slice(m, req->uri)));
   EXPECT_EQ(10, req->version);
 }
@@ -102,45 +117,40 @@ TEST(ParseHttpMessage, testUnknownMethod_canBeUsedIfYouWant) {
   static const char m[] = "#%*+_^ / HTTP/1.0\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_FALSE(req->method);
-  EXPECT_STREQ("WUT", kHttpMethod[req->method]);
-  EXPECT_STREQ("#%*+_^", gc(slice(m, req->xmethod)));
+  EXPECT_STREQ("#%*+_^", method());
 }
 
 TEST(ParseHttpMessage, testIllegalMethod) {
   static const char m[] = "ehd@oruc / HTTP/1.0\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(-1, ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_STREQ("WUT", kHttpMethod[req->method]);
 }
 
-TEST(ParseHttpMessage, testIllegalMethodCasing_weAllowItAndPreserveIt) {
+TEST(ParseHttpMessage, testIllegalMethodCasing_weUpperCaseIt) {
   static const char m[] = "get / HTTP/1.0\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_STREQ("GET", kHttpMethod[req->method]);
-  EXPECT_STREQ("get", gc(slice(m, req->xmethod)));
+  EXPECT_STREQ("GET", method());
 }
 
 TEST(ParseHttpMessage, testEmptyMethod_isntAllowed) {
   static const char m[] = " / HTTP/1.0\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(-1, ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_STREQ("WUT", kHttpMethod[req->method]);
 }
 
 TEST(ParseHttpMessage, testEmptyUri_isntAllowed) {
   static const char m[] = "GET  HTTP/1.0\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(-1, ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_STREQ("GET", kHttpMethod[req->method]);
+  EXPECT_STREQ("GET", method());
 }
 
 TEST(ParseHttpMessage, testHttp09) {
   static const char m[] = "GET /\r\n\r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_EQ(kHttpGet, req->method);
+  EXPECT_STREQ("GET", method());
   EXPECT_STREQ("/", gc(slice(m, req->uri)));
   EXPECT_EQ(9, req->version);
 }
@@ -195,7 +205,7 @@ Content-Length: 0\n\
 \n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m) - 1, ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_EQ(kHttpPost, req->method);
+  EXPECT_STREQ("POST", method());
   EXPECT_STREQ("/foo?bar%20hi", gc(slice(m, req->uri)));
   EXPECT_EQ(10, req->version);
   EXPECT_STREQ("foo.example", gc(slice(m, req->headers[kHttpHost])));
@@ -217,7 +227,7 @@ Accept-Language: en-US,en;q=0.9\r\n\
 \r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EXPECT_EQ(kHttpGet, req->method);
+  EXPECT_STREQ("GET", method());
   EXPECT_STREQ("/tool/net/redbean.png", gc(slice(m, req->uri)));
   EXPECT_EQ(11, req->version);
   EXPECT_STREQ("10.10.10.124:8080", gc(slice(m, req->headers[kHttpHost])));
@@ -541,14 +551,14 @@ transfer-encoding: chunked\r\n\
 }
 
 BENCH(ParseHttpMessage, bench) {
-  EZBENCH2("DoTiniestHttpRequest", donothing, DoTiniestHttpRequest());
+  EZBENCH2("DoTiniestHttpReque", donothing, DoTiniestHttpRequest());
   EZBENCH2("DoTinyHttpRequest", donothing, DoTinyHttpRequest());
-  EZBENCH2("DoStandardChromeRequest", donothing, DoStandardChromeRequest());
-  EZBENCH2("DoUnstandardChromeRequest", donothing, DoUnstandardChromeRequest());
-  EZBENCH2("DoTiniestHttpResponse", donothing, DoTiniestHttpResponse());
+  EZBENCH2("DoStandardChromeRe", donothing, DoStandardChromeRequest());
+  EZBENCH2("DoUnstandardChrome", donothing, DoUnstandardChromeRequest());
+  EZBENCH2("DoTiniestHttpRespo", donothing, DoTiniestHttpResponse());
   EZBENCH2("DoTinyHttpResponse", donothing, DoTinyHttpResponse());
-  EZBENCH2("DoStandardHttpResponse", donothing, DoStandardHttpResponse());
-  EZBENCH2("DoUnstandardHttpResponse", donothing, DoUnstandardHttpResponse());
+  EZBENCH2("DoStandardHttpResp", donothing, DoStandardHttpResponse());
+  EZBENCH2("DoUnstandardHttpRe", donothing, DoUnstandardHttpResponse());
 }
 
 BENCH(HeaderHas, bench) {
@@ -563,7 +573,7 @@ ACCEPT-encoding: bzip2\r\n\
 \r\n";
   InitHttpMessage(req, kHttpRequest);
   EXPECT_EQ(strlen(m), ParseHttpMessage(req, m, strlen(m)));
-  EZBENCH2("HeaderHas text/plain", donothing,
+  EZBENCH2("HeaderHas txt/pln", donothing,
            HeaderHas(req, m, kHttpAccept, "text/plain", 7));
   EZBENCH2("HeaderHas deflate", donothing,
            HeaderHas(req, m, kHttpAcceptEncoding, "deflate", 7));
diff --git a/test/posix/BUILD.mk b/test/posix/BUILD.mk
index 42faaf7ef..ac5b2e413 100644
--- a/test/posix/BUILD.mk
+++ b/test/posix/BUILD.mk
@@ -51,8 +51,6 @@ o/$(MODE)/test/posix/%.com.dbg:			\
 		$(APE_NO_MODIFY_SELF)
 	@$(APELINK)
 
-$(TEST_POSIX_OBJS): private CFLAGS += -isystem isystem/
-
 .PHONY: o/$(MODE)/test/posix
 o/$(MODE)/test/posix:				\
 		$(TEST_POSIX_BINS)		\
diff --git a/libc/intrin/__clear_cache.c b/test/posix/atoi_test.c
similarity index 70%
rename from libc/intrin/__clear_cache.c
rename to test/posix/atoi_test.c
index 3d93adf45..afef98200 100644
--- a/libc/intrin/__clear_cache.c
+++ b/test/posix/atoi_test.c
@@ -1,7 +1,7 @@
 /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
 ╞══════════════════════════════════════════════════════════════════════════════╡
-│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
 │                                                                              │
 │ Permission to use, copy, modify, and/or distribute this software for         │
 │ any purpose with or without fee is hereby granted, provided that the         │
@@ -16,22 +16,31 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
 
-void __clear_cache2(const void *base, const void *end) {
-#ifdef __aarch64__
-  int icache, dcache;
-  const char *p, *pe = end;
-  static unsigned int ctr_el0 = 0;
-  if (!ctr_el0) asm volatile("mrs\t%0,ctr_el0" : "=r"(ctr_el0));
-  icache = 4 << (ctr_el0 & 15);
-  dcache = 4 << ((ctr_el0 >> 16) & 15);
-  for (p = (const char *)((uintptr_t)base & -dcache); p < pe; p += dcache) {
-    asm volatile("dc\tcvau,%0" : : "r"(p) : "memory");
-  }
-  asm volatile("dsb\tish" ::: "memory");
-  for (p = (const char *)((uintptr_t)base & -icache); p < pe; p += icache) {
-    asm volatile("ic\tivau,%0" : : "r"(p) : "memory");
-  }
-  asm volatile("dsb\tish\nisb" ::: "memory");
-#endif
+#define TEST(x) \
+  if (!(x)) return __LINE__
+
+int main() {
+  TEST(atoi("") == 0);
+  TEST(atoi("-") == 0);
+  TEST(atoi("0") == 0);
+  TEST(atoi("1") == 1);
+  TEST(atoi("+1") == 1);
+  TEST(atoi("-1") == -1);
+  TEST(atoi("1-") == 1);
+  TEST(atoi("--1") == 0);
+  TEST(atoi("16 32") == 16);
+  TEST(atoi("\t 16") == 16);
+  TEST(atoi("\v 16") == 16);
+  TEST(atoi("\n 16") == 16);
+  TEST(atoi("\r 16") == 16);
+  TEST(atoi("rr 16") == 0);
+  TEST(atoi("0123456789") == 123456789);
+  TEST(atoi("2147483647") == INT_MAX);
+  TEST(atoi("-2147483648") == INT_MIN);
+  TEST(atoi("-2147483647") == INT_MIN + 1);
+  TEST(!errno);
 }
diff --git a/test/posix/strtol_test.c b/test/posix/strtol_test.c
new file mode 100644
index 000000000..4a6da98be
--- /dev/null
+++ b/test/posix/strtol_test.c
@@ -0,0 +1,75 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#define TEST(x) \
+  if (!(x)) return __LINE__
+
+int main() {
+  char *end;
+
+  // Basic conversions
+  TEST(strtol("10", &end, 10) == 10L);
+  TEST(strtol("-10", &end, 10) == -10L);
+  TEST(strtol("+10", &end, 10) == 10L);
+
+  // Edge cases and error detection
+  TEST(strtol("0", &end, 10) == 0L);
+  TEST(strtol("-0", &end, 10) == 0L);
+  TEST(strtol("+0", &end, 10) == 0L);
+  TEST(strtol("9223372036854775807", &end, 10) == LONG_MAX);
+  TEST(strtol("-9223372036854775808", &end, 10) == LONG_MIN);
+
+  // Base specification
+  TEST(strtol("10", &end, 2) == 2L);
+  TEST(strtol("10", &end, 16) == 16L);
+
+  // Invalid input (should not modify errno if conversion is successful)
+  errno = 0;
+  TEST(strtol("invalid", &end, 10) == 0L && errno == 0);
+
+  // Overflow detection
+  errno = 0;
+  TEST(strtol("99999999999999999999999999", &end, 10) == LONG_MAX &&
+       errno == ERANGE);
+
+  // Underflow detection
+  errno = 0;
+  TEST(strtol("-99999999999999999999999999", &end, 10) == LONG_MIN &&
+       errno == ERANGE);
+
+  // Partial conversion with valid characters before invalid ones
+  TEST(strtol("123abc", &end, 10) == 123L && *end == 'a');
+
+  // Testing with leading white space
+  TEST(strtol("   123", &end, 10) == 123L);
+
+  // Base 0 auto-detection
+  TEST(strtol("0x10", &end, 0) == 16L);
+  TEST(strtol("010", &end, 0) == 8L);
+  TEST(strtol("10", &end, 0) == 10L);
+
+  // Check if 'end' pointer is set correctly to the next character after the
+  // last valid digit
+  char *ptr = "1234abcd";
+  strtol(ptr, &end, 10);
+  TEST(end == ptr + 4);
+}
diff --git a/third_party/aarch64/arm_acle.internal.h b/third_party/aarch64/arm_acle.internal.h
index 9c0bd0be6..687b133d2 100644
--- a/third_party/aarch64/arm_acle.internal.h
+++ b/third_party/aarch64/arm_acle.internal.h
@@ -1,12 +1,45 @@
 #if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
 #ifndef _GCC_ARM_ACLE_H
 #define _GCC_ARM_ACLE_H
+#pragma GCC aarch64 "arm_acle.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
+#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) __extension__ extern __inline TYPE __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) NAME (TYPE __value, uint32_t __rotate) { size_t __size = sizeof (TYPE) * __CHAR_BIT__; __rotate = __rotate % __size; return __value >> __rotate | __value << ((__size - __rotate) % __size); }
+_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t)
+_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long)
+_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t)
+#undef _GCC_ARM_ACLE_ROR_FN
+#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) __extension__ extern __inline RTYPE __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __##NAME (ITYPE __value) { return __builtin_##BUILTIN (__value); }
+_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int)
+_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int)
+_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int)
+_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int)
+_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int)
+_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int)
+_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long)
+_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t)
+_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long)
+_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t)
+_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, int16_t)
+_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t)
+#undef _GCC_ARM_ACLE_DATA_FN
+__extension__ extern __inline unsigned long
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__revl (unsigned long __value)
+{
+  if (sizeof (unsigned long) == 8)
+    return __revll (__value);
+  else
+    return __rev (__value);
+}
 #pragma GCC push_options
 #pragma GCC target ("arch=armv8.3-a")
-__funline int32_t
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __jcvt (double __a)
 {
   return __builtin_aarch64_jcvtzs (__a);
@@ -14,42 +47,50 @@ __jcvt (double __a)
 #pragma GCC pop_options
 #pragma GCC push_options
 #pragma GCC target ("arch=armv8.5-a")
-__funline float
+__extension__ extern __inline float
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint32zf (float __a)
 {
   return __builtin_aarch64_frint32zsf (__a);
 }
-__funline double
+__extension__ extern __inline double
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint32z (double __a)
 {
   return __builtin_aarch64_frint32zdf (__a);
 }
-__funline float
+__extension__ extern __inline float
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint64zf (float __a)
 {
   return __builtin_aarch64_frint64zsf (__a);
 }
-__funline double
+__extension__ extern __inline double
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint64z (double __a)
 {
   return __builtin_aarch64_frint64zdf (__a);
 }
-__funline float
+__extension__ extern __inline float
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint32xf (float __a)
 {
   return __builtin_aarch64_frint32xsf (__a);
 }
-__funline double
+__extension__ extern __inline double
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint32x (double __a)
 {
   return __builtin_aarch64_frint32xdf (__a);
 }
-__funline float
+__extension__ extern __inline float
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint64xf (float __a)
 {
   return __builtin_aarch64_frint64xsf (__a);
 }
-__funline double
+__extension__ extern __inline double
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rint64x (double __a)
 {
   return __builtin_aarch64_frint64xdf (__a);
@@ -57,42 +98,50 @@ __rint64x (double __a)
 #pragma GCC pop_options
 #pragma GCC push_options
 #pragma GCC target ("+nothing+crc")
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32b (uint32_t __a, uint8_t __b)
 {
   return __builtin_aarch64_crc32b (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32cb (uint32_t __a, uint8_t __b)
 {
   return __builtin_aarch64_crc32cb (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32ch (uint32_t __a, uint16_t __b)
 {
   return __builtin_aarch64_crc32ch (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32cw (uint32_t __a, uint32_t __b)
 {
   return __builtin_aarch64_crc32cw (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32cd (uint32_t __a, uint64_t __b)
 {
   return __builtin_aarch64_crc32cx (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32h (uint32_t __a, uint16_t __b)
 {
   return __builtin_aarch64_crc32h (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32w (uint32_t __a, uint32_t __b)
 {
   return __builtin_aarch64_crc32w (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __crc32d (uint32_t __a, uint64_t __b)
 {
   return __builtin_aarch64_crc32x (__a, __b);
@@ -112,36 +161,72 @@ __crc32d (uint32_t __a, uint64_t __b)
 #define _TMFAILURE_DBG 0x00400000u
 #define _TMFAILURE_INT 0x00800000u
 #define _TMFAILURE_TRIVIAL 0x01000000u
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __tstart (void)
 {
   return __builtin_aarch64_tstart ();
 }
-__funline void
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __tcommit (void)
 {
   __builtin_aarch64_tcommit ();
 }
-__funline void
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __tcancel (const uint64_t __reason)
 {
   __builtin_aarch64_tcancel (__reason);
 }
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __ttest (void)
 {
   return __builtin_aarch64_ttest ();
 }
 #pragma GCC pop_options
 #endif
+#ifdef __ARM_FEATURE_LS64
+#pragma GCC push_options
+#pragma GCC target ("+nothing+ls64")
+typedef __arm_data512_t data512_t;
+__extension__ extern __inline data512_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_ld64b (const void *__addr)
+{
+  return __builtin_aarch64_ld64b (__addr);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64b (void *__addr, data512_t __value)
+{
+  __builtin_aarch64_st64b (__addr, __value);
+}
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64bv (void *__addr, data512_t __value)
+{
+  return __builtin_aarch64_st64bv (__addr, __value);
+}
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_st64bv0 (void *__addr, data512_t __value)
+{
+  return __builtin_aarch64_st64bv0 (__addr, __value);
+}
+#pragma GCC pop_options
+#endif
 #pragma GCC push_options
 #pragma GCC target ("+nothing+rng")
-__funline int
+__extension__ extern __inline int
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rndr (uint64_t *__res)
 {
   return __builtin_aarch64_rndr (__res);
 }
-__funline int
+__extension__ extern __inline int
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __rndrrs (uint64_t *__res)
 {
   return __builtin_aarch64_rndrrs (__res);
diff --git a/third_party/aarch64/arm_fp16.internal.h b/third_party/aarch64/arm_fp16.internal.h
index ddc72f764..84185a620 100644
--- a/third_party/aarch64/arm_fp16.internal.h
+++ b/third_party/aarch64/arm_fp16.internal.h
@@ -4,447 +4,536 @@
 #pragma GCC push_options
 #pragma GCC target ("arch=armv8.2-a+fp16")
 typedef __fp16 float16_t;
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vabsh_f16 (float16_t __a)
 {
   return __builtin_aarch64_abshf (__a);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vceqzh_f16 (float16_t __a)
 {
   return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcgezh_f16 (float16_t __a)
 {
   return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcgtzh_f16 (float16_t __a)
 {
   return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vclezh_f16 (float16_t __a)
 {
   return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcltzh_f16 (float16_t __a)
 {
   return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_f16_s16 (int16_t __a)
 {
   return __builtin_aarch64_floathihf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_f16_s32 (int32_t __a)
 {
   return __builtin_aarch64_floatsihf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_f16_s64 (int64_t __a)
 {
   return __builtin_aarch64_floatdihf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_f16_u16 (uint16_t __a)
 {
   return __builtin_aarch64_floatunshihf_us (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_f16_u32 (uint32_t __a)
 {
   return __builtin_aarch64_floatunssihf_us (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_f16_u64 (uint64_t __a)
 {
   return __builtin_aarch64_floatunsdihf_us (__a);
 }
-__funline int16_t
+__extension__ extern __inline int16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_s16_f16 (float16_t __a)
 {
   return __builtin_aarch64_fix_trunchfhi (__a);
 }
-__funline int32_t
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_s32_f16 (float16_t __a)
 {
   return __builtin_aarch64_fix_trunchfsi (__a);
 }
-__funline int64_t
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_s64_f16 (float16_t __a)
 {
   return __builtin_aarch64_fix_trunchfdi (__a);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_u16_f16 (float16_t __a)
 {
   return __builtin_aarch64_fixuns_trunchfhi_us (__a);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_u32_f16 (float16_t __a)
 {
   return __builtin_aarch64_fixuns_trunchfsi_us (__a);
 }
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_u64_f16 (float16_t __a)
 {
   return __builtin_aarch64_fixuns_trunchfdi_us (__a);
 }
-__funline int16_t
+__extension__ extern __inline int16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtah_s16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lroundhfhi (__a);
 }
-__funline int32_t
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtah_s32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lroundhfsi (__a);
 }
-__funline int64_t
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtah_s64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lroundhfdi (__a);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtah_u16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lrounduhfhi_us (__a);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtah_u32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lrounduhfsi_us (__a);
 }
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtah_u64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lrounduhfdi_us (__a);
 }
-__funline int16_t
+__extension__ extern __inline int16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtmh_s16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfloorhfhi (__a);
 }
-__funline int32_t
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtmh_s32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfloorhfsi (__a);
 }
-__funline int64_t
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtmh_s64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfloorhfdi (__a);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtmh_u16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lflooruhfhi_us (__a);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtmh_u32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lflooruhfsi_us (__a);
 }
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtmh_u64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lflooruhfdi_us (__a);
 }
-__funline int16_t
+__extension__ extern __inline int16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtnh_s16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfrintnhfhi (__a);
 }
-__funline int32_t
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtnh_s32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfrintnhfsi (__a);
 }
-__funline int64_t
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtnh_s64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfrintnhfdi (__a);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtnh_u16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfrintnuhfhi_us (__a);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtnh_u32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfrintnuhfsi_us (__a);
 }
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtnh_u64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lfrintnuhfdi_us (__a);
 }
-__funline int16_t
+__extension__ extern __inline int16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtph_s16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lceilhfhi (__a);
 }
-__funline int32_t
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtph_s32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lceilhfsi (__a);
 }
-__funline int64_t
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtph_s64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lceilhfdi (__a);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtph_u16_f16 (float16_t __a)
 {
   return __builtin_aarch64_lceiluhfhi_us (__a);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtph_u32_f16 (float16_t __a)
 {
   return __builtin_aarch64_lceiluhfsi_us (__a);
 }
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtph_u64_f16 (float16_t __a)
 {
   return __builtin_aarch64_lceiluhfdi_us (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vnegh_f16 (float16_t __a)
 {
   return __builtin_aarch64_neghf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrecpeh_f16 (float16_t __a)
 {
   return __builtin_aarch64_frecpehf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrecpxh_f16 (float16_t __a)
 {
   return __builtin_aarch64_frecpxhf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndh_f16 (float16_t __a)
 {
   return __builtin_aarch64_btrunchf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndah_f16 (float16_t __a)
 {
   return __builtin_aarch64_roundhf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndih_f16 (float16_t __a)
 {
   return __builtin_aarch64_nearbyinthf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndmh_f16 (float16_t __a)
 {
   return __builtin_aarch64_floorhf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndnh_f16 (float16_t __a)
 {
-  return __builtin_aarch64_frintnhf (__a);
+  return __builtin_aarch64_roundevenhf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndph_f16 (float16_t __a)
 {
   return __builtin_aarch64_ceilhf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndxh_f16 (float16_t __a)
 {
   return __builtin_aarch64_rinthf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsqrteh_f16 (float16_t __a)
 {
   return __builtin_aarch64_rsqrtehf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsqrth_f16 (float16_t __a)
 {
   return __builtin_aarch64_sqrthf (__a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddh_f16 (float16_t __a, float16_t __b)
 {
   return __a + __b;
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vabdh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_fabdhf (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcageh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_facgehf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcagth_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_facgthf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcaleh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_faclehf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcalth_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_faclthf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vceqh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_cmeqhf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcgeh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_cmgehf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcgth_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_cmgthf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcleh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_cmlehf_uss (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vclth_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_cmlthf_uss (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_f16_s16 (int16_t __a, const int __b)
 {
   return __builtin_aarch64_scvtfhi (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_f16_s32 (int32_t __a, const int __b)
 {
   return __builtin_aarch64_scvtfsihf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_f16_s64 (int64_t __a, const int __b)
 {
   return __builtin_aarch64_scvtfdihf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_f16_u16 (uint16_t __a, const int __b)
 {
   return __builtin_aarch64_ucvtfhi_sus (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_f16_u32 (uint32_t __a, const int __b)
 {
   return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_f16_u64 (uint64_t __a, const int __b)
 {
   return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
 }
-__funline int16_t
+__extension__ extern __inline int16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_s16_f16 (float16_t __a, const int __b)
 {
   return __builtin_aarch64_fcvtzshf (__a, __b);
 }
-__funline int32_t
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_s32_f16 (float16_t __a, const int __b)
 {
   return __builtin_aarch64_fcvtzshfsi (__a, __b);
 }
-__funline int64_t
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_s64_f16 (float16_t __a, const int __b)
 {
   return __builtin_aarch64_fcvtzshfdi (__a, __b);
 }
-__funline uint16_t
+__extension__ extern __inline uint16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_u16_f16 (float16_t __a, const int __b)
 {
   return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
 }
-__funline uint32_t
+__extension__ extern __inline uint32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_u32_f16 (float16_t __a, const int __b)
 {
   return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
 }
-__funline uint64_t
+__extension__ extern __inline uint64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvth_n_u64_f16 (float16_t __a, const int __b)
 {
   return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdivh_f16 (float16_t __a, float16_t __b)
 {
   return __a / __b;
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_fmaxhf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxnmh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_fmaxhf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_fminhf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminnmh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_fminhf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmulh_f16 (float16_t __a, float16_t __b)
 {
   return __a * __b;
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmulxh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_fmulxhf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrecpsh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_frecpshf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsqrtsh_f16 (float16_t __a, float16_t __b)
 {
   return __builtin_aarch64_rsqrtshf (__a, __b);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubh_f16 (float16_t __a, float16_t __b)
 {
   return __a - __b;
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vfmah_f16 (float16_t __a, float16_t __b, float16_t __c)
 {
   return __builtin_aarch64_fmahf (__b, __c, __a);
 }
-__funline float16_t
+__extension__ extern __inline float16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c)
 {
   return __builtin_aarch64_fnmahf (__b, __c, __a);
diff --git a/third_party/aarch64/arm_neon.internal.h b/third_party/aarch64/arm_neon.internal.h
index 78560ec85..1cbe2db72 100644
--- a/third_party/aarch64/arm_neon.internal.h
+++ b/third_party/aarch64/arm_neon.internal.h
@@ -3,6 +3,7 @@
 #define _AARCH64_NEON_H_
 #pragma GCC push_options
 #pragma GCC target ("+nothing+simd")
+#pragma GCC aarch64 "arm_neon.h"
 #pragma GCC diagnostic ignored "-Wmissing-braces"
 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
@@ -43,366 +44,6 @@ typedef float float32_t;
 typedef double float64_t;
 typedef __Bfloat16x4_t bfloat16x4_t;
 typedef __Bfloat16x8_t bfloat16x8_t;
-typedef struct bfloat16x4x2_t
-{
-  bfloat16x4_t val[2];
-} bfloat16x4x2_t;
-typedef struct bfloat16x8x2_t
-{
-  bfloat16x8_t val[2];
-} bfloat16x8x2_t;
-typedef struct bfloat16x4x3_t
-{
-  bfloat16x4_t val[3];
-} bfloat16x4x3_t;
-typedef struct bfloat16x8x3_t
-{
-  bfloat16x8_t val[3];
-} bfloat16x8x3_t;
-typedef struct bfloat16x4x4_t
-{
-  bfloat16x4_t val[4];
-} bfloat16x4x4_t;
-typedef struct bfloat16x8x4_t
-{
-  bfloat16x8_t val[4];
-} bfloat16x8x4_t;
-typedef struct int8x8x2_t
-{
-  int8x8_t val[2];
-} int8x8x2_t;
-typedef struct int8x16x2_t
-{
-  int8x16_t val[2];
-} int8x16x2_t;
-typedef struct int16x4x2_t
-{
-  int16x4_t val[2];
-} int16x4x2_t;
-typedef struct int16x8x2_t
-{
-  int16x8_t val[2];
-} int16x8x2_t;
-typedef struct int32x2x2_t
-{
-  int32x2_t val[2];
-} int32x2x2_t;
-typedef struct int32x4x2_t
-{
-  int32x4_t val[2];
-} int32x4x2_t;
-typedef struct int64x1x2_t
-{
-  int64x1_t val[2];
-} int64x1x2_t;
-typedef struct int64x2x2_t
-{
-  int64x2_t val[2];
-} int64x2x2_t;
-typedef struct uint8x8x2_t
-{
-  uint8x8_t val[2];
-} uint8x8x2_t;
-typedef struct uint8x16x2_t
-{
-  uint8x16_t val[2];
-} uint8x16x2_t;
-typedef struct uint16x4x2_t
-{
-  uint16x4_t val[2];
-} uint16x4x2_t;
-typedef struct uint16x8x2_t
-{
-  uint16x8_t val[2];
-} uint16x8x2_t;
-typedef struct uint32x2x2_t
-{
-  uint32x2_t val[2];
-} uint32x2x2_t;
-typedef struct uint32x4x2_t
-{
-  uint32x4_t val[2];
-} uint32x4x2_t;
-typedef struct uint64x1x2_t
-{
-  uint64x1_t val[2];
-} uint64x1x2_t;
-typedef struct uint64x2x2_t
-{
-  uint64x2_t val[2];
-} uint64x2x2_t;
-typedef struct float16x4x2_t
-{
-  float16x4_t val[2];
-} float16x4x2_t;
-typedef struct float16x8x2_t
-{
-  float16x8_t val[2];
-} float16x8x2_t;
-typedef struct float32x2x2_t
-{
-  float32x2_t val[2];
-} float32x2x2_t;
-typedef struct float32x4x2_t
-{
-  float32x4_t val[2];
-} float32x4x2_t;
-typedef struct float64x2x2_t
-{
-  float64x2_t val[2];
-} float64x2x2_t;
-typedef struct float64x1x2_t
-{
-  float64x1_t val[2];
-} float64x1x2_t;
-typedef struct poly8x8x2_t
-{
-  poly8x8_t val[2];
-} poly8x8x2_t;
-typedef struct poly8x16x2_t
-{
-  poly8x16_t val[2];
-} poly8x16x2_t;
-typedef struct poly16x4x2_t
-{
-  poly16x4_t val[2];
-} poly16x4x2_t;
-typedef struct poly16x8x2_t
-{
-  poly16x8_t val[2];
-} poly16x8x2_t;
-typedef struct poly64x1x2_t
-{
-  poly64x1_t val[2];
-} poly64x1x2_t;
-typedef struct poly64x1x3_t
-{
-  poly64x1_t val[3];
-} poly64x1x3_t;
-typedef struct poly64x1x4_t
-{
-  poly64x1_t val[4];
-} poly64x1x4_t;
-typedef struct poly64x2x2_t
-{
-  poly64x2_t val[2];
-} poly64x2x2_t;
-typedef struct poly64x2x3_t
-{
-  poly64x2_t val[3];
-} poly64x2x3_t;
-typedef struct poly64x2x4_t
-{
-  poly64x2_t val[4];
-} poly64x2x4_t;
-typedef struct int8x8x3_t
-{
-  int8x8_t val[3];
-} int8x8x3_t;
-typedef struct int8x16x3_t
-{
-  int8x16_t val[3];
-} int8x16x3_t;
-typedef struct int16x4x3_t
-{
-  int16x4_t val[3];
-} int16x4x3_t;
-typedef struct int16x8x3_t
-{
-  int16x8_t val[3];
-} int16x8x3_t;
-typedef struct int32x2x3_t
-{
-  int32x2_t val[3];
-} int32x2x3_t;
-typedef struct int32x4x3_t
-{
-  int32x4_t val[3];
-} int32x4x3_t;
-typedef struct int64x1x3_t
-{
-  int64x1_t val[3];
-} int64x1x3_t;
-typedef struct int64x2x3_t
-{
-  int64x2_t val[3];
-} int64x2x3_t;
-typedef struct uint8x8x3_t
-{
-  uint8x8_t val[3];
-} uint8x8x3_t;
-typedef struct uint8x16x3_t
-{
-  uint8x16_t val[3];
-} uint8x16x3_t;
-typedef struct uint16x4x3_t
-{
-  uint16x4_t val[3];
-} uint16x4x3_t;
-typedef struct uint16x8x3_t
-{
-  uint16x8_t val[3];
-} uint16x8x3_t;
-typedef struct uint32x2x3_t
-{
-  uint32x2_t val[3];
-} uint32x2x3_t;
-typedef struct uint32x4x3_t
-{
-  uint32x4_t val[3];
-} uint32x4x3_t;
-typedef struct uint64x1x3_t
-{
-  uint64x1_t val[3];
-} uint64x1x3_t;
-typedef struct uint64x2x3_t
-{
-  uint64x2_t val[3];
-} uint64x2x3_t;
-typedef struct float16x4x3_t
-{
-  float16x4_t val[3];
-} float16x4x3_t;
-typedef struct float16x8x3_t
-{
-  float16x8_t val[3];
-} float16x8x3_t;
-typedef struct float32x2x3_t
-{
-  float32x2_t val[3];
-} float32x2x3_t;
-typedef struct float32x4x3_t
-{
-  float32x4_t val[3];
-} float32x4x3_t;
-typedef struct float64x2x3_t
-{
-  float64x2_t val[3];
-} float64x2x3_t;
-typedef struct float64x1x3_t
-{
-  float64x1_t val[3];
-} float64x1x3_t;
-typedef struct poly8x8x3_t
-{
-  poly8x8_t val[3];
-} poly8x8x3_t;
-typedef struct poly8x16x3_t
-{
-  poly8x16_t val[3];
-} poly8x16x3_t;
-typedef struct poly16x4x3_t
-{
-  poly16x4_t val[3];
-} poly16x4x3_t;
-typedef struct poly16x8x3_t
-{
-  poly16x8_t val[3];
-} poly16x8x3_t;
-typedef struct int8x8x4_t
-{
-  int8x8_t val[4];
-} int8x8x4_t;
-typedef struct int8x16x4_t
-{
-  int8x16_t val[4];
-} int8x16x4_t;
-typedef struct int16x4x4_t
-{
-  int16x4_t val[4];
-} int16x4x4_t;
-typedef struct int16x8x4_t
-{
-  int16x8_t val[4];
-} int16x8x4_t;
-typedef struct int32x2x4_t
-{
-  int32x2_t val[4];
-} int32x2x4_t;
-typedef struct int32x4x4_t
-{
-  int32x4_t val[4];
-} int32x4x4_t;
-typedef struct int64x1x4_t
-{
-  int64x1_t val[4];
-} int64x1x4_t;
-typedef struct int64x2x4_t
-{
-  int64x2_t val[4];
-} int64x2x4_t;
-typedef struct uint8x8x4_t
-{
-  uint8x8_t val[4];
-} uint8x8x4_t;
-typedef struct uint8x16x4_t
-{
-  uint8x16_t val[4];
-} uint8x16x4_t;
-typedef struct uint16x4x4_t
-{
-  uint16x4_t val[4];
-} uint16x4x4_t;
-typedef struct uint16x8x4_t
-{
-  uint16x8_t val[4];
-} uint16x8x4_t;
-typedef struct uint32x2x4_t
-{
-  uint32x2_t val[4];
-} uint32x2x4_t;
-typedef struct uint32x4x4_t
-{
-  uint32x4_t val[4];
-} uint32x4x4_t;
-typedef struct uint64x1x4_t
-{
-  uint64x1_t val[4];
-} uint64x1x4_t;
-typedef struct uint64x2x4_t
-{
-  uint64x2_t val[4];
-} uint64x2x4_t;
-typedef struct float16x4x4_t
-{
-  float16x4_t val[4];
-} float16x4x4_t;
-typedef struct float16x8x4_t
-{
-  float16x8_t val[4];
-} float16x8x4_t;
-typedef struct float32x2x4_t
-{
-  float32x2_t val[4];
-} float32x2x4_t;
-typedef struct float32x4x4_t
-{
-  float32x4_t val[4];
-} float32x4x4_t;
-typedef struct float64x2x4_t
-{
-  float64x2_t val[4];
-} float64x2x4_t;
-typedef struct float64x1x4_t
-{
-  float64x1_t val[4];
-} float64x1x4_t;
-typedef struct poly8x8x4_t
-{
-  poly8x8_t val[4];
-} poly8x8x4_t;
-typedef struct poly8x16x4_t
-{
-  poly8x16_t val[4];
-} poly8x16x4_t;
-typedef struct poly16x4x4_t
-{
-  poly16x4_t val[4];
-} poly16x4x4_t;
-typedef struct poly16x8x4_t
-{
-  poly16x8_t val[4];
-} poly16x8x4_t;
 #define __aarch64_vdup_lane_any(__size, __q, __a, __b) vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))
 #define __aarch64_vdup_lane_f16(__a, __b) __aarch64_vdup_lane_any (f16, , __a, __b)
 #define __aarch64_vdup_lane_f32(__a, __b) __aarch64_vdup_lane_any (f32, , __a, __b)
@@ -593,475 +234,433 @@ __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
+  return __builtin_aarch64_saddlv8qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
+  return __builtin_aarch64_saddlv4hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
+  return __builtin_aarch64_saddlv2si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
-         (int8x8_t) __b);
+  return __builtin_aarch64_uaddlv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
-         (int16x4_t) __b);
+  return __builtin_aarch64_uaddlv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
-         (int32x2_t) __b);
+  return __builtin_aarch64_uaddlv2si_uuu (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
+  return __builtin_aarch64_saddl2v16qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
+  return __builtin_aarch64_saddl2v8hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
+  return __builtin_aarch64_saddl2v4si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
-           (int8x16_t) __b);
+  return __builtin_aarch64_uaddl2v16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
-          (int16x8_t) __b);
+  return __builtin_aarch64_uaddl2v8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
-          (int32x4_t) __b);
+  return __builtin_aarch64_uaddl2v4si_uuu (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_s8 (int16x8_t __a, int8x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
+  return __builtin_aarch64_saddwv8qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_s16 (int32x4_t __a, int16x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
+  return __builtin_aarch64_saddwv4hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_s32 (int64x2_t __a, int32x2_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
+  return __builtin_aarch64_saddwv2si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
-         (int8x8_t) __b);
+  return __builtin_aarch64_uaddwv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
-         (int16x4_t) __b);
+  return __builtin_aarch64_uaddwv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
-         (int32x2_t) __b);
+  return __builtin_aarch64_uaddwv2si_uuu (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
+  return __builtin_aarch64_saddw2v16qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
+  return __builtin_aarch64_saddw2v8hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
+  return __builtin_aarch64_saddw2v4si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
-           (int8x16_t) __b);
+  return __builtin_aarch64_uaddw2v16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
-          (int16x8_t) __b);
+  return __builtin_aarch64_uaddw2v8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
-          (int32x4_t) __b);
+  return __builtin_aarch64_uaddw2v4si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhadd_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
+  return __builtin_aarch64_shaddv8qi (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhadd_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
+  return __builtin_aarch64_shaddv4hi (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhadd_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
+  return __builtin_aarch64_shaddv2si (__a, __b);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
-        (int8x8_t) __b);
+  return __builtin_aarch64_uhaddv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
-         (int16x4_t) __b);
+  return __builtin_aarch64_uhaddv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
-         (int32x2_t) __b);
+  return __builtin_aarch64_uhaddv2si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
+  return __builtin_aarch64_shaddv16qi (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
+  return __builtin_aarch64_shaddv8hi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
+  return __builtin_aarch64_shaddv4si (__a, __b);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
-          (int8x16_t) __b);
+  return __builtin_aarch64_uhaddv16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
-         (int16x8_t) __b);
+  return __builtin_aarch64_uhaddv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
-         (int32x4_t) __b);
+  return __builtin_aarch64_uhaddv4si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
+  return __builtin_aarch64_srhaddv8qi (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
+  return __builtin_aarch64_srhaddv4hi (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
+  return __builtin_aarch64_srhaddv2si (__a, __b);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
-         (int8x8_t) __b);
+  return __builtin_aarch64_urhaddv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
-          (int16x4_t) __b);
+  return __builtin_aarch64_urhaddv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
-          (int32x2_t) __b);
+  return __builtin_aarch64_urhaddv2si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
+  return __builtin_aarch64_srhaddv16qi (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
+  return __builtin_aarch64_srhaddv8hi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
+  return __builtin_aarch64_srhaddv4si (__a, __b);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
-           (int8x16_t) __b);
+  return __builtin_aarch64_urhaddv16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
-          (int16x8_t) __b);
+  return __builtin_aarch64_urhaddv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
-          (int32x4_t) __b);
+  return __builtin_aarch64_urhaddv4si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
+  return __builtin_aarch64_addhnv8hi (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
+  return __builtin_aarch64_addhnv4si (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
+  return __builtin_aarch64_addhnv2di (__a, __b);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
-        (int16x8_t) __b);
+  return __builtin_aarch64_addhnv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
-         (int32x4_t) __b);
+  return __builtin_aarch64_addhnv4si_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
-         (int64x2_t) __b);
+  return __builtin_aarch64_addhnv2di_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
+  return __builtin_aarch64_raddhnv8hi (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
+  return __builtin_aarch64_raddhnv4si (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
+  return __builtin_aarch64_raddhnv2di (__a, __b);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
-         (int16x8_t) __b);
+  return __builtin_aarch64_raddhnv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
-          (int32x4_t) __b);
+  return __builtin_aarch64_raddhnv4si_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
-          (int64x2_t) __b);
+  return __builtin_aarch64_raddhnv2di_uuu (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
 {
-  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_addhn2v8hi (__a, __b, __c);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
 {
-  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
+  return __builtin_aarch64_addhn2v4si (__a, __b, __c);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
 {
-  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
+  return __builtin_aarch64_addhn2v2di (__a, __b, __c);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
 {
-  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
-          (int16x8_t) __b,
-          (int16x8_t) __c);
+  return __builtin_aarch64_addhn2v8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
-          (int32x4_t) __b,
-          (int32x4_t) __c);
+  return __builtin_aarch64_addhn2v4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
-          (int64x2_t) __b,
-          (int64x2_t) __c);
+  return __builtin_aarch64_addhn2v2di_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
 {
-  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
 {
-  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
+  return __builtin_aarch64_raddhn2v4si (__a, __b, __c);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
 {
-  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
+  return __builtin_aarch64_raddhn2v2di (__a, __b, __c);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
 {
-  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
-           (int16x8_t) __b,
-           (int16x8_t) __c);
+  return __builtin_aarch64_raddhn2v8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
-           (int32x4_t) __b,
-           (int32x4_t) __c);
+  return __builtin_aarch64_raddhn2v4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
-           (int64x2_t) __b,
-           (int64x2_t) __c);
+  return __builtin_aarch64_raddhn2v2di_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -1139,8 +738,7 @@ __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
-       (int8x8_t) __b);
+  return __builtin_aarch64_pmulv8qi_ppp (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -1194,8 +792,7 @@ __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
-  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
-         (int8x16_t) __b);
+  return __builtin_aarch64_pmulv16qi_ppp (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -1801,157 +1398,145 @@ __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
+  return __builtin_aarch64_ssublv8qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
+  return __builtin_aarch64_ssublv4hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
+  return __builtin_aarch64_ssublv2si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
-         (int8x8_t) __b);
+  return __builtin_aarch64_usublv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
-         (int16x4_t) __b);
+  return __builtin_aarch64_usublv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
-         (int32x2_t) __b);
+  return __builtin_aarch64_usublv2si_uuu (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
+  return __builtin_aarch64_ssubl2v16qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
+  return __builtin_aarch64_ssubl2v8hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
+  return __builtin_aarch64_ssubl2v4si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
-           (int8x16_t) __b);
+  return __builtin_aarch64_usubl2v16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
-          (int16x8_t) __b);
+  return __builtin_aarch64_usubl2v8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
-          (int32x4_t) __b);
+  return __builtin_aarch64_usubl2v4si_uuu (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_s8 (int16x8_t __a, int8x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
+  return __builtin_aarch64_ssubwv8qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_s16 (int32x4_t __a, int16x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
+  return __builtin_aarch64_ssubwv4hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_s32 (int64x2_t __a, int32x2_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
+  return __builtin_aarch64_ssubwv2si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
-         (int8x8_t) __b);
+  return __builtin_aarch64_usubwv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
-         (int16x4_t) __b);
+  return __builtin_aarch64_usubwv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
-         (int32x2_t) __b);
+  return __builtin_aarch64_usubwv2si_uuu (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
+  return __builtin_aarch64_ssubw2v16qi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
+  return __builtin_aarch64_ssubw2v8hi (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
+  return __builtin_aarch64_ssubw2v4si (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
-           (int8x16_t) __b);
+  return __builtin_aarch64_usubw2v16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
-          (int16x8_t) __b);
+  return __builtin_aarch64_usubw2v8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
-          (int32x4_t) __b);
+  return __builtin_aarch64_usubw2v4si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -1987,241 +1572,217 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsub_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_aarch64_shsubv8qi (__a, __b);
+  return __builtin_aarch64_shsubv8qi (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsub_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
+  return __builtin_aarch64_shsubv4hi (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsub_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
+  return __builtin_aarch64_shsubv2si (__a, __b);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a,
-        (int8x8_t) __b);
+  return __builtin_aarch64_uhsubv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a,
-         (int16x4_t) __b);
+  return __builtin_aarch64_uhsubv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a,
-         (int32x2_t) __b);
+  return __builtin_aarch64_uhsubv2si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsubq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
+  return __builtin_aarch64_shsubv16qi (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsubq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
+  return __builtin_aarch64_shsubv8hi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsubq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
+  return __builtin_aarch64_shsubv4si (__a, __b);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a,
-          (int8x16_t) __b);
+  return __builtin_aarch64_uhsubv16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a,
-         (int16x8_t) __b);
+  return __builtin_aarch64_uhsubv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a,
-         (int32x4_t) __b);
+  return __builtin_aarch64_uhsubv4si_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
+  return __builtin_aarch64_subhnv8hi (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
+  return __builtin_aarch64_subhnv4si (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
+  return __builtin_aarch64_subhnv2di (__a, __b);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a,
-        (int16x8_t) __b);
+  return __builtin_aarch64_subhnv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
-         (int32x4_t) __b);
+  return __builtin_aarch64_subhnv4si_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a,
-         (int64x2_t) __b);
+  return __builtin_aarch64_subhnv2di_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
+  return __builtin_aarch64_rsubhnv8hi (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
+  return __builtin_aarch64_rsubhnv4si (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
+  return __builtin_aarch64_rsubhnv2di (__a, __b);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a,
-         (int16x8_t) __b);
+  return __builtin_aarch64_rsubhnv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a,
-          (int32x4_t) __b);
+  return __builtin_aarch64_rsubhnv4si_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a,
-          (int64x2_t) __b);
+  return __builtin_aarch64_rsubhnv2di_uuu (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
 {
-  return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
 {
-  return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
+  return __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
 {
-  return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
+  return __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
 {
-  return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a,
-           (int16x8_t) __b,
-           (int16x8_t) __c);
+  return __builtin_aarch64_rsubhn2v8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a,
-           (int32x4_t) __b,
-           (int32x4_t) __c);
+  return __builtin_aarch64_rsubhn2v4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a,
-           (int64x2_t) __b,
-           (int64x2_t) __c);
+  return __builtin_aarch64_rsubhn2v2di_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
 {
-  return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_subhn2v8hi (__a, __b, __c);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
 {
-  return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);;
+  return __builtin_aarch64_subhn2v4si (__a, __b, __c);;
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
 {
-  return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c);
+  return __builtin_aarch64_subhn2v2di (__a, __b, __c);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
 {
-  return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
-          (int16x8_t) __b,
-          (int16x8_t) __c);
+  return __builtin_aarch64_subhn2v8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
-          (int32x4_t) __b,
-          (int32x4_t) __c);
+  return __builtin_aarch64_subhn2v4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
-          (int64x2_t) __b,
-          (int64x2_t) __c);
+  return __builtin_aarch64_subhn2v2di_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -5461,19 +5022,19 @@ __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
+  return __builtin_aarch64_combinev8qi (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
+  return __builtin_aarch64_combinev4hi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
+  return __builtin_aarch64_combinev2si (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -5491,34 +5052,31 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
+  return __builtin_aarch64_combinev2sf (__a, __b);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
-           (int8x8_t) __b);
+  return __builtin_aarch64_combinev8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
-           (int16x4_t) __b);
+  return __builtin_aarch64_combinev4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
-           (int32x2_t) __b);
+  return __builtin_aarch64_combinev2si_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
+  return __builtin_aarch64_combinedi_uuu (__a[0], __b[0]);
 }
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -5530,21 +5088,19 @@ __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
-           (int8x8_t) __b);
+  return __builtin_aarch64_combinev8qi_ppp (__a, __b);
 }
 __extension__ extern __inline poly16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
 {
-  return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
-           (int16x4_t) __b);
+  return __builtin_aarch64_combinev4hi_ppp (__a, __b);
 }
 __extension__ extern __inline poly64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_p64 (poly64x1_t __a, poly64x1_t __b)
 {
-  return (poly64x2_t) __builtin_aarch64_combinedi_ppp (__a[0], __b[0]);
+  return __builtin_aarch64_combinedi_ppp (__a[0], __b[0]);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -5898,46 +5454,25 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtx_f32_f64 (float64x2_t __a)
 {
-  float32x2_t __result;
-  __asm__ ("fcvtxn %0.2s,%1.2d"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_float_trunc_rodd_lo_v2sf (__a);
 }
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtx_high_f32_f64 (float32x2_t __a, float64x2_t __b)
 {
-  float32x4_t __result;
-  __asm__ ("fcvtxn2 %0.4s,%1.2d"
-           : "=w"(__result)
-           : "w" (__b), "0"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_float_trunc_rodd_hi_v4sf (__a, __b);
 }
 __extension__ extern __inline float32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvtxd_f32_f64 (float64_t __a)
 {
-  float32_t __result;
-  __asm__ ("fcvtxn %s0,%d1"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_float_trunc_rodd_df (__a);
 }
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
 {
-  float32x2_t __result;
-  float32x2_t __t1;
-  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
-           : "=w"(__result), "=w"(__t1)
-           : "0"(__a), "w"(__b), "w"(__c)
-           : );
-  return __result;
+  return __builtin_aarch64_float_mla_nv2sf (__a, __b, __c);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -5955,17 +5490,13 @@ __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c)
 {
-  return (uint16x4_t) __builtin_aarch64_mla_nv4hi ((int16x4_t) __a,
-                                                   (int16x4_t) __b,
-                                                   (int16_t) __c);
+  return __builtin_aarch64_mla_nv4hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c)
 {
-  return (uint32x2_t) __builtin_aarch64_mla_nv2si ((int32x2_t) __a,
-                                                   (int32x2_t) __b,
-                                                   (int32_t) __c);
+  return __builtin_aarch64_mla_nv2si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -5989,25 +5520,19 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
 {
-  return (uint8x8_t) __builtin_aarch64_mlav8qi ((int8x8_t) __a,
-                                                (int8x8_t) __b,
-                                                (int8x8_t) __c);
+  return __builtin_aarch64_mlav8qi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
 {
-  return (uint16x4_t) __builtin_aarch64_mlav4hi ((int16x4_t) __a,
-                                                 (int16x4_t) __b,
-                                                 (int16x4_t) __c);
+  return __builtin_aarch64_mlav4hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
 {
-  return (uint32x2_t) __builtin_aarch64_mlav2si ((int32x2_t) __a,
-                                                 (int32x2_t) __b,
-                                                 (int32x2_t) __c);
+  return __builtin_aarch64_mlav2si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6237,13 +5762,7 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
 {
-  float32x4_t __result;
-  float32x4_t __t1;
-  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
-           : "=w"(__result), "=w"(__t1)
-           : "0"(__a), "w"(__b), "w"(__c)
-           : );
-  return __result;
+  return __builtin_aarch64_float_mla_nv4sf (__a, __b, __c);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6261,17 +5780,13 @@ __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_mla_nv8hi ((int16x8_t) __a,
-                                                   (int16x8_t) __b,
-                                                   (int16_t) __c);
+  return __builtin_aarch64_mla_nv8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_mla_nv4si ((int32x4_t) __a,
-                                                   (int32x4_t) __b,
-                                                   (int32_t) __c);
+  return __builtin_aarch64_mla_nv4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6295,37 +5810,25 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
 {
-  return (uint8x16_t) __builtin_aarch64_mlav16qi ((int8x16_t) __a,
-                                                  (int8x16_t) __b,
-                                                  (int8x16_t) __c);
+  return __builtin_aarch64_mlav16qi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_mlav8hi ((int16x8_t) __a,
-                                                 (int16x8_t) __b,
-                                                 (int16x8_t) __c);
+  return __builtin_aarch64_mlav8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_mlav4si ((int32x4_t) __a,
-                                                 (int32x4_t) __b,
-                                                 (int32x4_t) __c);
+  return __builtin_aarch64_mlav4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
 {
-  float32x2_t __result;
-  float32x2_t __t1;
-  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
-           : "=w"(__result), "=w"(__t1)
-           : "0"(__a), "w"(__b), "w"(__c)
-           : );
-  return __result;
+  return __builtin_aarch64_float_mls_nv2sf (__a, __b, __c);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6343,17 +5846,13 @@ __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c)
 {
-  return (uint16x4_t) __builtin_aarch64_mls_nv4hi ((int16x4_t) __a,
-                                                   (int16x4_t) __b,
-                                                   (int16_t) __c);
+  return __builtin_aarch64_mls_nv4hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c)
 {
-  return (uint32x2_t) __builtin_aarch64_mls_nv2si ((int32x2_t) __a,
-                                                   (int32x2_t) __b,
-                                                   (int32_t) __c);
+  return __builtin_aarch64_mls_nv2si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6377,25 +5876,19 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
 {
-  return (uint8x8_t) __builtin_aarch64_mlsv8qi ((int8x8_t) __a,
-                                                (int8x8_t) __b,
-                                                (int8x8_t) __c);
+  return __builtin_aarch64_mlsv8qi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
 {
-  return (uint16x4_t) __builtin_aarch64_mlsv4hi ((int16x4_t) __a,
-                                                 (int16x4_t) __b,
-                                                 (int16x4_t) __c);
+  return __builtin_aarch64_mlsv4hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
 {
-  return (uint32x2_t) __builtin_aarch64_mlsv2si ((int32x2_t) __a,
-                                                 (int32x2_t) __b,
-                                                 (int32x2_t) __c);
+  return __builtin_aarch64_mlsv2si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6629,13 +6122,7 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
 {
-  float32x4_t __result;
-  float32x4_t __t1;
-  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
-           : "=w"(__result), "=w"(__t1)
-           : "0"(__a), "w"(__b), "w"(__c)
-           : );
-  return __result;
+  return __builtin_aarch64_float_mls_nv4sf (__a, __b, __c);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6653,17 +6140,13 @@ __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_mls_nv8hi ((int16x8_t) __a,
-                                                   (int16x8_t) __b,
-                                                   (int16_t) __c);
+  return __builtin_aarch64_mls_nv8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_mls_nv4si ((int32x4_t) __a,
-                                                   (int32x4_t) __b,
-                                                   (int32_t) __c);
+  return __builtin_aarch64_mls_nv4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6687,25 +6170,19 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
 {
-  return (uint8x16_t) __builtin_aarch64_mlsv16qi ((int8x16_t) __a,
-                                                  (int8x16_t) __b,
-                                                  (int8x16_t) __c);
+  return __builtin_aarch64_mlsv16qi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
 {
-  return (uint16x8_t) __builtin_aarch64_mlsv8hi ((int16x8_t) __a,
-                                                 (int16x8_t) __b,
-                                                 (int16x8_t) __c);
+  return __builtin_aarch64_mlsv8hi_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
 {
-  return (uint32x4_t) __builtin_aarch64_mlsv4si ((int32x4_t) __a,
-                                                 (int32x4_t) __b,
-                                                 (int32x4_t) __c);
+  return __builtin_aarch64_mlsv4si_uuuu (__a, __b, __c);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6801,22 +6278,19 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovn_high_u16 (uint8x8_t __a, uint16x8_t __b)
 {
-  return (uint8x16_t)
-    __builtin_aarch64_xtn2v8hi ((int8x8_t) __a, (int16x8_t) __b);
+  return __builtin_aarch64_xtn2v8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovn_high_u32 (uint16x4_t __a, uint32x4_t __b)
 {
-  return (uint16x8_t)
-    __builtin_aarch64_xtn2v4si ((int16x4_t) __a, (int32x4_t) __b);
+  return __builtin_aarch64_xtn2v4si_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovn_high_u64 (uint32x2_t __a, uint64x2_t __b)
 {
-  return (uint32x4_t)
-    __builtin_aarch64_xtn2v2di ((int32x2_t) __a, (int64x2_t) __b);
+  return __builtin_aarch64_xtn2v2di_uuu (__a, __b);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6840,19 +6314,19 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovn_u16 (uint16x8_t __a)
 {
-  return (uint8x8_t)__builtin_aarch64_xtnv8hi ((int16x8_t) __a);
+  return __builtin_aarch64_xtnv8hi_uu (__a);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovn_u32 (uint32x4_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_xtnv4si ((int32x4_t )__a);
+  return __builtin_aarch64_xtnv4si_uu (__a);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovn_u64 (uint64x2_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_xtnv2di ((int64x2_t) __a);
+  return __builtin_aarch64_xtnv2di_uu (__a);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6876,19 +6350,19 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint8x8_t)__builtin_aarch64_shrnv8hi ((int16x8_t)__a, __b);
+  return __builtin_aarch64_shrnv8hi_uus (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint16x4_t)__builtin_aarch64_shrnv4si ((int32x4_t)__a, __b);
+  return __builtin_aarch64_shrnv4si_uus (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint32x2_t)__builtin_aarch64_shrnv2di ((int64x2_t)__a, __b);
+  return __builtin_aarch64_shrnv2di_uus (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -6966,12 +6440,7 @@ __extension__ extern __inline poly16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmull_high_p8 (poly8x16_t __a, poly8x16_t __b)
 {
-  poly16x8_t __result;
-  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_pmull_hiv16qi_ppp (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7085,12 +6554,7 @@ __extension__ extern __inline poly16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmull_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  poly16x8_t __result;
-  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_pmullv8qi_ppp (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7144,12 +6608,7 @@ __extension__ extern __inline int64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpadal_s32 (int64x1_t __a, int32x2_t __b)
 {
-  int64x1_t __result;
-  __asm__ ("sadalp %0.1d,%2.2s"
-           : "=w"(__result)
-           : "0"(__a), "w"(__b)
-           : );
-  return __result;
+  return (int64x1_t) __builtin_aarch64_sadalpv2si (__a[0], __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7167,12 +6626,7 @@ __extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpadal_u32 (uint64x1_t __a, uint32x2_t __b)
 {
-  uint64x1_t __result;
-  __asm__ ("uadalp %0.1d,%2.2s"
-           : "=w"(__result)
-           : "0"(__a), "w"(__b)
-           : );
-  return __result;
+  return (uint64x1_t) __builtin_aarch64_uadalpv2si_uuu (__a[0], __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7214,265 +6668,145 @@ __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddl_s8 (int8x8_t __a)
 {
-  int16x4_t __result;
-  __asm__ ("saddlp %0.4h,%1.8b"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_saddlpv8qi (__a);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddl_s16 (int16x4_t __a)
 {
-  int32x2_t __result;
-  __asm__ ("saddlp %0.2s,%1.4h"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_saddlpv4hi (__a);
 }
 __extension__ extern __inline int64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddl_s32 (int32x2_t __a)
 {
-  int64x1_t __result;
-  __asm__ ("saddlp %0.1d,%1.2s"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return (int64x1_t) __builtin_aarch64_saddlpv2si (__a);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddl_u8 (uint8x8_t __a)
 {
-  uint16x4_t __result;
-  __asm__ ("uaddlp %0.4h,%1.8b"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_uaddlpv8qi_uu (__a);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddl_u16 (uint16x4_t __a)
 {
-  uint32x2_t __result;
-  __asm__ ("uaddlp %0.2s,%1.4h"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_uaddlpv4hi_uu (__a);
 }
 __extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddl_u32 (uint32x2_t __a)
 {
-  uint64x1_t __result;
-  __asm__ ("uaddlp %0.1d,%1.2s"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return (uint64x1_t) __builtin_aarch64_uaddlpv2si_uu (__a);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddlq_s8 (int8x16_t __a)
 {
-  int16x8_t __result;
-  __asm__ ("saddlp %0.8h,%1.16b"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_saddlpv16qi (__a);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddlq_s16 (int16x8_t __a)
 {
-  int32x4_t __result;
-  __asm__ ("saddlp %0.4s,%1.8h"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_saddlpv8hi (__a);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddlq_s32 (int32x4_t __a)
 {
-  int64x2_t __result;
-  __asm__ ("saddlp %0.2d,%1.4s"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_saddlpv4si (__a);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddlq_u8 (uint8x16_t __a)
 {
-  uint16x8_t __result;
-  __asm__ ("uaddlp %0.8h,%1.16b"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_uaddlpv16qi_uu (__a);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddlq_u16 (uint16x8_t __a)
 {
-  uint32x4_t __result;
-  __asm__ ("uaddlp %0.4s,%1.8h"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_uaddlpv8hi_uu (__a);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddlq_u32 (uint32x4_t __a)
 {
-  uint64x2_t __result;
-  __asm__ ("uaddlp %0.2d,%1.4s"
-           : "=w"(__result)
-           : "w"(__a)
-           : );
-  return __result;
+  return __builtin_aarch64_uaddlpv4si_uu (__a);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  int8x16_t __result;
-  __asm__ ("addp %0.16b,%1.16b,%2.16b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv16qi (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  int16x8_t __result;
-  __asm__ ("addp %0.8h,%1.8h,%2.8h"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv8hi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  int32x4_t __result;
-  __asm__ ("addp %0.4s,%1.4s,%2.4s"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv4si (__a, __b);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  int64x2_t __result;
-  __asm__ ("addp %0.2d,%1.2d,%2.2d"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv2di (__a, __b);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  uint8x16_t __result;
-  __asm__ ("addp %0.16b,%1.16b,%2.16b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv16qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  uint16x8_t __result;
-  __asm__ ("addp %0.8h,%1.8h,%2.8h"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv8hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  uint32x4_t __result;
-  __asm__ ("addp %0.4s,%1.4s,%2.4s"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv4si_uuu (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  uint64x2_t __result;
-  __asm__ ("addp %0.2d,%1.2d,%2.2d"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_addpv2di_uuu (__a, __b);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulh_n_s16 (int16x4_t __a, int16_t __b)
 {
-  int16x4_t __result;
-  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv4hi (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulh_n_s32 (int32x2_t __a, int32_t __b)
 {
-  int32x2_t __result;
-  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv2si (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulhq_n_s16 (int16x8_t __a, int16_t __b)
 {
-  int16x8_t __result;
-  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv8hi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqdmulhq_n_s32 (int32x4_t __a, int32_t __b)
 {
-  int32x4_t __result;
-  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqdmulh_nv4si (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7532,45 +6866,25 @@ __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulh_n_s16 (int16x4_t __a, int16_t __b)
 {
-  int16x4_t __result;
-  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv4hi (__a, __b);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulh_n_s32 (int32x2_t __a, int32_t __b)
 {
-  int32x2_t __result;
-  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv2si (__a, __b);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b)
 {
-  int16x8_t __result;
-  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
-           : "=w"(__result)
-           : "w"(__a), "x"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv8hi (__a, __b);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
 {
-  int32x4_t __result;
-  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_sqrdmulh_nv4si (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7702,22 +7016,19 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
 {
-  return (uint8x16_t) __builtin_aarch64_rshrn2v8hi ((int8x8_t) __a,
-          (int16x8_t) __b, __c);
+  return __builtin_aarch64_rshrn2v8hi_uuus (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
 {
-  return (uint16x8_t) __builtin_aarch64_rshrn2v4si ((int16x4_t) __a,
-          (int32x4_t) __b, __c);
+  return __builtin_aarch64_rshrn2v4si_uuus (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
 {
-  return (uint32x4_t) __builtin_aarch64_rshrn2v2di ((int32x2_t)__a,
-          (int64x2_t)__b, __c);
+  return __builtin_aarch64_rshrn2v2di_uuus (__a, __b, __c);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7741,19 +7052,19 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_rshrnv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_rshrnv8hi_uus (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_rshrnv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_rshrnv4si_uus (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_rshrnv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_rshrnv2di_uus (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -7789,33 +7100,80 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
 {
-  return (uint8x16_t)
-    __builtin_aarch64_shrn2v8hi ((int8x8_t) __a, (int16x8_t) __b, __c);
+  return __builtin_aarch64_shrn2v8hi_uuus (__a, __b, __c);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
 {
-  return (uint16x8_t)
-    __builtin_aarch64_shrn2v4si ((int16x4_t) __a, (int32x4_t) __b, __c);
+  return __builtin_aarch64_shrn2v4si_uuus (__a, __b, __c);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
 {
-  return (uint32x4_t)
-    __builtin_aarch64_shrn2v2di ((int32x2_t) __a, (int64x2_t) __b, __c);
+  return __builtin_aarch64_shrn2v2di_uuus (__a, __b, __c);
+}
+__extension__ extern __inline poly8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
+{
+  return __builtin_aarch64_ssli_nv8qi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ssli_nv4hi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
+{
+  return __builtin_aarch64_ssli_nv16qi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
+{
+  return __builtin_aarch64_ssli_nv8hi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
+{
+  return __builtin_aarch64_ssri_nv8qi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ssri_nv4hi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
+{
+  return (poly64x1_t) __builtin_aarch64_ssri_ndi_ppps (__a[0], __b[0], __c);
+}
+__extension__ extern __inline poly8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
+{
+  return __builtin_aarch64_ssri_nv16qi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
+{
+  return __builtin_aarch64_ssri_nv8hi_ppps (__a, __b, __c);
+}
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ssri_nv2di_ppps (__a, __b, __c);
 }
-#define vsli_n_p8(a, b, c) __extension__ ({ poly8x8_t b_ = (b); poly8x8_t a_ = (a); poly8x8_t result; __asm__ ("sli %0.8b,%2.8b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsli_n_p16(a, b, c) __extension__ ({ poly16x4_t b_ = (b); poly16x4_t a_ = (a); poly16x4_t result; __asm__ ("sli %0.4h,%2.4h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsliq_n_p8(a, b, c) __extension__ ({ poly8x16_t b_ = (b); poly8x16_t a_ = (a); poly8x16_t result; __asm__ ("sli %0.16b,%2.16b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsliq_n_p16(a, b, c) __extension__ ({ poly16x8_t b_ = (b); poly16x8_t a_ = (a); poly16x8_t result; __asm__ ("sli %0.8h,%2.8h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsri_n_p8(a, b, c) __extension__ ({ poly8x8_t b_ = (b); poly8x8_t a_ = (a); poly8x8_t result; __asm__ ("sri %0.8b,%2.8b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsri_n_p16(a, b, c) __extension__ ({ poly16x4_t b_ = (b); poly16x4_t a_ = (a); poly16x4_t result; __asm__ ("sri %0.4h,%2.4h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsri_n_p64(a, b, c) __extension__ ({ poly64x1_t b_ = (b); poly64x1_t a_ = (a); poly64x1_t result; __asm__ ("sri %d0,%d2,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsriq_n_p8(a, b, c) __extension__ ({ poly8x16_t b_ = (b); poly8x16_t a_ = (a); poly8x16_t result; __asm__ ("sri %0.16b,%2.16b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsriq_n_p16(a, b, c) __extension__ ({ poly16x8_t b_ = (b); poly16x8_t a_ = (a); poly16x8_t result; __asm__ ("sri %0.8h,%2.8h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
-#define vsriq_n_p64(a, b, c) __extension__ ({ poly64x2_t b_ = (b); poly64x2_t a_ = (a); poly64x2_t result; __asm__ ("sri %0.2d,%2.2d,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; })
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtst_p8 (poly8x8_t __a, poly8x8_t __b)
@@ -7857,166 +7215,594 @@ vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
   return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
          != __AARCH64_INT64_C (0));
 }
-#define __STRUCTN(t, sz, nelem) typedef struct t ## sz ## x ## nelem ## _t { t ## sz ## _t val[nelem]; } t ## sz ## x ## nelem ## _t;
-__STRUCTN (int, 8, 2)
-__STRUCTN (int, 16, 2)
-__STRUCTN (uint, 8, 2)
-__STRUCTN (uint, 16, 2)
-__STRUCTN (float, 16, 2)
-__STRUCTN (poly, 8, 2)
-__STRUCTN (poly, 16, 2)
-__STRUCTN (int, 8, 3)
-__STRUCTN (int, 16, 3)
-__STRUCTN (int, 32, 3)
-__STRUCTN (int, 64, 3)
-__STRUCTN (uint, 8, 3)
-__STRUCTN (uint, 16, 3)
-__STRUCTN (uint, 32, 3)
-__STRUCTN (uint, 64, 3)
-__STRUCTN (float, 16, 3)
-__STRUCTN (float, 32, 3)
-__STRUCTN (float, 64, 3)
-__STRUCTN (poly, 8, 3)
-__STRUCTN (poly, 16, 3)
-__STRUCTN (int, 8, 4)
-__STRUCTN (int, 64, 4)
-__STRUCTN (uint, 8, 4)
-__STRUCTN (uint, 64, 4)
-__STRUCTN (poly, 8, 4)
-__STRUCTN (float, 64, 4)
-#undef __STRUCTN
-#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, qmode, ptr_mode, funcsuffix, signedtype) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { __builtin_aarch64_simd_oi __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[1], 1); __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __o, __c); }
-__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
-   float16x8_t)
-__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
-   float32x4_t)
-__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64,
-   float64x2_t)
-__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
-   int8x16_t)
-__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16,
-   int16x8_t)
-__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64,
-   poly64x2_t)
-__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
-   int8x16_t)
-__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
-   int16x8_t)
-__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
-   int32x4_t)
-__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64,
-   int64x2_t)
-__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
-   int8x16_t)
-__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16,
-   int16x8_t)
-__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32,
-   int32x4_t)
-__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64,
-   int64x2_t)
-#define __ST2Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { union { intype __i; __builtin_aarch64_simd_oi __o; } __temp = { __b }; __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __temp.__o, __c); }
-__ST2Q_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
-__ST2Q_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
-__ST2Q_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
-__ST2Q_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
-__ST2Q_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
-__ST2Q_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64)
-__ST2Q_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
-__ST2Q_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
-__ST2Q_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
-__ST2Q_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
-__ST2Q_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
-__ST2Q_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
-__ST2Q_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
-__ST2Q_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
-#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, qmode, ptr_mode, funcsuffix, signedtype) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { __builtin_aarch64_simd_ci __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[2], 2); __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __o, __c); }
-__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16,
-   float16x8_t)
-__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32,
-   float32x4_t)
-__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64,
-   float64x2_t)
-__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
-   int8x16_t)
-__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16,
-   int16x8_t)
-__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64,
-   poly64x2_t)
-__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
-   int8x16_t)
-__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
-   int16x8_t)
-__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
-   int32x4_t)
-__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64,
-   int64x2_t)
-__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
-   int8x16_t)
-__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16,
-   int16x8_t)
-__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32,
-   int32x4_t)
-__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64,
-   int64x2_t)
-#define __ST3Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { union { intype __i; __builtin_aarch64_simd_ci __o; } __temp = { __b }; __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __temp.__o, __c); }
-__ST3Q_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
-__ST3Q_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
-__ST3Q_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
-__ST3Q_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
-__ST3Q_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
-__ST3Q_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64)
-__ST3Q_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
-__ST3Q_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
-__ST3Q_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
-__ST3Q_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
-__ST3Q_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
-__ST3Q_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
-__ST3Q_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
-__ST3Q_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
-#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, qmode, ptr_mode, funcsuffix, signedtype) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { __builtin_aarch64_simd_xi __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[2], 2); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[3], 3); __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __o, __c); }
-__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16,
-   float16x8_t)
-__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32,
-   float32x4_t)
-__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64,
-   float64x2_t)
-__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
-   int8x16_t)
-__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16,
-   int16x8_t)
-__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64,
-   poly64x2_t)
-__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
-   int8x16_t)
-__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
-   int16x8_t)
-__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
-   int32x4_t)
-__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64,
-   int64x2_t)
-__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
-   int8x16_t)
-__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16,
-   int16x8_t)
-__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32,
-   int32x4_t)
-__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64,
-   int64x2_t)
-#define __ST4Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { union { intype __i; __builtin_aarch64_simd_xi __o; } __temp = { __b }; __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __temp.__o, __c); }
-__ST4Q_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
-__ST4Q_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
-__ST4Q_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
-__ST4Q_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
-__ST4Q_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
-__ST4Q_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64)
-__ST4Q_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
-__ST4Q_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
-__ST4Q_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
-__ST4Q_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
-__ST4Q_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
-__ST4Q_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
-__ST4Q_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
-__ST4Q_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_f16 (float16_t *__ptr, float16x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_f32 (float32_t *__ptr, float32x2x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_f64 (float64_t *__ptr, float64x1x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanedf ((__builtin_aarch64_simd_df *) __ptr, __val,
+    __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_p8 (poly8_t *__ptr, poly8x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8qi_sps ((__builtin_aarch64_simd_qi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_p16 (poly16_t *__ptr, poly16x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4hi_sps ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_p64 (poly64_t *__ptr, poly64x1x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanedi_sps ((__builtin_aarch64_simd_di *) __ptr,
+        __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_s8 (int8_t *__ptr, int8x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_s16 (int16_t *__ptr, int16x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_s32 (int32_t *__ptr, int32x2x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_s64 (int64_t *__ptr, int64x1x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanedi ((__builtin_aarch64_simd_di *) __ptr, __val,
+    __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_u8 (uint8_t *__ptr, uint8x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8qi_sus ((__builtin_aarch64_simd_qi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_u16 (uint16_t *__ptr, uint16x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4hi_sus ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_u32 (uint32_t *__ptr, uint32x2x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev2si_sus ((__builtin_aarch64_simd_si *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_u64 (uint64_t *__ptr, uint64x1x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanedi_sus ((__builtin_aarch64_simd_di *) __ptr, __val,
+        __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_f16 (float16_t *__ptr, float16x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_f32 (float32_t *__ptr, float32x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_f64 (float64_t *__ptr, float64x2x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_p8 (poly8_t *__ptr, poly8x16x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev16qi_sps ((__builtin_aarch64_simd_qi *) __ptr,
+           __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_p16 (poly16_t *__ptr, poly16x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8hi_sps ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_p64 (poly64_t *__ptr, poly64x2x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev2di_sps ((__builtin_aarch64_simd_di *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_s8 (int8_t *__ptr, int8x16x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __val,
+       __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_s16 (int16_t *__ptr, int16x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_s32 (int32_t *__ptr, int32x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_s64 (int64_t *__ptr, int64x2x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_u8 (uint8_t *__ptr, uint8x16x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev16qi_sus ((__builtin_aarch64_simd_qi *) __ptr,
+           __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_u16 (uint16_t *__ptr, uint16x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8hi_sus ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_u32 (uint32_t *__ptr, uint32x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4si_sus ((__builtin_aarch64_simd_si *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_u64 (uint64_t *__ptr, uint64x2x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev2di_sus ((__builtin_aarch64_simd_di *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_f16 (float16_t *__ptr, float16x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_f32 (float32_t *__ptr, float32x2x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_f64 (float64_t *__ptr, float64x1x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanedf ((__builtin_aarch64_simd_df *) __ptr, __val,
+    __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_p8 (poly8_t *__ptr, poly8x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8qi_sps ((__builtin_aarch64_simd_qi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_p16 (poly16_t *__ptr, poly16x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4hi_sps ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_p64 (poly64_t *__ptr, poly64x1x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanedi_sps ((__builtin_aarch64_simd_di *) __ptr, __val,
+        __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_s8 (int8_t *__ptr, int8x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_s16 (int16_t *__ptr, int16x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_s32 (int32_t *__ptr, int32x2x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_s64 (int64_t *__ptr, int64x1x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanedi ((__builtin_aarch64_simd_di *) __ptr, __val,
+    __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_u8 (uint8_t *__ptr, uint8x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8qi_sus ((__builtin_aarch64_simd_qi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_u16 (uint16_t *__ptr, uint16x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4hi_sus ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_u32 (uint32_t *__ptr, uint32x2x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev2si_sus ((__builtin_aarch64_simd_si *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_u64 (uint64_t *__ptr, uint64x1x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanedi_sus ((__builtin_aarch64_simd_di *) __ptr, __val,
+        __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_f16 (float16_t *__ptr, float16x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_f32 (float32_t *__ptr, float32x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_f64 (float64_t *__ptr, float64x2x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_p8 (poly8_t *__ptr, poly8x16x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev16qi_sps ((__builtin_aarch64_simd_qi *) __ptr,
+           __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_p16 (poly16_t *__ptr, poly16x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8hi_sps ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_p64 (poly64_t *__ptr, poly64x2x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev2di_sps ((__builtin_aarch64_simd_di *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_s8 (int8_t *__ptr, int8x16x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __val,
+       __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_s16 (int16_t *__ptr, int16x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_s32 (int32_t *__ptr, int32x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_s64 (int64_t *__ptr, int64x2x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_u8 (uint8_t *__ptr, uint8x16x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev16qi_sus ((__builtin_aarch64_simd_qi *) __ptr,
+           __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_u16 (uint16_t *__ptr, uint16x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8hi_sus ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_u32 (uint32_t *__ptr, uint32x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4si_sus ((__builtin_aarch64_simd_si *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_u64 (uint64_t *__ptr, uint64x2x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev2di_sus ((__builtin_aarch64_simd_di *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_f16 (float16_t *__ptr, float16x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_f32 (float32_t *__ptr, float32x2x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_f64 (float64_t *__ptr, float64x1x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanedf ((__builtin_aarch64_simd_df *) __ptr, __val,
+    __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_p8 (poly8_t *__ptr, poly8x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8qi_sps ((__builtin_aarch64_simd_qi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_p16 (poly16_t *__ptr, poly16x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4hi_sps ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_p64 (poly64_t *__ptr, poly64x1x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanedi_sps ((__builtin_aarch64_simd_di *) __ptr, __val,
+        __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_s8 (int8_t *__ptr, int8x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_s16 (int16_t *__ptr, int16x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_s32 (int32_t *__ptr, int32x2x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_s64 (int64_t *__ptr, int64x1x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanedi ((__builtin_aarch64_simd_di *) __ptr, __val,
+    __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_u8 (uint8_t *__ptr, uint8x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8qi_sus ((__builtin_aarch64_simd_qi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_u16 (uint16_t *__ptr, uint16x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4hi_sus ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_u32 (uint32_t *__ptr, uint32x2x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev2si_sus ((__builtin_aarch64_simd_si *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_u64 (uint64_t *__ptr, uint64x1x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanedi_sus ((__builtin_aarch64_simd_di *) __ptr, __val,
+        __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_f16 (float16_t *__ptr, float16x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_f32 (float32_t *__ptr, float32x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_f64 (float64_t *__ptr, float64x2x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_p8 (poly8_t *__ptr, poly8x16x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev16qi_sps ((__builtin_aarch64_simd_qi *) __ptr,
+           __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_p16 (poly16_t *__ptr, poly16x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8hi_sps ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_p64 (poly64_t *__ptr, poly64x2x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev2di_sps ((__builtin_aarch64_simd_di *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_s8 (int8_t *__ptr, int8x16x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __val,
+       __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_s16 (int16_t *__ptr, int16x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_s32 (int32_t *__ptr, int32x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_s64 (int64_t *__ptr, int64x2x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_u8 (uint8_t *__ptr, uint8x16x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev16qi_sus ((__builtin_aarch64_simd_qi *) __ptr,
+           __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_u16 (uint16_t *__ptr, uint16x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8hi_sus ((__builtin_aarch64_simd_hi *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_u32 (uint32_t *__ptr, uint32x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4si_sus ((__builtin_aarch64_simd_si *) __ptr,
+          __val, __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_u64 (uint64_t *__ptr, uint64x2x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev2di_sus ((__builtin_aarch64_simd_di *) __ptr,
+          __val, __lane);
+}
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s32 (int32x2_t __a)
@@ -8079,339 +7865,198 @@ vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqtbl1_p8 (poly8x16_t __a, uint8x8_t __b)
+vqtbl1_p8 (poly8x16_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v8qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqtbl1_s8 (int8x16_t __a, uint8x8_t __b)
+vqtbl1_s8 (int8x16_t __tab, uint8x8_t __idx)
 {
-  int8x8_t __result;
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v8qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqtbl1_u8 (uint8x16_t __a, uint8x8_t __b)
+vqtbl1_u8 (uint8x16_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v8qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqtbl1q_p8 (poly8x16_t __a, uint8x16_t __b)
+vqtbl1q_p8 (poly8x16_t __tab, uint8x16_t __idx)
 {
-  poly8x16_t __result;
-  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v16qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqtbl1q_s8 (int8x16_t __a, uint8x16_t __b)
+vqtbl1q_s8 (int8x16_t __tab, uint8x16_t __idx)
 {
-  int8x16_t __result;
-  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v16qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vqtbl1q_u8 (uint8x16_t __a, uint8x16_t __b)
+vqtbl1q_u8 (uint8x16_t __tab, uint8x16_t __idx)
 {
-  uint8x16_t __result;
-  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
-           : "=w"(__result)
-           : "w"(__a), "w"(__b)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v16qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx)
 {
-  int8x8_t __result = __r;
-  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
-           : "+w"(__result)
-           : "w"(__tab), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v8qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result = __r;
-  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
-           : "+w"(__result)
-           : "w"(__tab), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result = __r;
-  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
-           : "+w"(__result)
-           : "w"(__tab), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v8qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx)
 {
-  int8x16_t __result = __r;
-  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
-           : "+w"(__result)
-           : "w"(__tab), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v16qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx)
 {
-  uint8x16_t __result = __r;
-  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
-           : "+w"(__result)
-           : "w"(__tab), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v16qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx)
 {
-  poly8x16_t __result = __r;
-  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
-           : "+w"(__result)
-           : "w"(__tab), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v16qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl1_s8 (int8x8_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
-  int8x16_t __temp = vcombine_s8 (__tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  int8x16_t __temp = vcombine_s8 (__tab,
+      vcreate_s8 (__AARCH64_UINT64_C (0x0)));
+  return __builtin_aarch64_qtbl1v8qi (__temp, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
-  uint8x16_t __temp = vcombine_u8 (__tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  uint8x16_t __temp = vcombine_u8 (__tab,
+       vcreate_u8 (__AARCH64_UINT64_C (0x0)));
+  return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
-  poly8x16_t __temp = vcombine_p8 (__tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  poly8x16_t __temp = vcombine_p8 (__tab,
+       vcreate_p8 (__AARCH64_UINT64_C (0x0)));
+  return __builtin_aarch64_qtbl1v8qi_ppu (__temp, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
   int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]);
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v8qi (__temp, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
   uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]);
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
   poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]);
-  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
-           : "=w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbl1v8qi_ppu (__temp, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
   int8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
-  __temp.val[1] = vcombine_s8 (__tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = __builtin_aarch64_tbl3v8qi (__o, __idx);
-  return __result;
+  __temp.val[1] = vcombine_s8 (__tab.val[2],
+          vcreate_s8 (__AARCH64_UINT64_C (0x0)));
+  return __builtin_aarch64_qtbl2v8qi (__temp, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
   uint8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
-  __temp.val[1] = vcombine_u8 (__tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  __temp.val[1] = vcombine_u8 (__tab.val[2],
+          vcreate_u8 (__AARCH64_UINT64_C (0x0)));
+  return __builtin_aarch64_qtbl2v8qi_uuu (__temp, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
   poly8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
-  __temp.val[1] = vcombine_p8 (__tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  __temp.val[1] = vcombine_p8 (__tab.val[2],
+          vcreate_p8 (__AARCH64_UINT64_C (0x0)));
+  return __builtin_aarch64_qtbl2v8qi_ppu (__temp, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
   int8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
   __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = __builtin_aarch64_tbl3v8qi (__o, __idx);
-  return __result;
+  return __builtin_aarch64_qtbl2v8qi (__temp, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
   uint8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
   __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  return __builtin_aarch64_qtbl2v8qi_uuu (__temp, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
   poly8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
   __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  return __builtin_aarch64_qtbl2v8qi_ppu (__temp, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result = __r;
   int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]);
-  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
-           : "+w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v8qi (__r, __temp, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result = __r;
   uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]);
-  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
-           : "+w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __temp, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result = __r;
   poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]);
-  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
-           : "+w"(__result)
-           : "w"(__temp), "w"(__idx)
-           : );
-  return __result;
+  return __builtin_aarch64_qtbx1v8qi_pppu (__r, __temp, __idx);
 }
 __extension__ extern __inline float32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -8562,19 +8207,19 @@ __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddv_u8 (uint8x8_t __a)
 {
-  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
+  return __builtin_aarch64_reduc_plus_scal_v8qi_uu (__a);
 }
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddv_u16 (uint16x4_t __a)
 {
-  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
+  return __builtin_aarch64_reduc_plus_scal_v4hi_uu (__a);
 }
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddv_u32 (uint32x2_t __a)
 {
-  return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
+  return __builtin_aarch64_reduc_plus_scal_v2si_uu (__a);
 }
 __extension__ extern __inline int8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -8604,25 +8249,25 @@ __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddvq_u8 (uint8x16_t __a)
 {
-  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
+  return __builtin_aarch64_reduc_plus_scal_v16qi_uu (__a);
 }
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddvq_u16 (uint16x8_t __a)
 {
-  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
+  return __builtin_aarch64_reduc_plus_scal_v8hi_uu (__a);
 }
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddvq_u32 (uint32x4_t __a)
 {
-  return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
+  return __builtin_aarch64_reduc_plus_scal_v4si_uu (__a);
 }
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddvq_u64 (uint64x2_t __a)
 {
-  return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
+  return __builtin_aarch64_reduc_plus_scal_v2di_uu (__a);
 }
 __extension__ extern __inline float32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -13003,15 +12648,15 @@ __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p8 (const poly8_t *__a)
 {
-  return (poly8x8_t)
-    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __builtin_aarch64_ld1v8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p16 (const poly16_t *__a)
 {
-  return (poly16x4_t)
-    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __builtin_aarch64_ld1v4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -13047,22 +12692,22 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u8 (const uint8_t *__a)
 {
-  return (uint8x8_t)
-    __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __builtin_aarch64_ld1v8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u16 (const uint16_t *__a)
 {
-  return (uint16x4_t)
-    __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __builtin_aarch64_ld1v4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u32 (const uint32_t *__a)
 {
-  return (uint32x2_t)
-    __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a);
+  return __builtin_aarch64_ld1v2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -13074,337 +12719,184 @@ __extension__ extern __inline uint8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u8_x3 (const uint8_t *__a)
 {
-  uint8x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = (__builtin_aarch64_simd_ci)__builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  __i.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  __i.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  __i.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vld1_s8_x3 (const uint8_t *__a)
+vld1_s8_x3 (const int8_t *__a)
 {
-  int8x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  __i.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  __i.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  __i.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u16_x3 (const uint16_t *__a)
 {
-  uint16x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  __i.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  __i.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  __i.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s16_x3 (const int16_t *__a)
 {
-  int16x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  __i.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  __i.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  __i.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u32_x3 (const uint32_t *__a)
 {
-  uint32x2x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a);
-  __i.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-  __i.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-  __i.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vld1_s32_x3 (const uint32_t *__a)
+vld1_s32_x3 (const int32_t *__a)
 {
-  int32x2x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a);
-  __i.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-  __i.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-  __i.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u64_x3 (const uint64_t *__a)
 {
-  uint64x1x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a);
-  __i.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  __i.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  __i.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s64_x3 (const int64_t *__a)
 {
-  int64x1x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a);
-  __i.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  __i.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  __i.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f16_x3 (const float16_t *__a)
 {
-  float16x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4hf ((const __builtin_aarch64_simd_hf *) __a);
-  __i.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
-  __i.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
-  __i.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f32_x3 (const float32_t *__a)
 {
-  float32x2x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v2sf ((const __builtin_aarch64_simd_sf *) __a);
-  __i.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
-  __i.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
-  __i.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f64_x3 (const float64_t *__a)
 {
-  float64x1x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3df ((const __builtin_aarch64_simd_df *) __a);
-  __i.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  __i.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  __i.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p8_x3 (const poly8_t *__a)
 {
-  poly8x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  __i.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  __i.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  __i.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p16_x3 (const poly16_t *__a)
 {
-  poly16x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  __i.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  __i.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  __i.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p64_x3 (const poly64_t *__a)
 {
-  poly64x1x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a);
-  __i.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  __i.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  __i.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-return __i;
+  return __builtin_aarch64_ld1x3di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u8_x3 (const uint8_t *__a)
 {
-  uint8x16x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  __i.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  __i.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  __i.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s8_x3 (const int8_t *__a)
 {
-  int8x16x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  __i.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  __i.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  __i.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v16qi (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u16_x3 (const uint16_t *__a)
 {
-  uint16x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  __i.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  __i.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  __i.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s16_x3 (const int16_t *__a)
 {
-  int16x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  __i.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  __i.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  __i.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u32_x3 (const uint32_t *__a)
 {
-  uint32x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a);
-  __i.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-  __i.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-  __i.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s32_x3 (const int32_t *__a)
 {
-  int32x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a);
-  __i.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-  __i.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-  __i.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u64_x3 (const uint64_t *__a)
 {
-  uint64x2x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a);
-  __i.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-  __i.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-  __i.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s64_x3 (const int64_t *__a)
 {
-  int64x2x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a);
-  __i.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-  __i.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-  __i.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f16_x3 (const float16_t *__a)
 {
-  float16x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v8hf ((const __builtin_aarch64_simd_hf *) __a);
-  __i.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
-  __i.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
-  __i.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f32_x3 (const float32_t *__a)
 {
-  float32x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4sf ((const __builtin_aarch64_simd_sf *) __a);
-  __i.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
-  __i.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
-  __i.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f64_x3 (const float64_t *__a)
 {
-  float64x2x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v2df ((const __builtin_aarch64_simd_df *) __a);
-  __i.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
-  __i.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
-  __i.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p8_x3 (const poly8_t *__a)
 {
-  poly8x16x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  __i.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  __i.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  __i.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p16_x3 (const poly16_t *__a)
 {
-  poly16x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  __i.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  __i.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  __i.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p64_x3 (const poly64_t *__a)
 {
-  poly64x2x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a);
-  __i.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-  __i.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-  __i.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -13428,22 +12920,22 @@ __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p8 (const poly8_t *__a)
 {
-  return (poly8x16_t)
-    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __builtin_aarch64_ld1v16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p16 (const poly16_t *__a)
 {
-  return (poly16x8_t)
-    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __builtin_aarch64_ld1v8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p64 (const poly64_t *__a)
 {
-  return (poly64x2_t)
-    __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a);
+  return __builtin_aarch64_ld1v2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -13473,589 +12965,395 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u8 (const uint8_t *__a)
 {
-  return (uint8x16_t)
-    __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a);
+  return __builtin_aarch64_ld1v16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u8_x2 (const uint8_t *__a)
 {
-  uint8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s8_x2 (const int8_t *__a)
 {
-  int8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u16_x2 (const uint16_t *__a)
 {
-  uint16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s16_x2 (const int16_t *__a)
 {
-  int16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u32_x2 (const uint32_t *__a)
 {
-  uint32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s32_x2 (const int32_t *__a)
 {
-  int32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u64_x2 (const uint64_t *__a)
 {
-  uint64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s64_x2 (const int64_t *__a)
 {
-  int64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f16_x2 (const float16_t *__a)
 {
-  float16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0);
-  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f32_x2 (const float32_t *__a)
 {
-  float32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
-  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f64_x2 (const float64_t *__a)
 {
-  float64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
-  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
-  return ret;
+  return __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p8_x2 (const poly8_t *__a)
 {
-  poly8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p16_x2 (const poly16_t *__a)
 {
-  poly16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p64_x2 (const poly64_t *__a)
 {
-  poly64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u8_x2 (const uint8_t *__a)
 {
-  uint8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s8_x2 (const int8_t *__a)
 {
-  int8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v16qi (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u16_x2 (const uint16_t *__a)
 {
-  uint16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s16_x2 (const int16_t *__a)
 {
-  int16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u32_x2 (const uint32_t *__a)
 {
-  uint32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s32_x2 (const int32_t *__a)
 {
-  int32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u64_x2 (const uint64_t *__a)
 {
-  uint64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s64_x2 (const int64_t *__a)
 {
-  int64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f16_x2 (const float16_t *__a)
 {
-  float16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
-  ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f32_x2 (const float32_t *__a)
 {
-  float32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
-  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f64_x2 (const float64_t *__a)
 {
-  float64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
-  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p8_x2 (const poly8_t *__a)
 {
-  poly8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p16_x2 (const poly16_t *__a)
 {
-  poly16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p64_x2 (const poly64_t *__a)
 {
-  poly64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u16 (const uint16_t *__a)
 {
-  return (uint16x8_t)
-    __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a);
+  return __builtin_aarch64_ld1v8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u32 (const uint32_t *__a)
 {
-  return (uint32x4_t)
-    __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a);
+  return __builtin_aarch64_ld1v4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u64 (const uint64_t *__a)
 {
-  return (uint64x2_t)
-    __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a);
+  return __builtin_aarch64_ld1v2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s8_x4 (const int8_t *__a)
 {
-  union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s8_x4 (const int8_t *__a)
 {
-  union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v16qi (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s16_x4 (const int16_t *__a)
 {
-  union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s16_x4 (const int16_t *__a)
 {
-  union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s32_x4 (const int32_t *__a)
 {
-  union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-  = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s32_x4 (const int32_t *__a)
 {
-  union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-  = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u8_x4 (const uint8_t *__a)
 {
-  union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u8_x4 (const uint8_t *__a)
 {
-  union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u16_x4 (const uint16_t *__a)
 {
-  union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u16_x4 (const uint16_t *__a)
 {
-  union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u32_x4 (const uint32_t *__a)
 {
-  union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u32_x4 (const uint32_t *__a)
 {
-  union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline float16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f16_x4 (const float16_t *__a)
 {
-  union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f16_x4 (const float16_t *__a)
 {
-  union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f32_x4 (const float32_t *__a)
 {
-  union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f32_x4 (const float32_t *__a)
 {
-  union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline poly8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p8_x4 (const poly8_t *__a)
 {
-  union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p8_x4 (const poly8_t *__a)
 {
-  union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p16_x4 (const poly16_t *__a)
 {
-  union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p16_x4 (const poly16_t *__a)
 {
-  union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_s64_x4 (const int64_t *__a)
 {
-  union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_u64_x4 (const uint64_t *__a)
 {
-  union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline poly64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_p64_x4 (const poly64_t *__a)
 {
-  union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_s64_x4 (const int64_t *__a)
 {
-  union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_u64_x4 (const uint64_t *__a)
 {
-  union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline poly64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_p64_x4 (const poly64_t *__a)
 {
-  union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_f64_x4 (const float64_t *__a)
 {
-  union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline float64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_f64_x4 (const float64_t *__a)
 {
-  union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -14397,1009 +13695,539 @@ __extension__ extern __inline int64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_s64 (const int64_t * __a)
 {
-  int64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_u64 (const uint64_t * __a)
 {
-  uint64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2di_us ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_f64 (const float64_t * __a)
 {
-  float64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
-  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
-  return ret;
+  return __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline int8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_s8 (const int8_t * __a)
 {
-  int8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_p8 (const poly8_t * __a)
 {
-  poly8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_p64 (const poly64_t * __a)
 {
-  poly64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0);
-  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2di_ps ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_s16 (const int16_t * __a)
 {
-  int16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_p16 (const poly16_t * __a)
 {
-  poly16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_s32 (const int32_t * __a)
 {
-  int32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_u8 (const uint8_t * __a)
 {
-  uint8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_u16 (const uint16_t * __a)
 {
-  uint16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_u32 (const uint32_t * __a)
 {
-  uint32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline float16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_f16 (const float16_t * __a)
 {
-  float16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4hf (__a);
-  ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
-  ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4hf (__a);
 }
 __extension__ extern __inline float32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_f32 (const float32_t * __a)
 {
-  float32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
-  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline int8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_s8 (const int8_t * __a)
 {
-  int8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_p8 (const poly8_t * __a)
 {
-  poly8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_s16 (const int16_t * __a)
 {
-  int16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_p16 (const poly16_t * __a)
 {
-  poly16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_p64 (const poly64_t * __a)
 {
-  poly64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0);
-  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_s32 (const int32_t * __a)
 {
-  int32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_s64 (const int64_t * __a)
 {
-  int64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_u8 (const uint8_t * __a)
 {
-  uint8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_u16 (const uint16_t * __a)
 {
-  uint16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_u32 (const uint32_t * __a)
 {
-  uint32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_u64 (const uint64_t * __a)
 {
-  uint64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_f16 (const float16_t * __a)
 {
-  float16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8hf (__a);
-  ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
-  ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8hf (__a);
 }
 __extension__ extern __inline float32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_f32 (const float32_t * __a)
 {
-  float32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
-  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_f64 (const float64_t * __a)
 {
-  float64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
-  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline int64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_s64 (const int64_t * __a)
 {
-  int64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_u64 (const uint64_t * __a)
 {
-  uint64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3di_us ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_f64 (const float64_t * __a)
 {
-  float64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
-  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
-  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
-  return ret;
+  return __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline int8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_s8 (const int8_t * __a)
 {
-  int8x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_p8 (const poly8_t * __a)
 {
-  poly8x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_s16 (const int16_t * __a)
 {
-  int16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_p16 (const poly16_t * __a)
 {
-  poly16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_s32 (const int32_t * __a)
 {
-  int32x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_u8 (const uint8_t * __a)
 {
-  uint8x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_u16 (const uint16_t * __a)
 {
-  uint16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_u32 (const uint32_t * __a)
 {
-  uint32x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline float16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_f16 (const float16_t * __a)
 {
-  float16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4hf (__a);
-  ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
-  ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
-  ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4hf (__a);
 }
 __extension__ extern __inline float32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_f32 (const float32_t * __a)
 {
-  float32x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
-  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
-  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline poly64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_p64 (const poly64_t * __a)
 {
-  poly64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0);
-  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1);
-  ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3di_ps ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_s8 (const int8_t * __a)
 {
-  int8x16x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_p8 (const poly8_t * __a)
 {
-  poly8x16x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v16qi_ps ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_s16 (const int16_t * __a)
 {
-  int16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_p16 (const poly16_t * __a)
 {
-  poly16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8hi_ps ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_s32 (const int32_t * __a)
 {
-  int32x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_s64 (const int64_t * __a)
 {
-  int64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_u8 (const uint8_t * __a)
 {
-  uint8x16x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_u16 (const uint16_t * __a)
 {
-  uint16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_u32 (const uint32_t * __a)
 {
-  uint32x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_u64 (const uint64_t * __a)
 {
-  uint64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_f16 (const float16_t * __a)
 {
-  float16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8hf (__a);
-  ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
-  ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
-  ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8hf (__a);
 }
 __extension__ extern __inline float32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_f32 (const float32_t * __a)
 {
-  float32x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
-  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
-  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_f64 (const float64_t * __a)
 {
-  float64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
-  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
-  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_p64 (const poly64_t * __a)
 {
-  poly64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0);
-  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1);
-  ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_s64 (const int64_t * __a)
 {
-  int64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_u64 (const uint64_t * __a)
 {
-  uint64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4di_us ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_f64 (const float64_t * __a)
 {
-  float64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
-  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
-  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
-  ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
-  return ret;
+  return __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline int8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_s8 (const int8_t * __a)
 {
-  int8x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_p8 (const poly8_t * __a)
 {
-  poly8x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_s16 (const int16_t * __a)
 {
-  int16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_p16 (const poly16_t * __a)
 {
-  poly16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_s32 (const int32_t * __a)
 {
-  int32x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_u8 (const uint8_t * __a)
 {
-  uint8x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_u16 (const uint16_t * __a)
 {
-  uint16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_u32 (const uint32_t * __a)
 {
-  uint32x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline float16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_f16 (const float16_t * __a)
 {
-  float16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4hf (__a);
-  ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
-  ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
-  ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
-  ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4hf (__a);
 }
 __extension__ extern __inline float32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_f32 (const float32_t * __a)
 {
-  float32x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
-  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
-  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
-  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline poly64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_p64 (const poly64_t * __a)
 {
-  poly64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0);
-  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1);
-  ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2);
-  ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4di_ps ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_s8 (const int8_t * __a)
 {
-  int8x16x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_p8 (const poly8_t * __a)
 {
-  poly8x16x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_s16 (const int16_t * __a)
 {
-  int16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_p16 (const poly16_t * __a)
 {
-  poly16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_s32 (const int32_t * __a)
 {
-  int32x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_s64 (const int64_t * __a)
 {
-  int64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_u8 (const uint8_t * __a)
 {
-  uint8x16x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_u16 (const uint16_t * __a)
 {
-  uint16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_u32 (const uint32_t * __a)
 {
-  uint32x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_u64 (const uint64_t * __a)
 {
-  uint64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_f16 (const float16_t * __a)
 {
-  float16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8hf (__a);
-  ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
-  ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
-  ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
-  ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8hf (__a);
 }
 __extension__ extern __inline float32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_f32 (const float32_t * __a)
 {
-  float32x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
-  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
-  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
-  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_f64 (const float64_t * __a)
 {
-  float64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
-  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
-  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
-  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_p64 (const poly64_t * __a)
 {
-  poly64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0);
-  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1);
-  ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2);
-  ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline poly128_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -15411,1154 +14239,1142 @@ __extension__ extern __inline int8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_s8 (const int8_t * __a)
 {
-  int8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_s16 (const int16_t * __a)
 {
-  int16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_s32 (const int32_t * __a)
 {
-  int32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline float16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_f16 (const float16_t * __a)
 {
-  float16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
-  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_f32 (const float32_t * __a)
 {
-  float32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
-  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_f64 (const float64_t * __a)
 {
-  float64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
-  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
-  return ret;
+  return __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline uint8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_u8 (const uint8_t * __a)
 {
-  uint8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_u16 (const uint16_t * __a)
 {
-  uint16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_u32 (const uint32_t * __a)
 {
-  uint32x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline poly8x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_p8 (const poly8_t * __a)
 {
-  poly8x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_p16 (const poly16_t * __a)
 {
-  poly16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_p64 (const poly64_t * __a)
 {
-  poly64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0);
-  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rdi_ps ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_s64 (const int64_t * __a)
 {
-  int64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x1x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_u64 (const uint64_t * __a)
 {
-  uint64x1x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rdi_us ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_s8 (const int8_t * __a)
 {
-  int8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_p8 (const poly8_t * __a)
 {
-  poly8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_s16 (const int16_t * __a)
 {
-  int16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_p16 (const poly16_t * __a)
 {
-  poly16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_s32 (const int32_t * __a)
 {
-  int32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_s64 (const int64_t * __a)
 {
-  int64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_u8 (const uint8_t * __a)
 {
-  uint8x16x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_u16 (const uint16_t * __a)
 {
-  uint16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_u32 (const uint32_t * __a)
 {
-  uint32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_u64 (const uint64_t * __a)
 {
-  uint64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_f16 (const float16_t * __a)
 {
-  float16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
-  ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_f32 (const float32_t * __a)
 {
-  float32x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
-  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_f64 (const float64_t * __a)
 {
-  float64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
-  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly64x2x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_p64 (const poly64_t * __a)
 {
-  poly64x2x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0);
-  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_s64 (const int64_t * __a)
 {
-  int64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_u64 (const uint64_t * __a)
 {
-  uint64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rdi_us ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_f64 (const float64_t * __a)
 {
-  float64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
-  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
-  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
-  return ret;
+  return __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline int8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_s8 (const int8_t * __a)
 {
-  int8x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_p8 (const poly8_t * __a)
 {
-  poly8x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_s16 (const int16_t * __a)
 {
-  int16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_p16 (const poly16_t * __a)
 {
-  poly16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_s32 (const int32_t * __a)
 {
-  int32x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint8x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_u8 (const uint8_t * __a)
 {
-  uint8x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_u16 (const uint16_t * __a)
 {
-  uint16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_u32 (const uint32_t * __a)
 {
-  uint32x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline float16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_f16 (const float16_t * __a)
 {
-  float16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
-  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
-  ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_f32 (const float32_t * __a)
 {
-  float32x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
-  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
-  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline poly64x1x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_p64 (const poly64_t * __a)
 {
-  poly64x1x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0);
-  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1);
-  ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rdi_ps ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_s8 (const int8_t * __a)
 {
-  int8x16x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_p8 (const poly8_t * __a)
 {
-  poly8x16x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_s16 (const int16_t * __a)
 {
-  int16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_p16 (const poly16_t * __a)
 {
-  poly16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_s32 (const int32_t * __a)
 {
-  int32x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_s64 (const int64_t * __a)
 {
-  int64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_u8 (const uint8_t * __a)
 {
-  uint8x16x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_u16 (const uint16_t * __a)
 {
-  uint16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_u32 (const uint32_t * __a)
 {
-  uint32x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_u64 (const uint64_t * __a)
 {
-  uint64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_f16 (const float16_t * __a)
 {
-  float16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
-  ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
-  ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_f32 (const float32_t * __a)
 {
-  float32x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
-  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
-  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_f64 (const float64_t * __a)
 {
-  float64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
-  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
-  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly64x2x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_p64 (const poly64_t * __a)
 {
-  poly64x2x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0);
-  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1);
-  ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_s64 (const int64_t * __a)
 {
-  int64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_u64 (const uint64_t * __a)
 {
-  uint64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rdi_us ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_f64 (const float64_t * __a)
 {
-  float64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
-  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
-  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
-  ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
-  return ret;
+  return __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline int8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_s8 (const int8_t * __a)
 {
-  int8x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_p8 (const poly8_t * __a)
 {
-  poly8x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_s16 (const int16_t * __a)
 {
-  int16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_p16 (const poly16_t * __a)
 {
-  poly16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_s32 (const int32_t * __a)
 {
-  int32x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint8x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_u8 (const uint8_t * __a)
 {
-  uint8x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_u16 (const uint16_t * __a)
 {
-  uint16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_u32 (const uint32_t * __a)
 {
-  uint32x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv2si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline float16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_f16 (const float16_t * __a)
 {
-  float16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0);
-  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1);
-  ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2);
-  ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_f32 (const float32_t * __a)
 {
-  float32x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
-  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
-  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
-  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline poly64x1x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_p64 (const poly64_t * __a)
 {
-  poly64x1x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0);
-  ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1);
-  ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2);
-  ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rdi_ps ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline int8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_s8 (const int8_t * __a)
 {
-  int8x16x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline poly8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_p8 (const poly8_t * __a)
 {
-  poly8x16x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv16qi_ps (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline int16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_s16 (const int16_t * __a)
 {
-  int16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline poly16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_p16 (const poly16_t * __a)
 {
-  poly16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8hi_ps (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline int32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_s32 (const int32_t * __a)
 {
-  int32x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline int64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_s64 (const int64_t * __a)
 {
-  int64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline uint8x16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_u8 (const uint8_t * __a)
 {
-  uint8x16x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv16qi_us (
+    (const __builtin_aarch64_simd_qi *) __a);
 }
 __extension__ extern __inline uint16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_u16 (const uint16_t * __a)
 {
-  uint16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8hi_us (
+    (const __builtin_aarch64_simd_hi *) __a);
 }
 __extension__ extern __inline uint32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_u32 (const uint32_t * __a)
 {
-  uint32x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
-  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4si_us (
+    (const __builtin_aarch64_simd_si *) __a);
 }
 __extension__ extern __inline uint64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_u64 (const uint64_t * __a)
 {
-  uint64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv2di_us (
+    (const __builtin_aarch64_simd_di *) __a);
 }
 __extension__ extern __inline float16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_f16 (const float16_t * __a)
 {
-  float16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
-  ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0);
-  ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1);
-  ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2);
-  ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
 }
 __extension__ extern __inline float32x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_f32 (const float32_t * __a)
 {
-  float32x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
-  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
-  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
-  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
-  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
 }
 __extension__ extern __inline float64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_f64 (const float64_t * __a)
 {
-  float64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
-  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
-  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
-  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
 }
 __extension__ extern __inline poly64x2x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_p64 (const poly64_t * __a)
 {
-  poly64x2x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
-  ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0);
-  ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1);
-  ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2);
-  ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv2di_ps (
+    (const __builtin_aarch64_simd_di *) __a);
+}
+__extension__ extern __inline uint8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_u8 (const uint8_t * __ptr, uint8x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8qi_usus (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_u16 (const uint16_t * __ptr, uint16x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4hi_usus (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_u32 (const uint32_t * __ptr, uint32x2x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev2si_usus (
+   (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_u64 (const uint64_t * __ptr, uint64x1x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanedi_usus (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline int8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_s8 (const int8_t * __ptr, int8x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8qi (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_s16 (const int16_t * __ptr, int16x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4hi (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_s32 (const int32_t * __ptr, int32x2x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev2si (
+   (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline int64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_s64 (const int64_t * __ptr, int64x1x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanedi (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline float16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_f16 (const float16_t * __ptr, float16x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4hf (
+   (__builtin_aarch64_simd_hf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float32x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_f32 (const float32_t * __ptr, float32x2x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev2sf (
+   (__builtin_aarch64_simd_sf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_f64 (const float64_t * __ptr, float64x1x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanedf (
+   (__builtin_aarch64_simd_df *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly8x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_p8 (const poly8_t * __ptr, poly8x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8qi_psps (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_p16 (const poly16_t * __ptr, poly16x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4hi_psps (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly64x1x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_p64 (const poly64_t * __ptr, poly64x1x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanedi_psps (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint8x16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_u8 (const uint8_t * __ptr, uint8x16x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev16qi_usus (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_u16 (const uint16_t * __ptr, uint16x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8hi_usus (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint32x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_u32 (const uint32_t * __ptr, uint32x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4si_usus (
+ (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_u64 (const uint64_t * __ptr, uint64x2x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev2di_usus (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline int8x16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_s8 (const int8_t * __ptr, int8x16x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev16qi (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_s16 (const int16_t * __ptr, int16x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8hi (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int32x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_s32 (const int32_t * __ptr, int32x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4si (
+ (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline int64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_s64 (const int64_t * __ptr, int64x2x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev2di (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline float16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_f16 (const float16_t * __ptr, float16x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8hf (
+ (__builtin_aarch64_simd_hf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float32x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_f32 (const float32_t * __ptr, float32x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4sf (
+ (__builtin_aarch64_simd_sf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_f64 (const float64_t * __ptr, float64x2x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev2df (
+ (__builtin_aarch64_simd_df *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly8x16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_p8 (const poly8_t * __ptr, poly8x16x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev16qi_psps (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_p16 (const poly16_t * __ptr, poly16x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8hi_psps (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly64x2x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_p64 (const poly64_t * __ptr, poly64x2x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev2di_psps (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint8x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_u8 (const uint8_t * __ptr, uint8x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8qi_usus (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_u16 (const uint16_t * __ptr, uint16x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4hi_usus (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint32x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_u32 (const uint32_t * __ptr, uint32x2x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev2si_usus (
+   (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint64x1x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_u64 (const uint64_t * __ptr, uint64x1x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanedi_usus (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline int8x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_s8 (const int8_t * __ptr, int8x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8qi (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_s16 (const int16_t * __ptr, int16x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4hi (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int32x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_s32 (const int32_t * __ptr, int32x2x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev2si (
+   (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline int64x1x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_s64 (const int64_t * __ptr, int64x1x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanedi (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline float16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_f16 (const float16_t * __ptr, float16x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4hf (
+   (__builtin_aarch64_simd_hf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float32x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_f32 (const float32_t * __ptr, float32x2x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev2sf (
+   (__builtin_aarch64_simd_sf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float64x1x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_f64 (const float64_t * __ptr, float64x1x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanedf (
+   (__builtin_aarch64_simd_df *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly8x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_p8 (const poly8_t * __ptr, poly8x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8qi_psps (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_p16 (const poly16_t * __ptr, poly16x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4hi_psps (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly64x1x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_p64 (const poly64_t * __ptr, poly64x1x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanedi_psps (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint8x16x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_u8 (const uint8_t * __ptr, uint8x16x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev16qi_usus (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_u16 (const uint16_t * __ptr, uint16x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8hi_usus (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint32x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_u32 (const uint32_t * __ptr, uint32x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4si_usus (
+ (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint64x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_u64 (const uint64_t * __ptr, uint64x2x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev2di_usus (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline int8x16x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_s8 (const int8_t * __ptr, int8x16x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev16qi (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_s16 (const int16_t * __ptr, int16x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8hi (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int32x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_s32 (const int32_t * __ptr, int32x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4si (
+ (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline int64x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_s64 (const int64_t * __ptr, int64x2x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev2di (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline float16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_f16 (const float16_t * __ptr, float16x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8hf (
+ (__builtin_aarch64_simd_hf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float32x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_f32 (const float32_t * __ptr, float32x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4sf (
+ (__builtin_aarch64_simd_sf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float64x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_f64 (const float64_t * __ptr, float64x2x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev2df (
+ (__builtin_aarch64_simd_df *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly8x16x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_p8 (const poly8_t * __ptr, poly8x16x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev16qi_psps (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_p16 (const poly16_t * __ptr, poly16x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8hi_psps (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly64x2x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_p64 (const poly64_t * __ptr, poly64x2x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev2di_psps (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint8x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_u8 (const uint8_t * __ptr, uint8x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8qi_usus (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_u16 (const uint16_t * __ptr, uint16x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4hi_usus (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint32x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_u32 (const uint32_t * __ptr, uint32x2x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev2si_usus (
+   (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_u64 (const uint64_t * __ptr, uint64x1x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanedi_usus (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline int8x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_s8 (const int8_t * __ptr, int8x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8qi (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_s16 (const int16_t * __ptr, int16x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4hi (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int32x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_s32 (const int32_t * __ptr, int32x2x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev2si (
+   (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline int64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_s64 (const int64_t * __ptr, int64x1x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanedi (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline float16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_f16 (const float16_t * __ptr, float16x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4hf (
+   (__builtin_aarch64_simd_hf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float32x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_f32 (const float32_t * __ptr, float32x2x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev2sf (
+   (__builtin_aarch64_simd_sf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_f64 (const float64_t * __ptr, float64x1x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanedf (
+   (__builtin_aarch64_simd_df *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly8x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_p8 (const poly8_t * __ptr, poly8x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8qi_psps (
+   (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_p16 (const poly16_t * __ptr, poly16x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4hi_psps (
+   (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly64x1x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_p64 (const poly64_t * __ptr, poly64x1x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanedi_psps (
+   (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint8x16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_u8 (const uint8_t * __ptr, uint8x16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev16qi_usus (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_u16 (const uint16_t * __ptr, uint16x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8hi_usus (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint32x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_u32 (const uint32_t * __ptr, uint32x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4si_usus (
+ (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline uint64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_u64 (const uint64_t * __ptr, uint64x2x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev2di_usus (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline int8x16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_s8 (const int8_t * __ptr, int8x16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev16qi (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_s16 (const int16_t * __ptr, int16x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8hi (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline int32x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_s32 (const int32_t * __ptr, int32x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4si (
+ (__builtin_aarch64_simd_si *) __ptr, __b, __c);
+}
+__extension__ extern __inline int64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_s64 (const int64_t * __ptr, int64x2x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev2di (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
+}
+__extension__ extern __inline float16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_f16 (const float16_t * __ptr, float16x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8hf (
+ (__builtin_aarch64_simd_hf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float32x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_f32 (const float32_t * __ptr, float32x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4sf (
+ (__builtin_aarch64_simd_sf *) __ptr, __b, __c);
+}
+__extension__ extern __inline float64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_f64 (const float64_t * __ptr, float64x2x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev2df (
+ (__builtin_aarch64_simd_df *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly8x16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_p8 (const poly8_t * __ptr, poly8x16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev16qi_psps (
+ (__builtin_aarch64_simd_qi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_p16 (const poly16_t * __ptr, poly16x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8hi_psps (
+ (__builtin_aarch64_simd_hi *) __ptr, __b, __c);
+}
+__extension__ extern __inline poly64x2x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_p64 (const poly64_t * __ptr, poly64x2x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev2di_psps (
+ (__builtin_aarch64_simd_di *) __ptr, __b, __c);
 }
-#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, qmode, ptrmode, funcsuffix, signedtype) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_oi __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); __o = __builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_ld2_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); return __b; }
-__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf,
-   v8hf, hf, f16, float16x8_t)
-__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf,
-   sf, f32, float32x4_t)
-__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df,
-   df, f64, float64x2_t)
-__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
-   int8x16_t)
-__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi,
-   p16, int16x8_t)
-__LD2_LANE_FUNC (poly64x1x2_t, poly64x1_t, poly64x2x2_t, poly64_t, di,
-   v2di_ssps, di, p64, poly64x2_t)
-__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
-   int8x16_t)
-__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
-   int16x8_t)
-__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
-   int32x4_t)
-__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64,
-   int64x2_t)
-__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
-   int8x16_t)
-__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi,
-   u16, int16x8_t)
-__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si,
-   u32, int32x4_t)
-__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di,
-   u64, int64x2_t)
-#define __LD2Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_oi __o; intype ret; __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); __o = __builtin_aarch64_ld2_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); return ret; }
-__LD2Q_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
-__LD2Q_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
-__LD2Q_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
-__LD2Q_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
-__LD2Q_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
-__LD2Q_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64)
-__LD2Q_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
-__LD2Q_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
-__LD2Q_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
-__LD2Q_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
-__LD2Q_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
-__LD2Q_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
-__LD2Q_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
-__LD2Q_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
-#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, qmode, ptrmode, funcsuffix, signedtype) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_ci __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[2], 2); __o = __builtin_aarch64_ld3_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); return __b; }
-__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf,
-   v8hf, hf, f16, float16x8_t)
-__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf,
-   sf, f32, float32x4_t)
-__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df,
-   df, f64, float64x2_t)
-__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
-   int8x16_t)
-__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi,
-   p16, int16x8_t)
-__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di,
-   v2di_ssps, di, p64, poly64x2_t)
-__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
-   int8x16_t)
-__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
-   int16x8_t)
-__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
-   int32x4_t)
-__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64,
-   int64x2_t)
-__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
-   int8x16_t)
-__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi,
-   u16, int16x8_t)
-__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si,
-   u32, int32x4_t)
-__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
-   u64, int64x2_t)
-#define __LD3Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_ci __o; intype ret; __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); __o = __builtin_aarch64_ld3_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); return ret; }
-__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
-__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
-__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
-__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
-__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
-__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64)
-__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
-__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
-__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
-__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
-__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
-__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
-__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
-__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
-#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, qmode, ptrmode, funcsuffix, signedtype) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_xi __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); __temp.val[3] = vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[2], 2); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[3], 3); __o = __builtin_aarch64_ld4_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); return __b; }
-__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf,
-   v8hf, hf, f16, float16x8_t)
-__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf,
-   sf, f32, float32x4_t)
-__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df,
-   df, f64, float64x2_t)
-__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
-   int8x16_t)
-__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi,
-   p16, int16x8_t)
-__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di,
-   v2di_ssps, di, p64, poly64x2_t)
-__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
-   int8x16_t)
-__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
-   int16x8_t)
-__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
-   int32x4_t)
-__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64,
-   int64x2_t)
-__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
-   int8x16_t)
-__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi,
-   u16, int16x8_t)
-__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si,
-   u32, int32x4_t)
-__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di,
-   u64, int64x2_t)
-#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_xi __o; intype ret; __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); __o = __builtin_aarch64_ld4_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); return ret; }
-__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
-__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
-__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
-__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
-__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
-__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64)
-__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
-__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
-__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
-__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
-__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
-__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
-__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
-__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smax_nanv2sf (__a, __b);
+  return __builtin_aarch64_fmax_nanv2sf (__a, __b);
 }
 __extension__ extern __inline float64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f64 (float64x1_t __a, float64x1_t __b)
 {
     return (float64x1_t)
-      { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
+      { __builtin_aarch64_fmax_nandf (vget_lane_f64 (__a, 0),
           vget_lane_f64 (__b, 0)) };
 }
 __extension__ extern __inline int8x8_t
@@ -16604,13 +15420,13 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smax_nanv4sf (__a, __b);
+  return __builtin_aarch64_fmax_nanv4sf (__a, __b);
 }
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smax_nanv2df (__a, __b);
+  return __builtin_aarch64_fmax_nanv2df (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17173,14 +15989,14 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smin_nanv2sf (__a, __b);
+  return __builtin_aarch64_fmin_nanv2sf (__a, __b);
 }
 __extension__ extern __inline float64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f64 (float64x1_t __a, float64x1_t __b)
 {
     return (float64x1_t)
-   { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
+   { __builtin_aarch64_fmin_nandf (vget_lane_f64 (__a, 0),
        vget_lane_f64 (__b, 0)) };
 }
 __extension__ extern __inline int8x8_t
@@ -17226,13 +16042,13 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smin_nanv4sf (__a, __b);
+  return __builtin_aarch64_fmin_nanv4sf (__a, __b);
 }
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smin_nanv2df (__a, __b);
+  return __builtin_aarch64_fmin_nanv2df (__a, __b);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17411,32 +16227,32 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
 {
-  return __a + __b * __c;
+  return __builtin_aarch64_float_mlav2sf (__a, __b, __c);
 }
 __extension__ extern __inline float64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
 {
-  return __a + __b * __c;
+  return (float64x1_t) {__builtin_aarch64_float_mladf (__a[0], __b[0], __c[0])};
 }
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
 {
-  return __a + __b * __c;
+  return __builtin_aarch64_float_mlav4sf (__a, __b, __c);
 }
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
 {
-  return __a + __b * __c;
+  return __builtin_aarch64_float_mlav2df (__a, __b, __c);
 }
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
         float32x2_t __c, const int __lane)
 {
-  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mla_lanev2sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17471,7 +16287,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
          float32x4_t __c, const int __lane)
 {
-  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mla_laneqv2sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17506,7 +16322,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
   float32x2_t __c, const int __lane)
 {
-  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mla_lanev4sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17541,7 +16357,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
    float32x4_t __c, const int __lane)
 {
-  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mla_laneqv4sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17575,32 +16391,32 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
 {
-  return __a - __b * __c;
+  return __builtin_aarch64_float_mlsv2sf (__a, __b, __c);
 }
 __extension__ extern __inline float64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
 {
-  return __a - __b * __c;
+  return (float64x1_t) {__builtin_aarch64_float_mlsdf (__a[0], __b[0], __c[0])};
 }
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
 {
-  return __a - __b * __c;
+  return __builtin_aarch64_float_mlsv4sf (__a, __b, __c);
 }
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
 {
-  return __a - __b * __c;
+  return __builtin_aarch64_float_mlsv2df (__a, __b, __c);
 }
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
         float32x2_t __c, const int __lane)
 {
-  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mls_lanev2sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17635,7 +16451,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
         float32x4_t __c, const int __lane)
 {
-  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mls_laneqv2sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17670,7 +16486,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
   float32x2_t __c, const int __lane)
 {
-  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mls_lanev4sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -17705,7 +16521,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
   float32x4_t __c, const int __lane)
 {
-  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
+  return __builtin_aarch64_float_mls_laneqv4sf (__a, __b, __c, __lane);
 }
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -18346,22 +17162,19 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
-       (int8x8_t) __b);
+  return __builtin_aarch64_addpv8qi_uuu (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
-        (int16x4_t) __b);
+  return __builtin_aarch64_addpv4hi_uuu (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
-        (int32x2_t) __b);
+  return __builtin_aarch64_addpv2si_uuu (__a, __b);
 }
 __extension__ extern __inline float32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -18385,7 +17198,7 @@ __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vpaddd_u64 (uint64x2_t __a)
 {
-  return __builtin_aarch64_addpdi ((int64x2_t) __a);
+  return __builtin_aarch64_addpdi_uu (__a);
 }
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -19899,373 +18712,217 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
-  return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl2v8qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl2v8qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl2v8qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl2v16qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl2v16qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl2v16qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl3_s8 (int8x16x3_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl3v8qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl3_u8 (uint8x16x3_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl3v8qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl3_p8 (poly8x16x3_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl3v8qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl3q_s8 (int8x16x3_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl3v16qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl3q_u8 (uint8x16x3_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl3v16qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl3q_p8 (poly8x16x3_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl3v16qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl4_s8 (int8x16x4_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl4v8qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl4_u8 (uint8x16x4_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl4v8qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl4_p8 (poly8x16x4_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl4v8qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl4q_s8 (int8x16x4_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl4v16qi_ssu (__tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl4q_u8 (uint8x16x4_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl4v16qi_uuu (__tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl4q_p8 (poly8x16x4_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl4v16qi_ppu (__tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
-  return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx2v8qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-      (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx2v8qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-      (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx2v8qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
-  return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx2v16qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o,
-        (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx2v16qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o,
-        (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx2v16qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx3_s8 (int8x8_t __r, int8x16x3_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2);
-  return __builtin_aarch64_qtbx3v8qi (__r, __o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx3v8qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx3_u8 (uint8x8_t __r, uint8x16x3_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o,
-       (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx3v8qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx3_p8 (poly8x8_t __r, poly8x16x3_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o,
-       (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx3v8qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx3q_s8 (int8x16_t __r, int8x16x3_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2);
-  return __builtin_aarch64_qtbx3v16qi (__r, __o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx3v16qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx3q_u8 (uint8x16_t __r, uint8x16x3_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o,
-         (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx3v16qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx3q_p8 (poly8x16_t __r, poly8x16x3_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o,
-         (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx3v16qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx4_s8 (int8x8_t __r, int8x16x4_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3);
-  return __builtin_aarch64_qtbx4v8qi (__r, __o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx4v8qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx4_u8 (uint8x8_t __r, uint8x16x4_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o,
-       (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx4v8qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx4_p8 (poly8x8_t __r, poly8x16x4_t __tab, uint8x8_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o,
-       (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx4v8qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx4q_s8 (int8x16_t __r, int8x16x4_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3);
-  return __builtin_aarch64_qtbx4v16qi (__r, __o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx4v16qi_sssu (__r, __tab, __idx);
 }
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx4q_u8 (uint8x16_t __r, uint8x16x4_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o,
-         (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx4v16qi_uuuu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx4q_p8 (poly8x16_t __r, poly8x16x4_t __tab, uint8x16_t __idx)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3);
-  return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o,
-         (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx4v16qi_pppu (__r, __tab, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -20737,31 +19394,31 @@ __extension__ extern __inline float32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndns_f32 (float32_t __a)
 {
-  return __builtin_aarch64_frintnsf (__a);
+  return __builtin_aarch64_roundevensf (__a);
 }
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndn_f32 (float32x2_t __a)
 {
-  return __builtin_aarch64_frintnv2sf (__a);
+  return __builtin_aarch64_roundevenv2sf (__a);
 }
 __extension__ extern __inline float64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndn_f64 (float64x1_t __a)
 {
-  return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
+  return (float64x1_t) {__builtin_aarch64_roundevendf (__a[0])};
 }
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndnq_f32 (float32x4_t __a)
 {
-  return __builtin_aarch64_frintnv4sf (__a);
+  return __builtin_aarch64_roundevenv4sf (__a);
 }
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndnq_f64 (float64x2_t __a)
 {
-  return __builtin_aarch64_frintnv2df (__a);
+  return __builtin_aarch64_roundevenv2df (__a);
 }
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -21603,19 +20260,19 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshr_n_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_lshrv8qi_uus (__a, __b);
 }
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshr_n_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_lshrv4hi_uus (__a, __b);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshr_n_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_lshrv2si_uus (__a, __b);
 }
 __extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -21651,25 +20308,25 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u8 (uint8x16_t __a, const int __b)
 {
-  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_lshrv16qi_uus (__a, __b);
 }
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_lshrv8hi_uus (__a, __b);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_lshrv4si_uus (__a, __b);
 }
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_lshrv2di_uus (__a, __b);
 }
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -22137,15 +20794,13 @@ __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p8 (poly8_t *__a, poly8x8_t __b)
 {
-  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a,
-        (int8x8_t) __b);
+  __builtin_aarch64_st1v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p16 (poly16_t *__a, poly16x4_t __b)
 {
-  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a,
-        (int16x4_t) __b);
+  __builtin_aarch64_st1v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -22181,22 +20836,19 @@ __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u8 (uint8_t *__a, uint8x8_t __b)
 {
-  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a,
-        (int8x8_t) __b);
+  __builtin_aarch64_st1v8qi_su ((__builtin_aarch64_simd_qi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u16 (uint16_t *__a, uint16x4_t __b)
 {
-  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a,
-        (int16x4_t) __b);
+  __builtin_aarch64_st1v4hi_su ((__builtin_aarch64_simd_hi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u32 (uint32_t *__a, uint32x2_t __b)
 {
-  __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a,
-        (int32x2_t) __b);
+  __builtin_aarch64_st1v2si_su ((__builtin_aarch64_simd_si *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -22226,22 +20878,19 @@ __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p8 (poly8_t *__a, poly8x16_t __b)
 {
-  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a,
-         (int8x16_t) __b);
+  __builtin_aarch64_st1v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p16 (poly16_t *__a, poly16x8_t __b)
 {
-  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a,
-        (int16x8_t) __b);
+  __builtin_aarch64_st1v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p64 (poly64_t *__a, poly64x2_t __b)
 {
-  __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a,
-    (poly64x2_t) __b);
+  __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -22271,29 +20920,25 @@ __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u8 (uint8_t *__a, uint8x16_t __b)
 {
-  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a,
-         (int8x16_t) __b);
+  __builtin_aarch64_st1v16qi_su ((__builtin_aarch64_simd_qi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u16 (uint16_t *__a, uint16x8_t __b)
 {
-  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a,
-        (int16x8_t) __b);
+  __builtin_aarch64_st1v8hi_su ((__builtin_aarch64_simd_hi *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u32 (uint32_t *__a, uint32x4_t __b)
 {
-  __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a,
-        (int32x4_t) __b);
+  __builtin_aarch64_st1v4si_su ((__builtin_aarch64_simd_si *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u64 (uint64_t *__a, uint64x2_t __b)
 {
-  __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a,
-        (int64x2_t) __b);
+  __builtin_aarch64_st1v2di_su ((__builtin_aarch64_simd_di *) __a, __b);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -22467,1879 +21112,1010 @@ __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s64_x2 (int64_t * __a, int64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int64x2x2_t __temp;
-  __temp.val[0]
-    = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[1]
-    = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint64x2x2_t __temp;
-  __temp.val[0]
-    = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1]
-    = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_f64_x2 (float64_t * __a, float64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  float64x2x2_t __temp;
-  __temp.val[0]
-    = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1]
-    = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s8_x2 (int8_t * __a, int8x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int8x16x2_t __temp;
-  __temp.val[0]
-    = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[1]
-    = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  poly8x16x2_t __temp;
-  __temp.val[0]
-    = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1]
-    = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x2v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s16_x2 (int16_t * __a, int16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int16x8x2_t __temp;
-  __temp.val[0]
-    = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[1]
-    = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  poly16x8x2_t __temp;
-  __temp.val[0]
-    = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1]
-    = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x2v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s32_x2 (int32_t * __a, int32x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int32x4x2_t __temp;
-  __temp.val[0]
-    = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[1]
-    = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint8x16x2_t __temp;
-  __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x2v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint16x8x2_t __temp;
-  __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x2v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint32x4x2_t __temp;
-  __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x2v2si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_f16_x2 (float16_t * __a, float16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  float16x8x2_t __temp;
-  __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1);
-  __builtin_aarch64_st1x2v4hf (__a, __o);
+  __builtin_aarch64_st1x2v4hf (__a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_f32_x2 (float32_t * __a, float32x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  float32x4x2_t __temp;
-  __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  poly64x2x2_t __temp;
-  __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s8_x2 (int8_t * __a, int8x16x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x2v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s16_x2 (int16_t * __a, int16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x2v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s32_x2 (int32_t * __a, int32x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s64_x2 (int64_t * __a, int64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x2v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x2v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x2v4si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x2v2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f16_x2 (float16_t * __a, float16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1);
-  __builtin_aarch64_st1x2v8hf (__a, __o);
+  __builtin_aarch64_st1x2v8hf (__a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f32_x2 (float32_t * __a, float32x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f64_x2 (float64_t * __a, float64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x2v2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s64_x3 (int64_t * __a, int64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int64x2x3_t __temp;
-  __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint64x2x3_t __temp;
-  __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x3di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_f64_x3 (float64_t * __a, float64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  float64x2x3_t __temp;
-  __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s8_x3 (int8_t * __a, int8x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int8x16x3_t __temp;
-  __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  poly8x16x3_t __temp;
-  __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x3v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s16_x3 (int16_t * __a, int16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int16x8x3_t __temp;
-  __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  poly16x8x3_t __temp;
-  __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x3v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_s32_x3 (int32_t * __a, int32x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int32x4x3_t __temp;
-  __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint8x16x3_t __temp;
-  __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x3v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint16x8x3_t __temp;
-  __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x3v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint32x4x3_t __temp;
-  __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x3v2si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_f16_x3 (float16_t * __a, float16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  float16x8x3_t __temp;
-  __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
+  __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_f32_x3 (float32_t * __a, float32x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  float32x4x3_t __temp;
-  __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  poly64x2x3_t __temp;
-  __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x3di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s8_x3 (int8_t * __a, int8x16x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x3v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s16_x3 (int16_t * __a, int16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x3v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s32_x3 (int32_t * __a, int32x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_s64_x3 (int64_t * __a, int64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a,
+          (int64x2x3_t) __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st1x3v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st1x3v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st1x3v4si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x3v2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f16_x3 (float16_t * __a, float16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
+  __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f32_x3 (float32_t * __a, float32x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_f64_x3 (float64_t * __a, float64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st1x3v2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_s8_x4 (int8_t * __a, int8x8x4_t val)
+vst1_s8_x4 (int8_t * __a, int8x8x4_t __val)
 {
-  union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_s8_x4 (int8_t * __a, int8x16x4_t val)
+vst1q_s8_x4 (int8_t * __a, int8x16x4_t __val)
 {
-  union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_s16_x4 (int16_t * __a, int16x4x4_t val)
+vst1_s16_x4 (int16_t * __a, int16x4x4_t __val)
 {
-  union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_s16_x4 (int16_t * __a, int16x8x4_t val)
+vst1q_s16_x4 (int16_t * __a, int16x8x4_t __val)
 {
-  union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_s32_x4 (int32_t * __a, int32x2x4_t val)
+vst1_s32_x4 (int32_t * __a, int32x2x4_t __val)
 {
-  union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+  __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_s32_x4 (int32_t * __a, int32x4x4_t val)
+vst1q_s32_x4 (int32_t * __a, int32x4x4_t __val)
 {
-  union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_u8_x4 (uint8_t * __a, uint8x8x4_t val)
+vst1_u8_x4 (uint8_t * __a, uint8x8x4_t __val)
 {
-  union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t val)
+vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t __val)
 {
-  union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_u16_x4 (uint16_t * __a, uint16x4x4_t val)
+vst1_u16_x4 (uint16_t * __a, uint16x4x4_t __val)
 {
-  union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t val)
+vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t __val)
 {
-  union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_u32_x4 (uint32_t * __a, uint32x2x4_t val)
+vst1_u32_x4 (uint32_t * __a, uint32x2x4_t __val)
 {
-  union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+  __builtin_aarch64_st1x4v2si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t val)
+vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t __val)
 {
-  union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_f16_x4 (float16_t * __a, float16x4x4_t val)
+vst1_f16_x4 (float16_t * __a, float16x4x4_t __val)
 {
-  union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_f16_x4 (float16_t * __a, float16x8x4_t val)
+vst1q_f16_x4 (float16_t * __a, float16x8x4_t __val)
 {
-  union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_f32_x4 (float32_t * __a, float32x2x4_t val)
+vst1_f32_x4 (float32_t * __a, float32x2x4_t __val)
 {
-  union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __u.__o);
+  __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_f32_x4 (float32_t * __a, float32x4x4_t val)
+vst1q_f32_x4 (float32_t * __a, float32x4x4_t __val)
 {
-  union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_p8_x4 (poly8_t * __a, poly8x8x4_t val)
+vst1_p8_x4 (poly8_t * __a, poly8x8x4_t __val)
 {
-  union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t val)
+vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t __val)
 {
-  union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_p16_x4 (poly16_t * __a, poly16x4x4_t val)
+vst1_p16_x4 (poly16_t * __a, poly16x4x4_t __val)
 {
-  union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t val)
+vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t __val)
 {
-  union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_s64_x4 (int64_t * __a, int64x1x4_t val)
+vst1_s64_x4 (int64_t * __a, int64x1x4_t __val)
 {
-  union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+  __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_u64_x4 (uint64_t * __a, uint64x1x4_t val)
+vst1_u64_x4 (uint64_t * __a, uint64x1x4_t __val)
 {
-  union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+  __builtin_aarch64_st1x4di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_p64_x4 (poly64_t * __a, poly64x1x4_t val)
+vst1_p64_x4 (poly64_t * __a, poly64x1x4_t __val)
 {
-  union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+  __builtin_aarch64_st1x4di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_s64_x4 (int64_t * __a, int64x2x4_t val)
+vst1q_s64_x4 (int64_t * __a, int64x2x4_t __val)
 {
-  union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+  __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t val)
+vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t __val)
 {
-  union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+  __builtin_aarch64_st1x4v2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t val)
+vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t __val)
 {
-  union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o);
+  __builtin_aarch64_st1x4v2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_f64_x4 (float64_t * __a, float64x1x4_t val)
+vst1_f64_x4 (float64_t * __a, float64x1x4_t __val)
 {
-  union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __u.__o);
+  __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_f64_x4 (float64_t * __a, float64x2x4_t val)
+vst1q_f64_x4 (float64_t * __a, float64x2x4_t __val)
 {
-  union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __u.__o);
+  __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_s64 (int64_t * __a, int64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int64x2x2_t __temp;
-  __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_u64 (uint64_t * __a, uint64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint64x2x2_t __temp;
-  __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_f64 (float64_t * __a, float64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  float64x2x2_t __temp;
-  __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_s8 (int8_t * __a, int8x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int8x16x2_t __temp;
-  __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_p8 (poly8_t * __a, poly8x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  poly8x16x2_t __temp;
-  __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st2v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_s16 (int16_t * __a, int16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int16x8x2_t __temp;
-  __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_p16 (poly16_t * __a, poly16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  poly16x8x2_t __temp;
-  __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st2v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_s32 (int32_t * __a, int32x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  int32x4x2_t __temp;
-  __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_u8 (uint8_t * __a, uint8x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint8x16x2_t __temp;
-  __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st2v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_u16 (uint16_t * __a, uint16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint16x8x2_t __temp;
-  __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st2v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_u32 (uint32_t * __a, uint32x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  uint32x4x2_t __temp;
-  __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st2v2si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_f16 (float16_t * __a, float16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  float16x8x2_t __temp;
-  __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1);
-  __builtin_aarch64_st2v4hf (__a, __o);
+  __builtin_aarch64_st2v4hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_f32 (float32_t * __a, float32x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  float32x4x2_t __temp;
-  __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1);
-  __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_p64 (poly64_t * __a, poly64x1x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  poly64x2x2_t __temp;
-  __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[1], 1);
-  __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s8 (int8_t * __a, int8x16x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_p8 (poly8_t * __a, poly8x16x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st2v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s16 (int16_t * __a, int16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_p16 (poly16_t * __a, poly16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st2v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s32 (int32_t * __a, int32x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1);
-  __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_s64 (int64_t * __a, int64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u8 (uint8_t * __a, uint8x16x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st2v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u16 (uint16_t * __a, uint16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st2v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u32 (uint32_t * __a, uint32x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1);
-  __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st2v4si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_u64 (uint64_t * __a, uint64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st2v2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_f16 (float16_t * __a, float16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1);
-  __builtin_aarch64_st2v8hf (__a, __o);
+  __builtin_aarch64_st2v8hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_f32 (float32_t * __a, float32x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1);
-  __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_f64 (float64_t * __a, float64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_p64 (poly64_t * __a, poly64x2x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-            (poly64x2_t) __val.val[1], 1);
-  __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st2v2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_s64 (int64_t * __a, int64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int64x2x3_t __temp;
-  __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_u64 (uint64_t * __a, uint64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint64x2x3_t __temp;
-  __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st3di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_f64 (float64_t * __a, float64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  float64x2x3_t __temp;
-  __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_s8 (int8_t * __a, int8x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int8x16x3_t __temp;
-  __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_p8 (poly8_t * __a, poly8x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  poly8x16x3_t __temp;
-  __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st3v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_s16 (int16_t * __a, int16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int16x8x3_t __temp;
-  __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_p16 (poly16_t * __a, poly16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  poly16x8x3_t __temp;
-  __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st3v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_s32 (int32_t * __a, int32x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  int32x4x3_t __temp;
-  __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_u8 (uint8_t * __a, uint8x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint8x16x3_t __temp;
-  __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st3v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_u16 (uint16_t * __a, uint16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint16x8x3_t __temp;
-  __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st3v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_u32 (uint32_t * __a, uint32x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  uint32x4x3_t __temp;
-  __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st3v2si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_f16 (float16_t * __a, float16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  float16x8x3_t __temp;
-  __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
+  __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_f32 (float32_t * __a, float32x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  float32x4x3_t __temp;
-  __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_p64 (poly64_t * __a, poly64x1x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  poly64x2x3_t __temp;
-  __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __temp.val[2], 2);
-  __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st3di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_s8 (int8_t * __a, int8x16x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_p8 (poly8_t * __a, poly8x16x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st3v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_s16 (int16_t * __a, int16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_p16 (poly16_t * __a, poly16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st3v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_s32 (int32_t * __a, int32x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
-  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_s64 (int64_t * __a, int64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_u8 (uint8_t * __a, uint8x16x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st3v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_u16 (uint16_t * __a, uint16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st3v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_u32 (uint32_t * __a, uint32x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2);
-  __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st3v4si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_u64 (uint64_t * __a, uint64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st3v2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_f16 (float16_t * __a, float16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
+  __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_f32 (float32_t * __a, float32x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2);
-  __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_f64 (float64_t * __a, float64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_p64 (poly64_t * __a, poly64x2x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv2di_ssps (__o,
-            (poly64x2_t) __val.val[2], 2);
-  __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st3v2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_s64 (int64_t * __a, int64x1x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  int64x2x4_t __temp;
-  __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __temp.val[3] = vcombine_s64 (__val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3);
-  __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_u64 (uint64_t * __a, uint64x1x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  uint64x2x4_t __temp;
-  __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_u64 (__val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3);
-  __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st4di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_f64 (float64_t * __a, float64x1x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  float64x2x4_t __temp;
-  __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_f64 (__val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3);
-  __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_s8 (int8_t * __a, int8x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  int8x16x4_t __temp;
-  __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __temp.val[3] = vcombine_s8 (__val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_p8 (poly8_t * __a, poly8x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  poly8x16x4_t __temp;
-  __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_p8 (__val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st4v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_s16 (int16_t * __a, int16x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  int16x8x4_t __temp;
-  __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __temp.val[3] = vcombine_s16 (__val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_p16 (poly16_t * __a, poly16x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  poly16x8x4_t __temp;
-  __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_p16 (__val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st4v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_s32 (int32_t * __a, int32x2x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  int32x4x4_t __temp;
-  __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __temp.val[3] = vcombine_s32 (__val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_u8 (uint8_t * __a, uint8x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  uint8x16x4_t __temp;
-  __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_u8 (__val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st4v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_u16 (uint16_t * __a, uint16x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  uint16x8x4_t __temp;
-  __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_u16 (__val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st4v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_u32 (uint32_t * __a, uint32x2x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  uint32x4x4_t __temp;
-  __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_u32 (__val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st4v2si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_f16 (float16_t * __a, float16x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  float16x8x4_t __temp;
-  __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_f16 (__val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
+  __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_f32 (float32_t * __a, float32x2x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  float32x4x4_t __temp;
-  __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_f32 (__val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_p64 (poly64_t * __a, poly64x1x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  poly64x2x4_t __temp;
-  __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_p64 (__val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __temp.val[3], 3);
-  __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st4di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_s8 (int8_t * __a, int8x16x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3);
-  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_p8 (poly8_t * __a, poly8x16x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3);
-  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st4v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_s16 (int16_t * __a, int16x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3);
-  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_p16 (poly16_t * __a, poly16x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3);
-  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st4v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_s32 (int32_t * __a, int32x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3);
-  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_s64 (int64_t * __a, int64x2x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3);
-  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_u8 (uint8_t * __a, uint8x16x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3);
-  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
+  __builtin_aarch64_st4v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_u16 (uint16_t * __a, uint16x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3);
-  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
+  __builtin_aarch64_st4v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_u32 (uint32_t * __a, uint32x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3);
-  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
+  __builtin_aarch64_st4v4si_su ((__builtin_aarch64_simd_si *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_u64 (uint64_t * __a, uint64x2x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3);
-  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st4v2di_su ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_f16 (float16_t * __a, float16x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[3], 3);
-  __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
+  __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_f32 (float32_t * __a, float32x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[3], 3);
-  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
+  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_f64 (float64_t * __a, float64x2x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[3], 3);
-  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
+  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_p64 (poly64_t * __a, poly64x2x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv2di_ssps (__o,
-            (poly64x2_t) __val.val[3], 3);
-  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
+  __builtin_aarch64_st4v2di_sp ((__builtin_aarch64_simd_di *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -24413,51 +22189,28 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
   int8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
   __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx);
-  return __result;
+  return __builtin_aarch64_qtbx2v8qi (__r, __temp, __idx);
 }
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
   uint8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
   __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-        (int8x8_t)__idx);
-  return __result;
+  return __builtin_aarch64_qtbx2v8qi_uuuu (__r, __temp, __idx);
 }
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
   poly8x16x2_t __temp;
-  __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
   __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv16qi (__o,
-        (int8x16_t) __temp.val[1], 1);
-  __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-        (int8x8_t)__idx);
-  return __result;
+  return __builtin_aarch64_qtbx2v8qi_pppu (__r, __temp, __idx);
 }
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -26547,13 +24300,13 @@ __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndn_f16 (float16x4_t __a)
 {
-  return __builtin_aarch64_frintnv4hf (__a);
+  return __builtin_aarch64_roundevenv4hf (__a);
 }
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrndnq_f16 (float16x8_t __a)
 {
-  return __builtin_aarch64_frintnv8hf (__a);
+  return __builtin_aarch64_roundevenv8hf (__a);
 }
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -26799,13 +24552,13 @@ __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f16 (float16x4_t __a, float16x4_t __b)
 {
-  return __builtin_aarch64_smax_nanv4hf (__a, __b);
+  return __builtin_aarch64_fmax_nanv4hf (__a, __b);
 }
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_aarch64_smax_nanv8hf (__a, __b);
+  return __builtin_aarch64_fmax_nanv8hf (__a, __b);
 }
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -26823,13 +24576,13 @@ __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f16 (float16x4_t __a, float16x4_t __b)
 {
-  return __builtin_aarch64_smin_nanv4hf (__a, __b);
+  return __builtin_aarch64_fmin_nanv4hf (__a, __b);
 }
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_aarch64_smin_nanv8hf (__a, __b);
+  return __builtin_aarch64_fmin_nanv8hf (__a, __b);
 }
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -27246,25 +24999,25 @@ __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b)
 {
-  return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b);
+  return __builtin_aarch64_udot_prodv8qi_uuuu (__a, __b, __r);
 }
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b)
 {
-  return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b);
+  return __builtin_aarch64_udot_prodv16qi_uuuu (__a, __b, __r);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b)
 {
-  return __builtin_aarch64_sdotv8qi (__r, __a, __b);
+  return __builtin_aarch64_sdot_prodv8qi (__a, __b, __r);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b)
 {
-  return __builtin_aarch64_sdotv16qi (__r, __a, __b);
+  return __builtin_aarch64_sdot_prodv16qi (__a, __b, __r);
 }
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28220,7 +25973,7 @@ __extension__ extern __inline bfloat16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcombine_bf16 (bfloat16x4_t __a, bfloat16x4_t __b)
 {
-  return (bfloat16x8_t)__builtin_aarch64_combinev4bf (__a, __b);
+  return __builtin_aarch64_combinev4bf (__a, __b);
 }
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28286,65 +26039,38 @@ __extension__ extern __inline bfloat16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_bf16_x2 (const bfloat16_t *__a)
 {
-  bfloat16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v4bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0);
-  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_bf16_x2 (const bfloat16_t *__a)
 {
-  bfloat16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld1x2v8bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0);
-  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld1x2v8bf (
+    (const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_bf16_x3 (const bfloat16_t *__a)
 {
-  bfloat16x4x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v4bf ((const __builtin_aarch64_simd_bf *) __a);
-  __i.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0);
-  __i.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1);
-  __i.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_bf16_x3 (const bfloat16_t *__a)
 {
-  bfloat16x8x3_t __i;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld1x3v8bf ((const __builtin_aarch64_simd_bf *) __a);
-  __i.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0);
-  __i.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1);
-  __i.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2);
-  return __i;
+  return __builtin_aarch64_ld1x3v8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1_bf16_x4 (const bfloat16_t *__a)
 {
-  union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v4bf ((const __builtin_aarch64_simd_bf *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld1q_bf16_x4 (const bfloat16_t *__a)
 {
-  union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au;
-  __au.__o
-    = __builtin_aarch64_ld1x4v8bf ((const __builtin_aarch64_simd_bf *) __a);
-  return __au.__i;
+  return __builtin_aarch64_ld1x4v8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28374,145 +26100,73 @@ __extension__ extern __inline bfloat16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v4bf (__a);
-  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0);
-  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v4bf (__a);
 }
 __extension__ extern __inline bfloat16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2v8bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0);
-  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2v8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2_dup_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x4x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0);
-  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld2q_dup_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x8x2_t ret;
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0);
-  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1);
-  return ret;
+  return __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v4bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0);
-  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1);
-  ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3v8bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0);
-  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1);
-  ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3v8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3_dup_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x4x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv4bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0);
-  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1);
-  ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x3_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld3q_dup_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x8x3_t ret;
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_ld3rv8bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0);
-  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1);
-  ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2);
-  return ret;
+  return __builtin_aarch64_ld3rv8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v4bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0);
-  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1);
-  ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2);
-  ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4v8bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0);
-  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1);
-  ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2);
-  ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4v8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x4x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4_dup_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x4x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv4bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0);
-  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1);
-  ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2);
-  ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv4bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline bfloat16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vld4q_dup_bf16 (const bfloat16_t * __a)
 {
-  bfloat16x8x4_t ret;
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_ld4rv8bf ((const __builtin_aarch64_simd_bf *) __a);
-  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0);
-  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1);
-  ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2);
-  ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3);
-  return ret;
+  return __builtin_aarch64_ld4rv8bf ((const __builtin_aarch64_simd_bf *) __a);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28524,60 +26178,37 @@ __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  bfloat16x8x2_t __temp;
-  __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1);
-  __builtin_aarch64_st1x2v4bf (__a, __o);
+  __builtin_aarch64_st1x2v4bf (__a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1);
-  __builtin_aarch64_st1x2v8bf (__a, __o);
+  __builtin_aarch64_st1x2v8bf (__a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  bfloat16x8x3_t __temp;
-  __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __o);
+  __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __o);
+  __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t val)
+vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t __val)
 {
-  union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __u.__o);
+  __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t val)
+vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t __val)
 {
-  union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val };
-  __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __u.__o);
+  __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28601,73 +26232,37 @@ __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_bf16 (bfloat16_t * __a, bfloat16x4x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  bfloat16x8x2_t __temp;
-  __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1);
-  __builtin_aarch64_st2v4bf (__a, __o);
+  __builtin_aarch64_st2v4bf (__a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2q_bf16 (bfloat16_t * __a, bfloat16x8x2_t __val)
 {
-  __builtin_aarch64_simd_oi __o;
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1);
-  __builtin_aarch64_st2v8bf (__a, __o);
+  __builtin_aarch64_st2v8bf (__a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3_bf16 (bfloat16_t * __a, bfloat16x4x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  bfloat16x8x3_t __temp;
-  __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2);
-  __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __o);
+  __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst3q_bf16 (bfloat16_t * __a, bfloat16x8x3_t __val)
 {
-  __builtin_aarch64_simd_ci __o;
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2);
-  __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __o);
+  __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4_bf16 (bfloat16_t * __a, bfloat16x4x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  bfloat16x8x4_t __temp;
-  __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __temp.val[3] = vcombine_bf16 (__val.val[3], vcreate_bf16 (__AARCH64_UINT64_C (0)));
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3);
-  __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __o);
+  __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst4q_bf16 (bfloat16_t * __a, bfloat16x8x4_t __val)
 {
-  __builtin_aarch64_simd_xi __o;
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[0], 0);
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[1], 1);
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[2], 2);
-  __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[3], 3);
-  __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __o);
+  __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __val);
 }
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -29183,24 +26778,90 @@ vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1,
   return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
       __a, __lane1);
 }
-__LD2_LANE_FUNC (bfloat16x4x2_t, bfloat16x4_t, bfloat16x8x2_t, bfloat16_t, v4bf,
-   v8bf, bf, bf16, bfloat16x8_t)
-__LD2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
-__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf,
-   v8bf, bf, bf16, bfloat16x8_t)
-__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
-__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf,
-   v8bf, bf, bf16, bfloat16x8_t)
-__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16)
-__ST2_LANE_FUNC (bfloat16x4x2_t, bfloat16x8x2_t, bfloat16_t, v4bf, v8bf, bf,
-   bf16, bfloat16x8_t)
-__ST2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16_t, v8bf, bf, bf16)
-__ST3_LANE_FUNC (bfloat16x4x3_t, bfloat16x8x3_t, bfloat16_t, v4bf, v8bf, bf,
-   bf16, bfloat16x8_t)
-__ST3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16_t, v8bf, bf, bf16)
-__ST4_LANE_FUNC (bfloat16x4x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, v8bf, bf,
-   bf16, bfloat16x8_t)
-__ST4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16_t, v8bf, bf, bf16)
+__extension__ extern __inline bfloat16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev4bf (
+   (__builtin_aarch64_simd_bf *) __ptr, __b, __c);
+}
+__extension__ extern __inline bfloat16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld2q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x2_t __b, const int __c)
+{
+  return __builtin_aarch64_ld2_lanev8bf (
+   (__builtin_aarch64_simd_bf *) __ptr, __b, __c);
+}
+__extension__ extern __inline bfloat16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev4bf (
+   (__builtin_aarch64_simd_bf *) __ptr, __b, __c);
+}
+__extension__ extern __inline bfloat16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld3q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x3_t __b, const int __c)
+{
+  return __builtin_aarch64_ld3_lanev8bf (
+   (__builtin_aarch64_simd_bf *) __ptr, __b, __c);
+}
+__extension__ extern __inline bfloat16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev4bf (
+   (__builtin_aarch64_simd_bf *) __ptr, __b, __c);
+}
+__extension__ extern __inline bfloat16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__))
+vld4q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x4_t __b, const int __c)
+{
+  return __builtin_aarch64_ld4_lanev8bf (
+   (__builtin_aarch64_simd_bf *) __ptr, __b, __c);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x2_t __val, const int __lane)
+{
+  __builtin_aarch64_st2_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x3_t __val, const int __lane)
+{
+  __builtin_aarch64_st3_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __val,
+      __lane);
+}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane)
+{
+  __builtin_aarch64_st4_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __val,
+      __lane);
+}
 #pragma GCC pop_options
 #pragma GCC push_options
 #pragma GCC target ("arch=armv8.2-a+i8mm")
@@ -29208,13 +26869,13 @@ __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
 {
-  return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b);
+  return __builtin_aarch64_usdot_prodv8qi_suss (__a, __b, __r);
 }
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
 {
-  return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b);
+  return __builtin_aarch64_usdot_prodv16qi_suss (__a, __b, __r);
 }
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -29385,17 +27046,5 @@ vaddq_p128 (poly128_t __a, poly128_t __b)
 #undef __aarch64_vdupq_laneq_u16
 #undef __aarch64_vdupq_laneq_u32
 #undef __aarch64_vdupq_laneq_u64
-#undef __LD2_LANE_FUNC
-#undef __LD2Q_LANE_FUNC
-#undef __LD3_LANE_FUNC
-#undef __LD3Q_LANE_FUNC
-#undef __LD4_LANE_FUNC
-#undef __LD4Q_LANE_FUNC
-#undef __ST2_LANE_FUNC
-#undef __ST2Q_LANE_FUNC
-#undef __ST3_LANE_FUNC
-#undef __ST3Q_LANE_FUNC
-#undef __ST4_LANE_FUNC
-#undef __ST4Q_LANE_FUNC
 #endif
 #endif
diff --git a/third_party/aarch64/arm_sve.internal.h b/third_party/aarch64/arm_sve.internal.h
index a4fb4000a..c224bdba3 100644
--- a/third_party/aarch64/arm_sve.internal.h
+++ b/third_party/aarch64/arm_sve.internal.h
@@ -5,6 +5,6 @@
 typedef __fp16 float16_t;
 typedef float float32_t;
 typedef double float64_t;
-#pragma GCC aarch64 "third_party/aarch64/arm_sve.internal.h"
+#pragma GCC aarch64 "arm_sve.h"
 #endif
 #endif
diff --git a/third_party/aarch64/upgrade.sh b/third_party/aarch64/upgrade.sh
index 04376e53a..1e4e6c5b7 100755
--- a/third_party/aarch64/upgrade.sh
+++ b/third_party/aarch64/upgrade.sh
@@ -13,17 +13,16 @@
 # 3. You should fix up the `#pragma GCC aarch64` things.
 #
 
-s=/opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
+s=/opt/goodies/include
 d=third_party/aarch64
 
 FILES='
+acc_prof
 arm_acle
+arm_bf16
 arm_fp16
 arm_neon
-acc_prof
-arm_bf16
 arm_sve
-acc_prof
 openacc
 '
 
diff --git a/third_party/argon2/argon2.c b/third_party/argon2/argon2.c
index 323120a69..24f04d4d8 100644
--- a/third_party/argon2/argon2.c
+++ b/third_party/argon2/argon2.c
@@ -22,11 +22,6 @@
 #include "third_party/argon2/core.h"
 #include "third_party/argon2/encoding.h"
 
-asm(".ident\t\"\\n\\n\
-argon2 (CC0 or Apache2)\\n\
-Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\
-Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\"");
-
 /**
  * Function that gives the string representation of an argon2_type.
  * @param type The argon2_type that we want the string for
diff --git a/third_party/argon2/blake2b.c b/third_party/argon2/blake2b.c
index f62c988f2..26392c322 100644
--- a/third_party/argon2/blake2b.c
+++ b/third_party/argon2/blake2b.c
@@ -21,11 +21,6 @@
 #include "third_party/argon2/blake2.h"
 #include "third_party/argon2/core.h"
 
-asm(".ident\t\"\\n\\n\
-argon2 (CC0 or Apache2)\\n\
-Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\
-Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\"");
-
 /* Ensure param structs have not been wrongly padded */
 /* Poor man's static_assert */
 enum {
diff --git a/third_party/argon2/core.c b/third_party/argon2/core.c
index 340fc22c2..72f04ee50 100644
--- a/third_party/argon2/core.c
+++ b/third_party/argon2/core.c
@@ -20,10 +20,10 @@
 #include "third_party/argon2/blake2.h"
 #include "third_party/argon2/core.h"
 
-asm(".ident\t\"\\n\\n\
-argon2 (CC0 or Apache2)\\n\
-Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\
-Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\"");
+__notice(argon2_notice, "\
+argon2 (CC0 or Apache2)\n\
+Copyright 2016 Daniel Dinu, Dmitry Khovratovich\n\
+Copyright 2016 Jean-Philippe Aumasson, Samuel Neves");
 
 int FLAG_clear_internal_memory = 1;
 
diff --git a/third_party/argon2/encoding.c b/third_party/argon2/encoding.c
index 22f4707fc..22a50dcf3 100644
--- a/third_party/argon2/encoding.c
+++ b/third_party/argon2/encoding.c
@@ -21,11 +21,6 @@
 #include "libc/str/str.h"
 #include "third_party/argon2/core.h"
 
-asm(".ident\t\"\\n\\n\
-argon2 (CC0 or Apache2)\\n\
-Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\
-Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\"");
-
 /*
  * Example code for a decoder and encoder of "hash strings", with Argon2
  * parameters.
diff --git a/third_party/argon2/ref.c b/third_party/argon2/ref.c
index 1809aca03..f21c902de 100644
--- a/third_party/argon2/ref.c
+++ b/third_party/argon2/ref.c
@@ -22,11 +22,6 @@
 #include "third_party/argon2/blamka-round-ref.h"
 #include "third_party/argon2/core.h"
 
-asm(".ident\t\"\\n\\n\
-argon2 (CC0 or Apache2)\\n\
-Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\
-Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\"");
-
 /*
  * Argon2 reference source code package - reference C implementations
  *
diff --git a/third_party/awk/main.c b/third_party/awk/main.c
index d1ec5b755..07fba0946 100644
--- a/third_party/awk/main.c
+++ b/third_party/awk/main.c
@@ -39,28 +39,29 @@
 #include "libc/sysv/consts/sig.h"
 #include "third_party/awk/awk.h"
 
-asm(".ident\t\"\\n\\n\
-Copyright (C) Lucent Technologies 1997\\n\
-All Rights Reserved\\n\
-\\n\
-Permission to use, copy, modify, and distribute this software and\\n\
-its documentation for any purpose and without fee is hereby\\n\
-granted, provided that the above copyright notice appear in all\\n\
-copies and that both that the copyright notice and this\\n\
-permission notice and warranty disclaimer appear in supporting\\n\
-documentation, and that the name Lucent Technologies or any of\\n\
-its entities not be used in advertising or publicity pertaining\\n\
-to distribution of the software without specific, written prior\\n\
-permission.\\n\
-\\n\
-LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,\\n\
-INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.\\n\
-IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY\\n\
-SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\\n\
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER\\n\
-IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,\\n\
-ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF\\n\
-THIS SOFTWARE.\"");
+__notice(awk_notice, "\
+The One True Awk\n\
+Copyright (C) Lucent Technologies 1997\n\
+All Rights Reserved\n\
+\n\
+Permission to use, copy, modify, and distribute this software and\n\
+its documentation for any purpose and without fee is hereby\n\
+granted, provided that the above copyright notice appear in all\n\
+copies and that both that the copyright notice and this\n\
+permission notice and warranty disclaimer appear in supporting\n\
+documentation, and that the name Lucent Technologies or any of\n\
+its entities not be used in advertising or publicity pertaining\n\
+to distribution of the software without specific, written prior\n\
+permission.\n\
+\n\
+LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,\n\
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.\n\
+IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY\n\
+SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\n\
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER\n\
+IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,\n\
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF\n\
+THIS SOFTWARE.");
 
 const char	*version = "version 20220530";
 extern	int	nfields;
diff --git a/third_party/awk/run.c b/third_party/awk/run.c
index f31259db2..1ee408e4d 100644
--- a/third_party/awk/run.c
+++ b/third_party/awk/run.c
@@ -103,7 +103,8 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
 		if (rminlen)
 			minlen += quantum - rminlen;
 		tbuf = (char *) realloc(*pbuf, minlen);
-		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf);
+		// [jart] use after free error
+		// DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf);
 		if (tbuf == NULL) {
 			if (whatrtn)
 				FATAL("out of memory in %s", whatrtn);
diff --git a/third_party/bash/BUILD.mk b/third_party/bash/BUILD.mk
index df6f7e10a..790dc452d 100644
--- a/third_party/bash/BUILD.mk
+++ b/third_party/bash/BUILD.mk
@@ -83,6 +83,7 @@ $(THIRD_PARTY_BASH_OBJS): private					\
 			-Wno-nonnull-compare				\
 			-Wno-unused-variable				\
 			-Wno-missing-braces				\
+			-Wno-use-after-free				\
 			-Wno-unused-label				\
 			-Wno-unused-value				\
 			-Wno-return-type				\
diff --git a/third_party/bzip2/crctable.c b/third_party/bzip2/crctable.c
index 2fee0a90a..2c21bd6a5 100644
--- a/third_party/bzip2/crctable.c
+++ b/third_party/bzip2/crctable.c
@@ -20,6 +20,7 @@
 
 uint32_t BZ2_crc32Table[256];
 
+__attribute__((__constructor__(10)))
 static textstartup void BZ2_crc32Table_init() {
   unsigned i, j, u;
   for (i = 0; i < 256; ++i) {
@@ -38,10 +39,6 @@ static textstartup void BZ2_crc32Table_init() {
   }
 }
 
-const void *const BZ2_crc32Table_ctor[] initarray = {
-    BZ2_crc32Table_init,
-};
-
 #else
 
 const uint32_t BZ2_crc32Table[256] = {
diff --git a/third_party/chibicc/chibicc.c b/third_party/chibicc/chibicc.c
index 3273b3077..8224433a5 100644
--- a/third_party/chibicc/chibicc.c
+++ b/third_party/chibicc/chibicc.c
@@ -9,11 +9,10 @@
 #include "libc/sysv/consts/sig.h"
 #include "libc/x/xasprintf.h"
 
-asm(".ident\t\"\\n\\n\
-chibicc (MIT/ISC License)\\n\
-Copyright 2019 Rui Ueyama\\n\
-Copyright 2020 Justine Alexandra Roberts Tunney\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(chibicc_notice, "\
+chibicc (MIT/ISC License)\n\
+Copyright 2019 Rui Ueyama\n\
+Copyright 2020 Justine Alexandra Roberts Tunney");
 
 typedef enum {
   FILE_NONE,
diff --git a/third_party/chibicc/preprocess.c b/third_party/chibicc/preprocess.c
index dd9780597..ff2dee27e 100644
--- a/third_party/chibicc/preprocess.c
+++ b/third_party/chibicc/preprocess.c
@@ -789,11 +789,12 @@ static Token *preprocess2(Token *tok) {
           char *path = xasprintf("%s/%s", dirname(tmp), filename);
           free(tmp);
           bool exists = fileexists(path);
-          free(path);
           if (exists) {
             tok = include_file(tok, path, start->next->next);
+            free(path);
             continue;
           }
+          free(path);
         }
         char *path = search_include_paths(filename);
         tok = include_file(tok, path ? path : filename, start->next->next);
diff --git a/third_party/compiler_rt/comprt.S b/third_party/compiler_rt/comprt.S
index 84814f711..95060b658 100644
--- a/third_party/compiler_rt/comprt.S
+++ b/third_party/compiler_rt/comprt.S
@@ -7,12 +7,8 @@ huge_compiler_rt_license:
 	.endobj	huge_compiler_rt_license,globl,hidden
 	.previous
 
-.ident "\n
-compiler_rt (Licensed MIT)
-Copyright (c) 2009-2015 by the contributors listed in:
-github.com/llvm-mirror/compiler-rt/blob/master/CREDITS.TXT"
-
-.ident "\n
+	.section .notice,"aR",@progbits
+	.asciz	"\n\n\
 compiler_rt (Licensed \"University of Illinois/NCSA Open Source License\")
 Copyright (c) 2009-2018 by the contributors listed in:
 github.com/llvm-mirror/compiler-rt/blob/master/CREDITS.TXT
diff --git a/third_party/compiler_rt/extendhfdf2.c b/third_party/compiler_rt/extendhfdf2.c
new file mode 100644
index 000000000..729eb04c1
--- /dev/null
+++ b/third_party/compiler_rt/extendhfdf2.c
@@ -0,0 +1,17 @@
+//===-- lib/extendhfdf2.c - half -> dubble conversion -------------*- C -*-===//
+//
+//                The Cosmopolitan Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define SRC_HALF
+#define DST_DOUBLE
+#include "third_party/compiler_rt/fp16_extend_impl.inc"
+
+COMPILER_RT_ABI dst_t __extendhfdf2(src_t a) {
+    return __extendXfYf2__(a);
+}
diff --git a/third_party/compiler_rt/extendhfsf2.c b/third_party/compiler_rt/extendhfsf2.c
index 8d122cfb6..f891d9542 100644
--- a/third_party/compiler_rt/extendhfsf2.c
+++ b/third_party/compiler_rt/extendhfsf2.c
@@ -1,35 +1,27 @@
 //===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-
-__static_yoink("huge_compiler_rt_license");
 
 #define SRC_HALF
 #define DST_SINGLE
-#include "third_party/compiler_rt/fp_extend_impl.inc"
+#include "fp16_extend_impl.inc"
 
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI __attribute__((__noinline__)) float __extendhfsf2(uint16_t a) {
-    return __extendXfYf2__(a);
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(src_t a) {
+  return __extendXfYf2__(a);
 }
 
-COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
-    return __extendhfsf2(a);
-}
+COMPILER_RT_ABI float __gnu_h2f_ieee(src_t a) { return __extendhfsf2(a); }
 
 #if defined(__ARM_EABI__)
 #if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI float __aeabi_h2f(uint16_t a) {
-  return __extendhfsf2(a);
-}
+AEABI_RTABI float __aeabi_h2f(src_t a) { return __extendhfsf2(a); }
 #else
-AEABI_RTABI float __aeabi_h2f(uint16_t a) COMPILER_RT_ALIAS(__extendhfsf2);
+COMPILER_RT_ALIAS(__extendhfsf2, __aeabi_h2f)
 #endif
 #endif
diff --git a/third_party/compiler_rt/fp16_extend.inc b/third_party/compiler_rt/fp16_extend.inc
new file mode 100644
index 000000000..993b3b1db
--- /dev/null
+++ b/third_party/compiler_rt/fp16_extend.inc
@@ -0,0 +1,170 @@
+//===-lib/fp_extend.h - low precision -> high precision conversion -*- C
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_EXTEND_HEADER
+#define FP_EXTEND_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 23;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+#define src_rep_t_clz clzsi
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 52;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 11;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) {
+#if defined __LP64__
+  return __builtin_clzl(a);
+#else
+  if (a & REP_C(0xffffffff00000000))
+    return clzsi(a >> 32);
+  else
+    return 32 + clzsi(a & REP_C(0xffffffff));
+#endif
+}
+#define src_rep_t_clz src_rep_t_clz_impl
+
+#elif defined SRC_80
+typedef xf_float src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+// sign bit, exponent and significand occupy the lower 80 bits.
+static const int srcBits = 80;
+static const int srcSigFracBits = 63;
+// -1 accounts for the sign bit.
+// -1 accounts for the explicitly stored integer bit.
+// srcBits - srcSigFracBits - 1 - 1
+static const int srcExpBits = 15;
+
+#elif defined SRC_HALF
+typedef _Float16 src_t;
+typedef uint16_t src_rep_t;
+#define SRC_REP_C UINT16_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 10;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 5;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) {
+  return __builtin_clz(a) - 16;
+}
+
+#define src_rep_t_clz src_rep_t_clz_impl
+
+#else
+#error Source should be half, single, or double precision!
+#endif // end source precision
+
+#if defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 23;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#elif defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 52;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 11;
+
+#elif defined DST_QUAD
+typedef tf_float dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 112;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 15;
+
+#else
+#error Destination should be single, double, or quad precision!
+#endif // end destination precision
+
+// End of specialization parameters.
+
+// TODO: These helper routines should be placed into fp_lib.h
+// Currently they depend on macros/constants defined above.
+
+static inline src_rep_t extract_sign_from_src(src_rep_t x) {
+  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
+  return (x & srcSignMask) >> (srcBits - 1);
+}
+
+static inline src_rep_t extract_exp_from_src(src_rep_t x) {
+  const int srcSigBits = srcBits - 1 - srcExpBits;
+  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
+  return (x & srcExpMask) >> srcSigBits;
+}
+
+static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
+  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+  return x & srcSigFracMask;
+}
+
+#ifdef src_rep_t_clz
+static inline int clz_in_sig_frac(src_rep_t sigFrac) {
+      const int skip = 1 + srcExpBits;
+      return src_rep_t_clz(sigFrac) - skip;
+}
+#endif
+
+static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
+  return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
+}
+
+// Two helper routines for conversion to and from the representation of
+// floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+  const union {
+    src_t f;
+    src_rep_t i;
+  } rep = {.f = x};
+  return rep.i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+  const union {
+    dst_t f;
+    dst_rep_t i;
+  } rep = {.i = x};
+  return rep.f;
+}
+// End helper routines.  Conversion implementation follows.
+
+#endif // FP_EXTEND_HEADER
diff --git a/third_party/compiler_rt/fp16_extend_impl.inc b/third_party/compiler_rt/fp16_extend_impl.inc
new file mode 100644
index 000000000..367705773
--- /dev/null
+++ b/third_party/compiler_rt/fp16_extend_impl.inc
@@ -0,0 +1,108 @@
+//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a narrower to a wider
+// IEEE-754 floating-point type.  The constants and types defined following the
+// includes below parameterize the conversion.
+//
+// It does not support types that don't use the usual IEEE-754 interchange
+// formats; specifically, some work would be needed to adapt it to
+// (for example) the Intel 80-bit format or PowerPC double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *widening* operations; if you need to convert to a *narrower* floating-point
+// type (e.g. double -> float), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.  You also may
+// run into trouble finding an appropriate CLZ function for wide source types;
+// you will likely need to roll your own on some platforms.
+//
+// Finally, the following assumptions are made:
+//
+// 1. Floating-point types and integer types have the same endianness on the
+//    target platform.
+//
+// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
+//    significand field being set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp16_extend.inc"
+
+// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
+// format. In particular, for the source type srcSigFracBits may be not equal to
+// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
+// types.
+static __inline dst_t __extendXfYf2__(src_t a) {
+  // Various constants whose values follow from the type parameters.
+  // Any reasonable optimizer will fold and propagate all of these.
+  const int srcInfExp = (1 << srcExpBits) - 1;
+  const int srcExpBias = srcInfExp >> 1;
+
+  const int dstInfExp = (1 << dstExpBits) - 1;
+  const int dstExpBias = dstInfExp >> 1;
+
+  // Break a into a sign and representation of the absolute value.
+  const src_rep_t aRep = srcToRep(a);
+  const src_rep_t srcSign = extract_sign_from_src(aRep);
+  const src_rep_t srcExp = extract_exp_from_src(aRep);
+  const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
+
+  dst_rep_t dstSign = srcSign;
+  dst_rep_t dstExp;
+  dst_rep_t dstSigFrac;
+
+  if (srcExp >= 1 && srcExp < (src_rep_t)srcInfExp) {
+    // a is a normal number.
+    dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias);
+    dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
+  }
+
+  else if (srcExp == srcInfExp) {
+    // a is NaN or infinity.
+    dstExp = dstInfExp;
+    dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
+  }
+
+  else if (srcSigFrac) {
+    // a is denormal.
+    if (srcExpBits == dstExpBits) {
+      // The exponent fields are identical and this is a denormal number, so all
+      // the non-significand bits are zero. In particular, this branch is always
+      // taken when we extend a denormal F80 to F128.
+      dstExp = 0;
+      dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits);
+    } else {
+#ifndef src_rep_t_clz
+      // If src_rep_t_clz is not defined this branch must be unreachable.
+      __builtin_unreachable();
+#else
+      // Renormalize the significand and clear the leading bit.
+      // For F80 -> F128 this codepath is unused.
+      const int scale = clz_in_sig_frac(srcSigFrac) + 1;
+      dstExp = dstExpBias - srcExpBias - scale + 1;
+      dstSigFrac = (dst_rep_t)srcSigFrac
+                   << (dstSigFracBits - srcSigFracBits + scale);
+      const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits);
+      dstSigFrac ^= dstMinNormal;
+#endif
+    }
+  }
+
+  else {
+    // a is zero.
+    dstExp = 0;
+    dstSigFrac = 0;
+  }
+
+  const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
+  return dstFromRep(result);
+}
diff --git a/third_party/compiler_rt/fp16_trunc.inc b/third_party/compiler_rt/fp16_trunc.inc
new file mode 100644
index 000000000..c88311a1d
--- /dev/null
+++ b/third_party/compiler_rt/fp16_trunc.inc
@@ -0,0 +1,154 @@
+//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination precision setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_TRUNC_HEADER
+#define FP_TRUNC_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 23;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 52;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 11;
+
+#elif defined SRC_QUAD
+typedef tf_float src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 112;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 15;
+
+#else
+#error Source should be double precision or quad precision!
+#endif // end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 52;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 11;
+
+#elif defined DST_80
+typedef xf_float dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstBits = 80;
+static const int dstSigFracBits = 63;
+// -1 accounts for the sign bit.
+// -1 accounts for the explicitly stored integer bit.
+// dstBits - dstSigFracBits - 1 - 1
+static const int dstExpBits = 15;
+
+#elif defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 23;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#elif defined DST_HALF
+typedef _Float16 dst_t;
+typedef uint16_t dst_rep_t;
+#define DST_REP_C UINT16_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 10;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 5;
+
+#elif defined DST_BFLOAT
+typedef __bf16 dst_t;
+typedef uint16_t dst_rep_t;
+#define DST_REP_C UINT16_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 7;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#else
+#error Destination should be single precision or double precision!
+#endif // end destination precision
+
+// TODO: These helper routines should be placed into fp_lib.h
+// Currently they depend on macros/constants defined above.
+
+static inline src_rep_t extract_sign_from_src(src_rep_t x) {
+  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
+  return (x & srcSignMask) >> (srcBits - 1);
+}
+
+static inline src_rep_t extract_exp_from_src(src_rep_t x) {
+  const int srcSigBits = srcBits - 1 - srcExpBits;
+  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
+  return (x & srcExpMask) >> srcSigBits;
+}
+
+static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
+  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+  return x & srcSigFracMask;
+}
+
+static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
+  dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
+  // Set the explicit integer bit in F80 if present.
+  if (dstBits == 80 && exp) {
+    result |= (DST_REP_C(1) << dstSigFracBits);
+  }
+  return result;
+}
+
+// End of specialization parameters.  Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+  const union {
+    src_t f;
+    src_rep_t i;
+  } rep = {.f = x};
+  return rep.i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+  const union {
+    dst_t f;
+    dst_rep_t i;
+  } rep = {.i = x};
+  return rep.f;
+}
+
+#endif // FP_TRUNC_HEADER
diff --git a/third_party/compiler_rt/fp16_trunc_impl.inc b/third_party/compiler_rt/fp16_trunc_impl.inc
new file mode 100644
index 000000000..610588478
--- /dev/null
+++ b/third_party/compiler_rt/fp16_trunc_impl.inc
@@ -0,0 +1,155 @@
+//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a wider to a narrower
+// IEEE-754 floating-point type in the default (round to nearest, ties to even)
+// rounding mode.  The constants and types defined following the includes below
+// parameterize the conversion.
+//
+// This routine can be trivially adapted to support conversions to
+// half-precision or from quad-precision. It does not support types that don't
+// use the usual IEEE-754 interchange formats; specifically, some work would be
+// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+// double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *narrowing* operations; if you need to convert to a *wider* floating-point
+// type (e.g. float -> double), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.
+//
+// Finally, the following assumptions are made:
+//
+// 1. Floating-point types and integer types have the same endianness on the
+//    target platform.
+//
+// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
+//    significand field being set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp16_trunc.inc"
+
+// The destination type may use a usual IEEE-754 interchange format or Intel
+// 80-bit format. In particular, for the destination type dstSigFracBits may be
+// not equal to dstSigBits. The source type is assumed to be one of IEEE-754
+// standard types.
+static __inline dst_t __truncXfYf2__(src_t a) {
+  // Various constants whose values follow from the type parameters.
+  // Any reasonable optimizer will fold and propagate all of these.
+  const int srcInfExp = (1 << srcExpBits) - 1;
+  const int srcExpBias = srcInfExp >> 1;
+
+  const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits;
+  const src_rep_t roundMask =
+      (SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1;
+  const src_rep_t halfway = SRC_REP_C(1)
+                            << (srcSigFracBits - dstSigFracBits - 1);
+  const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);
+  const src_rep_t srcNaNCode = srcQNaN - 1;
+
+  const int dstInfExp = (1 << dstExpBits) - 1;
+  const int dstExpBias = dstInfExp >> 1;
+  const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
+
+  const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1);
+  const dst_rep_t dstNaNCode = dstQNaN - 1;
+
+  const src_rep_t aRep = srcToRep(a);
+  const src_rep_t srcSign = extract_sign_from_src(aRep);
+  const src_rep_t srcExp = extract_exp_from_src(aRep);
+  const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
+
+  dst_rep_t dstSign = srcSign;
+  dst_rep_t dstExp;
+  dst_rep_t dstSigFrac;
+
+  // Same size exponents and a's significand tail is 0.
+  // The significand can be truncated and the exponent can be copied over.
+  const int sigFracTailBits = srcSigFracBits - dstSigFracBits;
+  if (srcExpBits == dstExpBits &&
+      ((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) {
+    dstExp = srcExp;
+    dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
+    return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
+  }
+
+  const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias;
+  if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) {
+    // The exponent of a is within the range of normal numbers in the
+    // destination format. We can convert by simply right-shifting with
+    // rounding and adjusting the exponent.
+    dstExp = dstExpCandidate;
+    dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
+
+    const src_rep_t roundBits = srcSigFrac & roundMask;
+    // Round to nearest.
+    if (roundBits > halfway)
+      dstSigFrac++;
+    // Tie to even.
+    else if (roundBits == halfway)
+      dstSigFrac += dstSigFrac & 1;
+
+    // Rounding has changed the exponent.
+    if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
+      dstExp += 1;
+      dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
+    }
+  } else if (srcExp == srcInfExp && srcSigFrac) {
+    // a is NaN.
+    // Conjure the result by beginning with infinity, setting the qNaN
+    // bit and inserting the (truncated) trailing NaN field.
+    dstExp = dstInfExp;
+    dstSigFrac = dstQNaN;
+    dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode;
+  } else if ((int)srcExp >= overflowExponent) {
+    dstExp = dstInfExp;
+    dstSigFrac = 0;
+  } else {
+    // a underflows on conversion to the destination type or is an exact
+    // zero.  The result may be a denormal or zero.  Extract the exponent
+    // to get the shift amount for the denormalization.
+    src_rep_t significand = srcSigFrac;
+    int shift = srcExpBias - dstExpBias - srcExp;
+
+    if (srcExp) {
+      // Set the implicit integer bit if the source is a normal number.
+      significand |= srcMinNormal;
+      shift += 1;
+    }
+
+    // Right shift by the denormalization amount with sticky.
+    if (shift > srcSigFracBits) {
+      dstExp = 0;
+      dstSigFrac = 0;
+    } else {
+      dstExp = 0;
+      const bool sticky = shift && ((significand << (srcBits - shift)) != 0);
+      src_rep_t denormalizedSignificand = significand >> shift | sticky;
+      dstSigFrac = denormalizedSignificand >> sigFracTailBits;
+      const src_rep_t roundBits = denormalizedSignificand & roundMask;
+      // Round to nearest
+      if (roundBits > halfway)
+        dstSigFrac++;
+      // Ties to even
+      else if (roundBits == halfway)
+        dstSigFrac += dstSigFrac & 1;
+
+      // Rounding has changed the exponent.
+      if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
+        dstExp += 1;
+        dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
+      }
+    }
+  }
+
+  return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
+}
diff --git a/third_party/compiler_rt/fp_extend_common.inc b/third_party/compiler_rt/fp_extend_common.inc
index 7da5c78f8..22de9959c 100644
--- a/third_party/compiler_rt/fp_extend_common.inc
+++ b/third_party/compiler_rt/fp_extend_common.inc
@@ -41,11 +41,21 @@ static __inline int src_rep_t_clz(src_rep_t a) {
 }
 
 #elif defined SRC_HALF
-typedef uint16_t src_t;
+#error use fp16_extend.inc
+typedef _Float16 src_t;
 typedef uint16_t src_rep_t;
 #define SRC_REP_C UINT16_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
 static const int srcSigBits = 10;
-#define src_rep_t_clz __builtin_clz
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 5;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) {
+  return __builtin_clz(a) - 16;
+}
+
+#define src_rep_t_clz src_rep_t_clz_impl
 
 #else
 #error Source should be half, single, or double precision!
diff --git a/third_party/compiler_rt/fp_trunc_common.inc b/third_party/compiler_rt/fp_trunc_common.inc
index bdd6d156b..8c761d241 100644
--- a/third_party/compiler_rt/fp_trunc_common.inc
+++ b/third_party/compiler_rt/fp_trunc_common.inc
@@ -51,6 +51,7 @@ typedef uint32_t dst_rep_t;
 static const int dstSigBits = 23;
 
 #elif defined DST_HALF
+#error use fp16_trunc.inc
 typedef uint16_t dst_t;
 typedef uint16_t dst_rep_t;
 #define DST_REP_C UINT16_C
diff --git a/third_party/compiler_rt/ilogbl.c b/third_party/compiler_rt/ilogbl.c
index 73ccc6352..09e4c9630 100644
--- a/third_party/compiler_rt/ilogbl.c
+++ b/third_party/compiler_rt/ilogbl.c
@@ -30,11 +30,7 @@
 #include "libc/tinymath/internal.h"
 #include "libc/tinymath/ldshape.internal.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /**
  * Returns log₂𝑥 exponent part of double.
diff --git a/third_party/compiler_rt/int_lib.h b/third_party/compiler_rt/int_lib.h
index 46d4e654f..4a8e47242 100644
--- a/third_party/compiler_rt/int_lib.h
+++ b/third_party/compiler_rt/int_lib.h
@@ -46,10 +46,12 @@
 
 #ifdef _MSC_VER
 #define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
 #define NORETURN __declspec(noreturn)
 #define UNUSED
 #else
 #define ALWAYS_INLINE __attribute__((__always_inline__))
+#define NOINLINE __attribute__((__noinline__))
 #define NORETURN __attribute__((__noreturn__))
 #define UNUSED __attribute__((__unused__))
 #endif
diff --git a/third_party/compiler_rt/logbl.c b/third_party/compiler_rt/logbl.c
index 171b9aa44..27c639045 100644
--- a/third_party/compiler_rt/logbl.c
+++ b/third_party/compiler_rt/logbl.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 long double logbl(long double x)
 {
diff --git a/third_party/compiler_rt/truncdfhf2.c b/third_party/compiler_rt/truncdfhf2.c
index 9ed6ff2fa..9a01e2c2e 100644
--- a/third_party/compiler_rt/truncdfhf2.c
+++ b/third_party/compiler_rt/truncdfhf2.c
@@ -1,28 +1,21 @@
 //===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-__static_yoink("huge_compiler_rt_license");
-
 #define SRC_DOUBLE
 #define DST_HALF
-#include "third_party/compiler_rt/fp_trunc_impl.inc"
+#include "fp16_trunc_impl.inc"
 
-COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
-    return __truncXfYf2__(a);
-}
+COMPILER_RT_ABI dst_t __truncdfhf2(double a) { return __truncXfYf2__(a); }
 
 #if defined(__ARM_EABI__)
 #if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI uint16_t __aeabi_d2h(double a) {
-  return __truncdfhf2(a);
-}
+AEABI_RTABI dst_t __aeabi_d2h(double a) { return __truncdfhf2(a); }
 #else
-AEABI_RTABI uint16_t __aeabi_d2h(double a) COMPILER_RT_ALIAS(__truncdfhf2);
+COMPILER_RT_ALIAS(__truncdfhf2, __aeabi_d2h)
 #endif
 #endif
diff --git a/third_party/compiler_rt/truncsfhf2.c b/third_party/compiler_rt/truncsfhf2.c
index 582ed089e..d15e1884f 100644
--- a/third_party/compiler_rt/truncsfhf2.c
+++ b/third_party/compiler_rt/truncsfhf2.c
@@ -1,34 +1,27 @@
 //===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-__static_yoink("huge_compiler_rt_license");
-
 #define SRC_SINGLE
 #define DST_HALF
-#include "third_party/compiler_rt/fp_trunc_impl.inc"
+#include "fp16_trunc_impl.inc"
 
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI __attribute__((__noinline__)) uint16_t __truncsfhf2(float a) {
-    return __truncXfYf2__(a);
+COMPILER_RT_ABI NOINLINE dst_t __truncsfhf2(float a) {
+  return __truncXfYf2__(a);
 }
 
-COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
-    return __truncsfhf2(a);
-}
+COMPILER_RT_ABI dst_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); }
 
 #if defined(__ARM_EABI__)
 #if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI uint16_t __aeabi_f2h(float a) {
-  return __truncsfhf2(a);
-}
+AEABI_RTABI dst_t __aeabi_f2h(float a) { return __truncsfhf2(a); }
 #else
-AEABI_RTABI uint16_t __aeabi_f2h(float a) COMPILER_RT_ALIAS(__truncsfhf2);
+COMPILER_RT_ALIAS(__truncsfhf2, __aeabi_f2h)
 #endif
 #endif
diff --git a/third_party/dlmalloc/init.inc b/third_party/dlmalloc/init.inc
index b635d84bf..684e68995 100644
--- a/third_party/dlmalloc/init.inc
+++ b/third_party/dlmalloc/init.inc
@@ -8,7 +8,7 @@ static void dlmalloc_post_fork_child(void)  { (void)INITIAL_LOCK(&(gm)->mutex);
 #endif /* LOCK_AT_FORK */
 
 /* Initialize mparams */
-__attribute__((__constructor__)) int init_mparams(void) {
+__attribute__((__constructor__(50))) int init_mparams(void) {
 #ifdef NEED_GLOBAL_LOCK_INIT
   if (malloc_global_mutex_status <= 0)
     init_malloc_global_mutex();
diff --git a/third_party/double-conversion/bignum-dtoa.cc b/third_party/double-conversion/bignum-dtoa.cc
index 5b9325158..a9a75e4f8 100644
--- a/third_party/double-conversion/bignum-dtoa.cc
+++ b/third_party/double-conversion/bignum-dtoa.cc
@@ -28,11 +28,7 @@
 #include "third_party/double-conversion/bignum.h"
 #include "third_party/double-conversion/ieee.h"
 #include "third_party/libcxx/cmath"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 namespace double_conversion {
 
diff --git a/third_party/double-conversion/bignum.cc b/third_party/double-conversion/bignum.cc
index 62853945a..cadef28b0 100644
--- a/third_party/double-conversion/bignum.cc
+++ b/third_party/double-conversion/bignum.cc
@@ -28,11 +28,7 @@
 #include "third_party/double-conversion/utils.h"
 #include "third_party/libcxx/algorithm"
 #include "third_party/libcxx/cstring"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 namespace double_conversion {
 
diff --git a/third_party/double-conversion/cached-powers.cc b/third_party/double-conversion/cached-powers.cc
index 18253cfc4..05156a099 100644
--- a/third_party/double-conversion/cached-powers.cc
+++ b/third_party/double-conversion/cached-powers.cc
@@ -29,11 +29,7 @@
 #include "third_party/libcxx/climits"
 #include "third_party/libcxx/cmath"
 #include "third_party/libcxx/cstdarg"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 namespace double_conversion {
 
diff --git a/third_party/double-conversion/double-to-string.cc b/third_party/double-conversion/double-to-string.cc
index a58a5c925..0dfe0dc41 100644
--- a/third_party/double-conversion/double-to-string.cc
+++ b/third_party/double-conversion/double-to-string.cc
@@ -33,11 +33,7 @@
 #include "third_party/libcxx/algorithm"
 #include "third_party/libcxx/climits"
 #include "third_party/libcxx/cmath"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 namespace double_conversion {
 
diff --git a/third_party/double-conversion/dubble.c b/third_party/double-conversion/dubble.c
new file mode 100644
index 000000000..a73cf09aa
--- /dev/null
+++ b/third_party/double-conversion/dubble.c
@@ -0,0 +1,3 @@
+__notice(double_conversion_notice, "\
+double-conversion (BSD-3 License)\n\
+Copyright 2006-2012 the V8 project authors");
diff --git a/third_party/double-conversion/fast-dtoa.cc b/third_party/double-conversion/fast-dtoa.cc
index 60195c26d..84fcfd36b 100644
--- a/third_party/double-conversion/fast-dtoa.cc
+++ b/third_party/double-conversion/fast-dtoa.cc
@@ -28,11 +28,7 @@
 #include "third_party/double-conversion/diy-fp.h"
 #include "third_party/double-conversion/fast-dtoa.h"
 #include "third_party/double-conversion/ieee.h"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 namespace double_conversion {
 
diff --git a/third_party/double-conversion/fixed-dtoa.cc b/third_party/double-conversion/fixed-dtoa.cc
index cca64aca7..d00093609 100644
--- a/third_party/double-conversion/fixed-dtoa.cc
+++ b/third_party/double-conversion/fixed-dtoa.cc
@@ -27,11 +27,7 @@
 #include "third_party/double-conversion/fixed-dtoa.h"
 #include "third_party/double-conversion/ieee.h"
 #include "third_party/libcxx/cmath"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 namespace double_conversion {
 
diff --git a/third_party/double-conversion/string-to-double.cc b/third_party/double-conversion/string-to-double.cc
index 0d88f2d60..cc29f9555 100644
--- a/third_party/double-conversion/string-to-double.cc
+++ b/third_party/double-conversion/string-to-double.cc
@@ -31,11 +31,7 @@
 #include "third_party/libcxx/climits"
 #include "third_party/libcxx/cmath"
 #include "third_party/libcxx/locale"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 #ifdef _MSC_VER
 #  if _MSC_VER >= 1900
diff --git a/third_party/double-conversion/strtod.cc b/third_party/double-conversion/strtod.cc
index 9c00ef19c..bf6dde672 100644
--- a/third_party/double-conversion/strtod.cc
+++ b/third_party/double-conversion/strtod.cc
@@ -30,11 +30,7 @@
 #include "third_party/double-conversion/strtod.h"
 #include "third_party/libcxx/climits"
 #include "third_party/libcxx/cstdarg"
-
-asm(".ident\t\"\\n\\n\
-double-conversion (BSD-3 License)\\n\
-Copyright 2006-2012 the V8 project authors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("double_conversion_notice");
 
 namespace double_conversion {
 
diff --git a/third_party/gdtoa/gdtoa.internal.h b/third_party/gdtoa/gdtoa.internal.h
index bbb4d1e2c..67f11af0a 100644
--- a/third_party/gdtoa/gdtoa.internal.h
+++ b/third_party/gdtoa/gdtoa.internal.h
@@ -4,12 +4,7 @@
 #include "libc/str/str.h"
 #include "third_party/gdtoa/gdtoa.h"
 
-asm(".ident\t\"\\n\\n\
-gdtoa (MIT License)\\n\
-The author of this software is David M. Gay\\n\
-Kudos go to Guy L. Steele, Jr. and Jon L. White\\n\
-Copyright (C) 1997, 1998, 2000 by Lucent Technologies\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("gdtoa_notice");
 
 #define IEEE_Arith          1
 #define IEEE_8087           1
diff --git a/third_party/gdtoa/misc.c b/third_party/gdtoa/misc.c
index 43b1d4729..75d3883d8 100644
--- a/third_party/gdtoa/misc.c
+++ b/third_party/gdtoa/misc.c
@@ -98,7 +98,7 @@ __gdtoa_Bclear(void)
 	__gdtoa_unlock();
 }
 
-__attribute__((__constructor__)) static void
+__attribute__((__constructor__(60))) static void
 __gdtoa_Binit(void)
 {
 	__gdtoa_initlock();
diff --git a/third_party/gdtoa/notice.c b/third_party/gdtoa/notice.c
new file mode 100644
index 000000000..1046f9235
--- /dev/null
+++ b/third_party/gdtoa/notice.c
@@ -0,0 +1,5 @@
+__notice(gdtoa_notice, "\
+gdtoa (MIT License)\n\
+The author of this software is David M. Gay\n\
+Kudos go to Guy L. Steele, Jr. and Jon L. White\n\
+Copyright (C) 1997, 1998, 2000 by Lucent Technologies");
diff --git a/third_party/getopt/getopt.c b/third_party/getopt/getopt.c
index f1d7b716e..e51509a51 100644
--- a/third_party/getopt/getopt.c
+++ b/third_party/getopt/getopt.c
@@ -37,10 +37,8 @@
 #include "libc/errno.h"
 #include "third_party/getopt/getopt.internal.h"
 
-asm(".ident\t\"\\n\
-getopt (BSD-3)\\n\
-Copyright 1987, 1993, 1994 The Regents of the University of California\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(notice_getopt, "getopt (BSD-3)\n\
+Copyright 1987, 1993, 1994 The Regents of the University of California");
 
 #define BADCH  '?'
 #define BADARG ':'
diff --git a/third_party/hiredis/sds.c b/third_party/hiredis/sds.c
index 7f2f5692d..1e7b108e9 100644
--- a/third_party/hiredis/sds.c
+++ b/third_party/hiredis/sds.c
@@ -64,6 +64,8 @@
 #include "third_party/hiredis/sds.h"
 #include "third_party/hiredis/sdsalloc.h"
 
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+
 static inline int sdsHdrSize(char type) {
     switch(type&SDS_TYPE_MASK) {
         case SDS_TYPE_5:
diff --git a/third_party/intel/amxbf16intrin.internal.h b/third_party/intel/amxbf16intrin.internal.h
index d7b31ecb0..8a1ed88fb 100644
--- a/third_party/intel/amxbf16intrin.internal.h
+++ b/third_party/intel/amxbf16intrin.internal.h
@@ -9,7 +9,7 @@
 #pragma GCC target("amx-bf16")
 #define __DISABLE_AMX_BF16__
 #endif
-#if defined(__x86_64__) && defined(__AMX_BF16__)
+#if defined(__x86_64__)
 #define _tile_dpbf16ps_internal(dst,src1,src2) __asm__ volatile ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
 #define _tile_dpbf16ps(dst,src1,src2) _tile_dpbf16ps_internal (dst, src1, src2)
 #endif
diff --git a/third_party/intel/amxint8intrin.internal.h b/third_party/intel/amxint8intrin.internal.h
index f1d25d11f..e5c566735 100644
--- a/third_party/intel/amxint8intrin.internal.h
+++ b/third_party/intel/amxint8intrin.internal.h
@@ -9,7 +9,7 @@
 #pragma GCC target("amx-int8")
 #define __DISABLE_AMX_INT8__
 #endif
-#if defined(__x86_64__) && defined(__AMX_INT8__)
+#if defined(__x86_64__)
 #define _tile_int8_dp_internal(name,dst,src1,src2) __asm__ volatile ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
 #define _tile_dpbssd(dst,src1,src2) _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
 #define _tile_dpbsud(dst,src1,src2) _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
diff --git a/third_party/intel/amxtileintrin.internal.h b/third_party/intel/amxtileintrin.internal.h
index 3913c900e..00e403118 100644
--- a/third_party/intel/amxtileintrin.internal.h
+++ b/third_party/intel/amxtileintrin.internal.h
@@ -9,7 +9,7 @@
 #pragma GCC target("amx-tile")
 #define __DISABLE_AMX_TILE__
 #endif
-#if defined(__x86_64__) && defined(__AMX_TILE__)
+#if defined(__x86_64__)
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _tile_loadconfig (const void *__config)
@@ -29,11 +29,11 @@ _tile_release (void)
   __asm__ volatile ("tilerelease" ::);
 }
 #define _tile_loadd(dst,base,stride) _tile_loadd_internal (dst, base, stride)
-#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
+#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) (base)), "r" ((long) (stride)))
 #define _tile_stream_loadd(dst,base,stride) _tile_stream_loadd_internal (dst, base, stride)
-#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
+#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) (base)), "r" ((long) (stride)))
 #define _tile_stored(dst,base,stride) _tile_stored_internal (dst, base, stride)
-#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) base), "r" ((long) stride) : "memory")
+#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) (base)), "r" ((long) (stride)) : "memory")
 #define _tile_zero(dst) _tile_zero_internal (dst)
 #define _tile_zero_internal(dst) __asm__ volatile ("tilezero\t%%tmm"#dst ::)
 #endif
diff --git a/third_party/intel/avx512bf16intrin.internal.h b/third_party/intel/avx512bf16intrin.internal.h
index 088340d6a..5bc3f8358 100644
--- a/third_party/intel/avx512bf16intrin.internal.h
+++ b/third_party/intel/avx512bf16intrin.internal.h
@@ -11,6 +11,14 @@
 #endif
 typedef short __v32bh __attribute__ ((__vector_size__ (64)));
 typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+extern __inline float
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsbh_ss (__bfloat16 __A)
+{
+  union{ float a; unsigned int b;} __tmp;
+  __tmp.b = ((unsigned int)(__A)) << 16;
+  return __tmp.a;
+}
 extern __inline __m512bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
@@ -65,6 +73,29 @@ _mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
 {
   return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
 }
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpbh_ps (__m256bh __A)
+{
+  return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 (
+  (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16));
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
+{
+  return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 (
+  (__m512i)_mm512_maskz_cvtepi16_epi32 (
+  (__mmask16)__U, (__m256i)__A), 16));
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
+{
+  return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 (
+  (__m512i)__S, (__mmask16)__U,
+  (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
+}
 #ifdef __DISABLE_AVX512BF16__
 #undef __DISABLE_AVX512BF16__
 #pragma GCC pop_options
diff --git a/third_party/intel/avx512bf16vlintrin.internal.h b/third_party/intel/avx512bf16vlintrin.internal.h
index 83019cf5f..216196fcf 100644
--- a/third_party/intel/avx512bf16vlintrin.internal.h
+++ b/third_party/intel/avx512bf16vlintrin.internal.h
@@ -13,6 +13,7 @@ typedef short __v16bh __attribute__ ((__vector_size__ (32)));
 typedef short __v8bh __attribute__ ((__vector_size__ (16)));
 typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
 typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef unsigned short __bfloat16;
 extern __inline __m256bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
@@ -121,6 +122,61 @@ _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
 {
   return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
 }
+extern __inline __bfloat16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtness_sbh (float __A)
+{
+  __v4sf __V = {__A, 0, 0, 0};
+  __v8hi __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
+        (__v8hi)_mm_undefined_si128 (), (__mmask8)-1);
+  return __R[0];
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpbh_ps (__m128bh __A)
+{
+  return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
+  (__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16));
+}
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpbh_ps (__m128bh __A)
+{
+  return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
+  (__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16));
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
+{
+  return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
+  (__m128i)_mm_maskz_cvtepi16_epi32 (
+  (__mmask8)__U, (__m128i)__A), 16));
+}
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
+{
+  return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
+  (__m256i)_mm256_maskz_cvtepi16_epi32 (
+  (__mmask8)__U, (__m128i)__A), 16));
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
+{
+  return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 (
+  (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 (
+  (__m128i)__A), 16));
+}
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
+{
+  return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 (
+  (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 (
+  (__m128i)__A), 16));
+}
 #ifdef __DISABLE_AVX512BF16VL__
 #undef __DISABLE_AVX512BF16VL__
 #pragma GCC pop_options
diff --git a/third_party/intel/avx512dqintrin.internal.h b/third_party/intel/avx512dqintrin.internal.h
index 38d8010f1..0cf6a7080 100644
--- a/third_party/intel/avx512dqintrin.internal.h
+++ b/third_party/intel/avx512dqintrin.internal.h
@@ -2248,9 +2248,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm_mask_fpclass_ss_mask(X, C, U) ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), (int) (C), (__mmask8) (U)))
 #define _mm_mask_fpclass_sd_mask(X, C, U) ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) (C), (__mmask8) (U)))
 #define _mm512_mask_fpclass_pd_mask(u, X, C) ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), (int) (C), (__mmask8)(u)))
-#define _mm512_mask_fpclass_ps_mask(u, x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask8)(u)))
+#define _mm512_mask_fpclass_ps_mask(u, x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask16)(u)))
 #define _mm512_fpclass_pd_mask(X, C) ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), (int) (C), (__mmask8)-1))
-#define _mm512_fpclass_ps_mask(x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask8)-1))
+#define _mm512_fpclass_ps_mask(x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask16)-1))
 #define _mm_reduce_sd(A, B, C) ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), (__mmask8)-1))
 #define _mm_mask_reduce_sd(W, U, A, B, C) ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U)))
 #define _mm_maskz_reduce_sd(U, A, B, C) ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), (__mmask8)(U)))
diff --git a/third_party/intel/avx512fintrin.internal.h b/third_party/intel/avx512fintrin.internal.h
index 79158ec85..a2c2c788c 100644
--- a/third_party/intel/avx512fintrin.internal.h
+++ b/third_party/intel/avx512fintrin.internal.h
@@ -1351,74 +1351,92 @@ _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
 #define _mm_mask_sub_round_ss(W, U, A, B, C) (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
 #define _mm_maskz_sub_round_ss(U, A, B, C) (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
 #endif
+typedef enum
+{
+  _MM_TERNLOG_A = 0xF0,
+  _MM_TERNLOG_B = 0xCC,
+  _MM_TERNLOG_C = 0xAA
+} _MM_TERNLOG_ENUM;
 #ifdef __OPTIMIZE__
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
       const int __imm)
 {
-  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
-           (__v8di) __B,
-           (__v8di) __C, __imm,
-           (__mmask8) -1);
+  return (__m512i)
+    __builtin_ia32_pternlogq512_mask ((__v8di) __A,
+          (__v8di) __B,
+          (__v8di) __C,
+          (unsigned char) __imm,
+          (__mmask8) -1);
 }
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
     __m512i __C, const int __imm)
 {
-  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
-           (__v8di) __B,
-           (__v8di) __C, __imm,
-           (__mmask8) __U);
+  return (__m512i)
+    __builtin_ia32_pternlogq512_mask ((__v8di) __A,
+          (__v8di) __B,
+          (__v8di) __C,
+          (unsigned char) __imm,
+          (__mmask8) __U);
 }
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
      __m512i __C, const int __imm)
 {
-  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
-            (__v8di) __B,
-            (__v8di) __C,
-            __imm, (__mmask8) __U);
+  return (__m512i)
+    __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
+           (__v8di) __B,
+           (__v8di) __C,
+           (unsigned char) __imm,
+           (__mmask8) __U);
 }
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
       const int __imm)
 {
-  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
-           (__v16si) __B,
-           (__v16si) __C,
-           __imm, (__mmask16) -1);
+  return (__m512i)
+    __builtin_ia32_pternlogd512_mask ((__v16si) __A,
+          (__v16si) __B,
+          (__v16si) __C,
+          (unsigned char) __imm,
+          (__mmask16) -1);
 }
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
     __m512i __C, const int __imm)
 {
-  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
-           (__v16si) __B,
-           (__v16si) __C,
-           __imm, (__mmask16) __U);
+  return (__m512i)
+    __builtin_ia32_pternlogd512_mask ((__v16si) __A,
+          (__v16si) __B,
+          (__v16si) __C,
+          (unsigned char) __imm,
+          (__mmask16) __U);
 }
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
      __m512i __C, const int __imm)
 {
-  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
-            (__v16si) __B,
-            (__v16si) __C,
-            __imm, (__mmask16) __U);
+  return (__m512i)
+    __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
+           (__v16si) __B,
+           (__v16si) __C,
+           (unsigned char) __imm,
+           (__mmask16) __U);
 }
 #else
-#define _mm512_ternarylogic_epi64(A, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
-#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
-#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
-#define _mm512_ternarylogic_epi32(A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), (__mmask16)-1))
-#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U)))
-#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U)))
+#define _mm512_ternarylogic_epi64(A, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), (__v8di) (__m512i) (B), (__v8di) (__m512i) (C), (unsigned char) (I), (__mmask8) -1))
+#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), (__v8di) (__m512i) (B), (__v8di) (__m512i) (C), (unsigned char)(I), (__mmask8) (U)))
+#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (A), (__v8di) (__m512i) (B), (__v8di) (__m512i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm512_ternarylogic_epi32(A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), (__v16si) (__m512i) (B), (__v16si) (__m512i) (C), (unsigned char) (I), (__mmask16) -1))
+#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), (__v16si) (__m512i) (B), (__v16si) (__m512i) (C), (unsigned char) (I), (__mmask16) (U)))
+#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (A), (__v16si) (__m512i) (B), (__v16si) (__m512i) (C), (unsigned char) (I), (__mmask16) (U)))
 #endif
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -2680,14 +2698,18 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
             (__mmask8) __U, __R);
 }
 #else
-#define _mm512_scalef_round_pd(A, B, C) (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-#define _mm512_maskz_scalef_round_pd(U, A, B, C) (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
-#define _mm512_scalef_round_ps(A, B, C) (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
-#define _mm512_maskz_scalef_round_ps(U, A, B, C) (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
-#define _mm_scalef_round_sd(A, B, C) (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, (__v2df)_mm_setzero_pd (), -1, C)
-#define _mm_scalef_round_ss(A, B, C) (__m128)__builtin_ia32_scalefss_mask_round (A, B, (__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm512_scalef_round_pd(A, B, C) ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (__v8df) _mm512_undefined_pd(), -1, (C)))
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C) ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+#define _mm512_maskz_scalef_round_pd(U, A, B, C) ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (__v8df) _mm512_setzero_pd(), (U), (C)))
+#define _mm512_scalef_round_ps(A, B, C) ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (__v16sf) _mm512_undefined_ps(), -1, (C)))
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C) ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+#define _mm512_maskz_scalef_round_ps(U, A, B, C) ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (__v16sf) _mm512_setzero_ps(), (U), (C)))
+#define _mm_scalef_round_sd(A, B, C) ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (__v2df) _mm_undefined_pd (), -1, (C)))
+#define _mm_scalef_round_ss(A, B, C) ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (__v4sf) _mm_undefined_ps (), -1, (C)))
+#define _mm_mask_scalef_round_sd(W, U, A, B, C) ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
+#define _mm_mask_scalef_round_ss(W, U, A, B, C) ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
+#define _mm_maskz_scalef_round_sd(U, A, B, C) ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (C)))
+#define _mm_maskz_scalef_round_ss(U, A, B, C) ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (C)))
 #endif
 #define _mm_mask_scalef_sd(W, U, A, B) _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
 #define _mm_maskz_scalef_sd(U, A, B) _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
diff --git a/third_party/intel/avx512fp16intrin.internal.h b/third_party/intel/avx512fp16intrin.internal.h
new file mode 100644
index 000000000..4f249cc30
--- /dev/null
+++ b/third_party/intel/avx512fp16intrin.internal.h
@@ -0,0 +1,5452 @@
+#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
+#endif
+#ifndef __AVX512FP16INTRIN_H_INCLUDED
+#define __AVX512FP16INTRIN_H_INCLUDED
+#ifndef __AVX512FP16__
+#pragma GCC push_options
+#pragma GCC target("avx512fp16")
+#define __DISABLE_AVX512FP16__
+#endif
+typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
+typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
+typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
+typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
+typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
+typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
+     _Float16 __A4, _Float16 __A3, _Float16 __A2,
+     _Float16 __A1, _Float16 __A0)
+{
+  return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
+       __A4, __A5, __A6, __A7 };
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
+        _Float16 __A12, _Float16 __A11, _Float16 __A10,
+        _Float16 __A9, _Float16 __A8, _Float16 __A7,
+        _Float16 __A6, _Float16 __A5, _Float16 __A4,
+        _Float16 __A3, _Float16 __A2, _Float16 __A1,
+        _Float16 __A0)
+{
+  return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
+        __A4, __A5, __A6, __A7,
+        __A8, __A9, __A10, __A11,
+        __A12, __A13, __A14, __A15 };
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
+        _Float16 __A28, _Float16 __A27, _Float16 __A26,
+        _Float16 __A25, _Float16 __A24, _Float16 __A23,
+        _Float16 __A22, _Float16 __A21, _Float16 __A20,
+        _Float16 __A19, _Float16 __A18, _Float16 __A17,
+        _Float16 __A16, _Float16 __A15, _Float16 __A14,
+        _Float16 __A13, _Float16 __A12, _Float16 __A11,
+        _Float16 __A10, _Float16 __A9, _Float16 __A8,
+        _Float16 __A7, _Float16 __A6, _Float16 __A5,
+        _Float16 __A4, _Float16 __A3, _Float16 __A2,
+        _Float16 __A1, _Float16 __A0)
+{
+  return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
+        __A4, __A5, __A6, __A7,
+        __A8, __A9, __A10, __A11,
+        __A12, __A13, __A14, __A15,
+        __A16, __A17, __A18, __A19,
+        __A20, __A21, __A22, __A23,
+        __A24, __A25, __A26, __A27,
+        __A28, __A29, __A30, __A31 };
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
+      _Float16 __A3, _Float16 __A4, _Float16 __A5,
+      _Float16 __A6, _Float16 __A7)
+{
+  return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
+  _Float16 __A3, _Float16 __A4, _Float16 __A5,
+  _Float16 __A6, _Float16 __A7, _Float16 __A8,
+  _Float16 __A9, _Float16 __A10, _Float16 __A11,
+  _Float16 __A12, _Float16 __A13, _Float16 __A14,
+  _Float16 __A15)
+{
+  return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
+   __A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
+   __A0);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
+  _Float16 __A3, _Float16 __A4, _Float16 __A5,
+  _Float16 __A6, _Float16 __A7, _Float16 __A8,
+  _Float16 __A9, _Float16 __A10, _Float16 __A11,
+  _Float16 __A12, _Float16 __A13, _Float16 __A14,
+  _Float16 __A15, _Float16 __A16, _Float16 __A17,
+  _Float16 __A18, _Float16 __A19, _Float16 __A20,
+  _Float16 __A21, _Float16 __A22, _Float16 __A23,
+  _Float16 __A24, _Float16 __A25, _Float16 __A26,
+  _Float16 __A27, _Float16 __A28, _Float16 __A29,
+  _Float16 __A30, _Float16 __A31)
+{
+  return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
+   __A24, __A23, __A22, __A21, __A20, __A19, __A18,
+   __A17, __A16, __A15, __A14, __A13, __A12, __A11,
+   __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
+   __A2, __A1, __A0);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_ph (_Float16 __A)
+{
+  return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_ph (_Float16 __A)
+{
+  return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
+   __A, __A, __A, __A, __A, __A, __A, __A);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_ph (_Float16 __A)
+{
+  return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
+   __A, __A, __A, __A, __A, __A, __A, __A,
+   __A, __A, __A, __A, __A, __A, __A, __A,
+   __A, __A, __A, __A, __A, __A, __A, __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_ph (void)
+{
+  return _mm_set1_ph (0.0f);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_ph (void)
+{
+  return _mm256_set1_ph (0.0f);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_ph (void)
+{
+  return _mm512_set1_ph (0.0f);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ph (void)
+{
+  __m128h __Y = __Y;
+  return __Y;
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_ph (void)
+{
+  __m256h __Y = __Y;
+  return __Y;
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ph (void)
+{
+  __m512h __Y = __Y;
+  return __Y;
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_h (__m128h __A)
+{
+  return __A[0];
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsh_h (__m256h __A)
+{
+  return __A[0];
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsh_h (__m512h __A)
+{
+  return __A[0];
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_ps (__m512h __a)
+{
+  return (__m512) __a;
+}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_pd (__m512h __a)
+{
+  return (__m512d) __a;
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_si512 (__m512h __a)
+{
+  return (__m512i) __a;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph128 (__m512h __A)
+{
+  union
+  {
+    __m128h a[4];
+    __m512h v;
+  } u = { .v = __A };
+  return u.a[0];
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph256 (__m512h __A)
+{
+  union
+  {
+    __m256h a[2];
+    __m512h v;
+  } u = { .v = __A };
+  return u.a[0];
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph128_ph512 (__m128h __A)
+{
+  union
+  {
+    __m128h a[4];
+    __m512h v;
+  } u;
+  u.a[0] = __A;
+  return u.v;
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph256_ph512 (__m256h __A)
+{
+  union
+  {
+    __m256h a[2];
+    __m512h v;
+  } u;
+  u.a[0] = __A;
+  return u.v;
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph128_ph512 (__m128h __A)
+{
+  return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
+           (__m128) __A, 0);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph256_ph512 (__m256h __A)
+{
+  return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
+           (__m256d) __A, 0);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castps_ph (__m512 __a)
+{
+  return (__m512h) __a;
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castpd_ph (__m512d __a)
+{
+  return (__m512h) __a;
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castsi512_ph (__m512i __a)
+{
+  return (__m512h) __a;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_sh (_Float16 __F)
+{
+  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_sh (void const *__P)
+{
+  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+       *(_Float16 const *) __P);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_ph (void const *__P)
+{
+  return *(const __m512h *) __P;
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_ph (void const *__P)
+{
+  return *(const __m256h *) __P;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ph (void const *__P)
+{
+  return *(const __m128h *) __P;
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_ph (void const *__P)
+{
+  return *(const __m512h_u *) __P;
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_ph (void const *__P)
+{
+  return *(const __m256h_u *) __P;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ph (void const *__P)
+{
+  return *(const __m128h_u *) __P;
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_sh (void *__P, __m128h __A)
+{
+  *(_Float16 *) __P = ((__v8hf)__A)[0];
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_ph (void *__P, __m512h __A)
+{
+   *(__m512h *) __P = __A;
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_ph (void *__P, __m256h __A)
+{
+   *(__m256h *) __P = __A;
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ph (void *__P, __m128h __A)
+{
+   *(__m128h *) __P = __A;
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_ph (void *__P, __m512h __A)
+{
+   *(__m512h_u *) __P = __A;
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_ph (void *__P, __m256h __A)
+{
+   *(__m256h_u *) __P = __A;
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ph (void *__P, __m128h __A)
+{
+   *(__m128h_u *) __P = __A;
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_ph (__m512h __A)
+{
+  return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
+          (__m512i) __A);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A + (__v32hf) __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_addph512_mask (__B, __C,
+           _mm512_setzero_ph (), __A);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A - (__v32hf) __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_subph512_mask (__B, __C,
+           _mm512_setzero_ph (), __A);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A * (__v32hf) __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_mulph512_mask (__B, __C,
+           _mm512_setzero_ph (), __A);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_ph (__m512h __A, __m512h __B)
+{
+  return (__m512h) ((__v32hf) __A / (__v32hf) __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_divph512_mask (__B, __C,
+           _mm512_setzero_ph (), __A);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_addph512_mask_round (__A, __B,
+          _mm512_setzero_ph (),
+          (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+     __m512h __D, const int __E)
+{
+  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+      const int __D)
+{
+  return __builtin_ia32_addph512_mask_round (__B, __C,
+          _mm512_setzero_ph (),
+          __A, __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_subph512_mask_round (__A, __B,
+          _mm512_setzero_ph (),
+          (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+     __m512h __D, const int __E)
+{
+  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+      const int __D)
+{
+  return __builtin_ia32_subph512_mask_round (__B, __C,
+          _mm512_setzero_ph (),
+          __A, __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_mulph512_mask_round (__A, __B,
+          _mm512_setzero_ph (),
+          (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+     __m512h __D, const int __E)
+{
+  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+      const int __D)
+{
+  return __builtin_ia32_mulph512_mask_round (__B, __C,
+          _mm512_setzero_ph (),
+          __A, __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_divph512_mask_round (__A, __B,
+          _mm512_setzero_ph (),
+          (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+     __m512h __D, const int __E)
+{
+  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+      const int __D)
+{
+  return __builtin_ia32_divph512_mask_round (__B, __C,
+          _mm512_setzero_ph (),
+          __A, __D);
+}
+#else
+#define _mm512_add_round_ph(A, B, C) ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_add_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_maskz_add_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D)))
+#define _mm512_sub_round_ph(A, B, C) ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_sub_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_maskz_sub_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D)))
+#define _mm512_mul_round_ph(A, B, C) ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_mul_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_maskz_mul_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D)))
+#define _mm512_div_round_ph(A, B, C) ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_div_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_maskz_div_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_conj_pch (__m512h __A)
+{
+  return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
+{
+  return (__m512h)
+    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
+       (__v16sf) __W,
+       (__mmask16) __U);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
+{
+  return (__m512h)
+    __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
+       (__v16sf) _mm512_setzero_ps (),
+       (__mmask16) __U);
+}
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_sh (__m128h __A, __m128h __B)
+{
+  __A[0] += __B[0];
+  return __A;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_sh (__m128h __A, __m128h __B)
+{
+  __A[0] -= __B[0];
+  return __A;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_sh (__m128h __A, __m128h __B)
+{
+  __A[0] *= __B[0];
+  return __A;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_sh (__m128h __A, __m128h __B)
+{
+  __A[0] /= __B[0];
+  return __A;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
+        __A);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_addsh_mask_round (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+         __m128h __D, const int __E)
+{
+  return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+   const int __D)
+{
+  return __builtin_ia32_addsh_mask_round (__B, __C,
+       _mm_setzero_ph (),
+       __A, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_subsh_mask_round (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+         __m128h __D, const int __E)
+{
+  return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+   const int __D)
+{
+  return __builtin_ia32_subsh_mask_round (__B, __C,
+       _mm_setzero_ph (),
+       __A, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_mulsh_mask_round (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+         __m128h __D, const int __E)
+{
+  return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+   const int __D)
+{
+  return __builtin_ia32_mulsh_mask_round (__B, __C,
+       _mm_setzero_ph (),
+       __A, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_divsh_mask_round (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+         __m128h __D, const int __E)
+{
+  return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+   const int __D)
+{
+  return __builtin_ia32_divsh_mask_round (__B, __C,
+       _mm_setzero_ph (),
+       __A, __D);
+}
+#else
+#define _mm_add_round_sh(A, B, C) ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_add_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_maskz_add_round_sh(A, B, C, D) ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D)))
+#define _mm_sub_round_sh(A, B, C) ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_sub_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_maskz_sub_round_sh(A, B, C, D) ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D)))
+#define _mm_mul_round_sh(A, B, C) ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_mul_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_maskz_mul_round_sh(A, B, C, D) ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D)))
+#define _mm_div_round_sh(A, B, C) ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_div_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_maskz_div_round_sh(A, B, C, D) ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_ph (__m512h __A, __m512h __B)
+{
+  return __builtin_ia32_maxph512_mask (__A, __B,
+           _mm512_setzero_ph (),
+           (__mmask32) -1);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_maxph512_mask (__B, __C,
+           _mm512_setzero_ph (), __A);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_ph (__m512h __A, __m512h __B)
+{
+  return __builtin_ia32_minph512_mask (__A, __B,
+           _mm512_setzero_ph (),
+           (__mmask32) -1);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_minph512_mask (__B, __C,
+           _mm512_setzero_ph (), __A);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_maxph512_mask_round (__A, __B,
+          _mm512_setzero_ph (),
+          (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+     __m512h __D, const int __E)
+{
+  return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+      const int __D)
+{
+  return __builtin_ia32_maxph512_mask_round (__B, __C,
+          _mm512_setzero_ph (),
+          __A, __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_minph512_mask_round (__A, __B,
+          _mm512_setzero_ph (),
+          (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+     __m512h __D, const int __E)
+{
+  return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+      const int __D)
+{
+  return __builtin_ia32_minph512_mask_round (__B, __C,
+          _mm512_setzero_ph (),
+          __A, __D);
+}
+#else
+#define _mm512_max_round_ph(A, B, C) (__builtin_ia32_maxph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_max_round_ph(A, B, C, D, E) (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
+#define _mm512_maskz_max_round_ph(A, B, C, D) (__builtin_ia32_maxph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D)))
+#define _mm512_min_round_ph(A, B, C) (__builtin_ia32_minph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_min_round_ph(A, B, C, D, E) (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
+#define _mm512_maskz_min_round_ph(A, B, C, D) (__builtin_ia32_minph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D)))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_sh (__m128h __A, __m128h __B)
+{
+  __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
+  return __A;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_sh (__m128h __A, __m128h __B)
+{
+  __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
+  return __A;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
+        __A);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_maxsh_mask_round (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+         __m128h __D, const int __E)
+{
+  return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+   const int __D)
+{
+  return __builtin_ia32_maxsh_mask_round (__B, __C,
+       _mm_setzero_ph (),
+       __A, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_minsh_mask_round (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+         __m128h __D, const int __E)
+{
+  return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+   const int __D)
+{
+  return __builtin_ia32_minsh_mask_round (__B, __C,
+       _mm_setzero_ph (),
+       __A, __D);
+}
+#else
+#define _mm_max_round_sh(A, B, C) (__builtin_ia32_maxsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_max_round_sh(A, B, C, D, E) (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_maskz_max_round_sh(A, B, C, D) (__builtin_ia32_maxsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D)))
+#define _mm_min_round_sh(A, B, C) (__builtin_ia32_minsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_min_round_sh(A, B, C, D, E) (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_maskz_min_round_sh(A, B, C, D) (__builtin_ia32_minsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D)))
+#endif
+#ifdef __OPTIMIZE
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
+{
+  return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
+         (__mmask32) -1);
+}
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
+    const int __D)
+{
+  return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
+         __A);
+}
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
+     const int __D)
+{
+  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
+        __C, (__mmask32) -1,
+        __D);
+}
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
+          const int __D, const int __E)
+{
+  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
+        __D, __A,
+        __E);
+}
+#else
+#define _mm512_cmp_ph_mask(A, B, C) (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
+#define _mm512_mask_cmp_ph_mask(A, B, C, D) (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
+#define _mm512_cmp_round_ph_mask(A, B, C, D) (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
+#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
+{
+  return (__mmask8)
+    __builtin_ia32_cmpsh_mask_round (__A, __B,
+         __C, (__mmask8) -1,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
+        const int __D)
+{
+  return (__mmask8)
+    __builtin_ia32_cmpsh_mask_round (__B, __C,
+         __D, __A,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
+         const int __D)
+{
+  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
+           __C, (__mmask8) -1,
+           __D);
+}
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
+       const int __D, const int __E)
+{
+  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
+           __D, __A,
+           __E);
+}
+#else
+#define _mm_cmp_sh_mask(A, B, C) (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (_MM_FROUND_CUR_DIRECTION)))
+#define _mm_mask_cmp_sh_mask(A, B, C, D) (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (_MM_FROUND_CUR_DIRECTION)))
+#define _mm_cmp_round_sh_mask(A, B, C, D) (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
+#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
+#endif
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_sh (__m128h __A, __m128h __B, const int __P)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
+{
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
+       (__mmask8) -1,__R);
+}
+#else
+#define _mm_comi_round_sh(A, B, P, R) (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
+#define _mm_comi_sh(A, B, P) (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), _MM_FROUND_CUR_DIRECTION))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_ph (__m512h __A)
+{
+  return __builtin_ia32_sqrtph512_mask_round (__A,
+           _mm512_setzero_ph(),
+           (__mmask32) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
+{
+  return __builtin_ia32_sqrtph512_mask_round (__B,
+           _mm512_setzero_ph (),
+           __A,
+           _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_round_ph (__m512h __A, const int __B)
+{
+  return __builtin_ia32_sqrtph512_mask_round (__A,
+           _mm512_setzero_ph(),
+           (__mmask32) -1, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+      const int __D)
+{
+  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_sqrtph512_mask_round (__B,
+           _mm512_setzero_ph (),
+           __A, __C);
+}
+#else
+#define _mm512_sqrt_round_ph(A, B) (__builtin_ia32_sqrtph512_mask_round ((A), _mm512_setzero_ph (), (__mmask32)-1, (B)))
+#define _mm512_mask_sqrt_round_ph(A, B, C, D) (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
+#define _mm512_maskz_sqrt_round_ph(A, B, C) (__builtin_ia32_sqrtph512_mask_round ((B), _mm512_setzero_ph (), (A), (C)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt_ph (__m512h __A)
+{
+  return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
+      (__mmask32) -1);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+  return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
+{
+  return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
+      __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
+          (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
+          __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
+        _mm_setzero_ph (),
+        __A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+   __m128h __D, const int __E)
+{
+  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
+        __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+    const int __D)
+{
+  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
+        _mm_setzero_ph (),
+        __A, __D);
+}
+#else
+#define _mm_sqrt_round_sh(A, B, C) (__builtin_ia32_sqrtsh_mask_round ((B), (A), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_sqrt_round_sh(A, B, C, D, E) (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
+#define _mm_maskz_sqrt_round_sh(A, B, C, D) (__builtin_ia32_sqrtsh_mask_round ((C), (B), _mm_setzero_ph (), (A), (D)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp_ph (__m512h __A)
+{
+  return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
+           (__mmask32) -1);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+  return __builtin_ia32_rcpph512_mask (__C, __A, __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
+{
+  return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
+           __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_ph (__m512h __A, __m512h __B)
+{
+  return __builtin_ia32_scalefph512_mask_round (__A, __B,
+      _mm512_setzero_ph (),
+      (__mmask32) -1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_scalefph512_mask_round (__B, __C,
+      _mm512_setzero_ph (),
+      __A,
+      _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_scalefph512_mask_round (__A, __B,
+      _mm512_setzero_ph (),
+      (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+        __m512h __D, const int __E)
+{
+  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
+      __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+         const int __D)
+{
+  return __builtin_ia32_scalefph512_mask_round (__B, __C,
+      _mm512_setzero_ph (),
+      __A, __D);
+}
+#else
+#define _mm512_scalef_round_ph(A, B, C) (__builtin_ia32_scalefph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_scalef_round_ph(A, B, C, D, E) (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
+#define _mm512_maskz_scalef_round_ph(A, B, C, D) (__builtin_ia32_scalefph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D)))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_scalefsh_mask_round (__A, __B,
+          _mm_setzero_ph (),
+          (__mmask8) -1,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_scalefsh_mask_round (__B, __C,
+          _mm_setzero_ph (),
+          __A,
+          _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_scalefsh_mask_round (__A, __B,
+          _mm_setzero_ph (),
+          (__mmask8) -1, __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+     __m128h __D, const int __E)
+{
+  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
+          __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+      const int __D)
+{
+  return __builtin_ia32_scalefsh_mask_round (__B, __C,
+          _mm_setzero_ph (),
+          __A, __D);
+}
+#else
+#define _mm_scalef_round_sh(A, B, C) (__builtin_ia32_scalefsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C)))
+#define _mm_mask_scalef_round_sh(A, B, C, D, E) (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_maskz_scalef_round_sh(A, B, C, D) (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_ph (__m512h __A, int __B)
+{
+  return __builtin_ia32_reduceph512_mask_round (__A, __B,
+      _mm512_setzero_ph (),
+      (__mmask32) -1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
+{
+  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
+{
+  return __builtin_ia32_reduceph512_mask_round (__B, __C,
+      _mm512_setzero_ph (),
+      __A,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
+{
+  return __builtin_ia32_reduceph512_mask_round (__A, __B,
+      _mm512_setzero_ph (),
+      (__mmask32) -1, __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+        int __D, const int __E)
+{
+  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
+      __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
+         const int __D)
+{
+  return __builtin_ia32_reduceph512_mask_round (__B, __C,
+      _mm512_setzero_ph (),
+      __A, __D);
+}
+#else
+#define _mm512_reduce_ph(A, B) (__builtin_ia32_reduceph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION))
+#define _mm512_mask_reduce_ph(A, B, C, D) (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_maskz_reduce_ph(A, B, C) (__builtin_ia32_reduceph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_reduce_round_ph(A, B, C) (__builtin_ia32_reduceph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_reduce_round_ph(A, B, C, D, E) (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
+#define _mm512_maskz_reduce_round_ph(A, B, C, D) (__builtin_ia32_reduceph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_sh (__m128h __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
+          _mm_setzero_ph (),
+          (__mmask8) -1,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
+      __m128h __D, int __E)
+{
+  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
+          _mm_setzero_ph (), __A,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
+{
+  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
+          _mm_setzero_ph (),
+          (__mmask8) -1, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+     __m128h __D, int __E, const int __F)
+{
+  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
+          __B, __F);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+      int __D, const int __E)
+{
+  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
+          _mm_setzero_ph (),
+          __A, __E);
+}
+#else
+#define _mm_reduce_sh(A, B, C) (__builtin_ia32_reducesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_reduce_sh(A, B, C, D, E) (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_reduce_sh(A, B, C, D) (__builtin_ia32_reducesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION))
+#define _mm_reduce_round_sh(A, B, C, D) (__builtin_ia32_reducesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, (D)))
+#define _mm_mask_reduce_round_sh(A, B, C, D, E, F) (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
+#define _mm_maskz_reduce_round_sh(A, B, C, D, E) (__builtin_ia32_reducesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), (E)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_ph (__m512h __A, int __B)
+{
+  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
+        _mm512_setzero_ph (),
+        (__mmask32) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
+      __m512h __C, int __D)
+{
+  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
+{
+  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
+        _mm512_setzero_ph (),
+        __A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
+{
+  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
+        _mm512_setzero_ph (),
+        (__mmask32) -1,
+        __C);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
+     __m512h __C, int __D, const int __E)
+{
+  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
+        __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
+      const int __D)
+{
+  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
+        _mm512_setzero_ph (),
+        __A, __D);
+}
+#else
+#define _mm512_roundscale_ph(A, B) (__builtin_ia32_rndscaleph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION))
+#define _mm512_mask_roundscale_ph(A, B, C, D) (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_maskz_roundscale_ph(A, B, C) (__builtin_ia32_rndscaleph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_roundscale_round_ph(A, B, C) (__builtin_ia32_rndscaleph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C)))
+#define _mm512_mask_roundscale_round_ph(A, B, C, D, E) (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
+#define _mm512_maskz_roundscale_round_ph(A, B, C, D) (__builtin_ia32_rndscaleph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
+            _mm_setzero_ph (),
+            (__mmask8) -1,
+            _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
+   __m128h __D, int __E)
+{
+  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
+            _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
+            _mm_setzero_ph (), __A,
+            _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
+{
+  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
+            _mm_setzero_ph (),
+            (__mmask8) -1,
+            __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+         __m128h __D, int __E, const int __F)
+{
+  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
+            __A, __B, __F);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+          int __D, const int __E)
+{
+  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
+            _mm_setzero_ph (),
+            __A, __E);
+}
+#else
+#define _mm_roundscale_sh(A, B, C) (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_roundscale_sh(A, B, C, D, E) (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_roundscale_sh(A, B, C, D) (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_round_sh(A, B, C, D) (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, (D)))
+#define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
+#define _mm_maskz_roundscale_round_sh(A, B, C, D, E) (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), (E)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __mmask8
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_sh_mask (__m128h __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
+         (__mmask8) -1);
+}
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
+}
+#else
+#define _mm_fpclass_sh_mask(X, C) ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), (int) (C), (__mmask8) (-1)))
+#define _mm_mask_fpclass_sh_mask(U, X, C) ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), (int) (C), (__mmask8) (U)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
+        const int __imm)
+{
+  return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
+             __imm, __U);
+}
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_ph_mask (__m512h __A, const int __imm)
+{
+  return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
+             __imm,
+             (__mmask32) -1);
+}
+#else
+#define _mm512_mask_fpclass_ph_mask(u, x, c) ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), (int) (c),(__mmask8)(u)))
+#define _mm512_fpclass_ph_mask(x, c) ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), (int) (c),(__mmask8)-1))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_sh (__m128h __A, __m128h __B)
+{
+  return (__m128h)
+    __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
+     (__v8hf) _mm_setzero_ph (),
+     (__mmask8) -1,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h)
+    __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
+     (__v8hf) __W, (__mmask8) __U,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h)
+    __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
+     (__v8hf) _mm_setzero_ph (),
+     (__mmask8) __U,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_ph (__m512h __A)
+{
+  return (__m512h)
+    __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+         (__v32hf) _mm512_setzero_ph (),
+         (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
+{
+  return (__m512h)
+    __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
+         (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
+{
+  return (__m512h)
+    __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+         (__v32hf) _mm512_setzero_ph (),
+         (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
+             (__v8hf) __B,
+             _mm_setzero_ph (),
+             (__mmask8) -1,
+             __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
+     __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __W,
+             (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
+      const int __R)
+{
+  return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf)
+             _mm_setzero_ph (),
+             (__mmask8) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_round_ph (__m512h __A, const int __R)
+{
+  return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+          (__v32hf)
+          _mm512_setzero_ph (),
+          (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+        const int __R)
+{
+  return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+          (__v32hf) __W,
+          (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
+{
+  return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+          (__v32hf)
+          _mm512_setzero_ph (),
+          (__mmask32) __U, __R);
+}
+#else
+#define _mm_getexp_round_sh(A, B, R) ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1, R))
+#define _mm_mask_getexp_round_sh(W, U, A, B, C) (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
+#define _mm_maskz_getexp_round_sh(U, A, B, C) (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, (__v8hf)_mm_setzero_ph(), U, C)
+#define _mm512_getexp_round_ph(A, R) ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
+#define _mm512_mask_getexp_round_ph(W, U, A, R) ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), R))
+#define _mm512_maskz_getexp_round_ph(U, A, R) ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_sh (__m128h __A, __m128h __B,
+  _MM_MANTISSA_NORM_ENUM __C,
+  _MM_MANTISSA_SIGN_ENUM __D)
+{
+  return (__m128h)
+    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
+      (__D << 2) | __C, _mm_setzero_ph (),
+      (__mmask8) -1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
+       __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
+       _MM_MANTISSA_SIGN_ENUM __D)
+{
+  return (__m128h)
+    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
+      (__D << 2) | __C, (__v8hf) __W,
+      __U, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
+        _MM_MANTISSA_NORM_ENUM __C,
+        _MM_MANTISSA_SIGN_ENUM __D)
+{
+  return (__m128h)
+    __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
+      (__D << 2) | __C,
+      (__v8hf) _mm_setzero_ph(),
+      __U, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
+     _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+           (__C << 2) | __B,
+           _mm512_setzero_ph (),
+           (__mmask32) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
+   _MM_MANTISSA_NORM_ENUM __B,
+   _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+           (__C << 2) | __B,
+           (__v32hf) __W, __U,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
+    _MM_MANTISSA_NORM_ENUM __B,
+    _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+           (__C << 2) | __B,
+           (__v32hf)
+           _mm512_setzero_ph (),
+           __U,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_sh (__m128h __A, __m128h __B,
+        _MM_MANTISSA_NORM_ENUM __C,
+        _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
+       (__v8hf) __B,
+       (__D << 2) | __C,
+       _mm_setzero_ph (),
+       (__mmask8) -1,
+       __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
+      __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
+      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
+       (__v8hf) __B,
+       (__D << 2) | __C,
+       (__v8hf) __W,
+       __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
+       _MM_MANTISSA_NORM_ENUM __C,
+       _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+  return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
+       (__v8hf) __B,
+       (__D << 2) | __C,
+       (__v8hf)
+       _mm_setzero_ph(),
+       __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
+    _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+           (__C << 2) | __B,
+           _mm512_setzero_ph (),
+           (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+         _MM_MANTISSA_NORM_ENUM __B,
+         _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+           (__C << 2) | __B,
+           (__v32hf) __W, __U,
+           __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
+          _MM_MANTISSA_NORM_ENUM __B,
+          _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+           (__C << 2) | __B,
+           (__v32hf)
+           _mm512_setzero_ph (),
+           __U, __R);
+}
+#else
+#define _mm512_getmant_ph(X, B, C) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION))
+#define _mm512_mask_getmant_ph(W, U, X, B, C) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h)(W), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_maskz_getmant_ph(U, X, B, C) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+#define _mm_getmant_sh(X, Y, C, D) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_getmant_sh(W, U, X, Y, C, D) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_getmant_sh(U, X, Y, C, D) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+#define _mm512_getmant_round_ph(X, B, C, R) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)-1, (R)))
+#define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h)(W), (__mmask32)(U), (R)))
+#define _mm512_maskz_getmant_round_ph(U, X, B, C, R) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)(U), (R)))
+#define _mm_getmant_round_sh(X, Y, C, D, R) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph (), (__mmask8)-1, (R)))
+#define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h)(W), (__mmask8)(U), (R)))
+#define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph(), (__mmask8)(U), (R)))
+#endif
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi16_si128 (short __A)
+{
+  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
+}
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si16 (__m128i __A)
+{
+  return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
+{
+  return __builtin_ia32_loadsh_mask (__C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
+{
+  return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
+}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
+{
+  __builtin_ia32_storesh_mask (__A, __C, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_sh (__m128h __A, __m128h __B)
+{
+  __A[0] = __B[0];
+  return __A;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_epi32 (__m256h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2dq512_mask_round (__A,
+         (__v16si)
+         _mm512_setzero_si512 (),
+         (__mmask16) -1,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2dq512_mask_round (__C,
+         (__v16si) __A,
+         __B,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2dq512_mask_round (__B,
+         (__v16si)
+         _mm512_setzero_si512 (),
+         __A,
+         _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_epi32 (__m256h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2dq512_mask_round (__A,
+         (__v16si)
+         _mm512_setzero_si512 (),
+         (__mmask16) -1,
+         __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2dq512_mask_round (__C,
+         (__v16si) __A,
+         __B,
+         __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2dq512_mask_round (__B,
+         (__v16si)
+         _mm512_setzero_si512 (),
+         __A,
+         __C);
+}
+#else
+#define _mm512_cvt_roundph_epi32(A, B) ((__m512i) __builtin_ia32_vcvtph2dq512_mask_round ((A), (__v16si) _mm512_setzero_si512 (), (__mmask16)-1, (B)))
+#define _mm512_mask_cvt_roundph_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
+#define _mm512_maskz_cvt_roundph_epi32(A, B, C) ((__m512i) __builtin_ia32_vcvtph2dq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_epu32 (__m256h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2udq512_mask_round (__A,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          (__mmask16) -1,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2udq512_mask_round (__C,
+          (__v16si) __A,
+          __B,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2udq512_mask_round (__B,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          __A,
+          _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_epu32 (__m256h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2udq512_mask_round (__A,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          (__mmask16) -1,
+          __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2udq512_mask_round (__C,
+          (__v16si) __A,
+          __B,
+          __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2udq512_mask_round (__B,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          __A,
+          __C);
+}
+#else
+#define _mm512_cvt_roundph_epu32(A, B) ((__m512i) __builtin_ia32_vcvtph2udq512_mask_round ((A), (__v16si) _mm512_setzero_si512 (), (__mmask16)-1, (B)))
+#define _mm512_mask_cvt_roundph_epu32(A, B, C, D) ((__m512i) __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
+#define _mm512_maskz_cvt_roundph_epu32(A, B, C) ((__m512i) __builtin_ia32_vcvtph2udq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttph_epi32 (__m256h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2dq512_mask_round (__A,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          (__mmask16) -1,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2dq512_mask_round (__C,
+          (__v16si) __A,
+          __B,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2dq512_mask_round (__B,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          __A,
+          _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2dq512_mask_round (__A,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          (__mmask16) -1,
+          __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
+    __m256h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2dq512_mask_round (__C,
+          (__v16si) __A,
+          __B,
+          __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2dq512_mask_round (__B,
+          (__v16si)
+          _mm512_setzero_si512 (),
+          __A,
+          __C);
+}
+#else
+#define _mm512_cvtt_roundph_epi32(A, B) ((__m512i) __builtin_ia32_vcvttph2dq512_mask_round ((A), (__v16si) (_mm512_setzero_si512 ()), (__mmask16)(-1), (B)))
+#define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
+#define _mm512_maskz_cvtt_roundph_epi32(A, B, C) ((__m512i) __builtin_ia32_vcvttph2dq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttph_epu32 (__m256h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2udq512_mask_round (__A,
+           (__v16si)
+           _mm512_setzero_si512 (),
+           (__mmask16) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2udq512_mask_round (__C,
+           (__v16si) __A,
+           __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2udq512_mask_round (__B,
+           (__v16si)
+           _mm512_setzero_si512 (),
+           __A,
+           _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2udq512_mask_round (__A,
+           (__v16si)
+           _mm512_setzero_si512 (),
+           (__mmask16) -1,
+           __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
+    __m256h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2udq512_mask_round (__C,
+           (__v16si) __A,
+           __B,
+           __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2udq512_mask_round (__B,
+           (__v16si)
+           _mm512_setzero_si512 (),
+           __A,
+           __C);
+}
+#else
+#define _mm512_cvtt_roundph_epu32(A, B) ((__m512i) __builtin_ia32_vcvttph2udq512_mask_round ((A), (__v16si) _mm512_setzero_si512 (), (__mmask16)-1, (B)))
+#define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
+#define _mm512_maskz_cvtt_roundph_epu32(A, B, C) ((__m512i) __builtin_ia32_vcvttph2udq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
+       _mm256_setzero_ph (),
+       (__mmask16) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
+       __A,
+       __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
+       _mm256_setzero_ph (),
+       __A,
+       _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi32_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
+       _mm256_setzero_ph (),
+       (__mmask16) -1,
+       __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
+       __A,
+       __B,
+       __D);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
+       _mm256_setzero_ph (),
+       __A,
+       __C);
+}
+#else
+#define _mm512_cvt_roundepi32_ph(A, B) (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), _mm256_setzero_ph (), (__mmask16)-1, (B)))
+#define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundepi32_ph(A, B, C) (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), _mm256_setzero_ph (), (A), (C)))
+#endif
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
+        _mm256_setzero_ph (),
+        (__mmask16) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
+        __A,
+        __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
+        _mm256_setzero_ph (),
+        __A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu32_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
+        _mm256_setzero_ph (),
+        (__mmask16) -1,
+        __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
+        __A,
+        __B,
+        __D);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
+        _mm256_setzero_ph (),
+        __A,
+        __C);
+}
+#else
+#define _mm512_cvt_roundepu32_ph(A, B) (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), _mm256_setzero_ph (), (__mmask16)-1, B))
+#define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, A, B, D))
+#define _mm512_maskz_cvt_roundepu32_ph(A, B, C) (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, _mm256_setzero_ph (), A, C))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_epi64 (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2qq512_mask_round (__A,
+       _mm512_setzero_si512 (),
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2qq512_mask_round (__B,
+       _mm512_setzero_si512 (),
+       __A,
+       _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_epi64 (__m128h __A, int __B)
+{
+  return __builtin_ia32_vcvtph2qq512_mask_round (__A,
+       _mm512_setzero_si512 (),
+       (__mmask8) -1,
+       __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_vcvtph2qq512_mask_round (__B,
+       _mm512_setzero_si512 (),
+       __A,
+       __C);
+}
+#else
+#define _mm512_cvt_roundph_epi64(A, B) (__builtin_ia32_vcvtph2qq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvt_roundph_epi64(A, B, C, D) (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundph_epi64(A, B, C) (__builtin_ia32_vcvtph2qq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_epu64 (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
+        _mm512_setzero_si512 (),
+        (__mmask8) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
+        _mm512_setzero_si512 (),
+        __A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_epu64 (__m128h __A, int __B)
+{
+  return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
+        _mm512_setzero_si512 (),
+        (__mmask8) -1,
+        __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
+        _mm512_setzero_si512 (),
+        __A,
+        __C);
+}
+#else
+#define _mm512_cvt_roundph_epu64(A, B) (__builtin_ia32_vcvtph2uqq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvt_roundph_epu64(A, B, C, D) (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundph_epu64(A, B, C) (__builtin_ia32_vcvtph2uqq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttph_epi64 (__m128h __A)
+{
+  return __builtin_ia32_vcvttph2qq512_mask_round (__A,
+        _mm512_setzero_si512 (),
+        (__mmask8) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvttph2qq512_mask_round (__B,
+        _mm512_setzero_si512 (),
+        __A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
+{
+  return __builtin_ia32_vcvttph2qq512_mask_round (__A,
+        _mm512_setzero_si512 (),
+        (__mmask8) -1,
+        __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_vcvttph2qq512_mask_round (__B,
+        _mm512_setzero_si512 (),
+        __A,
+        __C);
+}
+#else
+#define _mm512_cvtt_roundph_epi64(A, B) (__builtin_ia32_vcvttph2qq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D))
+#define _mm512_maskz_cvtt_roundph_epi64(A, B, C) (__builtin_ia32_vcvttph2qq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttph_epu64 (__m128h __A)
+{
+  return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
+         _mm512_setzero_si512 (),
+         (__mmask8) -1,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
+         _mm512_setzero_si512 (),
+         __A,
+         _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
+{
+  return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
+         _mm512_setzero_si512 (),
+         (__mmask8) -1,
+         __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
+         _mm512_setzero_si512 (),
+         __A,
+         __C);
+}
+#else
+#define _mm512_cvtt_roundph_epu64(A, B) (__builtin_ia32_vcvttph2uqq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D))
+#define _mm512_maskz_cvtt_roundph_epu64(A, B, C) (__builtin_ia32_vcvttph2uqq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
+       _mm_setzero_ph (),
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
+       __A,
+       __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
+       _mm_setzero_ph (),
+       __A,
+       _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi64_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
+       _mm_setzero_ph (),
+       (__mmask8) -1,
+       __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
+       __A,
+       __B,
+       __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
+       _mm_setzero_ph (),
+       __A,
+       __C);
+}
+#else
+#define _mm512_cvt_roundepi64_ph(A, B) (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), _mm_setzero_ph (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundepi64_ph(A, B, C) (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), _mm_setzero_ph (), (A), (C)))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu64_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
+        __A,
+        __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
+        _mm_setzero_ph (),
+        __A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu64_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1,
+        __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
+        __A,
+        __B,
+        __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
+        _mm_setzero_ph (),
+        __A,
+        __C);
+}
+#else
+#define _mm512_cvt_roundepu64_ph(A, B) (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), _mm_setzero_ph (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundepu64_ph(A, B, C) (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), _mm_setzero_ph (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_epi16 (__m512h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2w512_mask_round (__A,
+           (__v32hi)
+           _mm512_setzero_si512 (),
+           (__mmask32) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2w512_mask_round (__C,
+           (__v32hi) __A,
+           __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2w512_mask_round (__B,
+           (__v32hi)
+           _mm512_setzero_si512 (),
+           __A,
+           _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_epi16 (__m512h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2w512_mask_round (__A,
+           (__v32hi)
+           _mm512_setzero_si512 (),
+           (__mmask32) -1,
+           __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2w512_mask_round (__C,
+           (__v32hi) __A,
+           __B,
+           __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2w512_mask_round (__B,
+           (__v32hi)
+           _mm512_setzero_si512 (),
+           __A,
+           __C);
+}
+#else
+#define _mm512_cvt_roundph_epi16(A, B) ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B)))
+#define _mm512_mask_cvt_roundph_epi16(A, B, C, D) ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), (__v32hi)(A), (B), (D)))
+#define _mm512_maskz_cvt_roundph_epi16(A, B, C) ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_epu16 (__m512h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2uw512_mask_round (__A,
+            (__v32hi)
+            _mm512_setzero_si512 (),
+            (__mmask32) -1,
+            _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
+            _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2uw512_mask_round (__B,
+            (__v32hi)
+            _mm512_setzero_si512 (),
+            __A,
+            _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_epu16 (__m512h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2uw512_mask_round (__A,
+            (__v32hi)
+            _mm512_setzero_si512 (),
+            (__mmask32) -1,
+            __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvtph2uw512_mask_round (__B,
+            (__v32hi)
+            _mm512_setzero_si512 (),
+            __A,
+            __C);
+}
+#else
+#define _mm512_cvt_roundph_epu16(A, B) ((__m512i) __builtin_ia32_vcvtph2uw512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B)))
+#define _mm512_mask_cvt_roundph_epu16(A, B, C, D) ((__m512i) __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
+#define _mm512_maskz_cvt_roundph_epu16(A, B, C) ((__m512i) __builtin_ia32_vcvtph2uw512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttph_epi16 (__m512h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2w512_mask_round (__A,
+         (__v32hi)
+         _mm512_setzero_si512 (),
+         (__mmask32) -1,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2w512_mask_round (__C,
+         (__v32hi) __A,
+         __B,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2w512_mask_round (__B,
+         (__v32hi)
+         _mm512_setzero_si512 (),
+         __A,
+         _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2w512_mask_round (__A,
+         (__v32hi)
+         _mm512_setzero_si512 (),
+         (__mmask32) -1,
+         __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
+    __m512h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2w512_mask_round (__C,
+         (__v32hi) __A,
+         __B,
+         __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2w512_mask_round (__B,
+         (__v32hi)
+         _mm512_setzero_si512 (),
+         __A,
+         __C);
+}
+#else
+#define _mm512_cvtt_roundph_epi16(A, B) ((__m512i) __builtin_ia32_vcvttph2w512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B)))
+#define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2w512_mask_round ((C), (__v32hi)(A), (B), (D)))
+#define _mm512_maskz_cvtt_roundph_epi16(A, B, C) ((__m512i) __builtin_ia32_vcvttph2w512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttph_epu16 (__m512h __A)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2uw512_mask_round (__A,
+          (__v32hi)
+          _mm512_setzero_si512 (),
+          (__mmask32) -1,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2uw512_mask_round (__C,
+          (__v32hi) __A,
+          __B,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2uw512_mask_round (__B,
+          (__v32hi)
+          _mm512_setzero_si512 (),
+          __A,
+          _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2uw512_mask_round (__A,
+          (__v32hi)
+          _mm512_setzero_si512 (),
+          (__mmask32) -1,
+          __B);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
+    __m512h __C, int __D)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2uw512_mask_round (__C,
+          (__v32hi) __A,
+          __B,
+          __D);
+}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
+{
+  return (__m512i)
+    __builtin_ia32_vcvttph2uw512_mask_round (__B,
+          (__v32hi)
+          _mm512_setzero_si512 (),
+          __A,
+          __C);
+}
+#else
+#define _mm512_cvtt_roundph_epu16(A, B) ((__m512i) __builtin_ia32_vcvttph2uw512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B)))
+#define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
+#define _mm512_maskz_cvtt_roundph_epu16(A, B, C) ((__m512i) __builtin_ia32_vcvttph2uw512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
+      _mm512_setzero_ph (),
+      (__mmask32) -1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
+      __A,
+      __B,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
+      _mm512_setzero_ph (),
+      __A,
+      _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi16_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
+      _mm512_setzero_ph (),
+      (__mmask32) -1,
+      __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
+      __A,
+      __B,
+      __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
+      _mm512_setzero_ph (),
+      __A,
+      __C);
+}
+#else
+#define _mm512_cvt_roundepi16_ph(A, B) (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), _mm512_setzero_ph (), (__mmask32)-1, (B)))
+#define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundepi16_ph(A, B, C) (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), _mm512_setzero_ph (), (A), (C)))
+#endif
+  extern __inline __m512h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  _mm512_cvtepu16_ph (__m512i __A)
+  {
+    return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
+         _mm512_setzero_ph (),
+         (__mmask32) -1,
+         _MM_FROUND_CUR_DIRECTION);
+  }
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
+       __A,
+       __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
+       _mm512_setzero_ph (),
+       __A,
+       _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu16_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
+       _mm512_setzero_ph (),
+       (__mmask32) -1,
+       __B);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
+       __A,
+       __B,
+       __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
+       _mm512_setzero_ph (),
+       __A,
+       __C);
+}
+#else
+#define _mm512_cvt_roundepu16_ph(A, B) (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), _mm512_setzero_ph (), (__mmask32)-1, (B)))
+#define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundepu16_ph(A, B, C) (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), _mm512_setzero_ph (), (A), (C)))
+#endif
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i32 (__m128h __A)
+{
+  return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u32 (__m128h __A)
+{
+  return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
+}
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
+}
+#else
+#define _mm_cvt_roundsh_i32(A, B) ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
+#define _mm_cvt_roundsh_u32(A, B) ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
+#endif
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
+}
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
+}
+#else
+#define _mm_cvt_roundsh_i64(A, B) ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
+#define _mm_cvt_roundsh_u64(A, B) ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
+#endif
+#endif
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i32 (__m128h __A)
+{
+  return (int)
+    __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u32 (__m128h __A)
+{
+  return (int)
+    __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
+}
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
+}
+#else
+#define _mm_cvtt_roundsh_i32(A, B) ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
+#define _mm_cvtt_roundsh_u32(A, B) ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
+#endif
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
+}
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
+}
+#else
+#define _mm_cvtt_roundsh_i64(A, B) ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
+#define _mm_cvtt_roundsh_u64(A, B) ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
+#endif
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_sh (__m128h __A, int __B)
+{
+  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_sh (__m128h __A, unsigned int __B)
+{
+  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
+{
+  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
+{
+  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
+}
+#else
+#define _mm_cvt_roundi32_sh(A, B, C) (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
+#define _mm_cvt_roundu32_sh(A, B, C) (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
+#endif
+#ifdef __x86_64__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_sh (__m128h __A, long long __B)
+{
+  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_sh (__m128h __A, unsigned long long __B)
+{
+  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
+{
+  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
+{
+  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
+}
+#else
+#define _mm_cvt_roundi64_sh(A, B, C) (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
+#define _mm_cvt_roundu64_sh(A, B, C) (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
+#endif
+#endif
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_pd (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2pd512_mask_round (__A,
+       _mm512_setzero_pd (),
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2pd512_mask_round (__B,
+       _mm512_setzero_pd (),
+       __A,
+       _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundph_pd (__m128h __A, int __B)
+{
+  return __builtin_ia32_vcvtph2pd512_mask_round (__A,
+       _mm512_setzero_pd (),
+       (__mmask8) -1,
+       __B);
+}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
+}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_vcvtph2pd512_mask_round (__B,
+       _mm512_setzero_pd (),
+       __A,
+       __C);
+}
+#else
+#define _mm512_cvt_roundph_pd(A, B) (__builtin_ia32_vcvtph2pd512_mask_round ((A), _mm512_setzero_pd (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvt_roundph_pd(A, B, C, D) (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundph_pd(A, B, C) (__builtin_ia32_vcvtph2pd512_mask_round ((B), _mm512_setzero_pd (), (A), (C)))
+#endif
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtxph_ps (__m256h __A)
+{
+  return __builtin_ia32_vcvtph2psx512_mask_round (__A,
+        _mm512_setzero_ps (),
+        (__mmask16) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
+{
+  return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
+{
+  return __builtin_ia32_vcvtph2psx512_mask_round (__B,
+        _mm512_setzero_ps (),
+        __A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx_roundph_ps (__m256h __A, int __B)
+{
+  return __builtin_ia32_vcvtph2psx512_mask_round (__A,
+        _mm512_setzero_ps (),
+        (__mmask16) -1,
+        __B);
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
+{
+  return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
+}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
+{
+  return __builtin_ia32_vcvtph2psx512_mask_round (__B,
+        _mm512_setzero_ps (),
+        __A,
+        __C);
+}
+#else
+#define _mm512_cvtx_roundph_ps(A, B) (__builtin_ia32_vcvtph2psx512_mask_round ((A), _mm512_setzero_ps (), (__mmask16)-1, (B)))
+#define _mm512_mask_cvtx_roundph_ps(A, B, C, D) (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
+#define _mm512_maskz_cvtx_roundph_ps(A, B, C) (__builtin_ia32_vcvtph2psx512_mask_round ((B), _mm512_setzero_ps (), (A), (C)))
+#endif
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtxps_ph (__m512 __A)
+{
+  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
+        _mm256_setzero_ph (),
+        (__mmask16) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
+{
+  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
+        __A, __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
+{
+  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
+        _mm256_setzero_ph (),
+        __A,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx_roundps_ph (__m512 __A, int __B)
+{
+  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
+        _mm256_setzero_ph (),
+        (__mmask16) -1,
+        __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
+{
+  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
+        __A, __B, __D);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
+{
+  return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
+        _mm256_setzero_ph (),
+        __A, __C);
+}
+#else
+#define _mm512_cvtx_roundps_ph(A, B) (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), _mm256_setzero_ph (), (__mmask16)-1, (B)))
+#define _mm512_mask_cvtx_roundps_ph(A, B, C, D) (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), (A), (B), (D)))
+#define _mm512_maskz_cvtx_roundps_ph(A, B, C) (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), _mm256_setzero_ph (), (A), (C)))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_ph (__m512d __A)
+{
+  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
+       _mm_setzero_ph (),
+       (__mmask8) -1,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
+{
+  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
+       __A, __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
+{
+  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
+       _mm_setzero_ph (),
+       __A,
+       _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_ph (__m512d __A, int __B)
+{
+  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
+       _mm_setzero_ph (),
+       (__mmask8) -1,
+       __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
+{
+  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
+       __A, __B, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
+{
+  return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
+       _mm_setzero_ph (),
+       __A, __C);
+}
+#else
+#define _mm512_cvt_roundpd_ph(A, B) (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), _mm_setzero_ph (), (__mmask8)-1, (B)))
+#define _mm512_mask_cvt_roundpd_ph(A, B, C, D) (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), (A), (B), (D)))
+#define _mm512_maskz_cvt_roundpd_ph(A, B, C) (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), _mm_setzero_ph (), (A), (C)))
+#endif
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_ss (__m128 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+           _mm_setzero_ps (),
+           (__mmask8) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+    __m128h __D)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
+     __m128h __C)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+           _mm_setzero_ps (),
+           __A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_sd (__m128d __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+           _mm_setzero_pd (),
+           (__mmask8) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+    __m128h __D)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+           _mm_setzero_pd (),
+           __A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+           _mm_setzero_ps (),
+           (__mmask8) -1, __R);
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+    __m128h __D, const int __R)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
+     __m128h __C, const int __R)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+           _mm_setzero_ps (),
+           __A, __R);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+           _mm_setzero_pd (),
+           (__mmask8) -1, __R);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+    __m128h __D, const int __R)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+           _mm_setzero_pd (),
+           __A, __R);
+}
+#else
+#define _mm_cvt_roundsh_ss(A, B, R) (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), _mm_setzero_ps (), (__mmask8) -1, (R)))
+#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
+#define _mm_maskz_cvt_roundsh_ss(A, B, C, R) (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), _mm_setzero_ps (), (A), (R)))
+#define _mm_cvt_roundsh_sd(A, B, R) (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), _mm_setzero_pd (), (__mmask8) -1, (R)))
+#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
+#define _mm_maskz_cvt_roundsh_sd(A, B, C, R) (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), _mm_setzero_pd (), (A), (R)))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_sh (__m128h __A, __m128 __B)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+           _mm_setzero_ph (),
+           (__mmask8) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+           _mm_setzero_ph (),
+           __A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_sh (__m128h __A, __m128d __B)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+           _mm_setzero_ph (),
+           (__mmask8) -1,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+           _mm_setzero_ph (),
+           __A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+           _mm_setzero_ph (),
+           (__mmask8) -1, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
+    const int __R)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
+     const int __R)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+           _mm_setzero_ph (),
+           __A, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+           _mm_setzero_ph (),
+           (__mmask8) -1, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
+    const int __R)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
+     const int __R)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+           _mm_setzero_ph (),
+           __A, __R);
+}
+#else
+#define _mm_cvt_roundss_sh(A, B, R) (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), _mm_setzero_ph (), (__mmask8) -1, R))
+#define _mm_mask_cvt_roundss_sh(A, B, C, D, R) (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
+#define _mm_maskz_cvt_roundss_sh(A, B, C, R) (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), _mm_setzero_ph (), A, R))
+#define _mm_cvt_roundsd_sh(A, B, R) (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), _mm_setzero_ph (), (__mmask8) -1, R))
+#define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
+#define _mm_maskz_cvt_roundsd_sh(A, B, C, R) (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), _mm_setzero_ph (), (A), (R)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) -1,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) __U,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
+          __m512h __C, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
+    __mmask32 __U, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+    __m512h __C, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U, __R);
+}
+#else
+#define _mm512_fmaddsub_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
+#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
+#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
+#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) -1,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
+    __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) __U,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
+     __m512h __C, __mmask32 __U)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
+     __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
+     __m512h __C, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
+          __m512h __C, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
+    __mmask32 __U, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+    __m512h __C, const int __R)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
+      (__v32hf) __B,
+      (__v32hf) __C,
+      (__mmask32) __U, __R);
+}
+#else
+#define _mm512_fmsubadd_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
+#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
+#define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
+#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
+         (__v32hf) __B,
+         (__v32hf) __C,
+         (__mmask32) -1,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
+         (__v32hf) __B,
+         (__v32hf) __C,
+         (__mmask32) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
+          __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
+    __mmask32 __U, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+    __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+#else
+#define _mm512_fmadd_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
+#define _mm512_mask_fmadd_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
+#define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
+#define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) -1,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           (__mmask32) __U,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           (__mmask32) __U,
+           _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
+          __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
+    __mmask32 __U, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+    __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+#else
+#define _mm512_fnmadd_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
+#define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
+#define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
+#define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
+         (__v32hf) __B,
+         (__v32hf) __C,
+         (__mmask32) -1,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
+         (__v32hf) __B,
+         (__v32hf) __C,
+         (__mmask32) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
+          __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
+    __mmask32 __U, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+    __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+#else
+#define _mm512_fmsub_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
+#define _mm512_mask_fmsub_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
+#define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
+#define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) -1,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          (__mmask32) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           (__mmask32) __U,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           (__mmask32) __U,
+           _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) -1, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
+          __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
+             (__v32hf) __B,
+             (__v32hf) __C,
+             (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
+    __mmask32 __U, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+    __m512h __C, const int __R)
+{
+  return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
+       (__v32hf) __B,
+       (__v32hf) __C,
+       (__mmask32) __U, __R);
+}
+#else
+#define _mm512_fnmsub_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
+#define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
+#define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
+#define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        (__v8hf) __B,
+        (__mmask8) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        (__v8hf) __B,
+        (__mmask8) __U,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        (__v8hf) __B,
+        (__mmask8) -1,
+        __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+    const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        (__v8hf) __B,
+        (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+     const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+     __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+#else
+#define _mm_fmadd_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
+#define _mm_mask_fmadd_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
+#define _mm_mask3_fmadd_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fmadd_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) -1,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        (__v8hf) __B,
+        (__mmask8) __U,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) -1,
+         __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+    const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        (__v8hf) __B,
+        (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+     const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+     __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+#else
+#define _mm_fnmadd_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
+#define _mm_mask_fnmadd_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
+#define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
+#endif
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) __U,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+         (__v8hf) __A,
+         -(__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) -1,
+        __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+    const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        (__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+     const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+         (__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+     __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+         (__v8hf) __A,
+         -(__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+#else
+#define _mm_fmsub_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
+#define _mm_mask_fmsub_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
+#define _mm_mask3_fmsub_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fmsub_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
+#endif
+extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        -(__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) -1,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        -(__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) __U,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+         -(__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+         -(__v8hf) __A,
+         -(__v8hf) __B,
+         (__mmask8) __U,
+         _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        -(__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) -1,
+        __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+    const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+        -(__v8hf) __A,
+        -(__v8hf) __B,
+        (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+     const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+         -(__v8hf) __A,
+         (__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+     __m128h __B, const int __R)
+{
+  return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+         -(__v8hf) __A,
+         -(__v8hf) __B,
+         (__mmask8) __U, __R);
+}
+#else
+#define _mm_fnmsub_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
+#define _mm_mask_fnmsub_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
+#define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
+#define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
+          (__v32hf) __C,
+          (__v32hf) __D, __B,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           __D, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
+           (__v32hf) __C,
+           (__v32hf) __D,
+           __A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
+         (__v32hf) __C,
+         (__v32hf) __D, __B,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          __D, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
+          (__v32hf) __C,
+          (__v32hf) __D,
+          __A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
+     (__v32hf) __B,
+     (__v32hf) __C,
+     __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
+         __m512h __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
+          (__v32hf) __C,
+          (__v32hf) __D, __B,
+          __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
+          __mmask16 __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           __D, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
+          __m512h __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
+           (__v32hf) __C,
+           (__v32hf) __D,
+           __A, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
+           (__v32hf) __B,
+           (__v32hf) __C,
+           __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
+        __m512h __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
+         (__v32hf) __C,
+         (__v32hf) __D, __B,
+         __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
+         __mmask16 __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
+          (__v32hf) __B,
+          (__v32hf) __C,
+          __D, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
+         __m512h __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
+          (__v32hf) __C,
+          (__v32hf) __D,
+          __A, __E);
+}
+#else
+#define _mm512_fcmadd_round_pch(A, B, C, D) (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
+#define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) ((__m512h) __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), (__v32hf) (C), (__v32hf) (D), (B), (E)))
+#define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) ((__m512h) __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
+#define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
+#define _mm512_fmadd_round_pch(A, B, C, D) (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
+#define _mm512_mask_fmadd_round_pch(A, B, C, D, E) ((__m512h) __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), (__v32hf) (C), (__v32hf) (D), (B), (E)))
+#define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
+#define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
+#endif
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fcmul_pch (__m512h __A, __m512h __B)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
+           (__v32hf) __B,
+           _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
+         (__v32hf) __D,
+         (__v32hf) __A,
+         __B, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
+         (__v32hf) __C,
+         _mm512_setzero_ph (),
+         __A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmul_pch (__m512h __A, __m512h __B)
+{
+  return (__m512h)
+    __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
+          (__v32hf) __B,
+          _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
+        (__v32hf) __D,
+        (__v32hf) __A,
+        __B, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
+{
+  return (__m512h)
+    __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
+        (__v32hf) __C,
+        _mm512_setzero_ph (),
+        __A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
+           (__v32hf) __B, __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
+        __m512h __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
+         (__v32hf) __D,
+         (__v32hf) __A,
+         __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
+         __m512h __C, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
+         (__v32hf) __C,
+         _mm512_setzero_ph (),
+         __A, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
+{
+  return (__m512h)
+    __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
+          (__v32hf) __B,
+          __D);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
+       __m512h __D, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
+        (__v32hf) __D,
+        (__v32hf) __A,
+        __B, __E);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
+        __m512h __C, const int __E)
+{
+  return (__m512h)
+    __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
+        (__v32hf) __C,
+        _mm512_setzero_ph (),
+        __A, __E);
+}
+#else
+#define _mm512_fcmul_round_pch(A, B, D) (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
+#define _mm512_mask_fcmul_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
+#define _mm512_maskz_fcmul_round_pch(A, B, C, E) (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), (__v32hf) _mm512_setzero_ph (), (A), (E))
+#define _mm512_fmul_round_pch(A, B, D) (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
+#define _mm512_mask_fmul_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
+#define _mm512_maskz_fmul_round_pch(A, B, C, E) (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), (__v32hf) _mm512_setzero_ph (), (A), (E))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
+       (__v8hf) __C,
+       (__v8hf) __D, __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
+        (__v8hf) __B,
+        (__v8hf) __C, __D,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
+        (__v8hf) __C,
+        (__v8hf) __D,
+        __A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
+         (__v8hf) __B,
+         (__v8hf) __C,
+         _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
+      (__v8hf) __C,
+      (__v8hf) __D, __B,
+      _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C, __D,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
+       (__v8hf) __C,
+       (__v8hf) __D,
+       __A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
+        (__v8hf) __B,
+        (__v8hf) __C,
+        _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+      __m128h __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
+       (__v8hf) __C,
+       (__v8hf) __D,
+       __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
+       __mmask8 __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
+        (__v8hf) __B,
+        (__v8hf) __C,
+        __D, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
+       __m128h __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
+        (__v8hf) __C,
+        (__v8hf) __D,
+        __A, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
+         (__v8hf) __B,
+         (__v8hf) __C,
+         __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+     __m128h __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
+      (__v8hf) __C,
+      (__v8hf) __D,
+      __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
+      __mmask8 __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       __D, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
+      __m128h __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
+       (__v8hf) __C,
+       (__v8hf) __D,
+       __A, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
+        (__v8hf) __B,
+        (__v8hf) __C,
+        __D);
+}
+#else
+#define _mm_mask_fcmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), (__v8hf) (C), (__v8hf) (D), (B), (E)))
+#define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), (__v8hf) (B), (__v8hf) (C), (D), (E)))
+#define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))
+#define _mm_fcmadd_round_sch(A, B, C, D) __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))
+#define _mm_mask_fmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), (__v8hf) (C), (__v8hf) (D), (B), (E)))
+#define _mm_mask3_fmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), (__v8hf) (B), (__v8hf) (C), (D), (E)))
+#define _mm_maskz_fmadd_round_sch(A, B, C, D, E) __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))
+#define _mm_fmadd_round_sch(A, B, C, D) __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmul_sch (__m128h __A, __m128h __B)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
+        (__v8hf) __B,
+        _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
+      (__v8hf) __D,
+      (__v8hf) __A,
+      __B, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
+      (__v8hf) __C,
+      _mm_setzero_ph (),
+      __A, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmul_sch (__m128h __A, __m128h __B)
+{
+  return (__m128h)
+    __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
+       (__v8hf) __B,
+       _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
+     (__v8hf) __D,
+     (__v8hf) __A,
+     __B, _MM_FROUND_CUR_DIRECTION);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return (__m128h)
+    __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
+     (__v8hf) __C,
+     _mm_setzero_ph (),
+     __A, _MM_FROUND_CUR_DIRECTION);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
+        (__v8hf) __B,
+        __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+     __m128h __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
+      (__v8hf) __D,
+      (__v8hf) __A,
+      __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
+      const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
+      (__v8hf) __C,
+      _mm_setzero_ph (),
+      __A, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
+       (__v8hf) __B, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+    __m128h __D, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
+     (__v8hf) __D,
+     (__v8hf) __A,
+     __B, __E);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
+{
+  return (__m128h)
+    __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
+     (__v8hf) __C,
+     _mm_setzero_ph (),
+     __A, __E);
+}
+#else
+#define _mm_fcmul_round_sch(__A, __B, __D) (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, (__v8hf) __B, __D)
+#define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, (__v8hf) __D, (__v8hf) __A, __B, __E)
+#define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, (__v8hf) __C, _mm_setzero_ph (), __A, __E)
+#define _mm_fmul_round_sch(__A, __B, __D) (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, (__v8hf) __B, __D)
+#define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, (__v8hf) __D, (__v8hf) __A, __B, __E)
+#define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, (__v8hf) __C, _mm_setzero_ph (), __A, __E)
+#endif
+#define _MM512_REDUCE_OP(op) __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); __m256h __T3 = (__T1 op __T2); __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); __m128h __T6 = (__T4 op __T5); __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); __m128h __T8 = (__T6 op __T7); __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); __m128h __T10 = __T8 op __T9; return __T10[0] op __T10[1]
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_add_ph (__m512h __A)
+{
+   _MM512_REDUCE_OP (+);
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_mul_ph (__m512h __A)
+{
+   _MM512_REDUCE_OP (*);
+}
+#undef _MM512_REDUCE_OP
+#ifdef __AVX512VL__
+#define _MM512_REDUCE_OP(op) __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, _mm256_setzero_ph (), (__mmask16) -1); __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); __m128h __T6 = __builtin_ia32_##op##ph128_mask (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, (__v8hi) { 4, 5 }); __m128h __T10 = __builtin_ia32_##op##ph128_mask (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); __m128h __T11 = (__m128h) __builtin_shuffle (__T10, (__v8hi) { 1, 0 }); __m128h __T12 = __builtin_ia32_##op##ph128_mask (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); return __T12[0]
+#else
+#define _MM512_REDUCE_OP(op) __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); __m512h __T2 = _mm512_##op##_ph (__A, __T1); __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); __m512h __T4 = _mm512_##op##_ph (__T2, __T3); __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); __m512h __T6 = _mm512_##op##_ph (__T4, __T5); __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }); __m512h __T8 = _mm512_##op##_ph (__T6, __T7); __m512h __T9 = (__m512h) __builtin_shuffle (__T8, (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }); __m512h __T10 = _mm512_##op##_ph (__T8, __T9); return __T10[0]
+#endif
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_min_ph (__m512h __A)
+{
+  _MM512_REDUCE_OP (min);
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_max_ph (__m512h __A)
+{
+  _MM512_REDUCE_OP (max);
+}
+#undef _MM512_REDUCE_OP
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
+{
+  return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
+          (__v32hi) __A,
+          (__mmask32) __U);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
+{
+  return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
+             (__v32hi) __I,
+             (__v32hi) __B,
+             (__mmask32)-1);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_ph (__m512i __A, __m512h __B)
+{
+  return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
+           (__v32hi) __A,
+           (__v32hi)
+           (_mm512_setzero_ph ()),
+           (__mmask32)-1);
+}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_pch (_Float16 _Complex __A)
+{
+  union
+  {
+    _Float16 _Complex a;
+    float b;
+  } u = { .a = __A};
+  return (__m512h) _mm512_set1_ps (u.b);
+}
+#define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
+#define _mm512_mask_mul_pch(W, U, A, B) _mm512_mask_fmul_pch ((W), (U), (A), (B))
+#define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
+#define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
+#define _mm512_mask_mul_round_pch(W, U, A, B, R) _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
+#define _mm512_maskz_mul_round_pch(U, A, B, R) _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
+#define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
+#define _mm512_mask_cmul_pch(W, U, A, B) _mm512_mask_fcmul_pch ((W), (U), (A), (B))
+#define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
+#define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
+#define _mm512_mask_cmul_round_pch(W, U, A, B, R) _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
+#define _mm512_maskz_cmul_round_pch(U, A, B, R) _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
+#define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
+#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
+#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
+#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
+#define _mm_mask_mul_round_sch(W, U, A, B, R) _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
+#define _mm_maskz_mul_round_sch(U, A, B, R) _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
+#define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
+#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
+#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
+#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
+#define _mm_mask_cmul_round_sch(W, U, A, B, R) _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
+#define _mm_maskz_cmul_round_sch(U, A, B, R) _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
+#ifdef __DISABLE_AVX512FP16__
+#undef __DISABLE_AVX512FP16__
+#pragma GCC pop_options
+#endif
+#endif
+#endif
diff --git a/third_party/intel/avx512fp16vlintrin.internal.h b/third_party/intel/avx512fp16vlintrin.internal.h
new file mode 100644
index 000000000..d42a041a9
--- /dev/null
+++ b/third_party/intel/avx512fp16vlintrin.internal.h
@@ -0,0 +1,2819 @@
+#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
+#define __AVX512FP16VLINTRIN_H_INCLUDED
+#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
+#pragma GCC push_options
+#pragma GCC target("avx512fp16,avx512vl")
+#define __DISABLE_AVX512FP16VL__
+#endif
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_ps (__m128h __a)
+{
+  return (__m128) __a;
+}
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_ps (__m256h __a)
+{
+  return (__m256) __a;
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_pd (__m128h __a)
+{
+  return (__m128d) __a;
+}
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_pd (__m256h __a)
+{
+  return (__m256d) __a;
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_si128 (__m128h __a)
+{
+  return (__m128i) __a;
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_si256 (__m256h __a)
+{
+  return (__m256i) __a;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_ph (__m128 __a)
+{
+  return (__m128h) __a;
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_ph (__m256 __a)
+{
+  return (__m256h) __a;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ph (__m128d __a)
+{
+  return (__m128h) __a;
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ph (__m256d __a)
+{
+  return (__m256h) __a;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ph (__m128i __a)
+{
+  return (__m128h) __a;
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ph (__m256i __a)
+{
+  return (__m256h) __a;
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph256_ph128 (__m256h __A)
+{
+  union
+  {
+    __m128h a[2];
+    __m256h v;
+  } u = { .v = __A };
+  return u.a[0];
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph128_ph256 (__m128h __A)
+{
+  union
+  {
+    __m128h a[2];
+    __m256h v;
+  } u;
+  u.a[0] = __A;
+  return u.v;
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zextph128_ph256 (__m128h __A)
+{
+  return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
+      (__m128) __A, 0);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_conj_pch (__m256h __A)
+{
+  return (__m256h) _mm256_xor_epi32 ((__m256i) __A, _mm256_set1_epi32 (1<<31));
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A)
+{
+  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
+         _mm256_conj_pch (__A),
+        (__v8sf) __W,
+        (__mmask8) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A)
+{
+  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
+         _mm256_conj_pch (__A),
+        (__v8sf)
+         _mm256_setzero_ps (),
+        (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_conj_pch (__m128h __A)
+{
+  return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_set1_epi32 (1<<31));
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A)
+{
+  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
+        (__v4sf) __W,
+        (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_conj_pch (__mmask8 __U, __m128h __A)
+{
+  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
+        (__v4sf) _mm_setzero_ps (),
+        (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A + (__v8hf) __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A + (__v16hf) __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (),
+           __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_addph256_mask (__B, __C,
+           _mm256_setzero_ph (), __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A - (__v8hf) __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A - (__v16hf) __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (),
+           __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_subph256_mask (__B, __C,
+           _mm256_setzero_ph (), __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A * (__v8hf) __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A * (__v16hf) __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (),
+           __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_mulph256_mask (__B, __C,
+           _mm256_setzero_ph (), __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ph (__m128h __A, __m128h __B)
+{
+  return (__m128h) ((__v8hf) __A / (__v8hf) __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_ph (__m256h __A, __m256h __B)
+{
+  return (__m256h) ((__v16hf) __A / (__v16hf) __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (),
+           __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_divph256_mask (__B, __C,
+           _mm256_setzero_ph (), __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ph (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_maxph128_mask (__A, __B,
+           _mm_setzero_ph (),
+           (__mmask8) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_ph (__m256h __A, __m256h __B)
+{
+  return __builtin_ia32_maxph256_mask (__A, __B,
+           _mm256_setzero_ph (),
+           (__mmask16) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (),
+           __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_maxph256_mask (__B, __C,
+           _mm256_setzero_ph (), __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ph (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_minph128_mask (__A, __B,
+           _mm_setzero_ph (),
+           (__mmask8) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_ph (__m256h __A, __m256h __B)
+{
+  return __builtin_ia32_minph256_mask (__A, __B,
+           _mm256_setzero_ph (),
+           (__mmask16) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (),
+           __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_minph256_mask (__B, __C,
+           _mm256_setzero_ph (), __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_ph (__m128h __A)
+{
+  return (__m128h) _mm_and_si128 ( _mm_set1_epi32 (0x7FFF7FFF),
+       (__m128i) __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_ph (__m256h __A)
+{
+  return (__m256h) _mm256_and_si256 ( _mm256_set1_epi32 (0x7FFF7FFF),
+          (__m256i) __A);
+}
+#ifdef __OPTIMIZE
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
+{
+  return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
+        (__mmask8) -1);
+}
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
+        const int __D)
+{
+  return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
+}
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
+{
+  return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
+         (__mmask16) -1);
+}
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
+        const int __D)
+{
+  return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D,
+         __A);
+}
+#else
+#define _mm_cmp_ph_mask(A, B, C) (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))
+#define _mm_mask_cmp_ph_mask(A, B, C, D) (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))
+#define _mm256_cmp_ph_mask(A, B, C) (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))
+#define _mm256_mask_cmp_ph_mask(A, B, C, D) (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))
+#endif
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ph (__m128h __A)
+{
+  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
+     (__mmask8) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_ph (__m256h __A)
+{
+  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
+     (__mmask16) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
+{
+  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (),
+     __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
+{
+  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (),
+     __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ph (__m128h __A)
+{
+  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
+      (__mmask8) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt_ph (__m256h __A)
+{
+  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
+      (__mmask16) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
+{
+  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
+{
+  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (),
+      __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ph (__m128h __A)
+{
+  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
+           (__mmask8) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp_ph (__m256h __A)
+{
+  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
+           (__mmask16) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
+{
+  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
+{
+  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (),
+           __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ph (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_scalefph128_mask (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_ph (__m256h __A, __m256h __B)
+{
+  return __builtin_ia32_scalefph256_mask (__A, __B,
+       _mm256_setzero_ph (),
+       (__mmask16) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
+         __m256h __D)
+{
+  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_scalefph128_mask (__B, __C,
+       _mm_setzero_ph (), __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_scalefph256_mask (__B, __C,
+       _mm256_setzero_ph (),
+       __A);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_ph (__m128h __A, int __B)
+{
+  return __builtin_ia32_reduceph128_mask (__A, __B,
+       _mm_setzero_ph (),
+       (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_reduceph128_mask (__B, __C,
+       _mm_setzero_ph (), __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_ph (__m256h __A, int __B)
+{
+  return __builtin_ia32_reduceph256_mask (__A, __B,
+       _mm256_setzero_ph (),
+       (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
+{
+  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
+{
+  return __builtin_ia32_reduceph256_mask (__B, __C,
+       _mm256_setzero_ph (),
+       __A);
+}
+#else
+#define _mm_reduce_ph(A, B) (__builtin_ia32_reduceph128_mask ((A), (B), _mm_setzero_ph (), ((__mmask8)-1)))
+#define _mm_mask_reduce_ph(A, B, C, D) (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))
+#define _mm_maskz_reduce_ph(A, B, C) (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))
+#define _mm256_reduce_ph(A, B) (__builtin_ia32_reduceph256_mask ((A), (B), _mm256_setzero_ph (), ((__mmask16)-1)))
+#define _mm256_mask_reduce_ph(A, B, C, D) (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))
+#define _mm256_maskz_reduce_ph(A, B, C) (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))
+#endif
+#ifdef __OPTIMIZE__
+  extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  _mm_roundscale_ph (__m128h __A, int __B)
+  {
+    return __builtin_ia32_rndscaleph128_mask (__A, __B,
+           _mm_setzero_ph (),
+           (__mmask8) -1);
+  }
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
+{
+  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
+{
+  return __builtin_ia32_rndscaleph128_mask (__B, __C,
+         _mm_setzero_ph (), __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_roundscale_ph (__m256h __A, int __B)
+{
+  return __builtin_ia32_rndscaleph256_mask (__A, __B,
+         _mm256_setzero_ph (),
+         (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
+      int __D)
+{
+  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
+{
+  return __builtin_ia32_rndscaleph256_mask (__B, __C,
+         _mm256_setzero_ph (),
+         __A);
+}
+#else
+#define _mm_roundscale_ph(A, B) (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (), ((__mmask8)-1)))
+#define _mm_mask_roundscale_ph(A, B, C, D) (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))
+#define _mm_maskz_roundscale_ph(A, B, C) (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))
+#define _mm256_roundscale_ph(A, B) (__builtin_ia32_rndscaleph256_mask ((A), (B), _mm256_setzero_ph(), ((__mmask16)-1)))
+#define _mm256_mask_roundscale_ph(A, B, C, D) (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))
+#define _mm256_maskz_roundscale_ph(A, B, C) (__builtin_ia32_rndscaleph256_mask ((B), (C), _mm256_setzero_ph (), (A)))
+#endif
+#ifdef __OPTIMIZE__
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
+            __imm, __U);
+}
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_ph_mask (__m128h __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
+            __imm,
+            (__mmask8) -1);
+}
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
+{
+  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
+             __imm, __U);
+}
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fpclass_ph_mask (__m256h __A, const int __imm)
+{
+  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
+             __imm,
+             (__mmask16) -1);
+}
+#else
+#define _mm_fpclass_ph_mask(X, C) ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), (int) (C),(__mmask8)-1))
+#define _mm_mask_fpclass_ph_mask(u, X, C) ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), (int) (C),(__mmask8)(u)))
+#define _mm256_fpclass_ph_mask(X, C) ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), (int) (C),(__mmask16)-1))
+#define _mm256_mask_fpclass_ph_mask(u, X, C) ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), (int) (C),(__mmask16)(u)))
+#endif
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getexp_ph (__m256h __A)
+{
+  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
+          (__v16hf)
+          _mm256_setzero_ph (),
+          (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
+{
+  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
+          (__v16hf) __W,
+          (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
+{
+  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
+          (__v16hf)
+          _mm256_setzero_ph (),
+          (__mmask16) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_ph (__m128h __A)
+{
+  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
+          (__v8hf)
+          _mm_setzero_ph (),
+          (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
+{
+  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
+          (__v8hf) __W,
+          (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
+{
+  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
+          (__v8hf)
+          _mm_setzero_ph (),
+          (__mmask8) __U);
+}
+#ifdef __OPTIMIZE__
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B,
+     _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
+           (__C << 2) | __B,
+           (__v16hf)
+           _mm256_setzero_ph (),
+           (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A,
+   _MM_MANTISSA_NORM_ENUM __B,
+   _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
+           (__C << 2) | __B,
+           (__v16hf) __W,
+           (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A,
+    _MM_MANTISSA_NORM_ENUM __B,
+    _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
+           (__C << 2) | __B,
+           (__v16hf)
+           _mm256_setzero_ph (),
+           (__mmask16) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B,
+  _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
+           (__C << 2) | __B,
+           (__v8hf)
+           _mm_setzero_ph (),
+           (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A,
+       _MM_MANTISSA_NORM_ENUM __B,
+       _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
+           (__C << 2) | __B,
+           (__v8hf) __W,
+           (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
+        _MM_MANTISSA_NORM_ENUM __B,
+        _MM_MANTISSA_SIGN_ENUM __C)
+{
+  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
+           (__C << 2) | __B,
+           (__v8hf)
+           _mm_setzero_ph (),
+           (__mmask8) __U);
+}
+#else
+#define _mm256_getmant_ph(X, B, C) ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), (int)(((C)<<2) | (B)), (__v16hf)(__m256h)_mm256_setzero_ph (), (__mmask16)-1))
+#define _mm256_mask_getmant_ph(W, U, X, B, C) ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), (int)(((C)<<2) | (B)), (__v16hf)(__m256h)(W), (__mmask16)(U)))
+#define _mm256_maskz_getmant_ph(U, X, B, C) ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), (int)(((C)<<2) | (B)), (__v16hf)(__m256h)_mm256_setzero_ph (), (__mmask16)(U)))
+#define _mm_getmant_ph(X, B, C) ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), (int)(((C)<<2) | (B)), (__v8hf)(__m128h)_mm_setzero_ph (), (__mmask8)-1))
+#define _mm_mask_getmant_ph(W, U, X, B, C) ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), (int)(((C)<<2) | (B)), (__v8hf)(__m128h)(W), (__mmask8)(U)))
+#define _mm_maskz_getmant_ph(U, X, B, C) ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), (int)(((C)<<2) | (B)), (__v8hf)(__m128h)_mm_setzero_ph (), (__mmask8)(U)))
+#endif
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_epi32 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2dq128_mask (__A,
+          (__v4si)
+          _mm_setzero_si128 (),
+          (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2dq128_mask (__C, ( __v4si) __A, __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2dq128_mask (__B,
+          (__v4si) _mm_setzero_si128 (),
+          __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_epi32 (__m128h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2dq256_mask (__A,
+          (__v8si)
+          _mm256_setzero_si256 (),
+          (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2dq256_mask (__C, ( __v8si) __A, __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2dq256_mask (__B,
+          (__v8si)
+          _mm256_setzero_si256 (),
+          __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_epu32 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2udq128_mask (__A,
+           (__v4si)
+           _mm_setzero_si128 (),
+           (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2udq128_mask (__C, ( __v4si) __A, __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2udq128_mask (__B,
+           (__v4si)
+           _mm_setzero_si128 (),
+           __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_epu32 (__m128h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2udq256_mask (__A,
+           (__v8si)
+           _mm256_setzero_si256 (),
+           (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2udq256_mask (__C, ( __v8si) __A, __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2udq256_mask (__B,
+           (__v8si) _mm256_setzero_si256 (),
+           __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttph_epi32 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2dq128_mask (__A,
+           (__v4si) _mm_setzero_si128 (),
+           (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)__builtin_ia32_vcvttph2dq128_mask (__C,
+           ( __v4si) __A,
+           __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2dq128_mask (__B,
+           (__v4si) _mm_setzero_si128 (),
+           __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttph_epi32 (__m128h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2dq256_mask (__A,
+           (__v8si)
+           _mm256_setzero_si256 (),
+           (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2dq256_mask (__C,
+           ( __v8si) __A,
+           __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2dq256_mask (__B,
+           (__v8si)
+           _mm256_setzero_si256 (),
+           __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttph_epu32 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2udq128_mask (__A,
+     (__v4si)
+     _mm_setzero_si128 (),
+     (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2udq128_mask (__C,
+     ( __v4si) __A,
+     __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2udq128_mask (__B,
+     (__v4si)
+     _mm_setzero_si128 (),
+     __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttph_epu32 (__m128h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2udq256_mask (__A,
+     (__v8si)
+     _mm256_setzero_si256 (), (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2udq256_mask (__C,
+     ( __v8si) __A,
+     __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2udq256_mask (__B,
+     (__v8si)
+     _mm256_setzero_si256 (),
+     __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B,
+        _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B,
+        _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __A,
+         _mm_setzero_ph (),
+         (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C,
+         __A,
+         __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __B,
+         _mm_setzero_ph (),
+         __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __A,
+         _mm_setzero_ph (),
+         (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __B,
+         _mm_setzero_ph (),
+         __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_epi64 (__m128h __A)
+{
+  return
+    __builtin_ia32_vcvtph2qq128_mask (__A,
+          _mm_setzero_si128 (),
+          (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2qq128_mask (__B,
+        _mm_setzero_si128 (),
+        __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_epi64 (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2qq256_mask (__A,
+        _mm256_setzero_si256 (),
+        (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2qq256_mask (__B,
+        _mm256_setzero_si256 (),
+        __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_epu64 (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2uqq128_mask (__A,
+         _mm_setzero_si128 (),
+         (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2uqq128_mask (__B,
+         _mm_setzero_si128 (),
+         __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_epu64 (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2uqq256_mask (__A,
+         _mm256_setzero_si256 (),
+         (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2uqq256_mask (__B,
+         _mm256_setzero_si256 (),
+         __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttph_epi64 (__m128h __A)
+{
+  return __builtin_ia32_vcvttph2qq128_mask (__A,
+         _mm_setzero_si128 (),
+         (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvttph2qq128_mask (__C,
+         __A,
+         __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvttph2qq128_mask (__B,
+         _mm_setzero_si128 (),
+         __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttph_epi64 (__m128h __A)
+{
+  return __builtin_ia32_vcvttph2qq256_mask (__A,
+         _mm256_setzero_si256 (),
+         (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvttph2qq256_mask (__C,
+         __A,
+         __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvttph2qq256_mask (__B,
+         _mm256_setzero_si256 (),
+         __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttph_epu64 (__m128h __A)
+{
+  return __builtin_ia32_vcvttph2uqq128_mask (__A,
+          _mm_setzero_si128 (),
+          (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvttph2uqq128_mask (__C,
+          __A,
+          __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvttph2uqq128_mask (__B,
+          _mm_setzero_si128 (),
+          __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttph_epu64 (__m128h __A)
+{
+  return __builtin_ia32_vcvttph2uqq256_mask (__A,
+          _mm256_setzero_si256 (),
+          (__mmask8) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvttph2uqq256_mask (__C,
+          __A,
+          __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvttph2uqq256_mask (__B,
+          _mm256_setzero_si256 (),
+          __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __B,
+        _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __B,
+        _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu64_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __A,
+         _mm_setzero_ph (),
+         (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __B,
+         _mm_setzero_ph (),
+         __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu64_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __A,
+         _mm_setzero_ph (),
+         (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __B,
+         _mm_setzero_ph (),
+         __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_epi16 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2w128_mask (__A,
+         (__v8hi)
+         _mm_setzero_si128 (),
+         (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2w128_mask (__B,
+         (__v8hi)
+         _mm_setzero_si128 (),
+         __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_epi16 (__m256h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2w256_mask (__A,
+         (__v16hi)
+         _mm256_setzero_si256 (),
+         (__mmask16) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2w256_mask (__B,
+         (__v16hi)
+         _mm256_setzero_si256 (),
+         __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_epu16 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2uw128_mask (__A,
+          (__v8hi)
+          _mm_setzero_si128 (),
+          (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtph2uw128_mask (__B,
+          (__v8hi)
+          _mm_setzero_si128 (),
+          __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_epu16 (__m256h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2uw256_mask (__A,
+          (__v16hi)
+          _mm256_setzero_si256 (),
+          (__mmask16) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvtph2uw256_mask (__B,
+          (__v16hi)
+          _mm256_setzero_si256 (),
+          __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttph_epi16 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2w128_mask (__A,
+          (__v8hi)
+          _mm_setzero_si128 (),
+          (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2w128_mask (__C,
+          ( __v8hi) __A,
+          __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2w128_mask (__B,
+          (__v8hi)
+          _mm_setzero_si128 (),
+          __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttph_epi16 (__m256h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2w256_mask (__A,
+          (__v16hi)
+          _mm256_setzero_si256 (),
+          (__mmask16) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2w256_mask (__C,
+          ( __v16hi) __A,
+          __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2w256_mask (__B,
+          (__v16hi)
+          _mm256_setzero_si256 (),
+          __A);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttph_epu16 (__m128h __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2uw128_mask (__A,
+           (__v8hi)
+           _mm_setzero_si128 (),
+           (__mmask8) -1);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2uw128_mask (__C,
+           ( __v8hi) __A,
+           __B);
+}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B)
+{
+  return (__m128i)
+    __builtin_ia32_vcvttph2uw128_mask (__B,
+           (__v8hi)
+           _mm_setzero_si128 (),
+           __A);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttph_epu16 (__m256h __A)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2uw256_mask (__A,
+           (__v16hi)
+           _mm256_setzero_si256 (),
+           (__mmask16) -1);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2uw256_mask (__C,
+           ( __v16hi) __A,
+           __B);
+}
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B)
+{
+  return (__m256i)
+    __builtin_ia32_vcvttph2uw256_mask (__B,
+           (__v16hi) _mm256_setzero_si256 (),
+           __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __A,
+       _mm_setzero_ph (),
+       (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __C,
+       __A,
+       __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __B,
+       _mm_setzero_ph (),
+       __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __A,
+       _mm256_setzero_ph (),
+       (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C,
+       __A,
+       __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __B,
+       _mm256_setzero_ph (),
+       __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __B,
+        _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu16_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __A,
+        _mm256_setzero_ph (),
+        (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __B,
+        _mm256_setzero_ph (),
+        __A);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_pd (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2pd128_mask (__A,
+        _mm_setzero_pd (),
+        (__mmask8) -1);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B);
+}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A);
+}
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_pd (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2pd256_mask (__A,
+        _mm256_setzero_pd (),
+        (__mmask8) -1);
+}
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B);
+}
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2pd256_mask (__B,
+        _mm256_setzero_pd (),
+        __A);
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtxph_ps (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2psx128_mask (__A,
+        _mm_setzero_ps (),
+        (__mmask8) -1);
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B);
+}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A);
+}
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtxph_ps (__m128h __A)
+{
+  return __builtin_ia32_vcvtph2psx256_mask (__A,
+         _mm256_setzero_ps (),
+         (__mmask8) -1);
+}
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B);
+}
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtph2psx256_mask (__B,
+         _mm256_setzero_ps (),
+         __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtxps_ph (__m128 __A)
+{
+  return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __A,
+         _mm_setzero_ph (),
+         (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C)
+{
+  return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B)
+{
+  return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __B,
+         _mm_setzero_ph (),
+         __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtxps_ph (__m256 __A)
+{
+  return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __A,
+         _mm_setzero_ph (),
+         (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C)
+{
+  return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B)
+{
+  return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __B,
+         _mm_setzero_ph (),
+         __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_ph (__m128d __A)
+{
+  return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C)
+{
+  return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B)
+{
+  return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __B,
+        _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_ph (__m256d __A)
+{
+  return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __A,
+        _mm_setzero_ph (),
+        (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C)
+{
+  return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B)
+{
+  return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __B,
+        _mm_setzero_ph (),
+        __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A,
+            (__v16hf)__B,
+            (__v16hf)__C,
+            (__mmask16)-1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
+    __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C,
+     __mmask16 __U)
+{
+  return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
+     __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A,
+            (__v8hf)__B,
+            (__v8hf)__C,
+            (__mmask8)-1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
+        __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C,
+         __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
+         __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
+    __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C,
+     __mmask16 __U)
+{
+  return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
+     __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
+        __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C,
+         __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
+         __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
+    __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C,
+     __mmask16 __U)
+{
+  return (__m256h) __builtin_ia32_vfmaddph256_mask3 ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
+     __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmaddph256_maskz ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
+        __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C,
+         __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmaddph128_mask3 ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
+         __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmaddph128_maskz ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
+    __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C,
+     __mmask16 __U)
+{
+  return (__m256h) __builtin_ia32_vfnmaddph256_mask3 ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
+     __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfnmaddph256_maskz ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
+        __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C,
+         __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfnmaddph128_mask3 ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
+         __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfnmaddph128_maskz ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
+    __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C,
+     __mmask16 __U)
+{
+  return (__m256h) __builtin_ia32_vfmsubph256_mask3 ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
+     __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmsubph256_maskz ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
+        __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C,
+         __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfmsubph128_mask3 ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
+         __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmsubph128_maskz ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) -1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
+    __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
+             (__v16hf) __B,
+             (__v16hf) __C,
+             (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C,
+     __mmask16 __U)
+{
+  return (__m256h) __builtin_ia32_vfnmsubph256_mask3 ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
+     __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfnmsubph256_maskz ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C,
+       (__mmask16)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) -1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
+        __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
+             (__v8hf) __B,
+             (__v8hf) __C,
+             (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C,
+         __mmask8 __U)
+{
+  return (__m128h) __builtin_ia32_vfnmsubph128_mask3 ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
+         __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfnmsubph128_maskz ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C,
+       (__mmask8)
+       __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_pch (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmaddcph128 ((__v8hf) __A,
+      (__v8hf) __B,
+      (__v8hf) __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcph128_mask ((__v8hf) __A,
+          (__v8hf) __C,
+          (__v8hf) __D, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfmaddcph128_mask3 ((__v8hf) __A,
+           (__v8hf) __B,
+           (__v8hf) __C, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
+{
+  return (__m128h) __builtin_ia32_vfmaddcph128_maskz ((__v8hf) __B,
+            (__v8hf) __C,
+            (__v8hf) __D, __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_pch (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmaddcph256 ((__v16hf) __A,
+      (__v16hf) __B,
+      (__v16hf) __C);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
+{
+  return (__m256h)
+     __builtin_ia32_vfmaddcph256_mask ((__v16hf) __A,
+           (__v16hf) __C,
+           (__v16hf) __D, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D)
+{
+  return (__m256h)
+    __builtin_ia32_vfmaddcph256_mask3 ((__v16hf) __A,
+           (__v16hf) __B,
+           (__v16hf) __C, __D);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
+{
+  return (__m256h)__builtin_ia32_vfmaddcph256_maskz ((__v16hf) __B,
+           (__v16hf) __C,
+           (__v16hf) __D, __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfcmaddcph128 ((__v8hf) __A,
+       (__v8hf) __B,
+       (__v8hf) __C);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)
+     __builtin_ia32_vfcmaddcph128_mask ((__v8hf) __A,
+     (__v8hf) __C,
+     (__v8hf) __D, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
+{
+  return (__m128h)
+    __builtin_ia32_vfcmaddcph128_mask3 ((__v8hf) __A,
+     (__v8hf) __B,
+     (__v8hf) __C, __D);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
+{
+  return (__m128h)__builtin_ia32_vfcmaddcph128_maskz ((__v8hf) __B,
+            (__v8hf) __C,
+            (__v8hf) __D, __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfcmaddcph256 ((__v16hf) __A,
+       (__v16hf) __B,
+       (__v16hf) __C);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fcmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
+{
+  return (__m256h)
+     __builtin_ia32_vfcmaddcph256_mask ((__v16hf) __A,
+     (__v16hf) __C,
+     (__v16hf) __D, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D)
+{
+  return (__m256h)
+    __builtin_ia32_vfcmaddcph256_mask3 ((__v16hf) __A,
+     (__v16hf) __B,
+     (__v16hf) __C, __D);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fcmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
+{
+  return (__m256h) __builtin_ia32_vfcmaddcph256_maskz ((__v16hf) __B,
+             (__v16hf) __C,
+             (__v16hf) __D, __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmul_pch (__m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfmulcph128 ((__v8hf) __A, (__v8hf) __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __C,
+          (__v8hf) __D,
+          (__v8hf) __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __B,
+          (__v8hf) __C,
+          _mm_setzero_ph (),
+          __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmul_pch (__m256h __A, __m256h __B)
+{
+  return (__m256h) __builtin_ia32_vfmulcph256 ((__v16hf) __A,
+            (__v16hf) __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
+{
+  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __C,
+          (__v16hf) __D,
+          (__v16hf) __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __B,
+          (__v16hf) __C,
+          _mm256_setzero_ph (),
+          __A);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmul_pch (__m128h __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vfcmulcph128 ((__v8hf) __A,
+      (__v8hf) __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __C,
+           (__v8hf) __D,
+           (__v8hf) __A, __B);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __B,
+           (__v8hf) __C,
+           _mm_setzero_ph (),
+           __A);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fcmul_pch (__m256h __A, __m256h __B)
+{
+  return (__m256h) __builtin_ia32_vfcmulcph256 ((__v16hf) __A, (__v16hf) __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fcmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
+{
+  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __C,
+           (__v16hf) __D,
+           (__v16hf) __A, __B);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fcmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
+{
+  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __B,
+           (__v16hf) __C,
+           _mm256_setzero_ph (),
+           __A);
+}
+#define _MM256_REDUCE_OP(op) __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); __m128h __T3 = (__T1 op __T2); __m128h __T4 = (__m128h) __builtin_shuffle (__T3, (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); __m128h __T5 = (__T3) op (__T4); __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); __m128h __T7 = __T5 op __T6; return __T7[0] op __T7[1]
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_add_ph (__m256h __A)
+{
+  _MM256_REDUCE_OP (+);
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_mul_ph (__m256h __A)
+{
+  _MM256_REDUCE_OP (*);
+}
+#undef _MM256_REDUCE_OP
+#define _MM256_REDUCE_OP(op) __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); __m128h __T3 = _mm_##op (__T1, __T2); __m128h __T4 = (__m128h) __builtin_shuffle (__T3, (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); __m128h __T5 = _mm_##op (__T3, __T4); __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 4, 5 }); __m128h __T7 = _mm_##op (__T5, __T6); __m128h __T8 = (__m128h) __builtin_shuffle (__T7, (__v8hi) { 1, 0 }); __m128h __T9 = _mm_##op (__T7, __T8); return __T9[0]
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_min_ph (__m256h __A)
+{
+  _MM256_REDUCE_OP (min_ph);
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_max_ph (__m256h __A)
+{
+  _MM256_REDUCE_OP (max_ph);
+}
+#define _MM_REDUCE_OP(op) __m128h __T1 = (__m128h) __builtin_shuffle (__A, (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); __m128h __T2 = (__A) op (__T1); __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 2, 3, 0, 1, 4, 5, 6, 7 }); __m128h __T4 = __T2 op __T3; return __T4[0] op __T4[1]
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_add_ph (__m128h __A)
+{
+  _MM_REDUCE_OP (+);
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_mul_ph (__m128h __A)
+{
+  _MM_REDUCE_OP (*);
+}
+#undef _MM_REDUCE_OP
+#define _MM_REDUCE_OP(op) __m128h __T1 = (__m128h) __builtin_shuffle (__A, (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); __m128h __T2 = _mm_##op (__A, __T1); __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 4, 5 }); __m128h __T4 = _mm_##op (__T2, __T3); __m128h __T5 = (__m128h) __builtin_shuffle (__T4, (__v8hi){ 1, 0 }); __m128h __T6 = _mm_##op (__T4, __T5); return __T6[0]
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_min_ph (__m128h __A)
+{
+  _MM_REDUCE_OP (min_ph);
+}
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_max_ph (__m128h __A)
+{
+  _MM_REDUCE_OP (max_ph);
+}
+#undef _MM256_REDUCE_OP
+#undef _MM_REDUCE_OP
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W)
+{
+  return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W,
+          (__v16hi) __A,
+          (__mmask16) __U);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B)
+{
+  return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
+             (__v16hi) __I,
+             (__v16hi) __B,
+             (__mmask16)-1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_ph (__m256i __A, __m256h __B)
+{
+  return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
+           (__v16hi) __A,
+           (__v16hi)
+           (_mm256_setzero_ph ()),
+           (__mmask16)-1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W)
+{
+  return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W,
+          (__v8hi) __A,
+          (__mmask8) __U);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
+             (__v8hi) __I,
+             (__v8hi) __B,
+             (__mmask8)-1);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutexvar_ph (__m128i __A, __m128h __B)
+{
+  return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
+           (__v8hi) __A,
+           (__v8hi)
+           (_mm_setzero_ph ()),
+           (__mmask8)-1);
+}
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pch (_Float16 _Complex __A)
+{
+  union
+  {
+    _Float16 _Complex a;
+    float b;
+  } u = { .a = __A };
+  return (__m256h) _mm256_set1_ps (u.b);
+}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pch (_Float16 _Complex __A)
+{
+  union
+  {
+    _Float16 _Complex a;
+    float b;
+  } u = { .a = __A };
+  return (__m128h) _mm_set1_ps (u.b);
+}
+#define _mm_mul_pch(A, B) _mm_fmul_pch ((A), (B))
+#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch ((W), (U), (A), (B))
+#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch ((U), (A), (B))
+#define _mm256_mul_pch(A, B) _mm256_fmul_pch ((A), (B))
+#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch ((W), (U), (A), (B))
+#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch ((U), (A), (B))
+#define _mm_cmul_pch(A, B) _mm_fcmul_pch ((A), (B))
+#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch ((W), (U), (A), (B))
+#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch ((U), (A), (B))
+#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch ((A), (B))
+#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch ((W), (U), (A), (B))
+#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch((U), (A), (B))
+#ifdef __DISABLE_AVX512FP16VL__
+#undef __DISABLE_AVX512FP16VL__
+#pragma GCC pop_options
+#endif
+#endif
+#endif
diff --git a/third_party/intel/avx512vlintrin.internal.h b/third_party/intel/avx512vlintrin.internal.h
index 73d7e3054..493f80abb 100644
--- a/third_party/intel/avx512vlintrin.internal.h
+++ b/third_party/intel/avx512vlintrin.internal.h
@@ -9486,10 +9486,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
       const int __imm)
 {
-  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
-           (__v4di) __B,
-           (__v4di) __C, __imm,
-           (__mmask8) -1);
+  return (__m256i)
+    __builtin_ia32_pternlogq256_mask ((__v4di) __A,
+          (__v4di) __B,
+          (__v4di) __C,
+          (unsigned char) __imm,
+          (__mmask8) -1);
 }
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -9497,10 +9499,12 @@ _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
     __m256i __B, __m256i __C,
     const int __imm)
 {
-  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
-           (__v4di) __B,
-           (__v4di) __C, __imm,
-           (__mmask8) __U);
+  return (__m256i)
+    __builtin_ia32_pternlogq256_mask ((__v4di) __A,
+          (__v4di) __B,
+          (__v4di) __C,
+          (unsigned char) __imm,
+          (__mmask8) __U);
 }
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -9508,21 +9512,24 @@ _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
      __m256i __B, __m256i __C,
      const int __imm)
 {
-  return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
-            (__v4di) __B,
-            (__v4di) __C,
-            __imm,
-            (__mmask8) __U);
+  return (__m256i)
+    __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
+           (__v4di) __B,
+           (__v4di) __C,
+           (unsigned char) __imm,
+           (__mmask8) __U);
 }
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
       const int __imm)
 {
-  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
-           (__v8si) __B,
-           (__v8si) __C, __imm,
-           (__mmask8) -1);
+  return (__m256i)
+    __builtin_ia32_pternlogd256_mask ((__v8si) __A,
+          (__v8si) __B,
+          (__v8si) __C,
+          (unsigned char) __imm,
+          (__mmask8) -1);
 }
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -9530,10 +9537,12 @@ _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
     __m256i __B, __m256i __C,
     const int __imm)
 {
-  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
-           (__v8si) __B,
-           (__v8si) __C, __imm,
-           (__mmask8) __U);
+  return (__m256i)
+    __builtin_ia32_pternlogd256_mask ((__v8si) __A,
+          (__v8si) __B,
+          (__v8si) __C,
+          (unsigned char) __imm,
+          (__mmask8) __U);
 }
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -9541,73 +9550,88 @@ _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
      __m256i __B, __m256i __C,
      const int __imm)
 {
-  return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
-            (__v8si) __B,
-            (__v8si) __C,
-            __imm,
-            (__mmask8) __U);
+  return (__m256i)
+    __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
+           (__v8si) __B,
+           (__v8si) __C,
+           (unsigned char) __imm,
+           (__mmask8) __U);
 }
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
    const int __imm)
 {
-  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
-           (__v2di) __B,
-           (__v2di) __C, __imm,
-           (__mmask8) -1);
+  return (__m128i)
+    __builtin_ia32_pternlogq128_mask ((__v2di) __A,
+          (__v2di) __B,
+          (__v2di) __C,
+          (unsigned char) __imm,
+          (__mmask8) -1);
 }
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
-        __m128i __B, __m128i __C, const int __imm)
+        __m128i __B, __m128i __C,
+        const int __imm)
 {
-  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
-           (__v2di) __B,
-           (__v2di) __C, __imm,
-           (__mmask8) __U);
+  return (__m128i)
+    __builtin_ia32_pternlogq128_mask ((__v2di) __A,
+          (__v2di) __B,
+          (__v2di) __C,
+          (unsigned char) __imm,
+          (__mmask8) __U);
 }
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
-         __m128i __B, __m128i __C, const int __imm)
+         __m128i __B, __m128i __C,
+         const int __imm)
 {
-  return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
-            (__v2di) __B,
-            (__v2di) __C,
-            __imm,
-            (__mmask8) __U);
+  return (__m128i)
+    __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
+           (__v2di) __B,
+           (__v2di) __C,
+           (unsigned char) __imm,
+           (__mmask8) __U);
 }
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
    const int __imm)
 {
-  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
-           (__v4si) __B,
-           (__v4si) __C, __imm,
-           (__mmask8) -1);
+  return (__m128i)
+    __builtin_ia32_pternlogd128_mask ((__v4si) __A,
+          (__v4si) __B,
+          (__v4si) __C,
+          (unsigned char) __imm,
+          (__mmask8) -1);
 }
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
-        __m128i __B, __m128i __C, const int __imm)
+        __m128i __B, __m128i __C,
+        const int __imm)
 {
-  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
-           (__v4si) __B,
-           (__v4si) __C, __imm,
-           (__mmask8) __U);
+  return (__m128i)
+    __builtin_ia32_pternlogd128_mask ((__v4si) __A,
+          (__v4si) __B,
+          (__v4si) __C,
+          (unsigned char) __imm,
+          (__mmask8) __U);
 }
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
-         __m128i __B, __m128i __C, const int __imm)
+         __m128i __B, __m128i __C,
+         const int __imm)
 {
-  return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
-            (__v4si) __B,
-            (__v4si) __C,
-            __imm,
-            (__mmask8) __U);
+  return (__m128i)
+    __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
+           (__v4si) __B,
+           (__v4si) __C,
+           (unsigned char) __imm,
+           (__mmask8) __U);
 }
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -11335,18 +11359,18 @@ _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm_maskz_slli_epi32(U, X, C) ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
 #define _mm_mask_slli_epi64(W, U, X, C) ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
 #define _mm_maskz_slli_epi64(U, X, C) ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
-#define _mm256_ternarylogic_epi64(A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
-#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
-#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
-#define _mm256_ternarylogic_epi32(A, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
-#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
-#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
-#define _mm_ternarylogic_epi64(A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
-#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
-#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
-#define _mm_ternarylogic_epi32(A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
-#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
-#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
+#define _mm256_ternarylogic_epi64(A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), (__v4di) (__m256i) (B), (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) -1))
+#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), (__v4di) (__m256i) (B), (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) (__m256i) (A), (__v4di) (__m256i) (B), (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm256_ternarylogic_epi32(A, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), (__v8si) (__m256i) (B), (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) -1))
+#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), (__v8si) (__m256i) (B), (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) (__m256i) (A), (__v8si) (__m256i) (B), (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm_ternarylogic_epi64(A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), (__v2di) (__m128i) (B), (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) -1))
+#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), (__v2di) (__m128i) (B), (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) (__m128i) (A), (__v2di) (__m128i) (B), (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm_ternarylogic_epi32(A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), (__v4si) (__m128i) (B), (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) -1))
+#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), (__v4si) (__m128i) (B), (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
+#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) (__m128i) (A), (__v4si) (__m128i) (B), (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) (U)))
 #define _mm256_roundscale_ps(A, B) ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
 #define _mm256_mask_roundscale_ps(W, U, A, B) ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
 #define _mm256_maskz_roundscale_ps(U, A, B) ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
diff --git a/third_party/intel/bmiintrin.internal.h b/third_party/intel/bmiintrin.internal.h
index cf2042f1b..0c17f0bd2 100644
--- a/third_party/intel/bmiintrin.internal.h
+++ b/third_party/intel/bmiintrin.internal.h
@@ -14,12 +14,22 @@ __tzcnt_u16 (unsigned short __X)
 {
   return __builtin_ia32_tzcnt_u16 (__X);
 }
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u16 (unsigned short __X)
+{
+  return __builtin_ia32_tzcnt_u16 (__X);
+}
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __andn_u32 (unsigned int __X, unsigned int __Y)
 {
   return ~__X & __Y;
 }
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_andn_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __andn_u32 (__X, __Y);
+}
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __bextr_u32 (unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_bextr_u32 (__X, __Y);
@@ -76,6 +86,11 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
   return ~__X & __Y;
 }
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_andn_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __andn_u64 (__X, __Y);
+}
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __bextr_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_bextr_u64 (__X, __Y);
diff --git a/third_party/intel/cpuid.internal.h b/third_party/intel/cpuid.internal.h
index 081886d9b..3f082193c 100644
--- a/third_party/intel/cpuid.internal.h
+++ b/third_party/intel/cpuid.internal.h
@@ -48,7 +48,6 @@
 #define bit_AVX2 (1 << 5)
 #define bit_BMI2 (1 << 8)
 #define bit_RTM (1 << 11)
-#define bit_MPX (1 << 14)
 #define bit_AVX512F (1 << 16)
 #define bit_AVX512DQ (1 << 17)
 #define bit_RDSEED (1 << 18)
@@ -84,6 +83,7 @@
 #define bit_AVX5124VNNIW (1 << 2)
 #define bit_AVX5124FMAPS (1 << 3)
 #define bit_AVX512VP2INTERSECT (1 << 8)
+#define bit_AVX512FP16 (1 << 23)
 #define bit_IBT (1 << 20)
 #define bit_UINTR (1 << 5)
 #define bit_PCONFIG (1 << 18)
@@ -92,8 +92,6 @@
 #define bit_AMX_BF16 (1 << 22)
 #define bit_AMX_TILE (1 << 24)
 #define bit_AMX_INT8 (1 << 25)
-#define bit_BNDREGS (1 << 3)
-#define bit_BNDCSR (1 << 4)
 #define bit_XSAVEOPT (1 << 0)
 #define bit_XSAVEC (1 << 1)
 #define bit_XSAVES (1 << 3)
diff --git a/third_party/intel/emmintrin.internal.h b/third_party/intel/emmintrin.internal.h
index d54c75287..c0c3bf59d 100644
--- a/third_party/intel/emmintrin.internal.h
+++ b/third_party/intel/emmintrin.internal.h
@@ -550,13 +550,12 @@ _mm_loadu_si64 (void const *__P)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadu_si32 (void const *__P)
 {
-  return _mm_set_epi32 (*(int *)__P, (int)0, (int)0, (int)0);
+  return _mm_set_epi32 (0, 0, 0, (*(__m32_u *)__P)[0]);
 }
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadu_si16 (void const *__P)
 {
-  return _mm_set_epi16 (*(short *)__P, (short)0, (short)0, (short)0,
-   (short)0, (short)0, (short)0, (short)0);
+  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, (*(__m16_u *)__P)[0]);
 }
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_store_si128 (__m128i *__P, __m128i __B)
diff --git a/third_party/intel/ia32intrin.internal.h b/third_party/intel/ia32intrin.internal.h
index faeaff648..f6a5172c7 100644
--- a/third_party/intel/ia32intrin.internal.h
+++ b/third_party/intel/ia32intrin.internal.h
@@ -21,10 +21,10 @@ __bswapd (int __X)
   return __builtin_bswap32 (__X);
 }
 #ifndef __iamcu__
-#ifndef __SSE4_2__
+#ifndef __CRC32__
 #pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
+#pragma GCC target("crc32")
+#define __DISABLE_CRC32__
 #endif
 extern __inline unsigned int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -44,8 +44,8 @@ __crc32d (unsigned int __C, unsigned int __V)
 {
   return __builtin_ia32_crc32si (__C, __V);
 }
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
+#ifdef __DISABLE_CRC32__
+#undef __DISABLE_CRC32__
 #pragma GCC pop_options
 #endif
 #endif
@@ -63,9 +63,19 @@ __rdpmc (int __S)
   return __builtin_ia32_rdpmc (__S);
 }
 #endif
-#define __rdtsc() __builtin_ia32_rdtsc ()
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtsc (void)
+{
+  return __builtin_ia32_rdtsc ();
+}
 #ifndef __iamcu__
-#define __rdtscp(a) __builtin_ia32_rdtscp (a)
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtscp (unsigned int *__A)
+{
+  return __builtin_ia32_rdtscp (__A);
+}
 #endif
 extern __inline unsigned char
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -130,10 +140,10 @@ __bswapq (long long __X)
 {
   return __builtin_bswap64 (__X);
 }
-#ifndef __SSE4_2__
+#ifndef __CRC32__
 #pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
+#pragma GCC target("crc32")
+#define __DISABLE_CRC32__
 #endif
 extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -141,8 +151,8 @@ __crc32q (unsigned long long __C, unsigned long long __V)
 {
   return __builtin_ia32_crc32di (__C, __V);
 }
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
+#ifdef __DISABLE_CRC32__
+#undef __DISABLE_CRC32__
 #pragma GCC pop_options
 #endif
 extern __inline long long
diff --git a/third_party/intel/immintrin.internal.h b/third_party/intel/immintrin.internal.h
index 60b7ec6d9..a932d9755 100644
--- a/third_party/intel/immintrin.internal.h
+++ b/third_party/intel/immintrin.internal.h
@@ -36,6 +36,10 @@
 #include "third_party/intel/avx512bitalgintrin.internal.h"
 #include "third_party/intel/avx512vp2intersectintrin.internal.h"
 #include "third_party/intel/avx512vp2intersectvlintrin.internal.h"
+#ifdef __SSE2__
+#include "third_party/intel/avx512fp16intrin.internal.h"
+#include "third_party/intel/avx512fp16vlintrin.internal.h"
+#endif
 #include "third_party/intel/shaintrin.internal.h"
 #include "third_party/intel/fmaintrin.internal.h"
 #include "third_party/intel/f16cintrin.internal.h"
diff --git a/third_party/intel/mwaitintrin.internal.h b/third_party/intel/mwaitintrin.internal.h
new file mode 100644
index 000000000..6405a2c9e
--- /dev/null
+++ b/third_party/intel/mwaitintrin.internal.h
@@ -0,0 +1,26 @@
+#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
+#ifndef _MWAITINTRIN_H_INCLUDED
+#define _MWAITINTRIN_H_INCLUDED
+#ifndef __MWAIT__
+#pragma GCC push_options
+#pragma GCC target("mwait")
+#define __DISABLE_MWAIT__
+#endif
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
+{
+  __builtin_ia32_monitor (__P, __E, __H);
+}
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mwait (unsigned int __E, unsigned int __H)
+{
+  __builtin_ia32_mwait (__E, __H);
+}
+#ifdef __DISABLE_MWAIT__
+#undef __DISABLE_MWAIT__
+#pragma GCC pop_options
+#endif
+#endif
+#endif
diff --git a/third_party/intel/pmmintrin.internal.h b/third_party/intel/pmmintrin.internal.h
index abac40c73..b66b8412b 100644
--- a/third_party/intel/pmmintrin.internal.h
+++ b/third_party/intel/pmmintrin.internal.h
@@ -2,6 +2,7 @@
 #ifndef _PMMINTRIN_H_INCLUDED
 #define _PMMINTRIN_H_INCLUDED
 #include "third_party/intel/emmintrin.internal.h"
+#include "third_party/intel/mwaitintrin.internal.h"
 #ifndef __SSE3__
 #pragma GCC push_options
 #pragma GCC target("sse3")
@@ -67,16 +68,6 @@ _mm_lddqu_si128 (__m128i const *__P)
 {
   return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
 }
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
-{
-  __builtin_ia32_monitor (__P, __E, __H);
-}
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mwait (unsigned int __E, unsigned int __H)
-{
-  __builtin_ia32_mwait (__E, __H);
-}
 #ifdef __DISABLE_SSE3__
 #undef __DISABLE_SSE3__
 #pragma GCC pop_options
diff --git a/third_party/intel/serializeintrin.internal.h b/third_party/intel/serializeintrin.internal.h
index a68abbf43..22fa1c97b 100644
--- a/third_party/intel/serializeintrin.internal.h
+++ b/third_party/intel/serializeintrin.internal.h
@@ -9,7 +9,12 @@
 #pragma GCC target("serialize")
 #define __DISABLE_SERIALIZE__
 #endif
-#define _serialize() __builtin_ia32_serialize ()
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_serialize (void)
+{
+  __builtin_ia32_serialize ();
+}
 #ifdef __DISABLE_SERIALIZE__
 #undef __DISABLE_SERIALIZE__
 #pragma GCC pop_options
diff --git a/third_party/intel/smmintrin.internal.h b/third_party/intel/smmintrin.internal.h
index 2fae19a10..5179c6e2e 100644
--- a/third_party/intel/smmintrin.internal.h
+++ b/third_party/intel/smmintrin.internal.h
@@ -524,15 +524,10 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
 #pragma GCC pop_options
 #endif
 #include "third_party/intel/popcntintrin.internal.h"
-#ifndef __SSE4_1__
+#ifndef __CRC32__
 #pragma GCC push_options
-#pragma GCC target("sse4.1")
-#define __DISABLE_SSE4_1__
-#endif
-#ifndef __SSE4_2__
-#pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
+#pragma GCC target("crc32")
+#define __DISABLE_CRC32__
 #endif
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_crc32_u8 (unsigned int __C, unsigned char __V)
@@ -556,12 +551,8 @@ _mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
   return __builtin_ia32_crc32di (__C, __V);
 }
 #endif
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
-#pragma GCC pop_options
-#endif
-#ifdef __DISABLE_SSE4_1__
-#undef __DISABLE_SSE4_1__
+#ifdef __DISABLE_CRC32__
+#undef __DISABLE_CRC32__
 #pragma GCC pop_options
 #endif
 #endif
diff --git a/third_party/intel/upgrade.sh b/third_party/intel/upgrade.sh
index 02458cbe1..f5f32ddae 100755
--- a/third_party/intel/upgrade.sh
+++ b/third_party/intel/upgrade.sh
@@ -1,32 +1,27 @@
 #!/bin/sh
 
-s=/opt/cross11portcosmo/lib/gcc/x86_64-linux-musl/11.2.0/include
+s=/opt/include
 d=third_party/intel
 
 FILES='
+adxintrin
+ammintrin
 amxbf16intrin
 amxint8intrin
 amxtileintrin
-avx512bf16intrin
-avx512bf16vlintrin
-avx512vp2intersectintrin
-avx512vp2intersectvlintrin
-avxvnniintrin
-enqcmdintrin
-hresetintrin
-keylockerintrin
-serializeintrin
-tsxldtrkintrin
-uintrintrin
-x86gprintrin
+avx2intrin
 avx5124fmapsintrin
 avx5124vnniwintrin
+avx512bf16intrin
+avx512bf16vlintrin
 avx512bitalgintrin
 avx512bwintrin
 avx512cdintrin
 avx512dqintrin
 avx512erintrin
 avx512fintrin
+avx512fp16intrin
+avx512fp16vlintrin
 avx512ifmaintrin
 avx512ifmavlintrin
 avx512pfintrin
@@ -39,12 +34,12 @@ avx512vldqintrin
 avx512vlintrin
 avx512vnniintrin
 avx512vnnivlintrin
+avx512vp2intersectintrin
+avx512vp2intersectvlintrin
 avx512vpopcntdqintrin
 avx512vpopcntdqvlintrin
-adxintrin
-ammintrin
-avx2intrin
 avxintrin
+avxvnniintrin
 bmi2intrin
 bmiintrin
 cetintrin
@@ -54,19 +49,23 @@ clwbintrin
 clzerointrin
 cpuid
 emmintrin
+enqcmdintrin
 f16cintrin
 fma4intrin
 fmaintrin
 fxsrintrin
 gfniintrin
+hresetintrin
 ia32intrin
 immintrin
+keylockerintrin
 lwpintrin
 lzcntintrin
 mm3dnow
 mm_malloc
 mmintrin
 movdirintrin
+mwaitintrin
 mwaitxintrin
 nmmintrin
 pconfigintrin
@@ -76,16 +75,20 @@ popcntintrin
 prfchwintrin
 rdseedintrin
 rtmintrin
+serializeintrin
 sgxintrin
 shaintrin
 smmintrin
 tbmintrin
 tmmintrin
+tsxldtrkintrin
+uintrintrin
 vaesintrin
 vpclmulqdqintrin
 waitpkgintrin
 wbnoinvdintrin
 wmmintrin
+x86gprintrin
 x86intrin
 xmmintrin
 xopintrin
diff --git a/third_party/intel/vaesintrin.internal.h b/third_party/intel/vaesintrin.internal.h
index e0b577112..6a55221af 100644
--- a/third_party/intel/vaesintrin.internal.h
+++ b/third_party/intel/vaesintrin.internal.h
@@ -1,7 +1,4 @@
 #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
-#endif
 #ifndef __VAESINTRIN_H_INCLUDED
 #define __VAESINTRIN_H_INCLUDED
 #if !defined(__VAES__) || !defined(__AVX__)
diff --git a/third_party/intel/x86gprintrin.internal.h b/third_party/intel/x86gprintrin.internal.h
index 3f8aedf78..875718588 100644
--- a/third_party/intel/x86gprintrin.internal.h
+++ b/third_party/intel/x86gprintrin.internal.h
@@ -1,6 +1,11 @@
 #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
 #ifndef _X86GPRINTRIN_H_INCLUDED
 #define _X86GPRINTRIN_H_INCLUDED
+#if !defined _SOFT_FLOAT || defined __MMX__ || defined __SSE__
+#pragma GCC push_options
+#pragma GCC target("general-regs-only")
+#define __DISABLE_GENERAL_REGS_ONLY__
+#endif
 #include "third_party/intel/ia32intrin.internal.h"
 #ifndef __iamcu__
 #include "third_party/intel/adxintrin.internal.h"
@@ -16,6 +21,7 @@
 #include "third_party/intel/lzcntintrin.internal.h"
 #include "third_party/intel/lwpintrin.internal.h"
 #include "third_party/intel/movdirintrin.internal.h"
+#include "third_party/intel/mwaitintrin.internal.h"
 #include "third_party/intel/mwaitxintrin.internal.h"
 #include "third_party/intel/pconfigintrin.internal.h"
 #include "third_party/intel/popcntintrin.internal.h"
@@ -175,5 +181,9 @@ _ptwrite32 (unsigned __B)
 #pragma GCC pop_options
 #endif
 #endif
+#ifdef __DISABLE_GENERAL_REGS_ONLY__
+#undef __DISABLE_GENERAL_REGS_ONLY__
+#pragma GCC pop_options
+#endif
 #endif
 #endif
diff --git a/third_party/intel/xmmintrin.internal.h b/third_party/intel/xmmintrin.internal.h
index 7c744991b..3a97b9fa1 100644
--- a/third_party/intel/xmmintrin.internal.h
+++ b/third_party/intel/xmmintrin.internal.h
@@ -1,9 +1,10 @@
 #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
 #ifndef _XMMINTRIN_H_INCLUDED
 #define _XMMINTRIN_H_INCLUDED
-#include "third_party/intel/mm_malloc.internal.h"
 #include "third_party/intel/mmintrin.internal.h"
-enum _mm_hint {
+#include "third_party/intel/mm_malloc.internal.h"
+enum _mm_hint
+{
   _MM_HINT_ET0 = 7,
   _MM_HINT_ET1 = 6,
   _MM_HINT_T0 = 3,
@@ -12,953 +13,950 @@ enum _mm_hint {
   _MM_HINT_NTA = 0
 };
 #ifdef __OPTIMIZE__
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_prefetch(const void *__P, enum _mm_hint __I) {
-  __builtin_prefetch(__P, (__I & 0x4) >> 2, __I & 0x3);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+  __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
 }
 #else
-#define _mm_prefetch(P, I) __builtin_prefetch((P), ((I & 0x4) >> 2), (I & 0x3))
+#define _mm_prefetch(P, I) __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
 #endif
 #ifndef __SSE__
 #pragma GCC push_options
 #pragma GCC target("sse")
 #define __DISABLE_SSE__
 #endif
-typedef float __m128 __attribute__((__vector_size__(16), __may_alias__));
-typedef float __m128_u
-    __attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));
-typedef float __v4sf __attribute__((__vector_size__(16)));
-#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \
-  (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
-#define _MM_EXCEPT_MASK       0x003f
-#define _MM_EXCEPT_INVALID    0x0001
-#define _MM_EXCEPT_DENORM     0x0002
-#define _MM_EXCEPT_DIV_ZERO   0x0004
-#define _MM_EXCEPT_OVERFLOW   0x0008
-#define _MM_EXCEPT_UNDERFLOW  0x0010
-#define _MM_EXCEPT_INEXACT    0x0020
-#define _MM_MASK_MASK         0x1f80
-#define _MM_MASK_INVALID      0x0080
-#define _MM_MASK_DENORM       0x0100
-#define _MM_MASK_DIV_ZERO     0x0200
-#define _MM_MASK_OVERFLOW     0x0400
-#define _MM_MASK_UNDERFLOW    0x0800
-#define _MM_MASK_INEXACT      0x1000
-#define _MM_ROUND_MASK        0x6000
-#define _MM_ROUND_NEAREST     0x0000
-#define _MM_ROUND_DOWN        0x2000
-#define _MM_ROUND_UP          0x4000
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+#define _MM_EXCEPT_MASK 0x003f
+#define _MM_EXCEPT_INVALID 0x0001
+#define _MM_EXCEPT_DENORM 0x0002
+#define _MM_EXCEPT_DIV_ZERO 0x0004
+#define _MM_EXCEPT_OVERFLOW 0x0008
+#define _MM_EXCEPT_UNDERFLOW 0x0010
+#define _MM_EXCEPT_INEXACT 0x0020
+#define _MM_MASK_MASK 0x1f80
+#define _MM_MASK_INVALID 0x0080
+#define _MM_MASK_DENORM 0x0100
+#define _MM_MASK_DIV_ZERO 0x0200
+#define _MM_MASK_OVERFLOW 0x0400
+#define _MM_MASK_UNDERFLOW 0x0800
+#define _MM_MASK_INEXACT 0x1000
+#define _MM_ROUND_MASK 0x6000
+#define _MM_ROUND_NEAREST 0x0000
+#define _MM_ROUND_DOWN 0x2000
+#define _MM_ROUND_UP 0x4000
 #define _MM_ROUND_TOWARD_ZERO 0x6000
-#define _MM_FLUSH_ZERO_MASK   0x8000
-#define _MM_FLUSH_ZERO_ON     0x8000
-#define _MM_FLUSH_ZERO_OFF    0x0000
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_undefined_ps(void) {
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#define _MM_FLUSH_ZERO_ON 0x8000
+#define _MM_FLUSH_ZERO_OFF 0x0000
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
   __m128 __Y = __Y;
   return __Y;
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_setzero_ps(void) {
-  return __extension__(__m128){0.0f, 0.0f, 0.0f, 0.0f};
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_ps (void)
+{
+  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_add_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_addss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sub_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_subss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mul_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_mulss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_div_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_divss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sqrt_ss(__m128 __A) {
-  return (__m128)__builtin_ia32_sqrtss((__v4sf)__A);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ss (__m128 __A)
+{
+  return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_rcp_ss(__m128 __A) {
-  return (__m128)__builtin_ia32_rcpss((__v4sf)__A);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ss (__m128 __A)
+{
+  return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_rsqrt_ss(__m128 __A) {
-  return (__m128)__builtin_ia32_rsqrtss((__v4sf)__A);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ss (__m128 __A)
+{
+  return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_minss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_maxss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_add_ps(__m128 __A, __m128 __B) {
-  return (__m128)((__v4sf)__A + (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A + (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sub_ps(__m128 __A, __m128 __B) {
-  return (__m128)((__v4sf)__A - (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A - (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mul_ps(__m128 __A, __m128 __B) {
-  return (__m128)((__v4sf)__A * (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A * (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_div_ps(__m128 __A, __m128 __B) {
-  return (__m128)((__v4sf)__A / (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A / (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sqrt_ps(__m128 __A) {
-  return (__m128)__builtin_ia32_sqrtps((__v4sf)__A);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ps (__m128 __A)
+{
+  return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_rcp_ps(__m128 __A) {
-  return (__m128)__builtin_ia32_rcpps((__v4sf)__A);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ps (__m128 __A)
+{
+  return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_rsqrt_ps(__m128 __A) {
-  return (__m128)__builtin_ia32_rsqrtps((__v4sf)__A);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ps (__m128 __A)
+{
+  return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_minps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_maxps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_and_ps(__m128 __A, __m128 __B) {
-  return __builtin_ia32_andps(__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_ps (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_andps (__A, __B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_andnot_ps(__m128 __A, __m128 __B) {
-  return __builtin_ia32_andnps(__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_ps (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_andnps (__A, __B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_or_ps(__m128 __A, __m128 __B) {
-  return __builtin_ia32_orps(__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_ps (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_orps (__A, __B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_xor_ps(__m128 __A, __m128 __B) {
-  return __builtin_ia32_xorps(__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_ps (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_xorps (__A, __B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpeq_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpeqss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmplt_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpltss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmple_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpless((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpgt_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_movss(
-      (__v4sf)__A, (__v4sf)__builtin_ia32_cmpltss((__v4sf)__B, (__v4sf)__A));
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+     (__v4sf)
+     __builtin_ia32_cmpltss ((__v4sf) __B,
+        (__v4sf)
+        __A));
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpge_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_movss(
-      (__v4sf)__A, (__v4sf)__builtin_ia32_cmpless((__v4sf)__B, (__v4sf)__A));
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+     (__v4sf)
+     __builtin_ia32_cmpless ((__v4sf) __B,
+        (__v4sf)
+        __A));
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpneq_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpneqss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpnlt_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpnltss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpnle_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpnless((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpngt_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_movss(
-      (__v4sf)__A, (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__B, (__v4sf)__A));
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+     (__v4sf)
+     __builtin_ia32_cmpnltss ((__v4sf) __B,
+         (__v4sf)
+         __A));
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpnge_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_movss(
-      (__v4sf)__A, (__v4sf)__builtin_ia32_cmpnless((__v4sf)__B, (__v4sf)__A));
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+     (__v4sf)
+     __builtin_ia32_cmpnless ((__v4sf) __B,
+         (__v4sf)
+         __A));
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpord_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpordss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpunord_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpunordss((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpeq_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpeqps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmplt_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpltps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmple_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpleps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpgt_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpgtps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpge_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpgeps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpneq_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpneqps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpnlt_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpnltps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpnle_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpnleps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpngt_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpngtps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpnge_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpngeps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpord_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpordps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpunord_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_cmpunordps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_comieq_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_comieq((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_comilt_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_comilt((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_comile_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_comile((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_comigt_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_comigt((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_comige_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_comige((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_comineq_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_comineq((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_ucomieq_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_ucomieq((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_ucomilt_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_ucomilt((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_ucomile_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_ucomile((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_ucomigt_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_ucomigt((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_ucomige_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_ucomige((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_ucomineq_ss(__m128 __A, __m128 __B) {
-  return __builtin_ia32_ucomineq((__v4sf)__A, (__v4sf)__B);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_ss (__m128 __A, __m128 __B)
+{
+  return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtss_si32(__m128 __A) {
-  return __builtin_ia32_cvtss2si((__v4sf)__A);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si32 (__m128 __A)
+{
+  return __builtin_ia32_cvtss2si ((__v4sf) __A);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvt_ss2si(__m128 __A) {
-  return _mm_cvtss_si32(__A);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ss2si (__m128 __A)
+{
+  return _mm_cvtss_si32 (__A);
 }
 #ifdef __x86_64__
-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtss_si64(__m128 __A) {
-  return __builtin_ia32_cvtss2si64((__v4sf)__A);
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64 (__m128 __A)
+{
+  return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
 }
-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtss_si64x(__m128 __A) {
-  return __builtin_ia32_cvtss2si64((__v4sf)__A);
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64x (__m128 __A)
+{
+  return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
 }
 #endif
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtps_pi32(__m128 __A) {
-  return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__A);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi32 (__m128 __A)
+{
+  return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvt_ps2pi(__m128 __A) {
-  return _mm_cvtps_pi32(__A);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ps2pi (__m128 __A)
+{
+  return _mm_cvtps_pi32 (__A);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvttss_si32(__m128 __A) {
-  return __builtin_ia32_cvttss2si((__v4sf)__A);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si32 (__m128 __A)
+{
+  return __builtin_ia32_cvttss2si ((__v4sf) __A);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtt_ss2si(__m128 __A) {
-  return _mm_cvttss_si32(__A);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ss2si (__m128 __A)
+{
+  return _mm_cvttss_si32 (__A);
 }
 #ifdef __x86_64__
-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvttss_si64(__m128 __A) {
-  return __builtin_ia32_cvttss2si64((__v4sf)__A);
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64 (__m128 __A)
+{
+  return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
 }
-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvttss_si64x(__m128 __A) {
-  return __builtin_ia32_cvttss2si64((__v4sf)__A);
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64x (__m128 __A)
+{
+  return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
 }
 #endif
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvttps_pi32(__m128 __A) {
-  return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__A);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_pi32 (__m128 __A)
+{
+  return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtt_ps2pi(__m128 __A) {
-  return _mm_cvttps_pi32(__A);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ps2pi (__m128 __A)
+{
+  return _mm_cvttps_pi32 (__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi32_ss(__m128 __A, int __B) {
-  return (__m128)__builtin_ia32_cvtsi2ss((__v4sf)__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_ss (__m128 __A, int __B)
+{
+  return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvt_si2ss(__m128 __A, int __B) {
-  return _mm_cvtsi32_ss(__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+  return _mm_cvtsi32_ss (__A, __B);
 }
 #ifdef __x86_64__
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi64_ss(__m128 __A, long long __B) {
-  return (__m128)__builtin_ia32_cvtsi642ss((__v4sf)__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_ss (__m128 __A, long long __B)
+{
+  return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi64x_ss(__m128 __A, long long __B) {
-  return (__m128)__builtin_ia32_cvtsi642ss((__v4sf)__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+  return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
 }
 #endif
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtpi32_ps(__m128 __A, __m64 __B) {
-  return (__m128)__builtin_ia32_cvtpi2ps((__v4sf)__A, (__v2si)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_ps (__m128 __A, __m64 __B)
+{
+  return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvt_pi2ps(__m128 __A, __m64 __B) {
-  return _mm_cvtpi32_ps(__A, __B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+  return _mm_cvtpi32_ps (__A, __B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtpi16_ps(__m64 __A) {
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi16_ps (__m64 __A)
+{
   __v4hi __sign;
   __v2si __hisi, __losi;
   __v4sf __zero, __ra, __rb;
-  __sign = __builtin_ia32_pcmpgtw((__v4hi)0LL, (__v4hi)__A);
-  __losi = (__v2si)__builtin_ia32_punpcklwd((__v4hi)__A, __sign);
-  __hisi = (__v2si)__builtin_ia32_punpckhwd((__v4hi)__A, __sign);
-  __zero = (__v4sf)_mm_setzero_ps();
-  __ra = __builtin_ia32_cvtpi2ps(__zero, __losi);
-  __rb = __builtin_ia32_cvtpi2ps(__ra, __hisi);
-  return (__m128)__builtin_ia32_movlhps(__ra, __rb);
+  __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
+  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
+  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
+  __zero = (__v4sf) _mm_setzero_ps ();
+  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
+  return (__m128) __builtin_ia32_movlhps (__ra, __rb);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtpu16_ps(__m64 __A) {
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu16_ps (__m64 __A)
+{
   __v2si __hisi, __losi;
   __v4sf __zero, __ra, __rb;
-  __losi = (__v2si)__builtin_ia32_punpcklwd((__v4hi)__A, (__v4hi)0LL);
-  __hisi = (__v2si)__builtin_ia32_punpckhwd((__v4hi)__A, (__v4hi)0LL);
-  __zero = (__v4sf)_mm_setzero_ps();
-  __ra = __builtin_ia32_cvtpi2ps(__zero, __losi);
-  __rb = __builtin_ia32_cvtpi2ps(__ra, __hisi);
-  return (__m128)__builtin_ia32_movlhps(__ra, __rb);
+  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
+  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
+  __zero = (__v4sf) _mm_setzero_ps ();
+  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
+  return (__m128) __builtin_ia32_movlhps (__ra, __rb);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtpi8_ps(__m64 __A) {
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi8_ps (__m64 __A)
+{
   __v8qi __sign;
-  __sign = __builtin_ia32_pcmpgtb((__v8qi)0LL, (__v8qi)__A);
-  __A = (__m64)__builtin_ia32_punpcklbw((__v8qi)__A, __sign);
+  __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
+  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
   return _mm_cvtpi16_ps(__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtpu8_ps(__m64 __A) {
-  __A = (__m64)__builtin_ia32_punpcklbw((__v8qi)__A, (__v8qi)0LL);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu8_ps(__m64 __A)
+{
+  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
   return _mm_cvtpu16_ps(__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtpi32x2_ps(__m64 __A, __m64 __B) {
-  __v4sf __zero = (__v4sf)_mm_setzero_ps();
-  __v4sf __sfa = __builtin_ia32_cvtpi2ps(__zero, (__v2si)__A);
-  __v4sf __sfb = __builtin_ia32_cvtpi2ps(__sfa, (__v2si)__B);
-  return (__m128)__builtin_ia32_movlhps(__sfa, __sfb);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
+{
+  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
+  __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
+  __v4sf __sfb = __builtin_ia32_cvtpi2ps (__sfa, (__v2si)__B);
+  return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtps_pi16(__m128 __A) {
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi16(__m128 __A)
+{
   __v4sf __hisf = (__v4sf)__A;
-  __v4sf __losf = __builtin_ia32_movhlps(__hisf, __hisf);
-  __v2si __hisi = __builtin_ia32_cvtps2pi(__hisf);
-  __v2si __losi = __builtin_ia32_cvtps2pi(__losf);
-  return (__m64)__builtin_ia32_packssdw(__hisi, __losi);
+  __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
+  __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
+  __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
+  return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtps_pi8(__m128 __A) {
-  __v4hi __tmp = (__v4hi)_mm_cvtps_pi16(__A);
-  return (__m64)__builtin_ia32_packsswb(__tmp, (__v4hi)0LL);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi8(__m128 __A)
+{
+  __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
+  return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
 }
 #ifdef __OPTIMIZE__
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_shuffle_ps(__m128 __A, __m128 __B, int const __mask) {
-  return (__m128)__builtin_ia32_shufps((__v4sf)__A, (__v4sf)__B, __mask);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
+{
+  return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
 }
 #else
-#define _mm_shuffle_ps(A, B, MASK)                                         \
-  ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
-                                 (int)(MASK)))
+#define _mm_shuffle_ps(A, B, MASK) ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(MASK)))
 #endif
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpackhi_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_unpckhps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpacklo_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_unpcklps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_loadh_pi(__m128 __A, __m64 const *__P) {
-  return (__m128)__builtin_ia32_loadhps((__v4sf)__A, (const __v2sf *)__P);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pi (__m128 __A, __m64 const *__P)
+{
+  return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_storeh_pi(__m64 *__P, __m128 __A) {
-  __builtin_ia32_storehps((__v2sf *)__P, (__v4sf)__A);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pi (__m64 *__P, __m128 __A)
+{
+  __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_movehl_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_movhlps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehl_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_movelh_ps(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_ia32_movlhps((__v4sf)__A, (__v4sf)__B);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movelh_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_loadl_pi(__m128 __A, __m64 const *__P) {
-  return (__m128)__builtin_ia32_loadlps((__v4sf)__A, (const __v2sf *)__P);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pi (__m128 __A, __m64 const *__P)
+{
+  return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_storel_pi(__m64 *__P, __m128 __A) {
-  __builtin_ia32_storelps((__v2sf *)__P, (__v4sf)__A);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pi (__m64 *__P, __m128 __A)
+{
+  __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_movemask_ps(__m128 __A) {
-  return __builtin_ia32_movmskps((__v4sf)__A);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_ps (__m128 __A)
+{
+  return __builtin_ia32_movmskps ((__v4sf)__A);
 }
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_getcsr(void) {
-  return __builtin_ia32_stmxcsr();
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getcsr (void)
+{
+  return __builtin_ia32_stmxcsr ();
 }
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_GET_EXCEPTION_STATE(void) {
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_STATE (void)
+{
   return _mm_getcsr() & _MM_EXCEPT_MASK;
 }
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_GET_EXCEPTION_MASK(void) {
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_MASK (void)
+{
   return _mm_getcsr() & _MM_MASK_MASK;
 }
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_GET_ROUNDING_MODE(void) {
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_ROUNDING_MODE (void)
+{
   return _mm_getcsr() & _MM_ROUND_MASK;
 }
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_GET_FLUSH_ZERO_MODE(void) {
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_FLUSH_ZERO_MODE (void)
+{
   return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_setcsr(unsigned int __I) {
-  __builtin_ia32_ldmxcsr(__I);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setcsr (unsigned int __I)
+{
+  __builtin_ia32_ldmxcsr (__I);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_SET_EXCEPTION_STATE(unsigned int __mask) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_STATE(unsigned int __mask)
+{
   _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_SET_EXCEPTION_MASK(unsigned int __mask) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_MASK (unsigned int __mask)
+{
   _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_SET_ROUNDING_MODE(unsigned int __mode) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_ROUNDING_MODE (unsigned int __mode)
+{
   _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _MM_SET_FLUSH_ZERO_MODE(unsigned int __mode) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
+{
   _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set_ss(float __F) {
-  return __extension__(__m128)(__v4sf){__F, 0.0f, 0.0f, 0.0f};
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ss (float __F)
+{
+  return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set1_ps(float __F) {
-  return __extension__(__m128)(__v4sf){__F, __F, __F, __F};
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_ps (float __F)
+{
+  return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set_ps1(float __F) {
-  return _mm_set1_ps(__F);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps1 (float __F)
+{
+  return _mm_set1_ps (__F);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_load_ss(float const *__P) {
-  return _mm_set_ss(*__P);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ss (float const *__P)
+{
+  return _mm_set_ss (*__P);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_load1_ps(float const *__P) {
-  return _mm_set1_ps(*__P);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_ps (float const *__P)
+{
+  return _mm_set1_ps (*__P);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_load_ps1(float const *__P) {
-  return _mm_load1_ps(__P);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps1 (float const *__P)
+{
+  return _mm_load1_ps (__P);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_load_ps(float const *__P) {
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps (float const *__P)
+{
   return *(__m128 *)__P;
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_loadu_ps(float const *__P) {
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ps (float const *__P)
+{
   return *(__m128_u *)__P;
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_loadr_ps(float const *__P) {
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_ps (float const *__P)
+{
   __v4sf __tmp = *(__v4sf *)__P;
-  return (__m128)__builtin_ia32_shufps(__tmp, __tmp, _MM_SHUFFLE(0, 1, 2, 3));
+  return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
 }
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__,
-                                      __artificial__))
-_mm_set_ps(const float __Z, const float __Y, const float __X, const float __W) {
-  return __extension__(__m128)(__v4sf){__W, __X, __Y, __Z};
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_setr_ps(float __Z, float __Y, float __X, float __W) {
-  return __extension__(__m128)(__v4sf){__Z, __Y, __X, __W};
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+  return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_store_ss(float *__P, __m128 __A) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ss (float *__P, __m128 __A)
+{
   *__P = ((__v4sf)__A)[0];
 }
-extern __inline float
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtss_f32(__m128 __A) {
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_f32 (__m128 __A)
+{
   return ((__v4sf)__A)[0];
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_store_ps(float *__P, __m128 __A) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps (float *__P, __m128 __A)
+{
   *(__m128 *)__P = __A;
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_storeu_ps(float *__P, __m128 __A) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ps (float *__P, __m128 __A)
+{
   *(__m128_u *)__P = __A;
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_store1_ps(float *__P, __m128 __A) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_ps (float *__P, __m128 __A)
+{
   __v4sf __va = (__v4sf)__A;
-  __v4sf __tmp = __builtin_ia32_shufps(__va, __va, _MM_SHUFFLE(0, 0, 0, 0));
-  _mm_storeu_ps(__P, __tmp);
+  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
+  _mm_storeu_ps (__P, __tmp);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_store_ps1(float *__P, __m128 __A) {
-  _mm_store1_ps(__P, __A);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps1 (float *__P, __m128 __A)
+{
+  _mm_store1_ps (__P, __A);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_storer_ps(float *__P, __m128 __A) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_ps (float *__P, __m128 __A)
+{
   __v4sf __va = (__v4sf)__A;
-  __v4sf __tmp = __builtin_ia32_shufps(__va, __va, _MM_SHUFFLE(0, 1, 2, 3));
-  _mm_store_ps(__P, __tmp);
+  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
+  _mm_store_ps (__P, __tmp);
 }
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_move_ss(__m128 __A, __m128 __B) {
-  return (__m128)__builtin_shuffle(
-      (__v4sf)__A, (__v4sf)__B,
-      __extension__(__attribute__((__vector_size__(16))) int){4, 1, 2, 3});
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_shuffle ((__v4sf)__A, (__v4sf)__B,
+                                     __extension__
+                                     (__attribute__((__vector_size__ (16))) int)
+                                     {4,1,2,3});
 }
 #ifdef __OPTIMIZE__
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_extract_pi16(__m64 const __A, int const __N) {
-  return (unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)__A, __N);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_pi16 (__m64 const __A, int const __N)
+{
+  return (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pextrw(__m64 const __A, int const __N) {
-  return _mm_extract_pi16(__A, __N);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pextrw (__m64 const __A, int const __N)
+{
+  return _mm_extract_pi16 (__A, __N);
 }
 #else
-#define _mm_extract_pi16(A, N)                                          \
-  ((int)(unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)(__m64)(A), \
-                                                    (int)(N)))
+#define _mm_extract_pi16(A, N) ((int) (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
 #define _m_pextrw(A, N) _mm_extract_pi16(A, N)
 #endif
 #ifdef __OPTIMIZE__
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_insert_pi16(__m64 const __A, int const __D, int const __N) {
-  return (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)__A, __D, __N);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
+{
+  return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pinsrw(__m64 const __A, int const __D, int const __N) {
-  return _mm_insert_pi16(__A, __D, __N);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
+{
+  return _mm_insert_pi16 (__A, __D, __N);
 }
 #else
-#define _mm_insert_pi16(A, D, N) \
-  ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)(__m64)(A), (int)(D), (int)(N)))
+#define _mm_insert_pi16(A, D, N) ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), (int)(D), (int)(N)))
 #define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
 #endif
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_pi16(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_pmaxsw((__v4hi)__A, (__v4hi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmaxsw(__m64 __A, __m64 __B) {
-  return _mm_max_pi16(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxsw (__m64 __A, __m64 __B)
+{
+  return _mm_max_pi16 (__A, __B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_pu8(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_pmaxub((__v8qi)__A, (__v8qi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmaxub(__m64 __A, __m64 __B) {
-  return _mm_max_pu8(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxub (__m64 __A, __m64 __B)
+{
+  return _mm_max_pu8 (__A, __B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_pi16(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_pminsw((__v4hi)__A, (__v4hi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pminsw(__m64 __A, __m64 __B) {
-  return _mm_min_pi16(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminsw (__m64 __A, __m64 __B)
+{
+  return _mm_min_pi16 (__A, __B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_pu8(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_pminub((__v8qi)__A, (__v8qi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pminub(__m64 __A, __m64 __B) {
-  return _mm_min_pu8(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminub (__m64 __A, __m64 __B)
+{
+  return _mm_min_pu8 (__A, __B);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_movemask_pi8(__m64 __A) {
-  return __builtin_ia32_pmovmskb((__v8qi)__A);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pi8 (__m64 __A)
+{
+  return __builtin_ia32_pmovmskb ((__v8qi)__A);
 }
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmovmskb(__m64 __A) {
-  return _mm_movemask_pi8(__A);
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmovmskb (__m64 __A)
+{
+  return _mm_movemask_pi8 (__A);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mulhi_pu16(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_pmulhuw((__v4hi)__A, (__v4hi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmulhuw(__m64 __A, __m64 __B) {
-  return _mm_mulhi_pu16(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhuw (__m64 __A, __m64 __B)
+{
+  return _mm_mulhi_pu16 (__A, __B);
 }
 #ifdef __OPTIMIZE__
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_shuffle_pi16(__m64 __A, int const __N) {
-  return (__m64)__builtin_ia32_pshufw((__v4hi)__A, __N);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int const __N)
+{
+  return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pshufw(__m64 __A, int const __N) {
-  return _mm_shuffle_pi16(__A, __N);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pshufw (__m64 __A, int const __N)
+{
+  return _mm_shuffle_pi16 (__A, __N);
 }
 #else
-#define _mm_shuffle_pi16(A, N) \
-  ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(A), (int)(N)))
-#define _m_pshufw(A, N) _mm_shuffle_pi16(A, N)
+#define _mm_shuffle_pi16(A, N) ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
 #endif
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_maskmove_si64(__m64 __A, __m64 __N, char *__P) {
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
+{
 #ifdef __MMX_WITH_SSE__
-  typedef long long __v2di __attribute__((__vector_size__(16)));
-  typedef char __v16qi __attribute__((__vector_size__(16)));
-  __v2di __A128 = __extension__(__v2di){((__v1di)__A)[0], 0};
-  __v2di __N128 = __extension__(__v2di){((__v1di)__N)[0], 0};
-  __SIZE_TYPE__ offset = ((__SIZE_TYPE__)__P) & 0xf;
-  if (offset) {
-    if (offset > 8) offset = 8;
-    __P = (char *)(((__SIZE_TYPE__)__P) - offset);
-    switch (offset) {
-      case 1:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 8);
-        break;
-      case 2:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 2 * 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 2 * 8);
-        break;
-      case 3:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 3 * 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 3 * 8);
-        break;
-      case 4:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 4 * 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 4 * 8);
-        break;
-      case 5:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 5 * 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 5 * 8);
-        break;
-      case 6:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 6 * 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 6 * 8);
-        break;
-      case 7:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 7 * 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 7 * 8);
-        break;
-      case 8:
-        __A128 = __builtin_ia32_pslldqi128(__A128, 8 * 8);
-        __N128 = __builtin_ia32_pslldqi128(__N128, 8 * 8);
-        break;
-      default:
-        break;
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+  __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (offset)
+    {
+      if (offset > 8)
+ offset = 8;
+      __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+      switch (offset)
+ {
+ case 1:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+   break;
+ case 2:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+   break;
+ case 3:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+   break;
+ case 4:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+   break;
+ case 5:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+   break;
+ case 6:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+   break;
+ case 7:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+   break;
+ case 8:
+   __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+   __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+   break;
+ default:
+   break;
+ }
     }
-  }
-  __builtin_ia32_maskmovdqu((__v16qi)__A128, (__v16qi)__N128, __P);
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
 #else
-  __builtin_ia32_maskmovq((__v8qi)__A, (__v8qi)__N, __P);
+  __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
 #endif
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_maskmovq(__m64 __A, __m64 __N, char *__P) {
-  _mm_maskmove_si64(__A, __N, __P);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{
+  _mm_maskmove_si64 (__A, __N, __P);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_avg_pu8(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_pavgb((__v8qi)__A, (__v8qi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pavgb(__m64 __A, __m64 __B) {
-  return _mm_avg_pu8(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgb (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu8 (__A, __B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_avg_pu16(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_pavgw((__v4hi)__A, (__v4hi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pavgw(__m64 __A, __m64 __B) {
-  return _mm_avg_pu16(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgw (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu16 (__A, __B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sad_pu8(__m64 __A, __m64 __B) {
-  return (__m64)__builtin_ia32_psadbw((__v8qi)__A, (__v8qi)__B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
 }
-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psadbw(__m64 __A, __m64 __B) {
-  return _mm_sad_pu8(__A, __B);
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psadbw (__m64 __A, __m64 __B)
+{
+  return _mm_sad_pu8 (__A, __B);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_stream_pi(__m64 *__P, __m64 __A) {
-  __builtin_ia32_movntq((unsigned long long *)__P, (unsigned long long)__A);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pi (__m64 *__P, __m64 __A)
+{
+  __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_stream_ps(float *__P, __m128 __A) {
-  __builtin_ia32_movntps(__P, (__v4sf)__A);
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ps (float *__P, __m128 __A)
+{
+  __builtin_ia32_movntps (__P, (__v4sf)__A);
 }
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sfence(void) {
-  __builtin_ia32_sfence();
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sfence (void)
+{
+  __builtin_ia32_sfence ();
 }
-#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)                      \
-  do {                                                                 \
-    __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
-    __v4sf __t0 = __builtin_ia32_unpcklps(__r0, __r1);                 \
-    __v4sf __t1 = __builtin_ia32_unpcklps(__r2, __r3);                 \
-    __v4sf __t2 = __builtin_ia32_unpckhps(__r0, __r1);                 \
-    __v4sf __t3 = __builtin_ia32_unpckhps(__r2, __r3);                 \
-    (row0) = __builtin_ia32_movlhps(__t0, __t1);                       \
-    (row1) = __builtin_ia32_movhlps(__t1, __t0);                       \
-    (row2) = __builtin_ia32_movlhps(__t2, __t3);                       \
-    (row3) = __builtin_ia32_movhlps(__t3, __t2);                       \
-  } while (0)
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) do { __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); (row0) = __builtin_ia32_movlhps (__t0, __t1); (row1) = __builtin_ia32_movhlps (__t1, __t0); (row2) = __builtin_ia32_movlhps (__t2, __t3); (row3) = __builtin_ia32_movhlps (__t3, __t2); } while (0)
 #include "third_party/intel/emmintrin.internal.h"
 #ifdef __DISABLE_SSE__
 #undef __DISABLE_SSE__
 #pragma GCC pop_options
 #endif
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_pause(void) {
-  __builtin_ia32_pause();
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+  __builtin_ia32_pause ();
 }
 #endif
 #endif
diff --git a/third_party/less/main.c b/third_party/less/main.c
index 21ed227ca..529fcb457 100644
--- a/third_party/less/main.c
+++ b/third_party/less/main.c
@@ -1,27 +1,27 @@
-asm(".ident\t\"\\n\\n\
-Less\\n\
-Copyright (C) 1984-2023  Mark Nudelman\\n\
-\\n\
-Redistribution and use in source and binary forms, with or without\\n\
-modification, are permitted provided that the following conditions\\n\
-are met:\\n\
-1. Redistributions of source code must retain the above copyright\\n\
-   notice, this list of conditions and the following disclaimer.\\n\
-2. Redistributions in binary form must reproduce the above copyright\\n\
-   notice in the documentation and/or other materials provided with\\n\
-   the distribution.\\n\
-\\n\
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY\\n\
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\\n\
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\\n\
-PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE\\n\
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\\n\
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT\\n\
-OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\\n\
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\\n\
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\\n\
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN\\n\
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\"");
+__notice(less_notice, "\
+Less\n\
+Copyright (C) 1984-2023  Mark Nudelman\n\
+\n\
+Redistribution and use in source and binary forms, with or without\n\
+modification, are permitted provided that the following conditions\n\
+are met:\n\
+1. Redistributions of source code must retain the above copyright\n\
+   notice, this list of conditions and the following disclaimer.\n\
+2. Redistributions in binary form must reproduce the above copyright\n\
+   notice in the documentation and/or other materials provided with\n\
+   the distribution.\n\
+\n\
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY\n\
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n\
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n\
+PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE\n\
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n\
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT\n\
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\n\
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n\
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\n\
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN\n\
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.");
 
 /*
  * Copyright (C) 1984-2023  Mark Nudelman
diff --git a/third_party/libcxx/BUILD.mk b/third_party/libcxx/BUILD.mk
index b2005f133..b3b29eeb7 100644
--- a/third_party/libcxx/BUILD.mk
+++ b/third_party/libcxx/BUILD.mk
@@ -223,6 +223,7 @@ $(THIRD_PARTY_LIBCXX_A_OBJS): private				\
 			-fdata-sections				\
 			-fexceptions				\
 			-frtti					\
+			-Wno-alloc-size-larger-than		\
 			-DLIBCXX_BUILDING_LIBCXXABI
 
 THIRD_PARTY_LIBCXX_LIBS = $(foreach x,$(THIRD_PARTY_LIBCXX_ARTIFACTS),$($(x)))
diff --git a/third_party/libcxx/__config b/third_party/libcxx/__config
index 0194906ef..d0d2c4b0e 100644
--- a/third_party/libcxx/__config
+++ b/third_party/libcxx/__config
@@ -78,7 +78,7 @@
 #if defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2
 // Change short string representation so that string data starts at offset 0,
 // improving its alignment in some cases.
-#  define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
+// #  define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
 // Fix deque iterator type in order to support incomplete types.
 #  define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE
 // Fix undefined behavior in how std::list stores its linked nodes.
@@ -332,7 +332,7 @@
 #if (defined(__APPLE__) && !defined(__i386__) && !defined(__x86_64__) &&       \
      (!defined(__arm__) || __ARM_ARCH_7K__ >= 2)) ||                           \
     defined(_LIBCPP_ALTERNATE_STRING_LAYOUT)
-#define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
+// #define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
 #endif
 
 #if __has_feature(cxx_alignas)
diff --git a/third_party/libcxx/cassert b/third_party/libcxx/cassert
index a3daa06a9..582a75434 100644
--- a/third_party/libcxx/cassert
+++ b/third_party/libcxx/cassert
@@ -17,7 +17,7 @@ Macros:
 */
 
 #include "third_party/libcxx/__config"
-#include "libc/assert.h"
+#include "libc/isystem/assert.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/cfenv b/third_party/libcxx/cfenv
index bba3dcd31..a66bf926a 100644
--- a/third_party/libcxx/cfenv
+++ b/third_party/libcxx/cfenv
@@ -53,7 +53,7 @@ int feupdateenv(const fenv_t* envp);
 */
 
 #include "third_party/libcxx/__config"
-#include "libc/runtime/fenv.h"
+#include "libc/isystem/fenv.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/cfloat b/third_party/libcxx/cfloat
index 9b5e73c3a..1886a4f96 100644
--- a/third_party/libcxx/cfloat
+++ b/third_party/libcxx/cfloat
@@ -70,8 +70,7 @@ Macros:
 */
 
 #include "third_party/libcxx/__config"
-#include "libc/math.h"
-#include "libc/runtime/fenv.h"
+#include "libc/isystem/float.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/cinttypes b/third_party/libcxx/cinttypes
index 61d1334dd..a57977fae 100644
--- a/third_party/libcxx/cinttypes
+++ b/third_party/libcxx/cinttypes
@@ -236,8 +236,7 @@ uintmax_t wcstoumax(const wchar_t* restrict nptr, wchar_t** restrict endptr, int
 
 #include "third_party/libcxx/__config"
 #include "third_party/libcxx/cstdint"
-#include "libc/inttypes.h"
-#include "libc/fmt/conv.h"
+#include "libc/isystem/inttypes.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/ciso646 b/third_party/libcxx/ciso646
index 6749b74c0..1b442a032 100644
--- a/third_party/libcxx/ciso646
+++ b/third_party/libcxx/ciso646
@@ -21,4 +21,6 @@
 #pragma GCC system_header
 #endif
 
+#include "libc/isystem/iso646.h"
+
 #endif  // _LIBCPP_CISO646
diff --git a/third_party/libcxx/clocale b/third_party/libcxx/clocale
index bad5ab57f..126c3521a 100644
--- a/third_party/libcxx/clocale
+++ b/third_party/libcxx/clocale
@@ -35,8 +35,7 @@ lconv* localeconv();
 */
 
 #include "third_party/libcxx/__config"
-#include "libc/str/unicode.h"
-#include "libc/str/locale.h"
+#include "libc/isystem/locale.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/csetjmp b/third_party/libcxx/csetjmp
index 9dc5f49bf..d9bfb5305 100644
--- a/third_party/libcxx/csetjmp
+++ b/third_party/libcxx/csetjmp
@@ -31,7 +31,7 @@ void longjmp(jmp_buf env, int val);
 */
 
 #include "third_party/libcxx/__config"
-#include "libc/runtime/runtime.h"
+#include "libc/isystem/setjmp.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/csignal b/third_party/libcxx/csignal
index 8530f6248..d6acd9032 100644
--- a/third_party/libcxx/csignal
+++ b/third_party/libcxx/csignal
@@ -40,12 +40,7 @@ int raise(int sig);
 */
 
 #include "third_party/libcxx/__config"
-#include "libc/calls/calls.h"
-#include "libc/calls/struct/sigaction.h"
-#include "libc/calls/struct/siginfo.h"
-#include "libc/sysv/consts/sa.h"
-#include "libc/sysv/consts/sig.h"
-#include "libc/sysv/consts/sicode.h"
+#include "libc/isystem/signal.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/cstdarg b/third_party/libcxx/cstdarg
index d3c9a734a..9564fd3a8 100644
--- a/third_party/libcxx/cstdarg
+++ b/third_party/libcxx/cstdarg
@@ -11,6 +11,7 @@
 #define _LIBCPP_CSTDARG
 
 #include "third_party/libcxx/__config"
+#include "libc/isystem/stdarg.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/cstdbool b/third_party/libcxx/cstdbool
index a32e40195..0ea403fd7 100644
--- a/third_party/libcxx/cstdbool
+++ b/third_party/libcxx/cstdbool
@@ -20,6 +20,7 @@ Macros:
 */
 
 #include "third_party/libcxx/__config"
+#include "libc/isystem/stdbool.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/cstddef b/third_party/libcxx/cstddef
index a20079083..f3672dba6 100644
--- a/third_party/libcxx/cstddef
+++ b/third_party/libcxx/cstddef
@@ -35,6 +35,7 @@ Types:
 
 #include "third_party/libcxx/__config"
 #include "third_party/libcxx/version"
+#include "libc/isystem/stddef.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/cstdint b/third_party/libcxx/cstdint
index 8ebad1acb..f1dffe83f 100644
--- a/third_party/libcxx/cstdint
+++ b/third_party/libcxx/cstdint
@@ -10,11 +10,7 @@
 #ifndef _LIBCPP_CSTDINT
 #define _LIBCPP_CSTDINT
 
-#include "libc/inttypes.h"
-#include "libc/fmt/conv.h"
-#include "libc/limits.h"
-#include "libc/literal.h"
-#include "libc/calls/weirdtypes.h"
+#include "libc/isystem/stdint.h"
 #include "third_party/libcxx/__config"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/third_party/libcxx/cstdlib b/third_party/libcxx/cstdlib
index e2c88b522..818ba2ccf 100644
--- a/third_party/libcxx/cstdlib
+++ b/third_party/libcxx/cstdlib
@@ -13,6 +13,7 @@
 #include "third_party/libcxx/__config"
 #include "libc/str/str.h"
 #include "third_party/libcxx/stdlib.h"
+#include "libc/isystem/stdlib.h"
 
 /*
     cstdlib synopsis
diff --git a/third_party/libcxx/ctime b/third_party/libcxx/ctime
index 4879de5bb..37f121b7e 100644
--- a/third_party/libcxx/ctime
+++ b/third_party/libcxx/ctime
@@ -11,15 +11,7 @@
 #define _LIBCPP_CTIME
 
 #include "third_party/libcxx/__config"
-#include "libc/calls/struct/timespec.h"
-#include "libc/calls/struct/timeval.h"
-#include "libc/sysv/consts/clock.h"
-#include "libc/sysv/consts/sched.h"
-#include "libc/sysv/consts/timer.h"
-#include "libc/calls/weirdtypes.h"
-#include "libc/time/struct/tm.h"
-#include "libc/calls/calls.h"
-#include "libc/time/time.h"
+#include "libc/isystem/time.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/ctype.h b/third_party/libcxx/ctype.h
index e6a95b2ca..b89d403ec 100644
--- a/third_party/libcxx/ctype.h
+++ b/third_party/libcxx/ctype.h
@@ -35,7 +35,7 @@ int toupper(int c);
 #pragma GCC system_header
 #endif
 
-#include "libc/str/str.h"
+#include "libc/isystem/ctype.h"
 
 #ifdef __cplusplus
 
diff --git a/third_party/libcxx/errno.h b/third_party/libcxx/errno.h
index c1210f888..1dfe334b6 100644
--- a/third_party/libcxx/errno.h
+++ b/third_party/libcxx/errno.h
@@ -28,7 +28,7 @@ Macros:
 #pragma GCC system_header
 #endif
 
-#include "libc/errno.h"
+#include "libc/isystem/errno.h"
 
 #ifdef __cplusplus
 
diff --git a/third_party/libcxx/limits.h b/third_party/libcxx/limits.h
index b78191a71..379e34fcf 100644
--- a/third_party/libcxx/limits.h
+++ b/third_party/libcxx/limits.h
@@ -43,22 +43,6 @@ Macros:
 #pragma GCC system_header
 #endif
 
-#ifndef __GNUC__
-#include "libc/limits.h"
-#else
-// GCC header limits.h recursively includes itself through another header called
-// syslimits.h for some reason. This setup breaks down if we directly
-// #include_next GCC's limits.h (reasons not entirely clear to me). Therefore,
-// we manually re-create the necessary include sequence below:
-
-// Get the system limits.h defines (force recurse into the next level)
-#define _GCC_LIMITS_H_
-#define _GCC_NEXT_LIMITS_H
-#include "libc/limits.h"
-
-// Get the ISO C defines
-#undef _GCC_LIMITS_H_
-#include "libc/limits.h"
-#endif // __GNUC__
+#include "libc/isystem/limits.h"
 
 #endif // _LIBCPP_LIMITS_H
diff --git a/third_party/libcxx/locale.h b/third_party/libcxx/locale.h
index bcf7c1a3f..dd61d9df2 100644
--- a/third_party/libcxx/locale.h
+++ b/third_party/libcxx/locale.h
@@ -39,7 +39,6 @@ Functions:
 #pragma GCC system_header
 #endif
 
-#include "libc/str/locale.h"
-#include "libc/str/unicode.h"
+#include "libc/isystem/locale.h"
 
 #endif // _LIBCPP_LOCALE_H
diff --git a/third_party/libcxx/math.h b/third_party/libcxx/math.h
index 6bb8d562c..9171afc7c 100644
--- a/third_party/libcxx/math.h
+++ b/third_party/libcxx/math.h
@@ -19,7 +19,7 @@
 #define _LIBCPP_STDLIB_INCLUDE_NEXT
 #include "third_party/libcxx/stdlib.h"
 
-#include "libc/math.h"
+#include "libc/isystem/math.h"
 
 #ifdef __cplusplus
 
diff --git a/third_party/libcxx/stdio.h b/third_party/libcxx/stdio.h
index c16c2d66e..79c965488 100644
--- a/third_party/libcxx/stdio.h
+++ b/third_party/libcxx/stdio.h
@@ -102,9 +102,7 @@ void perror(const char* s);
 #pragma GCC system_header
 #endif
 
-#include "libc/calls/calls.h"
-#include "libc/temp.h"
-#include "libc/stdio/stdio.h"
+#include "libc/isystem/stdio.h"
 
 #ifdef __cplusplus
 
diff --git a/third_party/libcxx/stdlib.h b/third_party/libcxx/stdlib.h
index a561dc78f..aea595b06 100644
--- a/third_party/libcxx/stdlib.h
+++ b/third_party/libcxx/stdlib.h
@@ -9,13 +9,7 @@
 
 #if defined(__need_malloc_and_calloc) || defined(_LIBCPP_STDLIB_INCLUDE_NEXT)
 
-#include "libc/stdio/rand.h"
-#include "libc/mem/mem.h"
-#include "libc/runtime/runtime.h"
-#include "libc/runtime/runtime.h"
-#include "libc/mem/alg.h"
-#include "libc/stdio/stdio.h"
-#include "libc/fmt/conv.h"
+#include "libc/isystem/stdlib.h"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/third_party/libcxx/string b/third_party/libcxx/string
index ea0b695ef..c8c822545 100644
--- a/third_party/libcxx/string
+++ b/third_party/libcxx/string
@@ -702,34 +702,41 @@ private:
 
 #ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
 
-    struct __long
-    {
-        pointer   __data_;
+    struct __long {
+        pointer __data_;
         size_type __size_;
-        size_type __cap_;
+        size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
+        size_type __is_long_ : 1;
     };
 
-#ifdef _LIBCPP_BIG_ENDIAN
-    static const size_type __short_mask = 0x01;
-    static const size_type __long_mask  = 0x1ul;
-#else  // _LIBCPP_BIG_ENDIAN
-    static const size_type __short_mask = 0x80;
-    static const size_type __long_mask  = ~(size_type(~0) >> 1);
-#endif  // _LIBCPP_BIG_ENDIAN
+    enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 };
 
-    enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
-                      (sizeof(__long) - 1)/sizeof(value_type) : 2};
-
-    struct __short
-    {
+    struct __short {
         value_type __data_[__min_cap];
-        struct
-            : __padding<value_type>
-        {
-            unsigned char __size_;
-        };
+        unsigned char __padding_[sizeof(value_type) - 1];
+        unsigned char __size_    : 7;
+        unsigned char __is_long_ : 1;
     };
 
+    // The __endian_factor is required because the field we use to store the size
+    // has one fewer bit than it would if it were not a bitfield.
+    //
+    // If the LSB is used to store the short-flag in the short string representation,
+    // we have to multiply the size by two when it is stored and divide it by two when
+    // it is loaded to make sure that we always store an even number. In the long string
+    // representation, we can ignore this because we can assume that we always allocate
+    // an even amount of value_types.
+    //
+    // If the MSB is used for the short-flag, the max_size() is numeric_limits<size_type>::max() / 2.
+    // This does not impact the short string representation, since we never need the MSB
+    // for representing the size of a short string anyway.
+
+#  ifdef _LIBCPP_BIG_ENDIAN
+    static const size_type __endian_factor = 2;
+#  else
+    static const size_type __endian_factor = 1;
+#  endif
+
 #else
 
     struct __long
diff --git a/third_party/libcxx/string.h b/third_party/libcxx/string.h
index 5490cbc8d..abd6be931 100644
--- a/third_party/libcxx/string.h
+++ b/third_party/libcxx/string.h
@@ -16,7 +16,7 @@
 #pragma GCC system_header
 #endif
 
-#include "libc/str/str.h"
+#include "libc/isystem/string.h"
 
 /*
     string.h synopsis
diff --git a/third_party/libcxx/vector b/third_party/libcxx/vector
index 014c8a51d..91735b728 100644
--- a/third_party/libcxx/vector
+++ b/third_party/libcxx/vector
@@ -1456,6 +1456,8 @@ vector<_Tp, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __las
     else
     {
         __vdeallocate();
+        // [jart] make compiler not complain about maximum object size
+        __asm__("" : "+r"(__new_size));
         __vallocate(__recommend(__new_size));
         __construct_at_end(__first, __last, __new_size);
     }
diff --git a/third_party/libcxx/wchar.h b/third_party/libcxx/wchar.h
index c5e80d9bd..17996b194 100644
--- a/third_party/libcxx/wchar.h
+++ b/third_party/libcxx/wchar.h
@@ -9,8 +9,7 @@
 
 #ifndef _LIBCPP_WCHAR_H
 #define _LIBCPP_WCHAR_H
-#include "libc/str/str.h"
-#include "libc/time/time.h"
+#include "libc/isystem/wchar.h"
 #include "third_party/libcxx/__config"
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/third_party/libcxx/wctype.h b/third_party/libcxx/wctype.h
index 1ea8f5652..b256d58dd 100644
--- a/third_party/libcxx/wctype.h
+++ b/third_party/libcxx/wctype.h
@@ -50,8 +50,7 @@ wctrans_t wctrans(const char* property);
 #pragma GCC system_header
 #endif
 
-#include "libc/str/str.h"
-#include "libc/time/time.h"
+#include "libc/isystem/wctype.h"
 
 #ifdef __cplusplus
 
diff --git a/third_party/libcxxabi/test/BUILD.mk b/third_party/libcxxabi/test/BUILD.mk
index 8bfd39d40..6bc0d5861 100644
--- a/third_party/libcxxabi/test/BUILD.mk
+++ b/third_party/libcxxabi/test/BUILD.mk
@@ -32,26 +32,27 @@ THIRD_PARTY_LIBCXXABI_TEST_SRCS =					\
 	third_party/libcxxabi/test/cxa_thread_atexit_test.pass.cc	\
 	third_party/libcxxabi/test/cxa_vec_new_overflow_PR41395.pass.cc	\
 	third_party/libcxxabi/test/dynamic_cast.pass.cc			\
+	third_party/libcxxabi/test/dynamic_cast14.pass.cc		\
 	third_party/libcxxabi/test/dynamic_cast3.pass.cc		\
 	third_party/libcxxabi/test/dynamic_cast5.pass.cc		\
-	third_party/libcxxabi/test/dynamic_cast14.pass.cc		\
 	third_party/libcxxabi/test/dynamic_cast_stress.pass.cc		\
-	third_party/libcxxabi/test/exception_object_alignment.pass.cc	\
 	third_party/libcxxabi/test/exception_object_alignment.2.pass.cc	\
+	third_party/libcxxabi/test/exception_object_alignment.pass.cc	\
 	third_party/libcxxabi/test/guard_test_basic.pass.cc		\
-	third_party/libcxxabi/test/incomplete_type.sh.cc		\
+	third_party/libcxxabi/test/incomplete_type_test.pass.cc		\
+	third_party/libcxxabi/test/incomplete_type_test.lib.cc		\
 	third_party/libcxxabi/test/inherited_exception.pass.cc		\
 	third_party/libcxxabi/test/test_aux_runtime.pass.cc		\
 	third_party/libcxxabi/test/test_aux_runtime_op_array_new.pass.cc\
 	third_party/libcxxabi/test/test_demangle.pass.cc		\
-	third_party/libcxxabi/test/test_exception_address_alignment.pass.cc	\
+	third_party/libcxxabi/test/test_exception_address_alignment.pass.cc \
 	third_party/libcxxabi/test/test_exception_storage.pass.cc	\
 	third_party/libcxxabi/test/test_fallback_malloc.pass.cc		\
 	third_party/libcxxabi/test/test_guard.pass.cc			\
 	third_party/libcxxabi/test/test_vector1.pass.cc			\
 	third_party/libcxxabi/test/test_vector2.pass.cc			\
-	third_party/libcxxabi/test/thread_local_destruction_order.pass.cc	\
 	third_party/libcxxabi/test/test_vector3.pass.cc			\
+	third_party/libcxxabi/test/thread_local_destruction_order.pass.cc \
 	third_party/libcxxabi/test/uncaught_exception.pass.cc		\
 	third_party/libcxxabi/test/uncaught_exceptions.pass.cc		\
 	third_party/libcxxabi/test/unittest_demangle.pass.cc		\
@@ -69,33 +70,21 @@ THIRD_PARTY_LIBCXXABI_TEST_SRCS_TOOSLOW_COSMO =				\
 THIRD_PARTY_LIBCXXABI_TEST_SRCS_FAILING_GCC =				\
 	third_party/libcxxabi/test/catch_array_01.pass.cc		\
 	third_party/libcxxabi/test/catch_function_01.pass.cc		\
-	third_party/libcxxabi/test/catch_member_function_pointer_01.pass.cc	\
+	third_party/libcxxabi/test/catch_member_function_pointer_01.pass.cc \
 	third_party/libcxxabi/test/catch_member_function_pointer_02.pass.cc
 
+THIRD_PARTY_LIBCXXABI_TEST_OBJS =					\
+	$(THIRD_PARTY_LIBCXXABI_TEST_SRCS:%.cc=o/$(MODE)/%.o)
+
+THIRD_PARTY_LIBCXXABI_TEST_COMS_SRCS =					\
+	$(filter-out %.pass.cc,$(THIRD_PARTY_LIBCXXABI_TEST_SRCS))
+
 THIRD_PARTY_LIBCXXABI_TEST_COMS =					\
-	$(THIRD_PARTY_LIBCXXABI_TEST_SRCS:%.cc=o/$(MODE)/%.com)
+	$(THIRD_PARTY_LIBCXXABI_TEST_COMS_SRCS:%.cc=o/$(MODE)/%.com)
 
 THIRD_PARTY_LIBCXXABI_TEST_TESTS =					\
 	$(THIRD_PARTY_LIBCXXABI_TEST_COMS:%=%.ok)
 
-THIRD_PARTY_LIBCXXABI_TEST_OBJS =					\
-	$(THIRD_PARTY_LIBCXXABI_TEST_SRCS:%.cc=o/$(MODE)/%.o)		\
-	o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.one.o	\
-	o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.two.o
-
-THIRD_PARTY_LIBCXXABI_TEST_OBJS_WNO_EXCEPTIONS =			\
-	o/$(MODE)/third_party/libcxxabi/test/catch_class_03.pass.o	\
-	o/$(MODE)/third_party/libcxxabi/test/catch_class_04.pass.o	\
-	o/$(MODE)/third_party/libcxxabi/test/catch_ptr.pass.o		\
-	o/$(MODE)/third_party/libcxxabi/test/catch_ptr_02.pass.o	\
-	o/$(MODE)/third_party/libcxxabi/test/inherited_exception.pass.o
-
-THIRD_PARTY_LIBCXXABI_TEST_OBJS_CPP14 =					\
-	o/$(MODE)/third_party/libcxxabi/test/unwind_02.pass.o		\
-	o/$(MODE)/third_party/libcxxabi/test/unwind_03.pass.o		\
-	o/$(MODE)/third_party/libcxxabi/test/unwind_04.pass.o		\
-	o/$(MODE)/third_party/libcxxabi/test/unwind_05.pass.o
-
 THIRD_PARTY_LIBCXXABI_TEST_BINS =					\
 	$(THIRD_PARTY_LIBCXXABI_TEST_COMS)				\
 	$(THIRD_PARTY_LIBCXXABI_TEST_COMS:%=%.dbg)
@@ -105,19 +94,29 @@ THIRD_PARTY_LIBCXXABI_TEST_CHECKS =					\
 	$(THIRD_PARTY_LIBCXXABI_TEST_HDRS:%=o/$(MODE)/%.ok)
 
 THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS =					\
-	LIBC_NEXGEN32E							\
+	LIBC_CALLS							\
+	LIBC_INTRIN							\
 	LIBC_LOG							\
+	LIBC_MEM							\
+	LIBC_NEXGEN32E							\
+	LIBC_PROC							\
+	LIBC_RUNTIME							\
+	LIBC_STDIO							\
+	LIBC_THREAD							\
 	THIRD_PARTY_LIBCXX						\
-	THIRD_PARTY_LIBCXXABI
+	THIRD_PARTY_LIBCXXABI						\
+	THIRD_PARTY_LIBUNWIND
 
 THIRD_PARTY_LIBCXXABI_TEST_DEPS :=					\
 	$(call uniq,$(foreach x,$(THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS),$($(x))))
 
 $(THIRD_PARTY_LIBCXXABI_TEST_A):					\
-		$(THIRD_PARTY_LIBCXXABI_TEST_A).pkg
+		$(THIRD_PARTY_LIBCXXABI_TEST_A).pkg			\
+		$(THIRD_PARTY_LIBCXXABI_TEST_OBJS)
 
 $(THIRD_PARTY_LIBCXXABI_TEST_A).pkg:					\
-		$(foreach x,$(THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS),$($(x)_A).pkg)
+		$(foreach x,$(THIRD_PARTY_LIBCXXABI_TEST_DIRECTDEPS),$($(x)_A).pkg) \
+		$(THIRD_PARTY_LIBCXXABI_TEST_OBJS)
 
 o/$(MODE)/third_party/libcxxabi/test/%.com.dbg:				\
 		$(THIRD_PARTY_LIBCXXABI_TEST_DEPS)			\
@@ -137,38 +136,27 @@ $(THIRD_PARTY_LIBCXXABI_TEST_OBJS): private				\
 			-D_LIBCPP_BUILDING_LIBRARY			\
 			-D_LIBCPP_CONSTINIT=__constinit
 
-$(THIRD_PARTY_LIBCXXABI_TEST_OBJS_WNO_EXCEPTIONS): private		\
-		CXXFLAGS +=						\
-			-Wno-exceptions
-
-$(THIRD_PARTY_LIBCXXABI_TEST_OBJS_CPP14): private			\
-		CXXFLAGS +=						\
-			-std=gnu++14
-
-o/$(MODE)/third_party/libcxxabi/test/guard_test_basic.pass.o: private	\
-		CXXFLAGS +=						\
-			-Wno-invalid-memory-model
-
-o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.one.o: private	\
-		CXXFLAGS +=						\
-			-Wno-unreachable-code
-o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.two.o: private	\
-		CXXFLAGS +=						\
-			-Wno-unreachable-code				\
-			-DTU_ONE
-o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.com.dbg:	\
-		$(THIRD_PARTY_LIBCXXABI_TEST_DEPS)			\
-		$(THIRD_PARTY_LIBCXXABI_TEST_A)				\
-		o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.one.o	\
-		o/$(MODE)/third_party/libcxxabi/test/incomplete_type.sh.two.o	\
-		$(THIRD_PARTY_LIBCXXABI_TEST_A).pkg			\
-		$(CRT)							\
-		$(APE_NO_MODIFY_SELF)
-	@$(APELINK)
-
 $(THIRD_PARTY_LIBCXXABI_TEST_OBJS): private CONFIG_CPPFLAGS += -UNDEBUG
 o/$(MODE)/third_party/libcxxabi/test/catch_multi_level_pointer.pass.o: private COPTS += -O0
 o/$(MODE)/third_party/libcxxabi/test/catch_multi_level_pointer.pass.o: private QUOTA += -C30 -M4000m
+o/$(MODE)/third_party/libcxxabi/test/guard_test_basic.pass.o: private CXXFLAGS += -Wno-invalid-memory-model
+o/$(MODE)/third_party/libcxxabi/test/incomplete_type_test.pass.o: private CXXFLAGS += -Wno-unreachable-code
+o/$(MODE)/third_party/libcxxabi/test/incomplete_type_test.lib.o: private CXXFLAGS += -Wno-unreachable-code -DTU_ONE
+
+o/$(MODE)/third_party/libcxxabi/test/catch_class_03.pass.o		\
+o/$(MODE)/third_party/libcxxabi/test/catch_class_04.pass.o		\
+o/$(MODE)/third_party/libcxxabi/test/catch_ptr.pass.o			\
+o/$(MODE)/third_party/libcxxabi/test/catch_ptr_02.pass.o		\
+o/$(MODE)/third_party/libcxxabi/test/inherited_exception.pass.o: private \
+		CXXFLAGS +=						\
+			-Wno-exceptions
+
+o/$(MODE)/third_party/libcxxabi/test/unwind_02.pass.o			\
+o/$(MODE)/third_party/libcxxabi/test/unwind_03.pass.o			\
+o/$(MODE)/third_party/libcxxabi/test/unwind_04.pass.o			\
+o/$(MODE)/third_party/libcxxabi/test/unwind_05.pass.o: private		\
+		CXXFLAGS +=						\
+			-std=gnu++14
 
 .PHONY: o/$(MODE)/third_party/libcxxabi/test
 o/$(MODE)/third_party/libcxxabi/test:					\
diff --git a/third_party/libcxxabi/test/incomplete_type.sh.one.cc b/third_party/libcxxabi/test/incomplete_type.sh.one.cc
deleted file mode 120000
index 100e3f363..000000000
--- a/third_party/libcxxabi/test/incomplete_type.sh.one.cc
+++ /dev/null
@@ -1 +0,0 @@
-incomplete_type.sh.cc
\ No newline at end of file
diff --git a/third_party/libcxxabi/test/incomplete_type.sh.two.cc b/third_party/libcxxabi/test/incomplete_type.sh.two.cc
deleted file mode 120000
index 100e3f363..000000000
--- a/third_party/libcxxabi/test/incomplete_type.sh.two.cc
+++ /dev/null
@@ -1 +0,0 @@
-incomplete_type.sh.cc
\ No newline at end of file
diff --git a/third_party/libcxxabi/test/incomplete_type_test.lib.cc b/third_party/libcxxabi/test/incomplete_type_test.lib.cc
new file mode 120000
index 000000000..a10bb894c
--- /dev/null
+++ b/third_party/libcxxabi/test/incomplete_type_test.lib.cc
@@ -0,0 +1 @@
+incomplete_type_test.pass.cc
\ No newline at end of file
diff --git a/third_party/libcxxabi/test/incomplete_type.sh.cc b/third_party/libcxxabi/test/incomplete_type_test.pass.cc
similarity index 100%
rename from third_party/libcxxabi/test/incomplete_type.sh.cc
rename to third_party/libcxxabi/test/incomplete_type_test.pass.cc
diff --git a/third_party/linenoise/linenoise.c b/third_party/linenoise/linenoise.c
index c6386b2ba..cf673ba1f 100644
--- a/third_party/linenoise/linenoise.c
+++ b/third_party/linenoise/linenoise.c
@@ -172,11 +172,11 @@
 #include "net/http/escape.h"
 #include "tool/build/lib/case.h"
 
-asm(".ident\t\"\\n\\n\
-Cosmopolitan Linenoise (BSD-2)\\n\
-Copyright 2018-2020 Justine Tunney <jtunney@gmail.com>\\n\
-Copyright 2010-2016 Salvatore Sanfilippo <antirez@gmail.com>\\n\
-Copyright 2010-2013 Pieter Noordhuis <pcnoordhuis@gmail.com>\"");
+__notice(linenoise_notice, "\
+Cosmopolitan Linenoise (BSD-2)\n\
+Copyright 2018-2020 Justine Tunney <jtunney@gmail.com>\n\
+Copyright 2010-2016 Salvatore Sanfilippo <antirez@gmail.com>\n\
+Copyright 2010-2013 Pieter Noordhuis <pcnoordhuis@gmail.com>");
 
 #define LINENOISE_POLL_MS 50
 
@@ -2644,10 +2644,7 @@ static void linenoiseAtExit(void) {
   linenoiseRingFree();
 }
 
+__attribute__((__constructor__(99)))
 static textstartup void linenoiseInit() {
   atexit(linenoiseAtExit);
 }
-
-const void *const linenoiseCtor[] initarray = {
-    linenoiseInit,
-};
diff --git a/third_party/lua/BUILD.mk b/third_party/lua/BUILD.mk
index 60a73e960..bd62923b2 100644
--- a/third_party/lua/BUILD.mk
+++ b/third_party/lua/BUILD.mk
@@ -85,6 +85,7 @@ THIRD_PARTY_LUA_A_SRCS =						\
 	third_party/lua/llock.c						\
 	third_party/lua/lmathlib.c					\
 	third_party/lua/lmem.c						\
+	third_party/lua/lnotice.c					\
 	third_party/lua/loadlib.c					\
 	third_party/lua/lobject.c					\
 	third_party/lua/lopcodes.c					\
diff --git a/third_party/lua/lapi.c b/third_party/lua/lapi.c
index 15230810a..ae8988f2c 100644
--- a/third_party/lua/lapi.c
+++ b/third_party/lua/lapi.c
@@ -42,11 +42,7 @@
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lundump.h"
 #include "third_party/lua/lvm.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 const char lua_ident[] =
diff --git a/third_party/lua/lauxlib.c b/third_party/lua/lauxlib.c
index a1555ba18..1fa74d1f2 100644
--- a/third_party/lua/lauxlib.c
+++ b/third_party/lua/lauxlib.c
@@ -34,11 +34,7 @@
 #include "third_party/lua/lauxlib.h"
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /**
diff --git a/third_party/lua/lbaselib.c b/third_party/lua/lbaselib.c
index f5ab58b16..5e936c406 100644
--- a/third_party/lua/lbaselib.c
+++ b/third_party/lua/lbaselib.c
@@ -32,11 +32,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 static int luaB_print (lua_State *L) {
diff --git a/third_party/lua/lcode.c b/third_party/lua/lcode.c
index 6468d2243..c6c9c0a2b 100644
--- a/third_party/lua/lcode.c
+++ b/third_party/lua/lcode.c
@@ -42,11 +42,7 @@
 #include "third_party/lua/ltable.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lvm.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /* Maximum number of registers in a Lua function (must fit in 8 bits) */
diff --git a/third_party/lua/lcorolib.c b/third_party/lua/lcorolib.c
index e15b3e749..be6f75f5d 100644
--- a/third_party/lua/lcorolib.c
+++ b/third_party/lua/lcorolib.c
@@ -31,11 +31,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 static lua_State *getco (lua_State *L) {
diff --git a/third_party/lua/ldblib.c b/third_party/lua/ldblib.c
index 8918ffcbe..74f6ed32b 100644
--- a/third_party/lua/ldblib.c
+++ b/third_party/lua/ldblib.c
@@ -32,11 +32,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/ldebug.c b/third_party/lua/ldebug.c
index 16197115f..8e103095f 100644
--- a/third_party/lua/ldebug.c
+++ b/third_party/lua/ldebug.c
@@ -42,11 +42,7 @@
 #include "third_party/lua/ltm.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lvm.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 #define noLuaClosure(f)		((f) == NULL || (f)->c.tt == LUA_VCCL)
diff --git a/third_party/lua/ldo.c b/third_party/lua/ldo.c
index 1978b1ff4..abfa70719 100644
--- a/third_party/lua/ldo.c
+++ b/third_party/lua/ldo.c
@@ -51,11 +51,7 @@
 #include "third_party/lua/lundump.h"
 #include "third_party/lua/lvm.h"
 #include "third_party/lua/lzio.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 #define errorstatus(s)	((s) > LUA_YIELD)
diff --git a/third_party/lua/ldump.c b/third_party/lua/ldump.c
index 8e4d6b7c0..a1e9666b1 100644
--- a/third_party/lua/ldump.c
+++ b/third_party/lua/ldump.c
@@ -32,11 +32,7 @@
 #include "third_party/lua/lstate.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lundump.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 typedef struct {
diff --git a/third_party/lua/lfunc.c b/third_party/lua/lfunc.c
index b3e69eef3..7ff7cafd0 100644
--- a/third_party/lua/lfunc.c
+++ b/third_party/lua/lfunc.c
@@ -37,11 +37,7 @@
 #include "third_party/lua/lstate.h"
 #include "third_party/lua/ltm.h"
 #include "third_party/lua/lua.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 CClosure *luaF_newCclosure (lua_State *L, int nupvals) {
diff --git a/third_party/lua/lgc.c b/third_party/lua/lgc.c
index 57027bb3b..a52142624 100644
--- a/third_party/lua/lgc.c
+++ b/third_party/lua/lgc.c
@@ -40,11 +40,7 @@
 #include "third_party/lua/ltable.h"
 #include "third_party/lua/ltm.h"
 #include "third_party/lua/lua.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/linit.c b/third_party/lua/linit.c
index e625cc728..2ebb7fbd0 100644
--- a/third_party/lua/linit.c
+++ b/third_party/lua/linit.c
@@ -46,11 +46,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/liolib.c b/third_party/lua/liolib.c
index 30cc43815..c8bc69076 100644
--- a/third_party/lua/liolib.c
+++ b/third_party/lua/liolib.c
@@ -37,11 +37,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/llex.c b/third_party/lua/llex.c
index 3e1446b7f..688d2a3b0 100644
--- a/third_party/lua/llex.c
+++ b/third_party/lua/llex.c
@@ -40,11 +40,7 @@
 #include "third_party/lua/ltable.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lzio.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 #define next(ls)	(ls->current = zgetc(ls->z))
diff --git a/third_party/lua/lmathlib.c b/third_party/lua/lmathlib.c
index b0968ed16..8b1a291f4 100644
--- a/third_party/lua/lmathlib.c
+++ b/third_party/lua/lmathlib.c
@@ -34,11 +34,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 #undef PI
diff --git a/third_party/lua/lmem.c b/third_party/lua/lmem.c
index 7d0ee1648..780e01bf8 100644
--- a/third_party/lua/lmem.c
+++ b/third_party/lua/lmem.c
@@ -36,11 +36,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lstate.h"
 #include "third_party/lua/lua.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 #if defined(EMERGENCYGCTESTS)
diff --git a/third_party/lua/lnotice.c b/third_party/lua/lnotice.c
new file mode 100644
index 000000000..9eb4e4e17
--- /dev/null
+++ b/third_party/lua/lnotice.c
@@ -0,0 +1,3 @@
+__notice(lua_notice, "\
+Lua 5.4.3 (MIT License)\n\
+Copyright 1994–2021 Lua.org, PUC-Rio.");
diff --git a/third_party/lua/loadlib.c b/third_party/lua/loadlib.c
index 0feae4d7f..43bf0168d 100644
--- a/third_party/lua/loadlib.c
+++ b/third_party/lua/loadlib.c
@@ -34,11 +34,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 /*
 ** This module contains an implementation of loadlib for Unix systems
diff --git a/third_party/lua/lobject.c b/third_party/lua/lobject.c
index 24e03a9d6..d855ac166 100644
--- a/third_party/lua/lobject.c
+++ b/third_party/lua/lobject.c
@@ -38,11 +38,7 @@
 #include "third_party/lua/lstring.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lvm.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 static lua_Integer intarith (lua_State *L, int op, lua_Integer v1,
diff --git a/third_party/lua/lopcodes.c b/third_party/lua/lopcodes.c
index 75558af3a..90d726553 100644
--- a/third_party/lua/lopcodes.c
+++ b/third_party/lua/lopcodes.c
@@ -29,11 +29,7 @@
 #define LUA_CORE
 #include "third_party/lua/lopcodes.h"
 #include "third_party/lua/lprefix.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /* ORDER OP */
diff --git a/third_party/lua/loslib.c b/third_party/lua/loslib.c
index 647e473b9..4f979c59f 100644
--- a/third_party/lua/loslib.c
+++ b/third_party/lua/loslib.c
@@ -41,11 +41,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/lparser.c b/third_party/lua/lparser.c
index 96f89293e..79e598fc1 100644
--- a/third_party/lua/lparser.c
+++ b/third_party/lua/lparser.c
@@ -42,11 +42,7 @@
 #include "third_party/lua/lstring.h"
 #include "third_party/lua/ltable.h"
 #include "third_party/lua/lua.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /* maximum number of local variables per function (must be smaller
diff --git a/third_party/lua/lrepl.c b/third_party/lua/lrepl.c
index 9a4236ac5..ec65fd069 100644
--- a/third_party/lua/lrepl.c
+++ b/third_party/lua/lrepl.c
@@ -48,11 +48,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 static const char *const kKeywordHints[] = {
diff --git a/third_party/lua/lstate.c b/third_party/lua/lstate.c
index f2479b38c..e44a5cf31 100644
--- a/third_party/lua/lstate.c
+++ b/third_party/lua/lstate.c
@@ -42,11 +42,7 @@
 #include "third_party/lua/ltable.h"
 #include "third_party/lua/ltm.h"
 #include "third_party/lua/lua.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/lstring.c b/third_party/lua/lstring.c
index 9aa128242..68efe07a0 100644
--- a/third_party/lua/lstring.c
+++ b/third_party/lua/lstring.c
@@ -36,11 +36,7 @@
 #include "third_party/lua/lstate.h"
 #include "third_party/lua/lstring.h"
 #include "third_party/lua/lua.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/lstrlib.c b/third_party/lua/lstrlib.c
index 45aecbdbc..60db68c4a 100644
--- a/third_party/lua/lstrlib.c
+++ b/third_party/lua/lstrlib.c
@@ -34,11 +34,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/ltable.c b/third_party/lua/ltable.c
index a00128613..63f2867f8 100644
--- a/third_party/lua/ltable.c
+++ b/third_party/lua/ltable.c
@@ -38,11 +38,7 @@
 #include "third_party/lua/ltable.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lvm.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/ltablib.c b/third_party/lua/ltablib.c
index b7a524445..fe8a1fe62 100644
--- a/third_party/lua/ltablib.c
+++ b/third_party/lua/ltablib.c
@@ -34,11 +34,7 @@
 #include "third_party/lua/lprefix.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/ltests.c b/third_party/lua/ltests.c
index b96967efb..10f18d0dc 100644
--- a/third_party/lua/ltests.c
+++ b/third_party/lua/ltests.c
@@ -43,11 +43,7 @@
 #include "third_party/lua/ltable.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/ltm.c b/third_party/lua/ltm.c
index 59b9b8dd4..b7a0d3914 100644
--- a/third_party/lua/ltm.c
+++ b/third_party/lua/ltm.c
@@ -38,11 +38,7 @@
 #include "third_party/lua/ltm.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lvm.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 static const char udatatypename[] = "userdata";
diff --git a/third_party/lua/lua.main.c b/third_party/lua/lua.main.c
index f7155e185..a49b7ae58 100644
--- a/third_party/lua/lua.main.c
+++ b/third_party/lua/lua.main.c
@@ -50,11 +50,7 @@
 #include "third_party/lua/lualib.h"
 #include "third_party/lua/lunix.h"
 #include "tool/args/args.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 STATIC_STACK_ALIGN(GetStackSize());
 
diff --git a/third_party/lua/luac.main.c b/third_party/lua/luac.main.c
index c5765d888..8b22aa237 100644
--- a/third_party/lua/luac.main.c
+++ b/third_party/lua/luac.main.c
@@ -43,11 +43,7 @@
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
 #include "third_party/lua/lundump.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 static void PrintFunction(const Proto* f, int full);
 #define luaU_print	PrintFunction
diff --git a/third_party/lua/lvm.c b/third_party/lua/lvm.c
index feaaf9cb4..36414ef32 100644
--- a/third_party/lua/lvm.c
+++ b/third_party/lua/lvm.c
@@ -41,11 +41,7 @@
 #include "third_party/lua/ltm.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lvm.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 /*
diff --git a/third_party/lua/lzio.c b/third_party/lua/lzio.c
index da9e2f6c9..3c674495f 100644
--- a/third_party/lua/lzio.c
+++ b/third_party/lua/lzio.c
@@ -34,11 +34,7 @@
 #include "third_party/lua/lstate.h"
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lzio.h"
-
-asm(".ident\t\"\\n\\n\
-Lua 5.4.3 (MIT License)\\n\
-Copyright 1994–2021 Lua.org, PUC-Rio.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("lua_notice");
 
 
 int luaZ_fill (ZIO *z) {
diff --git a/third_party/lz4cli/BUILD.mk b/third_party/lz4cli/BUILD.mk
index b1e6daf70..2440066db 100644
--- a/third_party/lz4cli/BUILD.mk
+++ b/third_party/lz4cli/BUILD.mk
@@ -58,9 +58,6 @@ o/$(MODE)/third_party/lz4cli/lz4cli.com.dbg:		\
 		$(APE_NO_MODIFY_SELF)
 	@$(APELINK)
 
-o/$(MODE)/third_party/lz4cli/lz4cli.o:			\
-		third_party/lz4cli/COPYING
-
 $(THIRD_PARTY_LZ4CLI_OBJS):				\
 		$(BUILD_FILES)				\
 		third_party/lz4cli/BUILD.mk
diff --git a/third_party/lz4cli/COPYING b/third_party/lz4cli/COPYING
index e5e0fa31d..c86990a2b 100644
--- a/third_party/lz4cli/COPYING
+++ b/third_party/lz4cli/COPYING
@@ -1,4 +1,3 @@
-.ident	"\n
 LZ4cli - LZ4 Command Line Interface (GPL v2)
 Copyright (C) Yann Collet 2011-2016
 
@@ -18,4 +17,4 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 
 You can contact the author at :
 - LZ4 source repository : https://github.com/lz4/lz4
-- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c"
+- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
diff --git a/third_party/lz4cli/lz4cli.c b/third_party/lz4cli/lz4cli.c
index a2e5ecc57..55e327faa 100644
--- a/third_party/lz4cli/lz4cli.c
+++ b/third_party/lz4cli/lz4cli.c
@@ -30,7 +30,27 @@
   The license of this compression CLI program is GPLv2.
 */
 
-asm(".include \"third_party/lz4cli/COPYING\"");
+__notice(lz4cli_notice, "\
+LZ4cli - LZ4 Command Line Interface (GPL v2)\n\
+Copyright (C) Yann Collet 2011-2016\n\
+\n\
+This program is free software; you can redistribute it and/or modify\n\
+it under the terms of the GNU General Public License as published by\n\
+the Free Software Foundation; either version 2 of the License, or\n\
+(at your option) any later version.\n\
+\n\
+This program is distributed in the hope that it will be useful,\n\
+but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
+GNU General Public License for more details.\n\
+\n\
+You should have received a copy of the GNU General Public License along\n\
+with this program; if not, write to the Free Software Foundation, Inc.,\n\
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n\
+\n\
+You can contact the author at :\n\
+- LZ4 source repository : https://github.com/lz4/lz4\n\
+- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c");
 
 /****************************
 *  Includes
diff --git a/third_party/maxmind/maxminddb.c b/third_party/maxmind/maxminddb.c
index a3d176253..a51ecc495 100644
--- a/third_party/maxmind/maxminddb.c
+++ b/third_party/maxmind/maxminddb.c
@@ -37,10 +37,9 @@
 #include "libc/sysv/consts/sock.h"
 #include "tool/build/lib/case.h"
 
-asm(".ident\t\"\\n\\n\
-libmaxminddb (Apache 2.0)\\n\
-Copyright 2013-2021 MaxMind Incorporated\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(libmaxminddb_notice, "\
+libmaxminddb (Apache 2.0)\n\
+Copyright 2013-2021 MaxMind Incorporated");
 
 #define METADATA_MARKER              "\xab\xcd\xefMaxMind.com"
 #define METADATA_BLOCK_MAX_SIZE      131072 /* This is 128kb */
diff --git a/third_party/mbedtls/aes.c b/third_party/mbedtls/aes.c
index a36aa52e5..6af04dd7c 100644
--- a/third_party/mbedtls/aes.c
+++ b/third_party/mbedtls/aes.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview FIPS-197 compliant AES implementation
diff --git a/third_party/mbedtls/aesce.c b/third_party/mbedtls/aesce.c
index 2866769f5..8ede1f1b8 100644
--- a/third_party/mbedtls/aesce.c
+++ b/third_party/mbedtls/aesce.c
@@ -18,12 +18,7 @@
 #include "third_party/mbedtls/aesce.h"
 #include "libc/str/str.h"
 #include "third_party/aarch64/arm_neon.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Armv8-A Cryptographic Extension support functions for Aarch64
diff --git a/third_party/mbedtls/aesni.c b/third_party/mbedtls/aesni.c
index 13a913771..1babe33a7 100644
--- a/third_party/mbedtls/aesni.c
+++ b/third_party/mbedtls/aesni.c
@@ -19,12 +19,7 @@
 #include "libc/serialize.h"
 #include "libc/str/str.h"
 #include "third_party/mbedtls/common.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set
diff --git a/third_party/mbedtls/asn1parse.c b/third_party/mbedtls/asn1parse.c
index 8e6458fd1..aa419b6f4 100644
--- a/third_party/mbedtls/asn1parse.c
+++ b/third_party/mbedtls/asn1parse.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 #if defined(MBEDTLS_ASN1_PARSE_C)
 
diff --git a/third_party/mbedtls/asn1write.c b/third_party/mbedtls/asn1write.c
index 5196bd333..1e968fea3 100644
--- a/third_party/mbedtls/asn1write.c
+++ b/third_party/mbedtls/asn1write.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview ASN.1 buffer writing functionality
diff --git a/third_party/mbedtls/base64.c b/third_party/mbedtls/base64.c
index 56f5182c5..c46fcc1de 100644
--- a/third_party/mbedtls/base64.c
+++ b/third_party/mbedtls/base64.c
@@ -19,12 +19,7 @@
 #include "libc/str/str.h"
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 #define ENC "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 
diff --git a/third_party/mbedtls/bigmul4.c b/third_party/mbedtls/bigmul4.c
index 686828503..89fbd362d 100644
--- a/third_party/mbedtls/bigmul4.c
+++ b/third_party/mbedtls/bigmul4.c
@@ -30,13 +30,11 @@
  */
 void (*Mul4x4)(uint64_t C[16], const uint64_t A[8], const uint64_t B[8]);
 
-static textstartup void Mul4x4Init()
+__attribute__((__constructor__(10))) static textstartup void Mul4x4Init()
 {
     Mul4x4 = X86_HAVE(ADX) && X86_HAVE(BMI2) ? Mul4x4Adx : Mul4x4Pure;
 }
 
-const void *const Mul4x4Ctor[] initarray = {Mul4x4Init};
-
 void Mul4x4Pure(uint64_t C[16], const uint64_t A[8], const uint64_t B[8])
 {
     uint128_t t;
diff --git a/third_party/mbedtls/bignum.c b/third_party/mbedtls/bignum.c
index a5695e4fa..96f0eb1a1 100644
--- a/third_party/mbedtls/bignum.c
+++ b/third_party/mbedtls/bignum.c
@@ -33,12 +33,7 @@
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/profile.h"
 #include "third_party/mbedtls/select.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Big Numbers.
diff --git a/third_party/mbedtls/ccm.c b/third_party/mbedtls/ccm.c
index bbe5fe76c..747c313f1 100644
--- a/third_party/mbedtls/ccm.c
+++ b/third_party/mbedtls/ccm.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview NIST SP800-38C compliant CCM implementation
diff --git a/third_party/mbedtls/certs.c b/third_party/mbedtls/certs.c
index 3170df134..a946ad5a2 100644
--- a/third_party/mbedtls/certs.c
+++ b/third_party/mbedtls/certs.c
@@ -17,12 +17,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/mbedtls/certs.h"
 #include "third_party/mbedtls/common.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 #if defined(MBEDTLS_CERTS_C)
 
diff --git a/third_party/mbedtls/chacha20.c b/third_party/mbedtls/chacha20.c
index 12e3b052e..c7600c24e 100644
--- a/third_party/mbedtls/chacha20.c
+++ b/third_party/mbedtls/chacha20.c
@@ -22,12 +22,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /* Parameter validation macros */
 #define CHACHA20_VALIDATE_RET( cond )                                       \
diff --git a/third_party/mbedtls/chachapoly.c b/third_party/mbedtls/chachapoly.c
index 84dc8fcd8..090b7ab4e 100644
--- a/third_party/mbedtls/chachapoly.c
+++ b/third_party/mbedtls/chachapoly.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview ChaCha20-Poly1305 AEAD construction based on RFC 7539.
diff --git a/third_party/mbedtls/cipher.c b/third_party/mbedtls/cipher.c
index a031b054f..2390f1983 100644
--- a/third_party/mbedtls/cipher.c
+++ b/third_party/mbedtls/cipher.c
@@ -26,12 +26,7 @@
 #include "third_party/mbedtls/gcm.h"
 #include "third_party/mbedtls/nist_kw.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 #if defined(MBEDTLS_CIPHER_C)
 
diff --git a/third_party/mbedtls/cipher_wrap.c b/third_party/mbedtls/cipher_wrap.c
index 176cca76f..c7926079f 100644
--- a/third_party/mbedtls/cipher_wrap.c
+++ b/third_party/mbedtls/cipher_wrap.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/gcm.h"
 #include "third_party/mbedtls/nist_kw.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 #if defined(MBEDTLS_CIPHER_C)
 
diff --git a/third_party/mbedtls/ctr_drbg.c b/third_party/mbedtls/ctr_drbg.c
index 052c741e1..01ab3b2c9 100644
--- a/third_party/mbedtls/ctr_drbg.c
+++ b/third_party/mbedtls/ctr_drbg.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/ctr_drbg.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview CTR_DRBG implementation based on AES-256 (NIST SP 800-90)
diff --git a/third_party/mbedtls/debug.c b/third_party/mbedtls/debug.c
index 462d986a4..27b395b10 100644
--- a/third_party/mbedtls/debug.c
+++ b/third_party/mbedtls/debug.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/debug.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 char mbedtls_debug_threshold;
 
diff --git a/third_party/mbedtls/des.c b/third_party/mbedtls/des.c
index 1e412fdfc..33dc7f5bb 100644
--- a/third_party/mbedtls/des.c
+++ b/third_party/mbedtls/des.c
@@ -22,12 +22,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/endian.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview FIPS-46-3 compliant Triple-DES implementation
diff --git a/third_party/mbedtls/dhm.c b/third_party/mbedtls/dhm.c
index 4b9652b0c..222b2f1aa 100644
--- a/third_party/mbedtls/dhm.c
+++ b/third_party/mbedtls/dhm.c
@@ -24,12 +24,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/pem.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Diffie-Hellman-Merkle key exchange
diff --git a/third_party/mbedtls/ecdh.c b/third_party/mbedtls/ecdh.c
index a0d60e41c..76114dae9 100644
--- a/third_party/mbedtls/ecdh.c
+++ b/third_party/mbedtls/ecdh.c
@@ -19,12 +19,7 @@
 #include "third_party/mbedtls/ecdh.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Elliptic curve Diffie-Hellman
diff --git a/third_party/mbedtls/ecdh_everest.c b/third_party/mbedtls/ecdh_everest.c
index 29034ce02..f0f865121 100644
--- a/third_party/mbedtls/ecdh_everest.c
+++ b/third_party/mbedtls/ecdh_everest.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/everest.h"
 #if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
 #define KEYSIZE 32
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * \brief           This function sets up the ECDH context with the information
diff --git a/third_party/mbedtls/ecdsa.c b/third_party/mbedtls/ecdsa.c
index 0317da903..0d2528f25 100644
--- a/third_party/mbedtls/ecdsa.c
+++ b/third_party/mbedtls/ecdsa.c
@@ -23,12 +23,7 @@
 #include "third_party/mbedtls/hmac_drbg.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/profile.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Elliptic curve Digital Signature Algorithm
diff --git a/third_party/mbedtls/ecp.c b/third_party/mbedtls/ecp.c
index 3ac9211f7..4b696ef83 100644
--- a/third_party/mbedtls/ecp.c
+++ b/third_party/mbedtls/ecp.c
@@ -32,12 +32,7 @@
 #include "third_party/mbedtls/hmac_drbg.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/profile.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Elliptic curves over GF(p): generic functions
diff --git a/third_party/mbedtls/ecp_curves.c b/third_party/mbedtls/ecp_curves.c
index 06bef587a..7ab258e49 100644
--- a/third_party/mbedtls/ecp_curves.c
+++ b/third_party/mbedtls/ecp_curves.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/ecp.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Elliptic curves over GF(p): curve-specific data and functions
diff --git a/third_party/mbedtls/entropy.c b/third_party/mbedtls/entropy.c
index 32f8658d7..5edc6f7af 100644
--- a/third_party/mbedtls/entropy.c
+++ b/third_party/mbedtls/entropy.c
@@ -23,12 +23,7 @@
 #include "third_party/mbedtls/entropy_poll.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Entropy accumulator implementation
diff --git a/third_party/mbedtls/error.c b/third_party/mbedtls/error.c
index b53082847..b41282740 100644
--- a/third_party/mbedtls/error.c
+++ b/third_party/mbedtls/error.c
@@ -48,12 +48,7 @@
 #include "third_party/mbedtls/sha512.h"
 #include "third_party/mbedtls/ssl.h"
 #include "third_party/mbedtls/x509.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Error message information
diff --git a/third_party/mbedtls/everest.c b/third_party/mbedtls/everest.c
index 75208b8d1..a4af39253 100644
--- a/third_party/mbedtls/everest.c
+++ b/third_party/mbedtls/everest.c
@@ -18,10 +18,11 @@
 #include "libc/serialize.h"
 #include "third_party/mbedtls/endian.h"
 
-asm(".ident\t\"\\n\\n\
-Everest (Apache 2.0)\\n\
-Copyright 2016-2018 INRIA and Microsoft Corporation\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(cosmo_everest_notice, "\
+Cosmopolitan Everest (Apache 2.0)\n\
+Copyright 2024 Justine Alexndra Roberts Tunney\n\
+Copyright 2016-2018 INRIA and Microsoft Corporation\n\
+Changes: Made C code look nice and not have pointers");
 
 #define DW(x)     (uint128_t)(x)
 #define EQ(x, y)  ((((x ^ y) | (~(x ^ y) + 1)) >> 63) - 1)
diff --git a/third_party/mbedtls/gcm.c b/third_party/mbedtls/gcm.c
index 61df3d1cd..5f330eaca 100644
--- a/third_party/mbedtls/gcm.c
+++ b/third_party/mbedtls/gcm.c
@@ -29,12 +29,7 @@
 #include "third_party/mbedtls/endian.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  NIST SP800-38D compliant GCM implementation
diff --git a/third_party/mbedtls/hkdf.c b/third_party/mbedtls/hkdf.c
index f0369c159..d5b76ea40 100644
--- a/third_party/mbedtls/hkdf.c
+++ b/third_party/mbedtls/hkdf.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/hkdf.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview HKDF implementation (RFC 5869)
diff --git a/third_party/mbedtls/hmac_drbg.c b/third_party/mbedtls/hmac_drbg.c
index a2b3c531e..fa5284d1d 100644
--- a/third_party/mbedtls/hmac_drbg.c
+++ b/third_party/mbedtls/hmac_drbg.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/hmac_drbg.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  HMAC_DRBG implementation (NIST SP 800-90)
diff --git a/third_party/mbedtls/md.c b/third_party/mbedtls/md.c
index 8942051a2..82f371cea 100644
--- a/third_party/mbedtls/md.c
+++ b/third_party/mbedtls/md.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/sha1.h"
 #include "third_party/mbedtls/sha256.h"
 #include "third_party/mbedtls/sha512.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * \file md.c
diff --git a/third_party/mbedtls/md5.c b/third_party/mbedtls/md5.c
index 10deb3367..71674bbe8 100644
--- a/third_party/mbedtls/md5.c
+++ b/third_party/mbedtls/md5.c
@@ -22,12 +22,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/md.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  RFC 1321 compliant MD5 implementation
diff --git a/third_party/mbedtls/md5t.c b/third_party/mbedtls/md5t.c
index 4d050ddf9..1955eac68 100644
--- a/third_party/mbedtls/md5t.c
+++ b/third_party/mbedtls/md5t.c
@@ -18,12 +18,7 @@
 #include "libc/str/str.h"
 #include "third_party/mbedtls/md5.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  * RFC 1321 test vectors
diff --git a/third_party/mbedtls/memory_buffer_alloc.c b/third_party/mbedtls/memory_buffer_alloc.c
index b3cc7d128..283b92d0a 100644
--- a/third_party/mbedtls/memory_buffer_alloc.c
+++ b/third_party/mbedtls/memory_buffer_alloc.c
@@ -18,12 +18,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/memory_buffer_alloc.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Buffer-based memory allocator
diff --git a/third_party/mbedtls/nist_kw.c b/third_party/mbedtls/nist_kw.c
index 4523b6192..a95956301 100644
--- a/third_party/mbedtls/nist_kw.c
+++ b/third_party/mbedtls/nist_kw.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/nist_kw.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Implementation of NIST SP 800-38F key wrapping, supporting KW and KWP modes
diff --git a/third_party/mbedtls/notice.c b/third_party/mbedtls/notice.c
new file mode 100644
index 000000000..efae1a05e
--- /dev/null
+++ b/third_party/mbedtls/notice.c
@@ -0,0 +1,4 @@
+__notice(mbedtls_notice, "\
+Mbed TLS (Apache 2.0)\n\
+Copyright ARM Limited\n\
+Copyright The Mbed TLS Contributors");
diff --git a/third_party/mbedtls/oid.c b/third_party/mbedtls/oid.c
index 88083c7ec..837e909b9 100644
--- a/third_party/mbedtls/oid.c
+++ b/third_party/mbedtls/oid.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/oid.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/rsa.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Object Identifier (OID) database
diff --git a/third_party/mbedtls/pem.c b/third_party/mbedtls/pem.c
index 8b8e12d7b..6bc84f716 100644
--- a/third_party/mbedtls/pem.c
+++ b/third_party/mbedtls/pem.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/md5.h"
 #include "third_party/mbedtls/pem.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Privacy Enhanced Mail (PEM) decoding
diff --git a/third_party/mbedtls/pk.c b/third_party/mbedtls/pk.c
index 736089ab0..92b3d3556 100644
--- a/third_party/mbedtls/pk.c
+++ b/third_party/mbedtls/pk.c
@@ -23,12 +23,7 @@
 #include "third_party/mbedtls/pk_internal.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/rsa.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Public Key abstraction layer
diff --git a/third_party/mbedtls/pk_wrap.c b/third_party/mbedtls/pk_wrap.c
index 9aaba9947..a649523a1 100644
--- a/third_party/mbedtls/pk_wrap.c
+++ b/third_party/mbedtls/pk_wrap.c
@@ -25,12 +25,7 @@
 #include "third_party/mbedtls/pk_internal.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/rsa.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview Public Key abstraction layer: wrapper functions
diff --git a/third_party/mbedtls/pkcs5.c b/third_party/mbedtls/pkcs5.c
index 10cc06f42..00910985b 100644
--- a/third_party/mbedtls/pkcs5.c
+++ b/third_party/mbedtls/pkcs5.c
@@ -22,12 +22,7 @@
 #include "third_party/mbedtls/oid.h"
 #include "third_party/mbedtls/pkcs5.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview PKCS#5 functions, e.g. PBKDF2
diff --git a/third_party/mbedtls/pkparse.c b/third_party/mbedtls/pkparse.c
index d657fd801..a5b16e2f3 100644
--- a/third_party/mbedtls/pkparse.c
+++ b/third_party/mbedtls/pkparse.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/pkcs5.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/rsa.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Public Key layer for parsing key files and structures
diff --git a/third_party/mbedtls/pkwrite.c b/third_party/mbedtls/pkwrite.c
index 91c58252c..23d52818c 100644
--- a/third_party/mbedtls/pkwrite.c
+++ b/third_party/mbedtls/pkwrite.c
@@ -26,12 +26,7 @@
 #include "third_party/mbedtls/pk.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/rsa.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Public Key layer for writing key files and structures
diff --git a/third_party/mbedtls/poly1305.c b/third_party/mbedtls/poly1305.c
index c3f543d5c..20f3f838c 100644
--- a/third_party/mbedtls/poly1305.c
+++ b/third_party/mbedtls/poly1305.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * \file poly1305.c
diff --git a/third_party/mbedtls/rsa.c b/third_party/mbedtls/rsa.c
index 158c526ba..edc6ae58f 100644
--- a/third_party/mbedtls/rsa.c
+++ b/third_party/mbedtls/rsa.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/profile.h"
 #include "third_party/mbedtls/rsa_internal.h"
 #include "third_party/mbedtls/sha1.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview The RSA public-key cryptosystem
diff --git a/third_party/mbedtls/rsa_internal.c b/third_party/mbedtls/rsa_internal.c
index aa9d03fb2..5359d5d80 100644
--- a/third_party/mbedtls/rsa_internal.c
+++ b/third_party/mbedtls/rsa_internal.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/profile.h"
 #include "third_party/mbedtls/rsa.h"
 #include "third_party/mbedtls/rsa_internal.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Helper functions for the RSA module
diff --git a/third_party/mbedtls/sha1.c b/third_party/mbedtls/sha1.c
index 26806ab94..be933c939 100644
--- a/third_party/mbedtls/sha1.c
+++ b/third_party/mbedtls/sha1.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/md.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview FIPS-180-1 compliant SHA-1 implementation
diff --git a/third_party/mbedtls/sha1t.c b/third_party/mbedtls/sha1t.c
index 641061b9c..fc553f34d 100644
--- a/third_party/mbedtls/sha1t.c
+++ b/third_party/mbedtls/sha1t.c
@@ -18,12 +18,7 @@
 #include "libc/str/str.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/sha1.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  * FIPS-180-1 test vectors
diff --git a/third_party/mbedtls/sha256.c b/third_party/mbedtls/sha256.c
index 3261608f1..88e621b53 100644
--- a/third_party/mbedtls/sha256.c
+++ b/third_party/mbedtls/sha256.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/endian.h"
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/md.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview FIPS-180-2 compliant SHA-256 implementation
diff --git a/third_party/mbedtls/sha256t.c b/third_party/mbedtls/sha256t.c
index 996c30019..f3b822447 100644
--- a/third_party/mbedtls/sha256t.c
+++ b/third_party/mbedtls/sha256t.c
@@ -18,12 +18,7 @@
 #include "libc/str/str.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/sha256.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  * FIPS-180-2 test vectors
diff --git a/third_party/mbedtls/sha512.c b/third_party/mbedtls/sha512.c
index a5e197e18..8f4494787 100644
--- a/third_party/mbedtls/sha512.c
+++ b/third_party/mbedtls/sha512.c
@@ -28,12 +28,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/md.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview FIPS-180-2 compliant SHA-384/512 implementation
diff --git a/third_party/mbedtls/sha512t.c b/third_party/mbedtls/sha512t.c
index d8349047f..5e4730831 100644
--- a/third_party/mbedtls/sha512t.c
+++ b/third_party/mbedtls/sha512t.c
@@ -19,12 +19,7 @@
 #include "libc/str/str.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/sha512.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  * FIPS-180-2 test vectors
diff --git a/third_party/mbedtls/shiftright.c b/third_party/mbedtls/shiftright.c
index e3be94ccd..3162eae01 100644
--- a/third_party/mbedtls/shiftright.c
+++ b/third_party/mbedtls/shiftright.c
@@ -21,8 +21,7 @@
 
 void (*ShiftRight)(uint64_t *, size_t, unsigned char);
 
+__attribute__((__constructor__(10)))
 static textstartup void ShiftRightInit(void) {
   ShiftRight = 0 && X86_HAVE(AVX) ? ShiftRightAvx : ShiftRightPure;
 }
-
-const void *const ShiftRightCtor[] initarray = {ShiftRightInit};
diff --git a/third_party/mbedtls/ssl_cache.c b/third_party/mbedtls/ssl_cache.c
index 453326702..c4d435ad9 100644
--- a/third_party/mbedtls/ssl_cache.c
+++ b/third_party/mbedtls/ssl_cache.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/ssl_cache.h"
 #include "third_party/mbedtls/ssl_internal.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  SSL session cache implementation
diff --git a/third_party/mbedtls/ssl_ciphersuites.c b/third_party/mbedtls/ssl_ciphersuites.c
index a7a22a1ad..812fa4cce 100644
--- a/third_party/mbedtls/ssl_ciphersuites.c
+++ b/third_party/mbedtls/ssl_ciphersuites.c
@@ -25,12 +25,7 @@
 #include "third_party/mbedtls/common.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/ssl.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  CRYPTOGRAPHY 101
diff --git a/third_party/mbedtls/ssl_cli.c b/third_party/mbedtls/ssl_cli.c
index b8f2e79b7..002c4aba9 100644
--- a/third_party/mbedtls/ssl_cli.c
+++ b/third_party/mbedtls/ssl_cli.c
@@ -22,12 +22,7 @@
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/ssl.h"
 #include "third_party/mbedtls/ssl_internal.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  SSLv3/TLSv1 client-side functions
diff --git a/third_party/mbedtls/ssl_cookie.c b/third_party/mbedtls/ssl_cookie.c
index 19eec8754..7ca95fcd4 100644
--- a/third_party/mbedtls/ssl_cookie.c
+++ b/third_party/mbedtls/ssl_cookie.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/ssl_cookie.h"
 #include "third_party/mbedtls/ssl_internal.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  DTLS cookie callbacks implementation
diff --git a/third_party/mbedtls/ssl_msg.c b/third_party/mbedtls/ssl_msg.c
index 68ad70f49..41fb201d3 100644
--- a/third_party/mbedtls/ssl_msg.c
+++ b/third_party/mbedtls/ssl_msg.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/ssl.h"
 #include "third_party/mbedtls/ssl_internal.h"
 #include "third_party/mbedtls/ssl_invasive.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  Generic SSL/TLS messaging layer functions
diff --git a/third_party/mbedtls/ssl_srv.c b/third_party/mbedtls/ssl_srv.c
index a51e54b6e..ef4e5c964 100644
--- a/third_party/mbedtls/ssl_srv.c
+++ b/third_party/mbedtls/ssl_srv.c
@@ -27,12 +27,7 @@
 #include "third_party/mbedtls/profile.h"
 #include "third_party/mbedtls/ssl.h"
 #include "third_party/mbedtls/ssl_internal.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  SSLv3/TLSv1 server-side functions
diff --git a/third_party/mbedtls/ssl_ticket.c b/third_party/mbedtls/ssl_ticket.c
index 01fee1941..70e4a4fa9 100644
--- a/third_party/mbedtls/ssl_ticket.c
+++ b/third_party/mbedtls/ssl_ticket.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/ssl_internal.h"
 #include "third_party/mbedtls/ssl_ticket.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  TLS server tickets callbacks implementation
diff --git a/third_party/mbedtls/ssl_tls.c b/third_party/mbedtls/ssl_tls.c
index df248f2f7..5460a4d74 100644
--- a/third_party/mbedtls/ssl_tls.c
+++ b/third_party/mbedtls/ssl_tls.c
@@ -28,12 +28,7 @@
 #include "third_party/mbedtls/ssl_ciphersuites.h"
 #include "third_party/mbedtls/ssl_internal.h"
 #include "third_party/mbedtls/version.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview SSLv3/TLSv1 shared functions
diff --git a/third_party/mbedtls/ssl_tls13_keys.c b/third_party/mbedtls/ssl_tls13_keys.c
index ae521c0c2..523959f03 100644
--- a/third_party/mbedtls/ssl_tls13_keys.c
+++ b/third_party/mbedtls/ssl_tls13_keys.c
@@ -19,12 +19,7 @@
 #include "third_party/mbedtls/hkdf.h"
 #include "third_party/mbedtls/ssl_internal.h"
 #include "third_party/mbedtls/ssl_tls13_keys.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  TLS 1.3 key schedule
diff --git a/third_party/mbedtls/test/everest_unravaged.c b/third_party/mbedtls/test/everest_unravaged.c
index cc4737be1..9c24f1f42 100644
--- a/third_party/mbedtls/test/everest_unravaged.c
+++ b/third_party/mbedtls/test/everest_unravaged.c
@@ -7,10 +7,8 @@
 #include "libc/serialize.h"
 #include "third_party/mbedtls/platform.h"
 
-asm(".ident\t\"\\n\\n\
-Everest (Apache 2.0)\\n\
-Copyright 2016-2018 INRIA and Microsoft Corporation\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(everest_notice, "Everest (Apache 2.0)\n\
+Copyright 2016-2018 INRIA and Microsoft Corporation");
 
 /*
  *  ECDH with curve-optimized implementation multiplexing
diff --git a/third_party/mbedtls/test/lib.c b/third_party/mbedtls/test/lib.c
index e284a67b4..5e72cd6b6 100644
--- a/third_party/mbedtls/test/lib.c
+++ b/third_party/mbedtls/test/lib.c
@@ -50,12 +50,7 @@
 #include "third_party/mbedtls/error.h"
 #include "libc/serialize.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 __static_yoink("zipos");
 
diff --git a/third_party/mbedtls/test/test.inc b/third_party/mbedtls/test/test.inc
index 9412aac12..f4a992d4e 100644
--- a/third_party/mbedtls/test/test.inc
+++ b/third_party/mbedtls/test/test.inc
@@ -6,11 +6,6 @@
 #include "libc/sysv/consts/exit.h"
 #include "third_party/mbedtls/config.h"
 #include "third_party/mbedtls/test/lib.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 __static_yoink("zipos");
diff --git a/third_party/mbedtls/x509.c b/third_party/mbedtls/x509.c
index dab92a7dd..62949a890 100644
--- a/third_party/mbedtls/x509.c
+++ b/third_party/mbedtls/x509.c
@@ -28,12 +28,7 @@
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/x509.h"
 #include "third_party/mbedtls/x509_crt.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  X.509 common functions for parsing and verification
diff --git a/third_party/mbedtls/x509_create.c b/third_party/mbedtls/x509_create.c
index 2e2dbd8a0..ef28ec1ba 100644
--- a/third_party/mbedtls/x509_create.c
+++ b/third_party/mbedtls/x509_create.c
@@ -20,12 +20,7 @@
 #include "third_party/mbedtls/error.h"
 #include "third_party/mbedtls/oid.h"
 #include "third_party/mbedtls/x509.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  X.509 base functions for creating certificates / CSRs
diff --git a/third_party/mbedtls/x509_crl.c b/third_party/mbedtls/x509_crl.c
index 807e63873..f9460fccb 100644
--- a/third_party/mbedtls/x509_crl.c
+++ b/third_party/mbedtls/x509_crl.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/pem.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/x509_crl.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview X.509 Certidicate Revocation List (CRL) parsing
diff --git a/third_party/mbedtls/x509_crt.c b/third_party/mbedtls/x509_crt.c
index 47a4c103a..da53f29d2 100644
--- a/third_party/mbedtls/x509_crt.c
+++ b/third_party/mbedtls/x509_crt.c
@@ -32,12 +32,7 @@
 #include "third_party/mbedtls/oid.h"
 #include "third_party/mbedtls/pem.h"
 #include "third_party/mbedtls/platform.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  X.509 certificate parsing and verification
diff --git a/third_party/mbedtls/x509_csr.c b/third_party/mbedtls/x509_csr.c
index ee8b4941c..4ebb8d39f 100644
--- a/third_party/mbedtls/x509_csr.c
+++ b/third_party/mbedtls/x509_csr.c
@@ -21,12 +21,7 @@
 #include "third_party/mbedtls/pem.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/x509_csr.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /**
  * @fileoverview X.509 Certificate Signing Request (CSR) parsing
diff --git a/third_party/mbedtls/x509write_crt.c b/third_party/mbedtls/x509write_crt.c
index 987ba9df8..7558b38ec 100644
--- a/third_party/mbedtls/x509write_crt.c
+++ b/third_party/mbedtls/x509write_crt.c
@@ -23,12 +23,7 @@
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/sha1.h"
 #include "third_party/mbedtls/x509_crt.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  X.509 certificate writing
diff --git a/third_party/mbedtls/x509write_csr.c b/third_party/mbedtls/x509write_csr.c
index 3d22704ad..ffda801af 100644
--- a/third_party/mbedtls/x509write_csr.c
+++ b/third_party/mbedtls/x509write_csr.c
@@ -22,12 +22,7 @@
 #include "third_party/mbedtls/pem.h"
 #include "third_party/mbedtls/platform.h"
 #include "third_party/mbedtls/x509_csr.h"
-
-asm(".ident\t\"\\n\\n\
-Mbed TLS (Apache 2.0)\\n\
-Copyright ARM Limited\\n\
-Copyright Mbed TLS Contributors\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("mbedtls_notice");
 
 /*
  *  X.509 Certificate Signing Request writing
diff --git a/third_party/musl/crypt.c b/third_party/musl/crypt.c
index 2bf39082e..a6a81de20 100644
--- a/third_party/musl/crypt.c
+++ b/third_party/musl/crypt.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/musl/crypt.h"
 #include "third_party/musl/crypt.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /**
  * Encrypts password the old fashioned way.
diff --git a/third_party/musl/crypt.h b/third_party/musl/crypt.h
index 111c445e8..5fc464f9d 100644
--- a/third_party/musl/crypt.h
+++ b/third_party/musl/crypt.h
@@ -7,10 +7,8 @@ struct crypt_data {
   char __buf[256];
 };
 
-void encrypt(char *, int);
-void setkey(const char *);
-char *crypt(const char *, const char *);
-char *crypt_r(const char *, const char *, struct crypt_data *);
+char *crypt(const char *, const char *) libcesque;
+char *crypt_r(const char *, const char *, struct crypt_data *) libcesque;
 
 COSMOPOLITAN_C_END_
 #endif /* COSMOPOLITAN_THIRD_PARTY_MUSL_CRYPT_H_ */
diff --git a/third_party/musl/crypt_blowfish.c b/third_party/musl/crypt_blowfish.c
index 9dfec80c0..5586c53c6 100644
--- a/third_party/musl/crypt_blowfish.c
+++ b/third_party/musl/crypt_blowfish.c
@@ -32,11 +32,7 @@
 #include "libc/mem/gc.h"
 #include "libc/str/str.h"
 #include "third_party/musl/crypt.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /* Modified by Rich Felker in for inclusion in musl libc, based on
  * Solar Designer's second size-optimized version sent to the musl
diff --git a/third_party/musl/crypt_des.c b/third_party/musl/crypt_des.c
index 87ae04bff..564059ac7 100644
--- a/third_party/musl/crypt_des.c
+++ b/third_party/musl/crypt_des.c
@@ -31,11 +31,9 @@
 #include "libc/str/str.h"
 #include "third_party/musl/crypt.internal.h"
 #include "third_party/musl/crypt_des.internal.h"
+__static_yoink("musl_libc_notice");
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
 
 /*
  * This version has been further modified by Rich Felker, primary author
diff --git a/third_party/musl/crypt_md5.c b/third_party/musl/crypt_md5.c
index 1c00345b0..5bf75c1eb 100644
--- a/third_party/musl/crypt_md5.c
+++ b/third_party/musl/crypt_md5.c
@@ -30,14 +30,10 @@
 #include "libc/literal.h"
 #include "libc/str/str.h"
 #include "third_party/musl/crypt.internal.h"
+__static_yoink("musl_libc_notice");
 
 #pragma GCC diagnostic ignored "-Wmissing-braces"
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /*
  * md5 crypt implementation
  *
diff --git a/third_party/musl/crypt_r.c b/third_party/musl/crypt_r.c
index e90411ff5..f0155b8c6 100644
--- a/third_party/musl/crypt_r.c
+++ b/third_party/musl/crypt_r.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/musl/crypt.h"
 #include "third_party/musl/crypt.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /**
  * Encrypts password the old fashioned way.
diff --git a/third_party/musl/crypt_sha256.c b/third_party/musl/crypt_sha256.c
index ec699090d..d5e2f8059 100644
--- a/third_party/musl/crypt_sha256.c
+++ b/third_party/musl/crypt_sha256.c
@@ -40,14 +40,10 @@
 #include "libc/sysv/consts/exit.h"
 #include "third_party/gdtoa/gdtoa.h"
 #include "third_party/musl/crypt.internal.h"
+__static_yoink("musl_libc_notice");
 
 #pragma GCC diagnostic ignored "-Wmissing-braces"
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /*
  * public domain sha256 crypt implementation
  *
diff --git a/third_party/musl/crypt_sha512.c b/third_party/musl/crypt_sha512.c
index 122a64cff..7d3b4414b 100644
--- a/third_party/musl/crypt_sha512.c
+++ b/third_party/musl/crypt_sha512.c
@@ -40,14 +40,10 @@
 #include "libc/sysv/consts/exit.h"
 #include "third_party/gdtoa/gdtoa.h"
 #include "third_party/musl/crypt.internal.h"
+__static_yoink("musl_libc_notice");
 
 #pragma GCC diagnostic ignored "-Wmissing-braces"
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /*
  * public domain sha512 crypt implementation
  *
diff --git a/third_party/musl/dn_comp.c b/third_party/musl/dn_comp.c
index a6db07072..331d20f18 100644
--- a/third_party/musl/dn_comp.c
+++ b/third_party/musl/dn_comp.c
@@ -93,7 +93,7 @@ int dn_comp(const char *src,
 	    unsigned char **dnptrs,
 	    unsigned char **lastdnptr)
 {
-	int i, j, n, m=0, offset, bestlen=0, bestoff;
+	int i, j, n, m=0, offset, bestlen=0, bestoff=0;
 	unsigned char lens[127];
 	unsigned char **p;
 	const char *end;
diff --git a/third_party/musl/encrypt.c b/third_party/musl/encrypt.c
index 6474c6a84..722ea2650 100644
--- a/third_party/musl/encrypt.c
+++ b/third_party/musl/encrypt.c
@@ -41,11 +41,7 @@
 #include "libc/sysv/consts/ok.h"
 #include "third_party/gdtoa/gdtoa.h"
 #include "third_party/musl/crypt_des.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static struct expanded_key __encrypt_key;
 
diff --git a/third_party/musl/fnmatch.c b/third_party/musl/fnmatch.c
index ed04f5c21..d2f32e259 100644
--- a/third_party/musl/fnmatch.c
+++ b/third_party/musl/fnmatch.c
@@ -28,6 +28,7 @@
 #include "libc/limits.h"
 #include "libc/str/str.h"
 #include "third_party/musl/fnmatch.h"
+__static_yoink("musl_libc_notice");
 
 /*
  * An implementation of what I call the "Sea of Stars" algorithm for
@@ -44,11 +45,6 @@
  * - Rich Felker, April 2012
  */
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 #define END         0
 #define UNMATCHABLE -2
 #define BRACKET     -3
diff --git a/third_party/musl/forkpty.c b/third_party/musl/forkpty.c
index f53984a51..ea881b0fb 100644
--- a/third_party/musl/forkpty.c
+++ b/third_party/musl/forkpty.c
@@ -35,11 +35,7 @@
 #include "libc/runtime/runtime.h"
 #include "libc/errno.h"
 #include "libc/calls/termios.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int forkpty(int *pm, char *name, const struct termios *tio, const struct winsize *ws)
 {
diff --git a/third_party/musl/freeaddrinfo.c b/third_party/musl/freeaddrinfo.c
index 9fda19caa..e9bcfbd7b 100644
--- a/third_party/musl/freeaddrinfo.c
+++ b/third_party/musl/freeaddrinfo.c
@@ -29,11 +29,7 @@
 #include "libc/mem/mem.h"
 #include "third_party/musl/lookup.internal.h"
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 void freeaddrinfo(struct addrinfo *p)
 {
diff --git a/third_party/musl/gai_strerror.c b/third_party/musl/gai_strerror.c
index 2b6e85850..7e9a83eed 100644
--- a/third_party/musl/gai_strerror.c
+++ b/third_party/musl/gai_strerror.c
@@ -26,11 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static const char msgs[] =
 	"Invalid flags\0"
diff --git a/third_party/musl/getaddrinfo.c b/third_party/musl/getaddrinfo.c
index 7c418d518..9c8d0a926 100644
--- a/third_party/musl/getaddrinfo.c
+++ b/third_party/musl/getaddrinfo.c
@@ -40,11 +40,7 @@
 #include "third_party/musl/lookup.internal.h"
 #include "libc/intrin/atomic.h"
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int getaddrinfo(const char *host,
                 const char *serv,
diff --git a/third_party/musl/gethostbyaddr_r.c b/third_party/musl/gethostbyaddr_r.c
index e4934b94c..51572bb1d 100644
--- a/third_party/musl/gethostbyaddr_r.c
+++ b/third_party/musl/gethostbyaddr_r.c
@@ -32,11 +32,7 @@
 #include "libc/str/str.h"
 #include "libc/sysv/consts/af.h"
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 errno_t gethostbyaddr_r(const void *a, socklen_t l, int af,
 	struct hostent *h, char *buf, size_t buflen,
diff --git a/third_party/musl/gethostbyname2_r.c b/third_party/musl/gethostbyname2_r.c
index 9ae8c0841..e95bf9535 100644
--- a/third_party/musl/gethostbyname2_r.c
+++ b/third_party/musl/gethostbyname2_r.c
@@ -30,11 +30,7 @@
 #include "libc/sysv/consts/af.h"
 #include "third_party/musl/lookup.internal.h"
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 errno_t gethostbyname2_r(const char *name, int af,
 	struct hostent *h, char *buf, size_t buflen,
diff --git a/third_party/musl/getnameinfo.c b/third_party/musl/getnameinfo.c
index 62f20e009..ffced1dd3 100644
--- a/third_party/musl/getnameinfo.c
+++ b/third_party/musl/getnameinfo.c
@@ -37,11 +37,7 @@
 #include "third_party/musl/lookup.internal.h"
 #include "third_party/musl/netdb.h"
 #include "third_party/musl/resolv.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 #define PTR_MAX (64 + sizeof ".in-addr.arpa")
 #define RR_PTR 12
diff --git a/third_party/musl/getservbyname_r.c b/third_party/musl/getservbyname_r.c
index cd088ac29..d6e238fba 100644
--- a/third_party/musl/getservbyname_r.c
+++ b/third_party/musl/getservbyname_r.c
@@ -32,11 +32,7 @@
 #include "libc/sysv/consts/ipproto.h"
 #include "third_party/musl/lookup.internal.h"
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 #define ALIGN (sizeof(struct { char a; char *b; }) - sizeof(char *))
 
diff --git a/third_party/musl/getservbyport_r.c b/third_party/musl/getservbyport_r.c
index 3749ef393..0e98a8eaf 100644
--- a/third_party/musl/getservbyport_r.c
+++ b/third_party/musl/getservbyport_r.c
@@ -31,11 +31,7 @@
 #include "libc/str/str.h"
 #include "libc/sysv/consts/af.h"
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 errno_t getservbyport_r(int port, const char *prots,
 	struct servent *se, char *buf, size_t buflen, struct servent **res)
diff --git a/third_party/musl/getspnam_r.c b/third_party/musl/getspnam_r.c
index 3b08b1642..dae46748f 100644
--- a/third_party/musl/getspnam_r.c
+++ b/third_party/musl/getspnam_r.c
@@ -31,11 +31,7 @@
 #include <ctype.h>
 #include <pthread.h>
 #include "pwf.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /* This implementation support Openwall-style TCB passwords in place of
  * traditional shadow, if the appropriate directories and files exist.
diff --git a/third_party/musl/glob.c b/third_party/musl/glob.c
index c7156ccb3..655532d9a 100644
--- a/third_party/musl/glob.c
+++ b/third_party/musl/glob.c
@@ -36,14 +36,10 @@
 #include "libc/sysv/consts/dt.h"
 #include "libc/sysv/consts/s.h"
 #include "third_party/musl/fnmatch.h"
+__static_yoink("musl_libc_notice");
 
 #define MAXPATH 1024
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 struct GlobList {
   struct GlobList *next;
   char name[];
diff --git a/third_party/musl/grp.c b/third_party/musl/grp.c
index 7a788c7f0..17d777ef7 100644
--- a/third_party/musl/grp.c
+++ b/third_party/musl/grp.c
@@ -34,11 +34,7 @@
 #include "libc/sysv/consts/limits.h"
 #include "libc/thread/thread.h"
 #include "third_party/musl/passwd.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static unsigned atou(char **s) {
   unsigned x;
diff --git a/third_party/musl/hsearch.c b/third_party/musl/hsearch.c
index 883fec34f..e4140f404 100644
--- a/third_party/musl/hsearch.c
+++ b/third_party/musl/hsearch.c
@@ -28,11 +28,7 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 #include "third_party/musl/search.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /*
 open addressing hash table with 2^n table size
diff --git a/third_party/musl/insque.c b/third_party/musl/insque.c
index 63c00539b..cec746ec2 100644
--- a/third_party/musl/insque.c
+++ b/third_party/musl/insque.c
@@ -26,11 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/musl/search.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 struct node {
 	struct node *next;
diff --git a/third_party/musl/lockf.c b/third_party/musl/lockf.c
index 33c57e3d6..a0ec7e670 100644
--- a/third_party/musl/lockf.c
+++ b/third_party/musl/lockf.c
@@ -32,11 +32,7 @@
 #include "libc/errno.h"
 #include "libc/sysv/consts/f.h"
 #include "libc/sysv/errfuns.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /**
  * Locks file.
diff --git a/third_party/musl/lookup_ipliteral.c b/third_party/musl/lookup_ipliteral.c
index 1ba9a2d5e..a02b0239b 100644
--- a/third_party/musl/lookup_ipliteral.c
+++ b/third_party/musl/lookup_ipliteral.c
@@ -32,11 +32,7 @@
 #include "libc/limits.h"
 #include "libc/sock/sock.h"
 #include "third_party/musl/lookup.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int __lookup_ipliteral(struct address buf[static 1], const char *name, int family)
 {
diff --git a/third_party/musl/lookup_name.c b/third_party/musl/lookup_name.c
index 37214b72a..291178213 100644
--- a/third_party/musl/lookup_name.c
+++ b/third_party/musl/lookup_name.c
@@ -40,11 +40,7 @@
 #include "third_party/musl/lookup.internal.h"
 #include "third_party/musl/netdb.h"
 #include "third_party/musl/resolv.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static int is_valid_hostname(const char *host)
 {
diff --git a/third_party/musl/lookup_serv.c b/third_party/musl/lookup_serv.c
index 8eb45abc7..cc7fbffb2 100644
--- a/third_party/musl/lookup_serv.c
+++ b/third_party/musl/lookup_serv.c
@@ -33,11 +33,7 @@
 #include "libc/errno.h"
 #include "libc/calls/sysdir.internal.h"
 #include "third_party/musl/lookup.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int __lookup_serv(struct service buf[static MAXSERVS],
 		  const char *name,
diff --git a/third_party/musl/lsearch.c b/third_party/musl/lsearch.c
index 1d1ee6a5c..c4b871206 100644
--- a/third_party/musl/lsearch.c
+++ b/third_party/musl/lsearch.c
@@ -28,11 +28,7 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 #include "third_party/musl/search.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 
 void *lsearch(const void *key, void *base, size_t *nelp, size_t width,
diff --git a/third_party/musl/mntent.c b/third_party/musl/mntent.c
index cc010090a..275ba586d 100644
--- a/third_party/musl/mntent.c
+++ b/third_party/musl/mntent.c
@@ -31,11 +31,7 @@
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 #include "third_party/musl/mntent.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static char *internal_buf;
 static size_t internal_bufsize;
diff --git a/third_party/musl/ns_parse.c b/third_party/musl/ns_parse.c
index e3964e2fb..dad9dd71c 100644
--- a/third_party/musl/ns_parse.c
+++ b/third_party/musl/ns_parse.c
@@ -28,11 +28,7 @@
 #include "libc/errno.h"
 #include "third_party/musl/resolv.h"
 #include "third_party/musl/nameser.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 const struct _ns_flagdata _ns_flagdata[16] = {
 	{ 0x8000, 15 },
diff --git a/third_party/musl/proto.c b/third_party/musl/proto.c
index 86edea22b..6e616dbf0 100644
--- a/third_party/musl/proto.c
+++ b/third_party/musl/proto.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/str.h"
 #include "third_party/musl/netdb.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 /* do we really need all these?? */
 
diff --git a/third_party/musl/pwd.c b/third_party/musl/pwd.c
index a1dedd141..fc54b77cf 100644
--- a/third_party/musl/pwd.c
+++ b/third_party/musl/pwd.c
@@ -38,11 +38,7 @@
 #include "libc/str/str.h"
 #include "libc/thread/thread.h"
 #include "third_party/musl/passwd.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 #ifdef FTRACE
 // if the default mode debugging tools are enabled, and we're linking
diff --git a/third_party/musl/rand48.c b/third_party/musl/rand48.c
index fa0a94db3..5c46adf7d 100644
--- a/third_party/musl/rand48.c
+++ b/third_party/musl/rand48.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/musl/rand48.h"
 #include "libc/str/str.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static unsigned short __seed48[7] = { 0, 0, 0, 0xe66d, 0xdeec, 0x5, 0xb };
 
diff --git a/third_party/musl/res_mkquery.c b/third_party/musl/res_mkquery.c
index f10f2148d..f955d6cfb 100644
--- a/third_party/musl/res_mkquery.c
+++ b/third_party/musl/res_mkquery.c
@@ -29,11 +29,7 @@
 #include "libc/sysv/consts/clock.h"
 #include "libc/stdio/rand.h"
 #include "third_party/musl/resolv.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int __res_mkquery(int op, const char *dname, int class, int type,
 	const unsigned char *data, int datalen,
diff --git a/third_party/musl/res_msend.c b/third_party/musl/res_msend.c
index 705a0e5b4..1a65bee68 100644
--- a/third_party/musl/res_msend.c
+++ b/third_party/musl/res_msend.c
@@ -45,11 +45,7 @@
 #include "libc/sysv/consts/tcp.h"
 #include "libc/thread/thread.h"
 #include "lookup.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static void cleanup(void *p)
 {
diff --git a/third_party/musl/res_query.c b/third_party/musl/res_query.c
index f324e58bb..04d7427f7 100644
--- a/third_party/musl/res_query.c
+++ b/third_party/musl/res_query.c
@@ -28,11 +28,7 @@
 #include "third_party/musl/resolv.h"
 #include "third_party/musl/netdb.h"
 #include "third_party/musl/resolv.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int res_query(const char *name, int class, int type, unsigned char *dest, int len)
 {
diff --git a/third_party/musl/res_querydomain.c b/third_party/musl/res_querydomain.c
index 809bc6877..e91149944 100644
--- a/third_party/musl/res_querydomain.c
+++ b/third_party/musl/res_querydomain.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/str.h"
 #include "third_party/musl/resolv.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int res_querydomain(const char *name, const char *domain, int class, int type, unsigned char *dest, int len)
 {
diff --git a/third_party/musl/res_send.c b/third_party/musl/res_send.c
index 1ef881370..cd28d0706 100644
--- a/third_party/musl/res_send.c
+++ b/third_party/musl/res_send.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/str/str.h"
 #include "third_party/musl/resolv.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 int __res_send(const unsigned char *msg, int msglen, unsigned char *answer, int anslen)
 {
diff --git a/third_party/musl/resolvconf.c b/third_party/musl/resolvconf.c
index bdcfbc719..ddc44b363 100644
--- a/third_party/musl/resolvconf.c
+++ b/third_party/musl/resolvconf.c
@@ -38,11 +38,7 @@
 #include "libc/sysv/consts/af.h"
 #include "libc/sock/sock.h"
 #include "third_party/musl/lookup.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 // TODO(jart): ipv6 nameservers on windows
 // TODO(jart): can we polyfill `search` on windows
diff --git a/third_party/musl/strfmon.c b/third_party/musl/strfmon.c
index 82bd8c0a9..95e590612 100644
--- a/third_party/musl/strfmon.c
+++ b/third_party/musl/strfmon.c
@@ -30,11 +30,7 @@
 #include "libc/str/locale.h"
 #include "libc/str/str.h"
 #include "libc/thread/tls.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static ssize_t vstrfmon_l(char *s, size_t n, locale_t loc, const char *fmt, va_list ap)
 {
diff --git a/third_party/musl/tdelete.c b/third_party/musl/tdelete.c
index 6023062fc..52e871341 100644
--- a/third_party/musl/tdelete.c
+++ b/third_party/musl/tdelete.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/mem/mem.h"
 #include "third_party/musl/tsearch.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 
 void *tdelete(const void *restrict key, void **restrict rootp,
diff --git a/third_party/musl/tdestroy.c b/third_party/musl/tdestroy.c
index 51a681669..17904479c 100644
--- a/third_party/musl/tdestroy.c
+++ b/third_party/musl/tdestroy.c
@@ -27,11 +27,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/mem/mem.h"
 #include "third_party/musl/tsearch.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 void tdestroy(void *root, void (*freekey)(void *))
 {
diff --git a/third_party/musl/tempnam.c b/third_party/musl/tempnam.c
index c72761ddd..93d8a6856 100644
--- a/third_party/musl/tempnam.c
+++ b/third_party/musl/tempnam.c
@@ -37,14 +37,10 @@
 #include "libc/sysv/consts/at.h"
 #include "libc/sysv/consts/clock.h"
 #include "libc/time/time.h"
+__static_yoink("musl_libc_notice");
 
 #define MAXTRIES 100
 
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 static char *
 __randname(char *template)
 {
diff --git a/third_party/musl/tfind.c b/third_party/musl/tfind.c
index 0735306b9..b4a668bcd 100644
--- a/third_party/musl/tfind.c
+++ b/third_party/musl/tfind.c
@@ -26,11 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/musl/tsearch.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 void *tfind(const void *key, void *const *rootp,
 	int(*cmp)(const void *, const void *))
diff --git a/third_party/musl/tsearch.c b/third_party/musl/tsearch.c
index 1c9ceecd8..fdfd0c75e 100644
--- a/third_party/musl/tsearch.c
+++ b/third_party/musl/tsearch.c
@@ -28,11 +28,7 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 #include "third_party/musl/tsearch.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 static inline int height(struct node *n) { return n ? n->h : 0; }
 
diff --git a/third_party/musl/twalk.c b/third_party/musl/twalk.c
index 48ec2ab19..ed5bdbffc 100644
--- a/third_party/musl/twalk.c
+++ b/third_party/musl/twalk.c
@@ -26,11 +26,7 @@
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/musl/tsearch.internal.h"
-
-asm(".ident\t\"\\n\\n\
-Musl libc (MIT License)\\n\
-Copyright 2005-2020 Rich Felker, et. al.\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("musl_libc_notice");
 
 
 static void walk(const struct node *r, void (*action)(const void *, VISIT, int), int d)
diff --git a/third_party/nsync/common.c b/third_party/nsync/common.c
index 93a9cd4db..a50b6f041 100644
--- a/third_party/nsync/common.c
+++ b/third_party/nsync/common.c
@@ -34,11 +34,7 @@
 #include "third_party/nsync/mu_semaphore.h"
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* This package provides a mutex nsync_mu and a Mesa-style condition
  * variable nsync_cv. */
diff --git a/third_party/nsync/futex.c b/third_party/nsync/futex.c
index de9a5889b..feb8c49ef 100644
--- a/third_party/nsync/futex.c
+++ b/third_party/nsync/futex.c
@@ -143,7 +143,7 @@ static int nsync_futex_polyfill_ (atomic_int *w, int expect, struct timespec *ab
 		if (abstime && timespec_cmp (timespec_real (), *abstime) >= 0) {
 			return -ETIMEDOUT;
 		}
-		pthread_yield ();
+		pthread_yield_np ();
 	}
 }
 
@@ -373,7 +373,7 @@ int nsync_futex_wake_ (atomic_int *w, int count, char pshare) {
 		}
 	} else {
 	Polyfill:
-		pthread_yield ();
+		pthread_yield_np ();
 		rc = 0;
 	}
 
diff --git a/third_party/nsync/mem/nsync_counter.c b/third_party/nsync/mem/nsync_counter.c
index d974977d8..c508797fc 100644
--- a/third_party/nsync/mem/nsync_counter.c
+++ b/third_party/nsync/mem/nsync_counter.c
@@ -26,11 +26,7 @@
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
 #include "third_party/nsync/waiter.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* Internal details of nsync_counter. */
 struct nsync_counter_s_ {
diff --git a/third_party/nsync/mem/nsync_cv.c b/third_party/nsync/mem/nsync_cv.c
index fb081094a..926379340 100644
--- a/third_party/nsync/mem/nsync_cv.c
+++ b/third_party/nsync/mem/nsync_cv.c
@@ -26,16 +26,12 @@
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
 #include "third_party/nsync/waiter.h"
+__static_yoink("nsync_notice");
 
 // once we're paying the cost of nsync we might as well get the benefit
 // of a better pthread_once(), since no other component pulls it in now
 __static_yoink("nsync_run_once");
 
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
-
 /* Initialize *cv. */
 void nsync_cv_init (nsync_cv *cv) {
         bzero ((void *) cv, sizeof (*cv));
diff --git a/third_party/nsync/mem/nsync_debug.c b/third_party/nsync/mem/nsync_debug.c
index e677c7ea0..1b72c1175 100644
--- a/third_party/nsync/mem/nsync_debug.c
+++ b/third_party/nsync/mem/nsync_debug.c
@@ -21,11 +21,7 @@
 #include "third_party/nsync/mu_semaphore.h"
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* Routines for debugging. */
 
diff --git a/third_party/nsync/mem/nsync_mu_wait.c b/third_party/nsync/mem/nsync_mu_wait.c
index c7839950a..3c48ea599 100644
--- a/third_party/nsync/mem/nsync_mu_wait.c
+++ b/third_party/nsync/mem/nsync_mu_wait.c
@@ -22,11 +22,7 @@
 #include "third_party/nsync/mu_semaphore.h"
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* Attempt to remove waiter *w from *mu's
    waiter queue.  If successful, leave the lock held in mode *l_type, and
diff --git a/third_party/nsync/mem/nsync_note.c b/third_party/nsync/mem/nsync_note.c
index 8e7276112..bdf8e9ad0 100644
--- a/third_party/nsync/mem/nsync_note.c
+++ b/third_party/nsync/mem/nsync_note.c
@@ -25,11 +25,7 @@
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
 #include "third_party/nsync/waiter.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* Locking discipline for the nsync_note implementation:
 
diff --git a/third_party/nsync/mem/nsync_once.c b/third_party/nsync/mem/nsync_once.c
index 8320a6266..163923359 100644
--- a/third_party/nsync/mem/nsync_once.c
+++ b/third_party/nsync/mem/nsync_once.c
@@ -22,11 +22,7 @@
 #include "third_party/nsync/once.h"
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* An once_sync_s struct contains a lock, and a condition variable on which
    threads may wait for an nsync_once to be initialized by another thread.
diff --git a/third_party/nsync/mem/nsync_sem_wait.c b/third_party/nsync/mem/nsync_sem_wait.c
index c32a2842e..62507d686 100644
--- a/third_party/nsync/mem/nsync_sem_wait.c
+++ b/third_party/nsync/mem/nsync_sem_wait.c
@@ -23,11 +23,7 @@
 #include "third_party/nsync/common.internal.h"
 #include "third_party/nsync/mu_semaphore.h"
 #include "third_party/nsync/wait_s.internal.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* Wait until one of:
      w->sem is non-zero----decrement it and return 0.
diff --git a/third_party/nsync/mem/nsync_wait.c b/third_party/nsync/mem/nsync_wait.c
index f669730ac..9d8e95b7d 100644
--- a/third_party/nsync/mem/nsync_wait.c
+++ b/third_party/nsync/mem/nsync_wait.c
@@ -25,11 +25,7 @@
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
 #include "third_party/nsync/waiter.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 int nsync_wait_n (void *mu, void (*lock) (void *), void (*unlock) (void *),
 		  nsync_time abs_deadline,
diff --git a/third_party/nsync/mu.c b/third_party/nsync/mu.c
index 354c38621..4cae68328 100644
--- a/third_party/nsync/mu.c
+++ b/third_party/nsync/mu.c
@@ -23,11 +23,7 @@
 #include "third_party/nsync/mu_semaphore.h"
 #include "third_party/nsync/races.internal.h"
 #include "third_party/nsync/wait_s.internal.h"
-
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
+__static_yoink("nsync_notice");
 
 /* Initialize *mu. */
 void nsync_mu_init (nsync_mu *mu) {
diff --git a/third_party/nsync/mu_semaphore.c b/third_party/nsync/mu_semaphore.c
index 43f2b5ccc..493efa2c4 100644
--- a/third_party/nsync/mu_semaphore.c
+++ b/third_party/nsync/mu_semaphore.c
@@ -19,17 +19,13 @@
 #include "libc/calls/cp.internal.h"
 #include "libc/dce.h"
 #include "third_party/nsync/mu_semaphore.internal.h"
+__static_yoink("nsync_notice");
 
 /* Apple's ulock (part by Cosmo futexes) is an internal API, but:
    1. Unlike GCD it's cancellable, i.e. can be EINTR'd by signals
    2. We currently always use ulock anyway for joining threads */
 #define PREFER_GCD_OVER_ULOCK 1
 
-asm(".ident\t\"\\n\\n\
-*NSYNC (Apache 2.0)\\n\
-Copyright 2016 Google, Inc.\\n\
-https://github.com/google/nsync\"");
-
 /* Initialize *s; the initial value is 0. */
 void nsync_mu_semaphore_init (nsync_semaphore *s) {
 	if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) {
diff --git a/third_party/nsync/notice.c b/third_party/nsync/notice.c
new file mode 100644
index 000000000..614e3f62d
--- /dev/null
+++ b/third_party/nsync/notice.c
@@ -0,0 +1,4 @@
+__notice(nsync_notice, "\
+*NSYNC (Apache 2.0)\n\
+Copyright 2016 Google, Inc.\n\
+https://github.com/google/nsync");
diff --git a/third_party/nsync/yield.c b/third_party/nsync/yield.c
index 495391fdd..f15b29f6c 100644
--- a/third_party/nsync/yield.c
+++ b/third_party/nsync/yield.c
@@ -22,6 +22,6 @@
 #include "third_party/nsync/common.internal.h"
 
 void nsync_yield_ (void) {
-	pthread_yield ();
+	pthread_yield_np ();
 	STRACE ("nsync_yield_()");
 }
diff --git a/third_party/openmp/kmp_os.h b/third_party/openmp/kmp_os.h
index 8ef3746ce..6ce5d511a 100644
--- a/third_party/openmp/kmp_os.h
+++ b/third_party/openmp/kmp_os.h
@@ -75,7 +75,7 @@
 #error Unknown compiler
 #endif
 
-#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_WASI
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_WASI && !defined(__COSMOPOLITAN__)
 #define KMP_AFFINITY_SUPPORTED 1
 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64
 #define KMP_GROUP_AFFINITY 1
diff --git a/third_party/pcre/pcre2_compile.c b/third_party/pcre/pcre2_compile.c
index fb910ed8e..4fc63cd57 100644
--- a/third_party/pcre/pcre2_compile.c
+++ b/third_party/pcre/pcre2_compile.c
@@ -1,9 +1,5 @@
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-PCRE2 (PCRE2 License)\\n\
-Copyright (c) 1997-2022 University of Cambridge\"");
-
 /*************************************************
 *      Perl-Compatible Regular Expressions       *
 *************************************************/
diff --git a/third_party/pcre/pcre2_match.c b/third_party/pcre/pcre2_match.c
index 606a850c3..897c88b29 100644
--- a/third_party/pcre/pcre2_match.c
+++ b/third_party/pcre/pcre2_match.c
@@ -1,8 +1,8 @@
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-PCRE2 (PCRE2 License)\\n\
-Copyright (c) 1997-2022 University of Cambridge\"");
+__notice(pcre2_notice, "\
+PCRE2 (PCRE2 License)\n\
+Copyright (c) 1997-2022 University of Cambridge");
 
 /*************************************************
 *      Perl-Compatible Regular Expressions       *
diff --git a/third_party/puff/puff.c b/third_party/puff/puff.c
index 94593db28..21bc92ec4 100644
--- a/third_party/puff/puff.c
+++ b/third_party/puff/puff.c
@@ -25,10 +25,9 @@
 #include "third_party/puff/puff.h"
 #include "libc/runtime/runtime.h"
 
-asm(".ident\t\"\\n\\n\
-puff (zlib License)\\n\
-Copyright 2002-203 Mark Adler\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(puff_notice, "\
+puff (zlib License)\n\
+Copyright 2002-2013 Mark Adler");
 
 // Origin: git@github.com:madler/zlib.git
 // Commit: 03614c56ad299f9b238c75aa1e66f0c08fc4fc8b
diff --git a/third_party/python/BUILD.mk b/third_party/python/BUILD.mk
index 33ef97a47..81868c829 100644
--- a/third_party/python/BUILD.mk
+++ b/third_party/python/BUILD.mk
@@ -534,6 +534,7 @@ THIRD_PARTY_PYTHON_STAGE2_A_SRCS =					\
 	third_party/python/Modules/_csv.c				\
 	third_party/python/Modules/_datetimemodule.c			\
 	third_party/python/Modules/_decimal/_decimal.c			\
+	third_party/python/Modules/_decimal/libmpdec/notice.c		\
 	third_party/python/Modules/_decimal/libmpdec/basearith.c	\
 	third_party/python/Modules/_decimal/libmpdec/constants.c	\
 	third_party/python/Modules/_decimal/libmpdec/context.c		\
diff --git a/third_party/python/Modules/_decimal/_decimal.c b/third_party/python/Modules/_decimal/_decimal.c
index 0c0903a70..3940663c9 100644
--- a/third_party/python/Modules/_decimal/_decimal.c
+++ b/third_party/python/Modules/_decimal/_decimal.c
@@ -96,11 +96,6 @@ PYTHON_PROVIDE("_decimal.setcontext");
 PYTHON_YOINK("numbers");
 PYTHON_YOINK("collections");
 
-asm(".ident\t\"\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 #if !defined(MPD_VERSION_HEX) || MPD_VERSION_HEX < 0x02040100
   #error "libmpdec version >= 2.4.1 required"
 #endif
diff --git a/third_party/python/Modules/_decimal/libmpdec/basearith.c b/third_party/python/Modules/_decimal/libmpdec/basearith.c
index 893268e06..94c5835f6 100644
--- a/third_party/python/Modules/_decimal/libmpdec/basearith.c
+++ b/third_party/python/Modules/_decimal/libmpdec/basearith.c
@@ -31,11 +31,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/constants.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/typearith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /*********************************************************************/
 /*                   Calculations in base MPD_RADIX                  */
diff --git a/third_party/python/Modules/_decimal/libmpdec/constants.c b/third_party/python/Modules/_decimal/libmpdec/constants.c
index 587482f19..7f85e4f2a 100644
--- a/third_party/python/Modules/_decimal/libmpdec/constants.c
+++ b/third_party/python/Modules/_decimal/libmpdec/constants.c
@@ -29,11 +29,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/python/Modules/_decimal/libmpdec/constants.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /* number-theory.c */
 const mpd_uint_t mpd_moduli[3] = {
diff --git a/third_party/python/Modules/_decimal/libmpdec/context.c b/third_party/python/Modules/_decimal/libmpdec/context.c
index fff1b2dae..d57742f03 100644
--- a/third_party/python/Modules/_decimal/libmpdec/context.c
+++ b/third_party/python/Modules/_decimal/libmpdec/context.c
@@ -30,11 +30,7 @@
 #include "libc/calls/calls.h"
 #include "libc/sysv/consts/sig.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 void
 mpd_dflt_traphandler(mpd_context_t *ctx)
diff --git a/third_party/python/Modules/_decimal/libmpdec/convolute.c b/third_party/python/Modules/_decimal/libmpdec/convolute.c
index 25b85d128..89cd5db13 100644
--- a/third_party/python/Modules/_decimal/libmpdec/convolute.c
+++ b/third_party/python/Modules/_decimal/libmpdec/convolute.c
@@ -36,11 +36,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h"
 #include "third_party/python/Modules/_decimal/libmpdec/sixstep.h"
 #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 
 /* Bignum: Fast convolution using the Number Theoretic Transform.
diff --git a/third_party/python/Modules/_decimal/libmpdec/crt.c b/third_party/python/Modules/_decimal/libmpdec/crt.c
index e20acadc5..70db6f24f 100644
--- a/third_party/python/Modules/_decimal/libmpdec/crt.c
+++ b/third_party/python/Modules/_decimal/libmpdec/crt.c
@@ -31,11 +31,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h"
 #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /* Bignum: Chinese Remainder Theorem, extends the maximum transform length. */
 
diff --git a/third_party/python/Modules/_decimal/libmpdec/difradix2.c b/third_party/python/Modules/_decimal/libmpdec/difradix2.c
index ac59f54a6..c5f7b4fae 100644
--- a/third_party/python/Modules/_decimal/libmpdec/difradix2.c
+++ b/third_party/python/Modules/_decimal/libmpdec/difradix2.c
@@ -32,11 +32,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h"
 #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /* Bignum: The actual transform routine (decimation in frequency). */
 
diff --git a/third_party/python/Modules/_decimal/libmpdec/fnt.c b/third_party/python/Modules/_decimal/libmpdec/fnt.c
index 4c9b6147e..cad1f4abf 100644
--- a/third_party/python/Modules/_decimal/libmpdec/fnt.c
+++ b/third_party/python/Modules/_decimal/libmpdec/fnt.c
@@ -33,11 +33,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/fnt.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /* Bignum: Fast transform for medium-sized coefficients. */
 
diff --git a/third_party/python/Modules/_decimal/libmpdec/fourstep.c b/third_party/python/Modules/_decimal/libmpdec/fourstep.c
index 60912059c..36d3bb815 100644
--- a/third_party/python/Modules/_decimal/libmpdec/fourstep.c
+++ b/third_party/python/Modules/_decimal/libmpdec/fourstep.c
@@ -33,11 +33,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/sixstep.h"
 #include "third_party/python/Modules/_decimal/libmpdec/transpose.h"
 #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /*
                  Cache Efficient Matrix Fourier Transform
diff --git a/third_party/python/Modules/_decimal/libmpdec/io.c b/third_party/python/Modules/_decimal/libmpdec/io.c
index bb8a92fc6..687417335 100644
--- a/third_party/python/Modules/_decimal/libmpdec/io.c
+++ b/third_party/python/Modules/_decimal/libmpdec/io.c
@@ -35,15 +35,15 @@
 #include "third_party/python/Modules/_decimal/libmpdec/io.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/typearith.h"
+__static_yoink("libmpdec_notice");
 
 #if __GNUC__ >= 11
 #pragma GCC diagnostic ignored "-Wmisleading-indentation"
 #endif
 
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+#if __GNUC__ >= 12
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
 
 
 /* This file contains functions for decimal <-> string conversions, including
diff --git a/third_party/python/Modules/_decimal/libmpdec/memory.c b/third_party/python/Modules/_decimal/libmpdec/memory.c
index e194159fb..98914f293 100644
--- a/third_party/python/Modules/_decimal/libmpdec/memory.c
+++ b/third_party/python/Modules/_decimal/libmpdec/memory.c
@@ -31,11 +31,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpalloc.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/typearith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /* Guaranteed minimum allocation for a coefficient. May be changed once
    at program start using mpd_setminalloc(). */
diff --git a/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c b/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c
index e1a818f36..e93eee8a2 100644
--- a/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c
+++ b/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c
@@ -36,11 +36,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpalloc.h"
 #include "third_party/python/Modules/_decimal/libmpdec/typearith.h"
 #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 #define MPD_NEWTONDIV_CUTOFF 1024L
 
diff --git a/third_party/python/Modules/_decimal/libmpdec/notice.c b/third_party/python/Modules/_decimal/libmpdec/notice.c
new file mode 100644
index 000000000..fd4f0fbe1
--- /dev/null
+++ b/third_party/python/Modules/_decimal/libmpdec/notice.c
@@ -0,0 +1,3 @@
+__notice(libmpdec_notice, "\
+libmpdec (BSD-2)\n\
+Copyright 2008-2016 Stefan Krah");
diff --git a/third_party/python/Modules/_decimal/libmpdec/numbertheory.c b/third_party/python/Modules/_decimal/libmpdec/numbertheory.c
index 1551a5968..604d29f5c 100644
--- a/third_party/python/Modules/_decimal/libmpdec/numbertheory.c
+++ b/third_party/python/Modules/_decimal/libmpdec/numbertheory.c
@@ -31,11 +31,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h"
 #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /* Bignum: Initialize the Number Theoretic Transform. */
 
diff --git a/third_party/python/Modules/_decimal/libmpdec/sixstep.c b/third_party/python/Modules/_decimal/libmpdec/sixstep.c
index 06e8ba071..16d3e3f28 100644
--- a/third_party/python/Modules/_decimal/libmpdec/sixstep.c
+++ b/third_party/python/Modules/_decimal/libmpdec/sixstep.c
@@ -34,11 +34,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/sixstep.h"
 #include "third_party/python/Modules/_decimal/libmpdec/transpose.h"
 #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 /*
                 Cache Efficient Matrix Fourier Transform
diff --git a/third_party/python/Modules/_decimal/libmpdec/transpose.c b/third_party/python/Modules/_decimal/libmpdec/transpose.c
index 4419b6063..ceb3d9f63 100644
--- a/third_party/python/Modules/_decimal/libmpdec/transpose.c
+++ b/third_party/python/Modules/_decimal/libmpdec/transpose.c
@@ -34,11 +34,7 @@
 #include "third_party/python/Modules/_decimal/libmpdec/constants.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/typearith.h"
-
-asm(".ident\t\"\\n\\n\
-libmpdec (BSD-2)\\n\
-Copyright 2008-2016 Stefan Krah\"");
-asm(".include \"libc/disclaimer.inc\"");
+__static_yoink("libmpdec_notice");
 
 #define BUFSIZE 4096
 #define SIDE 128
diff --git a/third_party/python/Modules/_randommodule.c b/third_party/python/Modules/_randommodule.c
index 51fea4586..bafdf9fae 100644
--- a/third_party/python/Modules/_randommodule.c
+++ b/third_party/python/Modules/_randommodule.c
@@ -25,10 +25,8 @@
 PYTHON_PROVIDE("_random");
 PYTHON_PROVIDE("_random.Random");
 
-asm(".ident\t\"\\n\\n\
-mt19937 (BSD-3)\\n\
-Copyright 1997-2004 Makoto Matsumoto and Takuji Nishimura\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(python_mt19937_notice, "Python mt19937 (BSD-3)\n\
+Copyright 1997-2004 Makoto Matsumoto and Takuji Nishimura");
 
 /* ------------------------------------------------------------------
    The code in this module was based on a download from:
diff --git a/third_party/python/Modules/_sqlite/cache.c b/third_party/python/Modules/_sqlite/cache.c
index 3076e68f8..f88969fde 100644
--- a/third_party/python/Modules/_sqlite/cache.c
+++ b/third_party/python/Modules/_sqlite/cache.c
@@ -25,11 +25,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/python/Modules/_sqlite/cache.h"
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /* only used internally */
 pysqlite_Node* pysqlite_new_node(PyObject* key, PyObject* data)
 {
diff --git a/third_party/python/Modules/_sqlite/connection.c b/third_party/python/Modules/_sqlite/connection.c
index b1c3bb638..38651adbb 100644
--- a/third_party/python/Modules/_sqlite/connection.c
+++ b/third_party/python/Modules/_sqlite/connection.c
@@ -36,11 +36,6 @@
 
 PYTHON_YOINK("sqlite3.dump");
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 #define ACTION_FINALIZE 1
 #define ACTION_RESET 2
 
diff --git a/third_party/python/Modules/_sqlite/cursor.c b/third_party/python/Modules/_sqlite/cursor.c
index cc2c5e8f3..c4fc23f91 100644
--- a/third_party/python/Modules/_sqlite/cursor.c
+++ b/third_party/python/Modules/_sqlite/cursor.c
@@ -27,11 +27,6 @@
 #include "third_party/python/Modules/_sqlite/module.h"
 #include "third_party/python/Modules/_sqlite/util.h"
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 PyObject* pysqlite_cursor_iternext(pysqlite_Cursor* self);
 
 static const char errmsg_fetch_across_rollback[] = "Cursor needed to be reset because of commit/rollback and can no longer be fetched from.";
diff --git a/third_party/python/Modules/_sqlite/microprotocols.c b/third_party/python/Modules/_sqlite/microprotocols.c
index 168b931be..eecada5b3 100644
--- a/third_party/python/Modules/_sqlite/microprotocols.c
+++ b/third_party/python/Modules/_sqlite/microprotocols.c
@@ -27,11 +27,6 @@
 #include "third_party/python/Modules/_sqlite/microprotocols.h"
 #include "third_party/python/Modules/_sqlite/prepare_protocol.h"
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /** the adapters registry **/
 
 PyObject *psyco_adapters;
diff --git a/third_party/python/Modules/_sqlite/module.c b/third_party/python/Modules/_sqlite/module.c
index bf031a97f..63926be5c 100644
--- a/third_party/python/Modules/_sqlite/module.c
+++ b/third_party/python/Modules/_sqlite/module.c
@@ -32,12 +32,11 @@
 #include "third_party/python/Modules/_sqlite/row.h"
 #include "third_party/python/Modules/_sqlite/statement.h"
 
-PYTHON_PROVIDE("_sqlite3");
+__notice(pysqlite_notice, "\
+pysqlite (zlib license)\n\
+Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>");
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
+PYTHON_PROVIDE("_sqlite3");
 
 /* #if SQLITE_VERSION_NUMBER >= 3003003 */
 /* #define HAVE_SHARED_CACHE */
diff --git a/third_party/python/Modules/_sqlite/prepare_protocol.c b/third_party/python/Modules/_sqlite/prepare_protocol.c
index eafb29baf..5b110e9fd 100644
--- a/third_party/python/Modules/_sqlite/prepare_protocol.c
+++ b/third_party/python/Modules/_sqlite/prepare_protocol.c
@@ -25,11 +25,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/python/Modules/_sqlite/prepare_protocol.h"
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 int pysqlite_prepare_protocol_init(pysqlite_PrepareProtocol* self, PyObject* args, PyObject* kwargs)
 {
     return 0;
diff --git a/third_party/python/Modules/_sqlite/row.c b/third_party/python/Modules/_sqlite/row.c
index b23f1afc3..be06b9213 100644
--- a/third_party/python/Modules/_sqlite/row.c
+++ b/third_party/python/Modules/_sqlite/row.c
@@ -26,11 +26,6 @@
 #include "third_party/python/Modules/_sqlite/cursor.h"
 #include "third_party/python/Modules/_sqlite/row.h"
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 void pysqlite_row_dealloc(pysqlite_Row* self)
 {
     Py_XDECREF(self->data);
diff --git a/third_party/python/Modules/_sqlite/statement.c b/third_party/python/Modules/_sqlite/statement.c
index 066a6e870..381d97d63 100644
--- a/third_party/python/Modules/_sqlite/statement.c
+++ b/third_party/python/Modules/_sqlite/statement.c
@@ -30,11 +30,6 @@
 #include "third_party/python/Modules/_sqlite/statement.h"
 #include "third_party/python/Modules/_sqlite/util.h"
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /* prototypes */
 static int pysqlite_check_remaining_sql(const char* tail);
 
diff --git a/third_party/python/Modules/_sqlite/util.c b/third_party/python/Modules/_sqlite/util.c
index 7eae8f8a3..ab53adb08 100644
--- a/third_party/python/Modules/_sqlite/util.c
+++ b/third_party/python/Modules/_sqlite/util.c
@@ -26,11 +26,6 @@
 #include "third_party/python/Modules/_sqlite/connection.h"
 #include "third_party/python/Modules/_sqlite/module.h"
 
-asm(".ident\t\"\\n\\n\
-pysqlite (zlib license)\\n\
-Copyright (C) 2005-2010 Gerhard Häring <gh@ghaering.de>\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 int pysqlite_step(sqlite3_stmt* statement, pysqlite_Connection* connection)
 {
     int rc;
diff --git a/third_party/python/Modules/_sre.c b/third_party/python/Modules/_sre.c
index da31f9cdd..a786dbdb4 100644
--- a/third_party/python/Modules/_sre.c
+++ b/third_party/python/Modules/_sre.c
@@ -39,10 +39,9 @@ PYTHON_PROVIDE("_sre.compile");
 PYTHON_PROVIDE("_sre.getcodesize");
 PYTHON_PROVIDE("_sre.getlower");
 
-asm(".ident\t\"\\n\\n\
-SRE 2.2.2 (Python license)\\n\
-Copyright 1997-2002 Secret Labs AB\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(python_sre_notice, "\
+SRE 2.2.2 (Python license)\n\
+Copyright 1997-2002 Secret Labs AB");
 
 /*
  * Secret Labs' Regular Expression Engine
diff --git a/third_party/python/Modules/expat/xmlparse.c b/third_party/python/Modules/expat/xmlparse.c
index 6bfce3613..a6ba69a5b 100644
--- a/third_party/python/Modules/expat/xmlparse.c
+++ b/third_party/python/Modules/expat/xmlparse.c
@@ -46,11 +46,9 @@
    USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 
-asm(".ident\t\"\\n\\n\
-expat (MIT License)\\n\
-Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\\n\
-Copyright (c) 2000-2017 Expat development team\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(expat_notice, "expat (MIT License)\n\
+Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\n\
+Copyright (c) 2000-2017 Expat development team");
 
 #define XML_BUILDING_EXPAT 1
 
diff --git a/third_party/python/Modules/expat/xmlrole.c b/third_party/python/Modules/expat/xmlrole.c
index 6ce8885db..401329321 100644
--- a/third_party/python/Modules/expat/xmlrole.c
+++ b/third_party/python/Modules/expat/xmlrole.c
@@ -30,12 +30,6 @@
    USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 
-asm(".ident\t\"\\n\\n\
-expat (MIT License)\\n\
-Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\\n\
-Copyright (c) 2000-2017 Expat development team\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 #include "third_party/python/Modules/expat/expat_config.h"
 #include "third_party/python/Modules/expat/expat_external.h"
 #include "third_party/python/Modules/expat/internal.inc"
diff --git a/third_party/python/Modules/expat/xmltok.c b/third_party/python/Modules/expat/xmltok.c
index 149b12e10..6f1e6df95 100644
--- a/third_party/python/Modules/expat/xmltok.c
+++ b/third_party/python/Modules/expat/xmltok.c
@@ -30,12 +30,6 @@
    USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 
-asm(".ident\t\"\\n\\n\
-expat (MIT License)\\n\
-Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\\n\
-Copyright (c) 2000-2017 Expat development team\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 #include "third_party/python/Modules/expat/expat_config.h"
 #include "third_party/python/Modules/expat/expat_external.h"
 #include "third_party/python/Modules/expat/internal.inc"
diff --git a/third_party/python/Modules/hashtable.c b/third_party/python/Modules/hashtable.c
index fbed762c3..335ed3b17 100644
--- a/third_party/python/Modules/hashtable.c
+++ b/third_party/python/Modules/hashtable.c
@@ -8,10 +8,9 @@
 #include "third_party/python/Include/pymem.h"
 #include "third_party/python/Modules/hashtable.h"
 
-asm(".ident\t\"\\n\\n\
-cfuhash (bsd-3)\\n\
-Copyright (c) 2005 Don Owens\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(cfuhash_notice, "\
+cfuhash (bsd-3)\n\
+Copyright (c) 2005 Don Owens");
 
 /* The implementation of the hash table (_Py_hashtable_t) is based on the
    cfuhash project:
diff --git a/third_party/python/Modules/parsermodule.c b/third_party/python/Modules/parsermodule.c
index 001054863..cd9c7fa22 100644
--- a/third_party/python/Modules/parsermodule.c
+++ b/third_party/python/Modules/parsermodule.c
@@ -40,12 +40,13 @@ PYTHON_PROVIDE("parser.st2tuple");
 PYTHON_PROVIDE("parser.suite");
 PYTHON_PROVIDE("parser.tuple2st");
 
-asm(".ident\t\"\\n\\n\
-parsermodule (Python license)\\n\
-Copyright 1995-1996 by Virginia Polytechnic Institute & State\\n\
-University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\\n\
-Virginia, USA.  Portions copyright 1991-1995 by Stichting Mathematisch\\n\
-Centrum, Amsterdam, The Netherlands.\"");
+__notice(parsermodule_notice, "\
+parsermodule (Python license)\n\
+Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
+University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
+Virginia, USA.  Portions copyright 1991-1995 by Stichting Mathematisch\n\
+Centrum, Amsterdam, The Netherlands.");
+
 static const char parser_copyright_string[] =
 "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
 University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
diff --git a/third_party/python/Modules/posixmodule.c b/third_party/python/Modules/posixmodule.c
index 98d55bd8f..fee776524 100644
--- a/third_party/python/Modules/posixmodule.c
+++ b/third_party/python/Modules/posixmodule.c
@@ -89,6 +89,7 @@
 #include "third_party/python/Include/warnings.h"
 #include "third_party/python/Include/yoink.h"
 #include "third_party/python/Modules/_multiprocessing/multiprocessing.h"
+#include "libc/unistd.h"
 #include "third_party/python/pyconfig.h"
 
 PYTHON_PROVIDE("posix");
diff --git a/third_party/python/Modules/syslogmodule.c b/third_party/python/Modules/syslogmodule.c
index 6d2e4a916..31c66ba9b 100644
--- a/third_party/python/Modules/syslogmodule.c
+++ b/third_party/python/Modules/syslogmodule.c
@@ -59,11 +59,10 @@ PYTHON_PROVIDE("syslog.openlog");
 PYTHON_PROVIDE("syslog.setlogmask");
 PYTHON_PROVIDE("syslog.syslog");
 
-asm(".ident\t\"\\n\\n\
-syslogmodule (mit)\\n\
-Copyright 1994 by Lance Ellinghouse\\n\
-Cathedral City, California Republic, United States of America\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(python_syslogmodule_notice, "\
+syslogmodule (MIT License)\n\
+Copyright 1994 by Lance Ellinghouse\n\
+Cathedral City, California Republic, United States of America");
 
 /***********************************************************
 Copyright 1994 by Lance Ellinghouse,
diff --git a/third_party/python/Modules/unicodedata_3.2.0.c b/third_party/python/Modules/unicodedata_3.2.0.c
index 49e4df3a5..b2d967577 100644
--- a/third_party/python/Modules/unicodedata_3.2.0.c
+++ b/third_party/python/Modules/unicodedata_3.2.0.c
@@ -432,12 +432,10 @@ static const unsigned char _PyUnicode_ChangeIndex_3_2_0_rodata[365+1][2] = { /*
     {  2, 0x02},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_ChangeIndex_3_2_0_init(void) {
     rldecode2(_PyUnicode_ChangeIndex_3_2_0, (void *)_PyUnicode_ChangeIndex_3_2_0_rodata);
 }
-const void *const _PyUnicode_ChangeIndex_3_2_0_ctor[] initarray = {
-    _PyUnicode_ChangeIndex_3_2_0_init,
-};
 
 unsigned char _PyUnicode_ChangeData_3_2_0[31360];
 static const unsigned char _PyUnicode_ChangeData_3_2_0_rodata[1390+1][2] = { /* 8.8648% profit */
@@ -1833,12 +1831,10 @@ static const unsigned char _PyUnicode_ChangeData_3_2_0_rodata[1390+1][2] = { /*
     { 16, 0x00},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_ChangeData_3_2_0_init(void) {
     rldecode2(_PyUnicode_ChangeData_3_2_0, (void *)_PyUnicode_ChangeData_3_2_0_rodata);
 }
-const void *const _PyUnicode_ChangeData_3_2_0_ctor[] initarray = {
-    _PyUnicode_ChangeData_3_2_0_init,
-};
 
 const _PyUnicode_ChangeRecord *_PyUnicode_GetChange_3_2_0(Py_UCS4 n)
 {
diff --git a/third_party/python/Modules/unicodedata_comp.c b/third_party/python/Modules/unicodedata_comp.c
index 9b2c142fe..95706fd0c 100644
--- a/third_party/python/Modules/unicodedata_comp.c
+++ b/third_party/python/Modules/unicodedata_comp.c
@@ -1152,6 +1152,7 @@ static const unsigned short _PyUnicode_CompIndex_rodata[1146+1][2] = { /* 38.598
     {  1, 0x2a9},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_CompIndex_init(void) {
     int i, j, k;
     for (k = i = 0; i < 1146; ++i) {
@@ -1160,9 +1161,6 @@ static textstartup void _PyUnicode_CompIndex_init(void) {
         }
     }
 }
-const void *const _PyUnicode_CompIndex_ctor[] initarray = {
-    _PyUnicode_CompIndex_init,
-};
 
 const unsigned int _PyUnicode_CompData[1449] = {
     0, 0, 0, 922746880, 17, 9011200, 3758096384, 1101, 0, 50594176, 
diff --git a/third_party/python/Modules/unicodedata_decomp.c b/third_party/python/Modules/unicodedata_decomp.c
index ca8b27add..8fbcf188f 100644
--- a/third_party/python/Modules/unicodedata_decomp.c
+++ b/third_party/python/Modules/unicodedata_decomp.c
@@ -1446,12 +1446,10 @@ static const unsigned char _PyUnicode_DecompIndex1_rodata[159+1][2] = { /* 3.653
     { 39, 0x00},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_DecompIndex1_init(void) {
     rldecode2(_PyUnicode_DecompIndex1, (void *)_PyUnicode_DecompIndex1_rodata);
 }
-const void *const _PyUnicode_DecompIndex1_ctor[] initarray = {
-    _PyUnicode_DecompIndex1_init,
-};
 
 const unsigned short _PyUnicode_DecompIndex2[13312] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
diff --git a/third_party/python/Modules/unicodedata_phrasebook.c b/third_party/python/Modules/unicodedata_phrasebook.c
index dfa0630fc..702ef6089 100644
--- a/third_party/python/Modules/unicodedata_phrasebook.c
+++ b/third_party/python/Modules/unicodedata_phrasebook.c
@@ -11647,12 +11647,10 @@ static const unsigned char _PyUnicode_PhrasebookOffset1_rodata[206+1][2] = { /*
     {253, 0x34},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_PhrasebookOffset1_init(void) {
     rldecode2(_PyUnicode_PhrasebookOffset1, (void *)_PyUnicode_PhrasebookOffset1_rodata);
 }
-const void *const _PyUnicode_PhrasebookOffset1_ctor[] initarray = {
-    _PyUnicode_PhrasebookOffset1_init,
-};
 
 const unsigned int _PyUnicode_PhrasebookOffset2[25056] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 786433, 37748832, 
diff --git a/third_party/python/Modules/unicodedata_records.c b/third_party/python/Modules/unicodedata_records.c
index 9c7b8ba51..7c08e5974 100644
--- a/third_party/python/Modules/unicodedata_records.c
+++ b/third_party/python/Modules/unicodedata_records.c
@@ -674,6 +674,7 @@ static const unsigned short _PyUnicode_RecordsIndex1_rodata[323+1][2] = { /* 7.4
     {  1, 0x10a},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_RecordsIndex1_init(void) {
     int i, j, k;
     for (k = i = 0; i < 323; ++i) {
@@ -682,9 +683,6 @@ static textstartup void _PyUnicode_RecordsIndex1_init(void) {
         }
     }
 }
-const void *const _PyUnicode_RecordsIndex1_ctor[] initarray = {
-    _PyUnicode_RecordsIndex1_init,
-};
 
 unsigned short _PyUnicode_RecordsIndex2[34176];
 static const unsigned short _PyUnicode_RecordsIndex2_rodata[4990+1][2] = { /* 29.2018% profit */
@@ -5680,6 +5678,7 @@ static const unsigned short _PyUnicode_RecordsIndex2_rodata[4990+1][2] = { /* 29
     {  2, 0x00},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_RecordsIndex2_init(void) {
     int i, j, k;
     for (k = i = 0; i < 4990; ++i) {
@@ -5688,7 +5687,3 @@ static textstartup void _PyUnicode_RecordsIndex2_init(void) {
         }
     }
 }
-const void *const _PyUnicode_RecordsIndex2_ctor[] initarray = {
-    _PyUnicode_RecordsIndex2_init,
-};
-
diff --git a/third_party/python/Modules/unicodedata_typerecords.c b/third_party/python/Modules/unicodedata_typerecords.c
index afa05895c..864ba3b4c 100644
--- a/third_party/python/Modules/unicodedata_typerecords.c
+++ b/third_party/python/Modules/unicodedata_typerecords.c
@@ -881,6 +881,7 @@ static const unsigned short _PyUnicode_TypeRecordsIndex1_rodata[370+1][2] = { /*
     {  1, 0x112},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_TypeRecordsIndex1_init(void) {
     int i, j, k;
     for (k = i = 0; i < 370; ++i) {
@@ -889,9 +890,6 @@ static textstartup void _PyUnicode_TypeRecordsIndex1_init(void) {
         }
     }
 }
-const void *const _PyUnicode_TypeRecordsIndex1_ctor[] initarray = {
-    _PyUnicode_TypeRecordsIndex1_init,
-};
 
 unsigned short _PyUnicode_TypeRecordsIndex2[35200];
 static const unsigned short _PyUnicode_TypeRecordsIndex2_rodata[4889+1][2] = { /* 27.7784% profit */
@@ -5786,6 +5784,7 @@ static const unsigned short _PyUnicode_TypeRecordsIndex2_rodata[4889+1][2] = { /
     {  2, 0x00},
     {0},
 };
+__attribute__((__constructor__(99)))
 static textstartup void _PyUnicode_TypeRecordsIndex2_init(void) {
     int i, j, k;
     for (k = i = 0; i < 4889; ++i) {
@@ -5794,7 +5793,3 @@ static textstartup void _PyUnicode_TypeRecordsIndex2_init(void) {
         }
     }
 }
-const void *const _PyUnicode_TypeRecordsIndex2_ctor[] initarray = {
-    _PyUnicode_TypeRecordsIndex2_init,
-};
-
diff --git a/third_party/python/Python/getcopyright.c b/third_party/python/Python/getcopyright.c
index 9ec5beadc..233148714 100644
--- a/third_party/python/Python/getcopyright.c
+++ b/third_party/python/Python/getcopyright.c
@@ -10,16 +10,16 @@
 #include "libc/str/str.h"
 #include "third_party/python/Include/pylifecycle.h"
 
-asm(".ident\t\"\\n\\n\
-Python 3.6 (https://docs.python.org/3/license.html)\\n\
-Copyright (c) 2001-2021 Python Software Foundation.\\n\
-All Rights Reserved.\\n\
-Copyright (c) 2000 BeOpen.com.\\n\
-All Rights Reserved.\\n\
-Copyright (c) 1995-2001 Corporation for National Research Initiatives.\\n\
-All Rights Reserved.\\n\
-Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.\\n\
-All Rights Reserved.\"");
+__notice(python_notice, "\
+Python 3.6 (https://docs.python.org/3/license.html)\n\
+Copyright (c) 2001-2021 Python Software Foundation.\n\
+All Rights Reserved.\n\
+Copyright (c) 2000 BeOpen.com.\n\
+All Rights Reserved.\n\
+Copyright (c) 1995-2001 Corporation for National Research Initiatives.\n\
+All Rights Reserved.\n\
+Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.\n\
+All Rights Reserved.");
 
 const char *
 Py_GetCopyright(void)
@@ -29,7 +29,7 @@ Py_GetCopyright(void)
         char *r = 0;
         const char *p;
         appends(&r, "");
-        for (p = __comment_start; *p; p += strlen(p) + 1) {
+        for (p = __notices; *p; p += strlen(p) + 1) {
             appends(&r, p);
         }
         res = r;
diff --git a/third_party/python/Python/getopt.c b/third_party/python/Python/getopt.c
index c54c7aa68..24aa6b49b 100644
--- a/third_party/python/Python/getopt.c
+++ b/third_party/python/Python/getopt.c
@@ -8,9 +8,9 @@
 #include "libc/str/str.h"
 #include "third_party/python/Include/pygetopt.h"
 
-asm(".ident\t\"\\n\\n\
-python getopt (isc license)\\n\
-Copyright 1992-1994 David Gottner\"");
+__notice(python_getopt_notice, "\
+python getopt (isc license)\n\
+Copyright 1992-1994 David Gottner");
 
 /*---------------------------------------------------------------------------*
  * <RCS keywords>
diff --git a/third_party/python/Python/sysmodule.c b/third_party/python/Python/sysmodule.c
index 9cf5f5cf1..8b0c6edc7 100644
--- a/third_party/python/Python/sysmodule.c
+++ b/third_party/python/Python/sysmodule.c
@@ -2013,6 +2013,7 @@ _PySys_Init(void)
                          PyBool_FromLong(Py_DontWriteBytecodeFlag));
     SET_SYS_FROM_STRING("api_version",
                         PyLong_FromLong(PYTHON_API_VERSION));
+    // asm("int3");
     SET_SYS_FROM_STRING("copyright",
                         PyUnicode_FromString(Py_GetCopyright()));
     SET_SYS_FROM_STRING("platform",
diff --git a/third_party/regex/notice.inc b/third_party/regex/notice.inc
deleted file mode 100644
index d154d1146..000000000
--- a/third_party/regex/notice.inc
+++ /dev/null
@@ -1,10 +0,0 @@
-asm(".ident\t\"\\n\
-Musl Libc (MIT License)\\n\
-Copyright 2005-2014 Rich Felker\"");
-asm(".include \"libc/disclaimer.inc\"");
-
-asm(".ident\t\"\\n\
-TRE regex (BSD-2 License)\\n\
-Copyright 2001-2009 Ville Laurikari <vl@iki.fi>\\n\
-Copyright 2016 Szabolcs Nagy\"");
-asm(".include \"libc/disclaimer.inc\"");
diff --git a/third_party/regex/regexec.c b/third_party/regex/regexec.c
index 81f722087..fd4b4446f 100644
--- a/third_party/regex/regexec.c
+++ b/third_party/regex/regexec.c
@@ -60,6 +60,11 @@
 #include "libc/limits.h"
 #include "third_party/regex/tre.inc"
 
+__notice(tre_regex, "\
+TRE regex (BSD-2 License)\n\
+Copyright 2001-2009 Ville Laurikari <vl@iki.fi>\n\
+Copyright 2016 Szabolcs Nagy");
+
 static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
                             const tre_tnfa_t *tnfa, regoff_t *tags,
                             regoff_t match_eo);
diff --git a/third_party/regex/tre.inc b/third_party/regex/tre.inc
index 2c26396d9..2f6e39854 100644
--- a/third_party/regex/tre.inc
+++ b/third_party/regex/tre.inc
@@ -60,7 +60,6 @@
 #include "libc/mem/alg.h"
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
-#include "third_party/regex/notice.inc"
 #include "third_party/regex/regex.h"
 
 #undef TRE_MBSTATE
diff --git a/third_party/stb/stb_image.c b/third_party/stb/stb_image.c
index 6852c3adc..f796a034b 100644
--- a/third_party/stb/stb_image.c
+++ b/third_party/stb/stb_image.c
@@ -36,10 +36,10 @@
 #include "third_party/aarch64/arm_neon.internal.h"
 #include "third_party/intel/ammintrin.internal.h"
 
-asm(".ident\t\"\\n\\n\
-stb_image (Public Domain)\\n\
-Credit: Sean Barrett, et al.\\n\
-http://nothings.org/stb\"");
+__notice(stb_image_notice, "\
+stb_image (Public Domain)\n\
+Credit: Sean Barrett, et al.\n\
+http://nothings.org/stb");
 
 #ifdef __x86_64__
 #define STBI_SSE2
diff --git a/third_party/stb/stb_image_resize.c b/third_party/stb/stb_image_resize.c
index 24cb373e5..7fc71a33e 100644
--- a/third_party/stb/stb_image_resize.c
+++ b/third_party/stb/stb_image_resize.c
@@ -23,10 +23,10 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-stb_image_resize (Public Domain)\\n\
-Credit: Jorge L Rodriguez (@VinoBS), Sean Barrett, et al.\\n\
-http://nothings.org/stb\"");
+__notice(stb_image_resize_notice, "\
+stb_image_resize (Public Domain)\n\
+Credit: Jorge L Rodriguez (@VinoBS), Sean Barrett, et al.\n\
+http://nothings.org/stb");
 
 #define STBIR_ASSERT(x)       assert(x)
 #define STBIR_MALLOC(size, c) ((void)(c), malloc(size))
diff --git a/third_party/stb/stb_image_write.c b/third_party/stb/stb_image_write.c
index 1f984bc78..9af55ae36 100644
--- a/third_party/stb/stb_image_write.c
+++ b/third_party/stb/stb_image_write.c
@@ -29,10 +29,10 @@
 #include "libc/str/str.h"
 #include "third_party/zlib/zlib.h"
 
-asm(".ident\t\"\\n\\n\
-stb_image_write (Public Domain)\\n\
-Credit: Sean Barrett, et al.\\n\
-http://nothings.org/stb\"");
+__notice(stb_image_write_notice, "\
+stb_image_write (Public Domain)\n\
+Credit: Sean Barrett, et al.\n\
+http://nothings.org/stb");
 
 #define STBIW_UCHAR(x)                       (unsigned char)((x)&0xff)
 
diff --git a/third_party/stb/stb_rect_pack.c b/third_party/stb/stb_rect_pack.c
index 65fab7afa..eb07e1e0d 100644
--- a/third_party/stb/stb_rect_pack.c
+++ b/third_party/stb/stb_rect_pack.c
@@ -21,10 +21,9 @@
 #include "libc/dce.h"
 #include "libc/mem/alg.h"
 
-asm(".ident\t\"\\n\\n\
-stb_rect_pack (MIT License)\\n\
-Copyright 2017 Sean Barrett\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(stb_rect_pack_notice, "\
+stb_rect_pack (MIT License)\n\
+Copyright 2017 Sean Barrett");
 
 // stb_rect_pack.h - v1.01 - public domain - rectangle packing
 // Sean Barrett 2014
diff --git a/third_party/stb/stb_truetype.c b/third_party/stb/stb_truetype.c
index a59346df3..e1449c11b 100644
--- a/third_party/stb/stb_truetype.c
+++ b/third_party/stb/stb_truetype.c
@@ -36,10 +36,9 @@
 #include "libc/str/str.h"
 #include "third_party/stb/stb_rect_pack.h"
 
-asm(".ident\t\"\\n\\n\
-stb_truetype (MIT License)\\n\
-Copyright 2017 Sean Barrett\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(stb_truetype_notice, "\
+stb_truetype (MIT License)\n\
+Copyright 2017 Sean Barrett");
 
 // stb_truetype.h - v1.26 - public domain
 // authored from 2009-2021 by Sean Barrett / RAD Game Tools
diff --git a/third_party/stb/stb_vorbis.c b/third_party/stb/stb_vorbis.c
index 3b3da48f3..221dee242 100644
--- a/third_party/stb/stb_vorbis.c
+++ b/third_party/stb/stb_vorbis.c
@@ -46,10 +46,10 @@
 #include "libc/mem/mem.h"
 #include "libc/str/str.h"
 
-asm(".ident\t\"\\n\\n\
-stb_vorbis (Public Domain)\\n\
-Credit: Sean Barrett, et al.\\n\
-http://nothings.org/stb\"");
+__notice(stb_vorbis_notice, "\
+stb_vorbis (Public Domain)\n\
+Credit: Sean Barrett, et al.\n\
+http://nothings.org/stb");
 
 // STB_VORBIS_NO_PUSHDATA_API
 //     does not compile the code for the various stb_vorbis_*_pushdata()
diff --git a/third_party/tree/list.c b/third_party/tree/list.c
index c3aea1cdd..865bbad86 100644
--- a/third_party/tree/list.c
+++ b/third_party/tree/list.c
@@ -145,7 +145,7 @@ struct totals listdir(char *dirname, struct _info **dir, int lev, dev_t dev, boo
   int namemax = 257, namelen;
   int descend, htmldescend = 0, found, n, dirlen = strlen(dirname), pathlen = dirlen + 257;
   int needsclosed;
-  char *path, *newpath, *filename, *err = NULL;
+  char *path, *newpath = NULL, *filename, *err = NULL;
 
   int es = (dirname[strlen(dirname) - 1] == '/');
 
diff --git a/third_party/xed/x86features.c b/third_party/xed/x86features.c
index 3d8fca26c..12cebdf6e 100644
--- a/third_party/xed/x86features.c
+++ b/third_party/xed/x86features.c
@@ -18,13 +18,6 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "third_party/xed/x86isa.h"
 
-asm(".ident\t\"\\n\\n\
-Xed (Apache 2.0)\\n\
-Copyright 2018 Intel Corporation\\n\
-Copyright 2019 Justine Alexandra Roberts Tunney\\n\
-Modifications: Trimmed down to 3kb [2019-03-22 jart]\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 /**
  * Mapping of enum XedChip -> bitset<enum XedIsaSet>.
  *
diff --git a/third_party/xed/x86ild.greg.c b/third_party/xed/x86ild.greg.c
index 3e76b71af..5bcb2d927 100644
--- a/third_party/xed/x86ild.greg.c
+++ b/third_party/xed/x86ild.greg.c
@@ -28,12 +28,11 @@
 #include "third_party/xed/private.h"
 #include "third_party/xed/x86.h"
 
-asm(".ident\t\"\\n\\n\
-Xed (Apache 2.0)\\n\
-Copyright 2018 Intel Corporation\\n\
-Copyright 2019 Justine Alexandra Roberts Tunney\\n\
-Modifications: Trimmed down to 3kb [2019-03-22 jart]\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(xed_notice, "\
+Xed (Apache 2.0)\n\
+Copyright 2018 Intel Corporation\n\
+Copyright 2019 Justine Alexandra Roberts Tunney\n\
+Changes: Trimmed Intel's assembler down to 3kb [2019-03-22 jart]");
 
 #define XED_ILD_HASMODRM_IGNORE_MOD 2
 
diff --git a/third_party/xed/x86isa.c b/third_party/xed/x86isa.c
index 9cc96240e..48b3af6bd 100644
--- a/third_party/xed/x86isa.c
+++ b/third_party/xed/x86isa.c
@@ -19,13 +19,6 @@
 #include "third_party/xed/x86.h"
 #include "third_party/xed/x86isa.h"
 
-asm(".ident\t\"\\n\\n\
-Xed (Apache 2.0)\\n\
-Copyright 2018 Intel Corporation\\n\
-Copyright 2019 Justine Alexandra Roberts Tunney\\n\
-Modifications: Trimmed down to 3kb [2019-03-22 jart]\"");
-asm(".include \"libc/disclaimer.inc\"");
-
 bool xed_isa_set_is_valid_for_chip(int isa_set, int chip) {
   unsigned n, r;
   n = isa_set / 64;
diff --git a/third_party/zlib/adler32_simd.c b/third_party/zlib/adler32_simd.c
index f767c49bd..b3d310fbb 100644
--- a/third_party/zlib/adler32_simd.c
+++ b/third_party/zlib/adler32_simd.c
@@ -1,6 +1,4 @@
-asm(".ident\t\"\\n\\n\
-Chromium (BSD-3 License)\\n\
-Copyright 2017 The Chromium Authors\"");
+__static_yoink("chromium_notice");
 
 /* adler32_simd.c
  *
diff --git a/third_party/zlib/crc32_simd.c b/third_party/zlib/crc32_simd.c
index cf38bb94b..a1b35ab56 100644
--- a/third_party/zlib/crc32_simd.c
+++ b/third_party/zlib/crc32_simd.c
@@ -5,10 +5,7 @@
  * found in the Chromium source repository LICENSE file.
  */
 
-asm(".ident\t\"\\n\\n\
-Chromium (BSD-3 License)\\n\
-Copyright 2017 The Chromium Authors\"");
-
+__static_yoink("chromium_notice");
 #include "third_party/intel/x86gprintrin.internal.h"
 #include "third_party/zlib/crc32_simd.internal.h"
 #if defined(CRC32_SIMD_AVX512_PCLMUL)
diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c
index 6db7b0c28..58f9474e7 100644
--- a/third_party/zlib/deflate.c
+++ b/third_party/zlib/deflate.c
@@ -11,11 +11,7 @@
 #include "third_party/zlib/deflate.internal.h"
 #include "third_party/zlib/internal.h"
 #include "third_party/zlib/zutil.internal.h"
-
-asm(".ident\t\"\\n\\n\
-zlib 1.2.13 (zlib License)\\n\
-Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\
-Invented 1990 Phillip Walter Katz\"");
+__static_yoink("zlib_notice");
 
 /*
  *  ALGORITHM
diff --git a/third_party/zlib/inffast_chunk.c b/third_party/zlib/inffast_chunk.c
index 7ab74c011..a5302a49b 100644
--- a/third_party/zlib/inffast_chunk.c
+++ b/third_party/zlib/inffast_chunk.c
@@ -7,10 +7,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-asm(".ident\t\"\\n\\n\
-Chromium (BSD-3 License)\\n\
-Copyright 2017 The Chromium Authors\"");
-
+__static_yoink("chromium_notice");
 #include "third_party/zlib/zutil.internal.h"
 #include "third_party/zlib/inftrees.internal.h"
 #include "third_party/zlib/inflate.internal.h"
diff --git a/third_party/zlib/inflate.c b/third_party/zlib/inflate.c
index 2d5a8c424..75fa6b56e 100644
--- a/third_party/zlib/inflate.c
+++ b/third_party/zlib/inflate.c
@@ -6,10 +6,7 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-asm(".ident\t\"\\n\\n\
-zlib 1.2.13 (zlib License)\\n\
-Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\
-Invented 1990 Phillip Walter Katz\"");
+__static_yoink("zlib_notice");
 
 /*
  * Change history:
diff --git a/third_party/zlib/inftrees.c b/third_party/zlib/inftrees.c
index 4a628af76..c089e77b0 100644
--- a/third_party/zlib/inftrees.c
+++ b/third_party/zlib/inftrees.c
@@ -5,14 +5,10 @@
  * Copyright (C) 1995-2022 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
+__static_yoink("zlib_notice");
 #include "third_party/zlib/inftrees.internal.h"
 #include "third_party/zlib/zutil.internal.h"
 
-asm(".ident\t\"\\n\\n\
-zlib 1.2.13 (zlib License)\\n\
-Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\
-Invented 1990 Phillip Walter Katz\"");
-
 #define MAXBITS 15
 
 /*
diff --git a/third_party/zlib/notice.c b/third_party/zlib/notice.c
new file mode 100644
index 000000000..5f6ca928d
--- /dev/null
+++ b/third_party/zlib/notice.c
@@ -0,0 +1,4 @@
+__notice(zlib_notice, "\
+zlib 1.2.13 (zlib License)\n\
+Copyright 1995-2022 Jean-loup Gailly and Mark Adler\n\
+Invented 1990 Phillip Walter Katz");
diff --git a/third_party/zstd/BUILD.mk b/third_party/zstd/BUILD.mk
index 27edd513b..5aa69393c 100644
--- a/third_party/zstd/BUILD.mk
+++ b/third_party/zstd/BUILD.mk
@@ -147,6 +147,7 @@ $(THIRD_PARTY_ZSTD_A_CHECKS): private				\
 $(THIRD_PARTY_ZSTD_A_OBJS): private				\
 		CFLAGS +=					\
 			-Wframe-larger-than=262144		\
+			-Wno-array-bounds			\
 			-Wno-comment
 
 $(THIRD_PARTY_ZSTD_A_OBJS): private				\
diff --git a/tool/build/apelink.c b/tool/build/apelink.c
index 7b1f7d57e..0c9052f34 100644
--- a/tool/build/apelink.c
+++ b/tool/build/apelink.c
@@ -102,7 +102,7 @@
   "               - 32: freebsd\n"                             \
   "               - 64: netbsd\n"                              \
   "\n"                                                         \
-  "             for example, `-s 0b1110001` may be used to\n"  \
+  "             for example, `-V 0b1110001` may be used to\n"  \
   "             produce ELF binaries that only support the\n"  \
   "             truly open unix systems. in this case when\n"  \
   "             a single input executable is supplied, the\n"  \
@@ -120,8 +120,8 @@
   "             also pass strings in a variety of intuitive\n" \
   "             supported representations. for example, bsd\n" \
   "             will enable freebsd+netbsd+openbsd and that\n" \
-  "             string too is a legal input. the -s flag is\n" \
-  "             also repeatable, e.g. `-s nt -s xnu` to use\n" \
+  "             string too is a legal input. the -V flag is\n" \
+  "             also repeatable, e.g. `-V nt -V xnu` to use\n" \
   "             the union of the two.\n"                       \
   "\n"                                                         \
   "             since the support vector controls the file\n"  \
@@ -988,7 +988,7 @@ static void GetOpts(int argc, char *argv[]) {
         if (ParseSupportVector(optarg, &bits)) {
           support_vector |= bits;
         } else {
-          Die(prog, "unrecognized token passed to -s support vector flag");
+          Die(prog, "unrecognized token passed to -V support vector flag");
           exit(1);
         }
         got_support_vector = true;
@@ -2036,7 +2036,7 @@ int main(int argc, char *argv[]) {
     //      let our shell script compile the ape loader on first run.
     //
     if (support_vector & _HOSTXNU) {
-      bool gotsome;
+      bool gotsome = false;
       p = stpcpy(p, "else\n");  // if [ -d /Applications ]; then
 
       // output native mach-o morph
@@ -2136,6 +2136,7 @@ int main(int argc, char *argv[]) {
     }
 
     // extract the ape loader for open platforms
+    bool gotsome = false;
     if (inputs.n && (support_vector & _HOSTXNU)) {
       p = stpcpy(p, "if [ ! -d /Applications ]; then\n");
     }
@@ -2158,9 +2159,13 @@ int main(int argc, char *argv[]) {
                       "mv -f \"$t.$$\" \"$t\" ||exit\n");
         p = stpcpy(p, "exec \"$t\" \"$o\" \"$@\"\n"
                       "fi\n");
+        gotsome = true;
       }
     }
     if (inputs.n && (support_vector & _HOSTXNU)) {
+      if (!gotsome) {
+        p = stpcpy(p, "true\n");
+      }
       p = stpcpy(p, "fi\n");
     }
 
diff --git a/tool/build/compile.c b/tool/build/compile.c
index aee8f9e30..576177ef5 100644
--- a/tool/build/compile.c
+++ b/tool/build/compile.c
@@ -226,65 +226,6 @@ const char *const kSafeEnv[] = {
     "SYSTEMROOT",  // needed by socket()
 };
 
-const char *const kGccOnlyFlags[] = {
-    "--nocompress-debug-sections",
-    "--noexecstack",
-    "-Wa,--nocompress-debug-sections",
-    "-Wa,--noexecstack",
-    "-Wa,-msse2avx",
-    "-Wno-unused-but-set-variable",
-    "-Wunsafe-loop-optimizations",
-    "-fbranch-target-load-optimize",
-    "-fcx-limited-range",
-    "-fdelete-dead-exceptions",
-    "-femit-struct-debug-baseonly",
-    "-ffp-int-builtin-inexact",
-    "-finline-functions-called-once",
-    "-fipa-pta",
-    "-fivopts",
-    "-flimit-function-alignment",
-    "-fmerge-constants",
-    "-fmodulo-sched",
-    "-fmodulo-sched-allow-regmoves",
-    "-fno-align-jumps",
-    "-fno-align-labels",
-    "-fno-align-loops",
-    "-fno-cx-limited-range",
-    "-fno-fp-int-builtin-inexact",
-    "-fno-gnu-unique",
-    "-fno-gnu-unique",
-    "-fno-inline-functions-called-once",
-    "-fno-instrument-functions",
-    "-fno-schedule-insns2",
-    "-fno-whole-program",
-    "-fopt-info-vec",
-    "-fopt-info-vec-missed",
-    "-freg-struct-return",
-    "-freschedule-modulo-scheduled-loops",
-    "-frounding-math",
-    "-fsched2-use-superblocks",
-    "-fschedule-insns",
-    "-fschedule-insns2",
-    "-fshrink-wrap",
-    "-fshrink-wrap-separate",
-    "-fsignaling-nans",
-    "-fstack-clash-protection",
-    "-ftracer",
-    "-ftrapv",
-    "-ftree-loop-im",
-    "-ftree-loop-vectorize",
-    "-funsafe-loop-optimizations",
-    "-fversion-loops-for-strides",
-    "-fwhole-program",
-    "-gdescribe-dies",
-    "-gstabs",
-    "-mcall-ms2sysv-xlogues",
-    "-mdispatch-scheduler",
-    "-mfpmath=sse+387",
-    "-mmitigate-rop",
-    "-mno-fentry",
-};
-
 void OnAlrm(int sig) {
   ++gotalrm;
 }
@@ -400,21 +341,38 @@ bool IsSafeEnv(const char *s) {
   return false;
 }
 
-bool IsGccOnlyFlag(const char *s) {
-  int m, l, r, x;
-  l = 0;
-  r = ARRAYLEN(kGccOnlyFlags) - 1;
-  while (l <= r) {
-    m = (l & r) + ((l ^ r) >> 1);  // floor((a+b)/2)
-    x = strcmp(s, kGccOnlyFlags[m]);
-    if (x < 0) {
-      r = m - 1;
-    } else if (x > 0) {
-      l = m + 1;
-    } else {
-      return true;
+char *Slurp(const char *path) {
+  int fd;
+  char *res = 0;
+  if ((fd = open(path, O_RDONLY)) != -1) {
+    ssize_t size;
+    if ((size = lseek(fd, 0, SEEK_END)) != -1) {
+      char *buf;
+      if ((buf = calloc(1, size + 1))) {
+        if (pread(fd, buf, size, 0) == size) {
+          res = buf;
+        } else {
+          free(buf);
+        }
+      }
     }
+    close(fd);
   }
+  return res;
+}
+
+bool HasFlag(const char *flags, const char *s) {
+  char buf[256];
+  size_t n = strlen(s);
+  if (!flags) return false;
+  if (n + 2 > sizeof(buf)) return false;
+  memcpy(buf, s, n);
+  buf[n] = '\n';
+  buf[n + 1] = 0;
+  return !!strstr(flags, buf);
+}
+
+bool IsGccOnlyFlag(const char *s) {
   if (s[0] == '-') {
     if (s[1] == 'f') {
       if (startswith(s, "-ffixed-")) return true;
@@ -428,8 +386,25 @@ bool IsGccOnlyFlag(const char *s) {
     if (startswith(s, "-mstringop-strategy=")) return true;
     if (startswith(s, "-mpreferred-stack-boundary=")) return true;
     if (startswith(s, "-Wframe-larger-than=")) return true;
+    if (startswith(s, "-Walloca-larger-than=")) return true;
   }
-  return false;
+  static bool once;
+  static char *gcc_only_flags;
+  if (!once) {
+    gcc_only_flags = Slurp("build/bootstrap/gcc-only-flags.txt");
+    once = true;
+  }
+  return HasFlag(gcc_only_flags, s);
+}
+
+bool IsClangOnlyFlag(const char *s) {
+  static bool once;
+  static char *clang_only_flags;
+  if (!once) {
+    clang_only_flags = Slurp("build/bootstrap/clang-only-flags.txt");
+    once = true;
+  }
+  return HasFlag(clang_only_flags, s);
 }
 
 bool FileExistsAndIsNewerThan(const char *filepath, const char *thanpath) {
@@ -926,12 +901,12 @@ int main(int argc, char *argv[]) {
   }
 
   s = basename(strdup(cmd));
-  if (strstr(s, "gcc") || strstr(s, "g++")) {
-    iscc = true;
-    isgcc = true;
-  } else if (strstr(s, "clang") || strstr(s, "clang++")) {
+  if (strstr(s, "clang") || strstr(s, "clang++")) {
     iscc = true;
     isclang = true;
+  } else if (strstr(s, "gcc") || strstr(s, "g++")) {
+    iscc = true;
+    isgcc = true;
   } else if (strstr(s, "ld.bfd")) {
     isbfd = true;
   } else if (strstr(s, "ar.com")) {
@@ -990,6 +965,9 @@ int main(int argc, char *argv[]) {
       AddArg(argv[i]);
       continue;
     }
+    if (isgcc && IsClangOnlyFlag(argv[i])) {
+      continue;
+    }
     if (isclang && IsGccOnlyFlag(argv[i])) {
       continue;
     }
@@ -1188,7 +1166,9 @@ int main(int argc, char *argv[]) {
                          !strcmp(argv[i], "-O3"))) {
       /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97623 */
       AddArg(argv[i]);
-      AddArg("-fno-code-hoisting");
+      if (!isclang) {
+        AddArg("-fno-code-hoisting");
+      }
     } else {
       AddArg(argv[i]);
     }
diff --git a/tool/build/findape.c b/tool/build/findape.c
new file mode 100644
index 000000000..88373b651
--- /dev/null
+++ b/tool/build/findape.c
@@ -0,0 +1,64 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/runtime/runtime.h"
+#include "libc/serialize.h"
+#include "libc/stdio/ftw.h"
+#include "libc/stdio/stdio.h"
+#include "libc/sysv/consts/o.h"
+
+// finds ape executables
+// usage: findelf PATH...
+
+static int OnFile(const char *fpath, const struct stat *st, int typeflag,
+                  struct FTW *ftwbuf) {
+  if (typeflag == FTW_F) {
+    char buf[8] = {0};
+    int fd = open(fpath, O_RDONLY);
+    if (fd != -1) {
+      pread(fd, buf, sizeof(buf), 0);
+      close(fd);
+      if (READ64LE(buf) == READ64LE("MZqFpD='") ||
+          READ64LE(buf) == READ64LE("jartsr='") ||
+          READ64LE(buf) == READ64LE("APEDBG='")) {
+        tinyprint(1, fpath, "\n", NULL);
+      }
+    } else {
+      perror(fpath);
+    }
+  }
+  return 0;
+}
+
+static void HandleArg(const char *path) {
+  if (nftw(path, OnFile, 128, FTW_PHYS | FTW_DEPTH)) {
+    perror(path);
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  if (argc <= 1) {
+    HandleArg(".");
+  } else {
+    for (int i = 1; i < argc; ++i) {
+      HandleArg(argv[i]);
+    }
+  }
+}
diff --git a/tool/build/findelf.c b/tool/build/findelf.c
new file mode 100644
index 000000000..4b781eae4
--- /dev/null
+++ b/tool/build/findelf.c
@@ -0,0 +1,65 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/elf/def.h"
+#include "libc/elf/struct/ehdr.h"
+#include "libc/runtime/runtime.h"
+#include "libc/serialize.h"
+#include "libc/stdio/ftw.h"
+#include "libc/stdio/stdio.h"
+#include "libc/str/str.h"
+#include "libc/sysv/consts/o.h"
+
+// finds elf executables
+// usage: findelf PATH...
+
+static int OnFile(const char *fpath, const struct stat *st, int typeflag,
+                  struct FTW *ftwbuf) {
+  if (typeflag == FTW_F && (st->st_mode & 0111)) {
+    Elf64_Ehdr ehdr = {0};
+    int fd = open(fpath, O_RDONLY);
+    if (fd != -1) {
+      pread(fd, &ehdr, sizeof(ehdr), 0);
+      close(fd);
+      if (READ32LE(ehdr.e_ident) == READ32LE(ELFMAG) && ehdr.e_type != ET_REL) {
+        tinyprint(1, fpath, "\n", NULL);
+      }
+    } else {
+      perror(fpath);
+    }
+  }
+  return 0;
+}
+
+static void HandleArg(const char *path) {
+  if (nftw(path, OnFile, 128, FTW_PHYS | FTW_DEPTH)) {
+    perror(path);
+    exit(1);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  if (argc <= 1) {
+    HandleArg(".");
+  } else {
+    for (int i = 1; i < argc; ++i) {
+      HandleArg(argv[i]);
+    }
+  }
+}
diff --git a/tool/build/findpe.c b/tool/build/findpe.c
index 548ea288a..68e59dc1a 100644
--- a/tool/build/findpe.c
+++ b/tool/build/findpe.c
@@ -22,6 +22,9 @@
 #include "libc/stdio/stdio.h"
 #include "libc/sysv/consts/o.h"
 
+// finds portable executables (and actually portable executable)
+// usage: findelf PATH...
+
 static int OnFile(const char *fpath, const struct stat *st, int typeflag,
                   struct FTW *ftwbuf) {
   if (typeflag == FTW_F) {
diff --git a/tool/build/fixupobj.c b/tool/build/fixupobj.c
index f57192c16..f86d429dd 100644
--- a/tool/build/fixupobj.c
+++ b/tool/build/fixupobj.c
@@ -35,7 +35,9 @@
 #include "libc/mem/gc.h"
 #include "libc/runtime/runtime.h"
 #include "libc/serialize.h"
+#include "libc/stdalign.internal.h"
 #include "libc/stdckdint.h"
+#include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 #include "libc/sysv/consts/map.h"
 #include "libc/sysv/consts/msync.h"
@@ -48,8 +50,10 @@
  * @fileoverview GCC Codegen Fixer-Upper.
  */
 
-#define COSMO_TLS_REG     28
-#define MRS_TPIDR_EL0     0xd53bd040u
+#define COSMO_TLS_REG 28
+#define MRS_TPIDR_EL0 0xd53bd040u
+#define IFUNC_SECTION ".init.202.ifunc"
+
 #define MOV_REG(DST, SRC) (0xaa0003e0u | (SRC) << 16 | (DST))
 
 static int mode;
@@ -221,8 +225,56 @@ static void CheckPrivilegedCrossReferences(void) {
   }
 }
 
+// Change AMD code to use %gs:0x30 instead of %fs:0
+// We assume -mno-tls-direct-seg-refs has been used
+static void ChangeTlsFsToGs(unsigned char *p, size_t n) {
+  unsigned char *e = p + n - 9;
+  while (p <= e) {
+    // we're checking for the following expression:
+    //   0144 == p[0] &&           // %fs
+    //   0110 == (p[1] & 0373) &&  // rex.w (and ignore rex.r)
+    //   (0213 == p[2] ||          // mov reg/mem → reg (word-sized)
+    //   0003 == p[2]) &&          // add reg/mem → reg (word-sized)
+    //   0004 == (p[3] & 0307) &&  // mod/rm (4,reg,0) means sib → reg
+    //   0045 == p[4] &&           // sib (5,4,0) → (rbp,rsp,0) → disp32
+    //   0000 == p[5] &&           // displacement (von Neumann endian)
+    //   0000 == p[6] &&           // displacement
+    //   0000 == p[7] &&           // displacement
+    //   0000 == p[8]              // displacement
+    uint64_t w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
+    if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
+         w == READ64LE("\144\110\003\004\045\000\000\000")) &&
+        !p[8]) {
+      p[0] = 0145;  // change %fs to %gs
+      p[5] = 0x30;  // change 0 to 0x30
+      p += 9;
+    } else {
+      ++p;
+    }
+  }
+}
+
+static void RewriteTlsCodeAmd64(void) {
+  int i;
+  uint8_t *p;
+  Elf64_Shdr *shdr;
+  for (i = 0; i < elf->e_shnum; ++i) {
+    if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i))) {
+      Die("elf header overflow #1");
+    }
+    if (shdr->sh_type == SHT_PROGBITS &&  //
+        (shdr->sh_flags & SHF_ALLOC) &&   //
+        (shdr->sh_flags & SHF_EXECINSTR)) {
+      if (!(p = GetElfSectionAddress(elf, esize, shdr))) {
+        Die("elf header overflow #2");
+      }
+      ChangeTlsFsToGs(p, shdr->sh_size);
+    }
+  }
+}
+
 // Modify ARM64 code to use x28 for TLS rather than tpidr_el0.
-static void RewriteTlsCode(void) {
+static void RewriteTlsCodeArm64(void) {
   int i;
   Elf64_Shdr *shdr;
   uint32_t *p, *pe;
@@ -365,6 +417,203 @@ static void RelinkZipFiles(void) {
   eocd = foot;
 }
 
+// when __attribute__((__target_clones__(...))) is used, the compiler
+// will generate multiple implementations of a function for different
+// microarchitectures as well as a resolver function that tells which
+// function is appropriate to call. however the compiler doesn't make
+// code for the actual function. it also doesn't record where resolve
+// functions are located in the binary so we've reverse eng'd it here
+static void GenerateIfuncInit(void) {
+  char *name, *s;
+  long code_i = 0;
+  long relas_i = 0;
+  static char code[16384];
+  static Elf64_Rela relas[1024];
+  Elf64_Shdr *symtab_shdr = GetElfSymbolTable(elf, esize, SHT_SYMTAB, 0);
+  if (!symtab_shdr) Die("symbol table section header not found");
+  Elf64_Word symtab_shdr_index =
+      ((char *)symtab_shdr - ((char *)elf + elf->e_shoff)) / elf->e_shentsize;
+  for (Elf64_Xword i = 0; i < symcount; ++i) {
+    if (syms[i].st_shndx == SHN_UNDEF) continue;
+    if (syms[i].st_shndx >= SHN_LORESERVE) continue;
+    if (ELF64_ST_TYPE(syms[i].st_info) != STT_GNU_IFUNC) continue;
+    if (!(name = GetElfString(elf, esize, symstrs, syms[i].st_name)))
+      Die("could not get symbol name of ifunc");
+    static char resolver_name[65536];
+    strlcpy(resolver_name, name, sizeof(resolver_name));
+    if (strlcat(resolver_name, ".resolver", sizeof(resolver_name)) >=
+        sizeof(resolver_name))
+      Die("ifunc name too long");
+    Elf64_Xword function_sym_index = i;
+    Elf64_Xword resolver_sym_index = -1;
+    for (Elf64_Xword i = 0; i < symcount; ++i) {
+      if (syms[i].st_shndx == SHN_UNDEF) continue;
+      if (syms[i].st_shndx >= SHN_LORESERVE) continue;
+      if (ELF64_ST_TYPE(syms[i].st_info) != STT_FUNC) continue;
+      if (!(s = GetElfString(elf, esize, symstrs, syms[i].st_name))) continue;
+      if (strcmp(s, resolver_name)) continue;
+      resolver_sym_index = i;
+      break;
+    }
+    if (resolver_sym_index == -1)
+      // this can happen if a function with __target_clones() also has a
+      // __weak_reference() defined, in which case GCC shall only create
+      // one resolver function for the two of them so we can ignore this
+      // HOWEVER the GOT will still have an entry for each two functions
+      continue;
+
+    // call the resolver (using cosmo's special .init abi)
+    static const char chunk1[] = {
+        0x57,                          // push %rdi
+        0x56,                          // push %rsi
+        0xe8, 0x00, 0x00, 0x00, 0x00,  // call f.resolver
+    };
+    if (code_i + sizeof(chunk1) > sizeof(code) || relas_i + 1 > ARRAYLEN(relas))
+      Die("too many ifuncs");
+    memcpy(code + code_i, chunk1, sizeof(chunk1));
+    relas[relas_i].r_info = ELF64_R_INFO(resolver_sym_index, R_X86_64_PLT32);
+    relas[relas_i].r_offset = code_i + 1 + 1 + 1;
+    relas[relas_i].r_addend = -4;
+    code_i += sizeof(chunk1);
+    relas_i += 1;
+
+    // move the resolved function address into the GOT slot. it's very
+    // important that this happen, because the linker by default makes
+    // self-referencing PLT functions whose execution falls through oh
+    // no. we need to repeat this process for any aliases this defines
+    static const char chunk2[] = {
+        0x48, 0x89, 0x05, 0x00, 0x00, 0x00, 0x00,  // mov %rax,f@gotpcrel(%rip)
+    };
+    for (Elf64_Xword i = 0; i < symcount; ++i) {
+      if (i == function_sym_index ||
+          (ELF64_ST_TYPE(syms[i].st_info) == STT_GNU_IFUNC &&
+           syms[i].st_shndx == syms[function_sym_index].st_shndx &&
+           syms[i].st_value == syms[function_sym_index].st_value)) {
+        if (code_i + sizeof(chunk2) > sizeof(code) ||
+            relas_i + 1 > ARRAYLEN(relas))
+          Die("too many ifuncs");
+        memcpy(code + code_i, chunk2, sizeof(chunk2));
+        relas[relas_i].r_info = ELF64_R_INFO(i, R_X86_64_GOTPCREL);
+        relas[relas_i].r_offset = code_i + 3;
+        relas[relas_i].r_addend = -4;
+        code_i += sizeof(chunk2);
+        relas_i += 1;
+      }
+    }
+
+    static const char chunk3[] = {
+        0x5e,  // pop %rsi
+        0x5f,  // pop %rdi
+    };
+    if (code_i + sizeof(chunk3) > sizeof(code)) Die("too many ifuncs");
+    memcpy(code + code_i, chunk3, sizeof(chunk3));
+    code_i += sizeof(chunk3);
+  }
+  if (!code_i) return;
+
+  // prepare to mutate elf
+  // remap file so it has more space
+  if (elf->e_shnum + 2 > 65535) Die("too many sections");
+  size_t reserve_size = esize + 32 * 1024 * 1024;
+  if (ftruncate(fildes, reserve_size)) SysExit("ifunc ftruncate #1");
+  elf = mmap((char *)elf, reserve_size, PROT_READ | PROT_WRITE,
+             MAP_FIXED | MAP_SHARED, fildes, 0);
+  if (elf == MAP_FAILED) SysExit("ifunc mmap");
+
+  // duplicate section name strings table to end of file
+  Elf64_Shdr *shdrstr_shdr = (Elf64_Shdr *)((char *)elf + elf->e_shoff +
+                                            elf->e_shstrndx * elf->e_shentsize);
+  memcpy((char *)elf + esize, (char *)elf + shdrstr_shdr->sh_offset,
+         shdrstr_shdr->sh_size);
+  shdrstr_shdr->sh_offset = esize;
+  esize += shdrstr_shdr->sh_size;
+
+  // append strings for the two sections we're creating
+  const char *code_section_name = IFUNC_SECTION;
+  Elf64_Word code_section_name_offset = shdrstr_shdr->sh_size;
+  memcpy((char *)elf + esize, code_section_name, strlen(code_section_name) + 1);
+  shdrstr_shdr->sh_size += strlen(code_section_name) + 1;
+  esize += strlen(code_section_name) + 1;
+  const char *rela_section_name = ".rela" IFUNC_SECTION;
+  Elf64_Word rela_section_name_offset = shdrstr_shdr->sh_size;
+  memcpy((char *)elf + esize, rela_section_name, strlen(rela_section_name) + 1);
+  shdrstr_shdr->sh_size += strlen(rela_section_name) + 1;
+  esize += strlen(rela_section_name) + 1;
+  unassert(esize == shdrstr_shdr->sh_offset + shdrstr_shdr->sh_size);
+  ++esize;
+
+  // duplicate section headers to end of file
+  esize = (esize + alignof(Elf64_Shdr) - 1) & -alignof(Elf64_Shdr);
+  memcpy((char *)elf + esize, (char *)elf + elf->e_shoff,
+         elf->e_shnum * elf->e_shentsize);
+  elf->e_shoff = esize;
+  esize += elf->e_shnum * elf->e_shentsize;
+  unassert(esize == elf->e_shoff + elf->e_shnum * elf->e_shentsize);
+
+  // append code section header
+  Elf64_Shdr *code_shdr = (Elf64_Shdr *)((char *)elf + esize);
+  Elf64_Word code_shdr_index = elf->e_shnum++;
+  esize += elf->e_shentsize;
+  code_shdr->sh_name = code_section_name_offset;
+  code_shdr->sh_type = SHT_PROGBITS;
+  code_shdr->sh_flags = SHF_ALLOC | SHF_EXECINSTR;
+  code_shdr->sh_addr = 0;
+  code_shdr->sh_link = 0;
+  code_shdr->sh_info = 0;
+  code_shdr->sh_entsize = 1;
+  code_shdr->sh_addralign = 1;
+  code_shdr->sh_size = code_i;
+
+  // append code's rela section header
+  Elf64_Shdr *rela_shdr = (Elf64_Shdr *)((char *)elf + esize);
+  esize += elf->e_shentsize;
+  rela_shdr->sh_name = rela_section_name_offset;
+  rela_shdr->sh_type = SHT_RELA;
+  rela_shdr->sh_flags = SHF_INFO_LINK;
+  rela_shdr->sh_addr = 0;
+  rela_shdr->sh_info = code_shdr_index;
+  rela_shdr->sh_link = symtab_shdr_index;
+  rela_shdr->sh_entsize = sizeof(Elf64_Rela);
+  rela_shdr->sh_addralign = alignof(Elf64_Rela);
+  rela_shdr->sh_size = relas_i * sizeof(Elf64_Rela);
+  elf->e_shnum++;
+
+  // append relas
+  esize = (esize + 63) & -64;
+  rela_shdr->sh_offset = esize;
+  memcpy((char *)elf + esize, relas, relas_i * sizeof(Elf64_Rela));
+  esize += relas_i * sizeof(Elf64_Rela);
+  unassert(esize == rela_shdr->sh_offset + rela_shdr->sh_size);
+
+  // append code
+  esize = (esize + 63) & -64;
+  code_shdr->sh_offset = esize;
+  memcpy((char *)elf + esize, code, code_i);
+  esize += code_i;
+  unassert(esize == code_shdr->sh_offset + code_shdr->sh_size);
+
+  if (ftruncate(fildes, esize)) SysExit("ifunc ftruncate #1");
+}
+
+// when __attribute__((__target_clones__(...))) is used, static binaries
+// become poisoned with rela IFUNC relocations, which the linker refuses
+// to remove. even if we objcopy the ape executable as binary the linker
+// preserves its precious ifunc code and puts them before the executable
+// header. the good news is that the linker actually does link correctly
+// which means we can delete the broken rela sections in the elf binary.
+static void PurgeIfuncSections(void) {
+  Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)elf + elf->e_shoff);
+  for (Elf64_Word i = 0; i < elf->e_shnum; ++i) {
+    char *name;
+    if (shdrs[i].sh_type == SHT_RELA ||
+        ((name = GetElfSectionName(elf, esize, shdrs + i)) &&
+         !strcmp(name, ".init.202.ifunc"))) {
+      shdrs[i].sh_type = SHT_NULL;
+      shdrs[i].sh_flags &= ~SHF_ALLOC;
+    }
+  }
+}
+
 static void FixupObject(void) {
   if ((fildes = open(epath, mode)) == -1) {
     SysExit("open");
@@ -373,8 +622,8 @@ static void FixupObject(void) {
     SysExit("lseek");
   }
   if (esize) {
-    if ((elf = mmap(0, esize, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0)) ==
-        MAP_FAILED) {
+    if ((elf = mmap((void *)0x032100000000, esize, PROT_READ | PROT_WRITE,
+                    MAP_FIXED | MAP_SHARED, fildes, 0)) == MAP_FAILED) {
       SysExit("mmap");
     }
     if (!IsElf64Binary(elf, esize)) {
@@ -392,14 +641,17 @@ static void FixupObject(void) {
     CheckPrivilegedCrossReferences();
     if (mode == O_RDWR) {
       if (elf->e_machine == EM_NEXGEN32E) {
+        RewriteTlsCodeAmd64();
         OptimizePatchableFunctionEntries();
+        GenerateIfuncInit();
       } else if (elf->e_machine == EM_AARCH64) {
-        RewriteTlsCode();
+        RewriteTlsCodeArm64();
         if (elf->e_type != ET_REL) {
           UseFreebsdOsAbi();
         }
       }
       if (elf->e_type != ET_REL) {
+        PurgeIfuncSections();
         RelinkZipFiles();
       }
       if (msync(elf, esize, MS_ASYNC | MS_INVALIDATE)) {
diff --git a/tool/build/mkdeps.c b/tool/build/mkdeps.c
index 91dd299de..3aa2a2b69 100644
--- a/tool/build/mkdeps.c
+++ b/tool/build/mkdeps.c
@@ -19,6 +19,7 @@
 #include "libc/calls/calls.h"
 #include "libc/calls/struct/stat.h"
 #include "libc/errno.h"
+#include "libc/fmt/itoa.h"
 #include "libc/fmt/libgen.h"
 #include "libc/fmt/magnumstrs.internal.h"
 #include "libc/intrin/kprintf.h"
@@ -45,48 +46,48 @@
   "copyright 2023 justine tunney\n" \
   "https://github.com/jart/cosmopolitan\n"
 
-#define MANUAL                                                      \
-  " -r o// -o OUTPUT INPUT...\n"                                    \
-  "\n"                                                              \
-  "DESCRIPTION\n"                                                   \
-  "\n"                                                              \
-  "  Generates header file dependencies for your makefile\n"        \
-  "\n"                                                              \
-  "  This tool computes the transitive closure of included paths\n" \
-  "  for every source file in your repository. This program does\n" \
-  "  it orders of a magnitude faster than `gcc -M` on each file.\n" \
-  "\n"                                                              \
-  "  Includes look like this:\n"                                    \
-  "\n"                                                              \
-  "    - #include <stdio.h>\n"                                      \
-  "    - #include \"samedir.h\"\n"                                  \
-  "    - #include \"root/of/repository/foo.h\"\n"                   \
-  "    - .include \"asm/x86_64/foo.s\"\n"                           \
-  "\n"                                                              \
-  "  Your generated make code looks like this:\n"                   \
-  "\n"                                                              \
-  "    o//package/foo.o: \\\n"                                      \
-  "      package/foo.c \\\n"                                        \
-  "      package/foo.h \\\n"                                        \
-  "      package/bar.h \\\n"                                        \
-  "      libc/isystem/stdio.h\n"                                    \
-  "    o//package/bar.o: \\\n"                                      \
-  "      package/bar.c \\\n"                                        \
-  "      package/bar.h\n"                                           \
-  "\n"                                                              \
-  "FLAGS\n"                                                         \
-  "\n"                                                              \
-  "  -h         show usage\n"                                       \
-  "  -o OUTPUT  set output path\n"                                  \
-  "  -g ROOT    set generated path [default: o/]\n"                 \
-  "  -r ROOT    set build output path, e.g. o/$(MODE)/\n"           \
-  "  -S PATH    isystem include path [default: libc/isystem/]\n"    \
-  "  -s         hermetically sealed mode [repeatable]\n"            \
-  "\n"                                                              \
-  "ARGUMENTS\n"                                                     \
-  "\n"                                                              \
-  "  OUTPUT     shall be makefile code\n"                           \
-  "  INPUT      should be source or @args.txt\n"                    \
+#define MANUAL                                                               \
+  " -r o// -o OUTPUT INPUT...\n"                                             \
+  "\n"                                                                       \
+  "DESCRIPTION\n"                                                            \
+  "\n"                                                                       \
+  "  Generates header file dependencies for your makefile\n"                 \
+  "\n"                                                                       \
+  "  This tool computes the transitive closure of included paths\n"          \
+  "  for every source file in your repository. This program does\n"          \
+  "  it orders of a magnitude faster than `gcc -M` on each file.\n"          \
+  "\n"                                                                       \
+  "  Includes look like this:\n"                                             \
+  "\n"                                                                       \
+  "    - #include <stdio.h>\n"                                               \
+  "    - #include \"samedir.h\"\n"                                           \
+  "    - #include \"root/of/repository/foo.h\"\n"                            \
+  "    - .include \"asm/x86_64/foo.s\"\n"                                    \
+  "\n"                                                                       \
+  "  Your generated make code looks like this:\n"                            \
+  "\n"                                                                       \
+  "    o//package/foo.o: \\\n"                                               \
+  "      package/foo.c \\\n"                                                 \
+  "      package/foo.h \\\n"                                                 \
+  "      package/bar.h \\\n"                                                 \
+  "      libc/isystem/stdio.h\n"                                             \
+  "    o//package/bar.o: \\\n"                                               \
+  "      package/bar.c \\\n"                                                 \
+  "      package/bar.h\n"                                                    \
+  "\n"                                                                       \
+  "FLAGS\n"                                                                  \
+  "\n"                                                                       \
+  "  -h         show usage\n"                                                \
+  "  -o OUTPUT  set output path\n"                                           \
+  "  -g ROOT    set generated path [default: o/]\n"                          \
+  "  -r ROOT    set build output path, e.g. o/$(MODE)/\n"                    \
+  "  -S PATH    isystem include path [repeatable; default: libc/isystem/]\n" \
+  "  -s         hermetically sealed mode [repeatable]\n"                     \
+  "\n"                                                                       \
+  "ARGUMENTS\n"                                                              \
+  "\n"                                                                       \
+  "  OUTPUT     shall be makefile code\n"                                    \
+  "  INPUT      should be source or @args.txt\n"                             \
   "\n"
 
 #define Read32(s) (s[3] << 24 | s[2] << 16 | s[1] << 8 | s[0])
@@ -118,6 +119,11 @@ struct Edges {
   struct Edge *p;
 };
 
+struct Paths {
+  long n;
+  const char *p[64];
+};
+
 static const uint32_t kSourceExts[] = {
     EXT("s"),    // assembly
     EXT("S"),    // assembly with c preprocessor
@@ -135,7 +141,7 @@ static const char *prog;
 static struct Edges edges;
 static struct Sauce *sauces;
 static struct Sources sources;
-static const char *systempath;
+static struct Paths systempaths;
 static const char *buildroot;
 static const char *genroot;
 static const char *outpath;
@@ -329,7 +335,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize,
   // scan backwards for hash character
   for (;;) {
     if (q == map) {
-      return false;
+      return 0;
     }
     if (IsBlank(q[-1])) {
       --q;
@@ -342,7 +348,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize,
       --q;
       break;
     } else {
-      return false;
+      return 0;
     }
   }
 
@@ -359,7 +365,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize,
       if (q[-1] == '\n') {
         break;
       } else {
-        return false;
+        return 0;
       }
     }
   }
@@ -378,7 +384,7 @@ static const char *FindIncludePath(const char *map, size_t mapsize,
       ++q;
       break;
     } else {
-      return false;
+      return 0;
     }
   }
 
@@ -395,7 +401,7 @@ static void LoadRelationships(int argc, char *argv[]) {
   struct GetArgs ga;
   int srcid, dependency;
   static char srcdirbuf[PATH_MAX];
-  const char *p, *pe, *src, *path, *pathend, *srcdir;
+  const char *p, *pe, *src, *path, *pathend, *srcdir, *final;
   getargs_init(&ga, argv + optind);
   while ((src = getargs_next(&ga))) {
     CreateSourceId(src);
@@ -433,51 +439,71 @@ static void LoadRelationships(int argc, char *argv[]) {
         DieSys(src);
       }
       for (p = map, pe = map + size; p < pe; ++p) {
-        char *bp;
-        char right;
-        char buf[PATH_MAX];
         if (!(p = memmem(p, pe - p, "include ", 8))) break;
         if (!(path = FindIncludePath(map, size, p, is_assembly))) continue;
-        bp = buf;
+        // copy the specified include path
+        char right;
         if (path[-1] == '<') {
-          if (!systempath) continue;
-          bp = stpcpy(bp, systempath);
+          if (!systempaths.n) continue;
           right = '>';
         } else {
           right = '"';
         }
-        if ((pathend = memchr(path, right, pe - path))) {
-          const char *final;
-          char juf[PATH_MAX];
-          if ((bp - buf) + (pathend - path) >= PATH_MAX) {
-            tinyprint(2, src, ": include path too long\n", NULL);
-            exit(1);
-          }
-          *(bp = mempcpy(bp, path, pathend - path)) = 0;
-          // let foo/bar.c say `#include "foo/hdr.h"`
-          dependency = GetSourceId((final = buf));
-          // let foo/bar.c say `#include "hdr.h"`
-          if (dependency == -1 && right == '"' && !strchr(buf, '/')) {
-            if (!(final = __join_paths(juf, PATH_MAX, srcdir, buf))) {
-              DiePathTooLong(buf);
+        if (!(pathend = memchr(path, right, pe - path))) continue;
+        if (pathend - path >= PATH_MAX) {
+          tinyprint(2, src, ": uses really long include path\n", NULL);
+          exit(1);
+        }
+        char juf[PATH_MAX];
+        char incpath[PATH_MAX];
+        *(char *)mempcpy(incpath, path, pathend - path) = 0;
+        if (right == '>') {
+          // handle angle bracket includes
+          dependency = -1;
+          for (long i = 0; i < systempaths.n; ++i) {
+            if (!(final =
+                      __join_paths(juf, PATH_MAX, systempaths.p[i], incpath))) {
+              DiePathTooLong(incpath);
+            }
+            if ((dependency = GetSourceId(final)) != -1) {
+              break;
             }
-            dependency = GetSourceId(final);
           }
-          if (dependency == -1) {
-            if (startswith(buf, genroot)) {
-              dependency = CreateSourceId(src);
-            } else if (!hermetic) {
-              continue;
-            } else if (hermetic == 1 && right == '>') {
+          if (dependency != -1) {
+            AppendEdge(&edges, dependency, srcid);
+            p = pathend + 1;
+          } else {
+            if (hermetic == 1) {
               // chances are the `#include <foo>` is in some #ifdef
               // that'll never actually be executed; thus we ignore
               // since landlock make unveil() shall catch it anyway
               continue;
+            }
+            tinyprint(2, incpath,
+                      ": system header not specified by the HDRS/SRCS/INCS "
+                      "make variables defined by the hermetic mono repo\n",
+                      NULL);
+            exit(1);
+          }
+        } else {
+          // handle double quote includes
+          // let foo/bar.c say `#include "foo/hdr.h"`
+          dependency = GetSourceId((final = incpath));
+          // let foo/bar.c say `#include "hdr.h"`
+          if (dependency == -1 && !strchr(final, '/')) {
+            if (!(final = __join_paths(juf, PATH_MAX, srcdir, final))) {
+              DiePathTooLong(incpath);
+            }
+            dependency = GetSourceId(final);
+          }
+          if (dependency == -1) {
+            if (startswith(final, genroot)) {
+              dependency = CreateSourceId(src);
             } else {
-              tinyprint(
-                  2, final,
-                  ": path not specified by HDRS/SRCS/INCS make variables\n",
-                  NULL);
+              tinyprint(2, incpath,
+                        ": path not specified by HDRS/SRCS/INCS make variables "
+                        "(it was included by ",
+                        src, ")\n", NULL);
               exit(1);
             }
           }
@@ -498,6 +524,13 @@ static wontreturn void ShowUsage(int rc, int fd) {
   exit(rc);
 }
 
+static void AddPath(struct Paths *paths, const char *path) {
+  if (paths->n == ARRAYLEN(paths->p)) {
+    Die("too many path arguments");
+  }
+  paths->p[paths->n++] = path;
+}
+
 static void GetOpts(int argc, char *argv[]) {
   int opt;
   while ((opt = getopt(argc, argv, "hnsgS:o:r:")) != -1) {
@@ -506,10 +539,7 @@ static void GetOpts(int argc, char *argv[]) {
         ++hermetic;
         break;
       case 'S':
-        if (systempath) {
-          Die("multiple system paths not supported yet");
-        }
-        systempath = optarg;
+        AddPath(&systempaths, optarg);
         break;
       case 'o':
         if (outpath) {
@@ -555,29 +585,33 @@ static void GetOpts(int argc, char *argv[]) {
   if (!startswith(buildroot, genroot)) {
     Die("build output path must start with generated output path");
   }
-  if (!systempath && hermetic) {
-    systempath = "libc/isystem/";
+  if (!systempaths.n && hermetic) {
+    AddPath(&systempaths, "third_party/libcxx/include/");
+    AddPath(&systempaths, "libc/isystem/");
   }
-  if (systempath && !hermetic) {
+  if (systempaths.n && !hermetic) {
     Die("system path can only be specified in hermetic mode");
   }
-  if (systempath) {
+  long j = 0;
+  for (long i = 0; i < systempaths.n; ++i) {
     size_t n;
     struct stat st;
-    if (stat(systempath, &st)) {
-      DieSys(systempath);
+    const char *path = systempaths.p[i];
+    if (!stat(path, &st)) {
+      systempaths.p[j++] = path;
+      if (!S_ISDIR(st.st_mode)) {
+        errno = ENOTDIR;
+        DieSys(path);
+      }
     }
-    if (!S_ISDIR(st.st_mode)) {
-      errno = ENOTDIR;
-      DieSys(systempath);
+    if ((n = strlen(path)) >= PATH_MAX) {
+      DiePathTooLong(path);
     }
-    if ((n = strlen(systempath)) >= PATH_MAX) {
-      DiePathTooLong(systempath);
-    }
-    if (!n || systempath[n - 1] != '/') {
+    if (!n || path[n - 1] != '/') {
       Die("system path must end with slash");
     }
   }
+  systempaths.n = j;
 }
 
 static const char *StripExt(char pathbuf[hasatleast PATH_MAX], const char *s) {
diff --git a/tool/build/objbincopy.c b/tool/build/objbincopy.c
index a17b3fb63..ab3a906b5 100644
--- a/tool/build/objbincopy.c
+++ b/tool/build/objbincopy.c
@@ -201,7 +201,7 @@ static void ValidateMachoSection(const char *inpath,         //
         Die(inpath, "don't bother with mach-o sections");
       }
       namelen = strnlen(loadseg->name, sizeof(loadseg->name));
-      if (!loadseg->name) {
+      if (!loadseg->name[0]) {
         Die(inpath, "mach-o load segment missing name");
       }
       if (filesize || (loadseg->vaddr && loadseg->memsz)) {
diff --git a/tool/build/package.c b/tool/build/package.c
index df271c788..b52299eff 100644
--- a/tool/build/package.c
+++ b/tool/build/package.c
@@ -433,7 +433,8 @@ static void LoadSymbols(struct Package *pkg, uint32_t object) {
     symbol.type = ELF64_ST_TYPE(obj->syms[i].st_info);
     if (symbol.bind_ != STB_LOCAL &&
         (symbol.type == STT_OBJECT || symbol.type == STT_FUNC ||
-         symbol.type == STT_COMMON || symbol.type == STT_NOTYPE)) {
+         symbol.type == STT_COMMON || symbol.type == STT_NOTYPE ||
+         symbol.type == STT_GNU_IFUNC)) {
       if (!(name = GetElfString(obj->elf, obj->size, obj->strs,
                                 obj->syms[i].st_name))) {
         Die("error", "elf overflow");
diff --git a/tool/build/runit.c b/tool/build/runit.c
index 9dc1a897e..f94f735ea 100644
--- a/tool/build/runit.c
+++ b/tool/build/runit.c
@@ -151,17 +151,11 @@ void Connect(void) {
   struct timespec deadline;
   if ((rc = getaddrinfo(g_hostname, gc(xasprintf("%hu", g_runitdport)),
                         &kResolvHints, &ai)) != 0) {
-    FATALF("%s:%hu: EAI_%s %m", g_hostname, g_runitdport, gai_strerror(rc));
+    FATALF("%s:%hu: DNS lookup failed: %s", g_hostname, g_runitdport,
+           gai_strerror(rc));
     __builtin_unreachable();
   }
   ip4 = (const char *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr;
-  if (ispublicip(ai->ai_family,
-                 &((struct sockaddr_in *)ai->ai_addr)->sin_addr)) {
-    FATALF("%s points to %hhu.%hhu.%hhu.%hhu"
-           " which isn't part of a local/private/testing subnet",
-           g_hostname, ip4[0], ip4[1], ip4[2], ip4[3]);
-    __builtin_unreachable();
-  }
   DEBUGF("connecting to %d.%d.%d.%d port %d", ip4[0], ip4[1], ip4[2], ip4[3],
          ntohs(((struct sockaddr_in *)ai->ai_addr)->sin_port));
   CHECK_NE(-1,
diff --git a/tool/cosmocc/README.md b/tool/cosmocc/README.md
index 1b3aa124a..d55b9717e 100644
--- a/tool/cosmocc/README.md
+++ b/tool/cosmocc/README.md
@@ -9,12 +9,13 @@ reach a broader audience from the platform(s) of your choosing.
 
 ## What's Included
 
-This toolchain bundles GCC 11.2.0, Cosmopolitan Libc, LLVM LIBCXX, and
-LLVM compiler-rt. Additional libraries were provided by Musl Libc, and
-the venerable BSDs OSes. This lets you benefit from the awesome modern
-GCC compiler with the strongest GPL barrier possible. The preprocessor
-advertises cross compilers as both `__COSMOCC__` and `__COSMOPOLITAN__`
-whereas `cosmocc` additionally defines `__FATCOSMOCC__`.
+This toolchain bundles GCC 12.3.0, Cosmopolitan Libc, LLVM LIBCXX, LLVM
+compiler-rt, and LLVM OpenMP. Additional libraries were provided by Musl
+Libc, and the venerable BSDs OSes. This lets you benefit from the
+awesome modern GCC compiler with the strongest GPL barrier possible. The
+preprocessor advertises cross compilers as both `__COSMOCC__` and
+`__COSMOPOLITAN__` whereas `cosmocc` additionally defines
+`__FATCOSMOCC__`.
 
 ## Getting Started
 
@@ -296,9 +297,9 @@ EINVAL: ... }` in cases where constants like `EINVAL` are linkable
 symbols. Your code will be rewritten in such cases to use a series of if
 statements instead, so that Cosmopolitan Libc's system constants will
 work as expected. Our modifications to GNU GCC are published under the
-ISC license at <https://github.com/ahgamut/gcc/tree/portcosmo-11.2>. The
+ISC license at <https://github.com/ahgamut/gcc/tree/portcosmo-12.3>. The
 binaries you see here were first published at
-<https://github.com/ahgamut/superconfigure/releases/tag/z0.0.30> which
+<https://github.com/ahgamut/superconfigure/releases/tag/z0.0.32> which
 is regularly updated.
 
 ## Legal
diff --git a/tool/cosmocc/bin/cosmocc b/tool/cosmocc/bin/cosmocc
index 719d0b5d6..7a797da21 100755
--- a/tool/cosmocc/bin/cosmocc
+++ b/tool/cosmocc/bin/cosmocc
@@ -6,19 +6,19 @@
 BIN=${0%/*}
 PROG=${0##*/}
 ORIGINAL="$0 $*"
-GCC_VERSION=11.2.0
+GCC_VERSION=12.3.0
 TMPDIR=${TMPDIR:-/tmp}
 
 if [ "$1" = "--version" ]; then
 cat <<EOF
 $PROG (GCC) $GCC_VERSION
-Copyright (c) 2023 Justine Alexandra Roberts Tunney
+Copyright (c) 2024 Justine Alexandra Roberts Tunney
 Cosmopolitan Libc and LLVM libcxx/compiler-rt are subject to non-GPL
 notice licenses, e.g. ISC, MIT, etc. Your compiled programs must embed
 our copyright notices. This toolchain is configured to do so default.
 Cosmopolitan comes with absolutely NO WARRANTY of any kind.
 For more information, see the Cosmopolitan LICENSE files.
-Copyright (C) 2019 Free Software Foundation, Inc.
+Copyright (C) 2022 Free Software Foundation, Inc.
 This launches GNU GCC/Binutils subprocesses, which is free software; see
 cosmocc's LICENSE files for source code and copying conditions. There is
 NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@@ -79,6 +79,7 @@ OPT=
 ARGS=
 FLAGS=
 OUTPUT=
+MDFLAG=0
 MCOSMO=0
 INTENT=ld
 NEED_JOIN=
@@ -88,9 +89,8 @@ APELINKFLAGS=
 FLAGS_X86_64=
 FLAGS_AARCH64=
 INPUT_FILE_COUNT=0
-MDFLAG=0
-NEED_DEPENDENCY_OUTPUT=
 DEPENDENCY_OUTPUT=
+NEED_DEPENDENCY_OUTPUT=
 for x; do
   if [ x"$x" != x"${x#* }" ]; then
     fatal_error "arguments containing spaces unsupported: $x"
@@ -237,9 +237,9 @@ fi
 
 PLATFORM="-D__COSMOPOLITAN__ -D__COSMOCC__ -D__FATCOSMOCC__"
 PREDEF="-include libc/integral/normalize.inc"
-CPPFLAGS="-fno-pie -nostdinc -fno-math-errno -isystem $BIN/../include"
+CPPFLAGS="-fno-pie -nostdinc -isystem $BIN/../include"
 CFLAGS="-fportcosmo -fno-dwarf2-cfi-asm -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-semantic-interposition"
-LDFLAGS="-static -nostdlib -no-pie -fuse-ld=bfd -Wl,-z,norelro -Wl,--gc-sections"
+LDFLAGS="-static -nostdlib -no-pie -fuse-ld=bfd -Wl,-z,noexecstack -Wl,-z,norelro -Wl,--gc-sections"
 PRECIOUS="-fno-omit-frame-pointer"
 
 if [ x"$OPT" != x"-Os" ] && [ x"$MODE" != x"tiny" ]; then
@@ -267,8 +267,8 @@ LDFLAGS_AARCH64="$LDFLAGS -L$BIN/../aarch64-linux-cosmo/lib -Wl,-T,$BIN/../aarch
 LDLIBS_AARCH64="-lcosmo"
 
 if [ x"$OPT" != x"-Os" ] && [ x"$MODE" != x"tiny" ]; then
-  CFLAGS_X86_64="${CFLAGS_X86_64} -fpatchable-function-entry=18,16"
-  CFLAGS_AARCH64="${CFLAGS_AARCH64} -fpatchable-function-entry=7,6"
+  CFLAGS_X86_64="${CFLAGS_X86_64} -fpatchable-function-entry=18,16 -fno-inline-functions-called-once -DFTRACE -DSYSDEBUG"
+  CFLAGS_AARCH64="${CFLAGS_AARCH64} -fpatchable-function-entry=7,6 -fno-inline-functions-called-once -DFTRACE -DSYSDEBUG"
 fi
 
 if [ x"$PROG" != x"${PROG%++}" ]; then
diff --git a/tool/cosmocc/bin/cosmocross b/tool/cosmocc/bin/cosmocross
index e1f3afd04..699ef31f8 100755
--- a/tool/cosmocc/bin/cosmocross
+++ b/tool/cosmocc/bin/cosmocross
@@ -5,18 +5,18 @@
 
 BIN=${0%/*}
 PROG=${0##*/}
-GCC_VERSION=11.2.0
+GCC_VERSION=12.3.0
 
 if [ "$1" = "--version" ]; then
 cat <<EOF
 $PROG (GCC) $GCC_VERSION
-Copyright (c) 2023 Justine Alexandra Roberts Tunney
+Copyright (c) 2024 Justine Alexandra Roberts Tunney
 Cosmopolitan Libc and LLVM libcxx/compiler-rt are subject to non-GPL
 notice licenses, e.g. ISC, MIT, etc. Your compiled programs must embed
 our copyright notices. This toolchain is configured to do so default.
 Cosmopolitan comes with absolutely NO WARRANTY of any kind.
 For more information, see the Cosmopolitan LICENSE files.
-Copyright (C) 2019 Free Software Foundation, Inc.
+Copyright (C) 2022 Free Software Foundation, Inc.
 This launches GNU GCC/Binutils subprocesses, which is free software; see
 cosmocc's LICENSE files for source code and copying conditions. There is
 NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@@ -48,8 +48,8 @@ ORIGINAL="$0 $*"
 PLATFORM="-D__COSMOPOLITAN__ -D__COSMOCC__"
 PREDEF="-include libc/integral/normalize.inc"
 CFLAGS="-fportcosmo -fno-dwarf2-cfi-asm -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-semantic-interposition"
-CPPFLAGS="-fno-pie -nostdinc -fno-math-errno -isystem $BIN/../include"
-LDFLAGS="-static -no-pie -nostdlib -fuse-ld=bfd"
+CPPFLAGS="-fno-pie -nostdinc -isystem $BIN/../include"
+LDFLAGS="-static -no-pie -nostdlib -fuse-ld=bfd -Wl,-z,noexecstack"
 APEFLAGS="-Wl,--gc-sections"
 PRECIOUS="-fno-omit-frame-pointer"
 
@@ -193,9 +193,9 @@ fi
 if [ x"$OPT" != x"-Os" ] &&                # $OPT != -Os
    [ x"${MODE%tiny}" = x"${MODE}" ]; then  # $MODE not in (tiny, aarch64-tiny)
   if [ x"$ARCH" = x"x86_64" ]; then
-    CFLAGS="$CFLAGS -fpatchable-function-entry=18,16"
+    CFLAGS="$CFLAGS -fpatchable-function-entry=18,16 -fno-inline-functions-called-once"
   elif [ x"$ARCH" = x"aarch64" ]; then
-    CFLAGS="$CFLAGS -fpatchable-function-entry=7,6"
+    CFLAGS="$CFLAGS -fpatchable-function-entry=7,6 -fno-inline-functions-called-once"
   fi
 fi
 
diff --git a/tool/cosmocc/package.sh b/tool/cosmocc/package.sh
index 63e6bf191..029e87e50 100755
--- a/tool/cosmocc/package.sh
+++ b/tool/cosmocc/package.sh
@@ -19,6 +19,7 @@ OUTDIR=${1:-cosmocc}
 APELINK=o/$(mode)/tool/build/apelink.com
 AMD64=${2:-x86_64}
 ARM64=${3:-aarch64}
+GCCVER=12.3.0
 
 make -j32 m= \
   $APELINK
@@ -89,10 +90,10 @@ fetch() {
 OLD=$PWD
 cd "$OUTDIR/"
 if [ ! -x bin/x86_64-linux-cosmo-gcc ]; then
-  fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.30/aarch64-gcc.zip
+  fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.34/aarch64-gcc.zip
   unzip aarch64-gcc.zip
   rm -f aarch64-gcc.zip
-  fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.30/x86_64-gcc.zip
+  fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.34/x86_64-gcc.zip
   unzip x86_64-gcc.zip
   rm -f x86_64-gcc.zip
 fi
@@ -113,14 +114,14 @@ for arch in aarch64 x86_64; do
   ln -sf $arch-linux-cosmo-objdump bin/$arch-unknown-cosmo-objdump
   ln -sf $arch-linux-cosmo-readelf bin/$arch-unknown-cosmo-readelf
   ln -sf $arch-linux-cosmo-strip bin/$arch-unknown-cosmo-strip
-  cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd libexec/gcc/$arch-linux-cosmo/11.2.0/ld
-  ln -sf ld.bfd libexec/gcc/$arch-linux-cosmo/11.2.0/ld
-  cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld
-  ln -sf ../libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld
-  cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/as bin/$arch-linux-cosmo-as
-  ln -sf ../libexec/gcc/$arch-linux-cosmo/11.2.0/as bin/$arch-linux-cosmo-as
-  cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld.bfd
-  ln -sf ../libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld.bfd
+  cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd libexec/gcc/$arch-linux-cosmo/$GCCVER/ld
+  ln -sf ld.bfd libexec/gcc/$arch-linux-cosmo/$GCCVER/ld
+  cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld
+  ln -sf ../libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld
+  cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/as bin/$arch-linux-cosmo-as
+  ln -sf ../libexec/gcc/$arch-linux-cosmo/$GCCVER/as bin/$arch-linux-cosmo-as
+  cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld.bfd
+  ln -sf ../libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld.bfd
 done
 cd "$OLD"
 
@@ -129,7 +130,7 @@ for arch in $AMD64 $ARM64; do
   cp -f o/$arch/libc/crt/crt.o "$OUTDIR/$arch-linux-cosmo/lib/"
   cp -f o/$arch/cosmopolitan.a "$OUTDIR/$arch-linux-cosmo/lib/libcosmo.a"
   cp -f o/$arch/third_party/libcxx/libcxx.a "$OUTDIR/$arch-linux-cosmo/lib/"
-  for lib in c dl gcc_s m pthread resolv rt dl unwind gomp stdc++; do
+  for lib in c dl gcc_s m crypt pthread resolv rt dl unwind gomp stdc++; do
     printf '\041\074\141\162\143\150\076\012' >"$OUTDIR/$arch-linux-cosmo/lib/lib$lib.a"
   done
   mkdir -p "$OUTDIR/lib/gcc/"
diff --git a/tool/curl/curl.c b/tool/curl/curl.c
index ed5c494f8..fd9935de8 100644
--- a/tool/curl/curl.c
+++ b/tool/curl/curl.c
@@ -162,7 +162,7 @@ int _curl(int argc, char *argv[]) {
     size_t n;
     char **p;
   } headers = {0};
-  int method = 0;
+  uint64_t method = 0;
   int authmode = MBEDTLS_SSL_VERIFY_REQUIRED;
   int ciphersuite = MBEDTLS_SSL_PRESET_SUITEC;
   bool includeheaders = false;
@@ -193,7 +193,7 @@ int _curl(int argc, char *argv[]) {
         postdata = optarg;
         break;
       case 'X':
-        if (!(method = GetHttpMethod(optarg, strlen(optarg)))) {
+        if (!(method = ParseHttpMethod(optarg, -1))) {
           tinyprint(2, prog, ": bad http method: ", optarg, "\n", NULL);
           exit(1);
         }
@@ -280,11 +280,13 @@ int _curl(int argc, char *argv[]) {
   }
 
   char *request = 0;
+  char methodstr[9] = {0};
+  WRITE64LE(methodstr, method);
   appendf(&request,
           "%s %s HTTP/1.1\r\n"
           "Connection: close\r\n"
           "User-Agent: %s\r\n",
-          kHttpMethod[method], gc(EncodeUrl(&url, 0)), agent);
+          methodstr, gc(EncodeUrl(&url, 0)), agent);
 
   bool senthost = false;
   bool sentcontenttype = false;
diff --git a/tool/decode/elf.c b/tool/decode/elf.c
index 314603fb4..2e59203d4 100644
--- a/tool/decode/elf.c
+++ b/tool/decode/elf.c
@@ -26,7 +26,6 @@
 #include "libc/errno.h"
 #include "libc/fmt/conv.h"
 #include "libc/fmt/libgen.h"
-#include "libc/serialize.h"
 #include "libc/intrin/safemacros.internal.h"
 #include "libc/log/check.h"
 #include "libc/log/log.h"
@@ -34,6 +33,7 @@
 #include "libc/mem/gc.h"
 #include "libc/mem/mem.h"
 #include "libc/runtime/runtime.h"
+#include "libc/serialize.h"
 #include "libc/stdio/stdio.h"
 #include "libc/str/str.h"
 #include "libc/sysv/consts/auxv.h"
diff --git a/tool/emacs/c.lang b/tool/emacs/c.lang
index a2805763a..a2dad3636 100644
--- a/tool/emacs/c.lang
+++ b/tool/emacs/c.lang
@@ -121,11 +121,10 @@ Keywords={
 "hasatleast",
 "nodebuginfo",
 "noreturn",
-"initarray",
 "mayalias",
 "dontinstrument",
 "interruptfn",
-"nocallback",
+"dontcallback",
 "textstartup",
 "warnifused",
 "attributeallocsize",
diff --git a/tool/emacs/cosmo-asm-mode.el b/tool/emacs/cosmo-asm-mode.el
index 580fa0b01..b948e12af 100644
--- a/tool/emacs/cosmo-asm-mode.el
+++ b/tool/emacs/cosmo-asm-mode.el
@@ -65,9 +65,11 @@
            "pltoff"
            "gotpcrel"
            "progbits"
+           "note"
            "nobits"
            "init_array"
-           "fini_array")])
+           "fini_array"
+           "gnu_indirect_function")])
     "\\>"])
   "GNU Assembler section, relocation, macro param qualifiers.")
 
diff --git a/tool/emacs/cosmo-c-builtins.el b/tool/emacs/cosmo-c-builtins.el
index 1a9a69330..3dee199be 100644
--- a/tool/emacs/cosmo-c-builtins.el
+++ b/tool/emacs/cosmo-c-builtins.el
@@ -204,6 +204,7 @@
          '("DebugBreak"
            "__veil"
            "__conceal"
+           "__notice"
            "__expropriate"
            "__yoink"
            "__dll_import"
diff --git a/tool/emacs/cosmo-c-keywords.el b/tool/emacs/cosmo-c-keywords.el
index f3aa5b349..3f4f691c5 100644
--- a/tool/emacs/cosmo-c-keywords.el
+++ b/tool/emacs/cosmo-c-keywords.el
@@ -87,11 +87,10 @@
            "nomsan"
            "dontubsan"
            "nostackprotector"
-           "initarray"
            "mayalias"
            "dontinstrument"
            "interruptfn"
-           "nocallback"
+           "dontcallback"
            "textstartup"
            "warnifused"
            "attributeallocsize"
@@ -206,6 +205,9 @@
            "__section__"
            "__sentinel__"
            "__simd__"
+           "__vex"
+           "__avx2"
+           "__target_clones"
            "__target_clones__"
            "__unused__"
            "__used__"
diff --git a/tool/emacs/cosmo-cpp-constants.el b/tool/emacs/cosmo-cpp-constants.el
index b5f2af0fa..010ab7127 100644
--- a/tool/emacs/cosmo-cpp-constants.el
+++ b/tool/emacs/cosmo-cpp-constants.el
@@ -66,6 +66,8 @@
     "__BMI2__"
     "__FMA__"
     "__FAST_MATH__"
+    "__ROUNDING_MATH__"
+    "__NO_MATH_ERRNO__"
     "__FMA4__"
     "__F16C__"
     "__CLZERO__"
diff --git a/tool/emacs/cosmo-platform-constants.el b/tool/emacs/cosmo-platform-constants.el
index 6ba7d8014..a632b649a 100644
--- a/tool/emacs/cosmo-platform-constants.el
+++ b/tool/emacs/cosmo-platform-constants.el
@@ -70,8 +70,8 @@
            "__GCC_IEC_559"
            "__SUPPORT_SNAN__"
            "__GCC_IEC_559_COMPLEX"
-           "__NO_MATH_ERRNO__"
-           "__gnu__"))
+           "__gnu__"
+           "_OPENMP"))
 
         (cosmo
          '("__LINKER__"))
diff --git a/tool/emacs/cosmo-stuff.el b/tool/emacs/cosmo-stuff.el
index 22cd41d0a..c7ba003a3 100644
--- a/tool/emacs/cosmo-stuff.el
+++ b/tool/emacs/cosmo-stuff.el
@@ -215,7 +215,7 @@
          (runs (format "o/$m/%s.com%s V=5 TESTARGS=-b" name runsuffix))
          (buns (format "o/$m/test/%s_test.com%s V=5 TESTARGS=-b" name runsuffix)))
     (cond ((not (member ext '("c" "cc" "cpp" "s" "S" "rl" "f")))
-           (format "m=%s; make -j12 MODE=$m o/$m/%s"
+           (format "m=%s; make -j32 MODE=$m o/$m/%s"
                    mode
                    (directory-file-name
                     (or (file-name-directory
@@ -226,7 +226,7 @@
             (cosmo-join
              " && "
              `("m=%s; f=o/$m/%s.com"
-               ,(concat "make -j12 $f MODE=$m")
+               ,(concat "make -j32 $f MODE=$m")
                "scp $f $f.dbg win10:; ssh win10 ./%s.com"))
             mode name (file-name-nondirectory name)))
           ((eq kind 'run-xnu)
@@ -234,19 +234,19 @@
             (cosmo-join
              " && "
              `("m=%s; f=o/$m/%s.com"
-               ,(concat "make -j12 $f MODE=$m")
+               ,(concat "make -j32 $f MODE=$m")
                "scp $f $f.dbg xnu:"
                "ssh xnu ./%s.com"))
             mode name (file-name-nondirectory name)))
           ((and (equal suffix "")
                 (cosmo-contains "_test." (buffer-file-name)))
-           (format "m=%s; make -j12 MODE=$m %s"
+           (format "m=%s; make -j32 MODE=$m %s"
                    mode runs))
           ((and (equal suffix "")
                 (file-exists-p (format "%s" buddy)))
            (format (cosmo-join
                     " && "
-                    '("m=%s; n=%s; make -j12 o/$m/$n%s.o MODE=$m"
+                    '("m=%s; n=%s; make -j32 o/$m/$n%s.o MODE=$m"
                       ;; "bloat o/$m/%s.o | head"
                       ;; "nm -C --size o/$m/%s.o | sort -r"
                       "echo"
@@ -258,11 +258,11 @@
             (cosmo-join
              " && "
              `("m=%s; f=o/$m/%s.com"
-               ,(concat "make -j12 $f MODE=$m")
+               ,(concat "make -j32 $f MODE=$m")
                "build/run ./$f"))
             mode name))
           ((eq kind 'test)
-           (format `"m=%s; f=o/$m/%s.com.ok && make -j12 $f MODE=$m" mode name))
+           (format `"m=%s; f=o/$m/%s.com.ok && make -j32 $f MODE=$m" mode name))
           ((and (file-regular-p this)
                 (file-executable-p this))
            (format "build/run ./%s" file))
@@ -271,7 +271,7 @@
             (cosmo-join
              " && "
              `("m=%s; f=o/$m/%s%s.o"
-               ,(concat "make -j12 $f MODE=$m")
+               ,(concat "make -j32 $f MODE=$m")
                ;; "nm -C --size $f | sort -r"
                "echo"
                "size -A $f | grep '^[.T]' | grep -v 'debug\\|command.line\\|stack' | sort -rnk2"
@@ -481,7 +481,7 @@
           (error "don't know how to show assembly for non c/c++ source file"))
         (let* ((default-directory root)
                (compile-command
-                (format "make %s -j12 MODE=%s %s %s"
+                (format "make %s -j32 MODE=%s %s %s"
                         (or extra-make-flags "") mode asm-gcc asm-clang)))
           (save-buffer)
           (set-visited-file-modtime (current-time))
@@ -641,11 +641,11 @@
                (compile (format "sh -c %s" file)))
               ((eq major-mode 'lua-mode)
                (let* ((mode (cosmo--make-mode arg)))
-                 (compile (format "make -j16 MODE=%s o/%s/tool/net/redbean.com && build/run o/%s/tool/net/redbean.com -i %s" mode mode mode file))))
+                 (compile (format "make -j32 MODE=%s o/%s/tool/net/redbean.com && build/run o/%s/tool/net/redbean.com -i %s" mode mode mode file))))
               ((and (eq major-mode 'python-mode)
                     (cosmo-startswith "third_party/python/Lib/test/" file))
                (let ((mode (cosmo--make-mode arg)))
-                 (compile (format "make -j12 MODE=%s PYHARNESSARGS=-vv PYTESTARGS=-v o/%s/%s.py.runs"
+                 (compile (format "make -j32 MODE=%s PYHARNESSARGS=-vv PYTESTARGS=-v o/%s/%s.py.runs"
                                   mode mode (file-name-sans-extension file)))))
               ((eq major-mode 'python-mode)
                (compile (format "python.com %s" file)))
@@ -692,8 +692,10 @@
   (define-key lua-mode-map (kbd "C-c C-r") 'cosmo-run)
   (define-key python-mode-map (kbd "C-c C-r") 'cosmo-run)
   (define-key c-mode-map (kbd "C-c C-s") 'cosmo-run-test)
+  (define-key c++-mode-map (kbd "C-c C-s") 'cosmo-run-test)
   (define-key c-mode-map (kbd "C-c C-_") 'cosmo-run-win7)
-  (define-key c-mode-map (kbd "C-c C-_") 'cosmo-run-win10))
+  (define-key c-mode-map (kbd "C-c C-_") 'cosmo-run-win10)
+  (define-key c++-mode-map (kbd "C-c C-_") 'cosmo-run-win10))
 
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -939,7 +941,7 @@
            "T[0-9][0-9]:[0-9][0-9]:[0-9][0-9]"          ;; time
            "[+.][0-9][0-9][0-9][0-9][0-9][0-9]"         ;; micros
            ":\\([^:]+\\)"                               ;; file
-           ":\\([0-9]+\\)"))                             ;; line
+           ":\\([0-9]+\\)"))                            ;; line
         1 2))
 
 (eval-after-load 'compile
@@ -948,6 +950,20 @@
                   (cons 'cosmo cosmo-compilation-regexps))
      (add-to-list 'compilation-error-regexp-alist 'cosmo)))
 
+(defvar cosmo-gcc123-compilation-regexps
+  (list (cosmo-join
+         ""
+         '("inlined from '[^']*' at "
+           "\\([^:]+\\)"                ;; file
+           ":\\([0-9]+\\)"))            ;; line
+        1 2))
+
+(eval-after-load 'compile
+  '(progn
+     (add-to-list 'compilation-error-regexp-alist-alist
+                  (cons 'cosmo cosmo-gcc123-compilation-regexps))
+     (add-to-list 'compilation-error-regexp-alist 'cosmo)))
+
 (provide 'cosmo-stuff)
 
 ;;; cosmo-stuff.el ends here
diff --git a/tool/emacs/key.py b/tool/emacs/key.py
index 6da461954..0d1a56ca1 100644
--- a/tool/emacs/key.py
+++ b/tool/emacs/key.py
@@ -335,12 +335,11 @@ cosmo_kws = frozenset([
   "forcealignargpointer",
   "forceinline",
   "hasatleast",
-  "initarray",
   "interruptfn",
   "mallocesque",
   "mayalias",
   "memcpyesque",
-  "nocallback",
+  "dontcallback",
   "nodebuginfo",
   "__wur",
   "dontinline",
@@ -394,12 +393,11 @@ cosmo_kws = frozenset([
   "forcealignargpointer",
   "forceinline",
   "hasatleast",
-  "initarray",
   "interruptfn",
   "mallocesque",
   "mayalias",
   "memcpyesque",
-  "nocallback",
+  "dontcallback",
   "nodebuginfo",
   "__wur",
   "dontinline",
diff --git a/tool/hello/BUILD.mk b/tool/hello/BUILD.mk
index 3d2ac96e9..6047047bf 100644
--- a/tool/hello/BUILD.mk
+++ b/tool/hello/BUILD.mk
@@ -39,7 +39,7 @@ o/$(MODE)/tool/hello/hello.com.dbg:				\
 
 # uses apelink to turn it into an ape executable
 # support vector is set to all operating systems
-o/$(MODE)/tool/hello/hello.com:					\
+o/$(MODE)/tool/hello/hello.ape:					\
 		o/$(MODE)/tool/hello/hello.com.dbg		\
 		o/$(MODE)/tool/build/apelink.com		\
 		o/$(MODE)/tool/build/pecheck.com		\
@@ -49,7 +49,7 @@ o/$(MODE)/tool/hello/hello.com:					\
 
 # uses apelink to generate elf-only executable
 # support vector = linux/freebsd/openbsd/netbsd/metal
-o/$(MODE)/tool/hello/hello-elf.com:				\
+o/$(MODE)/tool/hello/hello-elf.ape:				\
 		o/$(MODE)/tool/hello/hello.com.dbg		\
 		o/$(MODE)/tool/build/apelink.com		\
 		o/$(MODE)/ape/ape.elf
@@ -59,7 +59,7 @@ o/$(MODE)/tool/hello/hello-elf.com:				\
 # support vector = macos/linux/freebsd/openbsd/netbsd
 # - great way to avoid attention from bad virus scanners
 # - creates tinier executable by reducing alignment requirement
-o/$(MODE)/tool/hello/hello-unix.com:				\
+o/$(MODE)/tool/hello/hello-unix.ape:				\
 		o/$(MODE)/tool/hello/hello.com.dbg		\
 		o/$(MODE)/tool/build/apelink.com		\
 		o/$(MODE)/ape/ape.elf
@@ -71,7 +71,7 @@ o/$(MODE)/tool/hello/hello-unix.com:				\
 o/$(MODE)/tool/hello/hello-pe.com.dbg:				\
 		o/$(MODE)/tool/hello/hello-pe.o
 	@$(COMPILE) -ALINK.elf $(LINK) $(LINKARGS) $(OUTPUT_OPTION) -q -e WinMain
-o/$(MODE)/tool/hello/hello-pe.com:				\
+o/$(MODE)/tool/hello/hello-pe.ape:				\
 		o/$(MODE)/tool/hello/hello-pe.com.dbg		\
 		o/$(MODE)/tool/build/elf2pe.com
 	@$(COMPILE) -AELF2PE o/$(MODE)/tool/build/elf2pe.com -o $@ $<
@@ -80,7 +80,7 @@ o/$(MODE)/tool/hello/hello-pe.com:				\
 o/$(MODE)/tool/hello/life-pe.com.dbg:				\
 		o/$(MODE)/tool/hello/life-pe.o
 	@$(COMPILE) -ALINK.elf $(LINK) $(LINKARGS) $(OUTPUT_OPTION) -q -e WinMain
-o/$(MODE)/tool/hello/life-pe.com:				\
+o/$(MODE)/tool/hello/life-pe.ape:				\
 		o/$(MODE)/tool/hello/life-pe.com.dbg		\
 		o/$(MODE)/tool/build/elf2pe.com
 	@$(COMPILE) -AELF2PE o/$(MODE)/tool/build/elf2pe.com -o $@ $<
@@ -89,7 +89,7 @@ o/$(MODE)/tool/hello/life-pe.com:				\
 o/$(MODE)/tool/hello/wait-pe.com.dbg:				\
 		o/$(MODE)/tool/hello/wait-pe.o
 	@$(COMPILE) -ALINK.elf $(LINK) $(LINKARGS) $(OUTPUT_OPTION) -q -e WinMain
-o/$(MODE)/tool/hello/wait-pe.com:				\
+o/$(MODE)/tool/hello/wait-pe.ape:				\
 		o/$(MODE)/tool/hello/wait-pe.com.dbg		\
 		o/$(MODE)/tool/build/elf2pe.com
 	@$(COMPILE) -AELF2PE o/$(MODE)/tool/build/elf2pe.com -R 64kb -S 4kb -o $@ $<
diff --git a/tool/net/BUILD.mk b/tool/net/BUILD.mk
index 116c7a091..a8d1d9c89 100644
--- a/tool/net/BUILD.mk
+++ b/tool/net/BUILD.mk
@@ -99,7 +99,8 @@ TOOL_NET_REDBEAN_LUA_MODULES =							\
 	o/$(MODE)/tool/net/ljson.o						\
 	o/$(MODE)/tool/net/lmaxmind.o						\
 	o/$(MODE)/tool/net/lsqlite3.o						\
-	o/$(MODE)/tool/net/largon2.o
+	o/$(MODE)/tool/net/largon2.o						\
+	o/$(MODE)/tool/net/launch.o
 
 o/$(MODE)/tool/net/redbean.com.dbg:						\
 		$(TOOL_NET_DEPS)						\
diff --git a/tool/net/definitions.lua b/tool/net/definitions.lua
index 52be9b91c..ead9c3ed2 100644
--- a/tool/net/definitions.lua
+++ b/tool/net/definitions.lua
@@ -392,22 +392,22 @@ LUA ENHANCEMENTS
     - redbean supports a printf modulus operator, like Python. For
       example, you can say `"hello %s" % {"world"}` instead of
       `string.format("hello %s", "world")`.
-      
+
       --
       - redbean supports a string multiply operator, like Python. For
       example, you can say `"hi" * 2` instead of `string.rep("hi", 2)`.
-      
+
       - redbean supports octal (base 8) integer literals. For example
       `0644 == 420` is the case in redbean, whereas in upstream Lua
       `0644 == 644` would be the case.
-      
+
       - redbean supports binary (base 2) integer literals. For example
       `0b1010 == 10` is the case in redbean, whereas in upstream Lua
       `0b1010` would result in an error.
-      
+
       - redbean supports the GNU syntax for the ASCII ESC character in
       string literals. For example, `"\e"` is the same as `"\x1b"`.
-      
+
 ]]
 
 ---@class string
@@ -550,6 +550,17 @@ SPECIAL PATHS
 ---
 function OnHttpRequest() end
 
+--- Hooks catch errors
+---
+--- If this functiopn is defined in the global scope by your `/.init.lua`
+--- then any errors occuring in the OnHttpRequest() hook will be catched.
+--- You'll be able then to do whatever you need with the error status and
+--- error message.
+---
+---@param status uint16
+---@param message string
+function OnError(status, message) end
+
 --- Hooks client connection creation.
 ---
 --- If this function is defined it'll be called from the main process
@@ -1436,9 +1447,9 @@ function Log(level, message) end
 function ParseHttpDateTime(rfc1123) end
 
 --- Parses URL.
---- 
+---
 ---@return Url url An object containing the following fields is returned:
---- 
+---
 --- - `scheme` is a string, e.g. `"http"`
 --- - `user` is the username string, or nil if absent
 --- - `pass` is the password string, or nil if absent
@@ -1448,28 +1459,28 @@ function ParseHttpDateTime(rfc1123) end
 --- - `params` is the URL paramaters, e.g. `/?a=b&c` would be
 ---   represented as the data structure `{{"a", "b"}, {"c"}, ...}`
 --- - `fragment` is the stuff after the `#` character
---- 
+---
 ---@param url string
 ---@param flags integer? may have:
---- 
+---
 --- - `kUrlPlus` to turn `+` into space
 --- - `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8
---- 
+---
 --- This parser is charset agnostic. Percent encoded bytes are
 --- decoded for all fields. Returned values might contain things
 --- like NUL characters, spaces, control codes, and non-canonical
 --- encodings. Absent can be discerned from empty by checking if
 --- the pointer is set.
---- 
+---
 --- There's no failure condition for this routine. This is a
 --- permissive parser. This doesn't normalize path segments like
---- `.` or `..` so use IsAcceptablePath() to check for those. No 
+--- `.` or `..` so use IsAcceptablePath() to check for those. No
 --- restrictions are imposed beyond that which is strictly
 --- necessary for parsing. All the data that is provided will be
 --- consumed to the one of the fields. Strict conformance is
 --- enforced on some fields more than others, like scheme, since
 --- it's the most non-deterministically defined field of them all.
---- 
+---
 --- Please note this is a URL parser, not a URI parser. Which
 --- means we support everything the URI spec says we should do
 --- except for the things we won't do, like tokenizing path
@@ -2244,7 +2255,7 @@ function ProgramTrustedIp(ip, cidr) end
 --- is granted per second to all buckets. The minimum value is 1/3600
 --- which means once per hour. The maximum value for this setting is
 --- 1e6, which means once every microsecond.
---- 
+---
 --- `cidr` is the specificity of judgement.  Since creating 2^32 buckets
 --- would need 4GB of RAM, redbean defaults this value to 24 which means
 --- filtering applies to class c network blocks (i.e. x.x.x.*), and your
@@ -2253,38 +2264,38 @@ function ProgramTrustedIp(ip, cidr) end
 --- number means you use less ram/cpu, but splash damage applies more to
 --- your clients; whereas higher numbers means more ram/cpu usage, while
 --- ensuring rate limiting only applies to specific compromised actors.
---- 
+---
 --- `reject` is the token count or treshold at which redbean should send
 --- 429 Too Many Request warnings to the client. Permitted values can be
 --- anywhere between -1 and 126 inclusively. The default value is 30 and
 --- -1 means disable to disable (assuming AcquireToken() will be used).
---- 
+---
 --- `ignore` is the token count or treshold, at which redbean should try
 --- simply ignoring clients and close the connection without logging any
 --- kind of warning, and without sending any response. The default value
 --- for this setting is `MIN(reject / 2, 15)`. This must be less than or
 --- equal to the `reject` setting. Allowed values are [-1,126] where you
 --- can use -1 as a means of disabling `ignore`.
---- 
+---
 --- `ban` is the token count at which redbean should report IP addresses
 --- to the blackhole daemon via a unix-domain socket datagram so they'll
 --- get banned in the kernel routing tables. redbean's default value for
 --- this setting is `MIN(ignore / 10, 1)`. Permitted values are [-1,126]
 --- where -1 may be used as a means of disabling the `ban` feature.
---- 
+---
 --- This function throws an exception if the constraints described above
 --- are not the case. Warnings are logged should redbean fail to connect
 --- to the blackhole daemon, assuming it hasn't been disabled. It's safe
 --- to use load balancing tools when banning is enabled, since you can't
 --- accidentally ban your own network interface addresses, loopback ips,
 --- or ProgramTrustedIp() addresses where these rate limits don't apply.
---- 
+---
 --- It's assumed will be called from the .init.lua global scope although
 --- it could be used in interpreter mode, or from a forked child process
 --- in which case the only processes that'll have ability to use it will
 --- be that same process, and any descendent processes. This function is
 --- only able to be called once.
---- 
+---
 --- This feature is not available in unsecure mode.
 ---@param replenish number?
 ---@param cidr integer?
@@ -2309,10 +2320,10 @@ function ProgramTokenBucket(replenish, cidr, reject, ignore, ban) end
 function AcquireToken(ip) end
 
 --- Counts number of tokens in bucket.
---- 
+---
 --- This function is the same as AcquireToken() except no subtraction is
 --- performed, i.e. no token is taken.
---- 
+---
 --- `ip` should be an IPv4 address and this defaults to GetClientAddr(),
 --- although other interpretations of its meaning are possible.
 ---@param ip uint32?
@@ -2326,12 +2337,12 @@ function CountTokens(ip) end
 --- based on the banned threshold. However if your Lua code calls
 --- `AcquireToken()` manually, then you'll need this function to take
 --- action on the returned values.
---- 
+---
 --- This function returns true if a datagram could be sent sucessfully.
 --- Otherwise false is returned, which can happen if blackholed isn't
 --- running, or if a lot of processes are sending messages to it and the
 --- operation would have blocked.
---- 
+---
 --- It's assumed that the blackholed service is running locally in the
 --- background.
 ---@param ip uint32
@@ -2416,7 +2427,7 @@ lsqlite3 = {
     --- An `lsqlite3.BUSY` error can occur at any point in a transaction: when
     --- the transaction is first started, during any write or update operations,
     --- or when the transaction commits. To avoid encountering `lsqlite3.BUSY`
-    --- errors in the middle of a transaction, the application can use 
+    --- errors in the middle of a transaction, the application can use
     --- `BEGIN IMMEDIATE` instead of just `BEGIN` to start a transaction. The
     --- `BEGIN IMMEDIATE` command might itself return `lsqlite3.BUSY`, but if it
     --- succeeds, then SQLite guarantees that no subsequent operations on the same database through the next COMMIT will return `lsqlite3.BUSY`.
@@ -2472,7 +2483,7 @@ lsqlite3 = {
     CORRUPT = 11,
     --- The `lsqlite3.NOTFOUND` result code is exposed in three ways:
     ---
-    --- `lsqlite3.NOTFOUND` can be returned by the `sqlite3_file_control()` 
+    --- `lsqlite3.NOTFOUND` can be returned by the `sqlite3_file_control()`
     --- interface to indicate that the file control opcode passed as the third
     --- argument was not recognized by the underlying VFS.
     ---
@@ -2491,7 +2502,7 @@ lsqlite3 = {
     --- complete because the disk is full. Note that this error can occur when
     --- trying to write information into the main database file, or it can also
     --- occur when writing into temporary disk files.
-    --- 
+    ---
     --- Sometimes applications encounter this error even though there is an
     --- abundance of primary disk space because the error occurs when writing
     --- into temporary disk files on a system where temporary files are stored
@@ -2521,19 +2532,19 @@ lsqlite3 = {
     --- the database schema was changed by some other process in between the
     --- time that the statement was prepared and the time the statement was run,
     --- this error can result.
-    --- 
+    ---
     --- The statement is automatically re-prepared if the schema changes, up to
     --- `SQLITE_MAX_SCHEMA_RETRY` times (default: 50). The `step()` interface
     --- will only return `lsqlite3.SCHEMA` back to the application if the
     --- failure persists after these many retries.
     SCHEMA = 17,
     --- The `lsqlite3.TOOBIG` error code indicates that a string or BLOB was too
-    --- large. The default maximum length of a string or BLOB in SQLite is 
+    --- large. The default maximum length of a string or BLOB in SQLite is
     --- 1,000,000,000 bytes. This maximum length can be changed at compile-time
     --- using the `SQLITE_MAX_LENGTH` compile-time option. The `lsqlite3.TOOBIG`
     --- error results when SQLite encounters a string or BLOB that exceeds the
     --- compile-time limit.
-    --- 
+    ---
     --- The `lsqlite3.TOOBIG` error code can also result when an oversized SQL
     --- statement is passed into one of the `db:prepare()` interface. The
     --- maximum length of an SQL statement defaults to a much smaller value of
@@ -2544,7 +2555,7 @@ lsqlite3 = {
     --- information about the failed constraint can be found by consulting the
     --- accompanying error message (returned via `errmsg()`) or by looking at
     --- the extended error code.
-    --- 
+    ---
     --- The `lsqlite3.CONSTRAINT` code can also be used as the return value from
     --- the `xBestIndex()` method of a virtual table implementation. When
     --- `xBestIndex()` returns `lsqlite3.CONSTRAINT`, that indicates that the
@@ -2558,7 +2569,7 @@ lsqlite3 = {
     --- BLOB in a column with a declared type of BOOLEAN. But in a few cases,
     --- SQLite is strict about types. The `lsqlite3.MISMATCH` error is returned
     --- in those few cases when the types do not match.
-    --- 
+    ---
     --- The rowid of a table must be an integer. Attempt to set the rowid to
     --- anything other than an integer (or a NULL which will be automatically
     --- converted into the next available integer rowid) results in an
@@ -2843,7 +2854,7 @@ function Database:close_vm(temponly) end
 ---@param func fun(udata: Udata) a Lua function that is invoked by SQLite3 whenever a transaction is committed. This callback receives one argument:
 ---@param udata Udata argument used when the callback was installed.
 ---
---- If `func` returns `false` or `nil` the COMMIT is allowed to proceed, 
+--- If `func` returns `false` or `nil` the COMMIT is allowed to proceed,
 --- otherwise the COMMIT is converted to a ROLLBACK.
 ---
 --- See: `db:rollback_hook` and `db:update_hook`
@@ -2898,7 +2909,7 @@ function Database:create_aggregate(name, nargs, step, final, userdata) end
 --- This creates a collation callback. A collation callback is used to establish
 --- a collation order, mostly for string comparisons and sorting purposes.
 ---@param name string the name of the collation to be created
----@param func fun(s1: string, s2: string): -1|0|1 a function that accepts two string arguments, compares them and returns `0` if both strings are identical, `-1` if the first argument is lower in the collation order than the second and `1` if the first argument is higher in the collation order than the second. 
+---@param func fun(s1: string, s2: string): -1|0|1 a function that accepts two string arguments, compares them and returns `0` if both strings are identical, `-1` if the first argument is lower in the collation order than the second and `1` if the first argument is higher in the collation order than the second.
 --- A simple example:
 ---
 ---    local function collate(s1,s2)
@@ -7263,15 +7274,15 @@ function unix.isatty(fd) end
 function unix.tiocgwinsz(fd) end
 
 --- Returns file descriptor of open anonymous file.
---- 
+---
 --- This creates a secure temporary file inside `$TMPDIR`. If it isn't
 --- defined, then `/tmp` is used on UNIX and GetTempPath() is used on
 --- the New Technology. This resolution of `$TMPDIR` happens once.
---- 
+---
 --- Once close() is called, the returned file is guaranteed to be
 --- deleted automatically. On UNIX the file is unlink()'d before this
 --- function returns. On the New Technology it happens upon close().
---- 
+---
 --- On the New Technology, temporary files created by this function
 --- should have better performance, because `kNtFileAttributeTemporary`
 --- asks the kernel to more aggressively cache and reduce i/o ops.
@@ -7284,7 +7295,7 @@ function unix.tmpfd() end
 function unix.sched_yield() end
 
 --- Creates interprocess shared memory mapping.
---- 
+---
 --- This function allocates special memory that'll be inherited across
 --- fork in a shared way. By default all memory in Redbean is "private"
 --- memory that's only viewable and editable to the process that owns
@@ -7294,15 +7305,15 @@ function unix.sched_yield() end
 --- don't want that to happen, and you want the memory to be shared
 --- similar to how it would be shared if you were using threads, then
 --- you can use this function to achieve just that.
---- 
+---
 --- The memory object this function returns may be accessed using its
 --- methods, which support atomics and futexes. It's very low-level.
 --- For example, you can use it to implement scalable mutexes:
---- 
+---
 ---     mem = unix.mapshared(8000 * 8)
---- 
+---
 ---     LOCK = 0 -- pick an arbitrary word index for lock
---- 
+---
 ---     -- From Futexes Are Tricky Version 1.1 § Mutex, Take 3;
 ---     -- Ulrich Drepper, Red Hat Incorporated, June 27, 2004.
 ---     function Lock()
@@ -7324,7 +7335,7 @@ function unix.sched_yield() end
 ---             mem:wake(LOCK, 1)
 ---         end
 ---     end
---- 
+---
 --- It's possible to accomplish the same thing as unix.mapshared()
 --- using files and unix.fcntl() advisory locks. However this goes
 --- significantly faster. For example, that's what SQLite does and
@@ -7333,7 +7344,7 @@ function unix.sched_yield() end
 --- might need something lower level than file locks, to implement
 --- things like throttling. Shared memory is a good way to do that
 --- since there's nothing that's faster.
---- 
+---
 ---@param size integer
 --- The `size` parameter needs to be a multiple of 8. The returned
 --- memory is zero initialized. When allocating shared memory, you
@@ -7341,7 +7352,7 @@ function unix.sched_yield() end
 --- overhead of allocating a single shared mapping is 500 words of
 --- resident memory and 8000 words of virtual memory. It's because
 --- the Cosmopolitan Libc mmap() granularity is 2**16.
---- 
+---
 --- This system call does not fail. An exception is instead thrown
 --- if sufficient memory isn't available.
 ---
@@ -7351,16 +7362,16 @@ function unix.mapshared(size) end
 ---@class unix.Memory: userdata
 --- unix.Memory encapsulates memory that's shared across fork() and
 --- this module provides the fundamental synchronization primitives
---- 
+---
 --- Redbean memory maps may be used in two ways:
---- 
+---
 --- 1. as an array of bytes a.k.a. a string
 --- 2. as an array of words a.k.a. integers
---- 
+---
 --- They're aliased, union, or overlapped views of the same memory.
 --- For example if you write a string to your memory region, you'll
 --- be able to read it back as an integer.
---- 
+---
 --- Reads, writes, and word operations will throw an exception if a
 --- memory boundary error or overflow occurs.
 unix.Memory = {}
@@ -7372,7 +7383,7 @@ unix.Memory = {}
 --- If `bytes` is none or nil, then the nul-terminated string at
 --- `offset` is returned. You may specify `bytes` to safely read
 --- binary data.
---- 
+---
 --- This operation happens atomically. Each shared mapping has a
 --- single lock which is used to synchronize reads and writes to
 --- that specific map. To make it scale, create additional maps.
@@ -7381,7 +7392,7 @@ unix.Memory = {}
 function unix.Memory:read(offset, bytes) end
 
 --- Writes bytes to memory region.
---- 
+---
 ---@param data string
 ---@param offset integer?
 --- `offset` is the starting byte index to which memory is copied,
@@ -7391,7 +7402,7 @@ function unix.Memory:read(offset, bytes) end
 --- If `bytes` is none or nil, then an implicit nil-terminator
 --- will be included after your `data` so things like json can
 --- be easily serialized to shared memory.
---- 
+---
 --- This operation happens atomically. Each shared mapping has a
 --- single lock which is used to synchronize reads and writes to
 --- that specific map. To make it scale, create additional maps.
@@ -7399,7 +7410,7 @@ function unix.Memory:write(data, offset, bytes) end
 
 
 --- Loads word from memory region.
---- 
+---
 --- This operation is atomic and has relaxed barrier semantics.
 ---@param word_index integer
 ---@return integer
@@ -7407,7 +7418,7 @@ function unix.Memory:write(data, offset, bytes) end
 function unix.Memory:load(word_index) end
 
 --- Stores word from memory region.
---- 
+---
 --- This operation is atomic and has relaxed barrier semantics.
 ---@param word_index integer
 ---@param value integer
@@ -7415,10 +7426,10 @@ function unix.Memory:store(word_index, value) end
 
 
 --- Exchanges value.
---- 
+---
 --- This sets word at `word_index` to `value` and returns the value
 --- previously held in by the word.
---- 
+---
 --- This operation is atomic and provides the same memory barrier
 --- semantics as the aligned x86 LOCK XCHG instruction.
 ---@param word_index integer
@@ -7427,12 +7438,12 @@ function unix.Memory:store(word_index, value) end
 function unix.Memory:xchg(word_index, value) end
 
 --- Compares and exchanges value.
---- 
+---
 --- This inspects the word at `word_index` and if its value is the same
 --- as `old` then it'll be replaced by the value `new`, in which case
 --- `true, old` shall be returned. If a different value was held at
 --- word, then `false` shall be returned along with the word.
---- 
+---
 --- This operation happens atomically and provides the same memory
 --- barrier semantics as the aligned x86 LOCK CMPXCHG instruction.
 ---@param word_index integer
@@ -7442,11 +7453,11 @@ function unix.Memory:xchg(word_index, value) end
 function unix.Memory:cmpxchg(word_index, old, new) end
 
 --- Fetches then adds value.
---- 
+---
 --- This method modifies the word at `word_index` to contain the sum of
 --- value and the `value` paremeter. This method then returns the value
 --- as it existed before the addition was performed.
---- 
+---
 --- This operation is atomic and provides the same memory barrier
 --- semantics as the aligned x86 LOCK XADD instruction.
 ---@param word_index integer
@@ -7455,7 +7466,7 @@ function unix.Memory:cmpxchg(word_index, old, new) end
 function unix.Memory:fetch_add(word_index, value) end
 
 --- Fetches and bitwise ands value.
---- 
+---
 --- This operation happens atomically and provides the same memory
 --- barrier ordering semantics as its x86 implementation.
 ---@param word_index integer
@@ -7464,7 +7475,7 @@ function unix.Memory:fetch_add(word_index, value) end
 function unix.Memory:fetch_and(word_index, value) end
 
 --- Fetches and bitwise ors value.
---- 
+---
 --- This operation happens atomically and provides the same memory
 --- barrier ordering semantics as its x86 implementation.
 ---@param word_index integer
@@ -7473,7 +7484,7 @@ function unix.Memory:fetch_and(word_index, value) end
 function unix.Memory:fetch_or(word_index, value) end
 
 --- Fetches and bitwise xors value.
---- 
+---
 --- This operation happens atomically and provides the same memory
 --- barrier ordering semantics as its x86 implementation.
 ---@param word_index integer
@@ -7482,24 +7493,24 @@ function unix.Memory:fetch_or(word_index, value) end
 function unix.Memory:fetch_xor(word_index, value) end
 
 --- Waits for word to have a different value.
---- 
+---
 --- This method asks the kernel to suspend the process until either the
 --- absolute deadline expires or we're woken up by another process that
 --- calls `unix.Memory:wake()`.
---- 
+---
 --- The `expect` parameter is used only upon entry to synchronize the
 --- transition to kernelspace. The kernel doesn't actually poll the
 --- memory location. It uses `expect` to make sure the process doesn't
 --- get added to the wait list unless it's sure that it needs to wait,
 --- since the kernel can only control the ordering of wait / wake calls
 --- across processes.
---- 
+---
 --- The default behavior is to wait until the heat death of the universe
 --- if necessary. You may alternatively specify an absolute deadline. If
 --- it's less than or equal to the value returned by clock_gettime, then
 --- this routine is non-blocking. Otherwise we'll block at most until
 --- the current time reaches the absolute deadline.
---- 
+---
 --- Futexes are currently supported on Linux, FreeBSD, OpenBSD. On other
 --- platforms this method calls sched_yield() and will either (1) return
 --- unix.EINTR if a deadline is specified, otherwise (2) 0 is returned.
@@ -7512,7 +7523,7 @@ function unix.Memory:fetch_xor(word_index, value) end
 --- well-supported on all supported platforms but requires using files.
 --- Please test your use case though, because it's kind of an edge case
 --- to have the scenario above, and chances are this op will work fine.
---- 
+---
 ---@return 0
 ---@overload fun(self, word_index: integer, expect: integer, abs_deadline?: integer, nanos?: integer): nil, error: unix.Errno
 ---
@@ -7520,10 +7531,10 @@ function unix.Memory:fetch_xor(word_index, value) end
 --- should use futexes inside a loop that is able to cope with spurious
 --- wakeups. We don't actually guarantee the value at word has in fact
 --- changed when this returns.
---- 
+---
 --- `EAGAIN` is raised if, upon entry, the word at `word_index` had a
 --- different value than what's specified at `expect`.
---- 
+---
 --- `ETIMEDOUT` is raised when the absolute deadline expires.
 ---
 ---@param word_index integer
@@ -7533,11 +7544,11 @@ function unix.Memory:fetch_xor(word_index, value) end
 function unix.Memory:wait(word_index, expect, abs_deadline, nanos) end
 
 --- Wakes other processes waiting on word.
---- 
+---
 --- This method may be used to signal or broadcast to waiters. The
 --- `count` specifies the number of processes that should be woken,
 --- which defaults to `INT_MAX`.
---- 
+---
 --- The return value is the number of processes that were actually woken
 --- as a result of the system call. No failure conditions are defined.
 ---@param index integer
diff --git a/tool/net/fetch.inc b/tool/net/fetch.inc
index 5665d085f..37dfb02de 100644
--- a/tool/net/fetch.inc
+++ b/tool/net/fetch.inc
@@ -15,7 +15,7 @@ static int LuaFetch(lua_State *L) {
   bool usingssl;
   uint32_t ip;
   struct Url url;
-  int t, ret, sock = -1, methodidx, hdridx;
+  int t, ret, sock = -1, hdridx;
   const char *host, *port;
   char *request;
   struct TlsBio *bio;
@@ -34,7 +34,9 @@ static int LuaFetch(lua_State *L) {
   size_t urlarglen, requestlen, paylen, bodylen;
   size_t i, g, hdrsize;
   int keepalive = kaNONE;
-  int imethod, numredirects = 0, maxredirects = 5;
+  char canmethod[9] = {0};
+  uint64_t imethod;
+  int numredirects = 0, maxredirects = 5;
   bool followredirect = true;
   struct addrinfo hints = {.ai_family = AF_INET,
                            .ai_socktype = SOCK_STREAM,
@@ -54,9 +56,10 @@ static int LuaFetch(lua_State *L) {
     body = luaL_optlstring(L, -1, "", &bodylen);
     lua_getfield(L, 2, "method");
     // use GET by default if no method is provided
-    method = luaL_optstring(L, -1, kHttpMethod[kHttpGet]);
-    if ((imethod = GetHttpMethod(method, -1))) {
-      method = kHttpMethod[imethod];
+    method = luaL_optstring(L, -1, "GET");
+    if ((imethod = ParseHttpMethod(method, -1))) {
+      WRITE64LE(canmethod, imethod);
+      method = canmethod;
     } else {
       return LuaNilError(L, "bad method");
     }
@@ -123,16 +126,16 @@ static int LuaFetch(lua_State *L) {
   } else if (lua_isnoneornil(L, 2)) {
     body = "";
     bodylen = 0;
-    method = kHttpMethod[kHttpGet];
+    method = "GET";
   } else {
     body = luaL_checklstring(L, 2, &bodylen);
-    method = kHttpMethod[kHttpPost];
+    method = "POST";
   }
   // provide Content-Length header unless it's zero and not expected
-  methodidx = GetHttpMethod(method, -1);
-  if (bodylen > 0 || !(methodidx == kHttpGet || methodidx == kHttpHead ||
-                       methodidx == kHttpTrace || methodidx == kHttpDelete ||
-                       methodidx == kHttpConnect)) {
+  imethod = ParseHttpMethod(method, -1);
+  if (bodylen > 0 ||
+      !(imethod == kHttpGet || imethod == kHttpHead || imethod == kHttpTrace ||
+        imethod == kHttpDelete || imethod == kHttpConnect)) {
     conlenhdr = gc(xasprintf("Content-Length: %zu\r\n", bodylen));
   }
 
@@ -142,8 +145,8 @@ static int LuaFetch(lua_State *L) {
   gc(ParseUrl(urlarg, urlarglen, &url, true));
   gc(url.params.p);
   DEBUGF("(ftch) client fetching %`'s (host=%`'.*s, port=%.*s, path=%`'.*s)",
-         urlarg, url.host.n, url.host.p, url.port.n, url.port.p,
-         url.path.n, url.path.p);
+         urlarg, url.host.n, url.host.p, url.port.n, url.port.p, url.path.n,
+         url.path.p);
 
   usingssl = false;
   if (url.scheme.n) {
@@ -488,7 +491,7 @@ Finished:
     if (msg.status == 303) {
       body = "";
       bodylen = 0;
-      method = kHttpMethod[kHttpGet];
+      method = "GET";
     }
     // create table if needed
     if (!lua_istable(L, 2)) {
@@ -512,8 +515,8 @@ Finished:
     VERBOSEF("(ftch) client redirecting %`'.*s "
              "(scheme=%`'.*s, host=%`'.*s, port=%.*s, path=%`'.*s)",
              FetchHeaderLength(kHttpLocation), FetchHeaderData(kHttpLocation),
-             url.scheme.n, url.scheme.p, url.host.n, url.host.p,
-             url.port.n, url.port.p, url.path.n, url.path.p);
+             url.scheme.n, url.scheme.p, url.host.n, url.host.p, url.port.n,
+             url.port.p, url.path.n, url.path.p);
     // while it's possible to check for IsAcceptableHost/IsAcceptablePort
     // it's not clear what to do if they are not;
     // if they are invalid, redirect returns "invalid host" message
@@ -530,7 +533,7 @@ Finished:
       if (FetchHeaderData(kHttpLocation)[0] == '/') {
         // if the path is absolute, then use it
         // so `/redir/more` -> `/less` becomes `/less`
-        url.path.n = 0; // replace the path
+        url.path.n = 0;  // replace the path
       } else {
         // if the path is relative, then merge it,
         // so `/redir/more` -> `less` becomes `/redir/less`
@@ -539,8 +542,8 @@ Finished:
         }
       }
       url.path.p = gc(xasprintf("%.*s%.*s", url.path.n, url.path.p,
-                      FetchHeaderLength(kHttpLocation),
-                      FetchHeaderData(kHttpLocation)));
+                                FetchHeaderLength(kHttpLocation),
+                                FetchHeaderData(kHttpLocation)));
       url.path.n = strlen(url.path.p);
       lua_pushstring(L, gc(EncodeUrl(&url, 0)));
     }
diff --git a/tool/net/help.txt b/tool/net/help.txt
index e238634f9..9c3ae4a7d 100644
--- a/tool/net/help.txt
+++ b/tool/net/help.txt
@@ -576,6 +576,12 @@ HOOKS
           *). See functions like Route which asks redbean to do its default
           thing from the handler.
 
+  OnError(status:int, message:string)
+          If this function is defined and if any errors occurs in
+          OnHttpRequest() then this method will be called instead of displaying
+          the default error page. Useful if you need to display the error page
+          using your specific code or send it to any tier service.
+
   OnClientConnection(ip:int, port:int, serverip:int, serverport:int) → bool
           If this function is defined it'll be called from the main process
           each time redbean accepts a new client connection. If it returns
diff --git a/tool/net/largon2.c b/tool/net/largon2.c
index e4a6a5c6f..6b5cd51c6 100644
--- a/tool/net/largon2.c
+++ b/tool/net/largon2.c
@@ -32,10 +32,9 @@
 #include "third_party/lua/lua.h"
 #include "third_party/lua/lualib.h"
 
-asm(".ident\t\"\\n\\n\
-largon2 (MIT License)\\n\
-Copyright 2016 Thibault Charbonnier\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(largon2_notice, "\
+largon2 (MIT License)\n\
+Copyright 2016 Thibault Charbonnier");
 
 // clang-format off
 /***
diff --git a/tool/net/launch.c b/tool/net/launch.c
new file mode 100644
index 000000000..2fa348709
--- /dev/null
+++ b/tool/net/launch.c
@@ -0,0 +1,120 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2024 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/calls/struct/sigaction.h"
+#include "libc/calls/struct/sigset.h"
+#include "libc/dce.h"
+#include "libc/errno.h"
+#include "libc/log/log.h"
+#include "libc/proc/posix_spawn.h"
+#include "libc/runtime/runtime.h"
+#include "libc/stdio/stdio.h"
+#include "libc/str/str.h"
+#include "libc/sysv/consts/sig.h"
+
+static volatile bool g_timed_out;
+
+static void finish(void) {
+  if (!IsWindows()) {
+    _exit(0);
+  }
+}
+
+static void timeout(int sig) {
+  g_timed_out = true;
+}
+
+static void failure(const char *url, const char *cmd, const char *reason) {
+  WARNF("(srvr) failed to open %s in a browser tab using %s: %s", url, cmd,
+        reason);
+}
+
+/**
+ * Opens browser tab on host system.
+ */
+void launch_browser(const char *url) {
+
+  // perform this task from a subprocess so it doesn't block server
+  if (!IsWindows()) {
+    switch (fork()) {
+      case 0:
+        break;
+      default:
+        return;
+      case -1:
+        perror("fork");
+        return;
+    }
+  }
+
+  // determine which command opens browser tab
+  const char *cmd;
+  if (IsWindows()) {
+    cmd = "/c/windows/explorer.exe";
+  } else if (IsXnu()) {
+    cmd = "open";
+  } else {
+    cmd = "xdg-open";
+  }
+
+  // spawn process
+  // set process group so ctrl-c won't kill browser
+  int pid, err;
+  posix_spawnattr_t sa;
+  char *args[] = {(char *)cmd, (char *)url, 0};
+  posix_spawnattr_init(&sa);
+  posix_spawnattr_setflags(&sa, POSIX_SPAWN_SETPGROUP);
+  err = posix_spawnp(&pid, cmd, 0, &sa, args, environ);
+  posix_spawnattr_destroy(&sa);
+  if (err) {
+    failure(url, cmd, strerror(err));
+    return finish();
+  }
+
+  // kill command if it takes more than three seconds
+  // we need it because xdg-open acts weird on headless systems
+  struct sigaction hand;
+  hand.sa_flags = 0;
+  sigemptyset(&hand.sa_mask);
+  hand.sa_handler = timeout;
+  sigaction(SIGALRM, &hand, 0);
+  alarm(3);
+
+  // wait for tab to return finish opening
+  // the browser will still be running after this completes
+  int ws;
+  while (waitpid(pid, &ws, 0) == -1) {
+    if (errno != EINTR) {
+      failure(url, cmd, strerror(errno));
+      kill(pid, SIGKILL);
+      return finish();
+    }
+    if (g_timed_out) {
+      failure(url, cmd, "process timed out");
+      kill(pid, SIGKILL);
+      return finish();
+    }
+  }
+  if (ws) {
+    failure(url, cmd, "process exited with non-zero status");
+  }
+
+  // we're done
+  return finish();
+}
diff --git a/tool/net/lfuncs.h b/tool/net/lfuncs.h
index 17e1fbcba..c5fcf8796 100644
--- a/tool/net/lfuncs.h
+++ b/tool/net/lfuncs.h
@@ -95,5 +95,7 @@ int LuaVisualizeControlCodes(lua_State *);
 void LuaPushUrlView(lua_State *, struct UrlView *);
 char *FormatUnixHttpDateTime(char *, int64_t);
 
+void launch_browser(const char *);
+
 COSMOPOLITAN_C_END_
 #endif /* COSMOPOLITAN_TOOL_NET_LFUNCS_H_ */
diff --git a/tool/net/lsqlite3.c b/tool/net/lsqlite3.c
index 945560538..f46eee72e 100644
--- a/tool/net/lsqlite3.c
+++ b/tool/net/lsqlite3.c
@@ -38,10 +38,9 @@
 #include "third_party/sqlite3/sqlite3.h"
 // clang-format off
 
-asm(".ident\t\"\\n\\n\
-lsqlite3 (MIT License)\\n\
-Copyright 2002-2016 Tiago Dionizio, Doug Currie\"");
-asm(".include \"libc/disclaimer.inc\"");
+__notice(lsqlite3_notice, "\
+lsqlite3 (MIT License)\n\
+Copyright 2002-2016 Tiago Dionizio, Doug Currie");
 
 // LOCAL CHANGES
 //
diff --git a/tool/net/redbean.c b/tool/net/redbean.c
index c7387bc71..3c97af5ae 100644
--- a/tool/net/redbean.c
+++ b/tool/net/redbean.c
@@ -456,6 +456,7 @@ static bool isexitingworker;
 static bool hasonworkerstart;
 static bool leakcrashreports;
 static bool hasonhttprequest;
+static bool hasonerror;
 static bool ishandlingrequest;
 static bool listeningonport443;
 static bool hasonprocesscreate;
@@ -2532,8 +2533,8 @@ img { vertical-align: middle; }\r\n\
   return p;
 }
 
-static char *ServeErrorImpl(unsigned code, const char *reason,
-                            const char *details) {
+static char *ServeErrorImplDefault(unsigned code, const char *reason,
+                                   const char *details) {
   size_t n;
   char *p, *s;
   struct Asset *a;
@@ -2570,6 +2571,27 @@ static char *ServeErrorImpl(unsigned code, const char *reason,
   }
 }
 
+static char *GetLuaResponse(void) {
+  return cpm.luaheaderp ? cpm.luaheaderp : SetStatus(200, "OK");
+}
+
+static char *ServeErrorImpl(unsigned code, const char *reason,
+                            const char *details) {
+  lua_State *L = GL;
+  if (hasonerror) {
+    lua_getglobal(L, "OnError");
+    lua_pushinteger(L, code);
+    lua_pushstring(L, reason);
+    if (LuaCallWithTrace(L, 2, 0, NULL) == LUA_OK) {
+      return CommitOutput(GetLuaResponse());
+    } else {
+      return ServeErrorImplDefault(code, reason, details);
+    }
+  } else {
+    return ServeErrorImplDefault(code, reason, details);
+  }
+}
+
 static char *ServeErrorWithPath(unsigned code, const char *reason,
                                 const char *path, size_t pathlen) {
   ERRORF("(srvr) server error: %d %s %`'.*s", code, reason, pathlen, path);
@@ -2587,9 +2609,10 @@ static char *ServeError(unsigned code, const char *reason) {
 }
 
 static char *ServeFailure(unsigned code, const char *reason) {
-  ERRORF("(srvr) failure: %d %s %s HTTP%02d %.*s %`'.*s %`'.*s %`'.*s %`'.*s",
-         code, reason, DescribeClient(), cpm.msg.version,
-         cpm.msg.xmethod.b - cpm.msg.xmethod.a, inbuf.p + cpm.msg.xmethod.a,
+  char method[9] = {0};
+  WRITE64LE(method, cpm.msg.method);
+  ERRORF("(srvr) failure: %d %s %s HTTP%02d %s %`'.*s %`'.*s %`'.*s %`'.*s",
+         code, reason, DescribeClient(), cpm.msg.version, method,
          HeaderLength(kHttpHost), HeaderData(kHttpHost),
          cpm.msg.uri.b - cpm.msg.uri.a, inbuf.p + cpm.msg.uri.a,
          HeaderLength(kHttpReferer), HeaderData(kHttpReferer),
@@ -2898,12 +2921,8 @@ static const char *GetSystemUrlLauncherCommand(void) {
 }
 
 static void LaunchBrowser(const char *path) {
-  int pid, ws;
-  struct in_addr addr;
-  const char *u, *prog;
-  sigset_t chldmask, savemask;
-  struct sigaction ignore, saveint, savequit;
   uint16_t port = 80;
+  struct in_addr addr;
   path = firstnonnull(path, "/");
   // use the first server address if there is at least one server
   if (servers.n) {
@@ -2913,42 +2932,7 @@ static void LaunchBrowser(const char *path) {
   // assign a loopback address if no server or unknown server address
   if (!servers.n || !addr.s_addr) addr.s_addr = htonl(INADDR_LOOPBACK);
   if (*path != '/') path = gc(xasprintf("/%s", path));
-  if ((prog = commandv(GetSystemUrlLauncherCommand(), gc(malloc(PATH_MAX)),
-                       PATH_MAX))) {
-    u = gc(xasprintf("http://%s:%d%s", inet_ntoa(addr), port, path));
-    DEBUGF("(srvr) opening browser with command %`'s %s", prog, u);
-    ignore.sa_flags = 0;
-    ignore.sa_handler = SIG_IGN;
-    sigemptyset(&ignore.sa_mask);
-    sigaction(SIGINT, &ignore, &saveint);
-    sigaction(SIGQUIT, &ignore, &savequit);
-    sigemptyset(&chldmask);
-    sigaddset(&chldmask, SIGCHLD);
-    sigprocmask(SIG_BLOCK, &chldmask, &savemask);
-    CHECK_NE(-1, (pid = fork()));
-    if (!pid) {
-      setpgrp();  // ctrl-c'ing redbean shouldn't kill browser
-      sigaction(SIGINT, &saveint, 0);
-      sigaction(SIGQUIT, &savequit, 0);
-      sigprocmask(SIG_SETMASK, &savemask, 0);
-      execv(prog, (char *const[]){(char *)prog, (char *)u, 0});
-      _Exit(127);
-    }
-    while (wait4(pid, &ws, 0, 0) == -1) {
-      CHECK_EQ(EINTR, errno);
-      errno = 0;
-    }
-    sigaction(SIGINT, &saveint, 0);
-    sigaction(SIGQUIT, &savequit, 0);
-    sigprocmask(SIG_SETMASK, &savemask, 0);
-    if (!(WIFEXITED(ws) && WEXITSTATUS(ws) == 0)) {
-      WARNF("(srvr) command %`'s exited with %d", GetSystemUrlLauncherCommand(),
-            WIFEXITED(ws) ? WEXITSTATUS(ws) : 128 + WEXITSTATUS(ws));
-    }
-  } else {
-    WARNF("(srvr) can't launch browser because %`'s isn't installed",
-          GetSystemUrlLauncherCommand());
-  }
+  launch_browser(gc(xasprintf("http://%s:%d%s", inet_ntoa(addr), port, path)));
 }
 
 static char *BadMethod(void) {
@@ -3227,10 +3211,6 @@ static char *ServeIndex(const char *path, size_t pathlen) {
   return p;
 }
 
-static char *GetLuaResponse(void) {
-  return cpm.luaheaderp ? cpm.luaheaderp : SetStatus(200, "OK");
-}
-
 static bool ShouldServeCrashReportDetails(void) {
   uint32_t ip;
   uint16_t port;
@@ -3952,12 +3932,9 @@ static int LuaGetRedbeanVersion(lua_State *L) {
 
 static int LuaGetMethod(lua_State *L) {
   OnlyCallDuringRequest(L, "GetMethod");
-  if (cpm.msg.method) {
-    lua_pushstring(L, kHttpMethod[cpm.msg.method]);
-  } else {
-    lua_pushlstring(L, inbuf.p + cpm.msg.xmethod.a,
-                    cpm.msg.xmethod.b - cpm.msg.xmethod.a);
-  }
+  char method[9] = {0};
+  WRITE64LE(method, cpm.msg.method);
+  lua_pushstring(L, method);
   return 1;
 }
 
@@ -4851,6 +4828,9 @@ static int LuaBlackhole(lua_State *L) {
   return 1;
 }
 
+static void BlockSignals(void) {
+}
+
 wontreturn static void Replenisher(void) {
   struct timespec ts;
   VERBOSEF("(token) replenish worker started");
@@ -5779,6 +5759,7 @@ static void LuaInit(void) {
   }
   if (LuaRunAsset("/.init.lua", true)) {
     hasonhttprequest = IsHookDefined("OnHttpRequest");
+    hasonerror = IsHookDefined("OnError");
     hasonclientconnection = IsHookDefined("OnClientConnection");
     hasonprocesscreate = IsHookDefined("OnProcessCreate");
     hasonprocessdestroy = IsHookDefined("OnProcessDestroy");
@@ -6242,9 +6223,10 @@ static char *HandleRequest(void) {
     LockInc(&shared->c.urisrefused);
     return ServeFailure(400, "Bad URI");
   }
-  INFOF("(req) received %s HTTP%02d %.*s %s %`'.*s %`'.*s", DescribeClient(),
-        cpm.msg.version, cpm.msg.xmethod.b - cpm.msg.xmethod.a,
-        inbuf.p + cpm.msg.xmethod.a, FreeLater(EncodeUrl(&url, 0)),
+  char method[9] = {0};
+  WRITE64LE(method, cpm.msg.method);
+  INFOF("(req) received %s HTTP%02d %s %s %`'.*s %`'.*s", DescribeClient(),
+        cpm.msg.version, method, FreeLater(EncodeUrl(&url, 0)),
         HeaderLength(kHttpReferer), HeaderData(kHttpReferer),
         HeaderLength(kHttpUserAgent), HeaderData(kHttpUserAgent));
   if (HasHeader(kHttpContentType) &&
diff --git a/tool/viz/printpeb.c b/tool/viz/printpeb.c
index 3d3ee1a61..7d9abbc43 100644
--- a/tool/viz/printpeb.c
+++ b/tool/viz/printpeb.c
@@ -188,7 +188,7 @@ dontasan void PrintTeb(void) {
 }
 
 void PrintPeb(void) {
-  struct NtPeb *peb = NtGetPeb();
+  __seg_gs struct NtPeb *peb = NtGetPeb();
   printf("\n\
 ╔──────────────────────────────────────────────────────────────────────────────╗\n\
 │ new technology § peb                                                         │\n\
@@ -327,8 +327,6 @@ void PrintPeb(void) {
          "pShimData", peb->pShimData);
   printf("0x%04x: %-40s = 0x%lx\n", offsetof(struct NtPeb, AppCompatInfo),
          "AppCompatInfo", peb->AppCompatInfo);
-  printf("0x%04x: %-40s = \"%s\"\n", offsetof(struct NtPeb, CSDVersion),
-         "CSDVersion", GetString(&peb->CSDVersion));
   printf("0x%04x: %-40s = 0x%lx\n",
          offsetof(struct NtPeb, ActivationContextData), "ActivationContextData",
          peb->ActivationContextData);
diff --git a/usr/share/zoneinfo/Anchorage b/usr/share/zoneinfo/Anchorage
deleted file mode 100644
index cdf0572be..000000000
Binary files a/usr/share/zoneinfo/Anchorage and /dev/null differ
diff --git a/usr/share/zoneinfo/Anchorage b/usr/share/zoneinfo/Anchorage
new file mode 120000
index 000000000..cafb24b4f
--- /dev/null
+++ b/usr/share/zoneinfo/Anchorage
@@ -0,0 +1 @@
+US/Alaska
\ No newline at end of file
diff --git a/usr/share/zoneinfo/Boulder b/usr/share/zoneinfo/Boulder
deleted file mode 100644
index 7fc669171..000000000
Binary files a/usr/share/zoneinfo/Boulder and /dev/null differ
diff --git a/usr/share/zoneinfo/Boulder b/usr/share/zoneinfo/Boulder
new file mode 120000
index 000000000..8b727a113
--- /dev/null
+++ b/usr/share/zoneinfo/Boulder
@@ -0,0 +1 @@
+US/Mountain
\ No newline at end of file
diff --git a/usr/share/zoneinfo/Chicago b/usr/share/zoneinfo/Chicago
deleted file mode 100644
index 3dd8f0fa8..000000000
Binary files a/usr/share/zoneinfo/Chicago and /dev/null differ
diff --git a/usr/share/zoneinfo/Chicago b/usr/share/zoneinfo/Chicago
new file mode 120000
index 000000000..0c6fef91f
--- /dev/null
+++ b/usr/share/zoneinfo/Chicago
@@ -0,0 +1 @@
+US/Central
\ No newline at end of file
diff --git a/usr/share/zoneinfo/GMT b/usr/share/zoneinfo/GMT
deleted file mode 100644
index c63474664..000000000
Binary files a/usr/share/zoneinfo/GMT and /dev/null differ
diff --git a/usr/share/zoneinfo/GMT b/usr/share/zoneinfo/GMT
new file mode 120000
index 000000000..1ed082089
--- /dev/null
+++ b/usr/share/zoneinfo/GMT
@@ -0,0 +1 @@
+UTC
\ No newline at end of file
diff --git a/usr/share/zoneinfo/GST b/usr/share/zoneinfo/GST
deleted file mode 100644
index c0ce4402f..000000000
Binary files a/usr/share/zoneinfo/GST and /dev/null differ
diff --git a/usr/share/zoneinfo/GST b/usr/share/zoneinfo/GST
new file mode 120000
index 000000000..e2ebd1750
--- /dev/null
+++ b/usr/share/zoneinfo/GST
@@ -0,0 +1 @@
+US/Pacific
\ No newline at end of file
diff --git a/usr/share/zoneinfo/Honolulu b/usr/share/zoneinfo/Honolulu
deleted file mode 100644
index d0c2595f9..000000000
Binary files a/usr/share/zoneinfo/Honolulu and /dev/null differ
diff --git a/usr/share/zoneinfo/Honolulu b/usr/share/zoneinfo/Honolulu
new file mode 120000
index 000000000..16c5c6023
--- /dev/null
+++ b/usr/share/zoneinfo/Honolulu
@@ -0,0 +1 @@
+US/Hawaii
\ No newline at end of file
diff --git a/usr/share/zoneinfo/Israel b/usr/share/zoneinfo/Israel
index 4992a7929..4c49bbf52 100644
Binary files a/usr/share/zoneinfo/Israel and b/usr/share/zoneinfo/Israel differ
diff --git a/usr/share/zoneinfo/New_York b/usr/share/zoneinfo/New_York
deleted file mode 100644
index 7553fee37..000000000
Binary files a/usr/share/zoneinfo/New_York and /dev/null differ
diff --git a/usr/share/zoneinfo/New_York b/usr/share/zoneinfo/New_York
new file mode 120000
index 000000000..b8d5363be
--- /dev/null
+++ b/usr/share/zoneinfo/New_York
@@ -0,0 +1 @@
+US/Eastern
\ No newline at end of file
diff --git a/usr/share/zoneinfo/Singapore b/usr/share/zoneinfo/Singapore
index 785836666..dbbdea3c8 100644
Binary files a/usr/share/zoneinfo/Singapore and b/usr/share/zoneinfo/Singapore differ
diff --git a/usr/share/zoneinfo/US/Alaska b/usr/share/zoneinfo/US/Alaska
new file mode 100644
index 000000000..cdf0572be
Binary files /dev/null and b/usr/share/zoneinfo/US/Alaska differ
diff --git a/usr/share/zoneinfo/US/Aleutian b/usr/share/zoneinfo/US/Aleutian
new file mode 100644
index 000000000..b1497bda6
Binary files /dev/null and b/usr/share/zoneinfo/US/Aleutian differ
diff --git a/usr/share/zoneinfo/US/Arizona b/usr/share/zoneinfo/US/Arizona
new file mode 100644
index 000000000..c2bd2f949
Binary files /dev/null and b/usr/share/zoneinfo/US/Arizona differ
diff --git a/usr/share/zoneinfo/US/Central b/usr/share/zoneinfo/US/Central
new file mode 100644
index 000000000..b01688065
Binary files /dev/null and b/usr/share/zoneinfo/US/Central differ
diff --git a/usr/share/zoneinfo/US/East-Indiana b/usr/share/zoneinfo/US/East-Indiana
new file mode 100644
index 000000000..6b08d15bd
Binary files /dev/null and b/usr/share/zoneinfo/US/East-Indiana differ
diff --git a/usr/share/zoneinfo/US/Eastern b/usr/share/zoneinfo/US/Eastern
new file mode 100644
index 000000000..2b6c2eea1
Binary files /dev/null and b/usr/share/zoneinfo/US/Eastern differ
diff --git a/usr/share/zoneinfo/US/Hawaii b/usr/share/zoneinfo/US/Hawaii
new file mode 100644
index 000000000..40e3d492e
Binary files /dev/null and b/usr/share/zoneinfo/US/Hawaii differ
diff --git a/usr/share/zoneinfo/US/Indiana-Starke b/usr/share/zoneinfo/US/Indiana-Starke
new file mode 100644
index 000000000..b187d5f8c
Binary files /dev/null and b/usr/share/zoneinfo/US/Indiana-Starke differ
diff --git a/usr/share/zoneinfo/US/Michigan b/usr/share/zoneinfo/US/Michigan
new file mode 100644
index 000000000..6eb3ac46e
Binary files /dev/null and b/usr/share/zoneinfo/US/Michigan differ
diff --git a/usr/share/zoneinfo/US/Mountain b/usr/share/zoneinfo/US/Mountain
new file mode 100644
index 000000000..09e54e5c7
Binary files /dev/null and b/usr/share/zoneinfo/US/Mountain differ
diff --git a/usr/share/zoneinfo/US/Pacific b/usr/share/zoneinfo/US/Pacific
new file mode 100644
index 000000000..aaf07787a
Binary files /dev/null and b/usr/share/zoneinfo/US/Pacific differ
diff --git a/usr/share/zoneinfo/US/Samoa b/usr/share/zoneinfo/US/Samoa
new file mode 100644
index 000000000..001289cee
Binary files /dev/null and b/usr/share/zoneinfo/US/Samoa differ
diff --git a/usr/share/zoneinfo/UTC b/usr/share/zoneinfo/UTC
index c3b97f1a1..157573b1d 100644
Binary files a/usr/share/zoneinfo/UTC and b/usr/share/zoneinfo/UTC differ