diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 4aad63c52..2544b3cf8 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -52,7 +52,6 @@ "relegated=", "hidden=", "textstartup=", - "initarray=", "returnsnonnull=", "returnspointerwithnoaliases=", "printfesque(x)=", diff --git a/Makefile b/Makefile index 0f6122670..1186b7674 100644 --- a/Makefile +++ b/Makefile @@ -133,10 +133,10 @@ endif ifneq ($(findstring aarch64,$(MODE)),) ARCH = aarch64 -HOSTS ?= pi pi5 studio freebsdarm +HOSTS ?= pi studio freebsdarm else ARCH = x86_64 -HOSTS ?= freebsd rhel7 xnu win10 openbsd netbsd meatball nightmare +HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10 endif ZIPOBJ_FLAGS += -a$(ARCH) @@ -149,9 +149,9 @@ export MODE export SOURCE_DATE_EPOCH export TMPDIR -COSMOCC = .cosmocc/3.2 +COSMOCC = .cosmocc/3.3 TOOLCHAIN = $(COSMOCC)/bin/$(ARCH)-linux-cosmo- -DOWNLOAD := $(shell build/download-cosmocc.sh $(COSMOCC) 3.2 28b48682595f0f46b45ab381118cdffdabc8fcfa29aa54e301fe6ffe35269f5e) +DOWNLOAD := $(shell build/download-cosmocc.sh $(COSMOCC) 3.3 d26ec8f4e48f6db004fc6a9677c7ff3b50c3b21e936e9393158aa2ed51b0b549) AS = $(TOOLCHAIN)as CC = $(TOOLCHAIN)gcc @@ -206,8 +206,7 @@ endif .UNVEIL += \ libc/integral \ libc/stdbool.h \ - libc/disclaimer.inc \ - rwc:/dev/shm \ + rwc:/dev/shm \ rx:.cosmocc \ rx:build/bootstrap \ r:build/portcosmo.h \ diff --git a/ape/BUILD.mk b/ape/BUILD.mk index 106504d02..afddd13b6 100644 --- a/ape/BUILD.mk +++ b/ape/BUILD.mk @@ -157,7 +157,6 @@ o/$(MODE)/ape/ape-no-modify-self.o: \ ape/ape.S \ ape/ape.h \ ape/macros.internal.h \ - ape/notice.inc \ ape/relocations.h \ ape/ape.internal.h \ libc/dce.h \ @@ -185,7 +184,6 @@ o/$(MODE)/ape/ape-copy-self.o: \ ape/ape.S \ ape/ape.h \ ape/macros.internal.h \ - ape/notice.inc \ ape/relocations.h \ ape/ape.internal.h \ libc/dce.h \ diff --git a/ape/aarch64.lds b/ape/aarch64.lds index 9866ac748..cec91ae89 100644 --- a/ape/aarch64.lds +++ b/ape/aarch64.lds @@ -89,10 +89,12 
@@ SECTIONS { *(.ubsan.data) } - .comment : { - __comment_start = .; - KEEP(*(.comment)) + .notice : { + __notices = .; + KEEP(*(.notice)) BYTE(0); + BYTE(10); + BYTE(10); } .eh_frame_hdr : { @@ -157,8 +159,11 @@ SECTIONS { .init_array : { __init_array_start = .; - KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) - KEEP(*(.init_array EXCLUDE_FILE(*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors)) + KEEP(*(.preinit_array)) + KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*) + SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP(*(.init_array)) + KEEP(*(.ctors)) __init_array_end = .; } diff --git a/ape/ape.S b/ape/ape.S index 80e788570..eaf0bfaab 100644 --- a/ape/ape.S +++ b/ape/ape.S @@ -33,7 +33,6 @@ │ αcτµαlly pδrταblε εxεcµταblε § program header │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "ape/macros.internal.h" -#include "ape/notice.inc" #include "ape/relocations.h" #include "libc/calls/metalfile.internal.h" #include "libc/dce.h" @@ -1772,49 +1771,31 @@ kernel: movabs $ape_stack_vaddr,%rsp .type ape_text_nops,@object .type __test_end,@object - .section .commentprologue,"a",@progbits - .globl __comment_start - .type __comment_start,@object - .hidden __comment_start -__comment_start:/* - ... - decentralized content - ... 
- */.previous - .section .commentepilogue,"a",@progbits - .byte 0 - .previous - .section .ape.pad.head,"a",@progbits .type ape_pad_head,@object .hidden ape_pad_head ape_pad_head: - .previous .section .ape.pad.text,"a",@progbits .type ape_pad_text,@object .hidden ape_pad_text ape_pad_text: - .previous .section .ape.pad.privileged,"a",@progbits .type ape_pad_privileged,@object .hidden ape_pad_privileged ape_pad_privileged: - .previous .section .ape.pad.data,"a",@progbits .type ape_pad_data,@object .hidden ape_pad_data ape_pad_data: - .previous #if SupportsWindows() .section .idata.ro,"a",@progbits .type ape_idata_ro,@object .hidden ape_idata_ro ape_idata_ro: - .previous #endif /* SupportsWindows() */ .section .dataprologue,"aw",@progbits @@ -1822,32 +1803,45 @@ ape_idata_ro: .globl __data_start .hidden __data_start __data_start: - .previous .section .dataepilogue,"aw",@progbits .type __data_end,@object .globl __data_end .hidden __data_end __data_end: - .previous .section .bssprologue,"aw",@nobits .type __bss_start,@object .globl __bss_start .hidden __bss_start __bss_start: - .previous .section .bssepilogue,"aw",@nobits .type __bss_end,@object .globl __bss_end .hidden __bss_end __bss_end: - .previous .section .fstls,"awT",@nobits .align TLS_ALIGNMENT - .previous + + .section .notice,"aR",@progbits + .asciz "\n\n\ +Cosmopolitan\n\ +Copyright 2024 Justine Alexandra Roberts Tunney\n\ +\n\ +Permission to use, copy, modify, and/or distribute this software for\n\ +any purpose with or without fee is hereby granted, provided that the\n\ +above copyright notice and this permission notice appear in all copies.\n\ +\n\ +THE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL\n\ +WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED\n\ +WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE\n\ +AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL\n\ +DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR\n\ +PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER\n\ +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\n\ +PERFORMANCE OF THIS SOFTWARE." .end  \ No newline at end of file diff --git a/ape/ape.lds b/ape/ape.lds index 1a39e2dc1..d0a27da6b 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -330,11 +330,11 @@ SECTIONS { *(.ubsan.data) /* Legal Notices */ -#if !defined(IM_FEELING_NAUGHTY) || defined(EMBED_NOTICES) - KEEP(*(.commentprologue)) - KEEP(*(.comment)) - KEEP(*(.commentepilogue)) -#endif + __notices = .; + KEEP(*(.notice)) + BYTE(0); + BYTE(10); + BYTE(10); /*BEGIN: read-only data that's only needed for initialization */ @@ -390,6 +390,9 @@ SECTIONS { /*BEGIN: NT FORK COPYING */ KEEP(*(.dataprologue)) *(.data .data.*) + *(.gnu_extab) + *(.gcc_except_table .gcc_except_table.*) + *(.exception_ranges*) *(.PyRuntime) /* for python */ *(.subrs) /* for emacs */ KEEP(*(SORT_BY_NAME(.sort.data.*))) @@ -404,11 +407,11 @@ SECTIONS { . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0); __init_array_start = .; + KEEP(*(.preinit_array)) KEEP(*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) - KEEP(*(.ctors)) KEEP(*(.init_array)) - KEEP(*(.preinit_array)) + KEEP(*(.ctors)) __init_array_end = .; . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0); @@ -425,8 +428,8 @@ SECTIONS { . = ALIGN(. != 0 ? __SIZEOF_POINTER__ : 0); KEEP(*(SORT_BY_NAME(.piro.data.sort.*))) KEEP(*(.piro.pad.data)) - KEEP(*(.dataepilogue)) *(.igot.plt) + KEEP(*(.dataepilogue)) . = ALIGN(. != 0 ? 
CONSTANT(COMMONPAGESIZE) : 0); /*END: NT FORK COPYING */ diff --git a/ape/sections.internal.h b/ape/sections.internal.h index e99503b7e..6bc8cc312 100644 --- a/ape/sections.internal.h +++ b/ape/sections.internal.h @@ -2,7 +2,7 @@ #define COSMOPOLITAN_APE_SECTIONS_INTERNAL_H_ COSMOPOLITAN_C_START_ -extern const char __comment_start[] __attribute__((__weak__)); +extern const char __notices[] __attribute__((__weak__)); extern unsigned char __executable_start[] __attribute__((__weak__)); extern unsigned char __privileged_start[] __attribute__((__weak__)); extern unsigned char _ehead[] __attribute__((__weak__)); diff --git a/build/bootstrap/ape.aarch64 b/build/bootstrap/ape.aarch64 index 65fe2cc16..27fd3d7fb 100755 Binary files a/build/bootstrap/ape.aarch64 and b/build/bootstrap/ape.aarch64 differ diff --git a/build/bootstrap/ape.elf b/build/bootstrap/ape.elf index f56c3ef8e..6a87fd728 100755 Binary files a/build/bootstrap/ape.elf and b/build/bootstrap/ape.elf differ diff --git a/build/bootstrap/ape.macho b/build/bootstrap/ape.macho index 2b887117c..42b153c6a 100755 Binary files a/build/bootstrap/ape.macho and b/build/bootstrap/ape.macho differ diff --git a/build/bootstrap/fixupobj.com b/build/bootstrap/fixupobj.com index fefcebd01..fb2db3588 100755 Binary files a/build/bootstrap/fixupobj.com and b/build/bootstrap/fixupobj.com differ diff --git a/build/definitions.mk b/build/definitions.mk index 6950d5667..9c3b3d6e6 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -138,6 +138,7 @@ MATHEMATICAL = \ DEFAULT_CPPFLAGS += \ -D_COSMO_SOURCE \ -DMODE='"$(MODE)"' \ + -Wno-prio-ctor-dtor \ -Wno-unknown-pragmas \ -nostdinc \ -iquote. 
\ @@ -164,6 +165,7 @@ DEFAULT_LDFLAGS = \ -nostdlib \ -znorelro \ --gc-sections \ + -z noexecstack \ --build-id=none \ --no-dynamic-linker diff --git a/build/objdump b/build/objdump index 32a4e218a..3a022d3aa 100755 --- a/build/objdump +++ b/build/objdump @@ -6,14 +6,14 @@ if [ -n "$OBJDUMP" ]; then fi find_objdump() { - if [ -x .cosmocc/3.2/bin/$1-linux-cosmo-objdump ]; then - OBJDUMP=.cosmocc/3.2/bin/$1-linux-cosmo-objdump - elif [ -x .cosmocc/3.2/bin/$1-linux-musl-objdump ]; then - OBJDUMP=.cosmocc/3.2/bin/$1-linux-musl-objdump - elif [ -x "$COSMO/.cosmocc/3.2/bin/$1-linux-cosmo-objdump" ]; then - OBJDUMP="$COSMO/.cosmocc/3.2/bin/$1-linux-cosmo-objdump" - elif [ -x "$COSMO/.cosmocc/3.2/bin/$1-linux-musl-objdump" ]; then - OBJDUMP="$COSMO/.cosmocc/3.2/bin/$1-linux-musl-objdump" + if [ -x .cosmocc/3.3/bin/$1-linux-cosmo-objdump ]; then + OBJDUMP=.cosmocc/3.3/bin/$1-linux-cosmo-objdump + elif [ -x .cosmocc/3.3/bin/$1-linux-musl-objdump ]; then + OBJDUMP=.cosmocc/3.3/bin/$1-linux-musl-objdump + elif [ -x "$COSMO/.cosmocc/3.3/bin/$1-linux-cosmo-objdump" ]; then + OBJDUMP="$COSMO/.cosmocc/3.3/bin/$1-linux-cosmo-objdump" + elif [ -x "$COSMO/.cosmocc/3.3/bin/$1-linux-musl-objdump" ]; then + OBJDUMP="$COSMO/.cosmocc/3.3/bin/$1-linux-musl-objdump" else echo "error: toolchain not found (try running 'cosmocc --update' or 'make' in the cosmo monorepo)" >&2 exit 1 diff --git a/dsp/mpeg/buffer.c b/dsp/mpeg/buffer.c index 76ed2be36..f0cb0de38 100644 --- a/dsp/mpeg/buffer.c +++ b/dsp/mpeg/buffer.c @@ -35,12 +35,7 @@ #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/sysv/consts/madv.h" - -asm(".ident\t\"\\n\\n\ -PL_MPEG (MIT License)\\n\ -Copyright(c) 2019 Dominic Szablewski\\n\ -https://phoboslab.org\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("pl_mpeg_notice"); /* clang-format off */ // ----------------------------------------------------------------------------- diff --git a/dsp/mpeg/demux.c b/dsp/mpeg/demux.c index 4ded810e5..66eff844a 
100644 --- a/dsp/mpeg/demux.c +++ b/dsp/mpeg/demux.c @@ -27,17 +27,12 @@ │ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE │ │ SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "dsp/mpeg/buffer.h" #include "dsp/mpeg/demux.h" +#include "dsp/mpeg/buffer.h" #include "dsp/mpeg/mpeg.h" #include "libc/mem/mem.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -PL_MPEG (MIT License)\\n\ -Copyright(c) 2019 Dominic Szablewski\\n\ -https://phoboslab.org\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("pl_mpeg_notice"); /* clang-format off */ // ---------------------------------------------------------------------------- diff --git a/dsp/mpeg/idct.c b/dsp/mpeg/idct.c index 87c17ae6a..11312607e 100644 --- a/dsp/mpeg/idct.c +++ b/dsp/mpeg/idct.c @@ -28,12 +28,7 @@ │ SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/core/half.h" - -asm(".ident\t\"\\n\\n\ -PL_MPEG (MIT License)\\n\ -Copyright(c) 2019 Dominic Szablewski\\n\ -https://phoboslab.org\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("pl_mpeg_notice"); /** * Computes Fixed-Point 8x8 Inverse Discrete Cosine Transform. 
diff --git a/dsp/mpeg/mp2.c b/dsp/mpeg/mp2.c index 88a5a0c10..53fc91a23 100644 --- a/dsp/mpeg/mp2.c +++ b/dsp/mpeg/mp2.c @@ -33,12 +33,6 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -PL_MPEG (MIT License)\\n\ -Copyright(c) 2019 Dominic Szablewski\\n\ -https://phoboslab.org\""); -asm(".include \"libc/disclaimer.inc\""); - /* clang-format off */ // ----------------------------------------------------------------------------- // plm_audio implementation diff --git a/dsp/mpeg/mpeg1.c b/dsp/mpeg/mpeg1.c index f9c29910b..905af23da 100644 --- a/dsp/mpeg/mpeg1.c +++ b/dsp/mpeg/mpeg1.c @@ -41,12 +41,7 @@ #include "libc/str/str.h" #include "libc/time/time.h" #include "libc/x/x.h" - -asm(".ident\t\"\\n\\n\ -PL_MPEG (MIT License)\\n\ -Copyright(c) 2019 Dominic Szablewski\\n\ -https://phoboslab.org\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("pl_mpeg_notice"); // ----------------------------------------------------------------------------- // plm_video implementation @@ -1104,7 +1099,7 @@ plm_video_t *plm_video_create_with_buffer(plm_buffer_t *buffer, return self; } -static textstartup void plm_video_init(void) { +__attribute__((__constructor__)) static textstartup void plm_video_init(void) { PLM_VIDEO_MACROBLOCK_TYPE[0] = NULL; PLM_VIDEO_MACROBLOCK_TYPE[1] = (void *)PLM_VIDEO_MACROBLOCK_TYPE_INTRA; PLM_VIDEO_MACROBLOCK_TYPE[2] = (void *)PLM_VIDEO_MACROBLOCK_TYPE_PREDICTIVE; @@ -1113,5 +1108,3 @@ static textstartup void plm_video_init(void) { PLM_VIDEO_DCT_SIZE[1] = (void *)PLM_VIDEO_DCT_SIZE_CHROMINANCE; PLM_VIDEO_DCT_SIZE[2] = (void *)PLM_VIDEO_DCT_SIZE_CHROMINANCE; } - -const void *const plm_video_init_ctor[] initarray = {plm_video_init}; diff --git a/dsp/mpeg/notice.c b/dsp/mpeg/notice.c new file mode 100644 index 000000000..264a7549b --- /dev/null +++ b/dsp/mpeg/notice.c @@ -0,0 +1,4 @@ +__notice(pl_mpeg_notice, "\ +PL_MPEG (MIT License)\n\ +Copyright(c) 2019 Dominic Szablewski\n\ +https://phoboslab.org"); diff --git 
a/dsp/mpeg/plm.c b/dsp/mpeg/plm.c index f4f1dc504..7704643ff 100644 --- a/dsp/mpeg/plm.c +++ b/dsp/mpeg/plm.c @@ -32,12 +32,7 @@ #include "libc/mem/mem.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -PL_MPEG (MIT License)\\n\ -Copyright(c) 2019 Dominic Szablewski\\n\ -https://phoboslab.org\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("pl_mpeg_notice"); /* clang-format off */ // ----------------------------------------------------------------------------- diff --git a/dsp/mpeg/slowrgb.c b/dsp/mpeg/slowrgb.c index 16c819c38..7472d82f3 100644 --- a/dsp/mpeg/slowrgb.c +++ b/dsp/mpeg/slowrgb.c @@ -29,12 +29,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/mpeg/mpeg.h" #include "libc/macros.internal.h" - -asm(".ident\t\"\\n\\n\ -PL_MPEG (MIT License)\\n\ -Copyright(c) 2019 Dominic Szablewski\\n\ -https://phoboslab.org\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("pl_mpeg_notice"); /** * @see YCbCr2RGB() in tool/viz/lib/ycbcr2rgb.c diff --git a/dsp/scale/magikarp.c b/dsp/scale/magikarp.c index d8f83dd80..ea5aa7d55 100644 --- a/dsp/scale/magikarp.c +++ b/dsp/scale/magikarp.c @@ -121,8 +121,7 @@ void *MagikarpY(long dys, long dxs, unsigned char d[restrict dys][dxs], return d; } -static textstartup void g_magikarp_init() { +__attribute__((__constructor__)) static textstartup void g_magikarp_init() { memcpy(g_magkern, kMagkern[0], sizeof(g_magkern)); memcpy(g_magikarp, kMagikarp[0], sizeof(g_magikarp)); } -const void *const g_magikarp_ctor[] initarray = {g_magikarp_init}; diff --git a/dsp/tty/itoa8.c b/dsp/tty/itoa8.c index bf968728e..57c903b69 100644 --- a/dsp/tty/itoa8.c +++ b/dsp/tty/itoa8.c @@ -21,7 +21,7 @@ struct Itoa8 kItoa8; -static textstartup void itoa8_init(void) { +__attribute__((__constructor__)) static textstartup void itoa8_init(void) { int i; uint8_t z; uint32_t w; @@ -41,5 +41,3 @@ static textstartup void itoa8_init(void) { 
kItoa8.data[i] = w; } } - -const void *const itoa8_init_ctor[] initarray = {itoa8_init}; diff --git a/dsp/tty/rgb2ansi.c b/dsp/tty/rgb2ansi.c index 053aa4ebb..baece9e8a 100644 --- a/dsp/tty/rgb2ansi.c +++ b/dsp/tty/rgb2ansi.c @@ -97,7 +97,7 @@ static int uncube(int x) { return x < 48 ? 0 : x < 115 ? 1 : (x - 35) / 40; } -static textstartup void rgb2ansi_init(void) { +__attribute__((__constructor__)) static textstartup void rgb2ansi_init(void) { uint8_t c; uint32_t i; memcpy(g_ansi2rgb_, &kCgaPalette, sizeof(kCgaPalette)); @@ -114,5 +114,3 @@ static textstartup void rgb2ansi_init(void) { g_ansi2rgb_[i].xt = i; } } - -const void *const rgb2ansi_init_ctor[] initarray = {rgb2ansi_init}; diff --git a/dsp/tty/ttyquant.c b/dsp/tty/ttyquant.c index a0cee959e..0e9a0e16e 100644 --- a/dsp/tty/ttyquant.c +++ b/dsp/tty/ttyquant.c @@ -73,8 +73,6 @@ textstartup void ttyquantsetup(enum TtyQuantizationAlgorithm alg, TTYQUANT()->blocks = blocks; } -textstartup void ttyquant_init(void) { +__attribute__((__constructor__)) textstartup void ttyquant_init(void) { ttyquantsetup(kTtyQuantXterm256, kTtyQuantRgb, kTtyBlocksUnicode); } - -const void *const ttyquant_init_ctor[] initarray = {ttyquant_init}; diff --git a/examples/kilo.c b/examples/kilo.c index 7cc1ddefb..9645634e1 100644 --- a/examples/kilo.c +++ b/examples/kilo.c @@ -33,11 +33,10 @@ │ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -asm(".ident \"\n\ +__notice(kilo_notice, "\ Kilo ─ A very simple editor (BSD-2)\n\ Copyright 2016 Salvatore Sanfilippo\n\ -Contact: antirez@gmail.com\"\n\ -.include \"libc/disclaimer.inc\""); +Contact: antirez@gmail.com"); /* * This software has been modified by Justine Tunney to: diff --git a/examples/script.c b/examples/script.c index 3ea419caa..e840aca8a 100644 --- a/examples/script.c +++ b/examples/script.c @@ -67,13 +67,10 @@ * @see https://asciinema.org/ */ -asm(".ident\t\"\\n\\n\ -FreeBSD Script (BSD-3 License)\\n\ -Copyright (c) 2010, 2012 David E. O'Brien\\n\ -Copyright (c) 1980, 1992, 1993\\n\ -\tThe Regents of the University of California.\\n\ -\tAll rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(freebsd_script_notice, "\ +FreeBSD Script (BSD-3 License)\n\ +Copyright (c) 2010, 2012 David E. O'Brien\n\ +Copyright (c) 1980, 1992, 1993 The Regents of the University of California"); #define DEF_BUF 65536 diff --git a/examples/whois.c b/examples/whois.c index bae2bc359..95154787f 100644 --- a/examples/whois.c +++ b/examples/whois.c @@ -48,12 +48,9 @@ #include "third_party/musl/netdb.h" // clang-format off -asm(".ident\t\"\\n\\n\ -FreeBSD Whois (BSD-3 License)\\n\ -Copyright (c) 1980, 1993\\n\ -\tThe Regents of the University of California.\\n\ -\tAll rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(freebsd_whois_notice, "\ +FreeBSD Whois (BSD-3 License)\n\ +Copyright (c) 1980, 1993 The Regents of the University of California"); #define ABUSEHOST "whois.abuse.net" #define ANICHOST "whois.arin.net" diff --git a/libc/calls/BUILD.mk b/libc/calls/BUILD.mk index 7f1611f9a..a12d7f3e6 100644 --- a/libc/calls/BUILD.mk +++ b/libc/calls/BUILD.mk @@ -73,12 +73,10 @@ $(LIBC_CALLS_A_OBJS): private \ -Wframe-larger-than=4096 \ -Walloca-larger-than=4096 -ifneq ($(ARCH), aarch64) # we always want -O3 because: # it makes the code size smaller 
too -# we need -mstringop-strategy=loop because: -# privileged code might generate memcpy call o/$(MODE)/libc/calls/termios2host.o \ +o/$(MODE)/libc/calls/siginfo2cosmo.o \ o/$(MODE)/libc/calls/sigenter-freebsd.o \ o/$(MODE)/libc/calls/sigenter-netbsd.o \ o/$(MODE)/libc/calls/sigenter-openbsd.o \ @@ -86,6 +84,19 @@ o/$(MODE)/libc/calls/sigenter-xnu.o \ o/$(MODE)/libc/calls/ntcontext2linux.o: private \ COPTS += \ -O3 \ + -ffreestanding + +ifeq ($(ARCH), x86_64) +# we need -mstringop-strategy=loop because: +# privileged code might generate memcpy call +o/$(MODE)/libc/calls/termios2host.o \ +o/$(MODE)/libc/calls/siginfo2cosmo.o \ +o/$(MODE)/libc/calls/sigenter-freebsd.o \ +o/$(MODE)/libc/calls/sigenter-netbsd.o \ +o/$(MODE)/libc/calls/sigenter-openbsd.o \ +o/$(MODE)/libc/calls/sigenter-xnu.o \ +o/$(MODE)/libc/calls/ntcontext2linux.o: private \ + COPTS += \ -mstringop-strategy=loop endif diff --git a/libc/calls/clock_gettime-nt.c b/libc/calls/clock_gettime-nt.c index 8ebef10b6..5a6464e42 100644 --- a/libc/calls/clock_gettime-nt.c +++ b/libc/calls/clock_gettime-nt.c @@ -87,13 +87,9 @@ textwindows int sys_clock_gettime_nt(int clock, struct timespec *ts) { } } -static textstartup void winclock_init() { +__attribute__((__constructor__(40))) static textstartup void winclock_init() { if (IsWindows()) { QueryPerformanceCounter(&g_winclock.base); QueryPerformanceFrequency(&g_winclock.freq); } } - -const void *const winclock_ctor[] initarray = { - winclock_init, -}; diff --git a/libc/calls/fchmodat-linux.c b/libc/calls/fchmodat-linux.c index 3036be97f..1640eb636 100644 --- a/libc/calls/fchmodat-linux.c +++ b/libc/calls/fchmodat-linux.c @@ -32,11 +32,8 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/s.h" #include "libc/sysv/errfuns.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off static void __procfdname(char *buf, unsigned fd) { diff --git a/libc/calls/finddebugbinary.c b/libc/calls/finddebugbinary.c index b4bae854e..3ca5bd1c0 100644 --- a/libc/calls/finddebugbinary.c +++ b/libc/calls/finddebugbinary.c @@ -27,6 +27,7 @@ #include "libc/elf/tinyelf.internal.h" #include "libc/errno.h" #include "libc/intrin/directmap.internal.h" +#include "libc/intrin/kprintf.h" #include "libc/nt/memory.h" #include "libc/nt/runtime.h" #include "libc/runtime/runtime.h" diff --git a/libc/calls/getloadavg-nt.c b/libc/calls/getloadavg-nt.c index 30f940de0..4e8d6d847 100644 --- a/libc/calls/getloadavg-nt.c +++ b/libc/calls/getloadavg-nt.c @@ -59,7 +59,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) { return rc; } -static textstartup void sys_getloadavg_nt_init(void) { +__attribute__((__constructor__(40))) static textstartup void ntinitload(void) { if (IsWindows()) { load = 1; cpus = __get_cpu_count() / 2; @@ -67,7 +67,3 @@ static textstartup void sys_getloadavg_nt_init(void) { GetSystemTimes(&idle1, &kern1, &user1); } } - -const void *const sys_getloadavg_nt_ctor[] initarray = { - sys_getloadavg_nt_init, -}; diff --git a/libc/calls/getrandom.c b/libc/calls/getrandom.c index 7c9a7e2f2..cdc7b13e5 100644 --- a/libc/calls/getrandom.c +++ b/libc/calls/getrandom.c @@ -190,7 +190,8 @@ ssize_t getrandom(void *p, size_t n, unsigned f) { return rc; } -__attribute__((__constructor__)) static textstartup void getrandom_init(void) { +__attribute__((__constructor__(30))) static textstartup void getrandom_init( + void) { int e, rc; if (IsWindows() || IsMetal()) return; BLOCK_CANCELATION; diff --git a/libc/calls/program_invocation_short_name.c b/libc/calls/program_invocation_short_name.c index 8299de395..396d55f47 100644 --- a/libc/calls/program_invocation_short_name.c +++ b/libc/calls/program_invocation_short_name.c @@ -21,7 +21,7 @@ char *program_invocation_short_name; 
-__attribute__((__constructor__)) static void // +__attribute__((__constructor__(10))) static textstartup void program_invocation_short_name_init(void) { char *p, *r; if (!__argc) return; diff --git a/libc/calls/rdrand_init.c b/libc/calls/rdrand_init.c index 9751016bf..a3b28ae07 100644 --- a/libc/calls/rdrand_init.c +++ b/libc/calls/rdrand_init.c @@ -22,8 +22,8 @@ #include "libc/nexgen32e/x86info.h" #include "libc/stdio/rand.h" -textstartup void rdrand_init(int argc, char **argv, char **envp, - intptr_t *auxv) { +__attribute__((__constructor__(2))) textstartup void rdrand_init( + int argc, char **argv, char **envp, intptr_t *auxv) { extern unsigned kMutableCpuids[KCPUIDS_LEN][4] asm("kCpuids"); /* * Clear RDRAND on AMD models before Zen and then some @@ -39,5 +39,3 @@ textstartup void rdrand_init(int argc, char **argv, char **envp, kMutableCpuids[KCPUIDS_7H][KCPUIDS_EBX] &= ~(1u << 18); } } - -const void *const g_rdrand_init[] initarray = {rdrand_init}; diff --git a/libc/calls/sig.c b/libc/calls/sig.c index 936ed99d4..cab11bec7 100644 --- a/libc/calls/sig.c +++ b/libc/calls/sig.c @@ -32,6 +32,7 @@ #include "libc/intrin/bsf.h" #include "libc/intrin/describebacktrace.internal.h" #include "libc/intrin/dll.h" +#include "libc/intrin/kprintf.h" #include "libc/intrin/strace.internal.h" #include "libc/intrin/weaken.h" #include "libc/nt/console.h" @@ -517,6 +518,15 @@ static int __sig_crash_sig(struct NtExceptionPointers *ep, int *code) { } } +static char *__sig_stpcpy(char *d, const char *s) { + size_t i; + for (i = 0;; ++i) { + if (!(d[i] = s[i])) { + return d + i; + } + } +} + static void __sig_unmaskable(struct NtExceptionPointers *ep, int code, int sig, struct CosmoTib *tib) { @@ -540,9 +550,10 @@ static void __sig_unmaskable(struct NtExceptionPointers *ep, int code, int sig, intptr_t hStderr; char sigbuf[21], s[128], *p; hStderr = GetStdHandle(kNtStdErrorHandle); - p = stpcpy(s, "Terminating on uncaught "); - p = stpcpy(p, strsignal_r(sig, sigbuf)); - p = stpcpy(p, ". 
Pass --strace and/or ShowCrashReports() for details.\n"); + p = __sig_stpcpy(s, "Terminating on uncaught "); + p = __sig_stpcpy(p, strsignal_r(sig, sigbuf)); + p = __sig_stpcpy( + p, ". Pass --strace and/or ShowCrashReports() for details.\n"); WriteFile(hStderr, s, p - s, 0, 0); #endif __sig_terminate(sig); @@ -656,12 +667,10 @@ textwindows int __sig_check(void) { } } -textstartup void __sig_init(void) { +__attribute__((__constructor__(10))) textstartup void __sig_init(void) { if (!IsWindows()) return; AddVectoredExceptionHandler(true, (void *)__sig_crash); SetConsoleCtrlHandler((void *)__sig_console, true); } -const void *const __sig_ctor[] initarray = {__sig_init}; - #endif /* __x86_64__ */ diff --git a/libc/calls/unveil.c b/libc/calls/unveil.c index 5eae25662..d47c84889 100644 --- a/libc/calls/unveil.c +++ b/libc/calls/unveil.c @@ -100,7 +100,7 @@ static const struct sock_filter kUnveilBlacklistLatestAbi[] = { static int landlock_abi_version; static int landlock_abi_errno; -__attribute__((__constructor__)) void init_landlock_version() { +__attribute__((__constructor__(40))) textstartup void init_landlock_version() { int e = errno; landlock_abi_version = landlock_create_ruleset(0, 0, LANDLOCK_CREATE_RULESET_VERSION); diff --git a/libc/crt/crt.S b/libc/crt/crt.S index f55042d09..69c38493e 100644 --- a/libc/crt/crt.S +++ b/libc/crt/crt.S @@ -18,7 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" #include "libc/macros.internal.h" -#include "libc/notice.inc" .section .start,"ax",@progbits #if SupportsXnu() && defined(__x86_64__) diff --git a/libc/disclaimer.inc b/libc/disclaimer.inc deleted file mode 100644 index e69de29bb..000000000 diff --git a/libc/dlopen/dlopen.c b/libc/dlopen/dlopen.c index 9dcda52f6..ae5af3e40 100644 --- a/libc/dlopen/dlopen.c +++ b/libc/dlopen/dlopen.c @@ -920,17 +920,3 @@ char *cosmo_dlerror(void) { STRACE("dlerror() → %#s", res); return res; } - -#ifdef __x86_64__ -static 
textstartup void dlopen_init() { - if (IsLinux() || IsFreebsd()) { - // switch from %fs to %gs for tls - struct CosmoTib *tib = __get_tls(); - __morph_tls(); - __set_tls(tib); - } -} -const void *const dlopen_ctor[] initarray = { - dlopen_init, -}; -#endif diff --git a/libc/dlopen/foreign_tramp.S b/libc/dlopen/foreign_tramp.S index f44c2edbe..dbd036306 100644 --- a/libc/dlopen/foreign_tramp.S +++ b/libc/dlopen/foreign_tramp.S @@ -55,7 +55,7 @@ foreign_tramp: mov %rax,-0xc0(%rbp) // switch to foreign tls - mov %fs:0,%rax + mov %gs:0x30,%rax mov %rax,-0xc8(%rbp) mov __foreign+8(%rip),%rdi call __set_tls diff --git a/libc/integral/c.inc b/libc/integral/c.inc index b5870b7fa..a1923bd64 100644 --- a/libc/integral/c.inc +++ b/libc/integral/c.inc @@ -639,12 +639,6 @@ void abort(void) wontreturn; #define _Section(s) __attribute__((__section__(s))) #endif -#if defined(__x86_64__) && !defined(__llvm__) -#define initarray _Section(".init_array,\"a\",@init_array #") -#else -#define initarray _Section(".init_array") -#endif - #ifndef __llvm__ #pragma GCC diagnostic ignored "-Wformat=0" /* todo: patch gcc */ #pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" @@ -758,17 +752,21 @@ void abort(void) wontreturn; #endif #if defined(__x86_64__) && (defined(__GNUC__) || defined(__llvm__)) && \ - !defined(__chibicc__) && 0 /* TODO: enable with toolchain upgrade */ + !defined(__chibicc__) && defined(__OPTIMIZE__) #define __target_clones(x) __attribute__((__target_clones__(x ",default"))) #else #define __target_clones(x) #endif -#if !defined(TINY) && !defined(__AVX__) && 0 +#if !defined(TINY) && !defined(__AVX__) #define __vex __target_clones("avx") #else #define __vex #endif +#define __notice(sym, str) \ + __attribute__((__section__(".notice"), __aligned__(1))) const char sym[] = \ + "\n\n" str + #define MACHINE_CODE_ANALYSIS_BEGIN_ #define MACHINE_CODE_ANALYSIS_END_ diff --git a/libc/integral/normalize.inc b/libc/integral/normalize.inc index e9cba82d8..1291b101d 100644 --- 
a/libc/integral/normalize.inc +++ b/libc/integral/normalize.inc @@ -3,8 +3,8 @@ #endif #define __COSMOPOLITAN_MAJOR__ 3 -#define __COSMOPOLITAN_MINOR__ 2 -#define __COSMOPOLITAN_PATCH__ 4 +#define __COSMOPOLITAN_MINOR__ 3 +#define __COSMOPOLITAN_PATCH__ 0 #define __COSMOPOLITAN__ \ (100000000 * __COSMOPOLITAN_MAJOR__ + 1000000 * __COSMOPOLITAN_MINOR__ + \ __COSMOPOLITAN_PATCH__) diff --git a/libc/intrin/BUILD.mk b/libc/intrin/BUILD.mk index 609e05018..2ba6fa39b 100644 --- a/libc/intrin/BUILD.mk +++ b/libc/intrin/BUILD.mk @@ -74,11 +74,15 @@ o//libc/intrin/memmove.o: private \ -fno-toplevel-reorder o//libc/intrin/bzero.o \ +o//libc/intrin/strlen.o \ +o//libc/intrin/strchr.o \ +o//libc/intrin/memchr.o \ +o//libc/intrin/memrchr.o \ o//libc/intrin/memcmp.o \ o//libc/intrin/memset.o \ o//libc/intrin/memmove.o: private \ CFLAGS += \ - -O2 -finline + -O2 -finline -foptimize-sibling-calls o/$(MODE)/libc/intrin/bzero.o \ o/$(MODE)/libc/intrin/memcmp.o \ @@ -89,7 +93,9 @@ o/$(MODE)/libc/intrin/memmove.o: private \ o/$(MODE)/libc/intrin/x86.o: private \ CFLAGS += \ -ffreestanding \ - -fno-jump-tables + -fno-jump-tables \ + -fpatchable-function-entry=0 \ + -Os # these assembly files are safe to build on aarch64 o/$(MODE)/libc/intrin/aarch64/%.o: libc/intrin/aarch64/%.S diff --git a/libc/intrin/aarch64/memchr.S b/libc/intrin/aarch64/memchr.S index 1f423f311..fcfacc661 100644 --- a/libc/intrin/aarch64/memchr.S +++ b/libc/intrin/aarch64/memchr.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __memchr_aarch64 memchr -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64 diff --git a/libc/intrin/aarch64/memcpy.S b/libc/intrin/aarch64/memcpy.S index a736ebdb6..fd30eb4e7 100644 --- a/libc/intrin/aarch64/memcpy.S +++ 
b/libc/intrin/aarch64/memcpy.S @@ -26,15 +26,11 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __memcpy_aarch64_simd memcpy #define __memmove_aarch64_simd memmove -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. diff --git a/libc/intrin/aarch64/memrchr.S b/libc/intrin/aarch64/memrchr.S index d0795c8b1..f24c358f7 100644 --- a/libc/intrin/aarch64/memrchr.S +++ b/libc/intrin/aarch64/memrchr.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __memrchr_aarch64 memrchr -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. diff --git a/libc/intrin/aarch64/memset.S b/libc/intrin/aarch64/memset.S index 509840c74..f6b96d3a6 100644 --- a/libc/intrin/aarch64/memset.S +++ b/libc/intrin/aarch64/memset.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __memset_aarch64 memset -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. 
diff --git a/libc/intrin/aarch64/stpcpy.S b/libc/intrin/aarch64/stpcpy.S index f7e39e52e..05fa1b866 100644 --- a/libc/intrin/aarch64/stpcpy.S +++ b/libc/intrin/aarch64/stpcpy.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __stpcpy_aarch64 stpcpy -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. diff --git a/libc/intrin/aarch64/strchr.S b/libc/intrin/aarch64/strchr.S index ea841ed98..ffceb8eaf 100644 --- a/libc/intrin/aarch64/strchr.S +++ b/libc/intrin/aarch64/strchr.S @@ -26,15 +26,11 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strchr_aarch64 strchr #define __index_aarch64 index -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64 diff --git a/libc/intrin/aarch64/strchrnul.S b/libc/intrin/aarch64/strchrnul.S index 9694fca47..c153c8f65 100644 --- a/libc/intrin/aarch64/strchrnul.S +++ b/libc/intrin/aarch64/strchrnul.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strchrnul_aarch64 strchrnul -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64 diff --git a/libc/intrin/aarch64/strcmp.S b/libc/intrin/aarch64/strcmp.S index 8d5702f09..dceb9d25c 100644 --- a/libc/intrin/aarch64/strcmp.S +++ b/libc/intrin/aarch64/strcmp.S @@ -26,14 +26,10 @@ │ │ 
╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strcmp_aarch64 strcmp -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64. diff --git a/libc/intrin/aarch64/strcpy.S b/libc/intrin/aarch64/strcpy.S index 3e6599bb8..a92541b40 100644 --- a/libc/intrin/aarch64/strcpy.S +++ b/libc/intrin/aarch64/strcpy.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strcpy_aarch64 strcpy -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. diff --git a/libc/intrin/aarch64/strlen.S b/libc/intrin/aarch64/strlen.S index a70e7b214..36ecd0e18 100644 --- a/libc/intrin/aarch64/strlen.S +++ b/libc/intrin/aarch64/strlen.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strlen_aarch64 strlen -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. 
diff --git a/libc/intrin/aarch64/strncmp.S b/libc/intrin/aarch64/strncmp.S index 8741d766b..f91114e51 100644 --- a/libc/intrin/aarch64/strncmp.S +++ b/libc/intrin/aarch64/strncmp.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strncmp_aarch64 strncmp -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64. diff --git a/libc/intrin/aarch64/strnlen.S b/libc/intrin/aarch64/strnlen.S index 2e7c72d31..a556aa61b 100644 --- a/libc/intrin/aarch64/strnlen.S +++ b/libc/intrin/aarch64/strnlen.S @@ -26,14 +26,10 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strnlen_aarch64 strnlen -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64, Advanced SIMD. 
diff --git a/libc/intrin/aarch64/strrchr.S b/libc/intrin/aarch64/strrchr.S index f492a4423..73289101f 100644 --- a/libc/intrin/aarch64/strrchr.S +++ b/libc/intrin/aarch64/strrchr.S @@ -26,15 +26,11 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/intrin/aarch64/asmdefs.internal.h" +.yoink arm_optimized_routines_notice #define __strrchr_aarch64 strrchr #define __rindex_aarch64 rindex -.ident "\n\n\ -Optimized Routines (MIT License)\n\ -Copyright 2022 ARM Limited\n" -.include "libc/disclaimer.inc" - /* Assumptions: * * ARMv8-a, AArch64 diff --git a/libc/intrin/armopt.c b/libc/intrin/armopt.c new file mode 100644 index 000000000..19fc8da12 --- /dev/null +++ b/libc/intrin/armopt.c @@ -0,0 +1,3 @@ +__notice(arm_optimized_routines_notice, "\ +Optimized Routines (MIT License)\n\ +Copyright 2022 ARM Limited"); diff --git a/libc/intrin/chromium.c b/libc/intrin/chromium.c new file mode 100644 index 000000000..543de07d3 --- /dev/null +++ b/libc/intrin/chromium.c @@ -0,0 +1,3 @@ +__notice(chromium_notice, "\ +Chromium (BSD-3 License)\n\ +Copyright 2017 The Chromium Authors"); diff --git a/libc/intrin/cxalock.c b/libc/intrin/cxalock.c index 3ebcf3c8d..4e6de59d0 100644 --- a/libc/intrin/cxalock.c +++ b/libc/intrin/cxalock.c @@ -33,10 +33,6 @@ void __cxa_unlock(void) { pthread_mutex_unlock(&__cxa_lock_obj); } -static textstartup void __cxa_init() { +__attribute__((__constructor__(60))) static textstartup void __cxa_init() { pthread_atfork(__cxa_lock, __cxa_unlock, __cxa_wipe); } - -const void *const __cxa_ctor[] initarray = { - __cxa_init, -}; diff --git a/libc/intrin/fblibm.c b/libc/intrin/fblibm.c new file mode 100644 index 000000000..5a5fbd3fa --- /dev/null +++ b/libc/intrin/fblibm.c @@ -0,0 +1,4 @@ +__notice(freebsd_libm_notice, "FreeBSD libm (BSD-2 License)\n\ +Copyright (c) 1992-2024 The FreeBSD Project.\n\ +Copyright (c) 2005-2011 Bruce D. Evans, Steven G. Kargl, David Schultz\n\ +Copyright (c) 2009-2013 Steven G. 
Kargl"); diff --git a/libc/intrin/fdlibm.c b/libc/intrin/fdlibm.c new file mode 100644 index 000000000..2ea7510e2 --- /dev/null +++ b/libc/intrin/fdlibm.c @@ -0,0 +1,2 @@ +__notice(fdlibm_notice, "fdlibm (fdlibm license)\n\ +Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved."); diff --git a/libc/intrin/folibm.c b/libc/intrin/folibm.c new file mode 100644 index 000000000..2fe77e001 --- /dev/null +++ b/libc/intrin/folibm.c @@ -0,0 +1,2 @@ +__notice(openbsd_libm_notice, "OpenBSD libm (ISC License)\n\ +Copyright (c) 2008 Stephen L. Moshier "); diff --git a/libc/intrin/isrunningundermake.c b/libc/intrin/isrunningundermake.c index 929ece7f8..53bcf4840 100644 --- a/libc/intrin/isrunningundermake.c +++ b/libc/intrin/isrunningundermake.c @@ -30,10 +30,6 @@ bool32 IsRunningUnderMake(void) { return g_isrunningundermake; } -textstartup void g_isrunningundermake_init(void) { +__attribute__((__constructor__(30))) textstartup void onmake_init(void) { g_isrunningundermake = !!getenv("MAKEFLAGS"); } - -const void *const g_isrunningundermake_ctor[] initarray = { - g_isrunningundermake_init, -}; diff --git a/libc/intrin/memchr.c b/libc/intrin/memchr.c index b29c6e67a..676cb04be 100644 --- a/libc/intrin/memchr.c +++ b/libc/intrin/memchr.c @@ -36,8 +36,8 @@ static inline const unsigned char *memchr_pure(const unsigned char *s, } #if defined(__x86_64__) && !defined(__chibicc__) -static inline const unsigned char *memchr_sse(const unsigned char *s, - unsigned char c, size_t n) { +static __vex const unsigned char *memchr_sse(const unsigned char *s, + unsigned char c, size_t n) { size_t i; unsigned m; xmm_t v, t = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; @@ -67,11 +67,9 @@ static inline const unsigned char *memchr_sse(const unsigned char *s, * @return is pointer to first instance of c or NULL if not found * @asyncsignalsafe */ -__vex void *memchr(const void *s, int c, size_t n) { +void *memchr(const void *s, int c, size_t n) { #if defined(__x86_64__) && 
!defined(__chibicc__) - const void *r; - r = memchr_sse(s, c, n); - return (void *)r; + return (void *)memchr_sse(s, c, n); #else return (void *)memchr_pure(s, c, n); #endif diff --git a/libc/intrin/memmove.c b/libc/intrin/memmove.c index f6687c4b5..493210aac 100644 --- a/libc/intrin/memmove.c +++ b/libc/intrin/memmove.c @@ -27,66 +27,7 @@ typedef long long xmm_t __attribute__((__vector_size__(16), __aligned__(1))); typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16))); -/** - * Copies memory. - * - * memmove n=0 661 picoseconds - * memmove n=1 661 ps/byte 1,476 mb/s - * memmove n=2 330 ps/byte 2,952 mb/s - * memmove n=3 330 ps/byte 2,952 mb/s - * memmove n=4 165 ps/byte 5,904 mb/s - * memmove n=7 141 ps/byte 6,888 mb/s - * memmove n=8 82 ps/byte 11 GB/s - * memmove n=15 44 ps/byte 21 GB/s - * memmove n=16 41 ps/byte 23 GB/s - * memmove n=31 32 ps/byte 29 GB/s - * memmove n=32 31 ps/byte 30 GB/s - * memmove n=63 21 ps/byte 45 GB/s - * memmove n=64 15 ps/byte 61 GB/s - * memmove n=127 13 ps/byte 73 GB/s - * memmove n=128 31 ps/byte 30 GB/s - * memmove n=255 20 ps/byte 45 GB/s - * memmove n=256 19 ps/byte 49 GB/s - * memmove n=511 16 ps/byte 56 GB/s - * memmove n=512 17 ps/byte 54 GB/s - * memmove n=1023 18 ps/byte 52 GB/s - * memmove n=1024 13 ps/byte 72 GB/s - * memmove n=2047 9 ps/byte 96 GB/s - * memmove n=2048 9 ps/byte 98 GB/s - * memmove n=4095 8 ps/byte 112 GB/s - * memmove n=4096 8 ps/byte 109 GB/s - * memmove n=8191 7 ps/byte 124 GB/s - * memmove n=8192 7 ps/byte 125 GB/s - * memmove n=16383 7 ps/byte 134 GB/s - * memmove n=16384 7 ps/byte 134 GB/s - * memmove n=32767 13 ps/byte 72 GB/s - * memmove n=32768 13 ps/byte 72 GB/s - * memmove n=65535 13 ps/byte 68 GB/s - * memmove n=65536 14 ps/byte 67 GB/s - * memmove n=131071 14 ps/byte 65 GB/s - * memmove n=131072 14 ps/byte 64 GB/s - * memmove n=262143 15 ps/byte 63 GB/s - * memmove n=262144 15 ps/byte 63 GB/s - * memmove n=524287 15 ps/byte 61 GB/s - * memmove n=524288 15 ps/byte 61 
GB/s - * memmove n=1048575 15 ps/byte 61 GB/s - * memmove n=1048576 15 ps/byte 61 GB/s - * memmove n=2097151 19 ps/byte 48 GB/s - * memmove n=2097152 27 ps/byte 35 GB/s - * memmove n=4194303 28 ps/byte 33 GB/s - * memmove n=4194304 28 ps/byte 33 GB/s - * memmove n=8388607 28 ps/byte 33 GB/s - * memmove n=8388608 28 ps/byte 33 GB/s - * - * DST and SRC may overlap. - * - * @param dst is destination - * @param src is memory to copy - * @param n is number of bytes to copy - * @return dst - * @asyncsignalsafe - */ -__vex void *memmove(void *dst, const void *src, size_t n) { +static __vex void *__memmove(void *dst, const void *src, size_t n) { char *d; size_t i; const char *s; @@ -338,6 +279,69 @@ __vex void *memmove(void *dst, const void *src, size_t n) { } } +/** + * Copies memory. + * + * memmove n=0 661 picoseconds + * memmove n=1 661 ps/byte 1,476 mb/s + * memmove n=2 330 ps/byte 2,952 mb/s + * memmove n=3 330 ps/byte 2,952 mb/s + * memmove n=4 165 ps/byte 5,904 mb/s + * memmove n=7 141 ps/byte 6,888 mb/s + * memmove n=8 82 ps/byte 11 GB/s + * memmove n=15 44 ps/byte 21 GB/s + * memmove n=16 41 ps/byte 23 GB/s + * memmove n=31 32 ps/byte 29 GB/s + * memmove n=32 31 ps/byte 30 GB/s + * memmove n=63 21 ps/byte 45 GB/s + * memmove n=64 15 ps/byte 61 GB/s + * memmove n=127 13 ps/byte 73 GB/s + * memmove n=128 31 ps/byte 30 GB/s + * memmove n=255 20 ps/byte 45 GB/s + * memmove n=256 19 ps/byte 49 GB/s + * memmove n=511 16 ps/byte 56 GB/s + * memmove n=512 17 ps/byte 54 GB/s + * memmove n=1023 18 ps/byte 52 GB/s + * memmove n=1024 13 ps/byte 72 GB/s + * memmove n=2047 9 ps/byte 96 GB/s + * memmove n=2048 9 ps/byte 98 GB/s + * memmove n=4095 8 ps/byte 112 GB/s + * memmove n=4096 8 ps/byte 109 GB/s + * memmove n=8191 7 ps/byte 124 GB/s + * memmove n=8192 7 ps/byte 125 GB/s + * memmove n=16383 7 ps/byte 134 GB/s + * memmove n=16384 7 ps/byte 134 GB/s + * memmove n=32767 13 ps/byte 72 GB/s + * memmove n=32768 13 ps/byte 72 GB/s + * memmove n=65535 13 ps/byte 68 GB/s + * 
memmove n=65536 14 ps/byte 67 GB/s + * memmove n=131071 14 ps/byte 65 GB/s + * memmove n=131072 14 ps/byte 64 GB/s + * memmove n=262143 15 ps/byte 63 GB/s + * memmove n=262144 15 ps/byte 63 GB/s + * memmove n=524287 15 ps/byte 61 GB/s + * memmove n=524288 15 ps/byte 61 GB/s + * memmove n=1048575 15 ps/byte 61 GB/s + * memmove n=1048576 15 ps/byte 61 GB/s + * memmove n=2097151 19 ps/byte 48 GB/s + * memmove n=2097152 27 ps/byte 35 GB/s + * memmove n=4194303 28 ps/byte 33 GB/s + * memmove n=4194304 28 ps/byte 33 GB/s + * memmove n=8388607 28 ps/byte 33 GB/s + * memmove n=8388608 28 ps/byte 33 GB/s + * + * DST and SRC may overlap. + * + * @param dst is destination + * @param src is memory to copy + * @param n is number of bytes to copy + * @return dst + * @asyncsignalsafe + */ +void *memmove(void *dst, const void *src, size_t n) { + return __memmove(dst, src, n); +} + __weak_reference(memmove, memcpy); #endif /* __aarch64__ */ diff --git a/libc/intrin/memrchr.c b/libc/intrin/memrchr.c index 59f0d6686..f0f8e8689 100644 --- a/libc/intrin/memrchr.c +++ b/libc/intrin/memrchr.c @@ -36,8 +36,8 @@ static inline const unsigned char *memrchr_pure(const unsigned char *s, } #if defined(__x86_64__) && !defined(__chibicc__) -static inline const unsigned char *memrchr_sse(const unsigned char *s, - unsigned char c, size_t n) { +static __vex const unsigned char *memrchr_sse(const unsigned char *s, + unsigned char c, size_t n) { size_t i; unsigned m; xmm_t v, t = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; @@ -67,11 +67,9 @@ static inline const unsigned char *memrchr_sse(const unsigned char *s, * @return is pointer to first instance of c or NULL if not found * @asyncsignalsafe */ -__vex void *memrchr(const void *s, int c, size_t n) { +void *memrchr(const void *s, int c, size_t n) { #if defined(__x86_64__) && !defined(__chibicc__) - const void *r; - r = memrchr_sse(s, c, n); - return (void *)r; + return (void *)memrchr_sse(s, c, n); #else return (void *)memrchr_pure(s, c, n); 
#endif diff --git a/libc/intrin/musl.c b/libc/intrin/musl.c new file mode 100644 index 000000000..28f7ed4db --- /dev/null +++ b/libc/intrin/musl.c @@ -0,0 +1,3 @@ +__notice(musl_libc_notice, "\ +Musl libc (MIT License)\n\ +Copyright 2005-2014 Rich Felker, et. al."); diff --git a/libc/intrin/nocolor.c b/libc/intrin/nocolor.c index e864b0396..efd2fa7f6 100644 --- a/libc/intrin/nocolor.c +++ b/libc/intrin/nocolor.c @@ -48,13 +48,9 @@ */ bool __nocolor; -optimizesize textstartup void __nocolor_init(int argc, char **argv, char **envp, - intptr_t *auxv) { +__attribute__((__constructor__(20))) optimizesize textstartup void +__nocolor_init(int argc, char **argv, char **envp, intptr_t *auxv) { char *s; __nocolor = (IsWindows() && !IsAtLeastWindows10()) || ((s = getenv("TERM")) && IsDumb(s)); } - -const void *const __nocolor_ctor[] initarray = { - __nocolor_init, -}; diff --git a/libc/intrin/scalblnl.c b/libc/intrin/scalblnl.c index 0150ec683..c93790767 100644 --- a/libc/intrin/scalblnl.c +++ b/libc/intrin/scalblnl.c @@ -28,11 +28,8 @@ #include "libc/limits.h" #include "libc/math.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off long double scalblnl(long double x, long n) { diff --git a/libc/intrin/scalbn.c b/libc/intrin/scalbn.c index 54178a67c..55f7e6cda 100644 --- a/libc/intrin/scalbn.c +++ b/libc/intrin/scalbn.c @@ -26,11 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off /** diff --git a/libc/intrin/scalbnf.c b/libc/intrin/scalbnf.c index d5c5d1d02..b900e589d 100644 --- a/libc/intrin/scalbnf.c +++ b/libc/intrin/scalbnf.c @@ -26,11 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off /** diff --git a/libc/intrin/scalbnl.c b/libc/intrin/scalbnl.c index c87cb76f7..f9739ff36 100644 --- a/libc/intrin/scalbnl.c +++ b/libc/intrin/scalbnl.c @@ -28,11 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off /** diff --git a/libc/intrin/stackchkguard.S b/libc/intrin/stackchkguard.S index 9f4437ba4..b78117a5d 100644 --- a/libc/intrin/stackchkguard.S +++ b/libc/intrin/stackchkguard.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" // Canary for -fstack-protector. 
// diff --git a/libc/intrin/strchr.c b/libc/intrin/strchr.c index ab7439239..4d05ffb05 100644 --- a/libc/intrin/strchr.c +++ b/libc/intrin/strchr.c @@ -31,7 +31,7 @@ static inline const char *strchr_pure(const char *s, int c) { #if defined(__x86_64__) && !defined(__chibicc__) typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); -static inline const char *strchr_sse(const char *s, unsigned char c) { +static __vex const char *strchr_sse(const char *s, unsigned char c) { unsigned k; unsigned m; const xmm_t *p; @@ -94,7 +94,7 @@ static inline const char *strchr_x64(const char *p, uint64_t c) { * @asyncsignalsafe * @vforksafe */ -__vex char *strchr(const char *s, int c) { +char *strchr(const char *s, int c) { #if defined(__x86_64__) && !defined(__chibicc__) const char *r; if (X86_HAVE(SSE)) { @@ -102,7 +102,6 @@ __vex char *strchr(const char *s, int c) { } else { r = strchr_pure(s, c); } - unassert(!r || *r || !(c & 255)); return (char *)r; #else const char *r; @@ -110,9 +109,7 @@ __vex char *strchr(const char *s, int c) { if ((*s & 255) == c) return (char *)s; if (!*s) return NULL; } - r = strchr_x64(s, c); - unassert(!r || *r || !c); - return (char *)r; + return (char *)strchr_x64(s, c); #endif } diff --git a/libc/intrin/strlen.c b/libc/intrin/strlen.c index 89d85e9d2..72a32c33d 100644 --- a/libc/intrin/strlen.c +++ b/libc/intrin/strlen.c @@ -20,14 +20,7 @@ #include "libc/str/str.h" #ifndef __aarch64__ -/** - * Returns length of NUL-terminated string. 
- * - * @param s is non-null NUL-terminated string pointer - * @return number of bytes (excluding NUL) - * @asyncsignalsafe - */ -size_t strlen(const char *s) { +static __vex size_t __strlen(const char *s) { #if defined(__x86_64__) && !defined(__chibicc__) typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); xmm_t z = {0}; @@ -56,4 +49,16 @@ size_t strlen(const char *s) { return (const char *)p + (__builtin_ctzl(w) >> 3) - s; #endif } + +/** + * Returns length of NUL-terminated string. + * + * @param s is non-null NUL-terminated string pointer + * @return number of bytes (excluding NUL) + * @asyncsignalsafe + */ +size_t strlen(const char *s) { + return __strlen(s); +} + #endif /* __aarch64__ */ diff --git a/libc/intrin/ubsan.c b/libc/intrin/ubsan.c index d42f8e20b..5b3051ac5 100644 --- a/libc/intrin/ubsan.c +++ b/libc/intrin/ubsan.c @@ -627,7 +627,7 @@ void *__ubsan_get_current_report_data(void) { return 0; } -static textstartup void ubsan_init() { +__attribute__((__constructor__(90))) static textstartup void ubsan_init() { STRACE(" _ _ ____ ____ _ _ _"); STRACE("| | | | __ ) ___| / \\ | \\ | |"); STRACE("| | | | _ \\___ \\ / _ \\ | \\| |"); @@ -635,7 +635,3 @@ static textstartup void ubsan_init() { STRACE(" \\___/|____/____/_/ \\_\\_| \\_|"); STRACE("cosmopolitan behavior module initialized"); } - -const void *const ubsan_ctor[] initarray = { - ubsan_init, -}; diff --git a/libc/intrin/x86.c b/libc/intrin/x86.c index 3a2d21c77..7912d9de8 100644 --- a/libc/intrin/x86.c +++ b/libc/intrin/x86.c @@ -12,8 +12,6 @@ // //===----------------------------------------------------------------------===// #if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) -#include "libc/intrin/strace.internal.h" -#include "third_party/compiler_rt/cpu_model.h" enum VendorSignatures { SIG_INTEL = 0x756e6547, // Genu @@ -740,8 +738,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, #undef setFeature } -int 
__cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; - struct __processor_model { unsigned int __cpu_vendor; unsigned int __cpu_type; @@ -757,7 +753,7 @@ unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32]; // the priority set. However, it still runs after ifunc initializers and // needs to be called explicitly there. -int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { +__attribute__((__constructor__(1))) textstartup int __cpu_indicator_init(void) { unsigned EAX, EBX, ECX, EDX; unsigned MaxLeaf = 5; unsigned Vendor; diff --git a/libc/isystem/sys/syscall.h b/libc/isystem/sys/syscall.h index 9a508f8c4..a65aa2c65 100644 --- a/libc/isystem/sys/syscall.h +++ b/libc/isystem/sys/syscall.h @@ -1,4 +1,4 @@ -#ifndef COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ -#define COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ +#ifndef COSMOPOLITAN_LIBC_ISYSTEM_SYS_SYSCALL_H_ +#define COSMOPOLITAN_LIBC_ISYSTEM_SYS_SYSCALL_H_ #include "libc/stdio/syscall.h" -#endif /* COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ */ +#endif /* COSMOPOLITAN_LIBC_ISYSTEM_SYS_SYSCALL_H_ */ diff --git a/libc/isystem/syscall.h b/libc/isystem/syscall.h new file mode 100644 index 000000000..9a508f8c4 --- /dev/null +++ b/libc/isystem/syscall.h @@ -0,0 +1,4 @@ +#ifndef COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ +#define COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ +#include "libc/stdio/syscall.h" +#endif /* COSMOPOLITAN_LIBC_ISYSTEM_SYSCALL_H_ */ diff --git a/libc/log/countbranch_report.c b/libc/log/countbranch_report.c index 80c5381ad..7616c4e7a 100644 --- a/libc/log/countbranch_report.c +++ b/libc/log/countbranch_report.c @@ -96,12 +96,9 @@ void countbranch_report(void) { } } -static textstartup void countbranch_init() { +__attribute__((__constructor__(90))) static textstartup void +countbranch_init() { atexit(countbranch_report); } -const void *const countbranch_ctor[] initarray = { - countbranch_init, -}; - #endif /* __x86_64__ */ diff --git a/libc/log/countexpr_data.S b/libc/log/countexpr_data.S index c06a2fe9e..72db2252f 100644 --- 
a/libc/log/countexpr_data.S +++ b/libc/log/countexpr_data.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" .yoink countexpr_report diff --git a/libc/log/countexpr_report.c b/libc/log/countexpr_report.c index 2fe072624..3d1c50025 100644 --- a/libc/log/countexpr_report.c +++ b/libc/log/countexpr_report.c @@ -76,12 +76,8 @@ void countexpr_report(void) { } } -static textstartup void countexpr_init() { +__attribute__((__constructor__(90))) static textstartup void countexpr_init() { atexit(countexpr_report); } -const void *const countexpr_ctor[] initarray = { - countexpr_init, -}; - #endif /* __x86_64__ */ diff --git a/libc/log/err.c b/libc/log/err.c index 455e4ae40..83b459760 100644 --- a/libc/log/err.c +++ b/libc/log/err.c @@ -36,12 +36,9 @@ #include "libc/str/str.h" // clang-format off -asm(".ident\t\"\\n\\n\ -FreeBSD Err (BSD-3 License)\\n\ -Copyright (c) 1993\\n\ -\tThe Regents of the University of California.\\n\ -\tAll rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(freebsd_err_notice, "\ +FreeBSD Error Library (BSD-3 License)\n\ +Copyright 1993 The Regents of the University of California"); static FILE *err_file; /* file to use for error output */ static void (*err_exit)(int); diff --git a/libc/log/logfile.c b/libc/log/logfile.c index dc6221c8d..7cc70c86a 100644 --- a/libc/log/logfile.c +++ b/libc/log/logfile.c @@ -21,6 +21,6 @@ FILE *__log_file; -__attribute__((__constructor__)) static void init(void) { +__attribute__((__constructor__(60))) static void loginit(void) { __log_file = stderr; } diff --git a/libc/log/memlog.c b/libc/log/memlog.c index 4c641bb41..62304ea7a 100644 --- a/libc/log/memlog.c +++ b/libc/log/memlog.c @@ -66,7 +66,7 @@ static struct Memlog { struct Alloc { void *addr; long size; - } * p; + } *p; } allocs; atomic_long usage; } __memlog; @@ -270,7 +270,9 @@ static textexit 
void __memlog_destroy(void) { __memlog_unlock(); } -static textstartup void __memlog_init(void) { +__attribute__((__constructor__(90))) // +static textstartup void +__memlog_init(void) { GetSymbolTable(); __memlog_lock(); __memlog.free = hook_free; @@ -290,7 +292,3 @@ static textstartup void __memlog_init(void) { atexit(__memlog_destroy); __memlog_unlock(); } - -const void *const enable_memory_log[] initarray = { - __memlog_init, -}; diff --git a/libc/math.h b/libc/math.h index 0dd9fe063..1a9ffb611 100644 --- a/libc/math.h +++ b/libc/math.h @@ -4,59 +4,69 @@ │ cosmopolitan § mathematics │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#define M_E 2.7182818284590452354 /* 𝑒 */ -#define M_LOG2_10 0xd.49a784bcd1b8afep-2 /* log₂10 ≈ 3.3219280948873623478 */ -#define M_LOG2E 0xb.8aa3b295c17f0bcp-3 /* log₂𝑒 ≈ 1.4426950408889634074 */ -#define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */ -#define M_LN2 0xb.17217f7d1cf79acp-4 /* logₑ2 ≈ */ -#define M_LN10 2.30258509299404568402 /* logₑ10 */ -#define M_TAU 0x1.921fb54442d1846ap+2 /* τ = 2π */ -#define M_PI 0x1.921fb54442d1846ap+1 /* π ≈ 3.14159265358979323846 */ -#define M_PI_2 1.57079632679489661923 /* π/2 */ -#define M_PI_4 0.78539816339744830962 /* π/4 */ -#define M_1_PI 0.31830988618379067154 /* 1/π */ -#define M_2_PI 0.63661977236758134308 /* 2/π */ -#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrtπ */ -#define M_SQRT2 1.41421356237309504880 /* sqrt2 */ -#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt2 */ +#define M_E 2.7182818284590452354 /* 𝑒 */ +#define M_LOG2E 1.4426950408889634074 /* log₂𝑒 */ +#define M_LOG10E 0.43429448190325182765 /* log₁₀𝑒 */ +#define M_LN2 0.69314718055994530942 /* logₑ2 */ +#define M_LN10 2.30258509299404568402 /* logₑ10 */ +#define M_PI 3.14159265358979323846 /* pi */ +#define M_PI_2 1.57079632679489661923 /* pi/2 */ +#define M_PI_4 0.78539816339744830962 /* pi/4 */ +#define M_1_PI 0.31830988618379067154 /* 1/pi */ +#define M_2_PI 
0.63661977236758134308 /* 2/pi */ +#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ +#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ -#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ -#define DBL_DIG __DBL_DIG__ -#define DBL_EPSILON __DBL_EPSILON__ -#define DBL_MANT_DIG __DBL_MANT_DIG__ -#define DBL_MANT_DIG __DBL_MANT_DIG__ -#define DBL_MAX __DBL_MAX__ -#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ -#define DBL_MAX_EXP __DBL_MAX_EXP__ -#define DBL_MIN __DBL_MIN__ /* 2.23e–308 ↔ 1.79e308 */ -#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ -#define DBL_MIN_EXP __DBL_MIN_EXP__ -#define DECIMAL_DIG __LDBL_DECIMAL_DIG__ -#define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ -#define FLT_RADIX __FLT_RADIX__ -#define FLT_DIG __FLT_DIG__ -#define FLT_EPSILON __FLT_EPSILON__ -#define FLT_MANT_DIG __FLT_MANT_DIG__ -#define FLT_MANT_DIG __FLT_MANT_DIG__ -#define FLT_MAX __FLT_MAX__ -#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ -#define FLT_MAX_EXP __FLT_MAX_EXP__ -#define FLT_MIN __FLT_MIN__ /* 1.18e–38 ↔ 3.40e38 */ -#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ -#define FLT_MIN_EXP __FLT_MIN_EXP__ -#define HLF_MAX 6.50e4f -#define HLF_MIN 3.10e-5f -#define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ -#define LDBL_DIG __LDBL_DIG__ -#define LDBL_EPSILON __LDBL_EPSILON__ -#define LDBL_MANT_DIG __LDBL_MANT_DIG__ -#define LDBL_MANT_DIG __LDBL_MANT_DIG__ -#define LDBL_MAX __LDBL_MAX__ -#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ -#define LDBL_MAX_EXP __LDBL_MAX_EXP__ -#define LDBL_MIN __LDBL_MIN__ /* 3.37e–4932 ↔ 1.18e4932 */ -#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ -#define LDBL_MIN_EXP __LDBL_MIN_EXP__ +#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ +#define DBL_DIG __DBL_DIG__ +#define DBL_EPSILON __DBL_EPSILON__ +#define DBL_HAS_SUBNORM __DBL_HAS_DENORM__ +#define DBL_IS_IEC_60559 __DBL_IS_IEC_60559__ +#define DBL_MANT_DIG __DBL_MANT_DIG__ +#define DBL_MANT_DIG __DBL_MANT_DIG__ +#define DBL_MAX __DBL_MAX__ +#define DBL_MAX_10_EXP 
__DBL_MAX_10_EXP__ +#define DBL_MAX_EXP __DBL_MAX_EXP__ +#define DBL_MIN __DBL_MIN__ /* 2.23e–308 ↔ 1.79e308 */ +#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ +#define DBL_MIN_EXP __DBL_MIN_EXP__ +#define DBL_NORM_MAX __DBL_NORM_MAX__ +#define DBL_TRUE_MIN __DBL_DENORM_MIN__ +#define DECIMAL_DIG __LDBL_DECIMAL_DIG__ +#define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ +#define FLT_DIG __FLT_DIG__ +#define FLT_EPSILON __FLT_EPSILON__ +#define FLT_HAS_SUBNORM __FLT_HAS_DENORM__ +#define FLT_IS_IEC_60559 __FLT_IS_IEC_60559__ +#define FLT_MANT_DIG __FLT_MANT_DIG__ +#define FLT_MANT_DIG __FLT_MANT_DIG__ +#define FLT_MAX __FLT_MAX__ +#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ +#define FLT_MAX_EXP __FLT_MAX_EXP__ +#define FLT_MIN __FLT_MIN__ /* 1.18e–38 ↔ 3.40e38 */ +#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ +#define FLT_MIN_EXP __FLT_MIN_EXP__ +#define FLT_NORM_MAX __FLT_NORM_MAX__ +#define FLT_RADIX __FLT_RADIX__ +#define FLT_TRUE_MIN __FLT_DENORM_MIN__ +#define HLF_MAX 6.50e4f +#define HLF_MIN 3.10e-5f +#define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ +#define LDBL_DIG __LDBL_DIG__ +#define LDBL_EPSILON __LDBL_EPSILON__ +#define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__ +#define LDBL_IS_IEC_60559 __LDBL_IS_IEC_60559__ +#define LDBL_MANT_DIG __LDBL_MANT_DIG__ +#define LDBL_MANT_DIG __LDBL_MANT_DIG__ +#define LDBL_MAX __LDBL_MAX__ +#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ +#define LDBL_MAX_EXP __LDBL_MAX_EXP__ +#define LDBL_MIN __LDBL_MIN__ /* 3.37e–4932 ↔ 1.18e4932 */ +#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ +#define LDBL_MIN_EXP __LDBL_MIN_EXP__ +#define LDBL_NORM_MAX __LDBL_NORM_MAX__ +#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ #define FP_NAN 0 #define FP_INFINITE 1 diff --git a/libc/mem/gc.c b/libc/mem/gc.c index b977d0964..07957ba57 100644 --- a/libc/mem/gc.c +++ b/libc/mem/gc.c @@ -51,7 +51,7 @@ static void TeardownGc(void) { } } -__attribute__((__constructor__)) static void InitializeGc(void) { +__attribute__((__constructor__(51))) static textstartup void InitGc(void) { 
atexit(TeardownGc); } diff --git a/libc/mem/heapsort.c b/libc/mem/heapsort.c index 31645308a..9577c3891 100644 --- a/libc/mem/heapsort.c +++ b/libc/mem/heapsort.c @@ -31,6 +31,7 @@ #include "libc/mem/alg.h" #include "libc/mem/mem.h" #include "libc/sysv/errfuns.h" +__static_yoink("openbsd_sorting_notice"); // clang-format off /* diff --git a/libc/mem/mergesort.c b/libc/mem/mergesort.c index 70344f552..400b6dfe3 100644 --- a/libc/mem/mergesort.c +++ b/libc/mem/mergesort.c @@ -33,13 +33,9 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" #include "libc/sysv/errfuns.h" +__static_yoink("openbsd_sorting_notice"); // clang-format off -asm(".ident\t\"\\n\\n\ -OpenBSD Sorting (BSD-3)\\n\ -Copyright 1993 The Regents of the University of California\""); -asm(".include \"libc/disclaimer.inc\""); - /* * Hybrid exponential search/linear search merge sort with hybrid * natural/pairwise first pass. Requires about .3% more comparisons diff --git a/libc/mem/opensort.c b/libc/mem/opensort.c new file mode 100644 index 000000000..36f061dfd --- /dev/null +++ b/libc/mem/opensort.c @@ -0,0 +1,2 @@ +__notice(openbsd_sorting_notice, "OpenBSD Sorting (BSD-3)\n\ +Copyright 1993 The Regents of the University of California"); diff --git a/libc/mem/qsort.c b/libc/mem/qsort.c index ca96a532a..361f26a86 100644 --- a/libc/mem/qsort.c +++ b/libc/mem/qsort.c @@ -31,13 +31,9 @@ #include "libc/macros.internal.h" #include "libc/mem/alg.h" #include "libc/str/str.h" +__static_yoink("openbsd_sorting_notice"); // clang-format off -asm(".ident\t\"\\n\\n\ -OpenBSD Sorting (BSD-3)\\n\ -Copyright 1993 The Regents of the University of California\""); -asm(".include \"libc/disclaimer.inc\""); - #define SWAPTYPE_BYTEV 1 #define SWAPTYPE_INTV 2 #define SWAPTYPE_LONGV 3 diff --git a/libc/mem/realpath.c b/libc/mem/realpath.c index bba846b7d..3ea1432a5 100644 --- a/libc/mem/realpath.c +++ b/libc/mem/realpath.c @@ -28,20 +28,17 @@ #include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/errno.h" 
-#include "libc/serialize.h" #include "libc/intrin/safemacros.internal.h" #include "libc/limits.h" #include "libc/log/backtrace.internal.h" #include "libc/mem/mem.h" +#include "libc/serialize.h" #include "libc/str/str.h" #include "libc/sysv/errfuns.h" +__static_yoink("musl_libc_notice"); #define SYMLOOP_MAX 40 -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off static size_t GetSlashLen(const char *s) diff --git a/libc/nexgen32e/argc.S b/libc/nexgen32e/argc.S index 3dc3852c2..9e85f4409 100644 --- a/libc/nexgen32e/argc.S +++ b/libc/nexgen32e/argc.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" .initbss 300,_init_argc // Global variable holding _start(argc) parameter. diff --git a/libc/nexgen32e/argv.S b/libc/nexgen32e/argv.S index a3b95116b..9ee093476 100644 --- a/libc/nexgen32e/argv.S +++ b/libc/nexgen32e/argv.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" .initbss 300,_init_argv // Global variable holding _start(argv) parameter. diff --git a/libc/nexgen32e/auxv.S b/libc/nexgen32e/auxv.S index bc81ef64c..0c921df7c 100644 --- a/libc/nexgen32e/auxv.S +++ b/libc/nexgen32e/auxv.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" .initbss 300,_init_auxv // Global variable holding _start(auxv) parameter. diff --git a/libc/nexgen32e/environ.S b/libc/nexgen32e/environ.S index bf368d773..d1419a52c 100644 --- a/libc/nexgen32e/environ.S +++ b/libc/nexgen32e/environ.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" // Environment variable pointer list. .bss diff --git a/libc/nexgen32e/gc.S b/libc/nexgen32e/gc.S index 037ce8b31..302dcd5a2 100644 --- a/libc/nexgen32e/gc.S +++ b/libc/nexgen32e/gc.S @@ -18,7 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" #include "libc/dce.h" -#include "libc/notice.inc" nop @@ -37,7 +36,7 @@ __gc: .ftrace2 #ifdef __x86_64__ - mov %fs:0,%rcx // __get_tls() + mov %gs:0x30,%rcx // __get_tls() mov 0x18(%rcx),%rcx // tls::garbages decl (%rcx) // --g->i mov (%rcx),%r8d // r8 = g->i diff --git a/libc/nexgen32e/gclongjmp.S b/libc/nexgen32e/gclongjmp.S index 18f092000..1fb68131b 100644 --- a/libc/nexgen32e/gclongjmp.S +++ b/libc/nexgen32e/gclongjmp.S @@ -35,7 +35,7 @@ gclongjmp: #ifdef __x86_64__ push %rbp mov %rsp,%rbp - mov %fs:0,%r12 // __get_tls() + mov %gs:0x30,%r12 // __get_tls() mov 0x18(%r12),%r12 // Tls::garbages test %r12,%r12 jz 0f diff --git a/libc/nexgen32e/identity.S b/libc/nexgen32e/identity.S index af26136a2..7fc23e4d8 100644 --- a/libc/nexgen32e/identity.S +++ b/libc/nexgen32e/identity.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" // The identity() function. 
// @return first argument diff --git a/libc/nexgen32e/sha1.S b/libc/nexgen32e/sha1.S index 24fb18306..1016c0498 100644 --- a/libc/nexgen32e/sha1.S +++ b/libc/nexgen32e/sha1.S @@ -33,10 +33,11 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -.ident "\n\ -AVX2 SHA-1 (BSD-3 License)\n\ -Copyright 2014 Intel Corporation\n" -.include "libc/disclaimer.inc" + .section .notice,"aR",@progbits + .asciz "\n\n\
AVX2 SHA-1 (BSD-3 License)\n\
Copyright 2014 Intel Corporation" + .previous #define CTX %rdi /* arg1 */ #define BUF %rsi /* arg2 */ diff --git a/libc/nexgen32e/sha1ni.S b/libc/nexgen32e/sha1ni.S index f1e577d22..223f5f25d 100644 --- a/libc/nexgen32e/sha1ni.S +++ b/libc/nexgen32e/sha1ni.S @@ -33,14 +33,15 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -.text -.balign 32 -.ident "\n\ +.section .notice,"aR",@progbits +.asciz "\n\n\ Intel SHA-NI (BSD-3 License)\n\ Copyright 2015 Intel Corporation\n\ Sean Gulley \n\ -Tim Chen \n" -.include "libc/disclaimer.inc" +Tim Chen " + +.text +.balign 32 #define FRAME_SIZE 32 #define DIGEST_PTR %rdi diff --git a/libc/nexgen32e/sha256.S b/libc/nexgen32e/sha256.S index 3858101dd..df175bf5b 100644 --- a/libc/nexgen32e/sha256.S +++ b/libc/nexgen32e/sha256.S @@ -49,10 +49,11 @@ ///////////////////////////////////////////////////////////////////////// #include "libc/macros.internal.h" -.ident "\n\ +.section .notice,"aR",@progbits +.asciz "\n\n\ AVX2 SHA2 (BSD-2 License)\n\ -Copyright 2013 Intel Corporation\n" -.include "libc/disclaimer.inc" +Copyright 2013 Intel Corporation" +.previous ## assume buffers not aligned #define VMOVDQ vmovdqu diff --git a/libc/nexgen32e/sha256ni.S b/libc/nexgen32e/sha256ni.S index d04857550..eb020d706 100644 --- a/libc/nexgen32e/sha256ni.S +++ b/libc/nexgen32e/sha256ni.S @@ -33,14 +33,15 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ 
#include "libc/macros.internal.h" -.text -.balign 32 -.ident "\n\ +.section .notice,"aR",@progbits +.asciz "\n\n\ Intel SHA-NI (BSD-3 License)\n\ Copyright 2015 Intel Corporation\n\ Sean Gulley \n\ -Tim Chen \n" -.include "libc/disclaimer.inc" +Tim Chen " + +.text +.balign 32 #define DIGEST_PTR %rdi /* 1st arg */ #define DATA_PTR %rsi /* 2nd arg */ diff --git a/libc/nexgen32e/sha512.S b/libc/nexgen32e/sha512.S index b858a44d9..6e36d6d1b 100644 --- a/libc/nexgen32e/sha512.S +++ b/libc/nexgen32e/sha512.S @@ -50,10 +50,11 @@ ///////////////////////////////////////////////////////////////////////// #include "libc/macros.internal.h" -.ident "\n\ -AVX2 SHA2 (BSD-2 License)\n\ -Copyright 2013 Intel Corporation\n" -.include "libc/disclaimer.inc" +.section .notice,"aR",@progbits +.asciz "\n\n\ +AVX2 SHA512 (BSD-2 License)\n\ +Copyright 2013 Intel Corporation" +.previous # Virtual Registers Y_0 = %ymm4 diff --git a/libc/notice.inc b/libc/notice.inc deleted file mode 100644 index 5b9c68964..000000000 --- a/libc/notice.inc +++ /dev/null @@ -1,17 +0,0 @@ -.ident "\n\ -Cosmopolitan\n\ -Copyright 2020 Justine Alexandra Roberts Tunney\n\ -\n\ -Permission to use, copy, modify, and/or distribute this software for\n\ -any purpose with or without fee is hereby granted, provided that the\n\ -above copyright notice and this permission notice appear in all copies.\n\ -\n\ -THE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL\n\ -WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED\n\ -WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE\n\ -AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL\n\ -DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR\n\ -PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER\n\ -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\n\ -PERFORMANCE OF THIS SOFTWARE.\ -" diff --git a/libc/notice.internal.h b/libc/notice.internal.h deleted file mode 100644 index 4bb9ab304..000000000 --- a/libc/notice.internal.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_INTERNAL_NOTICE_H_ -#define COSMOPOLITAN_LIBC_INTERNAL_NOTICE_H_ -#ifndef __STRICT_ANSI__ - -#ifdef __ASSEMBLER__ -.include "libc/notice.inc" -#else -asm(".include \"libc/notice.inc\""); -#endif - -#endif /* !ANSI */ -#endif /* COSMOPOLITAN_LIBC_INTERNAL_NOTICE_H_ */ diff --git a/libc/nt/privilege.h b/libc/nt/privilege.h index 95088a34a..d281a330a 100644 --- a/libc/nt/privilege.h +++ b/libc/nt/privilege.h @@ -46,5 +46,8 @@ bool32 AdjustTokenPrivileges(int64_t TokenHandle, bool32 DisableAllPrivileges, bool32 ImpersonateSelf(int kNtSecurityImpersonationLevel); bool32 RevertToSelf(void); +bool32 OpenThreadToken(intptr_t ThreadHandle, uint32_t DesiredAccess, + bool32 OpenAsSelf, intptr_t *TokenHandle); + COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_LIBC_NT_PRIVILEGE_H_ */ diff --git a/libc/proc/execve-sysv.c b/libc/proc/execve-sysv.c index 04a40f861..aa638aac6 100644 --- a/libc/proc/execve-sysv.c +++ b/libc/proc/execve-sysv.c @@ -98,7 +98,7 @@ static void SetupExecve(void) { g_execve.tmpdir = getenv("TMPDIR"); } -__attribute__((__constructor__)) static void InitExecve(void) { +__attribute__((__constructor__(10))) static textstartup void InitExecve(void) { cosmo_once(&g_execve.once, SetupExecve); } diff --git a/libc/proc/vfork.S b/libc/proc/vfork.S index ff701c1c5..d43faf4cf 100644 --- a/libc/proc/vfork.S +++ b/libc/proc/vfork.S @@ -61,7 +61,7 @@ vfork: #endif pop %rbp #endif - mov %fs:0,%r9 // get thread information block + mov 
%gs:0x30,%r9 // get thread information block mov 0x3c(%r9),%r8d // avoid question of @vforksafe errno pop %rsi // saves return address in a register mov __NR_vfork(%rip),%eax diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index cb1d0733f..7a9913ccb 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -400,9 +400,7 @@ static wontreturn void FreebsdThreadMain(void *p) { #ifdef __aarch64__ asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls)); #elif defined(__x86_64__) - if (__tls_morphed) { - sys_set_tls(AMD64_SET_GSBASE, wt->tls); - } + sys_set_tls(AMD64_SET_GSBASE, wt->tls); #endif *wt->ctid = wt->tid; wt->func(wt->arg, wt->tid); @@ -575,7 +573,7 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz, #endif wt = (struct LinuxCloneArgs *)sp; #ifdef __x86_64__ - if ((flags & CLONE_SETTLS) && __tls_morphed) { + if (flags & CLONE_SETTLS) { flags &= ~CLONE_SETTLS; wt->arg = arg; wt->tls = tls; diff --git a/libc/runtime/cosmo.S b/libc/runtime/cosmo.S index e8d1d9fb1..31ee018a9 100644 --- a/libc/runtime/cosmo.S +++ b/libc/runtime/cosmo.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" #include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/map.h" #include "libc/intrin/strace.internal.h" @@ -85,19 +84,18 @@ cosmo: push %rbp call _init // call constructors - ezlea __init_array_end,ax // static ctors in forward order - .weak __init_array_end // could be called multiple times - ezlea __init_array_start,cx // idempotency recommended - .weak __init_array_start // @see ape/ape.lds -1: cmp %rax,%rcx + .weak __init_array_end + .weak __init_array_start + mov $__init_array_start,%eax +1: cmp $__init_array_end,%eax je 2f - sub $8,%rax push %rax - push %rcx + push %rax call .Largs call *(%rax) - pop %rcx pop %rax + pop %rax + add $8,%eax jmp 1b // call main() diff --git a/libc/runtime/cosmo2.c b/libc/runtime/cosmo2.c index f01c63720..8181bb9b3 100644 --- a/libc/runtime/cosmo2.c +++ b/libc/runtime/cosmo2.c @@ -59,8 +59,6 @@ extern char syscon_openbsd[]; extern char syscon_netbsd[]; extern char syscon_windows[]; extern init_f __strace_init; -extern init_f *__preinit_array_start[] __attribute__((__weak__)); -extern init_f *__preinit_array_end[] __attribute__((__weak__)); extern init_f *__init_array_start[] __attribute__((__weak__)); extern init_f *__init_array_end[] __attribute__((__weak__)); extern char ape_stack_prot[] __attribute__((__weak__)); @@ -189,7 +187,7 @@ wontreturn textstartup void cosmo(long *sp, struct Syslib *m1, char *exename, #if SYSDEBUG argc = __strace_init(argc, argv, envp, auxv); #endif - for (init_f **fp = __init_array_end; fp-- > __init_array_start;) { + for (init_f **fp = __init_array_start; fp < __init_array_end; ++fp) { (*fp)(argc, argv, envp, auxv); } #ifdef FTRACE diff --git a/libc/runtime/dsohandle.S b/libc/runtime/dsohandle.S index 6d344c37e..39cc3e989 100644 --- a/libc/runtime/dsohandle.S +++ b/libc/runtime/dsohandle.S @@ -17,7 +17,6 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -#include "libc/notice.inc" .underrun // Uniquely identifies each artifact linked in an address space. diff --git a/libc/runtime/enable_tls.c b/libc/runtime/enable_tls.c index 5395b6d6f..045740baf 100644 --- a/libc/runtime/enable_tls.c +++ b/libc/runtime/enable_tls.c @@ -251,7 +251,7 @@ textstartup void __enable_tls(void) { #ifdef __x86_64__ // rewrite the executable tls opcodes in memory - if (IsWindows() || IsXnu()) { + if (IsWindows() || IsOpenbsd() || IsNetbsd()) { __morph_tls(); } #endif diff --git a/libc/runtime/fenv.h b/libc/runtime/fenv.h index ac9b33c47..4f25de55a 100644 --- a/libc/runtime/fenv.h +++ b/libc/runtime/fenv.h @@ -59,7 +59,9 @@ typedef double fenv_t; #endif /* __x86_64__ */ -#ifdef __FLT_EVAL_METHOD__ +#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__ +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD_TS_18661_3__ +#elif defined(__FLT_EVAL_METHOD__) #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ #else #define FLT_EVAL_METHOD 0 diff --git a/libc/runtime/morph_tls.c b/libc/runtime/morph_tls.c index b780533ed..aef561580 100644 --- a/libc/runtime/morph_tls.c +++ b/libc/runtime/morph_tls.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "ape/sections.internal.h" +#include "libc/intrin/kprintf.h" #include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" #include "libc/serialize.h" @@ -49,68 +50,105 @@ privileged void __morph_tls(void) { unsigned char *p; __morph_begin(); - if (IsXnu()) { - // Apple is quite straightforward to patch. We basically - // just change the segment register, and the linear slot - // address 0x30 was promised to us, according to Go team - // https://github.com/golang/go/issues/23617 - dis = 0x30; - } else if (IsWindows()) { + if (IsWindows()) { // MSVC __declspec(thread) generates binary code for this // %gs:0x1480 abi. 
So long as TlsAlloc() isn't called >64 // times we should be good. dis = 0x1480 + __tls_index * 8; - } else { - dis = 0; - } - // iterate over modifiable code looking for 9 byte instruction - // this used to take 30ms with xed to enable tls on python.com - for (p = _ereal; p + 9 <= __privileged_start; p += n) { + // iterate over modifiable code looking for 9 byte instruction + // this used to take 30ms with xed to enable tls on python.com + for (p = _ereal; p + 9 <= __privileged_start; p += n) { - // use sse to zoom zoom to fs register prefixes - // that way it'll take 1 ms to morph python.com - // we recompiled a 13mb binary in 1 millisecond - while (p + 9 + 16 <= __privileged_start) { - if ((m = __builtin_ia32_pmovmskb128( - *(xmm_t *)p == (xmm_t){0144, 0144, 0144, 0144, 0144, 0144, 0144, - 0144, 0144, 0144, 0144, 0144, 0144, 0144, - 0144, 0144}))) { - m = __builtin_ctzll(m); - p += m; - break; + // use sse to zoom zoom to fs register prefixes + // that way it'll take 1 ms to morph python.com + // we recompiled a 13mb binary in 1 millisecond + while (p + 9 + 16 <= __privileged_start) { + if ((m = __builtin_ia32_pmovmskb128( + *(xmm_t *)p == (xmm_t){0145, 0145, 0145, 0145, 0145, 0145, + 0145, 0145, 0145, 0145, 0145, 0145, + 0145, 0145, 0145, 0145}))) { + m = __builtin_ctzll(m); + p += m; + break; + } else { + p += 16; + } + } + + // we're checking for the following expression: + // 0145 == p[0] && // %gs + // 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r) + // (0213 == p[2] || // mov reg/mem → reg (word-sized) + // 0003 == p[2]) && // add reg/mem → reg (word-sized) + // 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg + // 0x30 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32 + // 0000 == p[5] && // displacement (von Neumann endian) + // 0000 == p[6] && // displacement + // 0000 == p[7] && // displacement + // 0000 == p[8] // displacement + w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377"); + if ((w == 
READ64LE("\145\110\213\004\045\060\000\000") || + w == READ64LE("\145\110\003\004\045\060\000\000")) && + !p[8]) { + + // now change the code + p[5] = (dis & 0x000000ff) >> 000; // displacement + p[6] = (dis & 0x0000ff00) >> 010; // displacement + p[7] = (dis & 0x00ff0000) >> 020; // displacement + p[8] = (dis & 0xff000000) >> 030; // displacement + + // advance to the next instruction + n = 9; } else { - p += 16; + n = 1; } } + } else { + // iterate over modifiable code looking for 9 byte instruction + // this used to take 30ms with xed to enable tls on python.com + for (p = _ereal; p + 9 <= __privileged_start; p += n) { - // we're checking for the following expression: - // 0144 == p[0] && // %fs - // 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r) - // (0213 == p[2] || // mov reg/mem → reg (word-sized) - // 0003 == p[2]) && // add reg/mem → reg (word-sized) - // 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg - // 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32 - // 0000 == p[5] && // displacement (von Neumann endian) - // 0000 == p[6] && // displacement - // 0000 == p[7] && // displacement - // 0000 == p[8] // displacement - w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377"); - if ((w == READ64LE("\144\110\213\004\045\000\000\000") || - w == READ64LE("\144\110\003\004\045\000\000\000")) && - !p[8]) { + // use sse to zoom zoom to fs register prefixes + // that way it'll take 1 ms to morph python.com + // we recompiled a 13mb binary in 1 millisecond + while (p + 9 + 16 <= __privileged_start) { + if ((m = __builtin_ia32_pmovmskb128( + *(xmm_t *)p == (xmm_t){0145, 0145, 0145, 0145, 0145, 0145, + 0145, 0145, 0145, 0145, 0145, 0145, + 0145, 0145, 0145, 0145}))) { + m = __builtin_ctzll(m); + p += m; + break; + } else { + p += 16; + } + } - // now change the code - p[0] = 0145; // change %fs to %gs - p[5] = (dis & 0x000000ff) >> 000; // displacement - p[6] = (dis & 0x0000ff00) >> 010; // displacement - p[7] = (dis & 0x00ff0000) >> 
020; // displacement - p[8] = (dis & 0xff000000) >> 030; // displacement + // we're checking for the following expression: + // 0145 == p[0] && // %gs + // 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r) + // (0213 == p[2] || // mov reg/mem → reg (word-sized) + // 0003 == p[2]) && // add reg/mem → reg (word-sized) + // 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg + // 0x30 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32 + // 0000 == p[5] && // displacement (von Neumann endian) + // 0000 == p[6] && // displacement + // 0000 == p[7] && // displacement + // 0000 == p[8] // displacement + w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377"); + if ((w == READ64LE("\145\110\213\004\045\060\000\000") || + w == READ64LE("\145\110\003\004\045\060\000\000")) && + !p[8]) { - // advance to the next instruction - n = 9; - } else { - n = 1; + // now change the code + p[0] = 0144; // change %gs to %fs + + // advance to the next instruction + n = 9; + } else { + n = 1; + } } } diff --git a/libc/runtime/set_tls.c b/libc/runtime/set_tls.c index 29972f484..0f54e8d12 100644 --- a/libc/runtime/set_tls.c +++ b/libc/runtime/set_tls.c @@ -39,10 +39,10 @@ dontinstrument textstartup void __set_tls(struct CosmoTib *tib) { // ask the operating system to change the x86 segment register if (IsWindows()) { asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib)); - } else if (IsFreebsd()) { - sys_set_tls(__tls_morphed ? AMD64_SET_GSBASE : AMD64_SET_FSBASE, tib); } else if (IsLinux()) { - sys_set_tls(__tls_morphed ? 
ARCH_SET_GS : ARCH_SET_FS, tib); + sys_set_tls(ARCH_SET_GS, tib); + } else if (IsFreebsd()) { + sys_set_tls(AMD64_SET_GSBASE, tib); } else if (IsNetbsd()) { // netbsd has sysarch(X86_SET_FSBASE) but we can't use that because // signal handlers will cause it to be reset due to not setting the diff --git a/libc/runtime/zipos-open.c b/libc/runtime/zipos-open.c index da92c9d37..83b71ed76 100644 --- a/libc/runtime/zipos-open.c +++ b/libc/runtime/zipos-open.c @@ -302,7 +302,7 @@ int __zipos_open(struct ZiposUri *name, int flags) { return rc; } -__attribute__((__constructor__)) static void __zipos_ctor(void) { +__attribute__((__constructor__(60))) static textstartup void zipos_ctor(void) { __zipos_wipe(); pthread_atfork(__zipos_lock, __zipos_unlock, __zipos_wipe); } diff --git a/libc/sock/epoll.c b/libc/sock/epoll.c index 73fcf66cd..4c86e35e1 100644 --- a/libc/sock/epoll.c +++ b/libc/sock/epoll.c @@ -91,11 +91,10 @@ * TODO(jart): Polyfill kqueue for XNU/FreeBSD/OpenBSD. */ -asm(".ident\t\"\\n\\n\ -wepoll (BSD-2)\\n\ -Copyright 2012-2020 Bert Belder\\n\ -https://github.com/piscisaureus/wepoll\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(wepoll_notice, "\ +wepoll (BSD-2)\n\ +Copyright 2012-2020 Bert Belder\n\ +https://github.com/piscisaureus/wepoll"); #define MAX_GROUP_SIZE 32 @@ -128,7 +127,7 @@ asm(".include \"libc/disclaimer.inc\""); } while (0) #define CONTAINOF(ptr, type, member) \ - ((type *)((uintptr_t)(ptr)-offsetof(type, member))) + ((type *)((uintptr_t)(ptr) - offsetof(type, member))) #define TREE__ROTATE(cis, trans) \ struct TreeNode *p = node; \ diff --git a/libc/sock/inet_aton.c b/libc/sock/inet_aton.c index 98869e463..c826cb955 100644 --- a/libc/sock/inet_aton.c +++ b/libc/sock/inet_aton.c @@ -29,11 +29,7 @@ #include "libc/sock/sock.h" #include "libc/sock/struct/sockaddr.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /** * Converts dotted IPv4 address string to network order binary. diff --git a/libc/stdio/demangle.c b/libc/stdio/demangle.c index 42b532d23..2950ad7ba 100644 --- a/libc/stdio/demangle.c +++ b/libc/stdio/demangle.c @@ -34,11 +34,10 @@ #include "libc/stdio/stdio.h" #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -C++ Demangle (BSD-2)\\n\ -Copyright (c) 2007 Hyogeol Lee \\n\ -Copyright (c) 2015-2017 Kai Wang \""); -asm(".include \"libc/disclaimer.inc\""); +__notice(demangle_notice, "\ +FreeBSD C++ Demangle (BSD-2)\n\ +Copyright (c) 2007 Hyogeol Lee \n\ +Copyright (c) 2015-2017 Kai Wang "); // https://github.com/freebsd/freebsd-src/blob/2176c9ab71c85efd90a6c7af4a9e04fe8e3d49ca/contrib/libcxxrt/libelftc_dem_gnu3.c // clang-format off diff --git a/libc/stdio/ecvt.c b/libc/stdio/ecvt.c index cb3324fdc..ebb72075c 100644 --- a/libc/stdio/ecvt.c +++ b/libc/stdio/ecvt.c @@ -27,10 +27,9 @@ #include "libc/str/str.h" #include "third_party/gdtoa/gdtoa.h" -asm(".ident\t\"\\n\\n\ -OpenBSD ecvt/gcvt (MIT)\\n\ -Copyright (c) 2002, 2006, 2010 Todd C. Miller \""); -asm(".include \"libc/disclaimer.inc\""); +__notice(ecvt_notice, "\ +OpenBSD ecvt (MIT)\n\ +Copyright (c) 2002, 2006, 2010 Todd C. 
Miller "); // clang-format off static char *s; @@ -42,7 +41,7 @@ __cvt_atexit(void) s = 0; } -static void __attribute__((__constructor__)) +static __attribute__((__constructor__(60))) textstartup void __cvt_init(void) { atexit(__cvt_atexit); diff --git a/libc/stdio/fflush_unlocked.c b/libc/stdio/fflush_unlocked.c index ab889dec9..fd8887d79 100644 --- a/libc/stdio/fflush_unlocked.c +++ b/libc/stdio/fflush_unlocked.c @@ -69,7 +69,7 @@ static void __stdio_fork_child(void) { pthread_mutex_init(&__fflush_lock_obj, 0); } -__attribute__((__constructor__)) static void __stdio_init(void) { +__attribute__((__constructor__(60))) static textstartup void stdioinit(void) { pthread_atfork(__stdio_fork_prepare, __stdio_fork_parent, __stdio_fork_child); } diff --git a/libc/stdio/ftw.c b/libc/stdio/ftw.c index 9440b75f8..ded9820ff 100644 --- a/libc/stdio/ftw.c +++ b/libc/stdio/ftw.c @@ -26,11 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/stdio/ftw.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off /** diff --git a/libc/stdio/gcvt.c b/libc/stdio/gcvt.c index c0ecbe869..dfc824415 100644 --- a/libc/stdio/gcvt.c +++ b/libc/stdio/gcvt.c @@ -28,10 +28,9 @@ #include "libc/str/unicode.h" #include "third_party/gdtoa/gdtoa.h" -asm(".ident\t\"\\n\\n\ -OpenBSD ecvt/gcvt (MIT)\\n\ -Copyright (c) 2002, 2006, 2010 Todd C. Miller \""); -asm(".include \"libc/disclaimer.inc\""); +__notice(gcvt_notice, "\ +OpenBSD gcvt (MIT)\n\ +Copyright (c) 2002, 2006, 2010 Todd C. 
Miller "); // clang-format off #define DEFPREC 6 diff --git a/libc/stdio/iconv.c b/libc/stdio/iconv.c index 791d73277..3b04b6b8d 100644 --- a/libc/stdio/iconv.c +++ b/libc/stdio/iconv.c @@ -32,11 +32,8 @@ #include "libc/str/str.h" #include "libc/thread/tls.h" // clang-format off +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); #define UTF_32BE 0300 #define UTF_16LE 0301 diff --git a/libc/stdio/mt19937.c b/libc/stdio/mt19937.c index 3ef4e8362..8a4b8b8d2 100644 --- a/libc/stdio/mt19937.c +++ b/libc/stdio/mt19937.c @@ -38,10 +38,8 @@ #include "libc/macros.internal.h" #include "libc/stdio/rand.h" -asm(".ident\t\"\\n\\n\ -mt19937 (BSD-3)\\n\ -Copyright 1997-2004 Makoto Matsumoto and Takuji Nishimura\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(mt19937_notice, "mt19937 (BSD-3)\n\ +Copyright 1997-2004 Makoto Matsumoto and Takuji Nishimura"); /* * A C-program for MT19937-64 (2004/9/29 version). diff --git a/libc/stdio/nftw.c b/libc/stdio/nftw.c index 35c84b0cb..53c5ca2da 100644 --- a/libc/stdio/nftw.c +++ b/libc/stdio/nftw.c @@ -36,11 +36,8 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/s.h" #include "libc/thread/thread.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off struct history diff --git a/libc/stdio/random.c b/libc/stdio/random.c index 513300d76..aa78e31c9 100644 --- a/libc/stdio/random.c +++ b/libc/stdio/random.c @@ -26,11 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/stdio/rand.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /* * this code uses the same lagged fibonacci generator as the diff --git a/libc/stdio/scandir.c b/libc/stdio/scandir.c index 07973bcc6..e9be6e664 100644 --- a/libc/stdio/scandir.c +++ b/libc/stdio/scandir.c @@ -31,11 +31,8 @@ #include "libc/mem/alg.h" #include "libc/mem/mem.h" #include "libc/str/str.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off int scandir(const char *path, struct dirent ***res, diff --git a/libc/stdio/stderr.c b/libc/stdio/stderr.c index 9065dd114..444a3ed0c 100644 --- a/libc/stdio/stderr.c +++ b/libc/stdio/stderr.c @@ -30,7 +30,7 @@ FILE *stderr; static FILE __stderr; -__attribute__((__constructor__)) static void __stderr_init(void) { +__attribute__((__constructor__(60))) static textstartup void errinit(void) { stderr = &__stderr; stderr->fd = STDERR_FILENO; stderr->bufmode = _IONBF; diff --git a/libc/stdio/stdin.c b/libc/stdio/stdin.c index 0c08324d8..2407187fe 100644 --- a/libc/stdio/stdin.c +++ b/libc/stdio/stdin.c @@ -30,7 +30,7 @@ FILE *stdin; static FILE __stdin; -__attribute__((__constructor__)) static void __stdin_init(void) { +__attribute__((__constructor__(60))) static textstartup void initin(void) { stdin = &__stdin; stdin->fd = STDIN_FILENO; stdin->iomode = O_RDONLY; diff --git a/libc/stdio/stdout.c b/libc/stdio/stdout.c index 3de0d80f3..06eee4475 100644 --- a/libc/stdio/stdout.c +++ b/libc/stdio/stdout.c @@ -31,7 +31,7 @@ FILE *stdout; static FILE __stdout; -__attribute__((__constructor__)) static void __stdout_init(void) { +__attribute__((__constructor__(60))) static textstartup void outinit(void) { stdout = &__stdout; stdout->fd = STDOUT_FILENO; diff --git a/libc/str/blake2.c b/libc/str/blake2.c index 44e0ebc3b..362a198da 100644 --- a/libc/str/blake2.c +++ b/libc/str/blake2.c @@ -23,10 +23,9 
@@ #define ROR(v, n) (((v) >> (n)) | ((v) << (64 - (n)))) -asm(".ident\t\"\\n\\n\ -boringssl blake2b (ISC License)\\n\ -Copyright 2021 Google LLC\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(blake2b_notice, "\ +boringssl blake2b (ISC License)\n\ +Copyright 2021 Google LLC"); // https://tools.ietf.org/html/rfc7693#section-2.6 static const uint64_t kIV[8] = { diff --git a/libc/str/btowc.c b/libc/str/btowc.c index e0faaf8e0..4e3cb74ab 100644 --- a/libc/str/btowc.c +++ b/libc/str/btowc.c @@ -29,11 +29,7 @@ #include "libc/stdio/stdio.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); wint_t btowc(int c) { int b = (unsigned char)c; diff --git a/libc/str/c16rtomb.c b/libc/str/c16rtomb.c index 3289411f6..34406401e 100644 --- a/libc/str/c16rtomb.c +++ b/libc/str/c16rtomb.c @@ -30,11 +30,7 @@ #include "libc/limits.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps) { static unsigned internal_state; diff --git a/libc/str/highwayhash64.c b/libc/str/highwayhash64.c index 7cb0af034..5cd7581a3 100644 --- a/libc/str/highwayhash64.c +++ b/libc/str/highwayhash64.c @@ -18,10 +18,9 @@ #include "libc/str/highwayhash64.h" #include "libc/serialize.h" -asm(".ident\t\"\\n\\n\ -HighwayHash (Apache 2.0)\\n\ -Copyright 2017 Google LLC\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(highwayhash_notice, "\ +HighwayHash (Apache 2.0)\n\ +Copyright 2017 Google LLC"); typedef struct { uint64_t v0[4]; diff --git a/libc/str/langinfo.c b/libc/str/langinfo.c index d66386082..fe88cd54a 100644 --- a/libc/str/langinfo.c +++ b/libc/str/langinfo.c @@ -29,11 +29,8 @@ #include "libc/str/locale.h" #include "libc/str/nltypes.h" #include "libc/thread/tls.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off static const char c_time[] = diff --git a/libc/str/mb.c b/libc/str/mb.c index 3838b676c..98cbf47dd 100644 --- a/libc/str/mb.c +++ b/libc/str/mb.c @@ -26,11 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/mb.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); #define C(x) (x < 2 ? 
-1 : (R(0x80, 0xc0) | x)) #define D(x) C((x + 16)) diff --git a/libc/str/mbrtoc16.c b/libc/str/mbrtoc16.c index 1a41aa9f7..40d366118 100644 --- a/libc/str/mbrtoc16.c +++ b/libc/str/mbrtoc16.c @@ -29,11 +29,7 @@ #include "libc/limits.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps) { static unsigned internal_state; diff --git a/libc/str/mbrtoc32.c b/libc/str/mbrtoc32.c index 5cfca124b..00cc13ba7 100644 --- a/libc/str/mbrtoc32.c +++ b/libc/str/mbrtoc32.c @@ -29,11 +29,7 @@ #include "libc/limits.h" #include "libc/macros.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t mbrtoc32(char32_t *pc32, const char *s, size_t n, mbstate_t *ps) { static unsigned internal_state; diff --git a/libc/str/mbrtowc.c b/libc/str/mbrtowc.c index 1d89eb7b9..8a0f996ef 100644 --- a/libc/str/mbrtowc.c +++ b/libc/str/mbrtowc.c @@ -30,11 +30,7 @@ #include "libc/macros.internal.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t mbrtowc(wchar_t *wc, const char *src, size_t n, mbstate_t *st) { static unsigned internal_state; diff --git a/libc/str/mbsnrtowcs.c b/libc/str/mbsnrtowcs.c index d3dd1499a..504a81c45 100644 --- a/libc/str/mbsnrtowcs.c +++ b/libc/str/mbsnrtowcs.c @@ -30,11 +30,7 @@ #include "libc/macros.internal.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t mbsnrtowcs(wchar_t *wcs, const char **src, size_t n, size_t wn, mbstate_t *st) { diff --git a/libc/str/mbsrtowcs.c b/libc/str/mbsrtowcs.c index b3aa5f0ae..986172725 100644 --- a/libc/str/mbsrtowcs.c +++ b/libc/str/mbsrtowcs.c @@ -30,11 +30,7 @@ #include "libc/macros.internal.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t mbsrtowcs(wchar_t *ws, const char **src, size_t wn, mbstate_t *st) { const unsigned char *s = (const void *)*src; diff --git a/libc/str/mbtowc.c b/libc/str/mbtowc.c index 669dcaf47..155270726 100644 --- a/libc/str/mbtowc.c +++ b/libc/str/mbtowc.c @@ -29,11 +29,7 @@ #include "libc/limits.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) { unsigned c; diff --git a/libc/str/smoothsort.c b/libc/str/smoothsort.c index e9cf02b26..ba84be2e4 100644 --- a/libc/str/smoothsort.c +++ b/libc/str/smoothsort.c @@ -26,11 +26,10 @@ #include "libc/mem/alg.h" #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -Smoothsort (MIT License)\\n\ -Copyright 2011 Valentin Ochs\\n\ -Discovered by Edsger Dijkstra\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(smoothsort_notice, "\ +Smoothsort (MIT License)\n\ +Copyright 2011 Valentin Ochs\n\ +Discovered by Edsger Dijkstra"); typedef int (*cmpfun)(const void *, const void *, void *); diff --git a/libc/str/strlcat.c b/libc/str/strlcat.c index 445ec2742..087376cc7 100644 --- a/libc/str/strlcat.c +++ b/libc/str/strlcat.c @@ -19,11 +19,7 @@ #include "libc/str/str.h" // clang-format off // $OpenBSD: strlcat.c,v 1.19 2019/01/25 00:19:25 millert Exp $ - -asm(".ident\t\"\\n\\n\ -OpenBSD Strings (ISC)\\n\ -Copyright (c) 1998, 2015 Todd C. Miller \""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("openbsd_strings_notice"); /** * Appends string, the BSD way. diff --git a/libc/str/strlcpy.c b/libc/str/strlcpy.c index 89bbbd8fa..785eaacaf 100644 --- a/libc/str/strlcpy.c +++ b/libc/str/strlcpy.c @@ -19,11 +19,7 @@ #include "libc/str/str.h" // clang-format off // $OpenBSD: strlcpy.c,v 1.16 2019/01/25 00:19:25 millert Exp $ - -asm(".ident\t\"\\n\\n\ -OpenBSD Strings (ISC)\\n\ -Copyright (c) 1998, 2015 Todd C. Miller \""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("openbsd_strings_notice"); /** * Copies string, the BSD way. 
diff --git a/libc/str/strverscmp.c b/libc/str/strverscmp.c index 0652be54c..3cc3740ef 100644 --- a/libc/str/strverscmp.c +++ b/libc/str/strverscmp.c @@ -26,11 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); // clang-format off /** diff --git a/libc/str/timingsafe_memcmp.c b/libc/str/timingsafe_memcmp.c index a209fd829..dff98b620 100644 --- a/libc/str/timingsafe_memcmp.c +++ b/libc/str/timingsafe_memcmp.c @@ -19,10 +19,9 @@ #include "libc/limits.h" #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -timingsafe_memcmp (ISC License)\\n\ -Copyright 2014 Google Inc.\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(timingsafe_memcmp_notice, "\ +timingsafe_memcmp (ISC License)\n\ +Copyright 2014 Google Inc."); /** * Lexicographically compares the first 𝑛 bytes in 𝑝 and 𝑞. diff --git a/libc/str/todd.c b/libc/str/todd.c new file mode 100644 index 000000000..133d948d7 --- /dev/null +++ b/libc/str/todd.c @@ -0,0 +1,2 @@ +__notice(openbsd_strings_notice, "OpenBSD Strings (ISC)\n\ +Copyright (c) 1998, 2015 Todd C. Miller "); diff --git a/libc/str/wcrtomb.c b/libc/str/wcrtomb.c index 23116b707..0a8f6b155 100644 --- a/libc/str/wcrtomb.c +++ b/libc/str/wcrtomb.c @@ -29,11 +29,7 @@ #include "libc/limits.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t wcrtomb(char *s, wchar_t wc, mbstate_t *st) { if (!s) return 1; diff --git a/libc/str/wcsnrtombs.c b/libc/str/wcsnrtombs.c index ef684d9b0..4486ce579 100644 --- a/libc/str/wcsnrtombs.c +++ b/libc/str/wcsnrtombs.c @@ -29,11 +29,7 @@ #include "libc/limits.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t wcsnrtombs(char *dst, const wchar_t **wcs, size_t wn, size_t n, mbstate_t *st) { diff --git a/libc/str/wcsrtombs.c b/libc/str/wcsrtombs.c index 013635e39..f1ad160e9 100644 --- a/libc/str/wcsrtombs.c +++ b/libc/str/wcsrtombs.c @@ -29,11 +29,7 @@ #include "libc/limits.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); size_t wcsrtombs(char *s, const wchar_t **ws, size_t n, mbstate_t *st) { const wchar_t *ws2; diff --git a/libc/str/wctob.c b/libc/str/wctob.c index 62e3ed8a7..bd5ff66b6 100644 --- a/libc/str/wctob.c +++ b/libc/str/wctob.c @@ -29,11 +29,7 @@ #include "libc/stdio/stdio.h" #include "libc/str/mb.internal.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int wctob(wint_t c) { if (c < 128U) return c; diff --git a/libc/sysv/errno.c b/libc/sysv/errno.c index 570f29d5b..438ee9508 100644 --- a/libc/sysv/errno.c +++ b/libc/sysv/errno.c @@ -17,7 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/errno.h" -#include "libc/thread/tls.h" +#include "libc/thread/tls2.internal.h" /** * Global variable for last error. diff --git a/libc/sysv/systemfive.S b/libc/sysv/systemfive.S index 02b237817..e858edc54 100644 --- a/libc/sysv/systemfive.S +++ b/libc/sysv/systemfive.S @@ -108,7 +108,7 @@ systemfive_cp: systemfive_cancellable: // our pthread_cancel() miracle code cmpb $0,__tls_enabled(%rip) // inspired by the musl libc design! je 1f // we handle linux and bsd together! - mov %fs:0,%r10 // CosmoTib::tib_self + mov %gs:0x30,%r10 // CosmoTib::tib_self mov 0x28(%r10),%r10 // CosmoTib::tib_pthread test %r10,%r10 // is it a posix thread? jz 1f // it's spawn() probably @@ -135,7 +135,7 @@ systemfive_cancellable_end: // i/o calls park here for long time jne systemfive_errno // werent interrupted by OnSigCancel cmpb $0,__tls_enabled(%rip) // make sure it's safe to grab %fs:0 je systemfive_errno // tls is disabled we can't continue - mov %fs:0,%rcx // CosmoTib::tib_self + mov %gs:0x30,%rcx // CosmoTib::tib_self mov 0x28(%rcx),%rcx // CosmoTib::tib_pthread test %rcx,%rcx // is it a posix thread? 
jz systemfive_errno // it's spawn() probably diff --git a/libc/thread/tls.h b/libc/thread/tls.h index b5ef8b793..87c0b0bc5 100644 --- a/libc/thread/tls.h +++ b/libc/thread/tls.h @@ -70,7 +70,7 @@ forceinline pureconst struct CosmoTib *__get_tls(void) { return 0; #elif __x86_64__ struct CosmoTib *__tib; - __asm__("mov\t%%fs:0,%0" : "=r"(__tib)); + __asm__("mov\t%%gs:0x30,%0" : "=r"(__tib)); return __tib; #elif defined(__aarch64__) register struct CosmoTib *__tls __asm__("x28"); diff --git a/libc/thread/tls2.internal.h b/libc/thread/tls2.internal.h index e91ce33d7..e739a0981 100644 --- a/libc/thread/tls2.internal.h +++ b/libc/thread/tls2.internal.h @@ -13,12 +13,8 @@ COSMOPOLITAN_C_START_ */ forceinline struct CosmoTib *__get_tls_privileged(void) { char *tib, *lin = (char *)0x30; - if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd() || IsMetal()) { - if (!__tls_morphed) { - asm("mov\t%%fs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory"); - } else { - asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory"); - } + if (IsNetbsd() || IsOpenbsd()) { + asm("mov\t%%fs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory"); } else { asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory"); if (IsWindows()) { diff --git a/libc/time/BUILD.mk b/libc/time/BUILD.mk index 8705b3347..bb3b136f9 100644 --- a/libc/time/BUILD.mk +++ b/libc/time/BUILD.mk @@ -4,7 +4,6 @@ PKGS += LIBC_TIME LIBC_TIME_ARTIFACTS += LIBC_TIME_A -LIBC_TIME_ZONEINFOS = $(wildcard usr/share/zoneinfo/*) LIBC_TIME = $(LIBC_TIME_A_DEPS) $(LIBC_TIME_A) LIBC_TIME_A = o/$(MODE)/libc/time/time.a LIBC_TIME_A_FILES := $(wildcard libc/time/struct/*) $(wildcard libc/time/*) @@ -12,6 +11,10 @@ LIBC_TIME_A_HDRS := $(filter %.h,$(LIBC_TIME_A_FILES)) LIBC_TIME_A_SRCS_S = $(filter %.S,$(LIBC_TIME_A_FILES)) LIBC_TIME_A_SRCS_C = $(filter %.c,$(LIBC_TIME_A_FILES)) +LIBC_TIME_ZONEINFOS := \ + $(wildcard usr/share/zoneinfo/*) \ + $(wildcard usr/share/zoneinfo/US/*) + LIBC_TIME_A_SRCS = \ $(LIBC_TIME_A_SRCS_S) \ $(LIBC_TIME_A_SRCS_C) diff 
--git a/libc/time/localtime.c b/libc/time/localtime.c index c90686b2c..3c5c567c8 100644 --- a/libc/time/localtime.c +++ b/libc/time/localtime.c @@ -5,9 +5,9 @@ #include "libc/calls/blockcancel.internal.h" #include "libc/calls/calls.h" #include "libc/cxxabi.h" -#include "libc/serialize.h" #include "libc/mem/gc.h" #include "libc/mem/mem.h" +#include "libc/serialize.h" #include "libc/str/str.h" #include "libc/sysv/consts/o.h" #include "libc/thread/thread.h" @@ -60,7 +60,8 @@ void localtime_unlock(void) { pthread_mutex_unlock(&locallock); } -__attribute__((__constructor__)) static void localtime_init(void) { +__attribute__((__constructor__(80))) +static textstartup void localtime_init(void) { localtime_wipe(); pthread_atfork(localtime_lock, localtime_unlock, diff --git a/libc/time/strftime.c b/libc/time/strftime.c index 1906cf1db..1a607b35e 100644 --- a/libc/time/strftime.c +++ b/libc/time/strftime.c @@ -26,10 +26,8 @@ #define DIVISOR 100 -asm(".ident\t\"\\n\\n\ -strftime (BSD-3)\\n\ -Copyright 1989 The Regents of the University of California\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(strftime_notice, "strftime (BSD-3)\n\ +Copyright 1989 The Regents of the University of California"); /* ** Based on the UCB version with the copyright notice appearing above. diff --git a/libc/time/strptime.c b/libc/time/strptime.c index 74c558466..63d9020ac 100644 --- a/libc/time/strptime.c +++ b/libc/time/strptime.c @@ -30,11 +30,7 @@ #include "libc/str/str.h" #include "libc/time/struct/tm.h" #include "libc/time/time.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2019 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); char *strptime(const char *s, const char *f, struct tm *tm) { int i, w, neg, adj, min, range, itemsize, *dest, dummy; @@ -44,8 +40,7 @@ char *strptime(const char *s, const char *f, struct tm *tm) { while (*f) { if (*f != '%') { if (isspace(*f)) { - for (; *s && isspace(*s); s++) - ; + for (; *s && isspace(*s); s++); } else if (*s != *f) { return 0; } else { @@ -138,8 +133,7 @@ char *strptime(const char *s, const char *f, struct tm *tm) { goto numeric_range; case 'n': case 't': - for (; *s && isspace(*s); s++) - ; + for (; *s && isspace(*s); s++); break; case 'p': ex = "AM"; diff --git a/libc/time/tzfile.internal.h b/libc/time/tzfile.internal.h index 3c892b77f..abab9923d 100644 --- a/libc/time/tzfile.internal.h +++ b/libc/time/tzfile.internal.h @@ -29,7 +29,7 @@ #endif /* !defined TZDEFAULT */ #ifndef TZDEFRULES -#define TZDEFRULES "New_York" +#define TZDEFRULES "US/Pacific" #endif /* !defined TZDEFRULES */ diff --git a/libc/tinymath/.clang-format b/libc/tinymath/.clang-format new file mode 100644 index 000000000..47a38a93f --- /dev/null +++ b/libc/tinymath/.clang-format @@ -0,0 +1,2 @@ +DisableFormat: true +SortIncludes: Never diff --git a/libc/tinymath/__cexp.c b/libc/tinymath/__cexp.c index 788edab61..59d3e36fd 100644 --- a/libc/tinymath/__cexp.c +++ b/libc/tinymath/__cexp.c @@ -28,12 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/k_exp.c */ /*- diff --git a/libc/tinymath/__cexpf.c b/libc/tinymath/__cexpf.c index 5bf24743f..1840c31e1 100644 --- a/libc/tinymath/__cexpf.c +++ b/libc/tinymath/__cexpf.c @@ -28,12 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/k_expf.c */ /*- diff --git a/libc/tinymath/__math_divzero.c b/libc/tinymath/__math_divzero.c index 6c4fecba0..24b104690 100644 --- a/libc/tinymath/__math_divzero.c +++ b/libc/tinymath/__math_divzero.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off double __math_divzero(uint32_t sign) { diff --git a/libc/tinymath/__math_divzerof.c b/libc/tinymath/__math_divzerof.c index aba79e70d..b241eecb0 100644 --- a/libc/tinymath/__math_divzerof.c +++ b/libc/tinymath/__math_divzerof.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off float __math_divzerof(uint32_t sign) { diff --git a/libc/tinymath/__math_invalid.c b/libc/tinymath/__math_invalid.c index 77a8a245e..890a8eadf 100644 --- a/libc/tinymath/__math_invalid.c +++ b/libc/tinymath/__math_invalid.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off double __math_invalid(double x) { diff --git a/libc/tinymath/__math_invalidf.c b/libc/tinymath/__math_invalidf.c index 5c8bd6951..7bcb5870b 100644 
--- a/libc/tinymath/__math_invalidf.c +++ b/libc/tinymath/__math_invalidf.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off float __math_invalidf(float x) { diff --git a/libc/tinymath/__math_oflow.c b/libc/tinymath/__math_oflow.c index 3e8a756a0..588c80167 100644 --- a/libc/tinymath/__math_oflow.c +++ b/libc/tinymath/__math_oflow.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off dontinstrument double __math_oflow(uint32_t sign) { diff --git a/libc/tinymath/__math_oflowf.c b/libc/tinymath/__math_oflowf.c index c289062d4..a7b8d7f1a 100644 --- a/libc/tinymath/__math_oflowf.c +++ b/libc/tinymath/__math_oflowf.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off dontinstrument float __math_oflowf(uint32_t sign) { diff --git a/libc/tinymath/__math_uflow.c b/libc/tinymath/__math_uflow.c index c810da1c6..7ccda32fe 100644 --- a/libc/tinymath/__math_uflow.c +++ b/libc/tinymath/__math_uflow.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off dontinstrument double __math_uflow(uint32_t sign) { diff --git a/libc/tinymath/__math_uflowf.c b/libc/tinymath/__math_uflowf.c index de7cf7c06..b355b5a7e 100644 --- a/libc/tinymath/__math_uflowf.c +++ b/libc/tinymath/__math_uflowf.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off dontinstrument float __math_uflowf(uint32_t sign) { diff --git a/libc/tinymath/__math_xflow.c b/libc/tinymath/__math_xflow.c index 7e87826dc..df508896c 100644 --- a/libc/tinymath/__math_xflow.c +++ 
b/libc/tinymath/__math_xflow.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off dontinstrument double __math_xflow(uint32_t sign, double y) { diff --git a/libc/tinymath/__math_xflowf.c b/libc/tinymath/__math_xflowf.c index 1ea47b7fe..6b23ee8f3 100644 --- a/libc/tinymath/__math_xflowf.c +++ b/libc/tinymath/__math_xflowf.c @@ -26,7 +26,6 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" -// clang-format off dontinstrument float __math_xflowf(uint32_t sign, float y) { diff --git a/libc/tinymath/acos.c b/libc/tinymath/acos.c index 7631f77ea..9bd36c7c8 100644 --- a/libc/tinymath/acos.c +++ b/libc/tinymath/acos.c @@ -27,11 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */ /* diff --git a/libc/tinymath/acosf.c b/libc/tinymath/acosf.c index 969a464b4..ee451d43a 100644 --- a/libc/tinymath/acosf.c +++ b/libc/tinymath/acosf.c @@ -28,11 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */ /* diff --git a/libc/tinymath/acosh.c b/libc/tinymath/acosh.c index b1bb9a4fa..cc0c3f0f5 100644 --- a/libc/tinymath/acosh.c +++ b/libc/tinymath/acosh.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns inverse hyperbolic cosine of 𝑥. diff --git a/libc/tinymath/acoshf.c b/libc/tinymath/acoshf.c index f0bc86b5c..d8d4be8e9 100644 --- a/libc/tinymath/acoshf.c +++ b/libc/tinymath/acoshf.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); /** * Returns inverse hyperbolic cosine of 𝑥. diff --git a/libc/tinymath/acoshl.c b/libc/tinymath/acoshl.c index 019d6ae74..a0b16cc66 100644 --- a/libc/tinymath/acoshl.c +++ b/libc/tinymath/acoshl.c @@ -39,15 +39,8 @@ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); /* EXP_LARGE is the threshold above which we use acosh(x) ~= log(2x). */ #if LDBL_MANT_DIG == 64 diff --git a/libc/tinymath/acosl.c b/libc/tinymath/acosl.c index d65ab0a27..eaa8ffb7f 100644 --- a/libc/tinymath/acosl.c +++ b/libc/tinymath/acosl.c @@ -29,15 +29,8 @@ #include "libc/tinymath/invtrigl.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_acosl.c */ /* diff --git a/libc/tinymath/asin.c b/libc/tinymath/asin.c index 0d5afb86f..761f33636 100644 --- a/libc/tinymath/asin.c +++ b/libc/tinymath/asin.c @@ -26,12 +26,9 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */ /* diff --git a/libc/tinymath/asinf.c b/libc/tinymath/asinf.c index 36e4320af..ff0eed85a 100644 --- a/libc/tinymath/asinf.c +++ b/libc/tinymath/asinf.c @@ -28,14 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */ /* diff --git a/libc/tinymath/asinh.c b/libc/tinymath/asinh.c index 62d70372b..fd9967fff 100644 --- a/libc/tinymath/asinh.c +++ b/libc/tinymath/asinh.c @@ -28,12 +28,7 @@ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/freebsd.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns inverse hyperbolic sine of 𝑥. diff --git a/libc/tinymath/asinhf.c b/libc/tinymath/asinhf.c index bd66d94cf..840e40852 100644 --- a/libc/tinymath/asinhf.c +++ b/libc/tinymath/asinhf.c @@ -27,12 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); /** * Returns inverse hyperbolic sine of 𝑥. diff --git a/libc/tinymath/asinhl.c b/libc/tinymath/asinhl.c index 16d2790e6..c39f5f97c 100644 --- a/libc/tinymath/asinhl.c +++ b/libc/tinymath/asinhl.c @@ -38,16 +38,11 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. 
Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); + +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) /* EXP_LARGE is the threshold above which we use asinh(x) ~= log(2x). */ /* EXP_TINY is the threshold below which we use asinh(x) ~= x. */ diff --git a/libc/tinymath/asinl.c b/libc/tinymath/asinl.c index 07ff3f32c..bec55a702 100644 --- a/libc/tinymath/asinl.c +++ b/libc/tinymath/asinl.c @@ -30,15 +30,8 @@ #include "libc/tinymath/invtrigl.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_asinl.c */ /* diff --git a/libc/tinymath/atan.c b/libc/tinymath/atan.c index 9314d1ba1..bfcf1fb67 100644 --- a/libc/tinymath/atan.c +++ b/libc/tinymath/atan.c @@ -27,15 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */ /* diff --git a/libc/tinymath/atan2.c b/libc/tinymath/atan2.c index 663d979aa..04b79f109 100644 --- a/libc/tinymath/atan2.c +++ b/libc/tinymath/atan2.c @@ -38,14 +38,8 @@ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); /* atan2(y,x) * Method : diff --git a/libc/tinymath/atan2f.c b/libc/tinymath/atan2f.c index e62ead380..3e8ca8194 100644 --- a/libc/tinymath/atan2f.c +++ b/libc/tinymath/atan2f.c @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/atanf_common.internal.h" #include "libc/tinymath/internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); #define Pi (0x1.921fb6p+1f) #define PiOver2 (0x1.921fb6p+0f) diff --git a/libc/tinymath/atan2l.c b/libc/tinymath/atan2l.c index f2f1cac8c..f8b61f6d8 100644 --- a/libc/tinymath/atan2l.c +++ b/libc/tinymath/atan2l.c @@ -30,15 +30,9 @@ #include "libc/tinymath/invtrigl.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/e_atan2l.c */ /* diff --git a/libc/tinymath/atan_common.internal.h b/libc/tinymath/atan_common.internal.h index 57c6aaf01..7c50045a6 100644 --- a/libc/tinymath/atan_common.internal.h +++ b/libc/tinymath/atan_common.internal.h @@ -4,7 +4,6 @@ #include "libc/tinymath/estrin_wrap.internal.h" #include "libc/tinymath/horner.internal.h" COSMOPOLITAN_C_START_ -// clang-format off /* * Double-precision polynomial evaluation function for scalar and vector atan(x) diff --git a/libc/tinymath/atan_data.c b/libc/tinymath/atan_data.c index e28e2d5b0..3b465d0c5 100644 --- a/libc/tinymath/atan_data.c +++ b/libc/tinymath/atan_data.c @@ -27,12 +27,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/atan_data.internal.h" -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - const struct atan_poly_data __atan_poly_data = { .poly = {/* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. See atan.sollya for details of how these were diff --git a/libc/tinymath/atanf.c b/libc/tinymath/atanf.c index 59e773020..e2a687bf8 100644 --- a/libc/tinymath/atanf.c +++ b/libc/tinymath/atanf.c @@ -27,15 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */ /* diff --git a/libc/tinymath/atanf_common.internal.h b/libc/tinymath/atanf_common.internal.h index 13368d3a1..38a2df65d 100644 --- a/libc/tinymath/atanf_common.internal.h +++ b/libc/tinymath/atanf_common.internal.h @@ -4,7 +4,6 @@ #include "libc/tinymath/estrin_wrap.internal.h" #include "libc/tinymath/hornerf.internal.h" COSMOPOLITAN_C_START_ -// clang-format off #if WANT_VMATH diff --git a/libc/tinymath/atanf_data.c b/libc/tinymath/atanf_data.c index cf22b0506..e44a7631e 100644 --- a/libc/tinymath/atanf_data.c +++ b/libc/tinymath/atanf_data.c @@ -27,12 +27,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/atanf_data.internal.h" -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. */ const struct atanf_poly_data __atanf_poly_data = { diff --git a/libc/tinymath/atanh.c b/libc/tinymath/atanh.c index 36bd107dc..f05f0466d 100644 --- a/libc/tinymath/atanh.c +++ b/libc/tinymath/atanh.c @@ -27,12 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns inverse hyperbolic tangent of 𝑥. 
diff --git a/libc/tinymath/atanhf.c b/libc/tinymath/atanhf.c index fb00d8452..f2431cc90 100644 --- a/libc/tinymath/atanhf.c +++ b/libc/tinymath/atanhf.c @@ -27,12 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); /** * Returns inverse hyperbolic tangent of 𝑥. diff --git a/libc/tinymath/atanhl.c b/libc/tinymath/atanhl.c index 208ff9b9c..e10b33c6a 100644 --- a/libc/tinymath/atanhl.c +++ b/libc/tinymath/atanhl.c @@ -29,12 +29,7 @@ #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns inverse hyperbolic tangent of 𝑥. diff --git a/libc/tinymath/atanl.c b/libc/tinymath/atanl.c index ced707ff5..6f3a29942 100644 --- a/libc/tinymath/atanl.c +++ b/libc/tinymath/atanl.c @@ -29,15 +29,8 @@ #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_atanl.c */ /* diff --git a/libc/tinymath/cacos.c b/libc/tinymath/cacos.c index 9ad032fd7..6c774f583 100644 --- a/libc/tinymath/cacos.c +++ b/libc/tinymath/cacos.c @@ -28,12 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); // FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997 diff --git a/libc/tinymath/cacosf.c b/libc/tinymath/cacosf.c index 1b6bfcd3e..0c8add51d 100644 --- a/libc/tinymath/cacosf.c +++ b/libc/tinymath/cacosf.c @@ -28,12 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); // FIXME diff --git a/libc/tinymath/cacosh.c b/libc/tinymath/cacosh.c index 4eccd729c..3a663ee16 100644 --- a/libc/tinymath/cacosh.c +++ b/libc/tinymath/cacosh.c @@ -28,12 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /* acosh(z) = i acos(z) */ double complex cacosh(double complex z) diff --git a/libc/tinymath/cacoshf.c b/libc/tinymath/cacoshf.c index a029f08da..0160691fa 100644 --- a/libc/tinymath/cacoshf.c +++ b/libc/tinymath/cacoshf.c @@ -28,12 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); float complex cacoshf(float complex z) { diff --git a/libc/tinymath/cacoshl.c b/libc/tinymath/cacoshl.c index 1c5e96cb4..5e129462a 100644 --- a/libc/tinymath/cacoshl.c +++ b/libc/tinymath/cacoshl.c @@ -28,12 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); long double complex cacoshl(long double complex z) { diff --git a/libc/tinymath/cacosl.c b/libc/tinymath/cacosl.c index 0e09d4753..adfff0b5d 100644 --- a/libc/tinymath/cacosl.c +++ b/libc/tinymath/cacosl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); long double complex cacosl(long double complex z) { #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 diff --git a/libc/tinymath/casin.c b/libc/tinymath/casin.c index b0e85ca2a..f6478d061 100644 --- a/libc/tinymath/casin.c +++ b/libc/tinymath/casin.c @@ -28,12 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); // FIXME diff --git a/libc/tinymath/casinf.c b/libc/tinymath/casinf.c index 1d0b35414..04766d91d 100644 --- a/libc/tinymath/casinf.c +++ b/libc/tinymath/casinf.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); // FIXME diff --git a/libc/tinymath/casinh.c b/libc/tinymath/casinh.c index 5a11878a7..56d958807 100644 --- a/libc/tinymath/casinh.c +++ b/libc/tinymath/casinh.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); /* asinh(z) = -i asin(i z) */ diff --git a/libc/tinymath/casinhf.c b/libc/tinymath/casinhf.c index 503eb3437..8bcc1e726 100644 --- a/libc/tinymath/casinhf.c +++ b/libc/tinymath/casinhf.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); float complex casinhf(float complex z) { diff --git a/libc/tinymath/casinhl.c b/libc/tinymath/casinhl.c index 5766ff6d9..193d8d53a 100644 --- a/libc/tinymath/casinhl.c +++ b/libc/tinymath/casinhl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); long double complex casinhl(long double complex z) { #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 diff --git a/libc/tinymath/casinl.c b/libc/tinymath/casinl.c index 618d236be..78913278d 100644 --- a/libc/tinymath/casinl.c +++ b/libc/tinymath/casinl.c @@ -27,12 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); long double complex casinl(long double complex z) { diff --git a/libc/tinymath/catan.c b/libc/tinymath/catan.c index b9c6b7faa..dd3b4a90e 100644 --- a/libc/tinymath/catan.c +++ b/libc/tinymath/catan.c @@ -28,16 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off - +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/s_catan.c */ /* @@ -99,7 +91,6 @@ asm(".include \"libc/disclaimer.inc\""); * 2.9e-17. See also clog(). */ - #define MAXNUM 1.0e308 static const double DP1 = 3.14159265160560607910E0; diff --git a/libc/tinymath/catanf.c b/libc/tinymath/catanf.c index ce5f2cdb0..846c2a7ad 100644 --- a/libc/tinymath/catanf.c +++ b/libc/tinymath/catanf.c @@ -28,16 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/s_catanf.c */ /* diff --git a/libc/tinymath/catanh.c b/libc/tinymath/catanh.c index 087239547..12b2c88b1 100644 --- a/libc/tinymath/catanh.c +++ b/libc/tinymath/catanh.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); /* atanh = -i atan(i z) */ diff --git a/libc/tinymath/catanhf.c b/libc/tinymath/catanhf.c index 90f491207..b7282e92e 100644 --- a/libc/tinymath/catanhf.c +++ b/libc/tinymath/catanhf.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); float complex catanhf(float complex z) { diff --git a/libc/tinymath/catanhl.c b/libc/tinymath/catanhl.c index 299aadece..59c2a88fc 100644 --- a/libc/tinymath/catanhl.c +++ b/libc/tinymath/catanhl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); long double complex catanhl(long double complex z) { #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 diff --git a/libc/tinymath/catanl.c b/libc/tinymath/catanl.c index 3f87d3a3b..633d155ea 100644 --- a/libc/tinymath/catanl.c +++ b/libc/tinymath/catanl.c @@ -22,15 +22,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("openbsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* * Complex circular arc tangent diff --git a/libc/tinymath/cbrt.c b/libc/tinymath/cbrt.c index b88b7e297..55e324164 100644 --- a/libc/tinymath/cbrt.c +++ b/libc/tinymath/cbrt.c @@ -26,15 +26,9 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */ /* diff --git a/libc/tinymath/cbrtf.c b/libc/tinymath/cbrtf.c index 0ec14e758..088ac83a7 100644 --- a/libc/tinymath/cbrtf.c +++ b/libc/tinymath/cbrtf.c @@ -26,15 +26,9 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ /* diff --git a/libc/tinymath/cbrtl.c b/libc/tinymath/cbrtl.c index ccd1040f1..4e1e2863c 100644 --- a/libc/tinymath/cbrtl.c +++ b/libc/tinymath/cbrtl.c @@ -28,17 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtl.c */ /*- diff --git a/libc/tinymath/ccos.c b/libc/tinymath/ccos.c index 35d3f137f..ccfddd586 100644 --- a/libc/tinymath/ccos.c +++ b/libc/tinymath/ccos.c @@ -17,8 +17,11 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" -#include "libc/math.h" double complex ccos(double complex z) { return ccosh(CMPLX(-cimag(z), creal(z))); } + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +__weak_reference(ccos, ccosl); +#endif diff --git a/libc/tinymath/ccosf.c b/libc/tinymath/ccosf.c index 5613099ed..6356d54cb 100644 --- a/libc/tinymath/ccosf.c +++ b/libc/tinymath/ccosf.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); float complex ccosf(float complex z) { diff --git a/libc/tinymath/ccosh.c b/libc/tinymath/ccosh.c index 28bb3cbc2..c45f39ee3 100644 --- a/libc/tinymath/ccosh.c +++ b/libc/tinymath/ccosh.c @@ -29,14 +29,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_ccosh.c */ /*- diff --git a/libc/tinymath/ccoshf.c b/libc/tinymath/ccoshf.c index d2e137044..d7e02c489 100644 --- a/libc/tinymath/ccoshf.c +++ b/libc/tinymath/ccoshf.c @@ -29,14 +29,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_ccoshf.c */ /*- diff --git a/libc/tinymath/ccoshl.c b/libc/tinymath/ccoshl.c index 7d22ca69d..bc388ffc3 100644 --- a/libc/tinymath/ccoshl.c +++ b/libc/tinymath/ccoshl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); //FIXME long double complex ccoshl(long double complex z) diff --git a/libc/tinymath/ccosl.c b/libc/tinymath/ccosl.c index 76548b9d6..a16144507 100644 --- a/libc/tinymath/ccosl.c +++ b/libc/tinymath/ccosl.c @@ -26,17 +26,11 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); long double complex ccosl(long double complex z) { -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 - return ccos(z); -#else return ccoshl(CMPLXL(-cimagl(z), creall(z))); -#endif } + +#endif diff --git a/libc/tinymath/ceil.c b/libc/tinymath/ceil.c index 4ba6ec6da..dd5c4f6eb 100644 --- a/libc/tinymath/ceil.c +++ b/libc/tinymath/ceil.c @@ -31,12 +31,7 @@ #ifndef __llvm__ #include "third_party/intel/smmintrin.internal.h" #endif - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON diff --git a/libc/tinymath/ceilf.c b/libc/tinymath/ceilf.c index 2dad43065..ab34911fd 100644 --- a/libc/tinymath/ceilf.c +++ b/libc/tinymath/ceilf.c @@ -30,12 +30,7 @@ #ifndef __llvm__ #include "third_party/intel/smmintrin.internal.h" #endif - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns smallest integral value not less than 𝑥. diff --git a/libc/tinymath/ceill.c b/libc/tinymath/ceill.c index 921a93836..42c733662 100644 --- a/libc/tinymath/ceill.c +++ b/libc/tinymath/ceill.c @@ -28,12 +28,7 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns smallest integral value not less than 𝑥. diff --git a/libc/tinymath/cexp.c b/libc/tinymath/cexp.c index c6fd6e086..5f09c24e8 100644 --- a/libc/tinymath/cexp.c +++ b/libc/tinymath/cexp.c @@ -28,13 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_cexp.c */ /*- diff --git a/libc/tinymath/cexpf.c b/libc/tinymath/cexpf.c index 200b9de4f..a26b30feb 100644 --- a/libc/tinymath/cexpf.c +++ b/libc/tinymath/cexpf.c @@ -28,13 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_cexpf.c */ /*- diff --git a/libc/tinymath/cexpl.c b/libc/tinymath/cexpl.c index a58b94d2e..1800be2d5 100644 --- a/libc/tinymath/cexpl.c +++ b/libc/tinymath/cexpl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); //FIXME long double complex cexpl(long double complex z) diff --git a/libc/tinymath/clog.c b/libc/tinymath/clog.c index 60c48fb9c..77b4cb284 100644 --- a/libc/tinymath/clog.c +++ b/libc/tinymath/clog.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); // FIXME @@ -49,3 +42,7 @@ double complex clog(double complex z) phi = carg(z); return CMPLX(log(r), phi); } + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +__weak_reference(clog, clogl); +#endif diff --git a/libc/tinymath/clogf.c b/libc/tinymath/clogf.c index dcc3a4b29..e658c58f3 100644 --- a/libc/tinymath/clogf.c +++ b/libc/tinymath/clogf.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); // FIXME diff --git a/libc/tinymath/clogl.c b/libc/tinymath/clogl.c index 868f44eb5..d55a153b8 100644 --- a/libc/tinymath/clogl.c +++ b/libc/tinymath/clogl.c @@ -27,21 +27,10 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" #include "libc/math.h" +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #include "libc/tinymath/complex.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -long double complex clogl(long double complex z) -{ - return clog(z); -} -#else -// FIXME long double complex clogl(long double complex z) { long double r, phi; @@ -49,4 +38,5 @@ long double complex clogl(long double complex z) phi = cargl(z); return CMPLXL(logl(r), phi); } + #endif diff --git a/libc/tinymath/cos.c b/libc/tinymath/cos.c index 00776746a..d1b2b58f4 100644 --- a/libc/tinymath/cos.c +++ b/libc/tinymath/cos.c @@ -28,15 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/kernel.internal.h" - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ /* diff --git a/libc/tinymath/cosdf.c b/libc/tinymath/cosdf.c index 0fb5e3da1..3d35a6436 100644 --- a/libc/tinymath/cosdf.c +++ b/libc/tinymath/cosdf.c @@ -27,15 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ /* diff --git a/libc/tinymath/cosf.c b/libc/tinymath/cosf.c index 6e12a7b14..36d3ca2db 100644 --- a/libc/tinymath/cosf.c +++ b/libc/tinymath/cosf.c @@ -29,15 +29,10 @@ #include "libc/tinymath/complex.internal.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ /* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */ /* diff --git a/libc/tinymath/cosh.c b/libc/tinymath/cosh.c index 3c98b9253..4db136b38 100644 --- a/libc/tinymath/cosh.c +++ b/libc/tinymath/cosh.c @@ -28,12 +28,7 @@ #include "libc/math.h" #include "libc/tinymath/expo.internal.h" #include "libc/tinymath/feval.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns hyperbolic cosine of 𝑥. diff --git a/libc/tinymath/coshf.c b/libc/tinymath/coshf.c index 1e352d1c4..e0bef68dc 100644 --- a/libc/tinymath/coshf.c +++ b/libc/tinymath/coshf.c @@ -28,12 +28,7 @@ #include "libc/math.h" #include "libc/tinymath/expo.internal.h" #include "libc/tinymath/feval.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); /** * Returns hyperbolic cosine of 𝑥. diff --git a/libc/tinymath/coshl.c b/libc/tinymath/coshl.c index f7cfb1ed3..c2cb21c6e 100644 --- a/libc/tinymath/coshl.c +++ b/libc/tinymath/coshl.c @@ -38,16 +38,11 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); + +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #if LDBL_MAX_EXP != 0x4000 /* We also require the usual expsign encoding. */ diff --git a/libc/tinymath/cosl.c b/libc/tinymath/cosl.c index 1368cc825..6642af10e 100644 --- a/libc/tinymath/cosl.c +++ b/libc/tinymath/cosl.c @@ -30,12 +30,7 @@ #include "libc/tinymath/kernel.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns cosine of 𝑥. 
diff --git a/libc/tinymath/cpow.c b/libc/tinymath/cpow.c index 4a0aa4b10..5d58e14d9 100644 --- a/libc/tinymath/cpow.c +++ b/libc/tinymath/cpow.c @@ -26,18 +26,15 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" -#include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /* pow(z, c) = exp(c log(z)), See C99 G.6.4.1 */ - double complex cpow(double complex z, double complex c) { return cexp(c * clog(z)); } + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +__weak_reference(cpow, cpowl); +#endif diff --git a/libc/tinymath/cpowf.c b/libc/tinymath/cpowf.c index 671bad57e..9d3656faa 100644 --- a/libc/tinymath/cpowf.c +++ b/libc/tinymath/cpowf.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); float complex cpowf(float complex z, float complex c) { diff --git a/libc/tinymath/cpowl.c b/libc/tinymath/cpowl.c index 389642031..1fa76aee6 100644 --- a/libc/tinymath/cpowl.c +++ b/libc/tinymath/cpowl.c @@ -26,21 +26,12 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -long double complex cpowl(long double complex z, long double complex c) -{ - return cpow(z, c); -} -#else long double complex cpowl(long double complex z, long double complex c) { return cexpl(c * clogl(z)); } + #endif diff --git a/libc/tinymath/csin.c b/libc/tinymath/csin.c index 23d0f27c2..eeb1a30f4 100644 --- a/libc/tinymath/csin.c +++ b/libc/tinymath/csin.c @@ -28,19 +28,15 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); /* sin(z) = -i sinh(i z) */ - double complex csin(double complex z) { z = csinh(CMPLX(-cimag(z), creal(z))); return CMPLX(cimag(z), -creal(z)); } + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +__weak_reference(csin, csinl); +#endif diff --git a/libc/tinymath/csinf.c b/libc/tinymath/csinf.c index 24511f17b..0883e26cd 100644 --- a/libc/tinymath/csinf.c +++ b/libc/tinymath/csinf.c @@ -28,12 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ diff --git a/libc/tinymath/csinh.c b/libc/tinymath/csinh.c index 8f59253da..22da546e0 100644 --- a/libc/tinymath/csinh.c +++ b/libc/tinymath/csinh.c @@ -29,15 +29,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off - +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_csinh.c */ /*- diff --git a/libc/tinymath/csinhf.c b/libc/tinymath/csinhf.c index 07471ce81..d75ac0c07 100644 --- a/libc/tinymath/csinhf.c +++ b/libc/tinymath/csinhf.c @@ -29,14 +29,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_csinhf.c */ /*- diff --git a/libc/tinymath/csinhl.c b/libc/tinymath/csinhl.c index 1a83181fc..f85b8b26a 100644 --- a/libc/tinymath/csinhl.c +++ b/libc/tinymath/csinhl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); //FIXME long double complex csinhl(long double complex z) diff --git a/libc/tinymath/csinl.c b/libc/tinymath/csinl.c index ddf599d90..02b5d0178 100644 --- a/libc/tinymath/csinl.c +++ b/libc/tinymath/csinl.c @@ -26,22 +26,13 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off - -#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 -long double complex csinl(long double complex z) -{ - return csin(z); -} -#else long double complex csinl(long double complex z) { z = csinhl(CMPLXL(-cimagl(z), creall(z))); return CMPLXL(cimagl(z), -creall(z)); } + #endif diff --git a/libc/tinymath/csqrt.c b/libc/tinymath/csqrt.c index 70583e7a2..d2bccdc38 100644 --- a/libc/tinymath/csqrt.c +++ b/libc/tinymath/csqrt.c @@ -28,12 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_csqrt.c */ /*- diff --git a/libc/tinymath/csqrtf.c b/libc/tinymath/csqrtf.c index 27c1e6889..61d142f11 100644 --- a/libc/tinymath/csqrtf.c +++ b/libc/tinymath/csqrtf.c @@ -28,13 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_csqrtf.c */ /*- diff --git a/libc/tinymath/csqrtl.c b/libc/tinymath/csqrtl.c index c6b1e7524..df534fd9f 100644 --- a/libc/tinymath/csqrtl.c +++ b/libc/tinymath/csqrtl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); //FIXME long double complex csqrtl(long double complex z) diff --git a/libc/tinymath/ctan.c b/libc/tinymath/ctan.c index b3e2de71d..8e20f8757 100644 --- a/libc/tinymath/ctan.c +++ b/libc/tinymath/ctan.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); /* tan(z) = -i tanh(i z) */ diff --git a/libc/tinymath/ctanf.c b/libc/tinymath/ctanf.c index f584c5ea9..fe65ba01b 100644 --- a/libc/tinymath/ctanf.c +++ b/libc/tinymath/ctanf.c @@ -28,14 +28,7 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - - +__static_yoink("musl_libc_notice"); float complex ctanf(float complex z) { diff --git a/libc/tinymath/ctanh.c b/libc/tinymath/ctanh.c index 94a1776d8..721e303da 100644 --- a/libc/tinymath/ctanh.c +++ b/libc/tinymath/ctanh.c @@ -28,16 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -ctahnh (BSD-2 License)\\n\ -Copyright (c) 2011 David Schultz \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_ctanh.c */ /*- diff --git a/libc/tinymath/ctanhf.c b/libc/tinymath/ctanhf.c index 29d7e3f1b..b806dcf58 100644 --- a/libc/tinymath/ctanhf.c +++ b/libc/tinymath/ctanhf.c @@ -28,13 +28,8 @@ #include "libc/complex.h" #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_ctanhf.c */ /*- diff --git a/libc/tinymath/ctanhl.c b/libc/tinymath/ctanhl.c index 6aa599c3c..43061dcf9 100644 --- a/libc/tinymath/ctanhl.c +++ b/libc/tinymath/ctanhl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); //FIXME long double complex ctanhl(long double complex z) diff --git a/libc/tinymath/ctanl.c b/libc/tinymath/ctanl.c index d8602d9b7..27f0d94ca 100644 --- a/libc/tinymath/ctanl.c +++ b/libc/tinymath/ctanl.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/complex.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 long double complex ctanl(long double complex z) diff --git a/libc/tinymath/erf.c b/libc/tinymath/erf.c index 029a41840..a3b976358 100644 --- a/libc/tinymath/erf.c +++ b/libc/tinymath/erf.c @@ -26,15 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ /* diff --git a/libc/tinymath/erff.c b/libc/tinymath/erff.c index cea72f1a4..8e7d60006 100644 --- a/libc/tinymath/erff.c +++ b/libc/tinymath/erff.c @@ -26,15 +26,9 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("freebsd_libm_notice"); +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ /* diff --git a/libc/tinymath/erfl.c b/libc/tinymath/erfl.c index 6ac746b30..60bb0ef2a 100644 --- a/libc/tinymath/erfl.c +++ b/libc/tinymath/erfl.c @@ -28,15 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. 
Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("openbsd_libm_notice"); +__static_yoink("musl_libc_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_erfl.c */ /* diff --git a/libc/tinymath/estrin_wrap.internal.h b/libc/tinymath/estrin_wrap.internal.h index c71015fa0..ee77c108e 100644 --- a/libc/tinymath/estrin_wrap.internal.h +++ b/libc/tinymath/estrin_wrap.internal.h @@ -8,7 +8,6 @@ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -// clang-format off #define ESTRIN_1_(x, c, i) FMA(x, c(1 + i), c(i)) #define ESTRIN_2_(x, x2, c, i) FMA(x2, c(2 + i), ESTRIN_1_(x, c, i)) #define ESTRIN_3_(x, x2, c, i) FMA(x2, ESTRIN_1_(x, c, 2 + i), ESTRIN_1_(x, c, i)) diff --git a/libc/tinymath/exp.c b/libc/tinymath/exp.c index e9ea6fddb..4d6cf8541 100644 --- a/libc/tinymath/exp.c +++ b/libc/tinymath/exp.c @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/exp_data.internal.h" #include "libc/tinymath/internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Double-precision e^x function. diff --git a/libc/tinymath/exp10.c b/libc/tinymath/exp10.c index 27baff1cd..db15c0c61 100644 --- a/libc/tinymath/exp10.c +++ b/libc/tinymath/exp10.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns 10ˣ. 
diff --git a/libc/tinymath/exp10f.c b/libc/tinymath/exp10f.c index 16dc2071a..9bbae5350 100644 --- a/libc/tinymath/exp10f.c +++ b/libc/tinymath/exp10f.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns 10ˣ. diff --git a/libc/tinymath/exp10l.c b/libc/tinymath/exp10l.c index e108701d7..1111bde6e 100644 --- a/libc/tinymath/exp10l.c +++ b/libc/tinymath/exp10l.c @@ -28,12 +28,7 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); /** * Returns 10ˣ. diff --git a/libc/tinymath/exp2.c b/libc/tinymath/exp2.c index 1dc2c9a43..627a823ad 100644 --- a/libc/tinymath/exp2.c +++ b/libc/tinymath/exp2.c @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/exp_data.internal.h" #include "libc/tinymath/internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Double-precision 2^x function. diff --git a/libc/tinymath/exp2f.c b/libc/tinymath/exp2f.c index 1687b8b30..ad68d4a70 100644 --- a/libc/tinymath/exp2f.c +++ b/libc/tinymath/exp2f.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. 
│ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/exp2f_data.internal.h" #include "libc/tinymath/internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Single-precision 2^x function. diff --git a/libc/tinymath/exp2f_data.c b/libc/tinymath/exp2f_data.c index 94442cc73..e2b098251 100644 --- a/libc/tinymath/exp2f_data.c +++ b/libc/tinymath/exp2f_data.c @@ -1,9 +1,9 @@ /*-*- mode:c;indent-tabs-mode:t;c-basic-offset:8;tab-width:8;coding:utf-8 -*-│ -│ vi: set et ft=c ts=8 sw=8 fenc=utf-8 :vi │ +│ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/exp2f_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Shared data between expf, exp2f and powf. 
diff --git a/libc/tinymath/exp2l.c b/libc/tinymath/exp2l.c index 28553aed2..a4c8f9a7f 100644 --- a/libc/tinymath/exp2l.c +++ b/libc/tinymath/exp2l.c @@ -28,16 +28,11 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" -#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); + +#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 /* origin: FreeBSD /usr/src/lib/msun/ld80/s_exp2l.c and /usr/src/lib/msun/ld128/s_exp2l.c */ /*- diff --git a/libc/tinymath/exp_data.c b/libc/tinymath/exp_data.c index 0d4b880a8..df7efc71e 100644 --- a/libc/tinymath/exp_data.c +++ b/libc/tinymath/exp_data.c @@ -27,12 +27,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/exp_data.internal.h" -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - /* * Shared data between exp, exp2 and pow. * diff --git a/libc/tinymath/expf.c b/libc/tinymath/expf.c index 4879566ab..43f33219b 100644 --- a/libc/tinymath/expf.c +++ b/libc/tinymath/expf.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. 
│ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/exp2f_data.internal.h" #include "libc/tinymath/internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Single-precision e^x function. diff --git a/libc/tinymath/expl.c b/libc/tinymath/expl.c index 5ff1010c3..52dffa2f4 100644 --- a/libc/tinymath/expl.c +++ b/libc/tinymath/expl.c @@ -17,18 +17,11 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 #include "libc/tinymath/internal.h" - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_expl.c */ /* @@ -149,15 +142,7 @@ long double expl(long double x) #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 #include "libc/tinymath/freebsd.internal.h" - -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD diff --git a/libc/tinymath/expm1.c b/libc/tinymath/expm1.c index 12600b69b..2fc2d66aa 100644 --- a/libc/tinymath/expm1.c +++ b/libc/tinymath/expm1.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */ /* diff --git a/libc/tinymath/expm1f.c b/libc/tinymath/expm1f.c index 6500aec27..097aa40e4 100644 --- a/libc/tinymath/expm1f.c +++ b/libc/tinymath/expm1f.c @@ -38,14 +38,8 @@ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); static const float one = 1.0, diff --git a/libc/tinymath/expm1l.c b/libc/tinymath/expm1l.c index bd585bca8..45dae3426 100644 --- a/libc/tinymath/expm1l.c +++ b/libc/tinymath/expm1l.c @@ -1,12 +1,7 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" -// clang-format off #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. 
Moshier \""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_expm1l.c */ /* diff --git a/libc/tinymath/expo2.c b/libc/tinymath/expo2.c index 3f8eda928..befac99ad 100644 --- a/libc/tinymath/expo2.c +++ b/libc/tinymath/expo2.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/expo.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f #define INSERT_WORDS(d,hi,lo) \ diff --git a/libc/tinymath/expo2f.c b/libc/tinymath/expo2f.c index 6d149db93..fc5c66245 100644 --- a/libc/tinymath/expo2f.c +++ b/libc/tinymath/expo2f.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f #define SET_FLOAT_WORD(d,w) \ diff --git a/libc/tinymath/floor.c b/libc/tinymath/floor.c index c492fc05c..a19b2e8f4 100644 --- a/libc/tinymath/floor.c +++ b/libc/tinymath/floor.c @@ -31,12 +31,8 @@ #ifndef __llvm__ #include "third_party/intel/smmintrin.internal.h" #endif +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON diff --git a/libc/tinymath/floorf.c b/libc/tinymath/floorf.c index de575ee27..c83aae1a7 100644 --- a/libc/tinymath/floorf.c +++ b/libc/tinymath/floorf.c @@ -30,12 +30,8 @@ #ifndef __llvm__ #include "third_party/intel/smmintrin.internal.h" #endif +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns largest integral value not greater than 𝑥. diff --git a/libc/tinymath/floorl.c b/libc/tinymath/floorl.c index 499012d8f..37d17d9f1 100644 --- a/libc/tinymath/floorl.c +++ b/libc/tinymath/floorl.c @@ -29,12 +29,8 @@ #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns largest integral value not greater than 𝑥. diff --git a/libc/tinymath/fma.c b/libc/tinymath/fma.c index 72a0ba9a0..308e31a77 100644 --- a/libc/tinymath/fma.c +++ b/libc/tinymath/fma.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/nexgen32e/x86feature.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #define ASUINT64(x) ((union {double f; uint64_t i;}){x}).i #define ZEROINFNAN (0x7ff-0x3ff-52-1) diff --git a/libc/tinymath/fmaf.c b/libc/tinymath/fmaf.c index c65a3d481..544301ca5 100644 --- a/libc/tinymath/fmaf.c +++ b/libc/tinymath/fmaf.c @@ -28,15 +28,8 @@ #include "libc/math.h" #include "libc/nexgen32e/x86feature.h" #include "libc/runtime/fenv.h" - -asm(".ident\t\"\\n\\n\ -Fused Multiply Add (MIT License)\\n\ -Copyright (c) 2005-2011 David Schultz \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ /*- diff --git a/libc/tinymath/fmal.c b/libc/tinymath/fmal.c index cefdca2d6..53b4db60f 100644 --- a/libc/tinymath/fmal.c +++ b/libc/tinymath/fmal.c @@ -31,13 +31,10 @@ #include "libc/runtime/fenv.h" #include "libc/tinymath/freebsd.internal.h" #include "libc/tinymath/ldshape.internal.h" -#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); + +#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 #if LDBL_MANT_DIG == 64 #define LASTBIT(u) (u.i.m & 1) diff --git a/libc/tinymath/fmod.c b/libc/tinymath/fmod.c index e08205299..b9956e9ef 100644 --- a/libc/tinymath/fmod.c +++ b/libc/tinymath/fmod.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Does (𝑥 rem 𝑦) w/ round()-style rounding. diff --git a/libc/tinymath/fmodf.c b/libc/tinymath/fmodf.c index 6e01ef28c..5e74153ef 100644 --- a/libc/tinymath/fmodf.c +++ b/libc/tinymath/fmodf.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off float fmodf(float x, float y) { diff --git a/libc/tinymath/fmodl.c b/libc/tinymath/fmodl.c index 2b7ecf532..ffe9fdb66 100644 --- a/libc/tinymath/fmodl.c +++ b/libc/tinymath/fmodl.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Does (𝑥 rem 𝑦) w/ round()-style rounding. diff --git a/libc/tinymath/freebsd.internal.h b/libc/tinymath/freebsd.internal.h index 0aca0a1ac..dff035568 100644 --- a/libc/tinymath/freebsd.internal.h +++ b/libc/tinymath/freebsd.internal.h @@ -5,7 +5,6 @@ #include "libc/math.h" #include "libc/runtime/fenv.h" COSMOPOLITAN_C_START_ -// clang-format off #define __CONCAT1(x,y) x ## y #define __CONCAT(x,y) __CONCAT1(x,y) diff --git a/libc/tinymath/frexp.c b/libc/tinymath/frexp.c index 4ba2e1b7a..78a8e80b3 100644 --- a/libc/tinymath/frexp.c +++ b/libc/tinymath/frexp.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Splits number normalized fraction and exponent. diff --git a/libc/tinymath/frexpf.c b/libc/tinymath/frexpf.c index 4666ec4e5..759a87006 100644 --- a/libc/tinymath/frexpf.c +++ b/libc/tinymath/frexpf.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ /** * Splits number normalized fraction and exponent. diff --git a/libc/tinymath/frexpl.c b/libc/tinymath/frexpl.c index 3d4fd6b41..45b5bbcac 100644 --- a/libc/tinymath/frexpl.c +++ b/libc/tinymath/frexpl.c @@ -29,13 +29,10 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); + +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) /** * Splits number normalized fraction and exponent. 
diff --git a/libc/tinymath/horner_wrap.internal.h b/libc/tinymath/horner_wrap.internal.h index 984c728c7..0c213a4a0 100644 --- a/libc/tinymath/horner_wrap.internal.h +++ b/libc/tinymath/horner_wrap.internal.h @@ -8,7 +8,6 @@ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -// clang-format off #define HORNER_1_(x, c, i) FMA(c(i + 1), x, c(i)) #define HORNER_2_(x, c, i) FMA(HORNER_1_ (x, c, i + 1), x, c(i)) #define HORNER_3_(x, c, i) FMA(HORNER_2_ (x, c, i + 1), x, c(i)) diff --git a/libc/tinymath/hypot.c b/libc/tinymath/hypot.c index cd8cac22b..37ba5b507 100644 --- a/libc/tinymath/hypot.c +++ b/libc/tinymath/hypot.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64 #define SPLIT (0x1p32 + 1) diff --git a/libc/tinymath/hypotf.c b/libc/tinymath/hypotf.c index 5dd9c8eef..4692b97b7 100644 --- a/libc/tinymath/hypotf.c +++ b/libc/tinymath/hypotf.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off float hypotf(float x, float y) { diff --git a/libc/tinymath/hypotl.c b/libc/tinymath/hypotl.c index b524fd52b..a45fd7ed4 100644 --- a/libc/tinymath/hypotl.c +++ b/libc/tinymath/hypotl.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #if LDBL_MANT_DIG == 64 #define SPLIT (0x1p32L+1) diff --git a/libc/tinymath/ilogb.c b/libc/tinymath/ilogb.c index fb327610d..595da8c28 100644 --- a/libc/tinymath/ilogb.c +++ b/libc/tinymath/ilogb.c @@ -28,12 +28,8 @@ #include "libc/limits.h" #include "libc/math.h" #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns log₂𝑥 exponent part of double. diff --git a/libc/tinymath/ilogbf.c b/libc/tinymath/ilogbf.c index 6e3297604..744594eb1 100644 --- a/libc/tinymath/ilogbf.c +++ b/libc/tinymath/ilogbf.c @@ -28,12 +28,8 @@ #include "libc/limits.h" #include "libc/math.h" #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns log₂𝑥 exponent part of double. 
diff --git a/libc/tinymath/internal.h b/libc/tinymath/internal.h index cc121fed0..5d4b097b1 100644 --- a/libc/tinymath/internal.h +++ b/libc/tinymath/internal.h @@ -8,7 +8,6 @@ COSMOPOLITAN_C_START_ #define issignalingf_inline(x) 0 #define issignaling_inline(x) 0 -// clang-format off #define asuint(f) ((union{float _f; uint32_t _i;}){f})._i #define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f #define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i diff --git a/libc/tinymath/invtrigl.c b/libc/tinymath/invtrigl.c index 67d984089..c03ff89da 100644 --- a/libc/tinymath/invtrigl.c +++ b/libc/tinymath/invtrigl.c @@ -31,15 +31,9 @@ #include "libc/tinymath/invtrigl.internal.h" #include "libc/tinymath/kernel.internal.h" #include "libc/tinymath/ldshape.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 static const long double diff --git a/libc/tinymath/j0.c b/libc/tinymath/j0.c index 6d6eda6d5..2c4746213 100644 --- a/libc/tinymath/j0.c +++ b/libc/tinymath/j0.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ /* diff --git a/libc/tinymath/j0f.c b/libc/tinymath/j0f.c index 6723ab68b..80c663da0 100644 --- a/libc/tinymath/j0f.c +++ b/libc/tinymath/j0f.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ /* diff --git a/libc/tinymath/j1.c b/libc/tinymath/j1.c index bd3240687..25b04ce1c 100644 --- a/libc/tinymath/j1.c +++ b/libc/tinymath/j1.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ /* diff --git 
a/libc/tinymath/j1f.c b/libc/tinymath/j1f.c index 8b86031f3..0b016b683 100644 --- a/libc/tinymath/j1f.c +++ b/libc/tinymath/j1f.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */ /* diff --git a/libc/tinymath/jn.c b/libc/tinymath/jn.c index 5df41b18b..2b5213e21 100644 --- a/libc/tinymath/jn.c +++ b/libc/tinymath/jn.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ /* diff --git a/libc/tinymath/jnf.c b/libc/tinymath/jnf.c index ff33eb8ac..09f67338d 100644 --- a/libc/tinymath/jnf.c +++ b/libc/tinymath/jnf.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ /* diff --git a/libc/tinymath/kcos.c b/libc/tinymath/kcos.c index f2467a0b7..a80529aec 100644 --- a/libc/tinymath/kcos.c +++ b/libc/tinymath/kcos.c @@ -27,15 +27,9 @@ 
╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */ /* diff --git a/libc/tinymath/kcosl.c b/libc/tinymath/kcosl.c index 8d54f5fcb..6563de59b 100644 --- a/libc/tinymath/kcosl.c +++ b/libc/tinymath/kcosl.c @@ -27,18 +27,10 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/ld80/k_cosl.c */ /* origin: FreeBSD /usr/src/lib/msun/ld128/k_cosl.c */ diff --git a/libc/tinymath/kexpl.c b/libc/tinymath/kexpl.c index 5dc19b613..d02bf2aaa 100644 --- a/libc/tinymath/kexpl.c +++ b/libc/tinymath/kexpl.c @@ -37,11 +37,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_libm_notice"); -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. 
Kargl, David Schultz.\""); -// clang-format off /* * ld128 version of k_expl.h. See ../ld80/s_expl.c for most comments. diff --git a/libc/tinymath/ksin.c b/libc/tinymath/ksin.c index 2a61bcbac..080382c19 100644 --- a/libc/tinymath/ksin.c +++ b/libc/tinymath/ksin.c @@ -27,15 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */ /* diff --git a/libc/tinymath/ksinl.c b/libc/tinymath/ksinl.c index c430c0322..bce105867 100644 --- a/libc/tinymath/ksinl.c +++ b/libc/tinymath/ksinl.c @@ -28,17 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); /* origin: FreeBSD /usr/src/lib/msun/ld80/k_sinl.c */ /* origin: FreeBSD /usr/src/lib/msun/ld128/k_sinl.c */ diff --git a/libc/tinymath/ktan.c b/libc/tinymath/ktan.c index 695c6b70e..02ca7daa6 100644 --- a/libc/tinymath/ktan.c +++ b/libc/tinymath/ktan.c @@ -27,15 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ /* diff --git a/libc/tinymath/ktanl.c b/libc/tinymath/ktanl.c index cbe9228b8..f7b618bc2 100644 --- a/libc/tinymath/ktanl.c +++ b/libc/tinymath/ktanl.c @@ -27,19 +27,12 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" -#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); + +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) /* origin: FreeBSD /usr/src/lib/msun/ld80/k_tanl.c */ /* origin: FreeBSD /usr/src/lib/msun/ld128/k_tanl.c */ diff --git a/libc/tinymath/lgamma_r.c b/libc/tinymath/lgamma_r.c index 356155bca..4632b144c 100644 --- a/libc/tinymath/lgamma_r.c +++ b/libc/tinymath/lgamma_r.c @@ -27,15 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ /* diff --git a/libc/tinymath/lgammaf_r.c b/libc/tinymath/lgammaf_r.c index f51eeea00..ee6b7a286 100644 --- a/libc/tinymath/lgammaf_r.c +++ b/libc/tinymath/lgammaf_r.c @@ -27,15 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ /* diff --git a/libc/tinymath/lgammal.c b/libc/tinymath/lgammal.c index de54194b7..ec3309acb 100644 --- a/libc/tinymath/lgammal.c +++ b/libc/tinymath/lgammal.c @@ -29,15 +29,9 @@ #include "libc/tinymath/kernel.internal.h" #include "libc/tinymath/ldshape.internal.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_lgammal.c */ /* diff --git a/libc/tinymath/log.c b/libc/tinymath/log.c index 61b13ef62..e793dd923 100644 --- a/libc/tinymath/log.c +++ b/libc/tinymath/log.c @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/log_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Double-precision log(x) function. diff --git a/libc/tinymath/log10.c b/libc/tinymath/log10.c index 3c3f19650..e073c1e38 100644 --- a/libc/tinymath/log10.c +++ b/libc/tinymath/log10.c @@ -30,15 +30,9 @@ #include "libc/tinymath/complex.internal.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/log2_data.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ /* diff --git a/libc/tinymath/log10f.c b/libc/tinymath/log10f.c index e078f8982..90ecb8cea 100644 --- a/libc/tinymath/log10f.c +++ b/libc/tinymath/log10f.c @@ -26,15 +26,9 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ /* diff --git a/libc/tinymath/log10l.c b/libc/tinymath/log10l.c index 04f658b86..35de4cc8b 100644 --- a/libc/tinymath/log10l.c +++ b/libc/tinymath/log10l.c @@ -28,15 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_log10l.c */ /* diff --git a/libc/tinymath/log1p.c b/libc/tinymath/log1p.c index 3d6883422..97e9b51c6 100644 --- a/libc/tinymath/log1p.c +++ b/libc/tinymath/log1p.c @@ -29,12 +29,8 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/log_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("freebsd_libm_notice"); +__static_yoink("fdlibm_notice"); /* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */ /* diff --git a/libc/tinymath/log1pf.c b/libc/tinymath/log1pf.c index 0f1b867f9..59fe2aace 100644 --- a/libc/tinymath/log1pf.c +++ b/libc/tinymath/log1pf.c @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/log1pf_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); #define Ln2 (0x1.62e43p-1f) #define SignMask (0x80000000) diff --git a/libc/tinymath/log1pf_data.c b/libc/tinymath/log1pf_data.c index eea85b405..abcc21bef 100644 --- a/libc/tinymath/log1pf_data.c +++ b/libc/tinymath/log1pf_data.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/log1pf_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* Polynomial coefficients generated using floating-point minimax algorithm, see tools/log1pf.sollya for details. 
*/ diff --git a/libc/tinymath/log1pl.c b/libc/tinymath/log1pl.c index 42f5a3b17..784feab56 100644 --- a/libc/tinymath/log1pl.c +++ b/libc/tinymath/log1pl.c @@ -29,15 +29,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/ld80/s_log1pl.c */ /* diff --git a/libc/tinymath/log2.c b/libc/tinymath/log2.c index d1d4d40f9..29a774c56 100644 --- a/libc/tinymath/log2.c +++ b/libc/tinymath/log2.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -30,12 +30,7 @@ #include "libc/tinymath/complex.internal.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/log2_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Double-precision log2(x) function. 
diff --git a/libc/tinymath/log2_data.c b/libc/tinymath/log2_data.c index 259ffd0db..2ecd1da72 100644 --- a/libc/tinymath/log2_data.c +++ b/libc/tinymath/log2_data.c @@ -27,12 +27,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/log2_data.internal.h" -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ - /* * Data for log2. * diff --git a/libc/tinymath/log2f.c b/libc/tinymath/log2f.c index 118a02d29..d7cc7c88e 100644 --- a/libc/tinymath/log2f.c +++ b/libc/tinymath/log2f.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -30,12 +30,7 @@ #include "libc/tinymath/complex.internal.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/log2f_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Single-precision log2 function. diff --git a/libc/tinymath/log2f_data.c b/libc/tinymath/log2f_data.c index 783213a91..bd04b407d 100644 --- a/libc/tinymath/log2f_data.c +++ b/libc/tinymath/log2f_data.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. 
│ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/log2f_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Data definition for log2f. diff --git a/libc/tinymath/log2l.c b/libc/tinymath/log2l.c index fd1489688..6ad25ef6c 100644 --- a/libc/tinymath/log2l.c +++ b/libc/tinymath/log2l.c @@ -29,15 +29,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_log2l.c */ /* diff --git a/libc/tinymath/log_data.c b/libc/tinymath/log_data.c index be87c4105..0dca131ae 100644 --- a/libc/tinymath/log_data.c +++ b/libc/tinymath/log_data.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. 
│ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/log_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Data for log. diff --git a/libc/tinymath/logf.c b/libc/tinymath/logf.c index 2bce5c3a3..c25d898b4 100644 --- a/libc/tinymath/logf.c +++ b/libc/tinymath/logf.c @@ -29,12 +29,7 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/logf_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Single-precision log function. diff --git a/libc/tinymath/logf_data.c b/libc/tinymath/logf_data.c index 504d50a26..cd8f944fe 100644 --- a/libc/tinymath/logf_data.c +++ b/libc/tinymath/logf_data.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/logf_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Data definition for logf. diff --git a/libc/tinymath/logl.c b/libc/tinymath/logl.c index b6f269c45..6f4c36d16 100644 --- a/libc/tinymath/logl.c +++ b/libc/tinymath/logl.c @@ -28,15 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. 
Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_logl.c */ /* diff --git a/libc/tinymath/loglq.c b/libc/tinymath/loglq.c index 0d2dbdcfb..5af871c9c 100644 --- a/libc/tinymath/loglq.c +++ b/libc/tinymath/loglq.c @@ -29,13 +29,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" +__static_yoink("freebsd_libm_notice"); #if LDBL_MANT_DIG == 113 -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Implementation of the natural logarithm of x for 128-bit format. diff --git a/libc/tinymath/modf.c b/libc/tinymath/modf.c index 3047590c4..2701093c1 100644 --- a/libc/tinymath/modf.c +++ b/libc/tinymath/modf.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns fractional part of 𝑥. diff --git a/libc/tinymath/modff.c b/libc/tinymath/modff.c index 99e60840c..720f05e80 100644 --- a/libc/tinymath/modff.c +++ b/libc/tinymath/modff.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off float modff(float x, float *iptr) { diff --git a/libc/tinymath/modfl.c b/libc/tinymath/modfl.c index 9b430c646..0d3f585ec 100644 --- a/libc/tinymath/modfl.c +++ b/libc/tinymath/modfl.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off static const long double toint = 1/LDBL_EPSILON; diff --git a/libc/tinymath/nextafter.c b/libc/tinymath/nextafter.c index f041bf0f3..a1da2b40a 100644 --- a/libc/tinymath/nextafter.c +++ b/libc/tinymath/nextafter.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off double nextafter(double x, double y) { diff --git a/libc/tinymath/nextafterf.c b/libc/tinymath/nextafterf.c index 80e884413..82bb5bd48 100644 --- a/libc/tinymath/nextafterf.c +++ b/libc/tinymath/nextafterf.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off float nextafterf(float x, float y) { diff --git a/libc/tinymath/nextafterl.c b/libc/tinymath/nextafterl.c index e85824851..c213e93ff 100644 --- a/libc/tinymath/nextafterl.c +++ b/libc/tinymath/nextafterl.c @@ -30,12 +30,8 @@ #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off long double nextafterl(long double x, long double y) { diff --git a/libc/tinymath/nexttoward.c b/libc/tinymath/nexttoward.c index c7a16bd50..d451122de 100644 --- a/libc/tinymath/nexttoward.c +++ b/libc/tinymath/nexttoward.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off double nexttoward(double x, long double y) { diff --git a/libc/tinymath/nexttowardf.c b/libc/tinymath/nexttowardf.c index da5eb59ba..470c27edd 100644 --- a/libc/tinymath/nexttowardf.c +++ b/libc/tinymath/nexttowardf.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off float nexttowardf(float x, long double y) { diff --git a/libc/tinymath/polevll.c b/libc/tinymath/polevll.c index aac4505d8..92356ca36 100644 --- a/libc/tinymath/polevll.c +++ b/libc/tinymath/polevll.c @@ -26,15 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/polevll.c */ /* diff --git a/libc/tinymath/pow.c b/libc/tinymath/pow.c index 4e223fb54..abca8c6d5 100644 --- a/libc/tinymath/pow.c +++ b/libc/tinymath/pow.c @@ -30,12 +30,7 @@ #include "libc/tinymath/exp_data.internal.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/pow_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Double-precision x^y function. diff --git a/libc/tinymath/pow_data.c b/libc/tinymath/pow_data.c index 43dc5d008..ce804633e 100644 --- a/libc/tinymath/pow_data.c +++ b/libc/tinymath/pow_data.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. 
│ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/pow_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Data for the log part of pow. diff --git a/libc/tinymath/powf.c b/libc/tinymath/powf.c index 2ad8f7f63..aeab1bbb7 100644 --- a/libc/tinymath/powf.c +++ b/libc/tinymath/powf.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. │ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -31,12 +31,7 @@ #include "libc/tinymath/exp_data.internal.h" #include "libc/tinymath/internal.h" #include "libc/tinymath/powf_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* * Copyright (c) 2017-2018, Arm Limited. diff --git a/libc/tinymath/powf_data.c b/libc/tinymath/powf_data.c index 0ef109ad6..77cd461f1 100644 --- a/libc/tinymath/powf_data.c +++ b/libc/tinymath/powf_data.c @@ -2,8 +2,8 @@ │ vi: set noet ft=c ts=8 sw=8 fenc=utf-8 :vi │ ╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Musl Libc │ -│ Copyright © 2005-2014 Rich Felker, et al. │ +│ Optimized Routines │ +│ Copyright (c) 1999-2022, Arm Limited. 
│ │ │ │ Permission is hereby granted, free of charge, to any person obtaining │ │ a copy of this software and associated documentation files (the │ @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/powf_data.internal.h" - -asm(".ident\t\"\\n\\n\ -Double-precision math functions (MIT License)\\n\ -Copyright 2018 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* * Data definition for powf. diff --git a/libc/tinymath/powl.c b/libc/tinymath/powl.c index cb53b0e2c..6de2ae99a 100644 --- a/libc/tinymath/powl.c +++ b/libc/tinymath/powl.c @@ -96,15 +96,8 @@ long double powl(long double x, long double y) { } #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 - -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */ /* @@ -616,10 +609,6 @@ static long double powil(long double x, int nn) #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 #include "libc/tinymath/freebsd.internal.h" -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); - /*- * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 
diff --git a/libc/tinymath/poz.c b/libc/tinymath/poz.c index 4599ab71a..09c4a0476 100644 --- a/libc/tinymath/poz.c +++ b/libc/tinymath/poz.c @@ -1,7 +1,6 @@ /*-*- mode:c;indent-tabs-mode:t;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│ │ vi: set noet ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi │ ╚─────────────────────────────────────────────────────────────────────────────*/ -/* clang-format off */ /* Compute probability of measured Chi Square value. diff --git a/libc/tinymath/randtest.c b/libc/tinymath/randtest.c index ae96db067..10dc8d5c4 100644 --- a/libc/tinymath/randtest.c +++ b/libc/tinymath/randtest.c @@ -1,4 +1,3 @@ -/* clang-format off */ /* Apply various randomness tests to a stream of bytes diff --git a/libc/tinymath/remainderf.c b/libc/tinymath/remainderf.c index b02a9da9c..f403db37e 100644 --- a/libc/tinymath/remainderf.c +++ b/libc/tinymath/remainderf.c @@ -19,7 +19,6 @@ #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -// clang-format off /** * Returns remainder of dividing 𝑥 by 𝑦. diff --git a/libc/tinymath/rempio2.c b/libc/tinymath/rempio2.c index 7337b0684..09565bcd7 100644 --- a/libc/tinymath/rempio2.c +++ b/libc/tinymath/rempio2.c @@ -29,15 +29,9 @@ #include "libc/math.h" #include "libc/runtime/fenv.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ /* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */ /* diff --git a/libc/tinymath/rempio2f.c b/libc/tinymath/rempio2f.c index 86ed07f49..b7ea0658b 100644 --- a/libc/tinymath/rempio2f.c +++ b/libc/tinymath/rempio2f.c @@ -29,15 +29,9 @@ #include "libc/math.h" #include "libc/runtime/fenv.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ /* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */ /* diff --git a/libc/tinymath/rempio2l.c b/libc/tinymath/rempio2l.c index 5648b89aa..818989d80 100644 --- a/libc/tinymath/rempio2l.c +++ b/libc/tinymath/rempio2l.c @@ -29,19 +29,13 @@ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" #include "libc/tinymath/ldshape.internal.h" + +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); + #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/ld80/e_rem_pio2.c */ /* diff --git a/libc/tinymath/rempio2large.c b/libc/tinymath/rempio2large.c index 4f2b2ad22..7dce93dab 100644 --- a/libc/tinymath/rempio2large.c +++ b/libc/tinymath/rempio2large.c @@ -27,17 +27,11 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */ /* diff --git a/libc/tinymath/remquo.c b/libc/tinymath/remquo.c index 975451a21..156a1e320 100644 --- a/libc/tinymath/remquo.c +++ b/libc/tinymath/remquo.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Computes remainder and part of quotient. diff --git a/libc/tinymath/remquof.c b/libc/tinymath/remquof.c index ca4fabcf4..a5eddfdb3 100644 --- a/libc/tinymath/remquof.c +++ b/libc/tinymath/remquof.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Computes remainder and part of quotient. diff --git a/libc/tinymath/remquol.c b/libc/tinymath/remquol.c index e5c196fa4..4e86521ee 100644 --- a/libc/tinymath/remquol.c +++ b/libc/tinymath/remquol.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Computes remainder and part of quotient. diff --git a/libc/tinymath/rint.c b/libc/tinymath/rint.c index dd1732d18..f8cfc91f5 100644 --- a/libc/tinymath/rint.c +++ b/libc/tinymath/rint.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/runtime/fenv.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON diff --git a/libc/tinymath/rintf.c b/libc/tinymath/rintf.c index d9270e6c4..abef412d7 100644 --- a/libc/tinymath/rintf.c +++ b/libc/tinymath/rintf.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/runtime/fenv.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #if FLT_EVAL_METHOD==0 #define EPS FLT_EPSILON diff --git a/libc/tinymath/rintl.c b/libc/tinymath/rintl.c index b9c2e9bce..ba6324740 100644 --- a/libc/tinymath/rintl.c +++ b/libc/tinymath/rintl.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Rounds to integer in current rounding mode. diff --git a/libc/tinymath/round.c b/libc/tinymath/round.c index d5966f9eb..800df248f 100644 --- a/libc/tinymath/round.c +++ b/libc/tinymath/round.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/runtime/fenv.h" #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON diff --git a/libc/tinymath/roundf.c b/libc/tinymath/roundf.c index e84568cee..4eac941c3 100644 --- a/libc/tinymath/roundf.c +++ b/libc/tinymath/roundf.c @@ -28,12 +28,7 @@ #include "libc/math.h" #include "libc/runtime/fenv.h" #include "libc/tinymath/internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("musl_libc_notice"); #if FLT_EVAL_METHOD==0 #define EPS FLT_EPSILON diff --git a/libc/tinymath/roundl.c b/libc/tinymath/roundl.c index 2eb2bd7d0..3f2518945 100644 --- a/libc/tinymath/roundl.c +++ b/libc/tinymath/roundl.c @@ -29,12 +29,8 @@ #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Rounds 𝑥 to nearest integer, away from zero. diff --git a/libc/tinymath/scalb.c b/libc/tinymath/scalb.c index f8137e3a2..73048c389 100644 --- a/libc/tinymath/scalb.c +++ b/libc/tinymath/scalb.c @@ -26,15 +26,9 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/e_scalb.c */ /* diff --git a/libc/tinymath/scalbf.c b/libc/tinymath/scalbf.c index e71d1e719..e3f0eb8f9 100644 --- a/libc/tinymath/scalbf.c +++ b/libc/tinymath/scalbf.c @@ -26,15 +26,9 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off float scalbf(float x, float fn) { diff --git a/libc/tinymath/sin.c b/libc/tinymath/sin.c index 362e1a5bb..468e93128 100644 --- a/libc/tinymath/sin.c +++ b/libc/tinymath/sin.c @@ -28,15 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ /* diff --git a/libc/tinymath/sincos.c b/libc/tinymath/sincos.c index 69fcec219..934123dc2 100644 --- a/libc/tinymath/sincos.c +++ b/libc/tinymath/sincos.c @@ -29,15 +29,9 @@ #include "libc/runtime/runtime.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ /* diff --git a/libc/tinymath/sincosf.c b/libc/tinymath/sincosf.c index 33371598d..cc0a52d55 100644 --- a/libc/tinymath/sincosf.c +++ b/libc/tinymath/sincosf.c @@ -28,12 +28,7 @@ #include "libc/intrin/likely.h" #include "libc/math.h" #include "libc/tinymath/sincosf.internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("arm_optimized_routines_notice"); /* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative error is 0.5303 * 2^-23. A single-step range reduction is used for diff --git a/libc/tinymath/sincosf.internal.h b/libc/tinymath/sincosf.internal.h index e07239828..57a596c98 100644 --- a/libc/tinymath/sincosf.internal.h +++ b/libc/tinymath/sincosf.internal.h @@ -2,7 +2,6 @@ #define COSMOPOLITAN_LIBC_TINYMATH_SINCOSF_INTERNAL_H_ #include "libc/tinymath/internal.h" COSMOPOLITAN_C_START_ -// clang-format off /* * Header for sinf, cosf and sincosf. diff --git a/libc/tinymath/sincosf_data.c b/libc/tinymath/sincosf_data.c index d90de5455..61a9ff067 100644 --- a/libc/tinymath/sincosf_data.c +++ b/libc/tinymath/sincosf_data.c @@ -26,12 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/sincosf.internal.h" - -asm(".ident\t\"\\n\\n\ -Optimized Routines (MIT License)\\n\ -Copyright 2022 ARM Limited\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ +__static_yoink("arm_optimized_routines_notice"); /* The constants and polynomials for sine and cosine. The 2nd entry computes -cos (x) rather than cos (x) to get negation for free. 
*/ diff --git a/libc/tinymath/sincosl.c b/libc/tinymath/sincosl.c index 8cc1c3d2b..cb4d03a49 100644 --- a/libc/tinymath/sincosl.c +++ b/libc/tinymath/sincosl.c @@ -30,12 +30,8 @@ #include "libc/tinymath/kernel.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns sine and cosine of 𝑥. diff --git a/libc/tinymath/sindf.c b/libc/tinymath/sindf.c index 06a4196f1..60608e0c1 100644 --- a/libc/tinymath/sindf.c +++ b/libc/tinymath/sindf.c @@ -27,15 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ /* diff --git a/libc/tinymath/sinf.c b/libc/tinymath/sinf.c index 81d3218d2..14f8de8a5 100644 --- a/libc/tinymath/sinf.c +++ b/libc/tinymath/sinf.c @@ -29,15 +29,9 @@ #include "libc/tinymath/complex.internal.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ /* diff --git a/libc/tinymath/sinh.c b/libc/tinymath/sinh.c index d740da683..6c3b9975c 100644 --- a/libc/tinymath/sinh.c +++ b/libc/tinymath/sinh.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/expo.internal.h" #include "libc/tinymath/freebsd.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns hyperbolic sine of 𝑥. diff --git a/libc/tinymath/sinhf.c b/libc/tinymath/sinhf.c index 75fcb49c6..1f1e359f3 100644 --- a/libc/tinymath/sinhf.c +++ b/libc/tinymath/sinhf.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/expo.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns hyperbolic sine of 𝑥. diff --git a/libc/tinymath/sinhl.c b/libc/tinymath/sinhl.c index 372038ada..bb642a6df 100644 --- a/libc/tinymath/sinhl.c +++ b/libc/tinymath/sinhl.c @@ -39,16 +39,11 @@ #include "libc/intrin/likely.h" #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. 
All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); + +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #if LDBL_MAX_EXP != 0x4000 /* We also require the usual expsign encoding. */ diff --git a/libc/tinymath/sinl.c b/libc/tinymath/sinl.c index b65e21e4b..2d87d6b3f 100644 --- a/libc/tinymath/sinl.c +++ b/libc/tinymath/sinl.c @@ -30,12 +30,8 @@ #include "libc/tinymath/kernel.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns sine of 𝑥. diff --git a/libc/tinymath/sqrt.c b/libc/tinymath/sqrt.c index b6e1cd215..1d23a9fd7 100644 --- a/libc/tinymath/sqrt.c +++ b/libc/tinymath/sqrt.c @@ -28,12 +28,8 @@ #include "libc/intrin/likely.h" #include "libc/math.h" #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #define FENV_SUPPORT 1 diff --git a/libc/tinymath/sqrt_data.c b/libc/tinymath/sqrt_data.c index 9850440cd..c0e9e6f75 100644 --- a/libc/tinymath/sqrt_data.c +++ b/libc/tinymath/sqrt_data.c @@ -26,12 +26,8 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off const uint16_t __rsqrt_tab[128] = { 0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43, diff --git a/libc/tinymath/sqrtf.c b/libc/tinymath/sqrtf.c index 21374d03f..f37ecbc12 100644 --- a/libc/tinymath/sqrtf.c +++ b/libc/tinymath/sqrtf.c @@ -28,12 +28,8 @@ #include "libc/intrin/likely.h" #include "libc/math.h" #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #define FENV_SUPPORT 1 diff --git a/libc/tinymath/sqrtl.c b/libc/tinymath/sqrtl.c index 4bf9d7df2..335d40f4d 100644 --- a/libc/tinymath/sqrtl.c +++ b/libc/tinymath/sqrtl.c @@ -30,12 +30,8 @@ #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off #define FENV_SUPPORT 1 diff --git a/libc/tinymath/tan.c b/libc/tinymath/tan.c index 0d5a45f73..92d072404 100644 --- a/libc/tinymath/tan.c +++ b/libc/tinymath/tan.c @@ -28,15 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */ /* diff --git a/libc/tinymath/tandf.c b/libc/tinymath/tandf.c index 93b79289b..bff4e4adb 100644 --- a/libc/tinymath/tandf.c +++ b/libc/tinymath/tandf.c @@ -27,15 +27,9 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); +__static_yoink("fdlibm_notice"); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -/* clang-format off */ /* origin: FreeBSD /usr/src/lib/msun/src/k_tanf.c */ /* diff --git a/libc/tinymath/tanf.c b/libc/tinymath/tanf.c index 5fb7c4453..508fec260 100644 --- a/libc/tinymath/tanf.c +++ b/libc/tinymath/tanf.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/complex.internal.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: FreeBSD /usr/src/lib/msun/src/s_tanf.c */ /* diff --git a/libc/tinymath/tanh.c b/libc/tinymath/tanh.c index 043d788f7..760440a1f 100644 --- a/libc/tinymath/tanh.c +++ b/libc/tinymath/tanh.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns hyperbolic tangent of 𝑥. 
diff --git a/libc/tinymath/tanhf.c b/libc/tinymath/tanhf.c index 2b735ff2d..628a4860e 100644 --- a/libc/tinymath/tanhf.c +++ b/libc/tinymath/tanhf.c @@ -27,12 +27,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #include "libc/tinymath/internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns hyperbolic tangent of 𝑥. diff --git a/libc/tinymath/tanhl.c b/libc/tinymath/tanhl.c index 295f72de7..d1e8ffb78 100644 --- a/libc/tinymath/tanhl.c +++ b/libc/tinymath/tanhl.c @@ -39,16 +39,11 @@ #include "libc/intrin/likely.h" #include "libc/math.h" #include "libc/tinymath/freebsd.internal.h" -#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) -asm(".ident\t\"\\n\\n\ -FreeBSD libm (BSD-2 License)\\n\ -Copyright (c) 2005-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.\""); -asm(".ident\t\"\\n\\n\ -fdlibm (fdlibm license)\\n\ -Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off +__static_yoink("fdlibm_notice"); +__static_yoink("freebsd_libm_notice"); + +#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) #if LDBL_MAX_EXP != 0x4000 /* We also require the usual expsign encoding. */ diff --git a/libc/tinymath/tanl.c b/libc/tinymath/tanl.c index 9ef8475a7..90b6d69e4 100644 --- a/libc/tinymath/tanl.c +++ b/libc/tinymath/tanl.c @@ -30,12 +30,8 @@ #include "libc/tinymath/kernel.internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Returns tangent of x. 
diff --git a/libc/tinymath/tgamma.c b/libc/tinymath/tgamma.c index 35369f448..822f43249 100644 --- a/libc/tinymath/tgamma.c +++ b/libc/tinymath/tgamma.c @@ -28,12 +28,8 @@ #include "libc/math.h" #include "libc/tinymath/feval.internal.h" #include "libc/tinymath/kernel.internal.h" +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* "A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) diff --git a/libc/tinymath/tgammal.c b/libc/tinymath/tgammal.c index ae73f31dd..ee5ffafd1 100644 --- a/libc/tinymath/tgammal.c +++ b/libc/tinymath/tgammal.c @@ -28,15 +28,9 @@ #include "libc/math.h" #include "libc/tinymath/internal.h" #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 +__static_yoink("musl_libc_notice"); +__static_yoink("openbsd_libm_notice"); -asm(".ident\t\"\\n\\n\ -OpenBSD libm (ISC License)\\n\ -Copyright (c) 2008 Stephen L. Moshier \""); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_tgammal.c */ /* diff --git a/libc/tinymath/trunc.c b/libc/tinymath/trunc.c index 0262f4028..3dc404c6d 100644 --- a/libc/tinymath/trunc.c +++ b/libc/tinymath/trunc.c @@ -30,12 +30,8 @@ #ifndef __llvm__ #include "third_party/intel/smmintrin.internal.h" #endif +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Rounds to integer, towards zero. 
diff --git a/libc/tinymath/truncf.c b/libc/tinymath/truncf.c index 3f85aef61..906e00b0a 100644 --- a/libc/tinymath/truncf.c +++ b/libc/tinymath/truncf.c @@ -30,12 +30,8 @@ #ifndef __llvm__ #include "third_party/intel/smmintrin.internal.h" #endif +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Rounds to integer, towards zero. diff --git a/libc/tinymath/truncl.c b/libc/tinymath/truncl.c index 52f8586ca..55b305294 100644 --- a/libc/tinymath/truncl.c +++ b/libc/tinymath/truncl.c @@ -29,12 +29,8 @@ #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) +__static_yoink("musl_libc_notice"); -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); -// clang-format off /** * Rounds to integer, towards zero. diff --git a/net/http/base32.c b/net/http/base32.c index 56c56c9ef..6e7963d38 100644 --- a/net/http/base32.c +++ b/net/http/base32.c @@ -21,15 +21,11 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -Apache License, Version 2.0\\n\ -Copyright 2010 Google Inc.\""); -asm(".include \"libc/disclaimer.inc\""); - const char base32def[] = "0123456789abcdefghjkmnpqrstvwxyz"; int tobits(int b) { - int bits = 0; while (b && (b >>= 1)) bits++; + int bits = 0; + while (b && (b >>= 1)) bits++; return bits; } @@ -48,20 +44,19 @@ int tobits(int b) { * @param ol if non-NULL receives output length * @return allocated NUL-terminated buffer, or NULL w/ errno */ -char* EncodeBase32(const char *s, size_t sl, - const char *a, size_t al, +char *EncodeBase32(const char *s, size_t sl, const char *a, size_t al, size_t *ol) { size_t count = 0; char *r = NULL; if (sl == -1) sl = s ? 
strlen(s) : 0; if (al == 0) { a = base32def; - al = sizeof(base32def)/sizeof(a[0]); + al = sizeof(base32def) / sizeof(a[0]); } unassert(2 <= al && al <= 128); int bl = tobits(al); int mask = (1 << bl) - 1; - size_t n = (sl * 8 + bl - 1) / bl; // calculate output length + size_t n = (sl * 8 + bl - 1) / bl; // calculate output length if ((r = malloc(n + 1))) { int buffer = s[0]; size_t next = 1; @@ -92,11 +87,11 @@ static signed char kBase32[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, // 0x00 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x10 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, // 0x20 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 0x30 - -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, -1, // 0x40 - 22, 23, 24, 25, 26, 0, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, // 0x50 - -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, -1, // 0x60 - 22, 23, 24, 25, 26, 0, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, // 0x70 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 0x30 + -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, -1, // 0x40 + 22, 23, 24, 25, 26, 0, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, // 0x50 + -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, -1, // 0x60 + 22, 23, 24, 25, 26, 0, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, // 0x70 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x80 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x90 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xa0 @@ -120,19 +115,18 @@ static signed char kBase32[256] = { * @param ol if non-NULL receives output length * @return allocated NUL-terminated buffer, or NULL w/ errno */ -char* DecodeBase32(const char *s, size_t sl, - const char *a, size_t al, +char *DecodeBase32(const char *s, size_t sl, const char *a, size_t al, size_t *ol) { size_t count = 0; char *r = NULL; if (sl == -1) sl = s ? 
strlen(s) : 0; if (al == 0) { a = base32def; - al = sizeof(base32def)/sizeof(a[0]); + al = sizeof(base32def) / sizeof(a[0]); } unassert(2 <= al && al <= 128); int bl = tobits(al); - size_t n = (sl * bl + 1) / 8 + 1; // calculate output length + size_t n = (sl * bl + 1) / 8 + 1; // calculate output length // process input if ((r = malloc(n + 1))) { unsigned int buffer = 0; diff --git a/test/libc/calls/dup_test.c b/test/libc/calls/dup_test.c index c8cb7098a..cad66f18e 100644 --- a/test/libc/calls/dup_test.c +++ b/test/libc/calls/dup_test.c @@ -40,7 +40,8 @@ void SetUpOnce(void) { testlib_enable_tmp_setup_teardown(); } -static textstartup void TestInit(int argc, char **argv) { +__attribute__((__constructor__)) static textstartup void TestInit(int argc, + char **argv) { int fd; if (argc == 2 && !strcmp(argv[1], "boop")) { if ((fd = open("/dev/null", O_RDWR | O_CLOEXEC)) == 3) { @@ -51,8 +52,6 @@ static textstartup void TestInit(int argc, char **argv) { } } -const void *const TestCtor[] initarray = {TestInit}; - TEST(dup, ebadf) { ASSERT_SYS(EBADF, -1, dup(-1)); ASSERT_SYS(EBADF, -1, dup2(-1, 0)); diff --git a/test/libc/calls/pledge_test.c b/test/libc/calls/pledge_test.c index 36cdfe0e7..a9a160392 100644 --- a/test/libc/calls/pledge_test.c +++ b/test/libc/calls/pledge_test.c @@ -550,6 +550,7 @@ TEST(pledge, execpromises_reducesAtExecOnLinux) { } TEST(pledge_openbsd, execpromisesIsNull_letsItDoAnything) { + if (IsOpenbsd()) return; // mimmutable() ugh if (!IsOpenbsd()) return; int ws, pid; ASSERT_NE(-1, (pid = fork())); @@ -566,6 +567,7 @@ TEST(pledge_openbsd, execpromisesIsNull_letsItDoAnything) { } TEST(pledge_openbsd, execpromisesIsSuperset_letsItDoAnything) { + if (IsOpenbsd()) return; // mimmutable() ugh if (!IsOpenbsd()) return; int ws, pid; ASSERT_NE(-1, (pid = fork())); @@ -585,6 +587,7 @@ TEST(pledge_linux, execpromisesIsSuperset_notPossible) { } TEST(pledge_openbsd, execpromises_notok) { + if (IsOpenbsd()) return; // mimmutable() ugh int ws, pid; ASSERT_NE(-1, 
(pid = fork())); if (!pid) { diff --git a/test/libc/calls/readlinkat_test.c b/test/libc/calls/readlinkat_test.c index 8da142e5f..0d3d5a4a3 100644 --- a/test/libc/calls/readlinkat_test.c +++ b/test/libc/calls/readlinkat_test.c @@ -23,7 +23,6 @@ #include "libc/limits.h" #include "libc/log/log.h" #include "libc/mem/gc.h" -#include "libc/mem/gc.h" #include "libc/runtime/symbols.internal.h" #include "libc/str/str.h" #include "libc/sysv/consts/at.h" diff --git a/test/libc/calls/unveil_test.c b/test/libc/calls/unveil_test.c index 1c5767a79..b252fd8ce 100644 --- a/test/libc/calls/unveil_test.c +++ b/test/libc/calls/unveil_test.c @@ -94,6 +94,7 @@ TEST(unveil, api_differences) { } TEST(unveil, rx_readOnlyPreexistingExecutable_worksFine) { + if (IsOpenbsd()) return; // TOOD(jart): why pledge violation? SPAWN(fork); ASSERT_SYS(0, 0, mkdir("folder", 0755)); testlib_extract("/zip/life.elf", "folder/life.elf", 0755); @@ -150,6 +151,7 @@ TEST(unveil, rwc_createExecutableFile_isAllowedButCantBeRun) { } TEST(unveil, rwcx_createExecutableFile_canAlsoBeRun) { + if (IsOpenbsd()) return; // TOOD(jart): why pledge violation? SPAWN(fork); ASSERT_SYS(0, 0, mkdir("folder", 0755)); ASSERT_SYS(0, 0, unveil("folder", "rwcx")); diff --git a/test/libc/mem/realpath_test.c b/test/libc/mem/realpath_test.c index e24060045..a4796d091 100644 --- a/test/libc/mem/realpath_test.c +++ b/test/libc/mem/realpath_test.c @@ -19,6 +19,7 @@ #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/errno.h" +#include "libc/intrin/strace.internal.h" #include "libc/mem/gc.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" @@ -45,6 +46,8 @@ TEST(realpath, test2) { } TEST(realpath, test3) { + strace_enabled(+1); + ASSERT_TRUE(fileexists("conftest.a")); char *name = gc(realpath("conftest.l/../conftest.a", NULL)); if (IsWindows()) { // WIN32 acts as a flat namespace, rather than linear inode crawl. @@ -54,6 +57,7 @@ TEST(realpath, test3) { // Every other OS FS is a UNIX inode crawl. 
ASSERT_SYS(ENOTDIR, NULL, name); } + strace_enabled(-1); } TEST(realpath, test4) { diff --git a/test/libc/proc/posix_spawn_test.c b/test/libc/proc/posix_spawn_test.c index d4fe8aa7f..046710146 100644 --- a/test/libc/proc/posix_spawn_test.c +++ b/test/libc/proc/posix_spawn_test.c @@ -36,7 +36,6 @@ #include "libc/intrin/safemacros.internal.h" #include "libc/limits.h" #include "libc/mem/gc.h" -#include "libc/mem/gc.h" #include "libc/mem/mem.h" #include "libc/proc/proc.internal.h" #include "libc/runtime/internal.h" @@ -130,6 +129,7 @@ TEST(posix_spawn, ape) { } TEST(posix_spawn, elf) { + if (IsOpenbsd()) return; // mimmutable() ugh if (IsXnu() || IsWindows() || IsMetal()) return; int ws, pid; char *prog = "./life.elf"; // assimilate -bcef diff --git a/test/libc/release/BUILD.mk b/test/libc/release/BUILD.mk index e98174d41..85da0a377 100644 --- a/test/libc/release/BUILD.mk +++ b/test/libc/release/BUILD.mk @@ -38,10 +38,12 @@ o/$(MODE)/test/libc/release/smoke.o: \ -nostdinc \ -D_COSMO_SOURCE \ -Wl,--gc-sections \ + -z noexecstack \ -fno-omit-frame-pointer \ -include o/cosmopolitan.h \ -Wl,-z,max-page-size=0x1000 \ -Wl,-z,common-page-size=0x1000 \ + -Wl,-z,noexecstack \ $< o/$(MODE)/test/libc/release/smoke.com.dbg: \ @@ -55,6 +57,7 @@ o/$(MODE)/test/libc/release/smoke.com.dbg: \ -no-pie \ -nostdlib \ --gc-sections \ + -z noexecstack \ -z max-page-size=0x1000 \ -z common-page-size=0x1000 \ -T o/$(MODE)/ape/ape.lds \ @@ -75,6 +78,7 @@ o/$(MODE)/test/libc/release/smoke-nms.com.dbg: \ -no-pie \ -nostdlib \ --gc-sections \ + -z noexecstack \ -z max-page-size=0x1000 \ -z common-page-size=0x1000 \ -T o/$(MODE)/ape/ape.lds \ @@ -96,6 +100,7 @@ o/$(MODE)/test/libc/release/smoke-chibicc.com.dbg: \ -no-pie \ -nostdlib \ --gc-sections \ + -z noexecstack \ -z max-page-size=0x1000 \ -z common-page-size=0x1000 \ -T o/$(MODE)/ape/ape.lds \ @@ -138,6 +143,7 @@ o/$(MODE)/test/libc/release/smokecxx.com.dbg: \ -no-pie \ -nostdlib \ --gc-sections \ + -z noexecstack \ -z max-page-size=0x1000 \ 
-z common-page-size=0x1000 \ -T o/$(MODE)/ape/ape.lds \ @@ -158,6 +164,7 @@ o/$(MODE)/test/libc/release/smokecxx.o: \ -fno-pie \ -nostdinc \ -Wl,--gc-sections \ + -Wl,-z,noexecstack \ -fno-omit-frame-pointer \ -z max-page-size=0x1000 \ -z common-page-size=0x1000 \ @@ -175,6 +182,7 @@ o/$(MODE)/test/libc/release/smokeansi.com.dbg: \ -no-pie \ -nostdlib \ --gc-sections \ + -z noexecstack \ -z max-page-size=0x1000 \ -z common-page-size=0x1000 \ -T o/$(MODE)/ape/ape.lds \ @@ -198,6 +206,7 @@ o/$(MODE)/test/libc/release/smokeansi.o: \ -nostdinc \ -D_COSMO_SOURCE \ -Wl,--gc-sections \ + -Wl,-z,noexecstack \ -fno-omit-frame-pointer \ -include o/cosmopolitan.h \ -Wl,-z,max-page-size=0x1000 \ diff --git a/test/libc/runtime/initorder_test.c b/test/libc/runtime/initorder_test.c new file mode 100644 index 000000000..d8f5a70a5 --- /dev/null +++ b/test/libc/runtime/initorder_test.c @@ -0,0 +1,260 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2024 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/stdio/stdio.h" + +// orders of events +// -4 pc() bios +// -4 ApeLoader() unix +// -3 WinMain() win32 +// -3 _start() unix +// -2 cosmo() x86 +// -2 cosmo2() aarch64 +// -1 _init() x86 +// 0 ASMPREINIT ok +// 1 ASMINIT105 ok +// 2 CTOR110 ok +// 3 ASMINIT115 ok +// 4 CTOR120 ok +// 5 CTOR1000 ok +// 6 CTOR10000 ok +// 7 CTOR40000 ok +// 8 CTOR60000 ok +// 9 ASMCTOR113 ok +// 10 ASMCTOR103 ok +// 11 CTOR65500 ok +// 12 CTOR65534 ok +// 13 CTOR ok +// 14 CTOR65535 ok +// 15 ASMINIT ok +// 16 ASMCTOR ok +// n main() + +#define N 18 + +#define COSMOINIT -1 + +#define CTOR 1 +#define CTOR110 110 +#define CTOR120 120 +#define CTOR1000 1000 +#define CTOR10000 10000 +#define CTOR40000 40000 +#define CTOR60000 60000 +#define CTOR65500 65500 +#define CTOR65534 65534 +#define CTOR65535 65535 + +#define ASMINIT 2 +#define ASMINIT105 105 +#define ASMINIT115 115 + +#define ASMCTOR 3 +#define ASMCTOR103 103 +#define ASMCTOR113 113 + +#define ASMPREINIT 4 +#define ASMPREINIT107 107 +#define ASMPREINIT117 117 + +#ifdef __x86__ +#define GUFF " #" +#else +#define GUFF " //" +#endif + +int event; +int events[N]; + +const char *getname(int x) { + switch (x) { + case 0: + return "0"; + case CTOR: + return "CTOR"; + case CTOR110: + return "CTOR110"; + case CTOR120: + return "CTOR120"; + case CTOR1000: + return "CTOR1000"; + case CTOR10000: + return "CTOR10000"; + case CTOR40000: + return "CTOR40000"; + case CTOR60000: + return "CTOR60000"; + case CTOR65500: + return "CTOR65500"; + case CTOR65534: + return "CTOR65534"; + case CTOR65535: + return "CTOR65535"; + case ASMINIT: + return "ASMINIT"; + case ASMINIT105: + return "ASMINIT105"; + case ASMINIT115: + return "ASMINIT115"; + case ASMCTOR: + return "ASMCTOR"; + case ASMCTOR103: + return "ASMCTOR103"; + case ASMCTOR113: + return "ASMCTOR113"; + case ASMPREINIT: + return "ASMPREINIT"; + case ASMPREINIT107: + return "ASMPREINIT107"; + case 
ASMPREINIT117: + return "ASMPREINIT117"; + default: + return "???"; + } +} + +__attribute__((__constructor__)) void ctor(void) { + events[event++] = CTOR; +} + +__attribute__((__constructor__(110))) void ctor110(void) { + events[event++] = CTOR110; +} + +__attribute__((__constructor__(120))) void ctor120(void) { + events[event++] = CTOR120; +} + +__attribute__((__constructor__(1000))) void ctor1000(void) { + events[event++] = CTOR1000; +} + +__attribute__((__constructor__(10000))) void ctor10000(void) { + events[event++] = CTOR10000; +} + +__attribute__((__constructor__(40000))) void ctor40000(void) { + events[event++] = CTOR40000; +} + +__attribute__((__constructor__(60000))) void ctor60000(void) { + events[event++] = CTOR60000; +} + +__attribute__((__constructor__(65500))) void ctor65500(void) { + events[event++] = CTOR65500; +} + +__attribute__((__constructor__(65534))) void ctor65534(void) { + events[event++] = CTOR65534; +} + +__attribute__((__constructor__(65535))) void ctor65535(void) { + events[event++] = CTOR65535; +} + +void asminit(void) { + events[event++] = ASMINIT; +} +__attribute__((__section__( + ".init_array,\"aw\",@init_array" GUFF))) void *const kAsminit[] = {asminit}; + +void asminit105(void) { + events[event++] = ASMINIT105; +} +__attribute__((__section__( + ".init_array.105,\"aw\",@init_array" GUFF))) void *const kAsminit105[] = { + asminit105}; + +void asminit115(void) { + events[event++] = ASMINIT115; +} +__attribute__((__section__( + ".init_array.115,\"aw\",@init_array" GUFF))) void *const kAsminit115[] = { + asminit115}; + +void asmpreinit(void) { + events[event++] = ASMPREINIT; +} +__attribute__((__section__( + ".preinit_array,\"a\",@preinit_array" GUFF))) void *const kAsmpreinit[] = { + asmpreinit}; + +void asmpreinit107(void) { + events[event++] = ASMPREINIT107; +} +__attribute__(( + __section__(".preinit_array.107,\"a\",@preinit_array" GUFF))) void + *const kAsmpreinit107[] = {asmpreinit107}; + +void asmctor(void) { + events[event++] = 
ASMCTOR; +} +__attribute__((__section__( + ".ctors,\"aw\",@init_array" GUFF))) void *const kAsmctor[] = {asmctor}; + +void asmctor103(void) { + events[event++] = ASMCTOR103; +} +__attribute__((__section__( + ".ctors.103,\"aw\",@init_array" GUFF))) void *const kAsmctor103[] = { + asmctor103}; + +void asmctor113(void) { + events[event++] = ASMCTOR113; +} +__attribute__((__section__( + ".ctors.113,\"aw\",@init_array" GUFF))) void *const kAsmctor113[] = { + asmctor113}; + +const int want[N] = { + ASMPREINIT, // + ASMINIT105, // + CTOR110, // + ASMINIT115, // + CTOR120, // + CTOR1000, // + CTOR10000, // + CTOR40000, // + CTOR60000, // + ASMCTOR113, // + ASMCTOR103, // + CTOR65500, // + CTOR65534, // + CTOR, // + CTOR65535, // + ASMINIT, // + ASMCTOR, // +}; + +int main() { + int fails = 0; + printf("\nevents:\n"); + for (int i = 0; i < N; ++i) { + printf("%3d %12s ", i, getname(events[i])); + if (events[i] == want[i]) { + printf("ok"); + } else { + printf("should be %s", getname(want[i])); + ++fails; + } + printf("\n"); + } + printf("\n"); + return fails; +} diff --git a/test/libc/time/strftime_test.c b/test/libc/time/strftime_test.c index 2e37463c0..275c0fb7c 100644 --- a/test/libc/time/strftime_test.c +++ b/test/libc/time/strftime_test.c @@ -23,10 +23,9 @@ #include "libc/time/struct/tm.h" #include "libc/time/time.h" -textstartup static void strftime_test_init(void) { +__attribute__((__constructor__)) void init(void) { setenv("TZ", "GST", true); } -const void *const strftime_test_ctor[] initarray = {strftime_test_init}; char *FormatTime(const char *fmt, struct tm *tm) { static char buf[64]; diff --git a/test/libcxx/openmp_test.cc b/test/libcxx/openmp_test.cc index 7a8e7782c..ecea814ea 100644 --- a/test/libcxx/openmp_test.cc +++ b/test/libcxx/openmp_test.cc @@ -22,6 +22,7 @@ #include #include #include +#include "libc/stdio/rand.h" #define PRECISION 2e-6 #define LV1DCACHE 49152 @@ -63,36 +64,36 @@ void transpose(long m, long n, const TA *A, long lda, TB *B, long ldb) { } 
// m×k * k×n → m×n -// k×m * k×n → m×n if aT -// m×k * n×k → m×n if bT -// k×m * n×k → m×n if aT and bT +// k×m * k×n → m×n if aᵀ +// m×k * n×k → m×n if bᵀ +// k×m * n×k → m×n if aᵀ and bᵀ template -void dgemm(bool aT, bool bT, long m, long n, long k, float alpha, const TA *A, - long lda, const TB *B, long ldb, float beta, TC *C, long ldc) { +void dgemm(bool aᵀ, bool bᵀ, long m, long n, long k, float α, const TA *A, + long lda, const TB *B, long ldb, float β, TC *C, long ldc) { #pragma omp parallel for collapse(2) if (m * n * k > THRESHOLD) for (long i = 0; i < m; ++i) for (long j = 0; j < n; ++j) { double sum = 0; for (long l = 0; l < k; ++l) - sum = std::fma((aT ? A[lda * l + i] : A[lda * i + l]) * alpha, - (bT ? B[ldb * j + l] : B[ldb * l + j]), sum); - C[ldc * i + j] = beta * C[ldc * i + j] + sum; + sum = std::fma((aᵀ ? A[lda * l + i] : A[lda * i + l]) * α, + (bᵀ ? B[ldb * j + l] : B[ldb * l + j]), sum); + C[ldc * i + j] = C[ldc * i + j] * β + sum; } } template struct Gemmlin { public: - Gemmlin(bool aT, bool bT, float alpha, const TA *A, long lda, const TB *B, - long ldb, float beta, TC *C, long ldc) + Gemmlin(bool aT, bool bT, float α, const TA *A, long lda, const TB *B, + long ldb, float β, TC *C, long ldc) : aT(aT), bT(bT), - alpha(alpha), + α(α), A(A), lda(lda), B(B), ldb(ldb), - beta(beta), + β(β), C(C), ldc(ldc) { } @@ -101,7 +102,7 @@ struct Gemmlin { if (!m || !n) return; for (long i = 0; i < m; ++i) for (long j = 0; j < n; ++j) { - C[ldc * i + j] *= beta; + C[ldc * i + j] *= β; } if (!k) return; cub = sqrt(LV1DCACHE) / sqrt(sizeof(T) * 3); @@ -168,8 +169,8 @@ struct Gemmlin { T Ac[mc / mr][kc][mr]; for (long i = 0; i < mc; ++i) for (long j = 0; j < kc; ++j) - Ac[i / mr][j][i % mr] = alpha * (aT ? A[lda * (pc + j) + (ic + i)] - : A[lda * (ic + i) + (pc + j)]); + Ac[i / mr][j][i % mr] = α * (aT ? 
A[lda * (pc + j) + (ic + i)] + : A[lda * (ic + i) + (pc + j)]); for (long jc = n0; jc < n; jc += nc) { T Bc[nc / nr][nr][kc]; for (long j = 0; j < nc; ++j) @@ -220,12 +221,12 @@ struct Gemmlin { bool aT; bool bT; - float alpha; + float α; const TA *A; long lda; const TB *B; long ldb; - float beta; + float β; TC *C; long ldc; long ops; @@ -236,9 +237,9 @@ struct Gemmlin { }; template -void sgemm(bool aT, bool bT, long m, long n, long k, float alpha, const TA *A, - long lda, const TB *B, long ldb, float beta, TC *C, long ldc) { - Gemmlin g{aT, bT, alpha, A, lda, B, ldb, beta, C, ldc}; +void sgemm(bool aT, bool bT, long m, long n, long k, float α, const TA *A, + long lda, const TB *B, long ldb, float β, TC *C, long ldc) { + Gemmlin g{aT, bT, α, A, lda, B, ldb, β, C, ldc}; g.gemm(m, n, k); } @@ -360,20 +361,12 @@ long micros(void) { #x); \ } while (0) -unsigned long rando(void) { - static unsigned long s; - unsigned long z = (s += 0x9e3779b97f4a7c15); - z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; - z = (z ^ (z >> 27)) * 0x94d049bb133111eb; - return z ^ (z >> 31); -} - double real01(unsigned long x) { // (0,1) return 1. / 4503599627370496. 
* ((x >> 12) + .5); } double numba(void) { // (-1,1) - return real01(rando()) * 2 - 1; + return real01(lemur64()) * 2 - 1; } template @@ -390,20 +383,20 @@ void test_gemm(long m, long n, long k) { float *Bt = new float[n * k]; float *C = new float[m * n]; float *GOLD = new float[m * n]; - float alpha = 1; - float beta = 0; + float α = 1; + float β = 0; fill(A, m * k); fill(B, k * n); dgemm(0, 0, m, n, k, 1, A, k, B, n, 0, GOLD, n); transpose(m, k, A, k, At, m); transpose(k, n, B, n, Bt, k); - sgemm(0, 0, m, n, k, alpha, A, k, B, n, beta, C, n); + sgemm(0, 0, m, n, k, α, A, k, B, n, β, C, n); check(PRECISION, m, n, GOLD, n, C, n); - sgemm(1, 0, m, n, k, alpha, At, m, B, n, beta, C, n); + sgemm(1, 0, m, n, k, α, At, m, B, n, β, C, n); check(PRECISION, m, n, GOLD, n, C, n); - sgemm(0, 1, m, n, k, alpha, A, k, Bt, k, beta, C, n); + sgemm(0, 1, m, n, k, α, A, k, Bt, k, β, C, n); check(PRECISION, m, n, GOLD, n, C, n); - sgemm(1, 1, m, n, k, alpha, At, m, Bt, k, beta, C, n); + sgemm(1, 1, m, n, k, α, At, m, Bt, k, β, C, n); check(PRECISION, m, n, GOLD, n, C, n); delete[] GOLD; delete[] C; diff --git a/third_party/aarch64/arm_acle.internal.h b/third_party/aarch64/arm_acle.internal.h index 9c0bd0be6..687b133d2 100644 --- a/third_party/aarch64/arm_acle.internal.h +++ b/third_party/aarch64/arm_acle.internal.h @@ -1,12 +1,45 @@ #if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0) #ifndef _GCC_ARM_ACLE_H #define _GCC_ARM_ACLE_H +#pragma GCC aarch64 "arm_acle.h" #ifdef __cplusplus extern "C" { #endif +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) __extension__ extern __inline TYPE __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) NAME (TYPE __value, uint32_t __rotate) { size_t __size = sizeof (TYPE) * __CHAR_BIT__; __rotate = __rotate % __size; return __value >> __rotate | __value << ((__size - __rotate) % __size); } +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) +#undef 
_GCC_ARM_ACLE_ROR_FN +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) __extension__ extern __inline RTYPE __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __##NAME (ITYPE __value) { return __builtin_##BUILTIN (__value); } +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long) +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t) +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long) +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t) +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, int16_t) +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) +#undef _GCC_ARM_ACLE_DATA_FN +__extension__ extern __inline unsigned long +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__revl (unsigned long __value) +{ + if (sizeof (unsigned long) == 8) + return __revll (__value); + else + return __rev (__value); +} #pragma GCC push_options #pragma GCC target ("arch=armv8.3-a") -__funline int32_t +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __jcvt (double __a) { return __builtin_aarch64_jcvtzs (__a); @@ -14,42 +47,50 @@ __jcvt (double __a) #pragma GCC pop_options #pragma GCC push_options #pragma GCC target ("arch=armv8.5-a") -__funline float +__extension__ extern __inline float +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) __rint32zf (float __a) { return __builtin_aarch64_frint32zsf (__a); } -__funline double +__extension__ extern __inline double +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint32z (double __a) { return __builtin_aarch64_frint32zdf (__a); } -__funline float +__extension__ extern __inline float +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint64zf (float __a) { return __builtin_aarch64_frint64zsf (__a); } -__funline double +__extension__ extern __inline double +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint64z (double __a) { return __builtin_aarch64_frint64zdf (__a); } -__funline float +__extension__ extern __inline float +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint32xf (float __a) { return __builtin_aarch64_frint32xsf (__a); } -__funline double +__extension__ extern __inline double +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint32x (double __a) { return __builtin_aarch64_frint32xdf (__a); } -__funline float +__extension__ extern __inline float +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint64xf (float __a) { return __builtin_aarch64_frint64xsf (__a); } -__funline double +__extension__ extern __inline double +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rint64x (double __a) { return __builtin_aarch64_frint64xdf (__a); @@ -57,42 +98,50 @@ __rint64x (double __a) #pragma GCC pop_options #pragma GCC push_options #pragma GCC target ("+nothing+crc") -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32b (uint32_t __a, uint8_t __b) { return __builtin_aarch64_crc32b (__a, __b); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32cb (uint32_t __a, uint8_t __b) { return __builtin_aarch64_crc32cb (__a, __b); 
} -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32ch (uint32_t __a, uint16_t __b) { return __builtin_aarch64_crc32ch (__a, __b); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32cw (uint32_t __a, uint32_t __b) { return __builtin_aarch64_crc32cw (__a, __b); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32cd (uint32_t __a, uint64_t __b) { return __builtin_aarch64_crc32cx (__a, __b); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32h (uint32_t __a, uint16_t __b) { return __builtin_aarch64_crc32h (__a, __b); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32w (uint32_t __a, uint32_t __b) { return __builtin_aarch64_crc32w (__a, __b); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __crc32d (uint32_t __a, uint64_t __b) { return __builtin_aarch64_crc32x (__a, __b); @@ -112,36 +161,72 @@ __crc32d (uint32_t __a, uint64_t __b) #define _TMFAILURE_DBG 0x00400000u #define _TMFAILURE_INT 0x00800000u #define _TMFAILURE_TRIVIAL 0x01000000u -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __tstart (void) { return __builtin_aarch64_tstart (); } -__funline void +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __tcommit (void) { __builtin_aarch64_tcommit (); } -__funline void +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __tcancel (const uint64_t __reason) { 
__builtin_aarch64_tcancel (__reason); } -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __ttest (void) { return __builtin_aarch64_ttest (); } #pragma GCC pop_options #endif +#ifdef __ARM_FEATURE_LS64 +#pragma GCC push_options +#pragma GCC target ("+nothing+ls64") +typedef __arm_data512_t data512_t; +__extension__ extern __inline data512_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_ld64b (const void *__addr) +{ + return __builtin_aarch64_ld64b (__addr); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_st64b (void *__addr, data512_t __value) +{ + __builtin_aarch64_st64b (__addr, __value); +} +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_st64bv (void *__addr, data512_t __value) +{ + return __builtin_aarch64_st64bv (__addr, __value); +} +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_st64bv0 (void *__addr, data512_t __value) +{ + return __builtin_aarch64_st64bv0 (__addr, __value); +} +#pragma GCC pop_options +#endif #pragma GCC push_options #pragma GCC target ("+nothing+rng") -__funline int +__extension__ extern __inline int +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rndr (uint64_t *__res) { return __builtin_aarch64_rndr (__res); } -__funline int +__extension__ extern __inline int +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __rndrrs (uint64_t *__res) { return __builtin_aarch64_rndrrs (__res); diff --git a/third_party/aarch64/arm_fp16.internal.h b/third_party/aarch64/arm_fp16.internal.h index ddc72f764..84185a620 100644 --- a/third_party/aarch64/arm_fp16.internal.h +++ b/third_party/aarch64/arm_fp16.internal.h @@ -4,447 +4,536 @@ #pragma GCC push_options #pragma GCC target ("arch=armv8.2-a+fp16") typedef 
__fp16 float16_t; -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabsh_f16 (float16_t __a) { return __builtin_aarch64_abshf (__a); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzh_f16 (float16_t __a) { return __builtin_aarch64_cmeqhf_uss (__a, 0.0f); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgezh_f16 (float16_t __a) { return __builtin_aarch64_cmgehf_uss (__a, 0.0f); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgtzh_f16 (float16_t __a) { return __builtin_aarch64_cmgthf_uss (__a, 0.0f); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vclezh_f16 (float16_t __a) { return __builtin_aarch64_cmlehf_uss (__a, 0.0f); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcltzh_f16 (float16_t __a) { return __builtin_aarch64_cmlthf_uss (__a, 0.0f); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_f16_s16 (int16_t __a) { return __builtin_aarch64_floathihf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_f16_s32 (int32_t __a) { return __builtin_aarch64_floatsihf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_f16_s64 (int64_t __a) { return __builtin_aarch64_floatdihf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vcvth_f16_u16 (uint16_t __a) { return __builtin_aarch64_floatunshihf_us (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_f16_u32 (uint32_t __a) { return __builtin_aarch64_floatunssihf_us (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_f16_u64 (uint64_t __a) { return __builtin_aarch64_floatunsdihf_us (__a); } -__funline int16_t +__extension__ extern __inline int16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_s16_f16 (float16_t __a) { return __builtin_aarch64_fix_trunchfhi (__a); } -__funline int32_t +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_s32_f16 (float16_t __a) { return __builtin_aarch64_fix_trunchfsi (__a); } -__funline int64_t +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_s64_f16 (float16_t __a) { return __builtin_aarch64_fix_trunchfdi (__a); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_u16_f16 (float16_t __a) { return __builtin_aarch64_fixuns_trunchfhi_us (__a); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_u32_f16 (float16_t __a) { return __builtin_aarch64_fixuns_trunchfsi_us (__a); } -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_u64_f16 (float16_t __a) { return __builtin_aarch64_fixuns_trunchfdi_us (__a); } -__funline int16_t +__extension__ extern __inline int16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtah_s16_f16 (float16_t __a) { return __builtin_aarch64_lroundhfhi (__a); } 
-__funline int32_t +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtah_s32_f16 (float16_t __a) { return __builtin_aarch64_lroundhfsi (__a); } -__funline int64_t +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtah_s64_f16 (float16_t __a) { return __builtin_aarch64_lroundhfdi (__a); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtah_u16_f16 (float16_t __a) { return __builtin_aarch64_lrounduhfhi_us (__a); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtah_u32_f16 (float16_t __a) { return __builtin_aarch64_lrounduhfsi_us (__a); } -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtah_u64_f16 (float16_t __a) { return __builtin_aarch64_lrounduhfdi_us (__a); } -__funline int16_t +__extension__ extern __inline int16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtmh_s16_f16 (float16_t __a) { return __builtin_aarch64_lfloorhfhi (__a); } -__funline int32_t +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtmh_s32_f16 (float16_t __a) { return __builtin_aarch64_lfloorhfsi (__a); } -__funline int64_t +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtmh_s64_f16 (float16_t __a) { return __builtin_aarch64_lfloorhfdi (__a); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtmh_u16_f16 (float16_t __a) { return __builtin_aarch64_lflooruhfhi_us (__a); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vcvtmh_u32_f16 (float16_t __a) { return __builtin_aarch64_lflooruhfsi_us (__a); } -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtmh_u64_f16 (float16_t __a) { return __builtin_aarch64_lflooruhfdi_us (__a); } -__funline int16_t +__extension__ extern __inline int16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtnh_s16_f16 (float16_t __a) { return __builtin_aarch64_lfrintnhfhi (__a); } -__funline int32_t +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtnh_s32_f16 (float16_t __a) { return __builtin_aarch64_lfrintnhfsi (__a); } -__funline int64_t +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtnh_s64_f16 (float16_t __a) { return __builtin_aarch64_lfrintnhfdi (__a); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtnh_u16_f16 (float16_t __a) { return __builtin_aarch64_lfrintnuhfhi_us (__a); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtnh_u32_f16 (float16_t __a) { return __builtin_aarch64_lfrintnuhfsi_us (__a); } -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtnh_u64_f16 (float16_t __a) { return __builtin_aarch64_lfrintnuhfdi_us (__a); } -__funline int16_t +__extension__ extern __inline int16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtph_s16_f16 (float16_t __a) { return __builtin_aarch64_lceilhfhi (__a); } -__funline int32_t +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtph_s32_f16 (float16_t __a) { return __builtin_aarch64_lceilhfsi (__a); } -__funline int64_t +__extension__ extern 
__inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtph_s64_f16 (float16_t __a) { return __builtin_aarch64_lceilhfdi (__a); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtph_u16_f16 (float16_t __a) { return __builtin_aarch64_lceiluhfhi_us (__a); } -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtph_u32_f16 (float16_t __a) { return __builtin_aarch64_lceiluhfsi_us (__a); } -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtph_u64_f16 (float16_t __a) { return __builtin_aarch64_lceiluhfdi_us (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vnegh_f16 (float16_t __a) { return __builtin_aarch64_neghf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrecpeh_f16 (float16_t __a) { return __builtin_aarch64_frecpehf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrecpxh_f16 (float16_t __a) { return __builtin_aarch64_frecpxhf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndh_f16 (float16_t __a) { return __builtin_aarch64_btrunchf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndah_f16 (float16_t __a) { return __builtin_aarch64_roundhf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndih_f16 (float16_t __a) { return 
__builtin_aarch64_nearbyinthf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndmh_f16 (float16_t __a) { return __builtin_aarch64_floorhf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndnh_f16 (float16_t __a) { - return __builtin_aarch64_frintnhf (__a); + return __builtin_aarch64_roundevenhf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndph_f16 (float16_t __a) { return __builtin_aarch64_ceilhf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndxh_f16 (float16_t __a) { return __builtin_aarch64_rinthf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsqrteh_f16 (float16_t __a) { return __builtin_aarch64_rsqrtehf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsqrth_f16 (float16_t __a) { return __builtin_aarch64_sqrthf (__a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddh_f16 (float16_t __a, float16_t __b) { return __a + __b; } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vabdh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_fabdhf (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcageh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_facgehf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t 
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcagth_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_facgthf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcaleh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_faclehf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcalth_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_faclthf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_cmeqhf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgeh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_cmgehf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcgth_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_cmgthf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcleh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_cmlehf_uss (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vclth_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_cmlthf_uss (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_f16_s16 (int16_t __a, const int __b) { return __builtin_aarch64_scvtfhi (__a, __b); } -__funline float16_t +__extension__ extern __inline 
float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_f16_s32 (int32_t __a, const int __b) { return __builtin_aarch64_scvtfsihf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_f16_s64 (int64_t __a, const int __b) { return __builtin_aarch64_scvtfdihf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_f16_u16 (uint16_t __a, const int __b) { return __builtin_aarch64_ucvtfhi_sus (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_f16_u32 (uint32_t __a, const int __b) { return __builtin_aarch64_ucvtfsihf_sus (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_f16_u64 (uint64_t __a, const int __b) { return __builtin_aarch64_ucvtfdihf_sus (__a, __b); } -__funline int16_t +__extension__ extern __inline int16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_s16_f16 (float16_t __a, const int __b) { return __builtin_aarch64_fcvtzshf (__a, __b); } -__funline int32_t +__extension__ extern __inline int32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_s32_f16 (float16_t __a, const int __b) { return __builtin_aarch64_fcvtzshfsi (__a, __b); } -__funline int64_t +__extension__ extern __inline int64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_s64_f16 (float16_t __a, const int __b) { return __builtin_aarch64_fcvtzshfdi (__a, __b); } -__funline uint16_t +__extension__ extern __inline uint16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_u16_f16 (float16_t __a, const int __b) { return __builtin_aarch64_fcvtzuhf_uss (__a, __b); 
} -__funline uint32_t +__extension__ extern __inline uint32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_u32_f16 (float16_t __a, const int __b) { return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b); } -__funline uint64_t +__extension__ extern __inline uint64_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvth_n_u64_f16 (float16_t __a, const int __b) { return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdivh_f16 (float16_t __a, float16_t __b) { return __a / __b; } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_fmaxhf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxnmh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_fmaxhf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_fminhf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminnmh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_fminhf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulh_f16 (float16_t __a, float16_t __b) { return __a * __b; } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulxh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_fmulxhf (__a, __b); } -__funline float16_t +__extension__ 
extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrecpsh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_frecpshf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsqrtsh_f16 (float16_t __a, float16_t __b) { return __builtin_aarch64_rsqrtshf (__a, __b); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubh_f16 (float16_t __a, float16_t __b) { return __a - __b; } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vfmah_f16 (float16_t __a, float16_t __b, float16_t __c) { return __builtin_aarch64_fmahf (__b, __c, __a); } -__funline float16_t +__extension__ extern __inline float16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c) { return __builtin_aarch64_fnmahf (__b, __c, __a); diff --git a/third_party/aarch64/arm_neon.internal.h b/third_party/aarch64/arm_neon.internal.h index 78560ec85..1cbe2db72 100644 --- a/third_party/aarch64/arm_neon.internal.h +++ b/third_party/aarch64/arm_neon.internal.h @@ -3,6 +3,7 @@ #define _AARCH64_NEON_H_ #pragma GCC push_options #pragma GCC target ("+nothing+simd") +#pragma GCC aarch64 "arm_neon.h" #pragma GCC diagnostic ignored "-Wmissing-braces" #define __AARCH64_UINT64_C(__C) ((uint64_t) __C) #define __AARCH64_INT64_C(__C) ((int64_t) __C) @@ -43,366 +44,6 @@ typedef float float32_t; typedef double float64_t; typedef __Bfloat16x4_t bfloat16x4_t; typedef __Bfloat16x8_t bfloat16x8_t; -typedef struct bfloat16x4x2_t -{ - bfloat16x4_t val[2]; -} bfloat16x4x2_t; -typedef struct bfloat16x8x2_t -{ - bfloat16x8_t val[2]; -} bfloat16x8x2_t; -typedef struct bfloat16x4x3_t -{ - bfloat16x4_t val[3]; -} bfloat16x4x3_t; -typedef struct bfloat16x8x3_t -{ - 
bfloat16x8_t val[3]; -} bfloat16x8x3_t; -typedef struct bfloat16x4x4_t -{ - bfloat16x4_t val[4]; -} bfloat16x4x4_t; -typedef struct bfloat16x8x4_t -{ - bfloat16x8_t val[4]; -} bfloat16x8x4_t; -typedef struct int8x8x2_t -{ - int8x8_t val[2]; -} int8x8x2_t; -typedef struct int8x16x2_t -{ - int8x16_t val[2]; -} int8x16x2_t; -typedef struct int16x4x2_t -{ - int16x4_t val[2]; -} int16x4x2_t; -typedef struct int16x8x2_t -{ - int16x8_t val[2]; -} int16x8x2_t; -typedef struct int32x2x2_t -{ - int32x2_t val[2]; -} int32x2x2_t; -typedef struct int32x4x2_t -{ - int32x4_t val[2]; -} int32x4x2_t; -typedef struct int64x1x2_t -{ - int64x1_t val[2]; -} int64x1x2_t; -typedef struct int64x2x2_t -{ - int64x2_t val[2]; -} int64x2x2_t; -typedef struct uint8x8x2_t -{ - uint8x8_t val[2]; -} uint8x8x2_t; -typedef struct uint8x16x2_t -{ - uint8x16_t val[2]; -} uint8x16x2_t; -typedef struct uint16x4x2_t -{ - uint16x4_t val[2]; -} uint16x4x2_t; -typedef struct uint16x8x2_t -{ - uint16x8_t val[2]; -} uint16x8x2_t; -typedef struct uint32x2x2_t -{ - uint32x2_t val[2]; -} uint32x2x2_t; -typedef struct uint32x4x2_t -{ - uint32x4_t val[2]; -} uint32x4x2_t; -typedef struct uint64x1x2_t -{ - uint64x1_t val[2]; -} uint64x1x2_t; -typedef struct uint64x2x2_t -{ - uint64x2_t val[2]; -} uint64x2x2_t; -typedef struct float16x4x2_t -{ - float16x4_t val[2]; -} float16x4x2_t; -typedef struct float16x8x2_t -{ - float16x8_t val[2]; -} float16x8x2_t; -typedef struct float32x2x2_t -{ - float32x2_t val[2]; -} float32x2x2_t; -typedef struct float32x4x2_t -{ - float32x4_t val[2]; -} float32x4x2_t; -typedef struct float64x2x2_t -{ - float64x2_t val[2]; -} float64x2x2_t; -typedef struct float64x1x2_t -{ - float64x1_t val[2]; -} float64x1x2_t; -typedef struct poly8x8x2_t -{ - poly8x8_t val[2]; -} poly8x8x2_t; -typedef struct poly8x16x2_t -{ - poly8x16_t val[2]; -} poly8x16x2_t; -typedef struct poly16x4x2_t -{ - poly16x4_t val[2]; -} poly16x4x2_t; -typedef struct poly16x8x2_t -{ - poly16x8_t val[2]; -} poly16x8x2_t; 
-typedef struct poly64x1x2_t -{ - poly64x1_t val[2]; -} poly64x1x2_t; -typedef struct poly64x1x3_t -{ - poly64x1_t val[3]; -} poly64x1x3_t; -typedef struct poly64x1x4_t -{ - poly64x1_t val[4]; -} poly64x1x4_t; -typedef struct poly64x2x2_t -{ - poly64x2_t val[2]; -} poly64x2x2_t; -typedef struct poly64x2x3_t -{ - poly64x2_t val[3]; -} poly64x2x3_t; -typedef struct poly64x2x4_t -{ - poly64x2_t val[4]; -} poly64x2x4_t; -typedef struct int8x8x3_t -{ - int8x8_t val[3]; -} int8x8x3_t; -typedef struct int8x16x3_t -{ - int8x16_t val[3]; -} int8x16x3_t; -typedef struct int16x4x3_t -{ - int16x4_t val[3]; -} int16x4x3_t; -typedef struct int16x8x3_t -{ - int16x8_t val[3]; -} int16x8x3_t; -typedef struct int32x2x3_t -{ - int32x2_t val[3]; -} int32x2x3_t; -typedef struct int32x4x3_t -{ - int32x4_t val[3]; -} int32x4x3_t; -typedef struct int64x1x3_t -{ - int64x1_t val[3]; -} int64x1x3_t; -typedef struct int64x2x3_t -{ - int64x2_t val[3]; -} int64x2x3_t; -typedef struct uint8x8x3_t -{ - uint8x8_t val[3]; -} uint8x8x3_t; -typedef struct uint8x16x3_t -{ - uint8x16_t val[3]; -} uint8x16x3_t; -typedef struct uint16x4x3_t -{ - uint16x4_t val[3]; -} uint16x4x3_t; -typedef struct uint16x8x3_t -{ - uint16x8_t val[3]; -} uint16x8x3_t; -typedef struct uint32x2x3_t -{ - uint32x2_t val[3]; -} uint32x2x3_t; -typedef struct uint32x4x3_t -{ - uint32x4_t val[3]; -} uint32x4x3_t; -typedef struct uint64x1x3_t -{ - uint64x1_t val[3]; -} uint64x1x3_t; -typedef struct uint64x2x3_t -{ - uint64x2_t val[3]; -} uint64x2x3_t; -typedef struct float16x4x3_t -{ - float16x4_t val[3]; -} float16x4x3_t; -typedef struct float16x8x3_t -{ - float16x8_t val[3]; -} float16x8x3_t; -typedef struct float32x2x3_t -{ - float32x2_t val[3]; -} float32x2x3_t; -typedef struct float32x4x3_t -{ - float32x4_t val[3]; -} float32x4x3_t; -typedef struct float64x2x3_t -{ - float64x2_t val[3]; -} float64x2x3_t; -typedef struct float64x1x3_t -{ - float64x1_t val[3]; -} float64x1x3_t; -typedef struct poly8x8x3_t -{ - poly8x8_t val[3]; 
-} poly8x8x3_t; -typedef struct poly8x16x3_t -{ - poly8x16_t val[3]; -} poly8x16x3_t; -typedef struct poly16x4x3_t -{ - poly16x4_t val[3]; -} poly16x4x3_t; -typedef struct poly16x8x3_t -{ - poly16x8_t val[3]; -} poly16x8x3_t; -typedef struct int8x8x4_t -{ - int8x8_t val[4]; -} int8x8x4_t; -typedef struct int8x16x4_t -{ - int8x16_t val[4]; -} int8x16x4_t; -typedef struct int16x4x4_t -{ - int16x4_t val[4]; -} int16x4x4_t; -typedef struct int16x8x4_t -{ - int16x8_t val[4]; -} int16x8x4_t; -typedef struct int32x2x4_t -{ - int32x2_t val[4]; -} int32x2x4_t; -typedef struct int32x4x4_t -{ - int32x4_t val[4]; -} int32x4x4_t; -typedef struct int64x1x4_t -{ - int64x1_t val[4]; -} int64x1x4_t; -typedef struct int64x2x4_t -{ - int64x2_t val[4]; -} int64x2x4_t; -typedef struct uint8x8x4_t -{ - uint8x8_t val[4]; -} uint8x8x4_t; -typedef struct uint8x16x4_t -{ - uint8x16_t val[4]; -} uint8x16x4_t; -typedef struct uint16x4x4_t -{ - uint16x4_t val[4]; -} uint16x4x4_t; -typedef struct uint16x8x4_t -{ - uint16x8_t val[4]; -} uint16x8x4_t; -typedef struct uint32x2x4_t -{ - uint32x2_t val[4]; -} uint32x2x4_t; -typedef struct uint32x4x4_t -{ - uint32x4_t val[4]; -} uint32x4x4_t; -typedef struct uint64x1x4_t -{ - uint64x1_t val[4]; -} uint64x1x4_t; -typedef struct uint64x2x4_t -{ - uint64x2_t val[4]; -} uint64x2x4_t; -typedef struct float16x4x4_t -{ - float16x4_t val[4]; -} float16x4x4_t; -typedef struct float16x8x4_t -{ - float16x8_t val[4]; -} float16x8x4_t; -typedef struct float32x2x4_t -{ - float32x2_t val[4]; -} float32x2x4_t; -typedef struct float32x4x4_t -{ - float32x4_t val[4]; -} float32x4x4_t; -typedef struct float64x2x4_t -{ - float64x2_t val[4]; -} float64x2x4_t; -typedef struct float64x1x4_t -{ - float64x1_t val[4]; -} float64x1x4_t; -typedef struct poly8x8x4_t -{ - poly8x8_t val[4]; -} poly8x8x4_t; -typedef struct poly8x16x4_t -{ - poly8x16_t val[4]; -} poly8x16x4_t; -typedef struct poly16x4x4_t -{ - poly16x4_t val[4]; -} poly16x4x4_t; -typedef struct poly16x8x4_t -{ - 
poly16x8_t val[4]; -} poly16x8x4_t; #define __aarch64_vdup_lane_any(__size, __q, __a, __b) vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b)) #define __aarch64_vdup_lane_f16(__a, __b) __aarch64_vdup_lane_any (f16, , __a, __b) #define __aarch64_vdup_lane_f32(__a, __b) __aarch64_vdup_lane_any (f32, , __a, __b) @@ -593,475 +234,433 @@ __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_s8 (int8x8_t __a, int8x8_t __b) { - return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b); + return __builtin_aarch64_saddlv8qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_s16 (int16x4_t __a, int16x4_t __b) { - return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b); + return __builtin_aarch64_saddlv4hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_s32 (int32x2_t __a, int32x2_t __b) { - return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b); + return __builtin_aarch64_saddlv2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_uaddlv8qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_uaddlv4hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a, - (int32x2_t) __b); + return 
__builtin_aarch64_uaddlv2si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_s8 (int8x16_t __a, int8x16_t __b) { - return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b); + return __builtin_aarch64_saddl2v16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_s16 (int16x8_t __a, int16x8_t __b) { - return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b); + return __builtin_aarch64_saddl2v8hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_s32 (int32x4_t __a, int32x4_t __b) { - return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b); + return __builtin_aarch64_saddl2v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_uaddl2v16qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_uaddl2v8hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_uaddl2v4si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_s8 (int16x8_t __a, int8x8_t __b) { - return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b); + return 
__builtin_aarch64_saddwv8qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_s16 (int32x4_t __a, int16x4_t __b) { - return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b); + return __builtin_aarch64_saddwv4hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_s32 (int64x2_t __a, int32x2_t __b) { - return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b); + return __builtin_aarch64_saddwv2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_u8 (uint16x8_t __a, uint8x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_uaddwv8qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_u16 (uint32x4_t __a, uint16x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_uaddwv4hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_u32 (uint64x2_t __a, uint32x2_t __b) { - return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_uaddwv2si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_s8 (int16x8_t __a, int8x16_t __b) { - return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b); + return __builtin_aarch64_saddw2v16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_s16 (int32x4_t __a, int16x8_t __b) { - return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b); + return __builtin_aarch64_saddw2v8hi (__a, __b); } 
__extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_s32 (int64x2_t __a, int32x4_t __b) { - return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b); + return __builtin_aarch64_saddw2v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b) { - return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_uaddw2v16qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b) { - return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_uaddw2v8hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b) { - return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_uaddw2v4si_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b); + return __builtin_aarch64_shaddv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b); + return __builtin_aarch64_shaddv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b); + return __builtin_aarch64_shaddv2si (__a, __b); } __extension__ extern __inline 
uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_uhaddv8qi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_uhaddv4hi_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhadd_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_uhaddv2si_uuu (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b); + return __builtin_aarch64_shaddv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b); + return __builtin_aarch64_shaddv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b); + return __builtin_aarch64_shaddv4si (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_uhaddv16qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_uhaddv8hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_uhaddv4si_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b); + return __builtin_aarch64_srhaddv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b); + return __builtin_aarch64_srhaddv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b); + return __builtin_aarch64_srhaddv2si (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_urhaddv8qi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_urhaddv4hi_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_urhaddv2si_uuu (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b); + return __builtin_aarch64_srhaddv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b); + return __builtin_aarch64_srhaddv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b); + return __builtin_aarch64_srhaddv4si (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_urhaddv16qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_urhaddv8hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_urhaddv4si_uuu (__a, __b); } __extension__ extern __inline int8x8_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_s16 (int16x8_t __a, int16x8_t __b) { - return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b); + return __builtin_aarch64_addhnv8hi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_s32 (int32x4_t __a, int32x4_t __b) { - return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b); + return __builtin_aarch64_addhnv4si (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_s64 (int64x2_t __a, int64x2_t __b) { - return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b); + return __builtin_aarch64_addhnv2di (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_addhnv8hi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_addhnv4si_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a, - (int64x2_t) __b); + return __builtin_aarch64_addhnv2di_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_s16 (int16x8_t __a, int16x8_t __b) { - return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b); + return __builtin_aarch64_raddhnv8hi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vraddhn_s32 (int32x4_t __a, int32x4_t __b) { - return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b); + return __builtin_aarch64_raddhnv4si (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_s64 (int64x2_t __a, int64x2_t __b) { - return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b); + return __builtin_aarch64_raddhnv2di (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_raddhnv8hi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_raddhnv4si_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a, - (int64x2_t) __b); + return __builtin_aarch64_raddhnv2di_uuu (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) { - return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c); + return __builtin_aarch64_addhn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) { - return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c); + return __builtin_aarch64_addhn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) { - return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c); + return __builtin_aarch64_addhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { - return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a, - (int16x8_t) __b, - (int16x8_t) __c); + return __builtin_aarch64_addhn2v8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { - return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a, - (int32x4_t) __b, - (int32x4_t) __c); + return __builtin_aarch64_addhn2v4si_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { - return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a, - (int64x2_t) __b, - (int64x2_t) __c); + return __builtin_aarch64_addhn2v2di_uuuu (__a, __b, __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) { - return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c); + return __builtin_aarch64_raddhn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) { - return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c); + return __builtin_aarch64_raddhn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_s64 
(int32x2_t __a, int64x2_t __b, int64x2_t __c) { - return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c); + return __builtin_aarch64_raddhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { - return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a, - (int16x8_t) __b, - (int16x8_t) __c); + return __builtin_aarch64_raddhn2v8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { - return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a, - (int32x4_t) __b, - (int32x4_t) __c); + return __builtin_aarch64_raddhn2v4si_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { - return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a, - (int64x2_t) __b, - (int64x2_t) __c); + return __builtin_aarch64_raddhn2v2di_uuuu (__a, __b, __c); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -1139,8 +738,7 @@ __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmul_p8 (poly8x8_t __a, poly8x8_t __b) { - return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_pmulv8qi_ppp (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -1194,8 +792,7 @@ __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulq_p8 (poly8x16_t __a, poly8x16_t __b) { - return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a, - 
(int8x16_t) __b); + return __builtin_aarch64_pmulv16qi_ppp (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -1801,157 +1398,145 @@ __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_s8 (int8x8_t __a, int8x8_t __b) { - return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b); + return __builtin_aarch64_ssublv8qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_s16 (int16x4_t __a, int16x4_t __b) { - return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b); + return __builtin_aarch64_ssublv4hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_s32 (int32x2_t __a, int32x2_t __b) { - return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b); + return __builtin_aarch64_ssublv2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_usublv8qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_usublv4hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_usublv2si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_high_s8 
(int8x16_t __a, int8x16_t __b) { - return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b); + return __builtin_aarch64_ssubl2v16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_high_s16 (int16x8_t __a, int16x8_t __b) { - return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b); + return __builtin_aarch64_ssubl2v8hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_high_s32 (int32x4_t __a, int32x4_t __b) { - return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b); + return __builtin_aarch64_ssubl2v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_usubl2v16qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_usubl2v8hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_usubl2v4si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_s8 (int16x8_t __a, int8x8_t __b) { - return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b); + return __builtin_aarch64_ssubwv8qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_s16 (int32x4_t __a, 
int16x4_t __b) { - return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b); + return __builtin_aarch64_ssubwv4hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_s32 (int64x2_t __a, int32x2_t __b) { - return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b); + return __builtin_aarch64_ssubwv2si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_u8 (uint16x8_t __a, uint8x8_t __b) { - return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_usubwv8qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_u16 (uint32x4_t __a, uint16x4_t __b) { - return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_usubwv4hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_u32 (uint64x2_t __a, uint32x2_t __b) { - return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_usubwv2si_uuu (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_high_s8 (int16x8_t __a, int8x16_t __b) { - return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b); + return __builtin_aarch64_ssubw2v16qi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_high_s16 (int32x4_t __a, int16x8_t __b) { - return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b); + return __builtin_aarch64_ssubw2v8hi (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_high_s32 (int64x2_t __a, int32x4_t __b) { - return (int64x2_t) 
__builtin_aarch64_ssubw2v4si (__a, __b); + return __builtin_aarch64_ssubw2v4si (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b) { - return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_usubw2v16qi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b) { - return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_usubw2v8hi_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b) { - return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_usubw2v4si_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -1987,241 +1572,217 @@ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsub_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_aarch64_shsubv8qi (__a, __b); + return __builtin_aarch64_shsubv8qi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsub_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b); + return __builtin_aarch64_shsubv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsub_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b); + return __builtin_aarch64_shsubv2si (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vhsub_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_uhsubv8qi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsub_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_uhsubv4hi_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsub_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_uhsubv2si_uuu (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsubq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b); + return __builtin_aarch64_shsubv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsubq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b); + return __builtin_aarch64_shsubv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsubq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b); + return __builtin_aarch64_shsubv4si (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsubq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_uhsubv16qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vhsubq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_uhsubv8hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vhsubq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_uhsubv4si_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_s16 (int16x8_t __a, int16x8_t __b) { - return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b); + return __builtin_aarch64_subhnv8hi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_s32 (int32x4_t __a, int32x4_t __b) { - return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b); + return __builtin_aarch64_subhnv4si (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_s64 (int64x2_t __a, int64x2_t __b) { - return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b); + return __builtin_aarch64_subhnv2di (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_subhnv8hi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_subhnv4si_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vsubhn_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a, - (int64x2_t) __b); + return __builtin_aarch64_subhnv2di_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_s16 (int16x8_t __a, int16x8_t __b) { - return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b); + return __builtin_aarch64_rsubhnv8hi (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_s32 (int32x4_t __a, int32x4_t __b) { - return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b); + return __builtin_aarch64_rsubhnv4si (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_s64 (int64x2_t __a, int64x2_t __b) { - return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b); + return __builtin_aarch64_rsubhnv2di (__a, __b); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_rsubhnv8hi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_rsubhnv4si_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a, - (int64x2_t) __b); + return __builtin_aarch64_rsubhnv2di_uuu (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) { - return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c); + return __builtin_aarch64_rsubhn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) { - return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c); + return __builtin_aarch64_rsubhn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) { - return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c); + return __builtin_aarch64_rsubhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { - return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a, - (int16x8_t) __b, - (int16x8_t) __c); + return __builtin_aarch64_rsubhn2v8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { - return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a, - (int32x4_t) __b, - (int32x4_t) __c); + return __builtin_aarch64_rsubhn2v4si_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { - return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a, - (int64x2_t) __b, - (int64x2_t) __c); + return __builtin_aarch64_rsubhn2v2di_uuuu (__a, __b, __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t 
__c) { - return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c); + return __builtin_aarch64_subhn2v8hi (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) { - return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);; + return __builtin_aarch64_subhn2v4si (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) { - return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c); + return __builtin_aarch64_subhn2v2di (__a, __b, __c); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { - return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a, - (int16x8_t) __b, - (int16x8_t) __c); + return __builtin_aarch64_subhn2v8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { - return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a, - (int32x4_t) __b, - (int32x4_t) __c); + return __builtin_aarch64_subhn2v4si_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { - return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a, - (int64x2_t) __b, - (int64x2_t) __c); + return __builtin_aarch64_subhn2v2di_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -5461,19 +5022,19 @@ __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__,
__artificial__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b); + return __builtin_aarch64_combinev8qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b); + return __builtin_aarch64_combinev4hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b); + return __builtin_aarch64_combinev2si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -5491,34 +5052,31 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_f32 (float32x2_t __a, float32x2_t __b) { - return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b); + return __builtin_aarch64_combinev2sf (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_combinev8qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_combinev4hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a, - (int32x2_t) __b); + return 
__builtin_aarch64_combinev2si_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]); + return __builtin_aarch64_combinedi_uuu (__a[0], __b[0]); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -5530,21 +5088,19 @@ __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_p8 (poly8x8_t __a, poly8x8_t __b) { - return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_combinev8qi_ppp (__a, __b); } __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_p16 (poly16x4_t __a, poly16x4_t __b) { - return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_combinev4hi_ppp (__a, __b); } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_p64 (poly64x1_t __a, poly64x1_t __b) { - return (poly64x2_t) __builtin_aarch64_combinedi_ppp (__a[0], __b[0]); + return __builtin_aarch64_combinedi_ppp (__a[0], __b[0]); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -5898,46 +5454,25 @@ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtx_f32_f64 (float64x2_t __a) { - float32x2_t __result; - __asm__ ("fcvtxn %0.2s,%1.2d" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_float_trunc_rodd_lo_v2sf (__a); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtx_high_f32_f64 (float32x2_t __a, float64x2_t __b) { - float32x4_t 
__result; - __asm__ ("fcvtxn2 %0.4s,%1.2d" - : "=w"(__result) - : "w" (__b), "0"(__a) - : ); - return __result; + return __builtin_aarch64_float_trunc_rodd_hi_v4sf (__a, __b); } __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvtxd_f32_f64 (float64_t __a) { - float32_t __result; - __asm__ ("fcvtxn %s0,%d1" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_float_trunc_rodd_df (__a); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - float32x2_t __result; - float32x2_t __t1; - __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" - : "=w"(__result), "=w"(__t1) - : "0"(__a), "w"(__b), "w"(__c) - : ); - return __result; + return __builtin_aarch64_float_mla_nv2sf (__a, __b, __c); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -5955,17 +5490,13 @@ __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) { - return (uint16x4_t) __builtin_aarch64_mla_nv4hi ((int16x4_t) __a, - (int16x4_t) __b, - (int16_t) __c); + return __builtin_aarch64_mla_nv4hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) { - return (uint32x2_t) __builtin_aarch64_mla_nv2si ((int32x2_t) __a, - (int32x2_t) __b, - (int32_t) __c); + return __builtin_aarch64_mla_nv2si_uuuu (__a, __b, __c); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -5989,25 +5520,19 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_u8 (uint8x8_t __a, uint8x8_t __b, 
uint8x8_t __c) { - return (uint8x8_t) __builtin_aarch64_mlav8qi ((int8x8_t) __a, - (int8x8_t) __b, - (int8x8_t) __c); + return __builtin_aarch64_mlav8qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { - return (uint16x4_t) __builtin_aarch64_mlav4hi ((int16x4_t) __a, - (int16x4_t) __b, - (int16x4_t) __c); + return __builtin_aarch64_mlav4hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { - return (uint32x2_t) __builtin_aarch64_mlav2si ((int32x2_t) __a, - (int32x2_t) __b, - (int32x2_t) __c); + return __builtin_aarch64_mlav2si_uuuu (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6237,13 +5762,7 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - float32x4_t __result; - float32x4_t __t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" - : "=w"(__result), "=w"(__t1) - : "0"(__a), "w"(__b), "w"(__c) - : ); - return __result; + return __builtin_aarch64_float_mla_nv4sf (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6261,17 +5780,13 @@ __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) { - return (uint16x8_t) __builtin_aarch64_mla_nv8hi ((int16x8_t) __a, - (int16x8_t) __b, - (int16_t) __c); + return __builtin_aarch64_mla_nv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_n_u32 
(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { - return (uint32x4_t) __builtin_aarch64_mla_nv4si ((int32x4_t) __a, - (int32x4_t) __b, - (int32_t) __c); + return __builtin_aarch64_mla_nv4si_uuuu (__a, __b, __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6295,37 +5810,25 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { - return (uint8x16_t) __builtin_aarch64_mlav16qi ((int8x16_t) __a, - (int8x16_t) __b, - (int8x16_t) __c); + return __builtin_aarch64_mlav16qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { - return (uint16x8_t) __builtin_aarch64_mlav8hi ((int16x8_t) __a, - (int16x8_t) __b, - (int16x8_t) __c); + return __builtin_aarch64_mlav8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { - return (uint32x4_t) __builtin_aarch64_mlav4si ((int32x4_t) __a, - (int32x4_t) __b, - (int32x4_t) __c); + return __builtin_aarch64_mlav4si_uuuu (__a, __b, __c); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - float32x2_t __result; - float32x2_t __t1; - __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" - : "=w"(__result), "=w"(__t1) - : "0"(__a), "w"(__b), "w"(__c) - : ); - return __result; + return __builtin_aarch64_float_mls_nv2sf (__a, __b, __c); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6343,17 +5846,13 @@ __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) { - return (uint16x4_t) __builtin_aarch64_mls_nv4hi ((int16x4_t) __a, - (int16x4_t) __b, - (int16_t) __c); + return __builtin_aarch64_mls_nv4hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) { - return (uint32x2_t) __builtin_aarch64_mls_nv2si ((int32x2_t) __a, - (int32x2_t) __b, - (int32_t) __c); + return __builtin_aarch64_mls_nv2si_uuuu (__a, __b, __c); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6377,25 +5876,19 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { - return (uint8x8_t) __builtin_aarch64_mlsv8qi ((int8x8_t) __a, - (int8x8_t) __b, - (int8x8_t) __c); + return __builtin_aarch64_mlsv8qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { - return (uint16x4_t) __builtin_aarch64_mlsv4hi ((int16x4_t) __a, - (int16x4_t) __b, - (int16x4_t) __c); + return __builtin_aarch64_mlsv4hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { - return (uint32x2_t) __builtin_aarch64_mlsv2si ((int32x2_t) __a, - (int32x2_t) __b, - (int32x2_t) __c); + return __builtin_aarch64_mlsv2si_uuuu (__a, __b, __c); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6629,13 +6122,7 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_f32 (float32x4_t __a, float32x4_t 
__b, float32_t __c) { - float32x4_t __result; - float32x4_t __t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" - : "=w"(__result), "=w"(__t1) - : "0"(__a), "w"(__b), "w"(__c) - : ); - return __result; + return __builtin_aarch64_float_mls_nv4sf (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6653,17 +6140,13 @@ __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) { - return (uint16x8_t) __builtin_aarch64_mls_nv8hi ((int16x8_t) __a, - (int16x8_t) __b, - (int16_t) __c); + return __builtin_aarch64_mls_nv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) { - return (uint32x4_t) __builtin_aarch64_mls_nv4si ((int32x4_t) __a, - (int32x4_t) __b, - (int32_t) __c); + return __builtin_aarch64_mls_nv4si_uuuu (__a, __b, __c); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6687,25 +6170,19 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { - return (uint8x16_t) __builtin_aarch64_mlsv16qi ((int8x16_t) __a, - (int8x16_t) __b, - (int8x16_t) __c); + return __builtin_aarch64_mlsv16qi_uuuu (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { - return (uint16x8_t) __builtin_aarch64_mlsv8hi ((int16x8_t) __a, - (int16x8_t) __b, - (int16x8_t) __c); + return __builtin_aarch64_mlsv8hi_uuuu (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { - return (uint32x4_t) __builtin_aarch64_mlsv4si ((int32x4_t) __a, - (int32x4_t) __b, - (int32x4_t) __c); + return __builtin_aarch64_mlsv4si_uuuu (__a, __b, __c); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6801,22 +6278,19 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_u16 (uint8x8_t __a, uint16x8_t __b) { - return (uint8x16_t) - __builtin_aarch64_xtn2v8hi ((int8x8_t) __a, (int16x8_t) __b); + return __builtin_aarch64_xtn2v8hi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_u32 (uint16x4_t __a, uint32x4_t __b) { - return (uint16x8_t) - __builtin_aarch64_xtn2v4si ((int16x4_t) __a, (int32x4_t) __b); + return __builtin_aarch64_xtn2v4si_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_high_u64 (uint32x2_t __a, uint64x2_t __b) { - return (uint32x4_t) - __builtin_aarch64_xtn2v2di ((int32x2_t) __a, (int64x2_t) __b); + return __builtin_aarch64_xtn2v2di_uuu (__a, __b); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6840,19 +6314,19 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_u16 (uint16x8_t __a) { - return (uint8x8_t)__builtin_aarch64_xtnv8hi ((int16x8_t) __a); + return __builtin_aarch64_xtnv8hi_uu (__a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovn_u32 (uint32x4_t __a) { - return (uint16x4_t) __builtin_aarch64_xtnv4si ((int32x4_t )__a); + return __builtin_aarch64_xtnv4si_uu (__a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vmovn_u64 (uint64x2_t __a) { - return (uint32x2_t) __builtin_aarch64_xtnv2di ((int64x2_t) __a); + return __builtin_aarch64_xtnv2di_uu (__a); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6876,19 +6350,19 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrn_n_u16 (uint16x8_t __a, const int __b) { - return (uint8x8_t)__builtin_aarch64_shrnv8hi ((int16x8_t)__a, __b); + return __builtin_aarch64_shrnv8hi_uus (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrn_n_u32 (uint32x4_t __a, const int __b) { - return (uint16x4_t)__builtin_aarch64_shrnv4si ((int32x4_t)__a, __b); + return __builtin_aarch64_shrnv4si_uus (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrn_n_u64 (uint64x2_t __a, const int __b) { - return (uint32x2_t)__builtin_aarch64_shrnv2di ((int64x2_t)__a, __b); + return __builtin_aarch64_shrnv2di_uus (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -6966,12 +6440,7 @@ __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_high_p8 (poly8x16_t __a, poly8x16_t __b) { - poly16x8_t __result; - __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_pmull_hiv16qi_ppp (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7085,12 +6554,7 @@ __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmull_p8 (poly8x8_t __a, poly8x8_t __b) { - poly16x8_t __result; - __asm__ ("pmull %0.8h, %1.8b, %2.8b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return 
__result; + return __builtin_aarch64_pmullv8qi_ppp (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7144,12 +6608,7 @@ __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_s32 (int64x1_t __a, int32x2_t __b) { - int64x1_t __result; - __asm__ ("sadalp %0.1d,%2.2s" - : "=w"(__result) - : "0"(__a), "w"(__b) - : ); - return __result; + return (int64x1_t) __builtin_aarch64_sadalpv2si (__a[0], __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7167,12 +6626,7 @@ __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadal_u32 (uint64x1_t __a, uint32x2_t __b) { - uint64x1_t __result; - __asm__ ("uadalp %0.1d,%2.2s" - : "=w"(__result) - : "0"(__a), "w"(__b) - : ); - return __result; + return (uint64x1_t) __builtin_aarch64_uadalpv2si_uuu (__a[0], __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7214,265 +6668,145 @@ __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_s8 (int8x8_t __a) { - int16x4_t __result; - __asm__ ("saddlp %0.4h,%1.8b" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_saddlpv8qi (__a); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_s16 (int16x4_t __a) { - int32x2_t __result; - __asm__ ("saddlp %0.2s,%1.4h" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_saddlpv4hi (__a); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_s32 (int32x2_t __a) { - int64x1_t __result; - __asm__ ("saddlp %0.1d,%1.2s" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + 
return (int64x1_t) __builtin_aarch64_saddlpv2si (__a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_u8 (uint8x8_t __a) { - uint16x4_t __result; - __asm__ ("uaddlp %0.4h,%1.8b" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_uaddlpv8qi_uu (__a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_u16 (uint16x4_t __a) { - uint32x2_t __result; - __asm__ ("uaddlp %0.2s,%1.4h" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_uaddlpv4hi_uu (__a); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddl_u32 (uint32x2_t __a) { - uint64x1_t __result; - __asm__ ("uaddlp %0.1d,%1.2s" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return (uint64x1_t) __builtin_aarch64_uaddlpv2si_uu (__a); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_s8 (int8x16_t __a) { - int16x8_t __result; - __asm__ ("saddlp %0.8h,%1.16b" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_saddlpv16qi (__a); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_s16 (int16x8_t __a) { - int32x4_t __result; - __asm__ ("saddlp %0.4s,%1.8h" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_saddlpv8hi (__a); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_s32 (int32x4_t __a) { - int64x2_t __result; - __asm__ ("saddlp %0.2d,%1.4s" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_saddlpv4si (__a); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_u8 (uint8x16_t 
__a) { - uint16x8_t __result; - __asm__ ("uaddlp %0.8h,%1.16b" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_uaddlpv16qi_uu (__a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_u16 (uint16x8_t __a) { - uint32x4_t __result; - __asm__ ("uaddlp %0.4s,%1.8h" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_uaddlpv8hi_uu (__a); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddlq_u32 (uint32x4_t __a) { - uint64x2_t __result; - __asm__ ("uaddlp %0.2d,%1.4s" - : "=w"(__result) - : "w"(__a) - : ); - return __result; + return __builtin_aarch64_uaddlpv4si_uu (__a); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s8 (int8x16_t __a, int8x16_t __b) { - int8x16_t __result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv16qi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s16 (int16x8_t __a, int16x8_t __b) { - int16x8_t __result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s32 (int32x4_t __a, int32x4_t __b) { - int32x4_t __result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv4si (__a, __b); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_s64 (int64x2_t __a, int64x2_t __b) { - int64x2_t __result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" - : "=w"(__result) - : 
"w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv2di (__a, __b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u8 (uint8x16_t __a, uint8x16_t __b) { - uint8x16_t __result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv16qi_uuu (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u16 (uint16x8_t __a, uint16x8_t __b) { - uint16x8_t __result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv8hi_uuu (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u32 (uint32x4_t __a, uint32x4_t __b) { - uint32x4_t __result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv4si_uuu (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddq_u64 (uint64x2_t __a, uint64x2_t __b) { - uint64x2_t __result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_addpv2di_uuu (__a, __b); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulh_n_s16 (int16x4_t __a, int16_t __b) { - int16x4_t __result; - __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" - : "=w"(__result) - : "w"(__a), "x"(__b) - : ); - return __result; + return __builtin_aarch64_sqdmulh_nv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulh_n_s32 (int32x2_t __a, int32_t __b) { - int32x2_t __result; - __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" - 
: "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_sqdmulh_nv2si (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulhq_n_s16 (int16x8_t __a, int16_t __b) { - int16x8_t __result; - __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" - : "=w"(__result) - : "w"(__a), "x"(__b) - : ); - return __result; + return __builtin_aarch64_sqdmulh_nv8hi (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqdmulhq_n_s32 (int32x4_t __a, int32_t __b) { - int32x4_t __result; - __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_sqdmulh_nv4si (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7532,45 +6866,25 @@ __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulh_n_s16 (int16x4_t __a, int16_t __b) { - int16x4_t __result; - __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" - : "=w"(__result) - : "w"(__a), "x"(__b) - : ); - return __result; + return __builtin_aarch64_sqrdmulh_nv4hi (__a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulh_n_s32 (int32x2_t __a, int32_t __b) { - int32x2_t __result; - __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_sqrdmulh_nv2si (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b) { - int16x8_t __result; - __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" - : "=w"(__result) - : "w"(__a), "x"(__b) - : ); - return __result; + return __builtin_aarch64_sqrdmulh_nv8hi (__a, __b); } __extension__ extern __inline int32x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) { - int32x4_t __result; - __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_sqrdmulh_nv4si (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7702,22 +7016,19 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) { - return (uint8x16_t) __builtin_aarch64_rshrn2v8hi ((int8x8_t) __a, - (int16x8_t) __b, __c); + return __builtin_aarch64_rshrn2v8hi_uuus (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) { - return (uint16x8_t) __builtin_aarch64_rshrn2v4si ((int16x4_t) __a, - (int32x4_t) __b, __c); + return __builtin_aarch64_rshrn2v4si_uuus (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) { - return (uint32x4_t) __builtin_aarch64_rshrn2v2di ((int32x2_t)__a, - (int64x2_t)__b, __c); + return __builtin_aarch64_rshrn2v2di_uuus (__a, __b, __c); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7741,19 +7052,19 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrshrn_n_u16 (uint16x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_rshrnv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_rshrnv8hi_uus (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrshrn_n_u32 (uint32x4_t __a, const int __b) { 
- return (uint16x4_t) __builtin_aarch64_rshrnv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_rshrnv4si_uus (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrshrn_n_u64 (uint64x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_rshrnv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_rshrnv2di_uus (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -7789,33 +7100,80 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) { - return (uint8x16_t) - __builtin_aarch64_shrn2v8hi ((int8x8_t) __a, (int16x8_t) __b, __c); + return __builtin_aarch64_shrn2v8hi_uuus (__a, __b, __c); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) { - return (uint16x8_t) - __builtin_aarch64_shrn2v4si ((int16x4_t) __a, (int32x4_t) __b, __c); + return __builtin_aarch64_shrn2v4si_uuus (__a, __b, __c); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) { - return (uint32x4_t) - __builtin_aarch64_shrn2v2di ((int32x2_t) __a, (int64x2_t) __b, __c); + return __builtin_aarch64_shrn2v2di_uuus (__a, __b, __c); +} +__extension__ extern __inline poly8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return __builtin_aarch64_ssli_nv8qi_ppps (__a, __b, __c); +} +__extension__ extern __inline poly16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return __builtin_aarch64_ssli_nv4hi_ppps (__a, __b, 
__c); +} +__extension__ extern __inline poly8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return __builtin_aarch64_ssli_nv16qi_ppps (__a, __b, __c); +} +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return __builtin_aarch64_ssli_nv8hi_ppps (__a, __b, __c); +} +__extension__ extern __inline poly8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return __builtin_aarch64_ssri_nv8qi_ppps (__a, __b, __c); +} +__extension__ extern __inline poly16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return __builtin_aarch64_ssri_nv4hi_ppps (__a, __b, __c); +} +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t) __builtin_aarch64_ssri_ndi_ppps (__a[0], __b[0], __c); +} +__extension__ extern __inline poly8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return __builtin_aarch64_ssri_nv16qi_ppps (__a, __b, __c); +} +__extension__ extern __inline poly16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return __builtin_aarch64_ssri_nv8hi_ppps (__a, __b, __c); +} +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return __builtin_aarch64_ssri_nv2di_ppps (__a, __b, __c); } -#define vsli_n_p8(a, b, c) __extension__ ({ poly8x8_t b_ = (b); poly8x8_t a_ 
= (a); poly8x8_t result; __asm__ ("sli %0.8b,%2.8b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsli_n_p16(a, b, c) __extension__ ({ poly16x4_t b_ = (b); poly16x4_t a_ = (a); poly16x4_t result; __asm__ ("sli %0.4h,%2.4h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsliq_n_p8(a, b, c) __extension__ ({ poly8x16_t b_ = (b); poly8x16_t a_ = (a); poly8x16_t result; __asm__ ("sli %0.16b,%2.16b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsliq_n_p16(a, b, c) __extension__ ({ poly16x8_t b_ = (b); poly16x8_t a_ = (a); poly16x8_t result; __asm__ ("sli %0.8h,%2.8h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsri_n_p8(a, b, c) __extension__ ({ poly8x8_t b_ = (b); poly8x8_t a_ = (a); poly8x8_t result; __asm__ ("sri %0.8b,%2.8b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsri_n_p16(a, b, c) __extension__ ({ poly16x4_t b_ = (b); poly16x4_t a_ = (a); poly16x4_t result; __asm__ ("sri %0.4h,%2.4h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsri_n_p64(a, b, c) __extension__ ({ poly64x1_t b_ = (b); poly64x1_t a_ = (a); poly64x1_t result; __asm__ ("sri %d0,%d2,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsriq_n_p8(a, b, c) __extension__ ({ poly8x16_t b_ = (b); poly8x16_t a_ = (a); poly8x16_t result; __asm__ ("sri %0.16b,%2.16b,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsriq_n_p16(a, b, c) __extension__ ({ poly16x8_t b_ = (b); poly16x8_t a_ = (a); poly16x8_t result; __asm__ ("sri %0.8h,%2.8h,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) -#define vsriq_n_p64(a, b, c) __extension__ ({ poly64x2_t b_ = (b); poly64x2_t a_ = (a); poly64x2_t result; __asm__ ("sri %0.2d,%2.2d,%3" : "=w"(result) : "0"(a_), "w"(b_), "i"(c) : ); result; }) __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtst_p8 (poly8x8_t __a, 
poly8x8_t __b) @@ -7857,166 +7215,594 @@ vtstq_p64 (poly64x2_t __a, poly64x2_t __b) return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b)) != __AARCH64_INT64_C (0)); } -#define __STRUCTN(t, sz, nelem) typedef struct t ## sz ## x ## nelem ## _t { t ## sz ## _t val[nelem]; } t ## sz ## x ## nelem ## _t; -__STRUCTN (int, 8, 2) -__STRUCTN (int, 16, 2) -__STRUCTN (uint, 8, 2) -__STRUCTN (uint, 16, 2) -__STRUCTN (float, 16, 2) -__STRUCTN (poly, 8, 2) -__STRUCTN (poly, 16, 2) -__STRUCTN (int, 8, 3) -__STRUCTN (int, 16, 3) -__STRUCTN (int, 32, 3) -__STRUCTN (int, 64, 3) -__STRUCTN (uint, 8, 3) -__STRUCTN (uint, 16, 3) -__STRUCTN (uint, 32, 3) -__STRUCTN (uint, 64, 3) -__STRUCTN (float, 16, 3) -__STRUCTN (float, 32, 3) -__STRUCTN (float, 64, 3) -__STRUCTN (poly, 8, 3) -__STRUCTN (poly, 16, 3) -__STRUCTN (int, 8, 4) -__STRUCTN (int, 64, 4) -__STRUCTN (uint, 8, 4) -__STRUCTN (uint, 64, 4) -__STRUCTN (poly, 8, 4) -__STRUCTN (float, 64, 4) -#undef __STRUCTN -#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, qmode, ptr_mode, funcsuffix, signedtype) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { __builtin_aarch64_simd_oi __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[1], 1); __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __o, __c); } -__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, 
v2df, df, f64, - float64x2_t) -__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, - int64x2_t) -#define __ST2Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { union { intype __i; __builtin_aarch64_simd_oi __o; } __temp = { __b }; __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __temp.__o, __c); } -__ST2Q_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16) -__ST2Q_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) -__ST2Q_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) -__ST2Q_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) -__ST2Q_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) -__ST2Q_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64) -__ST2Q_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) -__ST2Q_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) -__ST2Q_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) -__ST2Q_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) 
-__ST2Q_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) -__ST2Q_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) -__ST2Q_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) -__ST2Q_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) -#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, qmode, ptr_mode, funcsuffix, signedtype) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { __builtin_aarch64_simd_ci __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[2], 2); __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __o, __c); } -__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, - int32x4_t) 
-__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, - int64x2_t) -#define __ST3Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { union { intype __i; __builtin_aarch64_simd_ci __o; } __temp = { __b }; __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __temp.__o, __c); } -__ST3Q_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16) -__ST3Q_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) -__ST3Q_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) -__ST3Q_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) -__ST3Q_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) -__ST3Q_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64) -__ST3Q_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) -__ST3Q_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) -__ST3Q_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) -__ST3Q_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) -__ST3Q_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) -__ST3Q_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) -__ST3Q_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) -__ST3Q_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) -#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, qmode, ptr_mode, funcsuffix, signedtype) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { __builtin_aarch64_simd_xi __o; largetype __temp; 
__temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[2], 2); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[3], 3); __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __o, __c); } -__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST4_LANE_FUNC 
(uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, - int64x2_t) -#define __ST4Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_lane_ ## funcsuffix (ptrtype *__ptr, intype __b, const int __c) { union { intype __i; __builtin_aarch64_simd_xi __o; } __temp = { __b }; __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) __ptr, __temp.__o, __c); } -__ST4Q_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16) -__ST4Q_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) -__ST4Q_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) -__ST4Q_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) -__ST4Q_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) -__ST4Q_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64) -__ST4Q_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) -__ST4Q_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) -__ST4Q_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) -__ST4Q_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) -__ST4Q_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) -__ST4Q_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) -__ST4Q_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) -__ST4Q_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_f16 (float16_t *__ptr, float16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_f32 (float32_t *__ptr, float32x2x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_f64 (float64_t *__ptr, float64x1x2_t 
__val, const int __lane) +{ + __builtin_aarch64_st2_lanedf ((__builtin_aarch64_simd_df *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_p8 (poly8_t *__ptr, poly8x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8qi_sps ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_p16 (poly16_t *__ptr, poly16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4hi_sps ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_p64 (poly64_t *__ptr, poly64x1x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanedi_sps ((__builtin_aarch64_simd_di *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s8 (int8_t *__ptr, int8x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s16 (int16_t *__ptr, int16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s32 (int32_t *__ptr, int32x2x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s64 (int64_t *__ptr, int64x1x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanedi ((__builtin_aarch64_simd_di *) __ptr, __val, + 
__lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u8 (uint8_t *__ptr, uint8x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8qi_sus ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u16 (uint16_t *__ptr, uint16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4hi_sus ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u32 (uint32_t *__ptr, uint32x2x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev2si_sus ((__builtin_aarch64_simd_si *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u64 (uint64_t *__ptr, uint64x1x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanedi_sus ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_f16 (float16_t *__ptr, float16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_f32 (float32_t *__ptr, float32x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_f64 (float64_t *__ptr, float64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_p8 (poly8_t *__ptr, poly8x16x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev16qi_sps ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_p16 (poly16_t *__ptr, poly16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8hi_sps ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_p64 (poly64_t *__ptr, poly64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev2di_sps ((__builtin_aarch64_simd_di *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s8 (int8_t *__ptr, int8x16x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s16 (int16_t *__ptr, int16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s32 (int32_t *__ptr, int32x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s64 (int64_t *__ptr, int64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u8 (uint8_t *__ptr, 
uint8x16x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev16qi_sus ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u16 (uint16_t *__ptr, uint16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8hi_sus ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u32 (uint32_t *__ptr, uint32x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4si_sus ((__builtin_aarch64_simd_si *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u64 (uint64_t *__ptr, uint64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev2di_sus ((__builtin_aarch64_simd_di *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_f16 (float16_t *__ptr, float16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_f32 (float32_t *__ptr, float32x2x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_f64 (float64_t *__ptr, float64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanedf ((__builtin_aarch64_simd_df *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_p8 (poly8_t *__ptr, poly8x8x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev8qi_sps 
((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_p16 (poly16_t *__ptr, poly16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4hi_sps ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_p64 (poly64_t *__ptr, poly64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanedi_sps ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s8 (int8_t *__ptr, int8x8x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s16 (int16_t *__ptr, int16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s32 (int32_t *__ptr, int32x2x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s64 (int64_t *__ptr, int64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanedi ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u8 (uint8_t *__ptr, uint8x8x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev8qi_sus ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void 
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u16 (uint16_t *__ptr, uint16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4hi_sus ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u32 (uint32_t *__ptr, uint32x2x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev2si_sus ((__builtin_aarch64_simd_si *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u64 (uint64_t *__ptr, uint64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanedi_sus ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_f16 (float16_t *__ptr, float16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_f32 (float32_t *__ptr, float32x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_f64 (float64_t *__ptr, float64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_p8 (poly8_t *__ptr, poly8x16x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev16qi_sps ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+vst3q_lane_p16 (poly16_t *__ptr, poly16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev8hi_sps ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_p64 (poly64_t *__ptr, poly64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev2di_sps ((__builtin_aarch64_simd_di *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s8 (int8_t *__ptr, int8x16x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s16 (int16_t *__ptr, int16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s32 (int32_t *__ptr, int32x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s64 (int64_t *__ptr, int64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u8 (uint8_t *__ptr, uint8x16x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev16qi_sus ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u16 (uint16_t *__ptr, uint16x8x3_t __val, const int __lane) +{ + 
__builtin_aarch64_st3_lanev8hi_sus ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u32 (uint32_t *__ptr, uint32x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4si_sus ((__builtin_aarch64_simd_si *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u64 (uint64_t *__ptr, uint64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev2di_sus ((__builtin_aarch64_simd_di *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_f16 (float16_t *__ptr, float16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_f32 (float32_t *__ptr, float32x2x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_f64 (float64_t *__ptr, float64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanedf ((__builtin_aarch64_simd_df *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_p8 (poly8_t *__ptr, poly8x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8qi_sps ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_p16 (poly16_t *__ptr, poly16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4hi_sps ((__builtin_aarch64_simd_hi *) __ptr, + __val, 
__lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_p64 (poly64_t *__ptr, poly64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanedi_sps ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s8 (int8_t *__ptr, int8x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s16 (int16_t *__ptr, int16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s32 (int32_t *__ptr, int32x2x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s64 (int64_t *__ptr, int64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanedi ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_u8 (uint8_t *__ptr, uint8x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8qi_sus ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_u16 (uint16_t *__ptr, uint16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4hi_sus ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vst4_lane_u32 (uint32_t *__ptr, uint32x2x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev2si_sus ((__builtin_aarch64_simd_si *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_u64 (uint64_t *__ptr, uint64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanedi_sus ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_f16 (float16_t *__ptr, float16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_f32 (float32_t *__ptr, float32x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_f64 (float64_t *__ptr, float64x2x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_p8 (poly8_t *__ptr, poly8x16x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev16qi_sps ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_p16 (poly16_t *__ptr, poly16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8hi_sps ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_p64 (poly64_t *__ptr, poly64x2x4_t __val, const 
int __lane) +{ + __builtin_aarch64_st4_lanev2di_sps ((__builtin_aarch64_simd_di *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s8 (int8_t *__ptr, int8x16x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s16 (int16_t *__ptr, int16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s32 (int32_t *__ptr, int32x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s64 (int64_t *__ptr, int64x2x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u8 (uint8_t *__ptr, uint8x16x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev16qi_sus ((__builtin_aarch64_simd_qi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u16 (uint16_t *__ptr, uint16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8hi_sus ((__builtin_aarch64_simd_hi *) __ptr, + __val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u32 (uint32_t *__ptr, uint32x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4si_sus ((__builtin_aarch64_simd_si *) __ptr, + 
__val, __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u64 (uint64_t *__ptr, uint64x2x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev2di_sus ((__builtin_aarch64_simd_di *) __ptr, + __val, __lane); +} __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddlv_s32 (int32x2_t __a) @@ -8079,339 +7865,198 @@ vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1_p8 (poly8x16_t __a, uint8x8_t __b) +vqtbl1_p8 (poly8x16_t __tab, uint8x8_t __idx) { - poly8x8_t __result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_qtbl1v8qi_ppu (__tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1_s8 (int8x16_t __a, uint8x8_t __b) +vqtbl1_s8 (int8x16_t __tab, uint8x8_t __idx) { - int8x8_t __result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_qtbl1v8qi_ssu (__tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1_u8 (uint8x16_t __a, uint8x8_t __b) +vqtbl1_u8 (uint8x16_t __tab, uint8x8_t __idx) { - uint8x8_t __result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_qtbl1v8qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1q_p8 (poly8x16_t __a, uint8x16_t __b) +vqtbl1q_p8 (poly8x16_t __tab, uint8x16_t __idx) { - poly8x16_t __result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(__result) - : "w"(__a), "w"(__b) - 
: ); - return __result; + return __builtin_aarch64_qtbl1v16qi_ppu (__tab, __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1q_s8 (int8x16_t __a, uint8x16_t __b) +vqtbl1q_s8 (int8x16_t __tab, uint8x16_t __idx) { - int8x16_t __result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_qtbl1v16qi_ssu (__tab, __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqtbl1q_u8 (uint8x16_t __a, uint8x16_t __b) +vqtbl1q_u8 (uint8x16_t __tab, uint8x16_t __idx) { - uint8x16_t __result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" - : "=w"(__result) - : "w"(__a), "w"(__b) - : ); - return __result; + return __builtin_aarch64_qtbl1v16qi_uuu (__tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx) { - int8x8_t __result = __r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(__result) - : "w"(__tab), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v8qi_sssu (__r, __tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx) { - uint8x8_t __result = __r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(__result) - : "w"(__tab), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, uint8x8_t __idx) { - poly8x8_t __result = __r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" - : "+w"(__result) - : "w"(__tab), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v8qi_pppu (__r, __tab, __idx); } 
__extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx) { - int8x16_t __result = __r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(__result) - : "w"(__tab), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v16qi_sssu (__r, __tab, __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx) { - uint8x16_t __result = __r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(__result) - : "w"(__tab), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v16qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx) { - poly8x16_t __result = __r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" - : "+w"(__result) - : "w"(__tab), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v16qi_pppu (__r, __tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl1_s8 (int8x8_t __tab, int8x8_t __idx) { - int8x8_t __result; - int8x16_t __temp = vcombine_s8 (__tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + int8x16_t __temp = vcombine_s8 (__tab, + vcreate_s8 (__AARCH64_UINT64_C (0x0))); + return __builtin_aarch64_qtbl1v8qi (__temp, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx) { - uint8x8_t __result; - uint8x16_t __temp = vcombine_u8 (__tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__temp), "w"(__idx) - : ); 
- return __result; + uint8x16_t __temp = vcombine_u8 (__tab, + vcreate_u8 (__AARCH64_UINT64_C (0x0))); + return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx) { - poly8x8_t __result; - poly8x16_t __temp = vcombine_p8 (__tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + poly8x16_t __temp = vcombine_p8 (__tab, + vcreate_p8 (__AARCH64_UINT64_C (0x0))); + return __builtin_aarch64_qtbl1v8qi_ppu (__temp, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx) { - int8x8_t __result; int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbl1v8qi (__temp, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx) { - uint8x8_t __result; uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx) { - poly8x8_t __result; poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbl1v8qi_ppu (__temp, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx) { - int8x8_t __result; int8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); - __temp.val[1] = vcombine_s8 (__tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = __builtin_aarch64_tbl3v8qi (__o, __idx); - return __result; + __temp.val[1] = vcombine_s8 (__tab.val[2], + vcreate_s8 (__AARCH64_UINT64_C (0x0))); + return __builtin_aarch64_qtbl2v8qi (__temp, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx) { - uint8x8_t __result; uint8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); - __temp.val[1] = vcombine_u8 (__tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + __temp.val[1] = vcombine_u8 (__tab.val[2], + vcreate_u8 (__AARCH64_UINT64_C (0x0))); + return __builtin_aarch64_qtbl2v8qi_uuu (__temp, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx) { - poly8x8_t __result; poly8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); - __temp.val[1] = vcombine_p8 (__tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = 
(poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + __temp.val[1] = vcombine_p8 (__tab.val[2], + vcreate_p8 (__AARCH64_UINT64_C (0x0))); + return __builtin_aarch64_qtbl2v8qi_ppu (__temp, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx) { - int8x8_t __result; int8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = __builtin_aarch64_tbl3v8qi (__o, __idx); - return __result; + return __builtin_aarch64_qtbl2v8qi (__temp, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx) { - uint8x8_t __result; uint8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + return __builtin_aarch64_qtbl2v8qi_uuu (__temp, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx) { - poly8x8_t __result; poly8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi 
(__o, - (int8x16_t) __temp.val[1], 1); - __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + return __builtin_aarch64_qtbl2v8qi_ppu (__temp, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx) { - int8x8_t __result = __r; int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v8qi (__r, __temp, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx) { - uint8x8_t __result = __r; uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __temp, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx) { - poly8x8_t __result = __r; poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" - : "+w"(__result) - : "w"(__temp), "w"(__idx) - : ); - return __result; + return __builtin_aarch64_qtbx1v8qi_pppu (__r, __temp, __idx); } __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -8562,19 +8207,19 @@ __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_u8 (uint8x8_t __a) { - return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a); + return __builtin_aarch64_reduc_plus_scal_v8qi_uu (__a); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vaddv_u16 (uint16x4_t __a) { - return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a); + return __builtin_aarch64_reduc_plus_scal_v4hi_uu (__a); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddv_u32 (uint32x2_t __a) { - return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a); + return __builtin_aarch64_reduc_plus_scal_v2si_uu (__a); } __extension__ extern __inline int8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -8604,25 +8249,25 @@ __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u8 (uint8x16_t __a) { - return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a); + return __builtin_aarch64_reduc_plus_scal_v16qi_uu (__a); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u16 (uint16x8_t __a) { - return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a); + return __builtin_aarch64_reduc_plus_scal_v8hi_uu (__a); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u32 (uint32x4_t __a) { - return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a); + return __builtin_aarch64_reduc_plus_scal_v4si_uu (__a); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vaddvq_u64 (uint64x2_t __a) { - return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a); + return __builtin_aarch64_reduc_plus_scal_v2di_uu (__a); } __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -13003,15 +12648,15 @@ __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8 (const poly8_t *__a) { - return (poly8x8_t) - 
__builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); + return __builtin_aarch64_ld1v8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p16 (const poly16_t *__a) { - return (poly16x4_t) - __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); + return __builtin_aarch64_ld1v4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -13047,22 +12692,22 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8 (const uint8_t *__a) { - return (uint8x8_t) - __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); + return __builtin_aarch64_ld1v8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u16 (const uint16_t *__a) { - return (uint16x4_t) - __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); + return __builtin_aarch64_ld1v4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u32 (const uint32_t *__a) { - return (uint32x2_t) - __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a); + return __builtin_aarch64_ld1v2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -13074,337 +12719,184 @@ __extension__ extern __inline uint8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8_x3 (const uint8_t *__a) { - uint8x8x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = (__builtin_aarch64_simd_ci)__builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi 
*) __a); - __i.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - __i.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - __i.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s8_x3 (const uint8_t *__a) +vld1_s8_x3 (const int8_t *__a) { - int8x8x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a); - __i.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - __i.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - __i.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u16_x3 (const uint16_t *__a) { - uint16x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a); - __i.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - __i.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - __i.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s16_x3 (const int16_t *__a) { - int16x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a); - __i.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - __i.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - __i.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi 
(__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u32_x3 (const uint32_t *__a) { - uint32x2x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a); - __i.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - __i.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - __i.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s32_x3 (const uint32_t *__a) +vld1_s32_x3 (const int32_t *__a) { - int32x2x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a); - __i.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - __i.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - __i.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u64_x3 (const uint64_t *__a) { - uint64x1x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a); - __i.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - __i.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - __i.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vld1_s64_x3 (const int64_t *__a) { - int64x1x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a); - __i.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - __i.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - __i.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f16_x3 (const float16_t *__a) { - float16x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4hf ((const __builtin_aarch64_simd_hf *) __a); - __i.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0); - __i.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1); - __i.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f32_x3 (const float32_t *__a) { - float32x2x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v2sf ((const __builtin_aarch64_simd_sf *) __a); - __i.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); - __i.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); - __i.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f64_x3 (const float64_t *__a) { - float64x1x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3df ((const __builtin_aarch64_simd_df *) __a); - __i.val[0] = (float64x1_t) 
__builtin_aarch64_get_dregcidi (__o, 0); - __i.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - __i.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8_x3 (const poly8_t *__a) { - poly8x8x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v8qi ((const __builtin_aarch64_simd_qi *) __a); - __i.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - __i.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - __i.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p16_x3 (const poly16_t *__a) { - poly16x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4hi ((const __builtin_aarch64_simd_hi *) __a); - __i.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - __i.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - __i.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p64_x3 (const poly64_t *__a) { - poly64x1x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3di ((const __builtin_aarch64_simd_di *) __a); - __i.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - __i.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - __i.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -return __i; + return __builtin_aarch64_ld1x3di_ps ( + 
(const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x3 (const uint8_t *__a) { - uint8x16x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a); - __i.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - __i.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - __i.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x3 (const int8_t *__a) { - int8x16x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a); - __i.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - __i.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - __i.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v16qi ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x3 (const uint16_t *__a) { - uint16x8x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a); - __i.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - __i.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - __i.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x3 (const int16_t *__a) { - int16x8x3_t __i; - 
__builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a); - __i.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - __i.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - __i.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x3 (const uint32_t *__a) { - uint32x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a); - __i.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - __i.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - __i.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x3 (const int32_t *__a) { - int32x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a); - __i.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - __i.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - __i.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x3 (const uint64_t *__a) { - uint64x2x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a); - __i.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - __i.val[1] = (uint64x2_t) 
__builtin_aarch64_get_qregciv2di (__o, 1); - __i.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x3 (const int64_t *__a) { - int64x2x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a); - __i.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - __i.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - __i.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x3 (const float16_t *__a) { - float16x8x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v8hf ((const __builtin_aarch64_simd_hf *) __a); - __i.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0); - __i.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1); - __i.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x3 (const float32_t *__a) { - float32x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4sf ((const __builtin_aarch64_simd_sf *) __a); - __i.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); - __i.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); - __i.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4sf ((const __builtin_aarch64_simd_sf *) __a); } 
__extension__ extern __inline float64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f64_x3 (const float64_t *__a) { - float64x2x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v2df ((const __builtin_aarch64_simd_df *) __a); - __i.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); - __i.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); - __i.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x3 (const poly8_t *__a) { - poly8x16x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v16qi ((const __builtin_aarch64_simd_qi *) __a); - __i.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - __i.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - __i.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x3 (const poly16_t *__a) { - poly16x8x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v8hi ((const __builtin_aarch64_simd_hi *) __a); - __i.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - __i.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - __i.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x3 (const poly64_t *__a) { - poly64x2x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = 
__builtin_aarch64_ld1x3v2di ((const __builtin_aarch64_simd_di *) __a); - __i.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - __i.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - __i.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -13428,22 +12920,22 @@ __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8 (const poly8_t *__a) { - return (poly8x16_t) - __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __builtin_aarch64_ld1v16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16 (const poly16_t *__a) { - return (poly16x8_t) - __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __builtin_aarch64_ld1v8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64 (const poly64_t *__a) { - return (poly64x2_t) - __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); + return __builtin_aarch64_ld1v2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -13473,589 +12965,395 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8 (const uint8_t *__a) { - return (uint8x16_t) - __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __builtin_aarch64_ld1v16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint8x8x2_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8_x2 (const uint8_t *__a) { - uint8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s8_x2 (const int8_t *__a) { - int8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u16_x2 (const uint16_t *__a) { - uint16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s16_x2 (const int16_t *__a) { - int16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4hi ((const 
__builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u32_x2 (const uint32_t *__a) { - uint32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s32_x2 (const int32_t *__a) { - int32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u64_x2 (const uint64_t *__a) { - uint64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s64_x2 (const int64_t *__a) { - int64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - 
return ret; + return __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f16_x2 (const float16_t *__a) { - float16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f32_x2 (const float32_t *__a) { - float32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f64_x2 (const float64_t *__a) { - float64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; - return ret; + return __builtin_aarch64_ld1x2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8_x2 (const poly8_t *__a) { - poly8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); 
- ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p16_x2 (const poly16_t *__a) { - poly16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p64_x2 (const poly64_t *__a) { - poly64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x2 (const uint8_t *__a) { - uint8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x2 (const int8_t *__a) { - int8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v16qi ((const 
__builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v16qi ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x2 (const uint16_t *__a) { - uint16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x2 (const int16_t *__a) { - int16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x2 (const uint32_t *__a) { - uint32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x2 (const int32_t *__a) { - int32x4x2_t 
ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x2 (const uint64_t *__a) { - uint64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x2 (const int64_t *__a) { - int64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x2 (const float16_t *__a) { - float16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); - ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x4x2_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vld1q_f32_x2 (const float32_t *__a) { - float32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f64_x2 (const float64_t *__a) { - float64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x2 (const poly8_t *__a) { - poly8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x2 (const poly16_t *__a) { - poly16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8hi_ps ( + (const 
__builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x2 (const poly64_t *__a) { - poly64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16 (const uint16_t *__a) { - return (uint16x8_t) - __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __builtin_aarch64_ld1v8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32 (const uint32_t *__a) { - return (uint32x4_t) - __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a); + return __builtin_aarch64_ld1v4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64 (const uint64_t *__a) { - return (uint64x2_t) - __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); + return __builtin_aarch64_ld1v2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int8x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s8_x4 (const int8_t *__a) { - union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int8x16x4_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vld1q_s8_x4 (const int8_t *__a) { - union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v16qi ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s16_x4 (const int16_t *__a) { - union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x4 (const int16_t *__a) { - union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s32_x4 (const int32_t *__a) { - union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x4 (const int32_t *__a) { - union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint8x8x4_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vld1_u8_x4 (const uint8_t *__a) { - union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x4 (const uint8_t *__a) { - union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u16_x4 (const uint16_t *__a) { - union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x4 (const uint16_t *__a) { - union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u32_x4 (const uint32_t *__a) { - union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint32x4x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x4 (const uint32_t *__a) { - union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline float16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f16_x4 (const float16_t *__a) { - union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x4 (const float16_t *__a) { - union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f32_x4 (const float32_t *__a) { - union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x4 (const float32_t *__a) { - union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ 
extern __inline poly8x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8_x4 (const poly8_t *__a) { - union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x4 (const poly8_t *__a) { - union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p16_x4 (const poly16_t *__a) { - union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x4 (const poly16_t *__a) { - union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_s64_x4 (const int64_t *__a) { - union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) 
__a); } __extension__ extern __inline uint64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u64_x4 (const uint64_t *__a) { - union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline poly64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p64_x4 (const poly64_t *__a) { - union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x4 (const int64_t *__a) { - union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x4 (const uint64_t *__a) { - union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline poly64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x4 (const poly64_t *__a) { - union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v2di_ps ( + (const 
__builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_f64_x4 (const float64_t *__a) { - union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline float64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f64_x4 (const float64_t *__a) { - union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -14397,1009 +13695,539 @@ __extension__ extern __inline int64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_s64 (const int64_t * __a) { - int64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_u64 (const uint64_t * __a) { - uint64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_ld2di_us ((const __builtin_aarch64_simd_di *) __a); } __extension__ 
extern __inline float64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_f64 (const float64_t * __a) { - float64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; - return ret; + return __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline int8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_s8 (const int8_t * __a) { - int8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_p8 (const poly8_t * __a) { - poly8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld2v8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_p64 (const poly64_t * __a) { - poly64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); - return ret; + return __builtin_aarch64_ld2di_ps ((const 
__builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_s16 (const int16_t * __a) { - int16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_p16 (const poly16_t * __a) { - poly16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld2v4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_s32 (const int32_t * __a) { - int32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_u8 (const uint8_t * __a) { - uint8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return 
__builtin_aarch64_ld2v8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_u16 (const uint16_t * __a) { - uint16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld2v4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_u32 (const uint32_t * __a) { - uint32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __builtin_aarch64_ld2v2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline float16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_f16 (const float16_t * __a) { - float16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4hf (__a); - ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1); - return ret; + return __builtin_aarch64_ld2v4hf (__a); } __extension__ extern __inline float32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_f32 (const float32_t * __a) { - float32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); - return ret; + return __builtin_aarch64_ld2v2sf 
((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline int8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_s8 (const int8_t * __a) { - int8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_p8 (const poly8_t * __a) { - poly8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld2v16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_s16 (const int16_t * __a) { - int16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_p16 (const poly16_t * __a) { - poly16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); 
- return ret; + return __builtin_aarch64_ld2v8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_p64 (const poly64_t * __a) { - poly64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); - return ret; + return __builtin_aarch64_ld2v2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_s32 (const int32_t * __a) { - int32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_s64 (const int64_t * __a) { - int64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_u8 (const uint8_t * __a) { - uint8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] 
= (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld2v16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_u16 (const uint16_t * __a) { - uint16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld2v8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_u32 (const uint32_t * __a) { - uint32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return __builtin_aarch64_ld2v4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_u64 (const uint64_t * __a) { - uint64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __builtin_aarch64_ld2v2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_f16 (const float16_t * __a) { - float16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hf (__a); - ret.val[0] = 
__builtin_aarch64_get_qregoiv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); - return ret; + return __builtin_aarch64_ld2v8hf (__a); } __extension__ extern __inline float32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_f32 (const float32_t * __a) { - float32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); - return ret; + return __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_f64 (const float64_t * __a) { - float64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); - return ret; + return __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline int64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_s64 (const int64_t * __a) { - int64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; + return __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_u64 (const uint64_t * __a) { - uint64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3di ((const 
__builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; + return __builtin_aarch64_ld3di_us ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_f64 (const float64_t * __a) { - float64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; - return ret; + return __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline int8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_s8 (const int8_t * __a) { - int8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_p8 (const poly8_t * __a) { - poly8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return 
__builtin_aarch64_ld3v8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_s16 (const int16_t * __a) { - int16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_p16 (const poly16_t * __a) { - poly16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __builtin_aarch64_ld3v4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_s32 (const int32_t * __a) { - int32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_u8 (const uint8_t * __a) { - uint8x8x3_t ret; - 
__builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __builtin_aarch64_ld3v8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_u16 (const uint16_t * __a) { - uint16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __builtin_aarch64_ld3v4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_u32 (const uint32_t * __a) { - uint32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return __builtin_aarch64_ld3v2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline float16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_f16 (const float16_t * __a) { - float16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4hf (__a); - ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_dregciv4hf 
(__o, 2); - return ret; + return __builtin_aarch64_ld3v4hf (__a); } __extension__ extern __inline float32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_f32 (const float32_t * __a) { - float32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); - return ret; + return __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline poly64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_p64 (const poly64_t * __a) { - poly64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); - return ret; + return __builtin_aarch64_ld3di_ps ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_s8 (const int8_t * __a) { - int8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_p8 (const poly8_t * __a) { - poly8x16x3_t ret; - 
__builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return __builtin_aarch64_ld3v16qi_ps ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_s16 (const int16_t * __a) { - int16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_p16 (const poly16_t * __a) { - poly16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return __builtin_aarch64_ld3v8hi_ps ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_s32 (const int32_t * __a) { - int32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); 
- ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_s64 (const int64_t * __a) { - int64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_u8 (const uint8_t * __a) { - uint8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return __builtin_aarch64_ld3v16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_u16 (const uint16_t * __a) { - uint16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return __builtin_aarch64_ld3v8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x3_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vld3q_u32 (const uint32_t * __a) { - uint32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return __builtin_aarch64_ld3v4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_u64 (const uint64_t * __a) { - uint64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return __builtin_aarch64_ld3v2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_f16 (const float16_t * __a) { - float16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hf (__a); - ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2); - return ret; + return __builtin_aarch64_ld3v8hf (__a); } __extension__ extern __inline float32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_f32 (const float32_t * __a) { - float32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); - 
ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); - return ret; + return __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_f64 (const float64_t * __a) { - float64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); - return ret; + return __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_p64 (const poly64_t * __a) { - poly64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); - return ret; + return __builtin_aarch64_ld3v2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_s64 (const int64_t * __a) { - int64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); } 
__extension__ extern __inline uint64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_u64 (const uint64_t * __a) { - uint64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return __builtin_aarch64_ld4di_us ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_f64 (const float64_t * __a) { - float64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; - ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; - return ret; + return __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline int8x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_s8 (const int8_t * __a) { - int8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x8x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vld4_p8 (const poly8_t * __a) { - poly8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return __builtin_aarch64_ld4v8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_s16 (const int16_t * __a) { - int16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_p16 (const poly16_t * __a) { - poly16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return __builtin_aarch64_ld4v4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vld4_s32 (const int32_t * __a) { - int32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint8x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_u8 (const uint8_t * __a) { - uint8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return __builtin_aarch64_ld4v8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_u16 (const uint16_t * __a) { - uint16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return __builtin_aarch64_ld4v4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_u32 (const uint32_t 
* __a) { - uint32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return __builtin_aarch64_ld4v2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline float16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_f16 (const float16_t * __a) { - float16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4hf (__a); - ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2); - ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3); - return ret; + return __builtin_aarch64_ld4v4hf (__a); } __extension__ extern __inline float32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_f32 (const float32_t * __a) { - float32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); - ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); - return ret; + return __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline poly64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_p64 (const poly64_t * __a) { - poly64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); - 
ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); - ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); - return ret; + return __builtin_aarch64_ld4di_ps ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_s8 (const int8_t * __a) { - int8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_p8 (const poly8_t * __a) { - poly8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_ld4v16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_s16 (const int16_t * __a) { - int16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) 
__builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_p16 (const poly16_t * __a) { - poly16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return __builtin_aarch64_ld4v8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_s32 (const int32_t * __a) { - int32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_s64 (const int64_t * __a) { - int64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - 
ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_u8 (const uint8_t * __a) { - uint8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_ld4v16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_u16 (const uint16_t * __a) { - uint16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return __builtin_aarch64_ld4v8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_u32 (const uint32_t * __a) { - uint32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = 
(uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return __builtin_aarch64_ld4v4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_u64 (const uint64_t * __a) { - uint64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return __builtin_aarch64_ld4v2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_f16 (const float16_t * __a) { - float16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hf (__a); - ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1); - ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2); - ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3); - return ret; + return __builtin_aarch64_ld4v8hf (__a); } __extension__ extern __inline float32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_f32 (const float32_t * __a) { - float32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); - ret.val[3] 
= (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); - return ret; + return __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_f64 (const float64_t * __a) { - float64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); - ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); - return ret; + return __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_p64 (const poly64_t * __a) { - poly64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); - ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); - return ret; + return __builtin_aarch64_ld4v2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline poly128_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -15411,1154 +14239,1142 @@ __extension__ extern __inline int8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_s8 (const int8_t * __a) { - int8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (int8x8_t) 
__builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_s16 (const int16_t * __a) { - int16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_s32 (const int32_t * __a) { - int32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline float16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_f16 (const float16_t * __a) { - float16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_f32 (const float32_t * __a) { - float32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) 
__builtin_aarch64_get_dregoiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); - return ret; + return __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_f64 (const float64_t * __a) { - float64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; - return ret; + return __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline uint8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_u8 (const uint8_t * __a) { - uint8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_u16 (const uint16_t * __a) { - uint16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_u32 (const uint32_t * __a) { - uint32x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = 
__builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); - return ret; + return __builtin_aarch64_ld2rv2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline poly8x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_p8 (const poly8_t * __a) { - poly8x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_p16 (const poly16_t * __a) { - poly16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_p64 (const poly64_t * __a) { - poly64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); - return ret; + return __builtin_aarch64_ld2rdi_ps ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_s64 
(const int64_t * __a) { - int64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint64x1x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_u64 (const uint64_t * __a) { - uint64x1x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); - return ret; + return __builtin_aarch64_ld2rdi_us ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_s8 (const int8_t * __a) { - int8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_p8 (const poly8_t * __a) { - poly8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x8x2_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_s16 (const int16_t * __a) { - int16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_p16 (const poly16_t * __a) { - poly16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_s32 (const int32_t * __a) { - int32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_s64 (const int64_t * __a) { - int64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + return __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) 
__a); } __extension__ extern __inline uint8x16x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_u8 (const uint8_t * __a) { - uint8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_u16 (const uint16_t * __a) { - uint16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_u32 (const uint32_t * __a) { - uint32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_u64 (const uint64_t * __a) { - uint64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di 
(__o, 1); - return ret; + return __builtin_aarch64_ld2rv2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_f16 (const float16_t * __a) { - float16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); - ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_f32 (const float32_t * __a) { - float32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_f64 (const float64_t * __a) { - float64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); - return ret; + return __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly64x2x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_p64 (const poly64_t * __a) { - poly64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) 
__builtin_aarch64_get_qregoiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); - return ret; + return __builtin_aarch64_ld2rv2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_s64 (const int64_t * __a) { - int64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; + return __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_u64 (const uint64_t * __a) { - uint64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); - return ret; + return __builtin_aarch64_ld3rdi_us ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_f64 (const float64_t * __a) { - float64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; - return ret; + return __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline 
int8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_s8 (const int8_t * __a) { - int8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_p8 (const poly8_t * __a) { - poly8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_s16 (const int16_t * __a) { - int16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_p16 (const poly16_t * __a) { - poly16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); - 
ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_s32 (const int32_t * __a) { - int32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint8x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_u8 (const uint8_t * __a) { - uint8x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_u16 (const uint16_t * __a) { - uint16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); - return ret; + return 
__builtin_aarch64_ld3rv4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_u32 (const uint32_t * __a) { - uint32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); - return ret; + return __builtin_aarch64_ld3rv2si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline float16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_f16 (const float16_t * __a) { - float16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1); - ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2); - return ret; + return __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_f32 (const float32_t * __a) { - float32x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); - return ret; + return __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline poly64x1x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_p64 (const poly64_t 
* __a) { - poly64x1x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); - return ret; + return __builtin_aarch64_ld3rdi_ps ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_s8 (const int8_t * __a) { - int8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_p8 (const poly8_t * __a) { - poly8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_s16 (const int16_t * __a) { - int16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] 
= (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_p16 (const poly16_t * __a) { - poly16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_s32 (const int32_t * __a) { - int32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_s64 (const int64_t * __a) { - int64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ 
extern __inline uint8x16x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_u8 (const uint8_t * __a) { - uint8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_u16 (const uint16_t * __a) { - uint16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_u32 (const uint32_t * __a) { - uint32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + return __builtin_aarch64_ld3rv4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline uint64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_u64 (const uint64_t * __a) { - uint64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = 
__builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + return __builtin_aarch64_ld3rv2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_f16 (const float16_t * __a) { - float16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0); - ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1); - ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_f32 (const float32_t * __a) { - float32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); - return ret; + return __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline float64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_f64 (const float64_t * __a) { - float64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 
1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); - return ret; + return __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly64x2x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_p64 (const poly64_t * __a) { - poly64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); - return ret; + return __builtin_aarch64_ld3rv2di_ps ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline int64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_s64 (const int64_t * __a) { - int64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); - ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline uint64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_u64 (const uint64_t * __a) { - uint64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); - ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); - ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); - ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); - return ret; + return 
__builtin_aarch64_ld4rdi_us ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_f64 (const float64_t * __a) { - float64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; - ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; - ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; - ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; - return ret; + return __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline int8x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_s8 (const int8_t * __a) { - int8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_p8 (const poly8_t * __a) { - poly8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8qi_ps ( + (const 
__builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_s16 (const int16_t * __a) { - int16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline poly16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_p16 (const poly16_t * __a) { - poly16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_s32 (const int32_t * __a) { - int32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) 
__a); } __extension__ extern __inline uint8x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_u8 (const uint8_t * __a) { - uint8x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); - ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); - ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); - ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_u16 (const uint16_t * __a) { - uint16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); - ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); - ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); - ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_u32 (const uint32_t * __a) { - uint32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); - ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); - ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); - ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); - return ret; + return __builtin_aarch64_ld4rv2si_us ( + (const __builtin_aarch64_simd_si *) __a); } 
__extension__ extern __inline float16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_f16 (const float16_t * __a) { - float16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0); - ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1); - ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2); - ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_f32 (const float32_t * __a) { - float32x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); - ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); - ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); - ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); - return ret; + return __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern __inline poly64x1x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_p64 (const poly64_t * __a) { - poly64x1x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); - ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); - ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); - ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); - return ret; + return __builtin_aarch64_ld4rdi_ps ((const __builtin_aarch64_simd_di *) __a); } 
__extension__ extern __inline int8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_s8 (const int8_t * __a) { - int8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline poly8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_p8 (const poly8_t * __a) { - poly8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv16qi_ps ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline int16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_s16 (const int16_t * __a) { - int16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); } __extension__ 
extern __inline poly16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_p16 (const poly16_t * __a) { - poly16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8hi_ps ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline int32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_s32 (const int32_t * __a) { - int32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); } __extension__ extern __inline int64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_s64 (const int64_t * __a) { - int64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline 
uint8x16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_u8 (const uint8_t * __a) { - uint8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv16qi_us ( + (const __builtin_aarch64_simd_qi *) __a); } __extension__ extern __inline uint16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_u16 (const uint16_t * __a) { - uint16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8hi_us ( + (const __builtin_aarch64_simd_hi *) __a); } __extension__ extern __inline uint32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_u32 (const uint32_t * __a) { - uint32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4si_us ( + (const __builtin_aarch64_simd_si *) __a); } __extension__ extern 
__inline uint64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_u64 (const uint64_t * __a) { - uint64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + return __builtin_aarch64_ld4rv2di_us ( + (const __builtin_aarch64_simd_di *) __a); } __extension__ extern __inline float16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_f16 (const float16_t * __a) { - float16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a); - ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0); - ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1); - ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2); - ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a); } __extension__ extern __inline float32x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_f32 (const float32_t * __a) { - float32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); - ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); } __extension__ extern 
__inline float64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_f64 (const float64_t * __a) { - float64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); - ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); - return ret; + return __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); } __extension__ extern __inline poly64x2x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_p64 (const poly64_t * __a) { - poly64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); - ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); - ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); - ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); - return ret; + return __builtin_aarch64_ld4rv2di_ps ( + (const __builtin_aarch64_simd_di *) __a); +} +__extension__ extern __inline uint8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_u8 (const uint8_t * __ptr, uint8x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8qi_usus ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline uint16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_u16 (const uint16_t * __ptr, uint16x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4hi_usus ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline uint32x2x2_t +__attribute__ ((__always_inline__, 
__gnu_inline__,__artificial__)) +vld2_lane_u32 (const uint32_t * __ptr, uint32x2x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev2si_usus ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline uint64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_u64 (const uint64_t * __ptr, uint64x1x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanedi_usus ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline int8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_s8 (const int8_t * __ptr, int8x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline int16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_s16 (const int16_t * __ptr, int16x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline int32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_s32 (const int32_t * __ptr, int32x2x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev2si ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline int64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_s64 (const int64_t * __ptr, int64x1x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline float16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_f16 (const float16_t * __ptr, float16x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4hf ( + (__builtin_aarch64_simd_hf *) __ptr, __b, __c); +} +__extension__ extern __inline float32x2x2_t +__attribute__ ((__always_inline__, 
__gnu_inline__,__artificial__)) +vld2_lane_f32 (const float32_t * __ptr, float32x2x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev2sf ( + (__builtin_aarch64_simd_sf *) __ptr, __b, __c); +} +__extension__ extern __inline float64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_f64 (const float64_t * __ptr, float64x1x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanedf ( + (__builtin_aarch64_simd_df *) __ptr, __b, __c); +} +__extension__ extern __inline poly8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_p8 (const poly8_t * __ptr, poly8x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8qi_psps ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline poly16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_p16 (const poly16_t * __ptr, poly16x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4hi_psps ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline poly64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_p64 (const poly64_t * __ptr, poly64x1x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanedi_psps ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline uint8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_u8 (const uint8_t * __ptr, uint8x16x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev16qi_usus ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline uint16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_u16 (const uint16_t * __ptr, uint16x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8hi_usus ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline uint32x4x2_t +__attribute__ 
((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_u32 (const uint32_t * __ptr, uint32x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4si_usus ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline uint64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_u64 (const uint64_t * __ptr, uint64x2x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev2di_usus ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline int8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_s8 (const int8_t * __ptr, int8x16x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline int16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_s16 (const int16_t * __ptr, int16x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline int32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_s32 (const int32_t * __ptr, int32x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4si ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline int64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_s64 (const int64_t * __ptr, int64x2x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline float16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_f16 (const float16_t * __ptr, float16x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8hf ( + (__builtin_aarch64_simd_hf *) __ptr, __b, __c); +} +__extension__ extern __inline float32x4x2_t +__attribute__ 
((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_f32 (const float32_t * __ptr, float32x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4sf ( + (__builtin_aarch64_simd_sf *) __ptr, __b, __c); +} +__extension__ extern __inline float64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_f64 (const float64_t * __ptr, float64x2x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev2df ( + (__builtin_aarch64_simd_df *) __ptr, __b, __c); +} +__extension__ extern __inline poly8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_p8 (const poly8_t * __ptr, poly8x16x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev16qi_psps ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline poly16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_p16 (const poly16_t * __ptr, poly16x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8hi_psps ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline poly64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2q_lane_p64 (const poly64_t * __ptr, poly64x2x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev2di_psps ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline uint8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_u8 (const uint8_t * __ptr, uint8x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8qi_usus ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline uint16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_u16 (const uint16_t * __ptr, uint16x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4hi_usus ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline uint32x2x3_t 
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_u32 (const uint32_t * __ptr, uint32x2x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev2si_usus ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline uint64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_u64 (const uint64_t * __ptr, uint64x1x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanedi_usus ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline int8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_s8 (const int8_t * __ptr, int8x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline int16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_s16 (const int16_t * __ptr, int16x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline int32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_s32 (const int32_t * __ptr, int32x2x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev2si ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline int64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_s64 (const int64_t * __ptr, int64x1x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline float16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_f16 (const float16_t * __ptr, float16x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4hf ( + (__builtin_aarch64_simd_hf *) __ptr, __b, __c); +} +__extension__ extern __inline float32x2x3_t +__attribute__ 
((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_f32 (const float32_t * __ptr, float32x2x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev2sf ( + (__builtin_aarch64_simd_sf *) __ptr, __b, __c); +} +__extension__ extern __inline float64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_f64 (const float64_t * __ptr, float64x1x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanedf ( + (__builtin_aarch64_simd_df *) __ptr, __b, __c); +} +__extension__ extern __inline poly8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_p8 (const poly8_t * __ptr, poly8x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8qi_psps ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline poly16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_p16 (const poly16_t * __ptr, poly16x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4hi_psps ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline poly64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_p64 (const poly64_t * __ptr, poly64x1x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanedi_psps ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline uint8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_u8 (const uint8_t * __ptr, uint8x16x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev16qi_usus ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline uint16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_u16 (const uint16_t * __ptr, uint16x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8hi_usus ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline uint32x4x3_t 
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_u32 (const uint32_t * __ptr, uint32x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4si_usus ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline uint64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_u64 (const uint64_t * __ptr, uint64x2x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev2di_usus ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline int8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_s8 (const int8_t * __ptr, int8x16x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline int16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_s16 (const int16_t * __ptr, int16x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline int32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_s32 (const int32_t * __ptr, int32x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4si ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline int64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_s64 (const int64_t * __ptr, int64x2x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline float16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_f16 (const float16_t * __ptr, float16x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8hf ( + (__builtin_aarch64_simd_hf *) __ptr, __b, __c); +} +__extension__ extern __inline float32x4x3_t 
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_f32 (const float32_t * __ptr, float32x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4sf ( + (__builtin_aarch64_simd_sf *) __ptr, __b, __c); +} +__extension__ extern __inline float64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_f64 (const float64_t * __ptr, float64x2x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev2df ( + (__builtin_aarch64_simd_df *) __ptr, __b, __c); +} +__extension__ extern __inline poly8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_p8 (const poly8_t * __ptr, poly8x16x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev16qi_psps ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline poly16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_p16 (const poly16_t * __ptr, poly16x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8hi_psps ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline poly64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_p64 (const poly64_t * __ptr, poly64x2x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev2di_psps ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline uint8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u8 (const uint8_t * __ptr, uint8x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8qi_usus ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline uint16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u16 (const uint16_t * __ptr, uint16x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4hi_usus ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline 
uint32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u32 (const uint32_t * __ptr, uint32x2x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev2si_usus ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline uint64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_u64 (const uint64_t * __ptr, uint64x1x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanedi_usus ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline int8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s8 (const int8_t * __ptr, int8x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8qi ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline int16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s16 (const int16_t * __ptr, int16x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4hi ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline int32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s32 (const int32_t * __ptr, int32x2x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev2si ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline int64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_s64 (const int64_t * __ptr, int64x1x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanedi ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline float16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f16 (const float16_t * __ptr, float16x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4hf ( + (__builtin_aarch64_simd_hf *) __ptr, __b, __c); +} +__extension__ extern __inline float32x2x4_t 
+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f32 (const float32_t * __ptr, float32x2x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev2sf ( + (__builtin_aarch64_simd_sf *) __ptr, __b, __c); +} +__extension__ extern __inline float64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_f64 (const float64_t * __ptr, float64x1x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanedf ( + (__builtin_aarch64_simd_df *) __ptr, __b, __c); +} +__extension__ extern __inline poly8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p8 (const poly8_t * __ptr, poly8x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8qi_psps ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline poly16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p16 (const poly16_t * __ptr, poly16x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4hi_psps ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline poly64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_p64 (const poly64_t * __ptr, poly64x1x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanedi_psps ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u8 (const uint8_t * __ptr, uint8x16x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev16qi_usus ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u16 (const uint16_t * __ptr, uint16x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8hi_usus ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline 
uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u32 (const uint32_t * __ptr, uint32x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4si_usus ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline uint64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_u64 (const uint64_t * __ptr, uint64x2x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev2di_usus ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s8 (const int8_t * __ptr, int8x16x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev16qi ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s16 (const int16_t * __ptr, int16x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8hi ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s32 (const int32_t * __ptr, int32x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4si ( + (__builtin_aarch64_simd_si *) __ptr, __b, __c); +} +__extension__ extern __inline int64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_s64 (const int64_t * __ptr, int64x2x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev2di ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); +} +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f16 (const float16_t * __ptr, float16x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8hf ( + (__builtin_aarch64_simd_hf *) __ptr, __b, __c); +} +__extension__ extern __inline 
float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f32 (const float32_t * __ptr, float32x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4sf ( + (__builtin_aarch64_simd_sf *) __ptr, __b, __c); +} +__extension__ extern __inline float64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_f64 (const float64_t * __ptr, float64x2x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev2df ( + (__builtin_aarch64_simd_df *) __ptr, __b, __c); +} +__extension__ extern __inline poly8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p8 (const poly8_t * __ptr, poly8x16x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev16qi_psps ( + (__builtin_aarch64_simd_qi *) __ptr, __b, __c); +} +__extension__ extern __inline poly16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p16 (const poly16_t * __ptr, poly16x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8hi_psps ( + (__builtin_aarch64_simd_hi *) __ptr, __b, __c); +} +__extension__ extern __inline poly64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_p64 (const poly64_t * __ptr, poly64x2x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev2di_psps ( + (__builtin_aarch64_simd_di *) __ptr, __b, __c); } -#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, qmode, ptrmode, funcsuffix, signedtype) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_oi __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); __o = __builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[0], 0); __o = 
__builtin_aarch64_set_qregoi##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_ld2_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); return __b; } -__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, - p16, int16x8_t) -__LD2_LANE_FUNC (poly64x1x2_t, poly64x1_t, poly64x2x2_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, - u64, int64x2_t) -#define __LD2Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { 
__builtin_aarch64_simd_oi __o; intype ret; __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); __o = __builtin_aarch64_ld2_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); return ret; } -__LD2Q_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD2Q_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD2Q_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64) -__LD2Q_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD2Q_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD2Q_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD2Q_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD2Q_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD2Q_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32) -__LD2Q_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64) -__LD2Q_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD2Q_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD2Q_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD2Q_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) -#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, qmode, ptrmode, funcsuffix, signedtype) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_ci __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); __o = 
__builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregci##qmode (__o, (signedtype) __temp.val[2], 2); __o = __builtin_aarch64_ld3_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); return __b; } -__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, - p16, int16x8_t) -__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, - u64, int64x2_t) -#define __LD3Q_LANE_FUNC(intype, vtype, 
ptrtype, mode, ptrmode, funcsuffix) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_ci __o; intype ret; __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); __o = __builtin_aarch64_ld3_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); return ret; } -__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64) -__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32) -__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64) -__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) -#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, qmode, ptrmode, funcsuffix, signedtype) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld4_lane_##funcsuffix (const 
ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_xi __o; largetype __temp; __temp.val[0] = vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); __temp.val[1] = vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); __temp.val[2] = vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); __temp.val[3] = vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[0], 0); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[1], 1); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[2], 2); __o = __builtin_aarch64_set_qregxi##qmode (__o, (signedtype) __temp.val[3], 3); __o = __builtin_aarch64_ld4_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); return __b; } -__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf, - v8hf, hf, f16, float16x8_t) -__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, - sf, f32, float32x4_t) -__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, - df, f64, float64x2_t) -__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, - p16, int16x8_t) -__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di, - v2di_ssps, di, p64, poly64x2_t) -__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, 
- int32x4_t) -__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, - int64x2_t) -__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, - u16, int16x8_t) -__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, - u32, int32x4_t) -__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, - u64, int64x2_t) -#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) __extension__ extern __inline intype __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) { __builtin_aarch64_simd_xi __o; intype ret; __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); __o = __builtin_aarch64_ld4_lane##mode ( (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); return ret; } -__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) -__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD4Q_LANE_FUNC (int16x8x4_t, 
int16x8_t, int16_t, v8hi, hi, s16) -__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) -__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) -__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmax_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_smax_nanv2sf (__a, __b); + return __builtin_aarch64_fmax_nanv2sf (__a, __b); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmax_f64 (float64x1_t __a, float64x1_t __b) { return (float64x1_t) - { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0), + { __builtin_aarch64_fmax_nandf (vget_lane_f64 (__a, 0), vget_lane_f64 (__b, 0)) }; } __extension__ extern __inline int8x8_t @@ -16604,13 +15420,13 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_smax_nanv4sf (__a, __b); + return __builtin_aarch64_fmax_nanv4sf (__a, __b); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_smax_nanv2df (__a, __b); + return __builtin_aarch64_fmax_nanv2df (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17173,14 +15989,14 @@ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmin_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_smin_nanv2sf (__a, __b); + return 
__builtin_aarch64_fmin_nanv2sf (__a, __b); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmin_f64 (float64x1_t __a, float64x1_t __b) { return (float64x1_t) - { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0), + { __builtin_aarch64_fmin_nandf (vget_lane_f64 (__a, 0), vget_lane_f64 (__b, 0)) }; } __extension__ extern __inline int8x8_t @@ -17226,13 +16042,13 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_smin_nanv4sf (__a, __b); + return __builtin_aarch64_fmin_nanv4sf (__a, __b); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_smin_nanv2df (__a, __b); + return __builtin_aarch64_fmin_nanv2df (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17411,32 +16227,32 @@ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) { - return __a + __b * __c; + return __builtin_aarch64_float_mlav2sf (__a, __b, __c); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) { - return __a + __b * __c; + return (float64x1_t) {__builtin_aarch64_float_mladf (__a[0], __b[0], __c[0])}; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __a + __b * __c; + return __builtin_aarch64_float_mlav4sf (__a, __b, __c); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vmlaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) { - return __a + __b * __c; + return __builtin_aarch64_float_mlav2df (__a, __b, __c); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __lane) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mla_lanev2sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17471,7 +16287,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, float32x4_t __c, const int __lane) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mla_laneqv2sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17506,7 +16322,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __lane) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mla_lanev4sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17541,7 +16357,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, const int __lane) { - return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mla_laneqv4sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17575,32 +16391,32 @@ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) { - return __a - __b * __c; + return __builtin_aarch64_float_mlsv2sf (__a, __b, __c); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) { - return __a - __b * __c; + return (float64x1_t) {__builtin_aarch64_float_mlsdf (__a[0], __b[0], __c[0])}; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) { - return __a - __b * __c; + return __builtin_aarch64_float_mlsv4sf (__a, __b, __c); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) { - return __a - __b * __c; + return __builtin_aarch64_float_mlsv2df (__a, __b, __c); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __lane) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mls_lanev2sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17635,7 +16451,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, float32x4_t __c, const int __lane) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mls_laneqv2sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17670,7 +16486,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, 
float32x2_t __c, const int __lane) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mls_lanev4sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -17705,7 +16521,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, const int __lane) { - return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); + return __builtin_aarch64_float_mls_laneqv4sf (__a, __b, __c, __lane); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -18346,22 +17162,19 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadd_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_addpv8qi_uuu (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadd_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_addpv4hi_uuu (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpadd_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_addpv2si_uuu (__a, __b); } __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -18385,7 +17198,7 @@ __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddd_u64 (uint64x2_t __a) { - return __builtin_aarch64_addpdi ((int64x2_t) __a); + return __builtin_aarch64_addpdi_uu (__a); } __extension__ 
extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -19899,373 +18712,217 @@ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); - return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl2v8qi_ssu (__tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl2v8qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl2v8qi_ppu (__tab, __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl2v16qi_ssu (__tab, __idx); } __extension__ 
extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl2v16qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl2v16qi_ppu (__tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3_s8 (int8x16x3_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl3v8qi_ssu (__tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3_u8 (uint8x16x3_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); + return 
__builtin_aarch64_qtbl3v8qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3_p8 (poly8x16x3_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl3v8qi_ppu (__tab, __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3q_s8 (int8x16x3_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl3v16qi_ssu (__tab, __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3q_u8 (uint8x16x3_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl3v16qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3q_p8 (poly8x16x3_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl3v16qi_ppu (__tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4_s8 (int8x16x4_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl4v8qi_ssu (__tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4_u8 (uint8x16x4_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl4v8qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4_p8 (poly8x16x4_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, 
(int8x16_t)__tab.val[3], 3); - return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl4v8qi_ppu (__tab, __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4q_s8 (int8x16x4_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl4v16qi_ssu (__tab, __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4q_u8 (uint8x16x4_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl4v16qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4q_p8 (poly8x16x4_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); + 
return __builtin_aarch64_qtbl4v16qi_ppu (__tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); - return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx); + return __builtin_aarch64_qtbx2v8qi_sssu (__r, __tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return __builtin_aarch64_qtbx2v8qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return __builtin_aarch64_qtbx2v8qi_pppu (__r, __tab, __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); - return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx); + return __builtin_aarch64_qtbx2v16qi_sssu 
(__r, __tab, __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + return __builtin_aarch64_qtbx2v16qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + return __builtin_aarch64_qtbx2v16qi_pppu (__r, __tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3_s8 (int8x8_t __r, int8x16x3_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); - return __builtin_aarch64_qtbx3v8qi (__r, __o, (int8x8_t)__idx); + return __builtin_aarch64_qtbx3v8qi_sssu (__r, __tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3_u8 (uint8x8_t __r, uint8x16x3_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return __builtin_aarch64_qtbx3v8qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3_p8 (poly8x8_t __r, poly8x16x3_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return __builtin_aarch64_qtbx3v8qi_pppu (__r, __tab, __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3q_s8 (int8x16_t __r, int8x16x3_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); - return __builtin_aarch64_qtbx3v16qi (__r, __o, (int8x16_t)__idx); + return __builtin_aarch64_qtbx3v16qi_sssu (__r, __tab, __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3q_u8 (uint8x16_t __r, uint8x16x3_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + return __builtin_aarch64_qtbx3v16qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x16_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3q_p8 (poly8x16_t __r, poly8x16x3_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); - return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + return __builtin_aarch64_qtbx3v16qi_pppu (__r, __tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4_s8 (int8x8_t __r, int8x16x4_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); - return __builtin_aarch64_qtbx4v8qi (__r, __o, (int8x8_t)__idx); + return __builtin_aarch64_qtbx4v8qi_sssu (__r, __tab, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4_u8 (uint8x8_t __r, uint8x16x4_t __tab, uint8x8_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return __builtin_aarch64_qtbx4v8qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4_p8 (poly8x8_t __r, poly8x16x4_t __tab, uint8x8_t __idx) { - 
__builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return __builtin_aarch64_qtbx4v8qi_pppu (__r, __tab, __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4q_s8 (int8x16_t __r, int8x16x4_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); - return __builtin_aarch64_qtbx4v16qi (__r, __o, (int8x16_t)__idx); + return __builtin_aarch64_qtbx4v16qi_sssu (__r, __tab, __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4q_u8 (uint8x16_t __r, uint8x16x4_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + return __builtin_aarch64_qtbx4v16qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4q_p8 (poly8x16_t __r, poly8x16x4_t __tab, uint8x16_t __idx) { - __builtin_aarch64_simd_xi __o; - __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); - return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + return __builtin_aarch64_qtbx4v16qi_pppu (__r, __tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -20737,31 +19394,31 @@ __extension__ extern __inline float32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndns_f32 (float32_t __a) { - return __builtin_aarch64_frintnsf (__a); + return __builtin_aarch64_roundevensf (__a); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndn_f32 (float32x2_t __a) { - return __builtin_aarch64_frintnv2sf (__a); + return __builtin_aarch64_roundevenv2sf (__a); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndn_f64 (float64x1_t __a) { - return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])}; + return (float64x1_t) {__builtin_aarch64_roundevendf (__a[0])}; } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndnq_f32 (float32x4_t __a) { - return __builtin_aarch64_frintnv4sf (__a); + return __builtin_aarch64_roundevenv4sf (__a); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndnq_f64 (float64x2_t __a) { - return __builtin_aarch64_frintnv2df (__a); + return __builtin_aarch64_roundevenv2df (__a); } __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -21603,19 +20260,19 @@ __extension__ extern __inline uint8x8_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vshr_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_lshrv8qi_uus (__a, __b); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshr_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_lshrv4hi_uus (__a, __b); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshr_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_lshrv2si_uus (__a, __b); } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -21651,25 +20308,25 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); + return __builtin_aarch64_lshrv16qi_uus (__a, __b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_lshrv8hi_uus (__a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_lshrv4si_uus (__a, __b); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshrq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); + 
return __builtin_aarch64_lshrv2di_uus (__a, __b); } __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -22137,15 +20794,13 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t *__a, poly8x8_t __b) { - __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, - (int8x8_t) __b); + __builtin_aarch64_st1v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p16 (poly16_t *__a, poly16x4_t __b) { - __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, - (int16x4_t) __b); + __builtin_aarch64_st1v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -22181,22 +20836,19 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t *__a, uint8x8_t __b) { - __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, - (int8x8_t) __b); + __builtin_aarch64_st1v8qi_su ((__builtin_aarch64_simd_qi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u16 (uint16_t *__a, uint16x4_t __b) { - __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, - (int16x4_t) __b); + __builtin_aarch64_st1v4hi_su ((__builtin_aarch64_simd_hi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u32 (uint32_t *__a, uint32x2_t __b) { - __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a, - (int32x2_t) __b); + __builtin_aarch64_st1v2si_su ((__builtin_aarch64_simd_si *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -22226,22 +20878,19 @@ __extension__ extern __inline void 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t *__a, poly8x16_t __b) { - __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, - (int8x16_t) __b); + __builtin_aarch64_st1v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p16 (poly16_t *__a, poly16x8_t __b) { - __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, - (int16x8_t) __b); + __builtin_aarch64_st1v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p64 (poly64_t *__a, poly64x2_t __b) { - __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a, - (poly64x2_t) __b); + __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -22271,29 +20920,25 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t *__a, uint8x16_t __b) { - __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, - (int8x16_t) __b); + __builtin_aarch64_st1v16qi_su ((__builtin_aarch64_simd_qi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u16 (uint16_t *__a, uint16x8_t __b) { - __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, - (int16x8_t) __b); + __builtin_aarch64_st1v8hi_su ((__builtin_aarch64_simd_hi *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u32 (uint32_t *__a, uint32x4_t __b) { - __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a, - (int32x4_t) __b); + __builtin_aarch64_st1v4si_su ((__builtin_aarch64_simd_si *) __a, __b); } __extension__ extern __inline void __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u64 (uint64_t *__a, uint64x2_t __b) { - __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a, - (int64x2_t) __b); + __builtin_aarch64_st1v2di_su ((__builtin_aarch64_simd_di *) __a, __b); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -22467,1879 +21112,1010 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s64_x2 (int64_t * __a, int64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - int64x2x2_t __temp; - __temp.val[0] - = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[1] - = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint64x2x2_t __temp; - __temp.val[0] - = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[1] - = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_f64_x2 (float64_t * __a, float64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - float64x2x2_t __temp; - __temp.val[0] - = 
vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[1] - = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x2 (int8_t * __a, int8x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - int8x16x2_t __temp; - __temp.val[0] - = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[1] - = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - poly8x16x2_t __temp; - __temp.val[0] - = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[1] - = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x2v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s16_x2 (int16_t * __a, int16x4x2_t __val) { - 
__builtin_aarch64_simd_oi __o; - int16x8x2_t __temp; - __temp.val[0] - = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[1] - = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - poly16x8x2_t __temp; - __temp.val[0] - = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[1] - = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x2v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s32_x2 (int32_t * __a, int32x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - int32x4x2_t __temp; - __temp.val[0] - = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[1] - = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint8x16x2_t __temp; - __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x2v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint16x8x2_t __temp; - __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x2v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint32x4x2_t __temp; - __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x2v2si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline 
void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_f16_x2 (float16_t * __a, float16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - float16x8x2_t __temp; - __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); - __builtin_aarch64_st1x2v4hf (__a, __o); + __builtin_aarch64_st1x2v4hf (__a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_f32_x2 (float32_t * __a, float32x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - float32x4x2_t __temp; - __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - poly64x2x2_t __temp; - __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x2di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s8_x2 (int8_t * __a, int8x16x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x2v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s16_x2 (int16_t * __a, int16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x2v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s32_x2 (int32_t * __a, int32x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); - __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s64_x2 (int64_t * __a, int64x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x2v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x2v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); - __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x2v4si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x2v2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f16_x2 (float16_t * __a, float16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); - __builtin_aarch64_st1x2v8hf (__a, __o); + __builtin_aarch64_st1x2v8hf (__a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f32_x2 (float32_t * __a, float32x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); - __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f64_x2 (float64_t * __a, float64x2x2_t __val) { - 
__builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x2v2di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s64_x3 (int64_t * __a, int64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - int64x2x3_t __temp; - __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint64x2x3_t __temp; - __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = 
vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x3di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_f64_x3 (float64_t * __a, float64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - float64x2x3_t __temp; - __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 (int8_t * __a, int8x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - int8x16x3_t __temp; - __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o 
= __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - poly8x16x3_t __temp; - __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x3v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s16_x3 (int16_t * __a, int16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - int16x8x3_t __temp; - __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - poly16x8x3_t __temp; - __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x3v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s32_x3 (int32_t * __a, int32x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - int32x4x3_t __temp; - __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint8x16x3_t __temp; - __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[2] = 
vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x3v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint16x8x3_t __temp; - __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x3v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint32x4x3_t __temp; - __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 
2); - __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x3v2si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_f16_x3 (float16_t * __a, float16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - float16x8x3_t __temp; - __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); + __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_f32_x3 (float32_t * __a, float32x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - float32x4x3_t __temp; - __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p64_x3 (poly64_t * __a, 
poly64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - poly64x2x3_t __temp; - __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x3di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s8_x3 (int8_t * __a, int8x16x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x3v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s16_x3 
(int16_t * __a, int16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x3v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s32_x3 (int32_t * __a, int32x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); - __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s64_x3 (int64_t * __a, int64x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 
2); - __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, + (int64x2x3_t) __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st1x3v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st1x3v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); - __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st1x3v4si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u64_x3 
(uint64_t * __a, uint64x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); - __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x3v2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f16_x3 (float16_t * __a, float16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f32_x3 (float32_t * __a, float32x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); - __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f64_x3 (float64_t * __a, float64x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, 
(float64x2_t) __val.val[2], 2); - __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[2], 2); - __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st1x3v2di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s8_x4 (int8_t * __a, int8x8x4_t val) +vst1_s8_x4 (int8_t * __a, int8x8x4_t __val) { - union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s8_x4 (int8_t * __a, int8x16x4_t val) +vst1q_s8_x4 (int8_t * __a, int8x16x4_t __val) { - union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s16_x4 (int16_t * __a, int16x4x4_t val) +vst1_s16_x4 (int16_t * __a, int16x4x4_t __val) { - union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_st1x4v4hi 
((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s16_x4 (int16_t * __a, int16x8x4_t val) +vst1q_s16_x4 (int16_t * __a, int16x8x4_t __val) { - union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s32_x4 (int32_t * __a, int32x2x4_t val) +vst1_s32_x4 (int32_t * __a, int32x2x4_t __val) { - union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s32_x4 (int32_t * __a, int32x4x4_t val) +vst1q_s32_x4 (int32_t * __a, int32x4x4_t __val) { - union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u8_x4 (uint8_t * __a, uint8x8x4_t val) +vst1_u8_x4 (uint8_t * __a, uint8x8x4_t __val) { - union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_st1x4v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t val) +vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t __val) { - union { uint8x16x4_t __i; 
__builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_st1x4v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u16_x4 (uint16_t * __a, uint16x4x4_t val) +vst1_u16_x4 (uint16_t * __a, uint16x4x4_t __val) { - union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_st1x4v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t val) +vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t __val) { - union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_st1x4v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u32_x4 (uint32_t * __a, uint32x2x4_t val) +vst1_u32_x4 (uint32_t * __a, uint32x2x4_t __val) { - union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_st1x4v2si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t val) +vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t __val) { - union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_st1x4v4si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f16_x4 (float16_t * __a, float16x4x4_t val) +vst1_f16_x4 (float16_t * __a, float16x4x4_t __val) { - union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); + __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f16_x4 (float16_t * __a, float16x8x4_t val) +vst1q_f16_x4 (float16_t * __a, float16x8x4_t __val) { - union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); + __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f32_x4 (float32_t * __a, float32x2x4_t val) +vst1_f32_x4 (float32_t * __a, float32x2x4_t __val) { - union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); + __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f32_x4 (float32_t * __a, float32x4x4_t val) +vst1q_f32_x4 (float32_t * __a, float32x4x4_t __val) { - union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); + __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p8_x4 (poly8_t * __a, poly8x8x4_t val) +vst1_p8_x4 (poly8_t * __a, poly8x8x4_t __val) { - union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - 
__builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_st1x4v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t val) +vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t __val) { - union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_st1x4v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p16_x4 (poly16_t * __a, poly16x4x4_t val) +vst1_p16_x4 (poly16_t * __a, poly16x4x4_t __val) { - union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_st1x4v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t val) +vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t __val) { - union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_st1x4v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s64_x4 (int64_t * __a, int64x1x4_t val) +vst1_s64_x4 (int64_t * __a, int64x1x4_t __val) { - union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u64_x4 
(uint64_t * __a, uint64x1x4_t val) +vst1_u64_x4 (uint64_t * __a, uint64x1x4_t __val) { - union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_st1x4di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p64_x4 (poly64_t * __a, poly64x1x4_t val) +vst1_p64_x4 (poly64_t * __a, poly64x1x4_t __val) { - union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_st1x4di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s64_x4 (int64_t * __a, int64x2x4_t val) +vst1q_s64_x4 (int64_t * __a, int64x2x4_t __val) { - union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t val) +vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t __val) { - union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_st1x4v2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t val) +vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t __val) { - union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_st1x4v2di_sp 
((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f64_x4 (float64_t * __a, float64x1x4_t val) +vst1_f64_x4 (float64_t * __a, float64x1x4_t __val) { - union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __u.__o); + __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f64_x4 (float64_t * __a, float64x2x4_t val) +vst1q_f64_x4 (float64_t * __a, float64x2x4_t __val) { - union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __u.__o); + __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_s64 (int64_t * __a, int64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - int64x2x2_t __temp; - __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); - __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_u64 (uint64_t * __a, uint64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint64x2x2_t __temp; - __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = 
__builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); - __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_f64 (float64_t * __a, float64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - float64x2x2_t __temp; - __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); - __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_s8 (int8_t * __a, int8x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - int8x16x2_t __temp; - __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_p8 (poly8_t * __a, poly8x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - poly8x16x2_t __temp; - __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 
(__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st2v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_s16 (int16_t * __a, int16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - int16x8x2_t __temp; - __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_p16 (poly16_t * __a, poly16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - poly16x8x2_t __temp; - __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st2v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_s32 (int32_t * __a, int32x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - int32x4x2_t __temp; - __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[1] = 
vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); - __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_u8 (uint8_t * __a, uint8x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint8x16x2_t __temp; - __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st2v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_u16 (uint16_t * __a, uint16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint16x8x2_t __temp; - __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st2v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_u32 (uint32_t * __a, uint32x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - uint32x4x2_t __temp; - __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 
(__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); - __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st2v2si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_f16 (float16_t * __a, float16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - float16x8x2_t __temp; - __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); - __builtin_aarch64_st2v4hf (__a, __o); + __builtin_aarch64_st2v4hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_f32 (float32_t * __a, float32x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - float32x4x2_t __temp; - __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); - __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_p64 (poly64_t * __a, poly64x1x2_t __val) { - __builtin_aarch64_simd_oi __o; - poly64x2x2_t __temp; - __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 
(__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st2di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s8 (int8_t * __a, int8x16x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_p8 (poly8_t * __a, poly8x16x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st2v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s16 (int16_t * __a, int16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_p16 
(poly16_t * __a, poly16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st2v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s32 (int32_t * __a, int32x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); - __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s64 (int64_t * __a, int64x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); - __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u8 (uint8_t * __a, uint8x16x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st2v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u16 (uint16_t * __a, uint16x8x2_t __val) { - 
__builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st2v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u32 (uint32_t * __a, uint32x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); - __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st2v4si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u64 (uint64_t * __a, uint64x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); - __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st2v2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_f16 (float16_t * __a, float16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); - __builtin_aarch64_st2v8hf (__a, __o); + __builtin_aarch64_st2v8hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_f32 (float32_t * __a, float32x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 
0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); - __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_f64 (float64_t * __a, float64x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); - __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_p64 (poly64_t * __a, poly64x2x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st2v2di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_s64 (int64_t * __a, int64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - int64x2x3_t __temp; - __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); - __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); + 
__builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_u64 (uint64_t * __a, uint64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint64x2x3_t __temp; - __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); - __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st3di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_f64 (float64_t * __a, float64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - float64x2x3_t __temp; - __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); - __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_s8 (int8_t * __a, int8x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - int8x16x3_t __temp; - __temp.val[0] = vcombine_s8 (__val.val[0], 
vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_p8 (poly8_t * __a, poly8x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - poly8x16x3_t __temp; - __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st3v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_s16 (int16_t * __a, int16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - int16x8x3_t __temp; - __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, 
(int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_p16 (poly16_t * __a, poly16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - poly16x8x3_t __temp; - __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st3v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_s32 (int32_t * __a, int32x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - int32x4x3_t __temp; - __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); - __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vst3_u8 (uint8_t * __a, uint8x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint8x16x3_t __temp; - __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st3v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_u16 (uint16_t * __a, uint16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint16x8x3_t __temp; - __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st3v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_u32 (uint32_t * __a, uint32x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - uint32x4x3_t __temp; - __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - 
__temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); - __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st3v2si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_f16 (float16_t * __a, float16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - float16x8x3_t __temp; - __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); + __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_f32 (float32_t * __a, float32x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - float32x4x3_t __temp; - __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) 
__temp.val[2], 2); - __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_p64 (poly64_t * __a, poly64x1x3_t __val) { - __builtin_aarch64_simd_ci __o; - poly64x2x3_t __temp; - __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[2], 2); - __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st3di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s8 (int8_t * __a, int8x16x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_p8 (poly8_t * __a, poly8x16x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 
2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st3v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s16 (int16_t * __a, int16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_p16 (poly16_t * __a, poly16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st3v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s32 (int32_t * __a, int32x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); - __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s64 (int64_t * __a, int64x2x3_t __val) { - 
__builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u8 (uint8_t * __a, uint8x16x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st3v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u16 (uint16_t * __a, uint16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st3v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u32 (uint32_t * __a, uint32x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); - __builtin_aarch64_st3v4si 
((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st3v4si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u64 (uint64_t * __a, uint64x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st3v2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_f16 (float16_t * __a, float16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_f32 (float32_t * __a, float32x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); - __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_f64 (float64_t * __a, float64x2x3_t __val) { - __builtin_aarch64_simd_ci 
__o; - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_p64 (poly64_t * __a, poly64x2x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st3v2di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_s64 (int64_t * __a, int64x1x4_t __val) { - __builtin_aarch64_simd_xi __o; - int64x2x4_t __temp; - __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); - __temp.val[3] = vcombine_s64 (__val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_u64 (uint64_t * __a, uint64x1x4_t __val) { - __builtin_aarch64_simd_xi __o; - uint64x2x4_t __temp; - __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_u64 (__val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st4di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_f64 (float64_t * __a, float64x1x4_t __val) { - __builtin_aarch64_simd_xi __o; - float64x2x4_t __temp; - __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_f64 (__val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern 
__inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_s8 (int8_t * __a, int8x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - int8x16x4_t __temp; - __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); - __temp.val[3] = vcombine_s8 (__val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_p8 (poly8_t * __a, poly8x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - poly8x16x4_t __temp; - __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_p8 (__val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st4v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline 
void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_s16 (int16_t * __a, int16x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - int16x8x4_t __temp; - __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); - __temp.val[3] = vcombine_s16 (__val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_p16 (poly16_t * __a, poly16x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - poly16x8x4_t __temp; - __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_p16 (__val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st4v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern 
__inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_s32 (int32_t * __a, int32x2x4_t __val) { - __builtin_aarch64_simd_xi __o; - int32x4x4_t __temp; - __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); - __temp.val[3] = vcombine_s32 (__val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); - __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_u8 (uint8_t * __a, uint8x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - uint8x16x4_t __temp; - __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_u8 (__val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st4v8qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern 
__inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_u16 (uint16_t * __a, uint16x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - uint16x8x4_t __temp; - __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_u16 (__val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st4v4hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_u32 (uint32_t * __a, uint32x2x4_t __val) { - __builtin_aarch64_simd_xi __o; - uint32x4x4_t __temp; - __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_u32 (__val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); - __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st4v2si_su ((__builtin_aarch64_simd_si *) __a, __val); } 
__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_f16 (float16_t * __a, float16x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - float16x8x4_t __temp; - __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_f16 (__val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); + __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_f32 (float32_t * __a, float32x2x4_t __val) { - __builtin_aarch64_simd_xi __o; - float32x4x4_t __temp; - __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_f32 (__val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3); - __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st4v2sf 
((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_p64 (poly64_t * __a, poly64x1x4_t __val) { - __builtin_aarch64_simd_xi __o; - poly64x2x4_t __temp; - __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_p64 (__val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st4di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s8 (int8_t * __a, int8x16x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_p8 (poly8_t * __a, poly8x16x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st4v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s16 (int16_t * __a, int16x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_p16 (poly16_t * __a, poly16x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st4v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s32 (int32_t * __a, int32x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); - __o = 
__builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); - __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s64 (int64_t * __a, int64x2x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u8 (uint8_t * __a, uint8x16x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + __builtin_aarch64_st4v16qi_su ((__builtin_aarch64_simd_qi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u16 (uint16_t * __a, uint16x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); - __o = 
__builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + __builtin_aarch64_st4v8hi_su ((__builtin_aarch64_simd_hi *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u32 (uint32_t * __a, uint32x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); - __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); + __builtin_aarch64_st4v4si_su ((__builtin_aarch64_simd_si *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u64 (uint64_t * __a, uint64x2x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st4v2di_su ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_f16 (float16_t * __a, float16x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[3], 3); 
- __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_f32 (float32_t * __a, float32x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[3], 3); - __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_f64 (float64_t * __a, float64x2x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[3], 3); - __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); + __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_p64 (poly64_t * __a, poly64x2x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[3], 3); - __builtin_aarch64_st4v2di 
((__builtin_aarch64_simd_di *) __a, __o); + __builtin_aarch64_st4v2di_sp ((__builtin_aarch64_simd_di *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -24413,51 +22189,28 @@ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) { - int8x8_t __result; int8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); - return __result; + return __builtin_aarch64_qtbx2v8qi (__r, __temp, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) { - uint8x8_t __result; uint8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); - return __result; + return __builtin_aarch64_qtbx2v8qi_uuuu (__r, __temp, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) { - poly8x8_t __result; poly8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); - __o = 
__builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); - __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); - return __result; + return __builtin_aarch64_qtbx2v8qi_pppu (__r, __temp, __idx); } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -26547,13 +24300,13 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndn_f16 (float16x4_t __a) { - return __builtin_aarch64_frintnv4hf (__a); + return __builtin_aarch64_roundevenv4hf (__a); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndnq_f16 (float16x8_t __a) { - return __builtin_aarch64_frintnv8hf (__a); + return __builtin_aarch64_roundevenv8hf (__a); } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -26799,13 +24552,13 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmax_f16 (float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_smax_nanv4hf (__a, __b); + return __builtin_aarch64_fmax_nanv4hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxq_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_smax_nanv8hf (__a, __b); + return __builtin_aarch64_fmax_nanv8hf (__a, __b); } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -26823,13 +24576,13 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmin_f16 (float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_smin_nanv4hf (__a, __b); + return __builtin_aarch64_fmin_nanv4hf (__a, __b); } 
__extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminq_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_smin_nanv8hf (__a, __b); + return __builtin_aarch64_fmin_nanv8hf (__a, __b); } __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -27246,25 +24999,25 @@ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udot_prodv8qi_uuuu (__a, __b, __r); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udot_prodv16qi_uuuu (__a, __b, __r); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_sdotv8qi (__r, __a, __b); + return __builtin_aarch64_sdot_prodv8qi (__a, __b, __r); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_sdotv16qi (__r, __a, __b); + return __builtin_aarch64_sdot_prodv16qi (__a, __b, __r); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -28220,7 +25973,7 @@ __extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcombine_bf16 (bfloat16x4_t __a, bfloat16x4_t __b) { - return (bfloat16x8_t)__builtin_aarch64_combinev4bf (__a, __b); + return __builtin_aarch64_combinev4bf (__a, __b); } __extension__ extern __inline bfloat16x4_t 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -28286,65 +26039,38 @@ __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_bf16_x2 (const bfloat16_t *__a) { - bfloat16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v4bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); - ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x2 (const bfloat16_t *__a) { - bfloat16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld1x2v8bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); - ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); - return ret; + return __builtin_aarch64_ld1x2v8bf ( + (const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_bf16_x3 (const bfloat16_t *__a) { - bfloat16x4x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v4bf ((const __builtin_aarch64_simd_bf *) __a); - __i.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); - __i.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); - __i.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x3 (const bfloat16_t *__a) { - bfloat16x8x3_t __i; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld1x3v8bf 
((const __builtin_aarch64_simd_bf *) __a); - __i.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); - __i.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); - __i.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); - return __i; + return __builtin_aarch64_ld1x3v8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_bf16_x4 (const bfloat16_t *__a) { - union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v4bf ((const __builtin_aarch64_simd_bf *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x4 (const bfloat16_t *__a) { - union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; - __au.__o - = __builtin_aarch64_ld1x4v8bf ((const __builtin_aarch64_simd_bf *) __a); - return __au.__i; + return __builtin_aarch64_ld1x4v8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -28374,145 +26100,73 @@ __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_bf16 (const bfloat16_t * __a) { - bfloat16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4bf (__a); - ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); - ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); - return ret; + return __builtin_aarch64_ld2v4bf (__a); } __extension__ extern __inline bfloat16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_bf16 (const bfloat16_t * __a) { - bfloat16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = 
__builtin_aarch64_ld2v8bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); - ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); - return ret; + return __builtin_aarch64_ld2v8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_dup_bf16 (const bfloat16_t * __a) { - bfloat16x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); - ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); - return ret; + return __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x8x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2q_dup_bf16 (const bfloat16_t * __a) { - bfloat16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); - ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); - return ret; + return __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_bf16 (const bfloat16_t * __a) { - bfloat16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); - ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); - ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); - return ret; + return __builtin_aarch64_ld3v4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline 
bfloat16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_bf16 (const bfloat16_t * __a) { - bfloat16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); - ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); - ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); - return ret; + return __builtin_aarch64_ld3v8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3_dup_bf16 (const bfloat16_t * __a) { - bfloat16x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv4bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); - ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); - ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); - return ret; + return __builtin_aarch64_ld3rv4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x8x3_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld3q_dup_bf16 (const bfloat16_t * __a) { - bfloat16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3rv8bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); - ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); - ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); - return ret; + return __builtin_aarch64_ld3rv8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_bf16 (const bfloat16_t * __a) { - bfloat16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = 
__builtin_aarch64_ld4v4bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); - ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); - ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); - ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); - return ret; + return __builtin_aarch64_ld4v4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_bf16 (const bfloat16_t * __a) { - bfloat16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0); - ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); - ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); - ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); - return ret; + return __builtin_aarch64_ld4v8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x4x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4_dup_bf16 (const bfloat16_t * __a) { - bfloat16x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv4bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); - ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); - ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); - ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); - return ret; + return __builtin_aarch64_ld4rv4bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline bfloat16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld4q_dup_bf16 (const bfloat16_t * __a) { - bfloat16x8x4_t ret; - 
__builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4rv8bf ((const __builtin_aarch64_simd_bf *) __a); - ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0); - ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); - ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); - ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); - return ret; + return __builtin_aarch64_ld4rv8bf ((const __builtin_aarch64_simd_bf *) __a); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -28524,60 +26178,37 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - bfloat16x8x2_t __temp; - __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); - __builtin_aarch64_st1x2v4bf (__a, __o); + __builtin_aarch64_st1x2v4bf (__a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); - __builtin_aarch64_st1x2v8bf (__a, __o); + __builtin_aarch64_st1x2v8bf (__a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - bfloat16x8x3_t __temp; - __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_bf16 (__val.val[1], 
vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); + __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); + __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t val) +vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t __val) { - union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); + __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t val) +vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t __val) { - union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); + __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __val); } 
__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -28601,73 +26232,37 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __a, bfloat16x4x2_t __val) { - __builtin_aarch64_simd_oi __o; - bfloat16x8x2_t __temp; - __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); - __builtin_aarch64_st2v4bf (__a, __o); + __builtin_aarch64_st2v4bf (__a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_bf16 (bfloat16_t * __a, bfloat16x8x2_t __val) { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); - __builtin_aarch64_st2v8bf (__a, __o); + __builtin_aarch64_st2v8bf (__a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3_bf16 (bfloat16_t * __a, bfloat16x4x3_t __val) { - __builtin_aarch64_simd_ci __o; - bfloat16x8x3_t __temp; - __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); + __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __val); } 
__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_bf16 (bfloat16_t * __a, bfloat16x8x3_t __val) { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); + __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4_bf16 (bfloat16_t * __a, bfloat16x4x4_t __val) { - __builtin_aarch64_simd_xi __o; - bfloat16x8x4_t __temp; - __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __temp.val[3] = vcombine_bf16 (__val.val[3], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __o); + __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __val); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_bf16 (bfloat16_t * __a, bfloat16x8x4_t __val) { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8bf 
(__o, (bfloat16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __o); + __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __val); } __extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -29183,24 +26778,90 @@ vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1, return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), __a, __lane1); } -__LD2_LANE_FUNC (bfloat16x4x2_t, bfloat16x4_t, bfloat16x8x2_t, bfloat16_t, v4bf, - v8bf, bf, bf16, bfloat16x8_t) -__LD2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) -__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf, - v8bf, bf, bf16, bfloat16x8_t) -__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) -__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, - v8bf, bf, bf16, bfloat16x8_t) -__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) -__ST2_LANE_FUNC (bfloat16x4x2_t, bfloat16x8x2_t, bfloat16_t, v4bf, v8bf, bf, - bf16, bfloat16x8_t) -__ST2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16_t, v8bf, bf, bf16) -__ST3_LANE_FUNC (bfloat16x4x3_t, bfloat16x8x3_t, bfloat16_t, v4bf, v8bf, bf, - bf16, bfloat16x8_t) -__ST3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16_t, v8bf, bf, bf16) -__ST4_LANE_FUNC (bfloat16x4x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, v8bf, bf, - bf16, bfloat16x8_t) -__ST4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16_t, v8bf, bf, bf16) +__extension__ extern __inline bfloat16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld2_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev4bf ( + (__builtin_aarch64_simd_bf *) __ptr, __b, __c); +} +__extension__ extern __inline bfloat16x8x2_t +__attribute__ ((__always_inline__, 
__gnu_inline__,__artificial__)) +vld2q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x2_t __b, const int __c) +{ + return __builtin_aarch64_ld2_lanev8bf ( + (__builtin_aarch64_simd_bf *) __ptr, __b, __c); +} +__extension__ extern __inline bfloat16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev4bf ( + (__builtin_aarch64_simd_bf *) __ptr, __b, __c); +} +__extension__ extern __inline bfloat16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld3q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x3_t __b, const int __c) +{ + return __builtin_aarch64_ld3_lanev8bf ( + (__builtin_aarch64_simd_bf *) __ptr, __b, __c); +} +__extension__ extern __inline bfloat16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4_lane_bf16 (const bfloat16_t * __ptr, bfloat16x4x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev4bf ( + (__builtin_aarch64_simd_bf *) __ptr, __b, __c); +} +__extension__ extern __inline bfloat16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) +vld4q_lane_bf16 (const bfloat16_t * __ptr, bfloat16x8x4_t __b, const int __c) +{ + return __builtin_aarch64_ld4_lanev8bf ( + (__builtin_aarch64_simd_bf *) __ptr, __b, __c); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_st2_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +vst3_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_st3_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __val, + __lane); +} +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_st4_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __val, + __lane); +} #pragma GCC pop_options #pragma GCC push_options #pragma GCC target ("arch=armv8.2-a+i8mm") @@ -29208,13 +26869,13 @@ __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv8qi_suss (__a, __b, __r); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv16qi_suss (__a, __b, __r); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -29385,17 +27046,5 @@ vaddq_p128 (poly128_t __a, poly128_t __b) #undef __aarch64_vdupq_laneq_u16 #undef __aarch64_vdupq_laneq_u32 #undef 
__aarch64_vdupq_laneq_u64 -#undef __LD2_LANE_FUNC -#undef __LD2Q_LANE_FUNC -#undef __LD3_LANE_FUNC -#undef __LD3Q_LANE_FUNC -#undef __LD4_LANE_FUNC -#undef __LD4Q_LANE_FUNC -#undef __ST2_LANE_FUNC -#undef __ST2Q_LANE_FUNC -#undef __ST3_LANE_FUNC -#undef __ST3Q_LANE_FUNC -#undef __ST4_LANE_FUNC -#undef __ST4Q_LANE_FUNC #endif #endif diff --git a/third_party/aarch64/arm_sve.internal.h b/third_party/aarch64/arm_sve.internal.h index a4fb4000a..c224bdba3 100644 --- a/third_party/aarch64/arm_sve.internal.h +++ b/third_party/aarch64/arm_sve.internal.h @@ -5,6 +5,6 @@ typedef __fp16 float16_t; typedef float float32_t; typedef double float64_t; -#pragma GCC aarch64 "third_party/aarch64/arm_sve.internal.h" +#pragma GCC aarch64 "arm_sve.h" #endif #endif diff --git a/third_party/aarch64/upgrade.sh b/third_party/aarch64/upgrade.sh index 04376e53a..1e4e6c5b7 100755 --- a/third_party/aarch64/upgrade.sh +++ b/third_party/aarch64/upgrade.sh @@ -13,17 +13,16 @@ # 3. You should fix up the `#pragma GCC aarch64` things. # -s=/opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include +s=/opt/goodies/include d=third_party/aarch64 FILES=' +acc_prof arm_acle +arm_bf16 arm_fp16 arm_neon -acc_prof -arm_bf16 arm_sve -acc_prof openacc ' diff --git a/third_party/argon2/argon2.c b/third_party/argon2/argon2.c index 323120a69..24f04d4d8 100644 --- a/third_party/argon2/argon2.c +++ b/third_party/argon2/argon2.c @@ -22,11 +22,6 @@ #include "third_party/argon2/core.h" #include "third_party/argon2/encoding.h" -asm(".ident\t\"\\n\\n\ -argon2 (CC0 or Apache2)\\n\ -Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\ -Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\""); - /** * Function that gives the string representation of an argon2_type. 
* @param type The argon2_type that we want the string for diff --git a/third_party/argon2/blake2b.c b/third_party/argon2/blake2b.c index f62c988f2..26392c322 100644 --- a/third_party/argon2/blake2b.c +++ b/third_party/argon2/blake2b.c @@ -21,11 +21,6 @@ #include "third_party/argon2/blake2.h" #include "third_party/argon2/core.h" -asm(".ident\t\"\\n\\n\ -argon2 (CC0 or Apache2)\\n\ -Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\ -Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\""); - /* Ensure param structs have not been wrongly padded */ /* Poor man's static_assert */ enum { diff --git a/third_party/argon2/core.c b/third_party/argon2/core.c index 340fc22c2..72f04ee50 100644 --- a/third_party/argon2/core.c +++ b/third_party/argon2/core.c @@ -20,10 +20,10 @@ #include "third_party/argon2/blake2.h" #include "third_party/argon2/core.h" -asm(".ident\t\"\\n\\n\ -argon2 (CC0 or Apache2)\\n\ -Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\ -Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\""); +__notice(argon2_notice, "\ +argon2 (CC0 or Apache2)\n\ +Copyright 2016 Daniel Dinu, Dmitry Khovratovich\n\ +Copyright 2016 Jean-Philippe Aumasson, Samuel Neves"); int FLAG_clear_internal_memory = 1; diff --git a/third_party/argon2/encoding.c b/third_party/argon2/encoding.c index 22f4707fc..22a50dcf3 100644 --- a/third_party/argon2/encoding.c +++ b/third_party/argon2/encoding.c @@ -21,11 +21,6 @@ #include "libc/str/str.h" #include "third_party/argon2/core.h" -asm(".ident\t\"\\n\\n\ -argon2 (CC0 or Apache2)\\n\ -Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\ -Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\""); - /* * Example code for a decoder and encoder of "hash strings", with Argon2 * parameters. 
diff --git a/third_party/argon2/ref.c b/third_party/argon2/ref.c index 1809aca03..f21c902de 100644 --- a/third_party/argon2/ref.c +++ b/third_party/argon2/ref.c @@ -22,11 +22,6 @@ #include "third_party/argon2/blamka-round-ref.h" #include "third_party/argon2/core.h" -asm(".ident\t\"\\n\\n\ -argon2 (CC0 or Apache2)\\n\ -Copyright 2016 Daniel Dinu, Dmitry Khovratovich\\n\ -Copyright 2016 Jean-Philippe Aumasson, Samuel Neves\""); - /* * Argon2 reference source code package - reference C implementations * diff --git a/third_party/awk/main.c b/third_party/awk/main.c index d1ec5b755..07fba0946 100644 --- a/third_party/awk/main.c +++ b/third_party/awk/main.c @@ -39,28 +39,29 @@ #include "libc/sysv/consts/sig.h" #include "third_party/awk/awk.h" -asm(".ident\t\"\\n\\n\ -Copyright (C) Lucent Technologies 1997\\n\ -All Rights Reserved\\n\ -\\n\ -Permission to use, copy, modify, and distribute this software and\\n\ -its documentation for any purpose and without fee is hereby\\n\ -granted, provided that the above copyright notice appear in all\\n\ -copies and that both that the copyright notice and this\\n\ -permission notice and warranty disclaimer appear in supporting\\n\ -documentation, and that the name Lucent Technologies or any of\\n\ -its entities not be used in advertising or publicity pertaining\\n\ -to distribution of the software without specific, written prior\\n\ -permission.\\n\ -\\n\ -LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,\\n\ -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.\\n\ -IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY\\n\ -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\\n\ -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER\\n\ -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,\\n\ -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF\\n\ -THIS SOFTWARE.\""); +__notice(awk_notice, "\ +The One True Awk\n\ +Copyright (C) Lucent Technologies 1997\n\ +All 
Rights Reserved\n\ +\n\ +Permission to use, copy, modify, and distribute this software and\n\ +its documentation for any purpose and without fee is hereby\n\ +granted, provided that the above copyright notice appear in all\n\ +copies and that both that the copyright notice and this\n\ +permission notice and warranty disclaimer appear in supporting\n\ +documentation, and that the name Lucent Technologies or any of\n\ +its entities not be used in advertising or publicity pertaining\n\ +to distribution of the software without specific, written prior\n\ +permission.\n\ +\n\ +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,\n\ +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.\n\ +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY\n\ +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\n\ +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER\n\ +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,\n\ +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF\n\ +THIS SOFTWARE."); const char *version = "version 20220530"; extern int nfields; diff --git a/third_party/bzip2/crctable.c b/third_party/bzip2/crctable.c index 2fee0a90a..2c21bd6a5 100644 --- a/third_party/bzip2/crctable.c +++ b/third_party/bzip2/crctable.c @@ -20,6 +20,7 @@ uint32_t BZ2_crc32Table[256]; +__attribute__((__constructor__(10))) static textstartup void BZ2_crc32Table_init() { unsigned i, j, u; for (i = 0; i < 256; ++i) { @@ -38,10 +39,6 @@ static textstartup void BZ2_crc32Table_init() { } } -const void *const BZ2_crc32Table_ctor[] initarray = { - BZ2_crc32Table_init, -}; - #else const uint32_t BZ2_crc32Table[256] = { diff --git a/third_party/chibicc/chibicc.c b/third_party/chibicc/chibicc.c index 3273b3077..8224433a5 100644 --- a/third_party/chibicc/chibicc.c +++ b/third_party/chibicc/chibicc.c @@ -9,11 +9,10 @@ #include "libc/sysv/consts/sig.h" #include "libc/x/xasprintf.h" -asm(".ident\t\"\\n\\n\ -chibicc (MIT/ISC 
License)\\n\ -Copyright 2019 Rui Ueyama\\n\ -Copyright 2020 Justine Alexandra Roberts Tunney\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(chibicc_notice, "\ +chibicc (MIT/ISC License)\n\ +Copyright 2019 Rui Ueyama\n\ +Copyright 2020 Justine Alexandra Roberts Tunney"); typedef enum { FILE_NONE, diff --git a/third_party/compiler_rt/comprt.S b/third_party/compiler_rt/comprt.S index 84814f711..95060b658 100644 --- a/third_party/compiler_rt/comprt.S +++ b/third_party/compiler_rt/comprt.S @@ -7,12 +7,8 @@ huge_compiler_rt_license: .endobj huge_compiler_rt_license,globl,hidden .previous -.ident "\n -compiler_rt (Licensed MIT) -Copyright (c) 2009-2015 by the contributors listed in: -github.com/llvm-mirror/compiler-rt/blob/master/CREDITS.TXT" - -.ident "\n + .section .notice,"aR",@progbits + .asciz "\n\n\ compiler_rt (Licensed \"University of Illinois/NCSA Open Source License\") Copyright (c) 2009-2018 by the contributors listed in: github.com/llvm-mirror/compiler-rt/blob/master/CREDITS.TXT diff --git a/third_party/compiler_rt/cpu_model.h b/third_party/compiler_rt/cpu_model.h deleted file mode 100644 index f5d2ba440..000000000 --- a/third_party/compiler_rt/cpu_model.h +++ /dev/null @@ -1,33 +0,0 @@ -//===-- cpu_model_common.c - Utilities for cpu model detection ----*- C -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements common utilities for runtime cpu model detection. 
-// -//===----------------------------------------------------------------------===// - -#ifndef COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H -#define COMPILER_RT_LIB_BUILTINS_CPU_MODEL_COMMON_H - -#if __has_attribute(constructor) -#if __GNUC__ >= 9 -// Ordinarily init priorities below 101 are disallowed as they are reserved for -// the implementation. However, we are the implementation, so silence the -// diagnostic, since it doesn't apply to us. -#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" -#endif -// We're choosing init priority 90 to force our constructors to run before any -// constructors in the end user application (starting at priority 101). This -// value matches the libgcc choice for the same functions. -#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(90))) -#else -// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that -// this runs during initialization. -#define CONSTRUCTOR_ATTRIBUTE -#endif - -#endif diff --git a/third_party/compiler_rt/ilogbl.c b/third_party/compiler_rt/ilogbl.c index 73ccc6352..09e4c9630 100644 --- a/third_party/compiler_rt/ilogbl.c +++ b/third_party/compiler_rt/ilogbl.c @@ -30,11 +30,7 @@ #include "libc/tinymath/internal.h" #include "libc/tinymath/ldshape.internal.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /** * Returns log₂𝑥 exponent part of double. diff --git a/third_party/compiler_rt/logbl.c b/third_party/compiler_rt/logbl.c index 171b9aa44..27c639045 100644 --- a/third_party/compiler_rt/logbl.c +++ b/third_party/compiler_rt/logbl.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/math.h" #if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); long double logbl(long double x) { diff --git a/third_party/dlmalloc/init.inc b/third_party/dlmalloc/init.inc index b635d84bf..684e68995 100644 --- a/third_party/dlmalloc/init.inc +++ b/third_party/dlmalloc/init.inc @@ -8,7 +8,7 @@ static void dlmalloc_post_fork_child(void) { (void)INITIAL_LOCK(&(gm)->mutex); #endif /* LOCK_AT_FORK */ /* Initialize mparams */ -__attribute__((__constructor__)) int init_mparams(void) { +__attribute__((__constructor__(50))) int init_mparams(void) { #ifdef NEED_GLOBAL_LOCK_INIT if (malloc_global_mutex_status <= 0) init_malloc_global_mutex(); diff --git a/third_party/double-conversion/bignum-dtoa.cc b/third_party/double-conversion/bignum-dtoa.cc index 5b9325158..a9a75e4f8 100644 --- a/third_party/double-conversion/bignum-dtoa.cc +++ b/third_party/double-conversion/bignum-dtoa.cc @@ -28,11 +28,7 @@ #include "third_party/double-conversion/bignum.h" #include "third_party/double-conversion/ieee.h" #include "third_party/libcxx/cmath" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); namespace double_conversion { diff --git a/third_party/double-conversion/bignum.cc b/third_party/double-conversion/bignum.cc index 62853945a..cadef28b0 100644 --- a/third_party/double-conversion/bignum.cc +++ b/third_party/double-conversion/bignum.cc @@ -28,11 +28,7 @@ #include "third_party/double-conversion/utils.h" #include "third_party/libcxx/algorithm" #include "third_party/libcxx/cstring" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); namespace double_conversion { diff --git a/third_party/double-conversion/cached-powers.cc b/third_party/double-conversion/cached-powers.cc 
index 18253cfc4..05156a099 100644 --- a/third_party/double-conversion/cached-powers.cc +++ b/third_party/double-conversion/cached-powers.cc @@ -29,11 +29,7 @@ #include "third_party/libcxx/climits" #include "third_party/libcxx/cmath" #include "third_party/libcxx/cstdarg" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); namespace double_conversion { diff --git a/third_party/double-conversion/double-to-string.cc b/third_party/double-conversion/double-to-string.cc index a58a5c925..0dfe0dc41 100644 --- a/third_party/double-conversion/double-to-string.cc +++ b/third_party/double-conversion/double-to-string.cc @@ -33,11 +33,7 @@ #include "third_party/libcxx/algorithm" #include "third_party/libcxx/climits" #include "third_party/libcxx/cmath" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); namespace double_conversion { diff --git a/third_party/double-conversion/dubble.c b/third_party/double-conversion/dubble.c new file mode 100644 index 000000000..a73cf09aa --- /dev/null +++ b/third_party/double-conversion/dubble.c @@ -0,0 +1,3 @@ +__notice(double_conversion_notice, "\ +double-conversion (BSD-3 License)\n\ +Copyright 2006-2012 the V8 project authors"); diff --git a/third_party/double-conversion/fast-dtoa.cc b/third_party/double-conversion/fast-dtoa.cc index 60195c26d..84fcfd36b 100644 --- a/third_party/double-conversion/fast-dtoa.cc +++ b/third_party/double-conversion/fast-dtoa.cc @@ -28,11 +28,7 @@ #include "third_party/double-conversion/diy-fp.h" #include "third_party/double-conversion/fast-dtoa.h" #include "third_party/double-conversion/ieee.h" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include 
\"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); namespace double_conversion { diff --git a/third_party/double-conversion/fixed-dtoa.cc b/third_party/double-conversion/fixed-dtoa.cc index cca64aca7..d00093609 100644 --- a/third_party/double-conversion/fixed-dtoa.cc +++ b/third_party/double-conversion/fixed-dtoa.cc @@ -27,11 +27,7 @@ #include "third_party/double-conversion/fixed-dtoa.h" #include "third_party/double-conversion/ieee.h" #include "third_party/libcxx/cmath" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); namespace double_conversion { diff --git a/third_party/double-conversion/string-to-double.cc b/third_party/double-conversion/string-to-double.cc index 0d88f2d60..cc29f9555 100644 --- a/third_party/double-conversion/string-to-double.cc +++ b/third_party/double-conversion/string-to-double.cc @@ -31,11 +31,7 @@ #include "third_party/libcxx/climits" #include "third_party/libcxx/cmath" #include "third_party/libcxx/locale" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); #ifdef _MSC_VER # if _MSC_VER >= 1900 diff --git a/third_party/double-conversion/strtod.cc b/third_party/double-conversion/strtod.cc index 9c00ef19c..bf6dde672 100644 --- a/third_party/double-conversion/strtod.cc +++ b/third_party/double-conversion/strtod.cc @@ -30,11 +30,7 @@ #include "third_party/double-conversion/strtod.h" #include "third_party/libcxx/climits" #include "third_party/libcxx/cstdarg" - -asm(".ident\t\"\\n\\n\ -double-conversion (BSD-3 License)\\n\ -Copyright 2006-2012 the V8 project authors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("double_conversion_notice"); namespace double_conversion { diff --git 
a/third_party/gdtoa/gdtoa.internal.h b/third_party/gdtoa/gdtoa.internal.h index bbb4d1e2c..67f11af0a 100644 --- a/third_party/gdtoa/gdtoa.internal.h +++ b/third_party/gdtoa/gdtoa.internal.h @@ -4,12 +4,7 @@ #include "libc/str/str.h" #include "third_party/gdtoa/gdtoa.h" -asm(".ident\t\"\\n\\n\ -gdtoa (MIT License)\\n\ -The author of this software is David M. Gay\\n\ -Kudos go to Guy L. Steele, Jr. and Jon L. White\\n\ -Copyright (C) 1997, 1998, 2000 by Lucent Technologies\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("gdtoa_notice"); #define IEEE_Arith 1 #define IEEE_8087 1 diff --git a/third_party/gdtoa/misc.c b/third_party/gdtoa/misc.c index 43b1d4729..75d3883d8 100644 --- a/third_party/gdtoa/misc.c +++ b/third_party/gdtoa/misc.c @@ -98,7 +98,7 @@ __gdtoa_Bclear(void) __gdtoa_unlock(); } -__attribute__((__constructor__)) static void +__attribute__((__constructor__(60))) static void __gdtoa_Binit(void) { __gdtoa_initlock(); diff --git a/third_party/gdtoa/notice.c b/third_party/gdtoa/notice.c new file mode 100644 index 000000000..1046f9235 --- /dev/null +++ b/third_party/gdtoa/notice.c @@ -0,0 +1,5 @@ +__notice(gdtoa_notice, "\ +gdtoa (MIT License)\n\ +The author of this software is David M. Gay\n\ +Kudos go to Guy L. Steele, Jr. and Jon L. White\n\ +Copyright (C) 1997, 1998, 2000 by Lucent Technologies"); diff --git a/third_party/getopt/getopt.c b/third_party/getopt/getopt.c index f1d7b716e..e51509a51 100644 --- a/third_party/getopt/getopt.c +++ b/third_party/getopt/getopt.c @@ -37,10 +37,8 @@ #include "libc/errno.h" #include "third_party/getopt/getopt.internal.h" -asm(".ident\t\"\\n\ -getopt (BSD-3)\\n\ -Copyright 1987, 1993, 1994 The Regents of the University of California\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(notice_getopt, "getopt (BSD-3)\n\ +Copyright 1987, 1993, 1994 The Regents of the University of California"); #define BADCH '?' 
#define BADARG ':' diff --git a/third_party/intel/amxbf16intrin.internal.h b/third_party/intel/amxbf16intrin.internal.h index d7b31ecb0..8a1ed88fb 100644 --- a/third_party/intel/amxbf16intrin.internal.h +++ b/third_party/intel/amxbf16intrin.internal.h @@ -9,7 +9,7 @@ #pragma GCC target("amx-bf16") #define __DISABLE_AMX_BF16__ #endif -#if defined(__x86_64__) && defined(__AMX_BF16__) +#if defined(__x86_64__) #define _tile_dpbf16ps_internal(dst,src1,src2) __asm__ volatile ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) #define _tile_dpbf16ps(dst,src1,src2) _tile_dpbf16ps_internal (dst, src1, src2) #endif diff --git a/third_party/intel/amxint8intrin.internal.h b/third_party/intel/amxint8intrin.internal.h index f1d25d11f..e5c566735 100644 --- a/third_party/intel/amxint8intrin.internal.h +++ b/third_party/intel/amxint8intrin.internal.h @@ -9,7 +9,7 @@ #pragma GCC target("amx-int8") #define __DISABLE_AMX_INT8__ #endif -#if defined(__x86_64__) && defined(__AMX_INT8__) +#if defined(__x86_64__) #define _tile_int8_dp_internal(name,dst,src1,src2) __asm__ volatile ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) #define _tile_dpbssd(dst,src1,src2) _tile_int8_dp_internal (tdpbssd, dst, src1, src2) #define _tile_dpbsud(dst,src1,src2) _tile_int8_dp_internal (tdpbsud, dst, src1, src2) diff --git a/third_party/intel/amxtileintrin.internal.h b/third_party/intel/amxtileintrin.internal.h index 3913c900e..00e403118 100644 --- a/third_party/intel/amxtileintrin.internal.h +++ b/third_party/intel/amxtileintrin.internal.h @@ -9,7 +9,7 @@ #pragma GCC target("amx-tile") #define __DISABLE_AMX_TILE__ #endif -#if defined(__x86_64__) && defined(__AMX_TILE__) +#if defined(__x86_64__) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tile_loadconfig (const void *__config) @@ -29,11 +29,11 @@ _tile_release (void) __asm__ volatile 
("tilerelease" ::); } #define _tile_loadd(dst,base,stride) _tile_loadd_internal (dst, base, stride) -#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride)) +#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) (base)), "r" ((long) (stride))) #define _tile_stream_loadd(dst,base,stride) _tile_stream_loadd_internal (dst, base, stride) -#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride)) +#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) (base)), "r" ((long) (stride))) #define _tile_stored(dst,base,stride) _tile_stored_internal (dst, base, stride) -#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) base), "r" ((long) stride) : "memory") +#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) (base)), "r" ((long) (stride)) : "memory") #define _tile_zero(dst) _tile_zero_internal (dst) #define _tile_zero_internal(dst) __asm__ volatile ("tilezero\t%%tmm"#dst ::) #endif diff --git a/third_party/intel/avx512bf16intrin.internal.h b/third_party/intel/avx512bf16intrin.internal.h index 088340d6a..5bc3f8358 100644 --- a/third_party/intel/avx512bf16intrin.internal.h +++ b/third_party/intel/avx512bf16intrin.internal.h @@ -11,6 +11,14 @@ #endif typedef short __v32bh __attribute__ ((__vector_size__ (64))); typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__)); +extern 
__inline float +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsbh_ss (__bfloat16 __A) +{ + union{ float a; unsigned int b;} __tmp; + __tmp.b = ((unsigned int)(__A)) << 16; + return __tmp.a; +} extern __inline __m512bh __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B) @@ -65,6 +73,29 @@ _mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D) { return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A); } +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpbh_ps (__m256bh __A) +{ + return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 ( + (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)); +} +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A) +{ + return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 ( + (__m512i)_mm512_maskz_cvtepi16_epi32 ( + (__mmask16)__U, (__m256i)__A), 16)); +} +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A) +{ + return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 ( + (__m512i)__S, (__mmask16)__U, + (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16))); +} #ifdef __DISABLE_AVX512BF16__ #undef __DISABLE_AVX512BF16__ #pragma GCC pop_options diff --git a/third_party/intel/avx512bf16vlintrin.internal.h b/third_party/intel/avx512bf16vlintrin.internal.h index 83019cf5f..216196fcf 100644 --- a/third_party/intel/avx512bf16vlintrin.internal.h +++ b/third_party/intel/avx512bf16vlintrin.internal.h @@ -13,6 +13,7 @@ typedef short __v16bh __attribute__ ((__vector_size__ (32))); typedef short __v8bh __attribute__ ((__vector_size__ (16))); typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__)); typedef short __m128bh 
__attribute__ ((__vector_size__ (16), __may_alias__)); +typedef unsigned short __bfloat16; extern __inline __m256bh __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B) @@ -121,6 +122,61 @@ _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D) { return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A); } +extern __inline __bfloat16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtness_sbh (float __A) +{ + __v4sf __V = {__A, 0, 0, 0}; + __v8hi __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V, + (__v8hi)_mm_undefined_si128 (), (__mmask8)-1); + return __R[0]; +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpbh_ps (__m128bh __A) +{ + return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 ( + (__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16)); +} +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtpbh_ps (__m128bh __A) +{ + return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 ( + (__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16)); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A) +{ + return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 ( + (__m128i)_mm_maskz_cvtepi16_epi32 ( + (__mmask8)__U, (__m128i)__A), 16)); +} +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A) +{ + return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 ( + (__m256i)_mm256_maskz_cvtepi16_epi32 ( + (__mmask8)__U, (__m128i)__A), 16)); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A) +{ + return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 ( + 
(__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 ( + (__m128i)__A), 16)); +} +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A) +{ + return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 ( + (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 ( + (__m128i)__A), 16)); +} #ifdef __DISABLE_AVX512BF16VL__ #undef __DISABLE_AVX512BF16VL__ #pragma GCC pop_options diff --git a/third_party/intel/avx512dqintrin.internal.h b/third_party/intel/avx512dqintrin.internal.h index 38d8010f1..0cf6a7080 100644 --- a/third_party/intel/avx512dqintrin.internal.h +++ b/third_party/intel/avx512dqintrin.internal.h @@ -2248,9 +2248,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #define _mm_mask_fpclass_ss_mask(X, C, U) ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), (int) (C), (__mmask8) (U))) #define _mm_mask_fpclass_sd_mask(X, C, U) ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) (C), (__mmask8) (U))) #define _mm512_mask_fpclass_pd_mask(u, X, C) ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), (int) (C), (__mmask8)(u))) -#define _mm512_mask_fpclass_ps_mask(u, x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask8)(u))) +#define _mm512_mask_fpclass_ps_mask(u, x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask16)(u))) #define _mm512_fpclass_pd_mask(X, C) ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), (int) (C), (__mmask8)-1)) -#define _mm512_fpclass_ps_mask(x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask8)-1)) +#define _mm512_fpclass_ps_mask(x, c) ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x), (int) (c),(__mmask16)-1)) #define _mm_reduce_sd(A, B, C) ((__m128d) __builtin_ia32_reducesd_mask 
((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), (__mmask8)-1)) #define _mm_mask_reduce_sd(W, U, A, B, C) ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U))) #define _mm_maskz_reduce_sd(U, A, B, C) ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), (__mmask8)(U))) diff --git a/third_party/intel/avx512fintrin.internal.h b/third_party/intel/avx512fintrin.internal.h index 79158ec85..a2c2c788c 100644 --- a/third_party/intel/avx512fintrin.internal.h +++ b/third_party/intel/avx512fintrin.internal.h @@ -1351,74 +1351,92 @@ _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B, #define _mm_mask_sub_round_ss(W, U, A, B, C) (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C) #define _mm_maskz_sub_round_ss(U, A, B, C) (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) #endif +typedef enum +{ + _MM_TERNLOG_A = 0xF0, + _MM_TERNLOG_B = 0xCC, + _MM_TERNLOG_C = 0xAA +} _MM_TERNLOG_ENUM; #ifdef __OPTIMIZE__ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) __C, __imm, - (__mmask8) -1); + return (__m512i) + __builtin_ia32_pternlogq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) __C, __imm, - (__mmask8) __U); + return (__m512i) + __builtin_ia32_pternlogq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, + 
(unsigned char) __imm, + (__mmask8) __U); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A, - (__v8di) __B, - (__v8di) __C, - __imm, (__mmask8) __U); + return (__m512i) + __builtin_ia32_pternlogq512_maskz ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) __C, - __imm, (__mmask16) -1); + return (__m512i) + __builtin_ia32_pternlogd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + (unsigned char) __imm, + (__mmask16) -1); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) __C, - __imm, (__mmask16) __U); + return (__m512i) + __builtin_ia32_pternlogd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + (unsigned char) __imm, + (__mmask16) __U); } extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A, - (__v16si) __B, - (__v16si) __C, - __imm, (__mmask16) __U); + return (__m512i) + __builtin_ia32_pternlogd512_maskz ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + (unsigned char) __imm, + (__mmask16) __U); } #else -#define _mm512_ternarylogic_epi64(A, B, C, I) ((__m512i) 
__builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1)) -#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) -#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) -#define _mm512_ternarylogic_epi32(A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), (__mmask16)-1)) -#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U))) -#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), (__mmask16)(U))) +#define _mm512_ternarylogic_epi64(A, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), (__v8di) (__m512i) (B), (__v8di) (__m512i) (C), (unsigned char) (I), (__mmask8) -1)) +#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), (__v8di) (__m512i) (B), (__v8di) (__m512i) (C), (unsigned char)(I), (__mmask8) (U))) +#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (A), (__v8di) (__m512i) (B), (__v8di) (__m512i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm512_ternarylogic_epi32(A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), (__v16si) (__m512i) (B), (__v16si) (__m512i) (C), (unsigned char) (I), (__mmask16) -1)) +#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) ((__m512i) 
__builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), (__v16si) (__m512i) (B), (__v16si) (__m512i) (C), (unsigned char) (I), (__mmask16) (U))) +#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (A), (__v16si) (__m512i) (B), (__v16si) (__m512i) (C), (unsigned char) (I), (__mmask16) (U))) #endif extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -2680,14 +2698,18 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) (__mmask8) __U, __R); } #else -#define _mm512_scalef_round_pd(A, B, C) (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) -#define _mm512_maskz_scalef_round_pd(U, A, B, C) (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) -#define _mm512_scalef_round_ps(A, B, C) (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) -#define _mm512_maskz_scalef_round_ps(U, A, B, C) (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) -#define _mm_scalef_round_sd(A, B, C) (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, (__v2df)_mm_setzero_pd (), -1, C) -#define _mm_scalef_round_ss(A, B, C) (__m128)__builtin_ia32_scalefss_mask_round (A, B, (__v4sf)_mm_setzero_ps (), -1, C) +#define _mm512_scalef_round_pd(A, B, C) ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (__v8df) _mm512_undefined_pd(), -1, (C))) +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))) +#define _mm512_maskz_scalef_round_pd(U, A, B, C) ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (__v8df) _mm512_setzero_pd(), (U), (C))) +#define 
_mm512_scalef_round_ps(A, B, C) ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (__v16sf) _mm512_undefined_ps(), -1, (C))) +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))) +#define _mm512_maskz_scalef_round_ps(U, A, B, C) ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (__v16sf) _mm512_setzero_ps(), (U), (C))) +#define _mm_scalef_round_sd(A, B, C) ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (__v2df) _mm_undefined_pd (), -1, (C))) +#define _mm_scalef_round_ss(A, B, C) ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (__v4sf) _mm_undefined_ps (), -1, (C))) +#define _mm_mask_scalef_round_sd(W, U, A, B, C) ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C))) +#define _mm_mask_scalef_round_ss(W, U, A, B, C) ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C))) +#define _mm_maskz_scalef_round_sd(U, A, B, C) ((__m128d) __builtin_ia32_scalefsd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (C))) +#define _mm_maskz_scalef_round_ss(U, A, B, C) ((__m128) __builtin_ia32_scalefss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (C))) #endif #define _mm_mask_scalef_sd(W, U, A, B) _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_scalef_sd(U, A, B) _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) diff --git a/third_party/intel/avx512fp16intrin.internal.h b/third_party/intel/avx512fp16intrin.internal.h new file mode 100644 index 000000000..4f249cc30 --- /dev/null +++ b/third_party/intel/avx512fp16intrin.internal.h @@ -0,0 +1,5452 @@ +#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0) +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." 
+#endif +#ifndef __AVX512FP16INTRIN_H_INCLUDED +#define __AVX512FP16INTRIN_H_INCLUDED +#ifndef __AVX512FP16__ +#pragma GCC push_options +#pragma GCC target("avx512fp16") +#define __DISABLE_AVX512FP16__ +#endif +typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16))); +typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32))); +typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64))); +typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__)); +typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); +typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1))); +typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5, + _Float16 __A4, _Float16 __A3, _Float16 __A2, + _Float16 __A1, _Float16 __A0) +{ + return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3, + __A4, __A5, __A6, __A7 }; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13, + _Float16 __A12, _Float16 __A11, _Float16 __A10, + _Float16 __A9, _Float16 __A8, _Float16 __A7, + _Float16 __A6, _Float16 __A5, _Float16 __A4, + _Float16 __A3, _Float16 __A2, _Float16 __A1, + _Float16 __A0) +{ + return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3, + __A4, __A5, __A6, __A7, + __A8, __A9, __A10, __A11, + __A12, __A13, __A14, __A15 }; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29, + _Float16 __A28, _Float16 __A27, 
_Float16 __A26, + _Float16 __A25, _Float16 __A24, _Float16 __A23, + _Float16 __A22, _Float16 __A21, _Float16 __A20, + _Float16 __A19, _Float16 __A18, _Float16 __A17, + _Float16 __A16, _Float16 __A15, _Float16 __A14, + _Float16 __A13, _Float16 __A12, _Float16 __A11, + _Float16 __A10, _Float16 __A9, _Float16 __A8, + _Float16 __A7, _Float16 __A6, _Float16 __A5, + _Float16 __A4, _Float16 __A3, _Float16 __A2, + _Float16 __A1, _Float16 __A0) +{ + return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3, + __A4, __A5, __A6, __A7, + __A8, __A9, __A10, __A11, + __A12, __A13, __A14, __A15, + __A16, __A17, __A18, __A19, + __A20, __A21, __A22, __A23, + __A24, __A25, __A26, __A27, + __A28, __A29, __A30, __A31 }; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2, + _Float16 __A3, _Float16 __A4, _Float16 __A5, + _Float16 __A6, _Float16 __A7) +{ + return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2, + _Float16 __A3, _Float16 __A4, _Float16 __A5, + _Float16 __A6, _Float16 __A7, _Float16 __A8, + _Float16 __A9, _Float16 __A10, _Float16 __A11, + _Float16 __A12, _Float16 __A13, _Float16 __A14, + _Float16 __A15) +{ + return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9, + __A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1, + __A0); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2, + _Float16 __A3, _Float16 __A4, _Float16 __A5, + _Float16 __A6, _Float16 __A7, _Float16 __A8, + _Float16 __A9, _Float16 __A10, _Float16 __A11, + _Float16 __A12, _Float16 __A13, _Float16 __A14, + _Float16 __A15, _Float16 __A16, _Float16 __A17, + _Float16 __A18, _Float16 __A19, _Float16 __A20, + _Float16 __A21, 
_Float16 __A22, _Float16 __A23, + _Float16 __A24, _Float16 __A25, _Float16 __A26, + _Float16 __A27, _Float16 __A28, _Float16 __A29, + _Float16 __A30, _Float16 __A31) +{ + return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25, + __A24, __A23, __A22, __A21, __A20, __A19, __A18, + __A17, __A16, __A15, __A14, __A13, __A12, __A11, + __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3, + __A2, __A1, __A0); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_ph (_Float16 __A) +{ + return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_set1_ph (_Float16 __A) +{ + return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set1_ph (_Float16 __A) +{ + return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setzero_ph (void) +{ + return _mm_set1_ph (0.0f); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_setzero_ph (void) +{ + return _mm256_set1_ph (0.0f); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_setzero_ph (void) +{ + return _mm512_set1_ph (0.0f); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_undefined_ph (void) +{ + __m128h __Y = __Y; + return __Y; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_undefined_ph (void) +{ + __m256h __Y = __Y; + return __Y; +} +extern __inline __m512h +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_undefined_ph (void) +{ + __m512h __Y = __Y; + return __Y; +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_h (__m128h __A) +{ + return __A[0]; +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtsh_h (__m256h __A) +{ + return __A[0]; +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsh_h (__m512h __A) +{ + return __A[0]; +} +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_ps (__m512h __a) +{ + return (__m512) __a; +} +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_pd (__m512h __a) +{ + return (__m512d) __a; +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_si512 (__m512h __a) +{ + return (__m512i) __a; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph512_ph128 (__m512h __A) +{ + union + { + __m128h a[4]; + __m512h v; + } u = { .v = __A }; + return u.a[0]; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph512_ph256 (__m512h __A) +{ + union + { + __m256h a[2]; + __m512h v; + } u = { .v = __A }; + return u.a[0]; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph128_ph512 (__m128h __A) +{ + union + { + __m128h a[4]; + __m512h v; + } u; + u.a[0] = __A; + return u.v; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph256_ph512 (__m256h __A) +{ + union + { + __m256h a[2]; + __m512h v; + } u; + u.a[0] = __A; + return u.v; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_zextph128_ph512 (__m128h __A) +{ + return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (), + (__m128) __A, 0); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_zextph256_ph512 (__m256h __A) +{ + return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (), + (__m256d) __A, 0); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castps_ph (__m512 __a) +{ + return (__m512h) __a; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castpd_ph (__m512d __a) +{ + return (__m512h) __a; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castsi512_ph (__m512i __a) +{ + return (__m512h) __a; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_sh (_Float16 __F) +{ + return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_sh (void const *__P) +{ + return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + *(_Float16 const *) __P); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_load_ph (void const *__P) +{ + return *(const __m512h *) __P; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_load_ph (void const *__P) +{ + return *(const __m256h *) __P; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ph (void const *__P) +{ + return *(const __m128h *) __P; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_ph (void const *__P) +{ + return *(const __m512h_u *) __P; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm256_loadu_ph (void const *__P) +{ + return *(const __m256h_u *) __P; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_ph (void const *__P) +{ + return *(const __m128h_u *) __P; +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_sh (void *__P, __m128h __A) +{ + *(_Float16 *) __P = ((__v8hf)__A)[0]; +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_store_ph (void *__P, __m512h __A) +{ + *(__m512h *) __P = __A; +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_store_ph (void *__P, __m256h __A) +{ + *(__m256h *) __P = __A; +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ph (void *__P, __m128h __A) +{ + *(__m128h *) __P = __A; +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_ph (void *__P, __m512h __A) +{ + *(__m512h_u *) __P = __A; +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_ph (void *__P, __m256h __A) +{ + *(__m256h_u *) __P = __A; +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_ph (void *__P, __m128h __A) +{ + *(__m128h_u *) __P = __A; +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_abs_ph (__m512h __A) +{ + return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF), + (__m512i) __A); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_ph (__m512h __A, __m512h __B) +{ + return (__m512h) ((__v32hf) __A + (__v32hf) __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) +{ + 
return __builtin_ia32_addph512_mask (__C, __D, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_addph512_mask (__B, __C, + _mm512_setzero_ph (), __A); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_ph (__m512h __A, __m512h __B) +{ + return (__m512h) ((__v32hf) __A - (__v32hf) __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) +{ + return __builtin_ia32_subph512_mask (__C, __D, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_subph512_mask (__B, __C, + _mm512_setzero_ph (), __A); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_ph (__m512h __A, __m512h __B) +{ + return (__m512h) ((__v32hf) __A * (__v32hf) __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) +{ + return __builtin_ia32_mulph512_mask (__C, __D, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_mulph512_mask (__B, __C, + _mm512_setzero_ph (), __A); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_ph (__m512h __A, __m512h __B) +{ + return (__m512h) ((__v32hf) __A / (__v32hf) __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, 
__m512h __D) +{ + return __builtin_ia32_divph512_mask (__C, __D, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_divph512_mask (__B, __C, + _mm512_setzero_ph (), __A); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_addph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_addph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_subph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_subph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +extern __inline __m512h 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_mulph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_mulph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_divph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_divph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +#else +#define _mm512_add_round_ph(A, B, C) ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_add_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E))) +#define _mm512_maskz_add_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D))) 
+#define _mm512_sub_round_ph(A, B, C) ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_sub_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E))) +#define _mm512_maskz_sub_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D))) +#define _mm512_mul_round_ph(A, B, C) ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_mul_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E))) +#define _mm512_maskz_mul_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D))) +#define _mm512_div_round_ph(A, B, C) ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_div_round_ph(A, B, C, D, E) ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E))) +#define _mm512_maskz_div_round_ph(A, B, C, D) ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), _mm512_setzero_ph (), (A), (D))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_conj_pch (__m512h __A) +{ + return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31)); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A) +{ + return (__m512h) + __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A), + (__v16sf) __W, + (__mmask16) __U); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A) +{ + return (__m512h) + __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A), + (__v16sf) _mm512_setzero_ps (), + (__mmask16) __U); +} +extern __inline __m128h + 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_sh (__m128h __A, __m128h __B) +{ + __A[0] += __B[0]; + return __A; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_addsh_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_sh (__m128h __A, __m128h __B) +{ + __A[0] -= __B[0]; + return __A; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_subsh_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_sh (__m128h __A, __m128h __B) +{ + __A[0] *= __B[0]; + return __A; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_mulsh_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_sh (__m128h __A, __m128h 
__B) +{ + __A[0] /= __B[0]; + return __A; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_divsh_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (), + __A); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_addsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return __builtin_ia32_addsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_subsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const 
int __D) +{ + return __builtin_ia32_subsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_mulsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return __builtin_ia32_mulsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_divsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return __builtin_ia32_divsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} +#else +#define _mm_add_round_sh(A, B, C) ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_add_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E))) +#define _mm_maskz_add_round_sh(A, B, C, D) 
((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D))) +#define _mm_sub_round_sh(A, B, C) ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_sub_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E))) +#define _mm_maskz_sub_round_sh(A, B, C, D) ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D))) +#define _mm_mul_round_sh(A, B, C) ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_mul_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E))) +#define _mm_maskz_mul_round_sh(A, B, C, D) ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D))) +#define _mm_div_round_sh(A, B, C) ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_div_round_sh(A, B, C, D, E) ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E))) +#define _mm_maskz_div_round_sh(A, B, C, D) ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_ph (__m512h __A, __m512h __B) +{ + return __builtin_ia32_maxph512_mask (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) +{ + return __builtin_ia32_maxph512_mask (__C, __D, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_maxph512_mask (__B, __C, + _mm512_setzero_ph (), __A); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_min_ph (__m512h __A, __m512h __B) +{ + return __builtin_ia32_minph512_mask (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) +{ + return __builtin_ia32_minph512_mask (__C, __D, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_minph512_mask (__B, __C, + _mm512_setzero_ph (), __A); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_maxph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_maxph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_minph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E); 
+} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_minph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +#else +#define _mm512_max_round_ph(A, B, C) (__builtin_ia32_maxph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_max_round_ph(A, B, C, D, E) (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E))) +#define _mm512_maskz_max_round_ph(A, B, C, D) (__builtin_ia32_maxph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D))) +#define _mm512_min_round_ph(A, B, C) (__builtin_ia32_minph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_min_round_ph(A, B, C, D, E) (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E))) +#define _mm512_maskz_min_round_ph(A, B, C, D) (__builtin_ia32_minph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D))) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_sh (__m128h __A, __m128h __B) +{ + __A[0] = __A[0] > __B[0] ? __A[0] : __B[0]; + return __A; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_maxsh_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_sh (__m128h __A, __m128h __B) +{ + __A[0] = __A[0] < __B[0] ? 
__A[0] : __B[0]; + return __A; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_minsh_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (), + __A); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_maxsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return __builtin_ia32_maxsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_minsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + 
return __builtin_ia32_minsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} +#else +#define _mm_max_round_sh(A, B, C) (__builtin_ia32_maxsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_max_round_sh(A, B, C, D, E) (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E))) +#define _mm_maskz_max_round_sh(A, B, C, D) (__builtin_ia32_maxsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D))) +#define _mm_min_round_sh(A, B, C) (__builtin_ia32_minsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_min_round_sh(A, B, C, D, E) (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E))) +#define _mm_maskz_min_round_sh(A, B, C, D) (__builtin_ia32_minsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C) +{ + return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C, + (__mmask32) -1); +} +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D, + __A); +} +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C, + const int __D) +{ + return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B, + __C, (__mmask32) -1, + __D); +} +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C, + const int __D, const int __E) +{ + return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C, + __D, __A, + __E); +} +#else +#define _mm512_cmp_ph_mask(A, B, C) (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1))) +#define 
_mm512_mask_cmp_ph_mask(A, B, C, D) (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A))) +#define _mm512_cmp_round_ph_mask(A, B, C, D) (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D))) +#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C) +{ + return (__mmask8) + __builtin_ia32_cmpsh_mask_round (__A, __B, + __C, (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return (__mmask8) + __builtin_ia32_cmpsh_mask_round (__B, __C, + __D, __A, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C, + const int __D) +{ + return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B, + __C, (__mmask8) -1, + __D); +} +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C, + const int __D, const int __E) +{ + return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C, + __D, __A, + __E); +} +#else +#define _mm_cmp_sh_mask(A, B, C) (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (_MM_FROUND_CUR_DIRECTION))) +#define _mm_mask_cmp_sh_mask(A, B, C, D) (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (_MM_FROUND_CUR_DIRECTION))) +#define _mm_cmp_round_sh_mask(A, B, C, D) (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D))) +#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E))) +#endif +extern __inline int +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_comieq_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comilt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comile_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comigt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comige_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comineq_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomieq_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomilt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_ucomile_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomigt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomige_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomineq_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comi_sh (__m128h __A, __m128h __B, const int __P) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R) +{ + return __builtin_ia32_cmpsh_mask_round (__A, __B, __P, + (__mmask8) -1,__R); +} +#else +#define _mm_comi_round_sh(A, B, P, R) (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R))) +#define _mm_comi_sh(A, B, P) (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), _MM_FROUND_CUR_DIRECTION)) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_ph (__m512h __A) +{ + return __builtin_ia32_sqrtph512_mask_round (__A, + _mm512_setzero_ph(), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C) +{ + return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B) +{ + return __builtin_ia32_sqrtph512_mask_round (__B, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_round_ph (__m512h __A, const int __B) +{ + return __builtin_ia32_sqrtph512_mask_round (__A, + _mm512_setzero_ph(), + (__mmask32) -1, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C) +{ + return __builtin_ia32_sqrtph512_mask_round (__B, + _mm512_setzero_ph (), + __A, __C); +} +#else +#define _mm512_sqrt_round_ph(A, B) (__builtin_ia32_sqrtph512_mask_round ((A), _mm512_setzero_ph (), (__mmask32)-1, (B))) +#define _mm512_mask_sqrt_round_ph(A, B, C, D) (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D))) +#define _mm512_maskz_sqrt_round_ph(A, B, C) (__builtin_ia32_sqrtph512_mask_round ((B), _mm512_setzero_ph (), (A), (C))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt_ph (__m512h __A) +{ + return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (), + (__mmask32) -1); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, 
__m512h __C) +{ + return __builtin_ia32_rsqrtph512_mask (__C, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B) +{ + return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rsqrt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_sqrtsh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_sqrtsh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return 
__builtin_ia32_sqrtsh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B, + __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return __builtin_ia32_sqrtsh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, __D); +} +#else +#define _mm_sqrt_round_sh(A, B, C) (__builtin_ia32_sqrtsh_mask_round ((B), (A), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_sqrt_round_sh(A, B, C, D, E) (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E))) +#define _mm_maskz_sqrt_round_sh(A, B, C, D) (__builtin_ia32_sqrtsh_mask_round ((C), (B), _mm_setzero_ph (), (A), (D))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp_ph (__m512h __A) +{ + return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (), + (__mmask32) -1); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C) +{ + return __builtin_ia32_rcpph512_mask (__C, __A, __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B) +{ + return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp_sh (__m128h __A, 
__mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (), + __A); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_ph (__m512h __A, __m512h __B) +{ + return __builtin_ia32_scalefph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) +{ + return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_scalefph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_scalefph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B, + __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_scalefph512_mask_round (__B, __C, + 
_mm512_setzero_ph (), + __A, __D); +} +#else +#define _mm512_scalef_round_ph(A, B, C) (__builtin_ia32_scalefph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_scalef_round_ph(A, B, C, D, E) (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E))) +#define _mm512_maskz_scalef_round_ph(A, B, C, D) (__builtin_ia32_scalefph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D))) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_scalef_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_scalefsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_scalefsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_scalefsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B, + __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return 
__builtin_ia32_scalefsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} +#else +#define _mm_scalef_round_sh(A, B, C) (__builtin_ia32_scalefsh_mask_round ((A), (B), _mm_setzero_ph (), (__mmask8)-1, (C))) +#define _mm_mask_scalef_round_sh(A, B, C, D, E) (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E))) +#define _mm_maskz_scalef_round_sh(A, B, C, D) (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), (A), (D))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_ph (__m512h __A, int __B) +{ + return __builtin_ia32_reduceph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D) +{ + return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C) +{ + return __builtin_ia32_reduceph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_round_ph (__m512h __A, int __B, const int __C) +{ + return __builtin_ia32_reduceph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + int __D, const int __E) +{ + return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B, + __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int 
__C, + const int __D) +{ + return __builtin_ia32_reduceph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +#else +#define _mm512_reduce_ph(A, B) (__builtin_ia32_reduceph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION)) +#define _mm512_mask_reduce_ph(A, B, C, D) (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_maskz_reduce_ph(A, B, C) (__builtin_ia32_reduceph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_reduce_round_ph(A, B, C) (__builtin_ia32_reduceph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_reduce_round_ph(A, B, C, D, E) (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E))) +#define _mm512_maskz_reduce_round_ph(A, B, C, D) (__builtin_ia32_reduceph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_sh (__m128h __A, __m128h __B, int __C) +{ + return __builtin_ia32_reducesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E) +{ + return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D) +{ + return __builtin_ia32_reducesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), __A, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D) +{ + return 
__builtin_ia32_reducesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E, const int __F) +{ + return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, + __B, __F); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + int __D, const int __E) +{ + return __builtin_ia32_reducesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), + __A, __E); +} +#else +#define _mm_reduce_sh(A, B, C) (__builtin_ia32_reducesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) +#define _mm_mask_reduce_sh(A, B, C, D, E) (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), _MM_FROUND_CUR_DIRECTION)) +#define _mm_maskz_reduce_sh(A, B, C, D) (__builtin_ia32_reducesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION)) +#define _mm_reduce_round_sh(A, B, C, D) (__builtin_ia32_reducesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, (D))) +#define _mm_mask_reduce_round_sh(A, B, C, D, E, F) (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F))) +#define _mm_maskz_reduce_round_sh(A, B, C, D, E) (__builtin_ia32_reducesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), (E))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __m512h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_ph (__m512h __A, int __B) +{ + return __builtin_ia32_rndscaleph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B, + __m512h __C, int __D) +{ + return 
__builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C) +{ + return __builtin_ia32_rndscaleph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C) +{ + return __builtin_ia32_rndscaleph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + __C); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B, + __m512h __C, int __D, const int __E) +{ + return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, + __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C, + const int __D) +{ + return __builtin_ia32_rndscaleph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} +#else +#define _mm512_roundscale_ph(A, B) (__builtin_ia32_rndscaleph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION)) +#define _mm512_mask_roundscale_ph(A, B, C, D) (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_maskz_roundscale_ph(A, B, C) (__builtin_ia32_rndscaleph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_roundscale_round_ph(A, B, C) (__builtin_ia32_rndscaleph512_mask_round ((A), (B), _mm512_setzero_ph (), (__mmask32)-1, (C))) +#define _mm512_mask_roundscale_round_ph(A, B, C, D, E) (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E))) +#define _mm512_maskz_roundscale_round_ph(A, B, C, D) 
(__builtin_ia32_rndscaleph512_mask_round ((B), (C), _mm512_setzero_ph (), (A), (D))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roundscale_sh (__m128h __A, __m128h __B, int __C) +{ + return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E) +{ + return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D) +{ + return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), __A, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D) +{ + return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, + __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E, const int __F) +{ + return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, + __A, __B, __F); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + int __D, const int __E) +{ + return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), + __A, __E); +} +#else +#define _mm_roundscale_sh(A, B, C) (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, 
_MM_FROUND_CUR_DIRECTION)) +#define _mm_mask_roundscale_sh(A, B, C, D, E) (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), _MM_FROUND_CUR_DIRECTION)) +#define _mm_maskz_roundscale_sh(A, B, C, D) (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), _MM_FROUND_CUR_DIRECTION)) +#define _mm_roundscale_round_sh(A, B, C, D) (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), _mm_setzero_ph (), (__mmask8)-1, (D))) +#define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F))) +#define _mm_maskz_roundscale_round_sh(A, B, C, D, E) (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), _mm_setzero_ph (), (A), (E))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fpclass_sh_mask (__m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, + (__mmask8) -1); +} +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U); +} +#else +#define _mm_fpclass_sh_mask(X, C) ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), (int) (C), (__mmask8) (-1))) +#define _mm_mask_fpclass_sh_mask(U, X, C) ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), (int) (C), (__mmask8) (U))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A, + const int __imm) +{ + return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A, + __imm, __U); +} +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fpclass_ph_mask (__m512h __A, const int __imm) +{ + return (__mmask32) 
__builtin_ia32_fpclassph512_mask ((__v32hf) __A, + __imm, + (__mmask32) -1); +} +#else +#define _mm512_mask_fpclass_ph_mask(u, x, c) ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), (int) (c),(__mmask32)(u))) +#define _mm512_fpclass_ph_mask(x, c) ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), (int) (c),(__mmask32)-1)) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getexp_sh (__m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__v8hf) _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__v8hf) __W, (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__v8hf) _mm_setzero_ph (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_ph (__m512h __A) +{ + return (__m512h) + __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) _mm512_setzero_ph (), + (__mmask32) -1, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A) +{ + return (__m512h) + __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W, + (__mmask32) __U, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A) +{ + return (__m512h) + __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) _mm512_setzero_ph (), + (__mmask32) __U, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + _mm_setzero_ph (), + (__mmask8) -1, + __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A, + __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __W, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B, + const int __R) +{ + return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_round_ph (__m512h __A, const int __R) +{ + return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) __W, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R) +{ + return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) + 
_mm512_setzero_ph (), + (__mmask32) __U, __R); +} +#else +#define _mm_getexp_round_sh(A, B, R) ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1, R)) +#define _mm_mask_getexp_round_sh(W, U, A, B, C) (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C) +#define _mm_maskz_getexp_round_sh(U, A, B, C) (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, (__v8hf)_mm_setzero_ph(), U, C) +#define _mm512_getexp_round_ph(A, R) ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R)) +#define _mm512_mask_getexp_round_ph(W, U, A, R) ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), R)) +#define _mm512_maskz_getexp_round_ph(U, A, R) ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R)) +#endif +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getmant_sh (__m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D) +{ + return (__m128h) + __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__D << 2) | __C, _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A, + __m128h __B, _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D) +{ + return (__m128h) + __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__D << 2) | __C, (__v8hf) __W, + __U, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D) +{ + return (__m128h) + 
__builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__D << 2) | __C, + (__v8hf) _mm_setzero_ph(), + __U, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) + _mm512_setzero_ph (), + __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getmant_round_sh (__m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D, const int __R) +{ + return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__D << 2) | __C, + _mm_setzero_ph (), + (__mmask8) -1, + __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A, + __m128h __B, _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D, const int __R) +{ + return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__D << 2) | __C, + (__v8hf) __W, + __U, 
__R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D, const int __R) +{ + return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__D << 2) | __C, + (__v8hf) + _mm_setzero_ph(), + __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + _mm512_setzero_ph (), + (__mmask32) -1, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) __W, __U, + __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) + _mm512_setzero_ph (), + __U, __R); +} +#else +#define _mm512_getmant_ph(X, B, C) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)-1, _MM_FROUND_CUR_DIRECTION)) +#define _mm512_mask_getmant_ph(W, U, X, B, C) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h)(W), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_maskz_getmant_ph(U, X, B, C) ((__m512h)__builtin_ia32_getmantph512_mask 
((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) +#define _mm_getmant_sh(X, Y, C, D) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) +#define _mm_mask_getmant_sh(W, U, X, Y, C, D) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) +#define _mm_maskz_getmant_sh(U, X, Y, C, D) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) +#define _mm512_getmant_round_ph(X, B, C, R) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)-1, (R))) +#define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h)(W), (__mmask32)(U), (R))) +#define _mm512_maskz_getmant_round_ph(U, X, B, C, R) ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), (int)(((C)<<2) | (B)), (__v32hf)(__m512h) _mm512_setzero_ph(), (__mmask32)(U), (R))) +#define _mm_getmant_round_sh(X, Y, C, D, R) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph (), (__mmask8)-1, (R))) +#define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(((D)<<2) | (C)), (__v8hf)(__m128h)(W), (__mmask8)(U), (R))) +#define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), 
(int)(((D)<<2) | (C)), (__v8hf)(__m128h) _mm_setzero_ph(), (__mmask8)(U), (R))) +#endif +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi16_si128 (short __A) +{ + return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A); +} +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi128_si16 (__m128i __A) +{ + return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C) +{ + return __builtin_ia32_loadsh_mask (__C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B) +{ + return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A); +} +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C) +{ + __builtin_ia32_storesh_mask (__A, __C, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_sh (__m128h __A, __m128h __B) +{ + __A[0] = __B[0]; + return __A; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, 
+ _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvt_roundph_epi32(A, B) ((__m512i) __builtin_ia32_vcvtph2dq512_mask_round ((A), (__v16si) _mm512_setzero_si512 (), (__mmask16)-1, (B))) +#define _mm512_mask_cvt_roundph_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D))) +#define _mm512_maskz_cvt_roundph_epi32(A, B, C) ((__m512i) __builtin_ia32_vcvtph2dq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_cvtph_epu32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvt_roundph_epu32(A, B) ((__m512i) __builtin_ia32_vcvtph2udq512_mask_round ((A), (__v16si) _mm512_setzero_si512 (), (__mmask16)-1, (B))) +#define _mm512_mask_cvt_roundph_epu32(A, B, C, D) ((__m512i) __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D))) +#define 
_mm512_maskz_cvt_roundph_epu32(A, B, C) ((__m512i) __builtin_ia32_vcvtph2udq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B, + __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvtt_roundph_epi32(A, B) ((__m512i) 
__builtin_ia32_vcvttph2dq512_mask_round ((A), (__v16si) (_mm512_setzero_si512 ()), (__mmask16)(-1), (B))) +#define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2dq512_mask_round ((C), (__v16si)(A), (B), (D))) +#define _mm512_maskz_cvtt_roundph_epi32(A, B, C) ((__m512i) __builtin_ia32_vcvttph2dq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B, + __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu32 
(__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvtt_roundph_epu32(A, B) ((__m512i) __builtin_ia32_vcvttph2udq512_mask_round ((A), (__v16si) _mm512_setzero_si512 (), (__mmask16)-1, (B))) +#define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2udq512_mask_round ((C), (__v16si)(A), (B), (D))) +#define _mm512_maskz_cvtt_roundph_epu32(A, B, C) ((__m512i) __builtin_ia32_vcvttph2udq512_mask_round ((B), (__v16si) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi32_ph (__m512i __A) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepi32_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C, + __A, + __B, + __D); +} 
+extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + __C); +} +#else +#define _mm512_cvt_roundepi32_ph(A, B) (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), _mm256_setzero_ph (), (__mmask16)-1, (B))) +#define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundepi32_ph(A, B, C) (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), _mm256_setzero_ph (), (A), (C))) +#endif +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu32_ph (__m512i __A) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepu32_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D) +{ + return 
__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C, + __A, + __B, + __D); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + __C); +} +#else +#define _mm512_cvt_roundepu32_ph(A, B) (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), _mm256_setzero_ph (), (__mmask16)-1, B)) +#define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, A, B, D)) +#define _mm512_maskz_cvt_roundepu32_ph(A, B, C) (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, _mm256_setzero_ph (), A, C)) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ 
+ return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvt_roundph_epi64(A, B) (__builtin_ia32_vcvtph2qq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B))) +#define _mm512_mask_cvt_roundph_epi64(A, B, C, D) (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundph_epi64(A, B, C) (__builtin_ia32_vcvtph2qq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return 
__builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvt_roundph_epu64(A, B) (__builtin_ia32_vcvtph2uqq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B))) +#define _mm512_mask_cvt_roundph_epu64(A, B, C, D) (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundph_epu64(A, B, C) (__builtin_ia32_vcvtph2uqq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return 
__builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvtt_roundph_epi64(A, B) (__builtin_ia32_vcvttph2qq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B))) +#define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D)) +#define _mm512_maskz_cvtt_roundph_epi64(A, B, C) (__builtin_ia32_vcvttph2qq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return 
__builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvtt_roundph_epu64(A, B) (__builtin_ia32_vcvttph2uqq512_mask_round ((A), _mm512_setzero_si512 (), (__mmask8)-1, (B))) +#define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D)) +#define _mm512_maskz_cvtt_roundph_epu64(A, B, C) (__builtin_ia32_vcvttph2uqq512_mask_round ((B), _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi64_ph (__m512i __A) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepi64_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D) +{ 
+ return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + __C); +} +#else +#define _mm512_cvt_roundepi64_ph(A, B) (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), _mm_setzero_ph (), (__mmask8)-1, (B))) +#define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundepi64_ph(A, B, C) (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), _mm_setzero_ph (), (A), (C))) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu64_ph (__m512i __A) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepu64_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 
__B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + __C); +} +#else +#define _mm512_cvt_roundepu64_ph(A, B) (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), _mm_setzero_ph (), (__mmask8)-1, (B))) +#define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundepu64_ph(A, B, C) (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), _mm_setzero_ph (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} +extern __inline __m512i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvt_roundph_epi16(A, B) ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B))) +#define _mm512_mask_cvt_roundph_epi16(A, B, C, D) ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), (__v32hi)(A), (B), (D))) +#define _mm512_maskz_cvt_roundph_epi16(A, B, C) ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu16 (__m512h __A, int __B) 
+{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvt_roundph_epu16(A, B) ((__m512i) __builtin_ia32_vcvtph2uw512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B))) +#define _mm512_mask_cvt_roundph_epu16(A, B, C, D) ((__m512i) __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D))) +#define _mm512_maskz_cvt_roundph_epu16(A, B, C) ((__m512i) __builtin_ia32_vcvtph2uw512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + 
_MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B, + __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvtt_roundph_epi16(A, B) ((__m512i) __builtin_ia32_vcvttph2w512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B))) +#define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2w512_mask_round ((C), (__v32hi)(A), (B), (D))) +#define _mm512_maskz_cvtt_roundph_epi16(A, B, C) ((__m512i) __builtin_ia32_vcvttph2w512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B, + __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} +#else +#define _mm512_cvtt_roundph_epu16(A, B) ((__m512i) __builtin_ia32_vcvttph2uw512_mask_round ((A), (__v32hi) _mm512_setzero_si512 (), (__mmask32)-1, (B))) +#define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) ((__m512i) __builtin_ia32_vcvttph2uw512_mask_round ((C), (__v32hi)(A), (B), (D))) +#define _mm512_maskz_cvtt_roundph_epu16(A, B, C) ((__m512i) __builtin_ia32_vcvttph2uw512_mask_round ((B), (__v32hi) _mm512_setzero_si512 (), (A), (C))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi16_ph (__m512i __A) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i 
__C) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepi16_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + __C); +} +#else +#define _mm512_cvt_roundepi16_ph(A, B) (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), _mm512_setzero_ph (), (__mmask32)-1, (B))) +#define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundepi16_ph(A, B, C) (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), _mm512_setzero_ph (), (A), (C))) +#endif + extern __inline __m512h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm512_cvtepu16_ph (__m512i __A) + { + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); + } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepu16_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + __B); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + __C); +} +#else +#define _mm512_cvt_roundepu16_ph(A, B) (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), _mm512_setzero_ph (), (__mmask32)-1, (B))) +#define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundepu16_ph(A, B, C) (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), _mm512_setzero_ph (), (A), (C))) +#endif +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_i32 (__m128h __A) +{ + return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_cvtsh_u32 (__m128h __A) +{ + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_i32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R); +} +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_u32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R); +} +#else +#define _mm_cvt_roundsh_i32(A, B) ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B))) +#define _mm_cvt_roundsh_u32(A, B) ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B))) +#endif +#ifdef __x86_64__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_i64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_u64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_i64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R); +} +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_u64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R); +} +#else +#define _mm_cvt_roundsh_i64(A, B) ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B))) +#define _mm_cvt_roundsh_u64(A, B) ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B))) +#endif +#endif +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_i32 
(__m128h __A) +{ + return (int) + __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_u32 (__m128h __A) +{ + return (int) + __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_i32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R); +} +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_u32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R); +} +#else +#define _mm_cvtt_roundsh_i32(A, B) ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B))) +#define _mm_cvtt_roundsh_u32(A, B) ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B))) +#endif +#ifdef __x86_64__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_i64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_u64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_i64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R); +} +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_u64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R); +} +#else +#define _mm_cvtt_roundsh_i64(A, B) ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B))) 
+#define _mm_cvtt_roundsh_u64(A, B) ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B))) +#endif +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti32_sh (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu32_sh (__m128h __A, unsigned int __B) +{ + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R) +{ + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R) +{ + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R); +} +#else +#define _mm_cvt_roundi32_sh(A, B, C) (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C))) +#define _mm_cvt_roundu32_sh(A, B, C) (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C))) +#endif +#ifdef __x86_64__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti64_sh (__m128h __A, long long __B) +{ + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu64_sh (__m128h __A, unsigned long long __B) +{ + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R) +{ + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R) +{ + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R); +} +#else +#define _mm_cvt_roundi64_sh(A, B, C) (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C))) +#define _mm_cvt_roundu64_sh(A, B, C) (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C))) +#endif +#endif +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__A, + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__B, + _mm512_setzero_pd (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_pd (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__A, + _mm512_setzero_pd (), + (__mmask8) -1, + __B); +} +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D); +} +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__B, + _mm512_setzero_pd (), + __A, + __C); +} +#else +#define _mm512_cvt_roundph_pd(A, B) 
(__builtin_ia32_vcvtph2pd512_mask_round ((A), _mm512_setzero_pd (), (__mmask8)-1, (B))) +#define _mm512_mask_cvt_roundph_pd(A, B, C, D) (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundph_pd(A, B, C) (__builtin_ia32_vcvtph2pd512_mask_round ((B), _mm512_setzero_pd (), (A), (C))) +#endif +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtxph_ps (__m256h __A) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__A, + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__B, + _mm512_setzero_ps (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_roundph_ps (__m256h __A, int __B) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__A, + _mm512_setzero_ps (), + (__mmask16) -1, + __B); +} +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D); +} +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__B, + _mm512_setzero_ps (), + __A, + __C); +} +#else +#define _mm512_cvtx_roundph_ps(A, B) (__builtin_ia32_vcvtph2psx512_mask_round ((A), 
_mm512_setzero_ps (), (__mmask16)-1, (B))) +#define _mm512_mask_cvtx_roundph_ps(A, B, C, D) (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D))) +#define _mm512_maskz_cvtx_roundph_ps(A, B, C) (__builtin_ia32_vcvtph2psx512_mask_round ((B), _mm512_setzero_ps (), (A), (C))) +#endif +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtxps_ph (__m512 __A) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C, + __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B, + _mm256_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_roundps_ph (__m512 __A, int __B) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C, + __A, __B, __D); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B, + _mm256_setzero_ph (), + __A, __C); +} +#else +#define _mm512_cvtx_roundps_ph(A, B) 
(__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), _mm256_setzero_ph (), (__mmask16)-1, (B))) +#define _mm512_mask_cvtx_roundps_ph(A, B, C, D) (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), (A), (B), (D))) +#define _mm512_maskz_cvtx_roundps_ph(A, B, C) (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), _mm256_setzero_ph (), (A), (C))) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpd_ph (__m512d __A) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C, + __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundpd_ph (__m512d __A, int __B) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A, + _mm_setzero_ph (), + (__mmask8) -1, + __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C, + __A, __B, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B, + _mm_setzero_ph (), + __A, __C); +} +#else +#define 
_mm512_cvt_roundpd_ph(A, B) (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), _mm_setzero_ph (), (__mmask8)-1, (B))) +#define _mm512_mask_cvt_roundpd_ph(A, B, C, D) (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), (A), (B), (D))) +#define _mm512_maskz_cvt_roundpd_ph(A, B, C) (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), _mm_setzero_ph (), (A), (C))) +#endif +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_ss (__m128 __A, __m128h __B) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A, + _mm_setzero_ps (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C, + __m128h __D) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B, + __m128h __C) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B, + _mm_setzero_ps (), + __A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_sd (__m128d __A, __m128h __B) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A, + _mm_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C, + __m128h __D) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B, + _mm_setzero_pd (), + __A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef 
__OPTIMIZE__ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A, + _mm_setzero_ps (), + (__mmask8) -1, __R); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C, + __m128h __D, const int __R) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B, + __m128h __C, const int __R) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B, + _mm_setzero_ps (), + __A, __R); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A, + _mm_setzero_pd (), + (__mmask8) -1, __R); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C, + __m128h __D, const int __R) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B, + _mm_setzero_pd (), + __A, __R); +} +#else +#define _mm_cvt_roundsh_ss(A, B, R) (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), _mm_setzero_ps (), (__mmask8) -1, (R))) +#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R))) +#define _mm_maskz_cvt_roundsh_ss(A, B, C, R) (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), _mm_setzero_ps (), (A), (R))) +#define 
_mm_cvt_roundsh_sd(A, B, R) (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), _mm_setzero_pd (), (__mmask8) -1, (R))) +#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R))) +#define _mm_maskz_cvt_roundsh_sd(A, B, C, R) (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), _mm_setzero_pd (), (A), (R))) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_sh (__m128h __A, __m128 __B) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_sh (__m128h __A, __m128d __B) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern 
__inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D, + const int __R) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C, + const int __R) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D, + const int __R) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C, + const int __R) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, __R); +} +#else +#define _mm_cvt_roundss_sh(A, B, R) (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), _mm_setzero_ph (), (__mmask8) -1, R)) +#define _mm_mask_cvt_roundss_sh(A, B, C, D, R) (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R))) +#define _mm_maskz_cvt_roundss_sh(A, B, C, R) (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), _mm_setzero_ph (), A, R)) +#define _mm_cvt_roundsd_sh(A, B, R) 
(__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), _mm_setzero_ph (), (__mmask8) -1, R)) +#define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R))) +#define _mm_maskz_cvt_roundsd_sh(A, B, C, R) (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), _mm_setzero_ph (), (A), (R))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} +extern __inline 
__m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +#else +#define _mm512_fmaddsub_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R))) +#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R))) +#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R))) +#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U, + __m512h __B, __m512h __C) +{ + 
return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B, + __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A, + __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsubadd_round_ph (__m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h 
__B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +#else +#define _mm512_fmsubadd_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R))) +#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R))) +#define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R))) +#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef 
__OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +#else +#define _mm512_fmadd_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R))) +#define _mm512_mask_fmadd_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R))) +#define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R))) +#define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_mask 
((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_mask3 
((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +#else +#define _mm512_fnmadd_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R))) +#define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R))) +#define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R))) +#define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_ph (__mmask32 
__U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +#else +#define _mm512_fmsub_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R))) +#define _mm512_mask_fmsub_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R))) +#define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R))) +#define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R))) +#endif 
+extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} +#else +#define _mm512_fnmsub_round_ph(A, B, C, R) ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R))) +#define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R))) +#define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R))) +#define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R))) +#endif +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, 
+ (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) -1, + __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, + const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, + const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A, + __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, __R); +} +#else +#define _mm_fmadd_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R))) +#define _mm_mask_fmadd_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R))) +#define _mm_mask3_fmadd_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R))) +#define 
_mm_maskz_fmadd_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R))) +#endif +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) -1, + __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, + const int __R) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) 
__U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, + const int __R) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A, + __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, __R); +} +#else +#define _mm_fnmadd_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R))) +#define _mm_mask_fnmadd_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R))) +#define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R))) +#define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R))) +#endif +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + -(__v8hf) __B, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + -(__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, + (__v8hf) 
__A, + (__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, + (__v8hf) __A, + -(__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + -(__v8hf) __B, + (__mmask8) -1, + __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, + const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + (__v8hf) __A, + -(__v8hf) __B, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, + const int __R) +{ + return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, + (__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A, + __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, + (__v8hf) __A, + -(__v8hf) __B, + (__mmask8) __U, __R); +} +#else +#define _mm_fmsub_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R))) +#define _mm_mask_fmsub_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R))) +#define _mm_mask3_fmsub_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), 
(C), (U), (R))) +#define _mm_maskz_fmsub_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R))) +#endif +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + -(__v8hf) __A, + -(__v8hf) __B, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + -(__v8hf) __A, + -(__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, + -(__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, + -(__v8hf) __A, + -(__v8hf) __B, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + -(__v8hf) __A, + -(__v8hf) __B, + (__mmask8) -1, + __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B, + const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W, + -(__v8hf) __A, + 
-(__v8hf) __B, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U, + const int __R) +{ + return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W, + -(__v8hf) __A, + (__v8hf) __B, + (__mmask8) __U, __R); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A, + __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W, + -(__v8hf) __A, + -(__v8hf) __B, + (__mmask8) __U, __R); +} +#else +#define _mm_fnmsub_round_sh(A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R))) +#define _mm_mask_fnmsub_round_sh(A, U, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R))) +#define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R))) +#define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R))) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) +{ + return (__m512h) + __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A, + (__v32hf) __C, + (__v32hf) __D, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D) +{ + return (__m512h) + 
__builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + __D, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D) +{ + return (__m512h) + __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B, + (__v32hf) __C, + (__v32hf) __D, + __A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_round ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A, + (__v32hf) __C, + (__v32hf) __D, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + __D, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B, + (__v32hf) __C, + (__v32hf) __D, + __A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D) +{ + return (__m512h) + __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A, + (__v32hf) 
__B, + (__v32hf) __C, + __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C, + __m512h __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A, + (__v32hf) __C, + (__v32hf) __D, __B, + __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, + __mmask16 __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + __D, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C, + __m512h __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B, + (__v32hf) __C, + (__v32hf) __D, + __A, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_round ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C, + __m512h __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A, + (__v32hf) __C, + (__v32hf) __D, __B, + __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, + __mmask16 __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + __D, __E); +} +extern __inline __m512h +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C, + __m512h __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B, + (__v32hf) __C, + (__v32hf) __D, + __A, __E); +} +#else +#define _mm512_fcmadd_round_pch(A, B, C, D) (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D)) +#define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) ((__m512h) __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), (__v32hf) (C), (__v32hf) (D), (B), (E))) +#define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) ((__m512h) __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E))) +#define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E)) +#define _mm512_fmadd_round_pch(A, B, C, D) (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D)) +#define _mm512_mask_fmadd_round_pch(A, B, C, D, E) ((__m512h) __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), (__v32hf) (C), (__v32hf) (D), (B), (E))) +#define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E)) +#define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E)) +#endif +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fcmul_pch (__m512h __A, __m512h __B) +{ + return (__m512h) + __builtin_ia32_vfcmulcph512_round ((__v32hf) __A, + (__v32hf) __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) +{ + return (__m512h) + __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C, + (__v32hf) __D, + (__v32hf) __A, + __B, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B, + (__v32hf) __C, + _mm512_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmul_pch (__m512h __A, __m512h __B) +{ + return (__m512h) + __builtin_ia32_vfmulcph512_round ((__v32hf) __A, + (__v32hf) __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D) +{ + return (__m512h) + __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C, + (__v32hf) __D, + (__v32hf) __A, + __B, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B, + (__v32hf) __C, + _mm512_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D) +{ + return (__m512h) + __builtin_ia32_vfcmulcph512_round ((__v32hf) __A, + (__v32hf) __B, __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C, + __m512h __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C, + (__v32hf) __D, + (__v32hf) __A, + __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B, + __m512h __C, const int __E) +{ + return (__m512h) + 
__builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B, + (__v32hf) __C, + _mm512_setzero_ph (), + __A, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D) +{ + return (__m512h) + __builtin_ia32_vfmulcph512_round ((__v32hf) __A, + (__v32hf) __B, + __D); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C, + __m512h __D, const int __E) +{ + return (__m512h) + __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C, + (__v32hf) __D, + (__v32hf) __A, + __B, __E); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B, + __m512h __C, const int __E) +{ + return (__m512h) + __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B, + (__v32hf) __C, + _mm512_setzero_ph (), + __A, __E); +} +#else +#define _mm512_fcmul_round_pch(A, B, D) (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D)) +#define _mm512_mask_fcmul_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E)) +#define _mm512_maskz_fcmul_round_pch(A, B, C, E) (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), (__v32hf) _mm512_setzero_ph (), (A), (E)) +#define _mm512_fmul_round_pch(A, B, D) (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D)) +#define _mm512_mask_fmul_round_pch(A, B, C, D, E) (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E)) +#define _mm512_maskz_fmul_round_pch(A, B, C, E) (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), (__v32hf) _mm512_setzero_ph (), (A), (E)) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) + 
__builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A, + (__v8hf) __C, + (__v8hf) __D, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) +{ + return (__m128h) + __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, __D, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) +{ + return (__m128h) + __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B, + (__v8hf) __C, + (__v8hf) __D, + __A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) + __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A, + (__v8hf) __C, + (__v8hf) __D, __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, __D, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B, + (__v8hf) __C, + (__v8hf) __D, + __A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline 
__m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return (__m128h) + __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A, + (__v8hf) __C, + (__v8hf) __D, + __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __D, const int __E) +{ + return (__m128h) + __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + __D, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C, + __m128h __D, const int __E) +{ + return (__m128h) + __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B, + (__v8hf) __C, + (__v8hf) __D, + __A, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D) +{ + return (__m128h) + __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A, + (__v8hf) __C, + (__v8hf) __D, + __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, 
+ __mmask8 __D, const int __E) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + __D, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C, + __m128h __D, const int __E) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B, + (__v8hf) __C, + (__v8hf) __D, + __A, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D) +{ + return (__m128h) + __builtin_ia32_vfmaddcsh_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + __D); +} +#else +#define _mm_mask_fcmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), (__v8hf) (C), (__v8hf) (D), (B), (E))) +#define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), (__v8hf) (B), (__v8hf) (C), (D), (E))) +#define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E)) +#define _mm_fcmadd_round_sch(A, B, C, D) __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D)) +#define _mm_mask_fmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), (__v8hf) (C), (__v8hf) (D), (B), (E))) +#define _mm_mask3_fmadd_round_sch(A, B, C, D, E) ((__m128h) __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), (__v8hf) (B), (__v8hf) (C), (D), (E))) +#define _mm_maskz_fmadd_round_sch(A, B, C, D, E) __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E)) +#define _mm_fmadd_round_sch(A, B, C, D) __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D)) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fcmul_sch (__m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, 
+ (__v8hf) __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) + __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, + (__v8hf) __D, + (__v8hf) __A, + __B, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C) +{ + return (__m128h) + __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, + (__v8hf) __C, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmul_sch (__m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_vfmulcsh_round ((__v8hf) __A, + (__v8hf) __B, + _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) + __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, + (__v8hf) __D, + (__v8hf) __A, + __B, _MM_FROUND_CUR_DIRECTION); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C) +{ + return (__m128h) + __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, + (__v8hf) __C, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D) +{ + return (__m128h) + __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, + (__v8hf) __B, + __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ 
+ return (__m128h) + __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, + (__v8hf) __D, + (__v8hf) __A, + __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, + const int __E) +{ + return (__m128h) + __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, + (__v8hf) __C, + _mm_setzero_ph (), + __A, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D) +{ + return (__m128h) + __builtin_ia32_vfmulcsh_round ((__v8hf) __A, + (__v8hf) __B, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return (__m128h) + __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, + (__v8hf) __D, + (__v8hf) __A, + __B, __E); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E) +{ + return (__m128h) + __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, + (__v8hf) __C, + _mm_setzero_ph (), + __A, __E); +} +#else +#define _mm_fcmul_round_sch(__A, __B, __D) (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, (__v8hf) __B, __D) +#define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, (__v8hf) __D, (__v8hf) __A, __B, __E) +#define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, (__v8hf) __C, _mm_setzero_ph (), __A, __E) +#define _mm_fmul_round_sch(__A, __B, __D) (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, (__v8hf) __B, __D) +#define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, (__v8hf) __D, (__v8hf) __A, 
__B, __E) +#define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, (__v8hf) __C, _mm_setzero_ph (), __A, __E) +#endif +#define _MM512_REDUCE_OP(op) __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); __m256h __T3 = (__T1 op __T2); __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); __m128h __T6 = (__T4 op __T5); __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); __m128h __T8 = (__T6 op __T7); __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); __m128h __T10 = __T8 op __T9; return __T10[0] op __T10[1] +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_add_ph (__m512h __A) +{ + _MM512_REDUCE_OP (+); +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_mul_ph (__m512h __A) +{ + _MM512_REDUCE_OP (*); +} +#undef _MM512_REDUCE_OP +#ifdef __AVX512VL__ +#define _MM512_REDUCE_OP(op) __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, _mm256_setzero_ph (), (__mmask16) -1); __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); __m128h __T6 = __builtin_ia32_##op##ph128_mask (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, (__v8hi) { 4, 5 }); __m128h __T10 = 
__builtin_ia32_##op##ph128_mask (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); __m128h __T11 = (__m128h) __builtin_shuffle (__T10, (__v8hi) { 1, 0 }); __m128h __T12 = __builtin_ia32_##op##ph128_mask (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); return __T12[0] +#else +#define _MM512_REDUCE_OP(op) __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); __m512h __T2 = _mm512_##op##_ph (__A, __T1); __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); __m512h __T4 = _mm512_##op##_ph (__T2, __T3); __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); __m512h __T6 = _mm512_##op##_ph (__T4, __T5); __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }); __m512h __T8 = _mm512_##op##_ph (__T6, __T7); __m512h __T9 = (__m512h) __builtin_shuffle (__T8, (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }); __m512h __T10 = _mm512_##op##_ph (__T8, __T9); return __T10[0] +#endif +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_min_ph (__m512h __A) +{ + _MM512_REDUCE_OP (min); +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_max_ph (__m512h __A) +{ + _MM512_REDUCE_OP (max); +} +#undef _MM512_REDUCE_OP +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W) +{ + return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W, + (__v32hi) __A, + (__mmask32) __U); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B) +{ + return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A, + (__v32hi) __I, 
+ (__v32hi) __B, + (__mmask32)-1); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutexvar_ph (__m512i __A, __m512h __B) +{ + return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, + (__v32hi) __A, + (__v32hi) + (_mm512_setzero_ph ()), + (__mmask32)-1); +} +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set1_pch (_Float16 _Complex __A) +{ + union + { + _Float16 _Complex a; + float b; + } u = { .a = __A}; + return (__m512h) _mm512_set1_ps (u.b); +} +#define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B)) +#define _mm512_mask_mul_pch(W, U, A, B) _mm512_mask_fmul_pch ((W), (U), (A), (B)) +#define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B)) +#define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R)) +#define _mm512_mask_mul_round_pch(W, U, A, B, R) _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R)) +#define _mm512_maskz_mul_round_pch(U, A, B, R) _mm512_maskz_fmul_round_pch ((U), (A), (B), (R)) +#define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B)) +#define _mm512_mask_cmul_pch(W, U, A, B) _mm512_mask_fcmul_pch ((W), (U), (A), (B)) +#define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B)) +#define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R)) +#define _mm512_mask_cmul_round_pch(W, U, A, B, R) _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R)) +#define _mm512_maskz_cmul_round_pch(U, A, B, R) _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R)) +#define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B)) +#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B)) +#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B)) +#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R)) +#define _mm_mask_mul_round_sch(W, U, A, B, R) _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R)) +#define _mm_maskz_mul_round_sch(U, A, B, R) 
_mm_maskz_fmul_round_sch ((U), (A), (B), (R)) +#define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B)) +#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B)) +#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B)) +#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R)) +#define _mm_mask_cmul_round_sch(W, U, A, B, R) _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R)) +#define _mm_maskz_cmul_round_sch(U, A, B, R) _mm_maskz_fcmul_round_sch ((U), (A), (B), (R)) +#ifdef __DISABLE_AVX512FP16__ +#undef __DISABLE_AVX512FP16__ +#pragma GCC pop_options +#endif +#endif +#endif diff --git a/third_party/intel/avx512fp16vlintrin.internal.h b/third_party/intel/avx512fp16vlintrin.internal.h new file mode 100644 index 000000000..d42a041a9 --- /dev/null +++ b/third_party/intel/avx512fp16vlintrin.internal.h @@ -0,0 +1,2819 @@ +#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0) +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." 
+#endif +#ifndef __AVX512FP16VLINTRIN_H_INCLUDED +#define __AVX512FP16VLINTRIN_H_INCLUDED +#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) +#pragma GCC push_options +#pragma GCC target("avx512fp16,avx512vl") +#define __DISABLE_AVX512FP16VL__ +#endif +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_ps (__m128h __a) +{ + return (__m128) __a; +} +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_ps (__m256h __a) +{ + return (__m256) __a; +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_pd (__m128h __a) +{ + return (__m128d) __a; +} +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_pd (__m256h __a) +{ + return (__m256d) __a; +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_si128 (__m128h __a) +{ + return (__m128i) __a; +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_si256 (__m256h __a) +{ + return (__m256i) __a; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_ph (__m128 __a) +{ + return (__m128h) __a; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castps_ph (__m256 __a) +{ + return (__m256h) __a; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_ph (__m128d __a) +{ + return (__m128h) __a; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castpd_ph (__m256d __a) +{ + return (__m256h) __a; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_ph (__m128i __a) +{ + return (__m128h) __a; +} +extern __inline __m256h +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castsi256_ph (__m256i __a) +{ + return (__m256h) __a; +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph256_ph128 (__m256h __A) +{ + union + { + __m128h a[2]; + __m256h v; + } u = { .v = __A }; + return u.a[0]; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph128_ph256 (__m128h __A) +{ + union + { + __m128h a[2]; + __m256h v; + } u; + u.a[0] = __A; + return u.v; +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_zextph128_ph256 (__m128h __A) +{ + return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (), + (__m128) __A, 0); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_conj_pch (__m256h __A) +{ + return (__m256h) _mm256_xor_epi32 ((__m256i) __A, _mm256_set1_epi32 (1<<31)); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A) +{ + return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) + _mm256_conj_pch (__A), + (__v8sf) __W, + (__mmask8) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A) +{ + return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) + _mm256_conj_pch (__A), + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_conj_pch (__m128h __A) +{ + return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_set1_epi32 (1<<31)); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A) +{ + return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A), + 
(__v4sf) __W, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_conj_pch (__mmask8 __U, __m128h __A) +{ + return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A), + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_ph (__m128h __A, __m128h __B) +{ + return (__m128h) ((__v8hf) __A + (__v8hf) __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_add_ph (__m256h __A, __m256h __B) +{ + return (__m256h) ((__v16hf) __A + (__v16hf) __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_addph128_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) +{ + return __builtin_ia32_addph256_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_addph256_mask (__B, __C, + _mm256_setzero_ph (), __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_ph (__m128h __A, __m128h __B) +{ + return (__m128h) ((__v8hf) __A - (__v8hf) __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_sub_ph (__m256h __A, __m256h __B) +{ + return (__m256h) ((__v16hf) __A 
- (__v16hf) __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_subph128_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) +{ + return __builtin_ia32_subph256_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_subph256_mask (__B, __C, + _mm256_setzero_ph (), __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_ph (__m128h __A, __m128h __B) +{ + return (__m128h) ((__v8hf) __A * (__v8hf) __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mul_ph (__m256h __A, __m256h __B) +{ + return (__m256h) ((__v16hf) __A * (__v16hf) __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_mulph128_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) +{ + return __builtin_ia32_mulph256_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return 
__builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_mulph256_mask (__B, __C, + _mm256_setzero_ph (), __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_ph (__m128h __A, __m128h __B) +{ + return (__m128h) ((__v8hf) __A / (__v8hf) __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_div_ph (__m256h __A, __m256h __B) +{ + return (__m256h) ((__v16hf) __A / (__v16hf) __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_divph128_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) +{ + return __builtin_ia32_divph256_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_divph256_mask (__B, __C, + _mm256_setzero_ph (), __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_ph (__m128h __A, __m128h __B) +{ + return __builtin_ia32_maxph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_max_ph (__m256h __A, __m256h 
__B) +{ + return __builtin_ia32_maxph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_maxph128_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) +{ + return __builtin_ia32_maxph256_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_maxph256_mask (__B, __C, + _mm256_setzero_ph (), __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_ph (__m128h __A, __m128h __B) +{ + return __builtin_ia32_minph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_min_ph (__m256h __A, __m256h __B) +{ + return __builtin_ia32_minph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_minph128_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) +{ + return __builtin_ia32_minph256_mask (__C, __D, __A, __B); +} +extern __inline __m128h 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_minph256_mask (__B, __C, + _mm256_setzero_ph (), __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_ph (__m128h __A) +{ + return (__m128h) _mm_and_si128 ( _mm_set1_epi32 (0x7FFF7FFF), + (__m128i) __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_abs_ph (__m256h __A) +{ + return (__m256h) _mm256_and_si256 ( _mm256_set1_epi32 (0x7FFF7FFF), + (__m256i) __A); +} +#ifdef __OPTIMIZE +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C) +{ + return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C, + (__mmask8) -1); +} +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A); +} +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_ph_mask (__m256h __A, __m256h __B, const int __C) +{ + return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C, + (__mmask16) -1); +} +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C, + const int __D) +{ + return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D, + __A); +} +#else +#define _mm_cmp_ph_mask(A, B, C) (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1))) +#define 
_mm_mask_cmp_ph_mask(A, B, C, D) (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A))) +#define _mm256_cmp_ph_mask(A, B, C) (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1))) +#define _mm256_mask_cmp_ph_mask(A, B, C, D) (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A))) +#endif +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_ph (__m128h __A) +{ + return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_sqrt_ph (__m256h __A) +{ + return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_sqrtph128_mask (__C, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_sqrtph256_mask (__C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rsqrt_ph (__m128h __A) +{ + return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_rsqrt_ph (__m256h __A) +{ + return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (), + 
(__mmask16) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_rsqrtph128_mask (__C, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_rsqrtph256_mask (__C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_ph (__m128h __A) +{ + return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_rcp_ph (__m256h __A) +{ + return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_rcpph128_mask (__C, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_rcpph256_mask (__C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A); +} +extern __inline __m256h 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_scalef_ph (__m128h __A, __m128h __B) +{ + return __builtin_ia32_scalefph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_scalef_ph (__m256h __A, __m256h __B) +{ + return __builtin_ia32_scalefph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C, + __m256h __D) +{ + return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_scalefph128_mask (__B, __C, + _mm_setzero_ph (), __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_scalefph256_mask (__B, __C, + _mm256_setzero_ph (), + __A); +} +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_ph (__m128h __A, int __B) +{ + return __builtin_ia32_reduceph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_reduceph128_mask (__B, __C, + _mm_setzero_ph (), __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_ph (__m256h __A, int __B) +{ + return __builtin_ia32_reduceph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D) +{ + return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C) +{ + return __builtin_ia32_reduceph256_mask (__B, __C, + _mm256_setzero_ph (), + __A); +} +#else +#define _mm_reduce_ph(A, B) (__builtin_ia32_reduceph128_mask ((A), (B), _mm_setzero_ph (), ((__mmask8)-1))) +#define _mm_mask_reduce_ph(A, B, C, D) (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B))) +#define _mm_maskz_reduce_ph(A, B, C) (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A))) +#define _mm256_reduce_ph(A, B) (__builtin_ia32_reduceph256_mask ((A), (B), _mm256_setzero_ph (), ((__mmask16)-1))) +#define _mm256_mask_reduce_ph(A, B, C, D) (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B))) +#define _mm256_maskz_reduce_ph(A, B, C) (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A))) +#endif +#ifdef __OPTIMIZE__ + extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm_roundscale_ph (__m128h __A, int __B) + { + return __builtin_ia32_rndscaleph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) 
-1); + } +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_rndscaleph128_mask (__B, __C, + _mm_setzero_ph (), __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_roundscale_ph (__m256h __A, int __B) +{ + return __builtin_ia32_rndscaleph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C, + int __D) +{ + return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C) +{ + return __builtin_ia32_rndscaleph256_mask (__B, __C, + _mm256_setzero_ph (), + __A); +} +#else +#define _mm_roundscale_ph(A, B) (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (), ((__mmask8)-1))) +#define _mm_mask_roundscale_ph(A, B, C, D) (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B))) +#define _mm_maskz_roundscale_ph(A, B, C) (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A))) +#define _mm256_roundscale_ph(A, B) (__builtin_ia32_rndscaleph256_mask ((A), (B), _mm256_setzero_ph(), ((__mmask16)-1))) +#define _mm256_mask_roundscale_ph(A, B, C, D) (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B))) +#define _mm256_maskz_roundscale_ph(A, B, C) (__builtin_ia32_rndscaleph256_mask ((B), (C), _mm256_setzero_ph (), (A))) +#endif +#ifdef __OPTIMIZE__ +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) + _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A, + __imm, __U); +} +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fpclass_ph_mask (__m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A, + __imm, + (__mmask8) -1); +} +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm) +{ + return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A, + __imm, __U); +} +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fpclass_ph_mask (__m256h __A, const int __imm) +{ + return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A, + __imm, + (__mmask16) -1); +} +#else +#define _mm_fpclass_ph_mask(X, C) ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), (int) (C),(__mmask8)-1)) +#define _mm_mask_fpclass_ph_mask(u, X, C) ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), (int) (C),(__mmask8)(u))) +#define _mm256_fpclass_ph_mask(X, C) ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), (int) (C),(__mmask16)-1)) +#define _mm256_mask_fpclass_ph_mask(u, X, C) ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), (int) (C),(__mmask16)(u))) +#endif +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_getexp_ph (__m256h __A) +{ + return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A) +{ + return (__m256h) __builtin_ia32_getexpph256_mask 
((__v16hf) __A, + (__v16hf) __W, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A) +{ + return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getexp_ph (__m128h __A) +{ + return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A) +{ + return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, + (__v8hf) __W, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A) +{ + return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) __U); +} +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, + (__C << 2) | __B, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, + (__C << 2) | __B, + (__v16hf) __W, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A, + _MM_MANTISSA_NORM_ENUM __B, + 
_MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, + (__C << 2) | __B, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, + (__C << 2) | __B, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, + (__C << 2) | __B, + (__v8hf) __W, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, + (__C << 2) | __B, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) __U); +} +#else +#define _mm256_getmant_ph(X, B, C) ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), (int)(((C)<<2) | (B)), (__v16hf)(__m256h)_mm256_setzero_ph (), (__mmask16)-1)) +#define _mm256_mask_getmant_ph(W, U, X, B, C) ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), (int)(((C)<<2) | (B)), (__v16hf)(__m256h)(W), (__mmask16)(U))) +#define _mm256_maskz_getmant_ph(U, X, B, C) ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), (int)(((C)<<2) | (B)), (__v16hf)(__m256h)_mm256_setzero_ph (), (__mmask16)(U))) +#define _mm_getmant_ph(X, B, C) ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), (int)(((C)<<2) | (B)), (__v8hf)(__m128h)_mm_setzero_ph (), (__mmask8)-1)) +#define _mm_mask_getmant_ph(W, U, X, B, C) ((__m128h) 
__builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), (int)(((C)<<2) | (B)), (__v8hf)(__m128h)(W), (__mmask8)(U))) +#define _mm_maskz_getmant_ph(U, X, B, C) ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), (int)(((C)<<2) | (B)), (__v8hf)(__m128h)_mm_setzero_ph (), (__mmask8)(U))) +#endif +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__C, ( __v4si) __A, __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__B, + (__v4si) _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__C, ( __v8si) __A, __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__A, + 
(__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__C, ( __v4si) __A, __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__B, + (__v4si) + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__C, ( __v8si) __A, __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__B, + (__v8si) _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2dq128_mask (__A, + (__v4si) _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i)__builtin_ia32_vcvttph2dq128_mask (__C, + ( __v4si) __A, + __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + 
__builtin_ia32_vcvttph2dq128_mask (__B, + (__v4si) _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__C, + ( __v8si) __A, + __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__C, + ( __v4si) __A, + __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__B, + (__v4si) + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 
__B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__C, + ( __v8si) __A, + __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi32_ph (__m128i __A) +{ + return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepi32_ph (__m256i __A) +{ + return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B) +{ + return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu32_ph (__m128i __A) +{ + return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __A, + _mm_setzero_ph (), + (__mmask8) -1); 
+} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C, + __A, + __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepu32_ph (__m256i __A) +{ + return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B) +{ + return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi64 (__m128h __A) +{ + return + __builtin_ia32_vcvtph2qq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq128_mask (__B, + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi64 (__m128h __A) +{ + return 
__builtin_ia32_vcvtph2qq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__B, + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq128_mask (__C, + __A, + __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq128_mask (__B, + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq256_mask (__C, + __A, + __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__C, + __A, + __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__B, + _mm_setzero_si128 (), + __A); +} 
+extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__C, + __A, + __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi64_ph (__m128i __A) +{ + return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepi64_ph (__m256i __A) +{ + return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepi64_ph (__mmask8 __A, 
__m256i __B) +{ + return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu64_ph (__m128i __A) +{ + return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepu64_ph (__m256i __A) +{ + return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B) +{ + return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B); +} 
+extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) 
-1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__C, + ( __v8hi) __A, + __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__C, + ( __v16hi) __A, + __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__B, + (__v16hi) 
+ _mm256_setzero_si256 (), + __A); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__C, + ( __v8hi) __A, + __B); +} +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__C, + ( __v16hi) __A, + __B); +} +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__B, + (__v16hi) _mm256_setzero_si256 (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_ph (__m128i __A) +{ + return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) 
__C, + __A, + __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepi16_ph (__m256i __A) +{ + return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __A, + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C) +{ + return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C, + __A, + __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B) +{ + return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __B, + _mm256_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu16_ph (__m128i __A) +{ + return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepu16_ph (__m256i __A) +{ + return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __A, + _mm256_setzero_ph (), + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C) +{ + return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B) +{ + return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __B, + _mm256_setzero_ph (), + __A); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd128_mask (__A, + _mm_setzero_pd (), + (__mmask8) -1); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B); +} +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A); +} +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd256_mask (__A, + _mm256_setzero_pd (), + (__mmask8) -1); +} +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B); +} +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd256_mask (__B, + _mm256_setzero_pd (), + __A); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtxph_ps (__m128h __A) +{ + return __builtin_ia32_vcvtph2psx128_mask (__A, + _mm_setzero_ps (), + (__mmask8) -1); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B); +} +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A); +} +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtxph_ps (__m128h __A) +{ + return __builtin_ia32_vcvtph2psx256_mask (__A, + _mm256_setzero_ps (), + (__mmask8) -1); +} +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B); +} +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2psx256_mask (__B, + _mm256_setzero_ps (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtxps_ph (__m128 __A) +{ + return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C) +{ + return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B) +{ + return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtxps_ph (__m256 __A) +{ + return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline 
__m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C) +{ + return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B) +{ + return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpd_ph (__m128d __A) +{ + return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C) +{ + return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B) +{ + return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtpd_ph (__m256d __A) +{ + return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C) +{ + return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B) +{ + return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __B, + _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return 
(__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A, + (__v16hf)__B, + (__v16hf)__C, + (__mmask16)-1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A, + (__v8hf)__B, + (__v8hf)__C, + (__mmask8)-1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) 
__U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmaddph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddph128_mask 
((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmaddph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, 
+ __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmsubph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmsubph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} +extern __inline __m256h +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_maskz ((__v8hf) __A, + (__v8hf) __B, + 
(__v8hf) __C, + (__mmask8) + __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_pch (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddcph128 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) + __builtin_ia32_vfmaddcph128_mask ((__v8hf) __A, + (__v8hf) __C, + (__v8hf) __D, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) +{ + return (__m128h) + __builtin_ia32_vfmaddcph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) +{ + return (__m128h) __builtin_ia32_vfmaddcph128_maskz ((__v8hf) __B, + (__v8hf) __C, + (__v8hf) __D, __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmadd_pch (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddcph256 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) +{ + return (__m256h) + __builtin_ia32_vfmaddcph256_mask ((__v16hf) __A, + (__v16hf) __C, + (__v16hf) __D, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D) +{ + return (__m256h) + __builtin_ia32_vfmaddcph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, __D); +} +extern __inline __m256h 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D) +{ + return (__m256h)__builtin_ia32_vfmaddcph256_maskz ((__v16hf) __B, + (__v16hf) __C, + (__v16hf) __D, __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfcmaddcph128 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fcmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) + __builtin_ia32_vfcmaddcph128_mask ((__v8hf) __A, + (__v8hf) __C, + (__v8hf) __D, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) +{ + return (__m128h) + __builtin_ia32_vfcmaddcph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, __D); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fcmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) +{ + return (__m128h)__builtin_ia32_vfcmaddcph128_maskz ((__v8hf) __B, + (__v8hf) __C, + (__v8hf) __D, __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfcmaddcph256 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fcmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) +{ + return (__m256h) + __builtin_ia32_vfcmaddcph256_mask ((__v16hf) __A, + (__v16hf) __C, + (__v16hf) __D, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm256_mask3_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D) +{ + return (__m256h) + __builtin_ia32_vfcmaddcph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, __D); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fcmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D) +{ + return (__m256h) __builtin_ia32_vfcmaddcph256_maskz ((__v16hf) __B, + (__v16hf) __C, + (__v16hf) __D, __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmul_pch (__m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfmulcph128 ((__v8hf) __A, (__v8hf) __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __C, + (__v8hf) __D, + (__v8hf) __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmul_pch (__mmask8 __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __B, + (__v8hf) __C, + _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmul_pch (__m256h __A, __m256h __B) +{ + return (__m256h) __builtin_ia32_vfmulcph256 ((__v16hf) __A, + (__v16hf) __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) +{ + return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __C, + (__v16hf) __D, + (__v16hf) __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmul_pch (__mmask8 __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) 
__B, + (__v16hf) __C, + _mm256_setzero_ph (), + __A); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fcmul_pch (__m128h __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_vfcmulcph128 ((__v8hf) __A, + (__v8hf) __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fcmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __C, + (__v8hf) __D, + (__v8hf) __A, __B); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fcmul_pch (__mmask8 __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __B, + (__v8hf) __C, + _mm_setzero_ph (), + __A); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fcmul_pch (__m256h __A, __m256h __B) +{ + return (__m256h) __builtin_ia32_vfcmulcph256 ((__v16hf) __A, (__v16hf) __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fcmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) +{ + return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __C, + (__v16hf) __D, + (__v16hf) __A, __B); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fcmul_pch (__mmask8 __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __B, + (__v16hf) __C, + _mm256_setzero_ph (), + __A); +} +#define _MM256_REDUCE_OP(op) __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); __m128h __T3 = (__T1 op __T2); __m128h __T4 = (__m128h) __builtin_shuffle (__T3, (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); __m128h __T5 = (__T3) op (__T4); __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 
2, 3, 0, 1, 4, 5, 6, 7 }); __m128h __T7 = __T5 op __T6; return __T7[0] op __T7[1] +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_add_ph (__m256h __A) +{ + _MM256_REDUCE_OP (+); +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_mul_ph (__m256h __A) +{ + _MM256_REDUCE_OP (*); +} +#undef _MM256_REDUCE_OP +#define _MM256_REDUCE_OP(op) __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); __m128h __T3 = _mm_##op (__T1, __T2); __m128h __T4 = (__m128h) __builtin_shuffle (__T3, (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); __m128h __T5 = _mm_##op (__T3, __T4); __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 4, 5 }); __m128h __T7 = _mm_##op (__T5, __T6); __m128h __T8 = (__m128h) __builtin_shuffle (__T7, (__v8hi) { 1, 0 }); __m128h __T9 = _mm_##op (__T7, __T8); return __T9[0] +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_min_ph (__m256h __A) +{ + _MM256_REDUCE_OP (min_ph); +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_max_ph (__m256h __A) +{ + _MM256_REDUCE_OP (max_ph); +} +#define _MM_REDUCE_OP(op) __m128h __T1 = (__m128h) __builtin_shuffle (__A, (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); __m128h __T2 = (__A) op (__T1); __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 2, 3, 0, 1, 4, 5, 6, 7 }); __m128h __T4 = __T2 op __T3; return __T4[0] op __T4[1] +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_add_ph (__m128h __A) +{ + _MM_REDUCE_OP (+); +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_mul_ph (__m128h __A) +{ + _MM_REDUCE_OP (*); +} +#undef _MM_REDUCE_OP +#define _MM_REDUCE_OP(op) __m128h __T1 = (__m128h) 
__builtin_shuffle (__A, (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); __m128h __T2 = _mm_##op (__A, __T1); __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 4, 5 }); __m128h __T4 = _mm_##op (__T2, __T3); __m128h __T5 = (__m128h) __builtin_shuffle (__T4, (__v8hi){ 1, 0 }); __m128h __T6 = _mm_##op (__T4, __T5); return __T6[0] +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_min_ph (__m128h __A) +{ + _MM_REDUCE_OP (min_ph); +} +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_max_ph (__m128h __A) +{ + _MM_REDUCE_OP (max_ph); +} +#undef _MM256_REDUCE_OP +#undef _MM_REDUCE_OP +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W) +{ + return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W, + (__v16hi) __A, + (__mmask16) __U); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B) +{ + return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A, + (__v16hi) __I, + (__v16hi) __B, + (__mmask16)-1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_permutexvar_ph (__m256i __A, __m256h __B) +{ + return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) + (_mm256_setzero_ph ()), + (__mmask16)-1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W) +{ + return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W, + (__v8hi) __A, + (__mmask8) __U); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B) +{ + return (__m128h) 
__builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A, + (__v8hi) __I, + (__v8hi) __B, + (__mmask8)-1); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_permutexvar_ph (__m128i __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) + (_mm_setzero_ph ()), + (__mmask8)-1); +} +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_set1_pch (_Float16 _Complex __A) +{ + union + { + _Float16 _Complex a; + float b; + } u = { .a = __A }; + return (__m256h) _mm256_set1_ps (u.b); +} +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_pch (_Float16 _Complex __A) +{ + union + { + _Float16 _Complex a; + float b; + } u = { .a = __A }; + return (__m128h) _mm_set1_ps (u.b); +} +#define _mm_mul_pch(A, B) _mm_fmul_pch ((A), (B)) +#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch ((W), (U), (A), (B)) +#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch ((U), (A), (B)) +#define _mm256_mul_pch(A, B) _mm256_fmul_pch ((A), (B)) +#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch ((W), (U), (A), (B)) +#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch ((U), (A), (B)) +#define _mm_cmul_pch(A, B) _mm_fcmul_pch ((A), (B)) +#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch ((W), (U), (A), (B)) +#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch ((U), (A), (B)) +#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch ((A), (B)) +#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch ((W), (U), (A), (B)) +#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch((U), (A), (B)) +#ifdef __DISABLE_AVX512FP16VL__ +#undef __DISABLE_AVX512FP16VL__ +#pragma GCC pop_options +#endif +#endif +#endif diff --git a/third_party/intel/avx512vlintrin.internal.h b/third_party/intel/avx512vlintrin.internal.h index 73d7e3054..493f80abb 100644 --- 
a/third_party/intel/avx512vlintrin.internal.h +++ b/third_party/intel/avx512vlintrin.internal.h @@ -9486,10 +9486,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, __imm, - (__mmask8) -1); + return (__m256i) + __builtin_ia32_pternlogq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -9497,10 +9499,12 @@ _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -9508,21 +9512,24 @@ _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, - __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogq256_maskz ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, __imm, - (__mmask8) -1); + return (__m256i) + __builtin_ia32_pternlogd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline 
__m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -9530,10 +9537,12 @@ _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -9541,73 +9550,88 @@ _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, - __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogd256_maskz ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C, const int __imm) { - return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, __imm, - (__mmask8) -1); + return (__m128i) + __builtin_ia32_pternlogq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, __imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, - __imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogq128_maskz ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C, const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, __imm, - (__mmask8) -1); + return (__m128i) + __builtin_ia32_pternlogd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, __imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, - __imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogd128_maskz ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) @@ -11335,18 +11359,18 @@ _mm256_permutex_pd (__m256d __X, const int __M) #define _mm_maskz_slli_epi32(U, X, C) ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U))) #define _mm_mask_slli_epi64(W, U, X, C) ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U))) #define _mm_maskz_slli_epi64(U, X, C) ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(U))) -#define _mm256_ternarylogic_epi64(A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1)) -#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U))) -#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U))) -#define _mm256_ternarylogic_epi32(A, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1)) -#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U))) -#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U))) -#define _mm_ternarylogic_epi64(A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1)) -#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) ((__m128i) 
__builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U))) -#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U))) -#define _mm_ternarylogic_epi32(A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1)) -#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U))) -#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U))) +#define _mm256_ternarylogic_epi64(A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), (__v4di) (__m256i) (B), (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) -1)) +#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), (__v4di) (__m256i) (B), (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) (__m256i) (A), (__v4di) (__m256i) (B), (__v4di) (__m256i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm256_ternarylogic_epi32(A, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), (__v8si) (__m256i) (B), (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) -1)) +#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), (__v8si) (__m256i) (B), (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m256i) 
__builtin_ia32_pternlogd256_maskz ((__v8si) (__m256i) (A), (__v8si) (__m256i) (B), (__v8si) (__m256i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm_ternarylogic_epi64(A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), (__v2di) (__m128i) (B), (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) -1)) +#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), (__v2di) (__m128i) (B), (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) (__m128i) (A), (__v2di) (__m128i) (B), (__v2di) (__m128i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm_ternarylogic_epi32(A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), (__v4si) (__m128i) (B), (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) -1)) +#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), (__v4si) (__m128i) (B), (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) (U))) +#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) (__m128i) (A), (__v4si) (__m128i) (B), (__v4si) (__m128i) (C), (unsigned char) (I), (__mmask8) (U))) #define _mm256_roundscale_ps(A, B) ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1)) #define _mm256_mask_roundscale_ps(W, U, A, B) ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U))) #define _mm256_maskz_roundscale_ps(U, A, B) ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U))) diff --git a/third_party/intel/bmiintrin.internal.h b/third_party/intel/bmiintrin.internal.h index cf2042f1b..0c17f0bd2 100644 
--- a/third_party/intel/bmiintrin.internal.h +++ b/third_party/intel/bmiintrin.internal.h @@ -14,12 +14,22 @@ __tzcnt_u16 (unsigned short __X) { return __builtin_ia32_tzcnt_u16 (__X); } +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tzcnt_u16 (unsigned short __X) +{ + return __builtin_ia32_tzcnt_u16 (__X); +} extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __andn_u32 (unsigned int __X, unsigned int __Y) { return ~__X & __Y; } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_andn_u32 (unsigned int __X, unsigned int __Y) +{ + return __andn_u32 (__X, __Y); +} +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextr_u32 (unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32 (__X, __Y); @@ -76,6 +86,11 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y) return ~__X & __Y; } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_andn_u64 (unsigned long long __X, unsigned long long __Y) +{ + return __andn_u64 (__X, __Y); +} +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextr_u64 (unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64 (__X, __Y); diff --git a/third_party/intel/cpuid.internal.h b/third_party/intel/cpuid.internal.h index 081886d9b..3f082193c 100644 --- a/third_party/intel/cpuid.internal.h +++ b/third_party/intel/cpuid.internal.h @@ -48,7 +48,6 @@ #define bit_AVX2 (1 << 5) #define bit_BMI2 (1 << 8) #define bit_RTM (1 << 11) -#define bit_MPX (1 << 14) #define bit_AVX512F (1 << 16) #define bit_AVX512DQ (1 << 17) #define bit_RDSEED (1 << 18) @@ -84,6 +83,7 @@ #define bit_AVX5124VNNIW (1 << 2) #define bit_AVX5124FMAPS (1 << 3) #define bit_AVX512VP2INTERSECT (1 << 8) +#define bit_AVX512FP16 (1 << 23) #define 
bit_IBT (1 << 20) #define bit_UINTR (1 << 5) #define bit_PCONFIG (1 << 18) @@ -92,8 +92,6 @@ #define bit_AMX_BF16 (1 << 22) #define bit_AMX_TILE (1 << 24) #define bit_AMX_INT8 (1 << 25) -#define bit_BNDREGS (1 << 3) -#define bit_BNDCSR (1 << 4) #define bit_XSAVEOPT (1 << 0) #define bit_XSAVEC (1 << 1) #define bit_XSAVES (1 << 3) diff --git a/third_party/intel/emmintrin.internal.h b/third_party/intel/emmintrin.internal.h index d54c75287..c0c3bf59d 100644 --- a/third_party/intel/emmintrin.internal.h +++ b/third_party/intel/emmintrin.internal.h @@ -550,13 +550,12 @@ _mm_loadu_si64 (void const *__P) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_si32 (void const *__P) { - return _mm_set_epi32 (*(int *)__P, (int)0, (int)0, (int)0); + return _mm_set_epi32 (0, 0, 0, (*(__m32_u *)__P)[0]); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_si16 (void const *__P) { - return _mm_set_epi16 (*(short *)__P, (short)0, (short)0, (short)0, - (short)0, (short)0, (short)0, (short)0); + return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, (*(__m16_u *)__P)[0]); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_si128 (__m128i *__P, __m128i __B) diff --git a/third_party/intel/ia32intrin.internal.h b/third_party/intel/ia32intrin.internal.h index faeaff648..f6a5172c7 100644 --- a/third_party/intel/ia32intrin.internal.h +++ b/third_party/intel/ia32intrin.internal.h @@ -21,10 +21,10 @@ __bswapd (int __X) return __builtin_bswap32 (__X); } #ifndef __iamcu__ -#ifndef __SSE4_2__ +#ifndef __CRC32__ #pragma GCC push_options -#pragma GCC target("sse4.2") -#define __DISABLE_SSE4_2__ +#pragma GCC target("crc32") +#define __DISABLE_CRC32__ #endif extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -44,8 +44,8 @@ __crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } -#ifdef 
__DISABLE_SSE4_2__ -#undef __DISABLE_SSE4_2__ +#ifdef __DISABLE_CRC32__ +#undef __DISABLE_CRC32__ #pragma GCC pop_options #endif #endif @@ -63,9 +63,19 @@ __rdpmc (int __S) return __builtin_ia32_rdpmc (__S); } #endif -#define __rdtsc() __builtin_ia32_rdtsc () +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__rdtsc (void) +{ + return __builtin_ia32_rdtsc (); +} #ifndef __iamcu__ -#define __rdtscp(a) __builtin_ia32_rdtscp (a) +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__rdtscp (unsigned int *__A) +{ + return __builtin_ia32_rdtscp (__A); +} #endif extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -130,10 +140,10 @@ __bswapq (long long __X) { return __builtin_bswap64 (__X); } -#ifndef __SSE4_2__ +#ifndef __CRC32__ #pragma GCC push_options -#pragma GCC target("sse4.2") -#define __DISABLE_SSE4_2__ +#pragma GCC target("crc32") +#define __DISABLE_CRC32__ #endif extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -141,8 +151,8 @@ __crc32q (unsigned long long __C, unsigned long long __V) { return __builtin_ia32_crc32di (__C, __V); } -#ifdef __DISABLE_SSE4_2__ -#undef __DISABLE_SSE4_2__ +#ifdef __DISABLE_CRC32__ +#undef __DISABLE_CRC32__ #pragma GCC pop_options #endif extern __inline long long diff --git a/third_party/intel/immintrin.internal.h b/third_party/intel/immintrin.internal.h index 60b7ec6d9..a932d9755 100644 --- a/third_party/intel/immintrin.internal.h +++ b/third_party/intel/immintrin.internal.h @@ -36,6 +36,10 @@ #include "third_party/intel/avx512bitalgintrin.internal.h" #include "third_party/intel/avx512vp2intersectintrin.internal.h" #include "third_party/intel/avx512vp2intersectvlintrin.internal.h" +#ifdef __SSE2__ +#include "third_party/intel/avx512fp16intrin.internal.h" +#include "third_party/intel/avx512fp16vlintrin.internal.h" +#endif 
#include "third_party/intel/shaintrin.internal.h" #include "third_party/intel/fmaintrin.internal.h" #include "third_party/intel/f16cintrin.internal.h" diff --git a/third_party/intel/mwaitintrin.internal.h b/third_party/intel/mwaitintrin.internal.h new file mode 100644 index 000000000..6405a2c9e --- /dev/null +++ b/third_party/intel/mwaitintrin.internal.h @@ -0,0 +1,26 @@ +#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0) +#ifndef _MWAITINTRIN_H_INCLUDED +#define _MWAITINTRIN_H_INCLUDED +#ifndef __MWAIT__ +#pragma GCC push_options +#pragma GCC target("mwait") +#define __DISABLE_MWAIT__ +#endif +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_monitor (void const * __P, unsigned int __E, unsigned int __H) +{ + __builtin_ia32_monitor (__P, __E, __H); +} +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mwait (unsigned int __E, unsigned int __H) +{ + __builtin_ia32_mwait (__E, __H); +} +#ifdef __DISABLE_MWAIT__ +#undef __DISABLE_MWAIT__ +#pragma GCC pop_options +#endif +#endif +#endif diff --git a/third_party/intel/pmmintrin.internal.h b/third_party/intel/pmmintrin.internal.h index abac40c73..b66b8412b 100644 --- a/third_party/intel/pmmintrin.internal.h +++ b/third_party/intel/pmmintrin.internal.h @@ -2,6 +2,7 @@ #ifndef _PMMINTRIN_H_INCLUDED #define _PMMINTRIN_H_INCLUDED #include "third_party/intel/emmintrin.internal.h" +#include "third_party/intel/mwaitintrin.internal.h" #ifndef __SSE3__ #pragma GCC push_options #pragma GCC target("sse3") @@ -67,16 +68,6 @@ _mm_lddqu_si128 (__m128i const *__P) { return (__m128i) __builtin_ia32_lddqu ((char const *)__P); } -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_monitor (void const * __P, unsigned int __E, unsigned int __H) -{ - __builtin_ia32_monitor (__P, __E, __H); -} -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mwait (unsigned int 
__E, unsigned int __H) -{ - __builtin_ia32_mwait (__E, __H); -} #ifdef __DISABLE_SSE3__ #undef __DISABLE_SSE3__ #pragma GCC pop_options diff --git a/third_party/intel/serializeintrin.internal.h b/third_party/intel/serializeintrin.internal.h index a68abbf43..22fa1c97b 100644 --- a/third_party/intel/serializeintrin.internal.h +++ b/third_party/intel/serializeintrin.internal.h @@ -9,7 +9,12 @@ #pragma GCC target("serialize") #define __DISABLE_SERIALIZE__ #endif -#define _serialize() __builtin_ia32_serialize () +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_serialize (void) +{ + __builtin_ia32_serialize (); +} #ifdef __DISABLE_SERIALIZE__ #undef __DISABLE_SERIALIZE__ #pragma GCC pop_options diff --git a/third_party/intel/smmintrin.internal.h b/third_party/intel/smmintrin.internal.h index 2fae19a10..5179c6e2e 100644 --- a/third_party/intel/smmintrin.internal.h +++ b/third_party/intel/smmintrin.internal.h @@ -524,15 +524,10 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y) #pragma GCC pop_options #endif #include "third_party/intel/popcntintrin.internal.h" -#ifndef __SSE4_1__ +#ifndef __CRC32__ #pragma GCC push_options -#pragma GCC target("sse4.1") -#define __DISABLE_SSE4_1__ -#endif -#ifndef __SSE4_2__ -#pragma GCC push_options -#pragma GCC target("sse4.2") -#define __DISABLE_SSE4_2__ +#pragma GCC target("crc32") +#define __DISABLE_CRC32__ #endif extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_crc32_u8 (unsigned int __C, unsigned char __V) @@ -556,12 +551,8 @@ _mm_crc32_u64 (unsigned long long __C, unsigned long long __V) return __builtin_ia32_crc32di (__C, __V); } #endif -#ifdef __DISABLE_SSE4_2__ -#undef __DISABLE_SSE4_2__ -#pragma GCC pop_options -#endif -#ifdef __DISABLE_SSE4_1__ -#undef __DISABLE_SSE4_1__ +#ifdef __DISABLE_CRC32__ +#undef __DISABLE_CRC32__ #pragma GCC pop_options #endif #endif diff --git a/third_party/intel/upgrade.sh b/third_party/intel/upgrade.sh index 
02458cbe1..f5f32ddae 100755 --- a/third_party/intel/upgrade.sh +++ b/third_party/intel/upgrade.sh @@ -1,32 +1,27 @@ #!/bin/sh -s=/opt/cross11portcosmo/lib/gcc/x86_64-linux-musl/11.2.0/include +s=/opt/include d=third_party/intel FILES=' +adxintrin +ammintrin amxbf16intrin amxint8intrin amxtileintrin -avx512bf16intrin -avx512bf16vlintrin -avx512vp2intersectintrin -avx512vp2intersectvlintrin -avxvnniintrin -enqcmdintrin -hresetintrin -keylockerintrin -serializeintrin -tsxldtrkintrin -uintrintrin -x86gprintrin +avx2intrin avx5124fmapsintrin avx5124vnniwintrin +avx512bf16intrin +avx512bf16vlintrin avx512bitalgintrin avx512bwintrin avx512cdintrin avx512dqintrin avx512erintrin avx512fintrin +avx512fp16intrin +avx512fp16vlintrin avx512ifmaintrin avx512ifmavlintrin avx512pfintrin @@ -39,12 +34,12 @@ avx512vldqintrin avx512vlintrin avx512vnniintrin avx512vnnivlintrin +avx512vp2intersectintrin +avx512vp2intersectvlintrin avx512vpopcntdqintrin avx512vpopcntdqvlintrin -adxintrin -ammintrin -avx2intrin avxintrin +avxvnniintrin bmi2intrin bmiintrin cetintrin @@ -54,19 +49,23 @@ clwbintrin clzerointrin cpuid emmintrin +enqcmdintrin f16cintrin fma4intrin fmaintrin fxsrintrin gfniintrin +hresetintrin ia32intrin immintrin +keylockerintrin lwpintrin lzcntintrin mm3dnow mm_malloc mmintrin movdirintrin +mwaitintrin mwaitxintrin nmmintrin pconfigintrin @@ -76,16 +75,20 @@ popcntintrin prfchwintrin rdseedintrin rtmintrin +serializeintrin sgxintrin shaintrin smmintrin tbmintrin tmmintrin +tsxldtrkintrin +uintrintrin vaesintrin vpclmulqdqintrin waitpkgintrin wbnoinvdintrin wmmintrin +x86gprintrin x86intrin xmmintrin xopintrin diff --git a/third_party/intel/vaesintrin.internal.h b/third_party/intel/vaesintrin.internal.h index e0b577112..6a55221af 100644 --- a/third_party/intel/vaesintrin.internal.h +++ b/third_party/intel/vaesintrin.internal.h @@ -1,7 +1,4 @@ #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0) -#ifndef _IMMINTRIN_H_INCLUDED -# error "Never use directly; include 
instead." -#endif #ifndef __VAESINTRIN_H_INCLUDED #define __VAESINTRIN_H_INCLUDED #if !defined(__VAES__) || !defined(__AVX__) diff --git a/third_party/intel/x86gprintrin.internal.h b/third_party/intel/x86gprintrin.internal.h index 3f8aedf78..875718588 100644 --- a/third_party/intel/x86gprintrin.internal.h +++ b/third_party/intel/x86gprintrin.internal.h @@ -1,6 +1,11 @@ #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0) #ifndef _X86GPRINTRIN_H_INCLUDED #define _X86GPRINTRIN_H_INCLUDED +#if !defined _SOFT_FLOAT || defined __MMX__ || defined __SSE__ +#pragma GCC push_options +#pragma GCC target("general-regs-only") +#define __DISABLE_GENERAL_REGS_ONLY__ +#endif #include "third_party/intel/ia32intrin.internal.h" #ifndef __iamcu__ #include "third_party/intel/adxintrin.internal.h" @@ -16,6 +21,7 @@ #include "third_party/intel/lzcntintrin.internal.h" #include "third_party/intel/lwpintrin.internal.h" #include "third_party/intel/movdirintrin.internal.h" +#include "third_party/intel/mwaitintrin.internal.h" #include "third_party/intel/mwaitxintrin.internal.h" #include "third_party/intel/pconfigintrin.internal.h" #include "third_party/intel/popcntintrin.internal.h" @@ -175,5 +181,9 @@ _ptwrite32 (unsigned __B) #pragma GCC pop_options #endif #endif +#ifdef __DISABLE_GENERAL_REGS_ONLY__ +#undef __DISABLE_GENERAL_REGS_ONLY__ +#pragma GCC pop_options +#endif #endif #endif diff --git a/third_party/intel/xmmintrin.internal.h b/third_party/intel/xmmintrin.internal.h index 7c744991b..3a97b9fa1 100644 --- a/third_party/intel/xmmintrin.internal.h +++ b/third_party/intel/xmmintrin.internal.h @@ -1,9 +1,10 @@ #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0) #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED -#include "third_party/intel/mm_malloc.internal.h" #include "third_party/intel/mmintrin.internal.h" -enum _mm_hint { +#include "third_party/intel/mm_malloc.internal.h" +enum _mm_hint +{ _MM_HINT_ET0 = 7, _MM_HINT_ET1 = 6, _MM_HINT_T0 = 3, @@ -12,953 
+13,950 @@ enum _mm_hint { _MM_HINT_NTA = 0 }; #ifdef __OPTIMIZE__ -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_prefetch(const void *__P, enum _mm_hint __I) { - __builtin_prefetch(__P, (__I & 0x4) >> 2, __I & 0x3); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_prefetch (const void *__P, enum _mm_hint __I) +{ + __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3); } #else -#define _mm_prefetch(P, I) __builtin_prefetch((P), ((I & 0x4) >> 2), (I & 0x3)) +#define _mm_prefetch(P, I) __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3)) #endif #ifndef __SSE__ #pragma GCC push_options #pragma GCC target("sse") #define __DISABLE_SSE__ #endif -typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); -typedef float __m128_u - __attribute__((__vector_size__(16), __may_alias__, __aligned__(1))); -typedef float __v4sf __attribute__((__vector_size__(16))); -#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \ - (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) -#define _MM_EXCEPT_MASK 0x003f -#define _MM_EXCEPT_INVALID 0x0001 -#define _MM_EXCEPT_DENORM 0x0002 -#define _MM_EXCEPT_DIV_ZERO 0x0004 -#define _MM_EXCEPT_OVERFLOW 0x0008 -#define _MM_EXCEPT_UNDERFLOW 0x0010 -#define _MM_EXCEPT_INEXACT 0x0020 -#define _MM_MASK_MASK 0x1f80 -#define _MM_MASK_INVALID 0x0080 -#define _MM_MASK_DENORM 0x0100 -#define _MM_MASK_DIV_ZERO 0x0200 -#define _MM_MASK_OVERFLOW 0x0400 -#define _MM_MASK_UNDERFLOW 0x0800 -#define _MM_MASK_INEXACT 0x1000 -#define _MM_ROUND_MASK 0x6000 -#define _MM_ROUND_NEAREST 0x0000 -#define _MM_ROUND_DOWN 0x2000 -#define _MM_ROUND_UP 0x4000 +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); +typedef float __v4sf __attribute__ ((__vector_size__ (16))); +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) 
+#define _MM_EXCEPT_MASK 0x003f +#define _MM_EXCEPT_INVALID 0x0001 +#define _MM_EXCEPT_DENORM 0x0002 +#define _MM_EXCEPT_DIV_ZERO 0x0004 +#define _MM_EXCEPT_OVERFLOW 0x0008 +#define _MM_EXCEPT_UNDERFLOW 0x0010 +#define _MM_EXCEPT_INEXACT 0x0020 +#define _MM_MASK_MASK 0x1f80 +#define _MM_MASK_INVALID 0x0080 +#define _MM_MASK_DENORM 0x0100 +#define _MM_MASK_DIV_ZERO 0x0200 +#define _MM_MASK_OVERFLOW 0x0400 +#define _MM_MASK_UNDERFLOW 0x0800 +#define _MM_MASK_INEXACT 0x1000 +#define _MM_ROUND_MASK 0x6000 +#define _MM_ROUND_NEAREST 0x0000 +#define _MM_ROUND_DOWN 0x2000 +#define _MM_ROUND_UP 0x4000 #define _MM_ROUND_TOWARD_ZERO 0x6000 -#define _MM_FLUSH_ZERO_MASK 0x8000 -#define _MM_FLUSH_ZERO_ON 0x8000 -#define _MM_FLUSH_ZERO_OFF 0x0000 -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_undefined_ps(void) { +#define _MM_FLUSH_ZERO_MASK 0x8000 +#define _MM_FLUSH_ZERO_ON 0x8000 +#define _MM_FLUSH_ZERO_OFF 0x0000 +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_undefined_ps (void) +{ __m128 __Y = __Y; return __Y; } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_setzero_ps(void) { - return __extension__(__m128){0.0f, 0.0f, 0.0f, 0.0f}; +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setzero_ps (void) +{ + return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_add_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_addss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_sub_ss(__m128 __A, __m128 __B) { 
- return (__m128)__builtin_ia32_subss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_mul_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_mulss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_div_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_divss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_sqrt_ss(__m128 __A) { - return (__m128)__builtin_ia32_sqrtss((__v4sf)__A); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_ss (__m128 __A) +{ + return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_rcp_ss(__m128 __A) { - return (__m128)__builtin_ia32_rcpss((__v4sf)__A); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_ss (__m128 __A) +{ + return (__m128) __builtin_ia32_rcpss ((__v4sf)__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_rsqrt_ss(__m128 __A) { - return (__m128)__builtin_ia32_rsqrtss((__v4sf)__A); +extern __inline __m128 __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_rsqrt_ss (__m128 __A) +{ + return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_min_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_minss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_max_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_maxss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_add_ps(__m128 __A, __m128 __B) { - return (__m128)((__v4sf)__A + (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A + (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_sub_ps(__m128 __A, __m128 __B) { - return (__m128)((__v4sf)__A - (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A - (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_mul_ps(__m128 __A, __m128 __B) { - return (__m128)((__v4sf)__A * (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A * (__v4sf)__B); } -extern 
__inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_div_ps(__m128 __A, __m128 __B) { - return (__m128)((__v4sf)__A / (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A / (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_sqrt_ps(__m128 __A) { - return (__m128)__builtin_ia32_sqrtps((__v4sf)__A); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_rcp_ps(__m128 __A) { - return (__m128)__builtin_ia32_rcpps((__v4sf)__A); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_rcpps ((__v4sf)__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_rsqrt_ps(__m128 __A) { - return (__m128)__builtin_ia32_rsqrtps((__v4sf)__A); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rsqrt_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_min_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_minps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_max_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_maxps((__v4sf)__A, (__v4sf)__B); 
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_and_ps(__m128 __A, __m128 __B) { - return __builtin_ia32_andps(__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_and_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_andps (__A, __B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_andnot_ps(__m128 __A, __m128 __B) { - return __builtin_ia32_andnps(__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_andnot_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_andnps (__A, __B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_or_ps(__m128 __A, __m128 __B) { - return __builtin_ia32_orps(__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_or_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_orps (__A, __B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_xor_ps(__m128 __A, __m128 __B) { - return __builtin_ia32_xorps(__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_xor_ps (__m128 __A, __m128 __B) +{ + return __builtin_ia32_xorps (__A, __B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpeq_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpeqss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B); 
} -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmplt_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpltss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmple_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpless((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpgt_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_movss( - (__v4sf)__A, (__v4sf)__builtin_ia32_cmpltss((__v4sf)__B, (__v4sf)__A)); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpltss ((__v4sf) __B, + (__v4sf) + __A)); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpge_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_movss( - (__v4sf)__A, (__v4sf)__builtin_ia32_cmpless((__v4sf)__B, (__v4sf)__A)); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpless ((__v4sf) __B, + (__v4sf) + __A)); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpneq_ss(__m128 __A, __m128 __B) { - return 
(__m128)__builtin_ia32_cmpneqss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpnlt_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpnltss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnlt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpnle_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpnless((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnle_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpngt_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_movss( - (__v4sf)__A, (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__B, (__v4sf)__A)); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpngt_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpnltss ((__v4sf) __B, + (__v4sf) + __A)); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpnge_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_movss( - (__v4sf)__A, (__v4sf)__builtin_ia32_cmpnless((__v4sf)__B, (__v4sf)__A)); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnge_ss (__m128 __A, __m128 __B) +{ + return (__m128) 
__builtin_ia32_movss ((__v4sf) __A, + (__v4sf) + __builtin_ia32_cmpnless ((__v4sf) __B, + (__v4sf) + __A)); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpord_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpordss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpord_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpunord_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpunordss((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpunord_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpeq_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpeqps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmplt_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpltps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmple_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpleps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpgt_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpgtps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpge_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpgeps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpneq_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpneqps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpnlt_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpnltps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnlt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpnle_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpnleps((__v4sf)__A, 
(__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnle_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpngt_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpngtps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpngt_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpnge_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpngeps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnge_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpord_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpordps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpord_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cmpunord_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_cmpunordps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpunord_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_comieq_ss(__m128 __A, __m128 __B) { - 
return __builtin_ia32_comieq((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comieq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_comilt_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_comilt((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comilt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_comile_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_comile((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comile_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_comigt_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_comigt((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comigt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_comige_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_comige((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comige_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_comineq_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_comineq((__v4sf)__A, (__v4sf)__B); +extern __inline int 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comineq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_ucomieq_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_ucomieq((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomieq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_ucomilt_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_ucomilt((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomilt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_ucomile_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_ucomile((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomile_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_ucomigt_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_ucomigt((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomigt_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_ucomige_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_ucomige((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_ucomige_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_ucomineq_ss(__m128 __A, __m128 __B) { - return __builtin_ia32_ucomineq((__v4sf)__A, (__v4sf)__B); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomineq_ss (__m128 __A, __m128 __B) +{ + return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtss_si32(__m128 __A) { - return __builtin_ia32_cvtss2si((__v4sf)__A); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_si32 (__m128 __A) +{ + return __builtin_ia32_cvtss2si ((__v4sf) __A); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvt_ss2si(__m128 __A) { - return _mm_cvtss_si32(__A); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_ss2si (__m128 __A) +{ + return _mm_cvtss_si32 (__A); } #ifdef __x86_64__ -extern __inline long long - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtss_si64(__m128 __A) { - return __builtin_ia32_cvtss2si64((__v4sf)__A); +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_si64 (__m128 __A) +{ + return __builtin_ia32_cvtss2si64 ((__v4sf) __A); } -extern __inline long long - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtss_si64x(__m128 __A) { - return __builtin_ia32_cvtss2si64((__v4sf)__A); +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_si64x (__m128 __A) +{ + return __builtin_ia32_cvtss2si64 ((__v4sf) __A); } #endif -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtps_pi32(__m128 __A) { - 
return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__A); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_pi32 (__m128 __A) +{ + return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvt_ps2pi(__m128 __A) { - return _mm_cvtps_pi32(__A); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_ps2pi (__m128 __A) +{ + return _mm_cvtps_pi32 (__A); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvttss_si32(__m128 __A) { - return __builtin_ia32_cvttss2si((__v4sf)__A); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_si32 (__m128 __A) +{ + return __builtin_ia32_cvttss2si ((__v4sf) __A); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtt_ss2si(__m128 __A) { - return _mm_cvttss_si32(__A); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_ss2si (__m128 __A) +{ + return _mm_cvttss_si32 (__A); } #ifdef __x86_64__ -extern __inline long long - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvttss_si64(__m128 __A) { - return __builtin_ia32_cvttss2si64((__v4sf)__A); +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_si64 (__m128 __A) +{ + return __builtin_ia32_cvttss2si64 ((__v4sf) __A); } -extern __inline long long - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvttss_si64x(__m128 __A) { - return __builtin_ia32_cvttss2si64((__v4sf)__A); +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_si64x (__m128 __A) +{ + return __builtin_ia32_cvttss2si64 ((__v4sf) __A); } #endif -extern __inline __m64 - __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) - _mm_cvttps_pi32(__m128 __A) { - return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__A); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttps_pi32 (__m128 __A) +{ + return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtt_ps2pi(__m128 __A) { - return _mm_cvttps_pi32(__A); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_ps2pi (__m128 __A) +{ + return _mm_cvttps_pi32 (__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtsi32_ss(__m128 __A, int __B) { - return (__m128)__builtin_ia32_cvtsi2ss((__v4sf)__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi32_ss (__m128 __A, int __B) +{ + return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvt_si2ss(__m128 __A, int __B) { - return _mm_cvtsi32_ss(__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_si2ss (__m128 __A, int __B) +{ + return _mm_cvtsi32_ss (__A, __B); } #ifdef __x86_64__ -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtsi64_ss(__m128 __A, long long __B) { - return (__m128)__builtin_ia32_cvtsi642ss((__v4sf)__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtsi64x_ss(__m128 __A, long long __B) { - return (__m128)__builtin_ia32_cvtsi642ss((__v4sf)__A, __B); +extern __inline __m128 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64x_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); } #endif -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtpi32_ps(__m128 __A, __m64 __B) { - return (__m128)__builtin_ia32_cvtpi2ps((__v4sf)__A, (__v2si)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi32_ps (__m128 __A, __m64 __B) +{ + return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvt_pi2ps(__m128 __A, __m64 __B) { - return _mm_cvtpi32_ps(__A, __B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_pi2ps (__m128 __A, __m64 __B) +{ + return _mm_cvtpi32_ps (__A, __B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtpi16_ps(__m64 __A) { +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi16_ps (__m64 __A) +{ __v4hi __sign; __v2si __hisi, __losi; __v4sf __zero, __ra, __rb; - __sign = __builtin_ia32_pcmpgtw((__v4hi)0LL, (__v4hi)__A); - __losi = (__v2si)__builtin_ia32_punpcklwd((__v4hi)__A, __sign); - __hisi = (__v2si)__builtin_ia32_punpckhwd((__v4hi)__A, __sign); - __zero = (__v4sf)_mm_setzero_ps(); - __ra = __builtin_ia32_cvtpi2ps(__zero, __losi); - __rb = __builtin_ia32_cvtpi2ps(__ra, __hisi); - return (__m128)__builtin_ia32_movlhps(__ra, __rb); + __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A); + __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign); + __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign); + __zero = (__v4sf) _mm_setzero_ps (); + __ra = __builtin_ia32_cvtpi2ps (__zero, __losi); + __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi); + return (__m128) 
__builtin_ia32_movlhps (__ra, __rb); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtpu16_ps(__m64 __A) { +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpu16_ps (__m64 __A) +{ __v2si __hisi, __losi; __v4sf __zero, __ra, __rb; - __losi = (__v2si)__builtin_ia32_punpcklwd((__v4hi)__A, (__v4hi)0LL); - __hisi = (__v2si)__builtin_ia32_punpckhwd((__v4hi)__A, (__v4hi)0LL); - __zero = (__v4sf)_mm_setzero_ps(); - __ra = __builtin_ia32_cvtpi2ps(__zero, __losi); - __rb = __builtin_ia32_cvtpi2ps(__ra, __hisi); - return (__m128)__builtin_ia32_movlhps(__ra, __rb); + __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL); + __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL); + __zero = (__v4sf) _mm_setzero_ps (); + __ra = __builtin_ia32_cvtpi2ps (__zero, __losi); + __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi); + return (__m128) __builtin_ia32_movlhps (__ra, __rb); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtpi8_ps(__m64 __A) { +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi8_ps (__m64 __A) +{ __v8qi __sign; - __sign = __builtin_ia32_pcmpgtb((__v8qi)0LL, (__v8qi)__A); - __A = (__m64)__builtin_ia32_punpcklbw((__v8qi)__A, __sign); + __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A); + __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign); return _mm_cvtpi16_ps(__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtpu8_ps(__m64 __A) { - __A = (__m64)__builtin_ia32_punpcklbw((__v8qi)__A, (__v8qi)0LL); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpu8_ps(__m64 __A) +{ + __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL); return _mm_cvtpu16_ps(__A); } -extern __inline __m128 - 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtpi32x2_ps(__m64 __A, __m64 __B) { - __v4sf __zero = (__v4sf)_mm_setzero_ps(); - __v4sf __sfa = __builtin_ia32_cvtpi2ps(__zero, (__v2si)__A); - __v4sf __sfb = __builtin_ia32_cvtpi2ps(__sfa, (__v2si)__B); - return (__m128)__builtin_ia32_movlhps(__sfa, __sfb); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi32x2_ps(__m64 __A, __m64 __B) +{ + __v4sf __zero = (__v4sf) _mm_setzero_ps (); + __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A); + __v4sf __sfb = __builtin_ia32_cvtpi2ps (__sfa, (__v2si)__B); + return (__m128) __builtin_ia32_movlhps (__sfa, __sfb); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtps_pi16(__m128 __A) { +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_pi16(__m128 __A) +{ __v4sf __hisf = (__v4sf)__A; - __v4sf __losf = __builtin_ia32_movhlps(__hisf, __hisf); - __v2si __hisi = __builtin_ia32_cvtps2pi(__hisf); - __v2si __losi = __builtin_ia32_cvtps2pi(__losf); - return (__m64)__builtin_ia32_packssdw(__hisi, __losi); + __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf); + __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf); + __v2si __losi = __builtin_ia32_cvtps2pi (__losf); + return (__m64) __builtin_ia32_packssdw (__hisi, __losi); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtps_pi8(__m128 __A) { - __v4hi __tmp = (__v4hi)_mm_cvtps_pi16(__A); - return (__m64)__builtin_ia32_packsswb(__tmp, (__v4hi)0LL); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_pi8(__m128 __A) +{ + __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A); + return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL); } #ifdef __OPTIMIZE__ -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - 
_mm_shuffle_ps(__m128 __A, __m128 __B, int const __mask) { - return (__m128)__builtin_ia32_shufps((__v4sf)__A, (__v4sf)__B, __mask); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) +{ + return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask); } #else -#define _mm_shuffle_ps(A, B, MASK) \ - ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ - (int)(MASK))) +#define _mm_shuffle_ps(A, B, MASK) ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(MASK))) #endif -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_unpackhi_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_unpckhps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_unpacklo_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_unpcklps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_loadh_pi(__m128 __A, __m64 const *__P) { - return (__m128)__builtin_ia32_loadhps((__v4sf)__A, (const __v2sf *)__P); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadh_pi (__m128 __A, __m64 const *__P) +{ + return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - 
_mm_storeh_pi(__m64 *__P, __m128 __A) { - __builtin_ia32_storehps((__v2sf *)__P, (__v4sf)__A); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeh_pi (__m64 *__P, __m128 __A) +{ + __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_movehl_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_movhlps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movehl_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_movelh_ps(__m128 __A, __m128 __B) { - return (__m128)__builtin_ia32_movlhps((__v4sf)__A, (__v4sf)__B); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movelh_ps (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_loadl_pi(__m128 __A, __m64 const *__P) { - return (__m128)__builtin_ia32_loadlps((__v4sf)__A, (const __v2sf *)__P); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadl_pi (__m128 __A, __m64 const *__P) +{ + return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_storel_pi(__m64 *__P, __m128 __A) { - __builtin_ia32_storelps((__v2sf *)__P, (__v4sf)__A); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storel_pi (__m64 *__P, __m128 __A) +{ + __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
- _mm_movemask_ps(__m128 __A) { - return __builtin_ia32_movmskps((__v4sf)__A); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movemask_ps (__m128 __A) +{ + return __builtin_ia32_movmskps ((__v4sf)__A); } -extern __inline unsigned int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_getcsr(void) { - return __builtin_ia32_stmxcsr(); +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getcsr (void) +{ + return __builtin_ia32_stmxcsr (); } -extern __inline unsigned int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_GET_EXCEPTION_STATE(void) { +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_GET_EXCEPTION_STATE (void) +{ return _mm_getcsr() & _MM_EXCEPT_MASK; } -extern __inline unsigned int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_GET_EXCEPTION_MASK(void) { +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_GET_EXCEPTION_MASK (void) +{ return _mm_getcsr() & _MM_MASK_MASK; } -extern __inline unsigned int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_GET_ROUNDING_MODE(void) { +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_GET_ROUNDING_MODE (void) +{ return _mm_getcsr() & _MM_ROUND_MASK; } -extern __inline unsigned int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_GET_FLUSH_ZERO_MODE(void) { +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_GET_FLUSH_ZERO_MODE (void) +{ return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_setcsr(unsigned int __I) { - __builtin_ia32_ldmxcsr(__I); +extern __inline void __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_setcsr (unsigned int __I) +{ + __builtin_ia32_ldmxcsr (__I); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_SET_EXCEPTION_STATE(unsigned int __mask) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_SET_EXCEPTION_STATE(unsigned int __mask) +{ _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_SET_EXCEPTION_MASK(unsigned int __mask) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_SET_EXCEPTION_MASK (unsigned int __mask) +{ _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_SET_ROUNDING_MODE(unsigned int __mode) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_SET_ROUNDING_MODE (unsigned int __mode) +{ _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _MM_SET_FLUSH_ZERO_MODE(unsigned int __mode) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode) +{ _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_set_ss(float __F) { - return __extension__(__m128)(__v4sf){__F, 0.0f, 0.0f, 0.0f}; +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_ss (float __F) +{ + return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f }; } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_set1_ps(float __F) { - return __extension__(__m128)(__v4sf){__F, __F, __F, __F}; 
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_ps (float __F) +{ + return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F }; } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_set_ps1(float __F) { - return _mm_set1_ps(__F); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_ps1 (float __F) +{ + return _mm_set1_ps (__F); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_load_ss(float const *__P) { - return _mm_set_ss(*__P); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ss (float const *__P) +{ + return _mm_set_ss (*__P); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_load1_ps(float const *__P) { - return _mm_set1_ps(*__P); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load1_ps (float const *__P) +{ + return _mm_set1_ps (*__P); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_load_ps1(float const *__P) { - return _mm_load1_ps(__P); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ps1 (float const *__P) +{ + return _mm_load1_ps (__P); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_load_ps(float const *__P) { +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ps (float const *__P) +{ return *(__m128 *)__P; } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_loadu_ps(float const *__P) { +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_ps (float const *__P) +{ return *(__m128_u *)__P; } -extern __inline 
__m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_loadr_ps(float const *__P) { +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadr_ps (float const *__P) +{ __v4sf __tmp = *(__v4sf *)__P; - return (__m128)__builtin_ia32_shufps(__tmp, __tmp, _MM_SHUFFLE(0, 1, 2, 3)); + return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3)); } -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, - __artificial__)) -_mm_set_ps(const float __Z, const float __Y, const float __X, const float __W) { - return __extension__(__m128)(__v4sf){__W, __X, __Y, __Z}; +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W) +{ + return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z }; } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_setr_ps(float __Z, float __Y, float __X, float __W) { - return __extension__(__m128)(__v4sf){__Z, __Y, __X, __W}; +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_ps (float __Z, float __Y, float __X, float __W) +{ + return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W }; } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_store_ss(float *__P, __m128 __A) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ss (float *__P, __m128 __A) +{ *__P = ((__v4sf)__A)[0]; } -extern __inline float - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_cvtss_f32(__m128 __A) { +extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_f32 (__m128 __A) +{ return ((__v4sf)__A)[0]; } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - 
_mm_store_ps(float *__P, __m128 __A) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ps (float *__P, __m128 __A) +{ *(__m128 *)__P = __A; } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_storeu_ps(float *__P, __m128 __A) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_ps (float *__P, __m128 __A) +{ *(__m128_u *)__P = __A; } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_store1_ps(float *__P, __m128 __A) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store1_ps (float *__P, __m128 __A) +{ __v4sf __va = (__v4sf)__A; - __v4sf __tmp = __builtin_ia32_shufps(__va, __va, _MM_SHUFFLE(0, 0, 0, 0)); - _mm_storeu_ps(__P, __tmp); + __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0)); + _mm_storeu_ps (__P, __tmp); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_store_ps1(float *__P, __m128 __A) { - _mm_store1_ps(__P, __A); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ps1 (float *__P, __m128 __A) +{ + _mm_store1_ps (__P, __A); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_storer_ps(float *__P, __m128 __A) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storer_ps (float *__P, __m128 __A) +{ __v4sf __va = (__v4sf)__A; - __v4sf __tmp = __builtin_ia32_shufps(__va, __va, _MM_SHUFFLE(0, 1, 2, 3)); - _mm_store_ps(__P, __tmp); + __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3)); + _mm_store_ps (__P, __tmp); } -extern __inline __m128 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_move_ss(__m128 __A, __m128 __B) { - return (__m128)__builtin_shuffle( - 
(__v4sf)__A, (__v4sf)__B, - __extension__(__attribute__((__vector_size__(16))) int){4, 1, 2, 3}); +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_shuffle ((__v4sf)__A, (__v4sf)__B, + __extension__ + (__attribute__((__vector_size__ (16))) int) + {4,1,2,3}); } #ifdef __OPTIMIZE__ -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_extract_pi16(__m64 const __A, int const __N) { - return (unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)__A, __N); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_extract_pi16 (__m64 const __A, int const __N) +{ + return (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pextrw(__m64 const __A, int const __N) { - return _mm_extract_pi16(__A, __N); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pextrw (__m64 const __A, int const __N) +{ + return _mm_extract_pi16 (__A, __N); } #else -#define _mm_extract_pi16(A, N) \ - ((int)(unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)(__m64)(A), \ - (int)(N))) +#define _mm_extract_pi16(A, N) ((int) (unsigned short) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N))) #define _m_pextrw(A, N) _mm_extract_pi16(A, N) #endif #ifdef __OPTIMIZE__ -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_insert_pi16(__m64 const __A, int const __D, int const __N) { - return (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)__A, __D, __N); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_pi16 (__m64 const __A, int const __D, int const __N) +{ + return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N); } -extern __inline __m64 - 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pinsrw(__m64 const __A, int const __D, int const __N) { - return _mm_insert_pi16(__A, __D, __N); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pinsrw (__m64 const __A, int const __D, int const __N) +{ + return _mm_insert_pi16 (__A, __D, __N); } #else -#define _mm_insert_pi16(A, D, N) \ - ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)(__m64)(A), (int)(D), (int)(N))) +#define _mm_insert_pi16(A, D, N) ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), (int)(D), (int)(N))) #define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N) #endif -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_max_pi16(__m64 __A, __m64 __B) { - return (__m64)__builtin_ia32_pmaxsw((__v4hi)__A, (__v4hi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pmaxsw(__m64 __A, __m64 __B) { - return _mm_max_pi16(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pmaxsw (__m64 __A, __m64 __B) +{ + return _mm_max_pi16 (__A, __B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_max_pu8(__m64 __A, __m64 __B) { - return (__m64)__builtin_ia32_pmaxub((__v8qi)__A, (__v8qi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pmaxub(__m64 __A, __m64 __B) { - return _mm_max_pu8(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_m_pmaxub (__m64 __A, __m64 __B) +{ + return _mm_max_pu8 (__A, __B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_min_pi16(__m64 __A, __m64 __B) { - return (__m64)__builtin_ia32_pminsw((__v4hi)__A, (__v4hi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pminsw(__m64 __A, __m64 __B) { - return _mm_min_pi16(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pminsw (__m64 __A, __m64 __B) +{ + return _mm_min_pi16 (__A, __B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_min_pu8(__m64 __A, __m64 __B) { - return (__m64)__builtin_ia32_pminub((__v8qi)__A, (__v8qi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pminub(__m64 __A, __m64 __B) { - return _mm_min_pu8(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pminub (__m64 __A, __m64 __B) +{ + return _mm_min_pu8 (__A, __B); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_movemask_pi8(__m64 __A) { - return __builtin_ia32_pmovmskb((__v8qi)__A); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movemask_pi8 (__m64 __A) +{ + return __builtin_ia32_pmovmskb ((__v8qi)__A); } -extern __inline int - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pmovmskb(__m64 __A) { 
- return _mm_movemask_pi8(__A); +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pmovmskb (__m64 __A) +{ + return _mm_movemask_pi8 (__A); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_mulhi_pu16(__m64 __A, __m64 __B) { - return (__m64)__builtin_ia32_pmulhuw((__v4hi)__A, (__v4hi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhi_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pmulhuw(__m64 __A, __m64 __B) { - return _mm_mulhi_pu16(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pmulhuw (__m64 __A, __m64 __B) +{ + return _mm_mulhi_pu16 (__A, __B); } #ifdef __OPTIMIZE__ -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_shuffle_pi16(__m64 __A, int const __N) { - return (__m64)__builtin_ia32_pshufw((__v4hi)__A, __N); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pi16 (__m64 __A, int const __N) +{ + return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pshufw(__m64 __A, int const __N) { - return _mm_shuffle_pi16(__A, __N); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pshufw (__m64 __A, int const __N) +{ + return _mm_shuffle_pi16 (__A, __N); } #else -#define _mm_shuffle_pi16(A, N) \ - ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(A), (int)(N))) -#define _m_pshufw(A, N) _mm_shuffle_pi16(A, N) +#define _mm_shuffle_pi16(A, N) ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N))) +#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N) #endif -extern __inline void - 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_maskmove_si64(__m64 __A, __m64 __N, char *__P) { +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P) +{ #ifdef __MMX_WITH_SSE__ - typedef long long __v2di __attribute__((__vector_size__(16))); - typedef char __v16qi __attribute__((__vector_size__(16))); - __v2di __A128 = __extension__(__v2di){((__v1di)__A)[0], 0}; - __v2di __N128 = __extension__(__v2di){((__v1di)__N)[0], 0}; - __SIZE_TYPE__ offset = ((__SIZE_TYPE__)__P) & 0xf; - if (offset) { - if (offset > 8) offset = 8; - __P = (char *)(((__SIZE_TYPE__)__P) - offset); - switch (offset) { - case 1: - __A128 = __builtin_ia32_pslldqi128(__A128, 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 8); - break; - case 2: - __A128 = __builtin_ia32_pslldqi128(__A128, 2 * 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 2 * 8); - break; - case 3: - __A128 = __builtin_ia32_pslldqi128(__A128, 3 * 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 3 * 8); - break; - case 4: - __A128 = __builtin_ia32_pslldqi128(__A128, 4 * 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 4 * 8); - break; - case 5: - __A128 = __builtin_ia32_pslldqi128(__A128, 5 * 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 5 * 8); - break; - case 6: - __A128 = __builtin_ia32_pslldqi128(__A128, 6 * 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 6 * 8); - break; - case 7: - __A128 = __builtin_ia32_pslldqi128(__A128, 7 * 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 7 * 8); - break; - case 8: - __A128 = __builtin_ia32_pslldqi128(__A128, 8 * 8); - __N128 = __builtin_ia32_pslldqi128(__N128, 8 * 8); - break; - default: - break; + typedef long long __v2di __attribute__ ((__vector_size__ (16))); + typedef char __v16qi __attribute__ ((__vector_size__ (16))); + __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 }; + __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 }; + 
__SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf; + if (offset) + { + if (offset > 8) + offset = 8; + __P = (char *) (((__SIZE_TYPE__) __P) - offset); + switch (offset) + { + case 1: + __A128 = __builtin_ia32_pslldqi128 (__A128, 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 8); + break; + case 2: + __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8); + break; + case 3: + __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8); + break; + case 4: + __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8); + break; + case 5: + __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8); + break; + case 6: + __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8); + break; + case 7: + __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8); + break; + case 8: + __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8); + break; + default: + break; + } } - } - __builtin_ia32_maskmovdqu((__v16qi)__A128, (__v16qi)__N128, __P); + __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P); #else - __builtin_ia32_maskmovq((__v8qi)__A, (__v8qi)__N, __P); + __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P); #endif } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_maskmovq(__m64 __A, __m64 __N, char *__P) { - _mm_maskmove_si64(__A, __N, __P); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_maskmovq (__m64 __A, __m64 __N, char *__P) +{ + _mm_maskmove_si64 (__A, __N, __P); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_avg_pu8(__m64 __A, __m64 __B) { - return 
(__m64)__builtin_ia32_pavgb((__v8qi)__A, (__v8qi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_avg_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pavgb(__m64 __A, __m64 __B) { - return _mm_avg_pu8(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pavgb (__m64 __A, __m64 __B) +{ + return _mm_avg_pu8 (__A, __B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_avg_pu16(__m64 __A, __m64 __B) { - return (__m64)__builtin_ia32_pavgw((__v4hi)__A, (__v4hi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_avg_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_pavgw(__m64 __A, __m64 __B) { - return _mm_avg_pu16(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pavgw (__m64 __A, __m64 __B) +{ + return _mm_avg_pu16 (__A, __B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_sad_pu8(__m64 __A, __m64 __B) { - return (__m64)__builtin_ia32_psadbw((__v8qi)__A, (__v8qi)__B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sad_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B); } -extern __inline __m64 - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_psadbw(__m64 __A, __m64 __B) { - return _mm_sad_pu8(__A, __B); +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_psadbw (__m64 __A, __m64 __B) +{ + return _mm_sad_pu8 (__A, __B); 
} -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_stream_pi(__m64 *__P, __m64 __A) { - __builtin_ia32_movntq((unsigned long long *)__P, (unsigned long long)__A); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_pi (__m64 *__P, __m64 __A) +{ + __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_stream_ps(float *__P, __m128 __A) { - __builtin_ia32_movntps(__P, (__v4sf)__A); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_ps (float *__P, __m128 __A) +{ + __builtin_ia32_movntps (__P, (__v4sf)__A); } -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_sfence(void) { - __builtin_ia32_sfence(); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sfence (void) +{ + __builtin_ia32_sfence (); } -#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \ - __v4sf __t0 = __builtin_ia32_unpcklps(__r0, __r1); \ - __v4sf __t1 = __builtin_ia32_unpcklps(__r2, __r3); \ - __v4sf __t2 = __builtin_ia32_unpckhps(__r0, __r1); \ - __v4sf __t3 = __builtin_ia32_unpckhps(__r2, __r3); \ - (row0) = __builtin_ia32_movlhps(__t0, __t1); \ - (row1) = __builtin_ia32_movhlps(__t1, __t0); \ - (row2) = __builtin_ia32_movlhps(__t2, __t3); \ - (row3) = __builtin_ia32_movhlps(__t3, __t2); \ - } while (0) +#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) do { __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); (row0) = __builtin_ia32_movlhps (__t0, __t1); 
(row1) = __builtin_ia32_movhlps (__t1, __t0); (row2) = __builtin_ia32_movlhps (__t2, __t3); (row3) = __builtin_ia32_movhlps (__t3, __t2); } while (0) #include "third_party/intel/emmintrin.internal.h" #ifdef __DISABLE_SSE__ #undef __DISABLE_SSE__ #pragma GCC pop_options #endif -extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _mm_pause(void) { - __builtin_ia32_pause(); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_pause (void) +{ + __builtin_ia32_pause (); } #endif #endif diff --git a/third_party/less/main.c b/third_party/less/main.c index 21ed227ca..529fcb457 100644 --- a/third_party/less/main.c +++ b/third_party/less/main.c @@ -1,27 +1,27 @@ -asm(".ident\t\"\\n\\n\ -Less\\n\ -Copyright (C) 1984-2023 Mark Nudelman\\n\ -\\n\ -Redistribution and use in source and binary forms, with or without\\n\ -modification, are permitted provided that the following conditions\\n\ -are met:\\n\ -1. Redistributions of source code must retain the above copyright\\n\ - notice, this list of conditions and the following disclaimer.\\n\ -2. Redistributions in binary form must reproduce the above copyright\\n\ - notice in the documentation and/or other materials provided with\\n\ - the distribution.\\n\ -\\n\ -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY\\n\ -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\\n\ -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\\n\ -PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE\\n\ -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\\n\ -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT\\n\ -OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\\n\ -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\\n\ -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\\n\ -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN\\n\ -IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\""); +__notice(less_notice, "\ +Less\n\ +Copyright (C) 1984-2023 Mark Nudelman\n\ +\n\ +Redistribution and use in source and binary forms, with or without\n\ +modification, are permitted provided that the following conditions\n\ +are met:\n\ +1. Redistributions of source code must retain the above copyright\n\ + notice, this list of conditions and the following disclaimer.\n\ +2. Redistributions in binary form must reproduce the above copyright\n\ + notice in the documentation and/or other materials provided with\n\ + the distribution.\n\ +\n\ +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY\n\ +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n\ +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n\ +PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE\n\ +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n\ +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT\n\ +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\n\ +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n\ +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\n\ +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN\n\ +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."); /* * Copyright (C) 1984-2023 Mark Nudelman diff --git a/third_party/libcxx/vector b/third_party/libcxx/vector index 014c8a51d..91735b728 100644 --- a/third_party/libcxx/vector +++ b/third_party/libcxx/vector @@ -1456,6 +1456,8 @@ vector<_Tp, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __las else { __vdeallocate(); + // [jart] make compiler not complain about maximum object size + __asm__("" : "+r"(__new_size)); __vallocate(__recommend(__new_size)); __construct_at_end(__first, __last, __new_size); } diff --git a/third_party/linenoise/linenoise.c b/third_party/linenoise/linenoise.c index c6386b2ba..cf673ba1f 100644 --- a/third_party/linenoise/linenoise.c +++ b/third_party/linenoise/linenoise.c @@ -172,11 +172,11 @@ #include "net/http/escape.h" #include "tool/build/lib/case.h" -asm(".ident\t\"\\n\\n\ -Cosmopolitan Linenoise (BSD-2)\\n\ -Copyright 2018-2020 Justine Tunney \\n\ -Copyright 2010-2016 Salvatore Sanfilippo \\n\ -Copyright 2010-2013 Pieter Noordhuis \""); +__notice(linenoise_notice, "\ +Cosmopolitan Linenoise (BSD-2)\n\ +Copyright 2018-2020 Justine Tunney \n\ +Copyright 2010-2016 Salvatore Sanfilippo \n\ +Copyright 2010-2013 Pieter Noordhuis "); #define LINENOISE_POLL_MS 50 @@ -2644,10 +2644,7 @@ static void linenoiseAtExit(void) { linenoiseRingFree(); } +__attribute__((__constructor__(99))) static textstartup void linenoiseInit() { atexit(linenoiseAtExit); } - -const void *const linenoiseCtor[] initarray = { - linenoiseInit, -}; 
diff --git a/third_party/lua/BUILD.mk b/third_party/lua/BUILD.mk index 60a73e960..bd62923b2 100644 --- a/third_party/lua/BUILD.mk +++ b/third_party/lua/BUILD.mk @@ -85,6 +85,7 @@ THIRD_PARTY_LUA_A_SRCS = \ third_party/lua/llock.c \ third_party/lua/lmathlib.c \ third_party/lua/lmem.c \ + third_party/lua/lnotice.c \ third_party/lua/loadlib.c \ third_party/lua/lobject.c \ third_party/lua/lopcodes.c \ diff --git a/third_party/lua/lapi.c b/third_party/lua/lapi.c index 15230810a..ae8988f2c 100644 --- a/third_party/lua/lapi.c +++ b/third_party/lua/lapi.c @@ -42,11 +42,7 @@ #include "third_party/lua/lua.h" #include "third_party/lua/lundump.h" #include "third_party/lua/lvm.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); const char lua_ident[] = diff --git a/third_party/lua/lauxlib.c b/third_party/lua/lauxlib.c index a1555ba18..1fa74d1f2 100644 --- a/third_party/lua/lauxlib.c +++ b/third_party/lua/lauxlib.c @@ -34,11 +34,7 @@ #include "third_party/lua/lauxlib.h" #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /** diff --git a/third_party/lua/lbaselib.c b/third_party/lua/lbaselib.c index f5ab58b16..5e936c406 100644 --- a/third_party/lua/lbaselib.c +++ b/third_party/lua/lbaselib.c @@ -32,11 +32,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); static int luaB_print (lua_State *L) { diff --git a/third_party/lua/lcode.c b/third_party/lua/lcode.c index 6468d2243..c6c9c0a2b 100644 --- a/third_party/lua/lcode.c +++ 
b/third_party/lua/lcode.c @@ -42,11 +42,7 @@ #include "third_party/lua/ltable.h" #include "third_party/lua/lua.h" #include "third_party/lua/lvm.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* Maximum number of registers in a Lua function (must fit in 8 bits) */ diff --git a/third_party/lua/lcorolib.c b/third_party/lua/lcorolib.c index e15b3e749..be6f75f5d 100644 --- a/third_party/lua/lcorolib.c +++ b/third_party/lua/lcorolib.c @@ -31,11 +31,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); static lua_State *getco (lua_State *L) { diff --git a/third_party/lua/ldblib.c b/third_party/lua/ldblib.c index 8918ffcbe..74f6ed32b 100644 --- a/third_party/lua/ldblib.c +++ b/third_party/lua/ldblib.c @@ -32,11 +32,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/ldebug.c b/third_party/lua/ldebug.c index 16197115f..8e103095f 100644 --- a/third_party/lua/ldebug.c +++ b/third_party/lua/ldebug.c @@ -42,11 +42,7 @@ #include "third_party/lua/ltm.h" #include "third_party/lua/lua.h" #include "third_party/lua/lvm.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); #define noLuaClosure(f) ((f) == NULL || (f)->c.tt == LUA_VCCL) diff --git a/third_party/lua/ldo.c b/third_party/lua/ldo.c index 1978b1ff4..abfa70719 100644 --- a/third_party/lua/ldo.c +++ 
b/third_party/lua/ldo.c @@ -51,11 +51,7 @@ #include "third_party/lua/lundump.h" #include "third_party/lua/lvm.h" #include "third_party/lua/lzio.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); #define errorstatus(s) ((s) > LUA_YIELD) diff --git a/third_party/lua/ldump.c b/third_party/lua/ldump.c index 8e4d6b7c0..a1e9666b1 100644 --- a/third_party/lua/ldump.c +++ b/third_party/lua/ldump.c @@ -32,11 +32,7 @@ #include "third_party/lua/lstate.h" #include "third_party/lua/lua.h" #include "third_party/lua/lundump.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); typedef struct { diff --git a/third_party/lua/lfunc.c b/third_party/lua/lfunc.c index b3e69eef3..7ff7cafd0 100644 --- a/third_party/lua/lfunc.c +++ b/third_party/lua/lfunc.c @@ -37,11 +37,7 @@ #include "third_party/lua/lstate.h" #include "third_party/lua/ltm.h" #include "third_party/lua/lua.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); CClosure *luaF_newCclosure (lua_State *L, int nupvals) { diff --git a/third_party/lua/lgc.c b/third_party/lua/lgc.c index 57027bb3b..a52142624 100644 --- a/third_party/lua/lgc.c +++ b/third_party/lua/lgc.c @@ -40,11 +40,7 @@ #include "third_party/lua/ltable.h" #include "third_party/lua/ltm.h" #include "third_party/lua/lua.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/linit.c b/third_party/lua/linit.c index e625cc728..2ebb7fbd0 100644 --- a/third_party/lua/linit.c +++ b/third_party/lua/linit.c @@ -46,11 +46,7 @@ #include "third_party/lua/lprefix.h" 
#include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/liolib.c b/third_party/lua/liolib.c index 30cc43815..c8bc69076 100644 --- a/third_party/lua/liolib.c +++ b/third_party/lua/liolib.c @@ -37,11 +37,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/llex.c b/third_party/lua/llex.c index 3e1446b7f..688d2a3b0 100644 --- a/third_party/lua/llex.c +++ b/third_party/lua/llex.c @@ -40,11 +40,7 @@ #include "third_party/lua/ltable.h" #include "third_party/lua/lua.h" #include "third_party/lua/lzio.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); #define next(ls) (ls->current = zgetc(ls->z)) diff --git a/third_party/lua/lmathlib.c b/third_party/lua/lmathlib.c index b0968ed16..8b1a291f4 100644 --- a/third_party/lua/lmathlib.c +++ b/third_party/lua/lmathlib.c @@ -34,11 +34,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); #undef PI diff --git a/third_party/lua/lmem.c b/third_party/lua/lmem.c index 7d0ee1648..780e01bf8 100644 --- a/third_party/lua/lmem.c +++ b/third_party/lua/lmem.c @@ -36,11 +36,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lstate.h" #include "third_party/lua/lua.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT 
License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); #if defined(EMERGENCYGCTESTS) diff --git a/third_party/lua/lnotice.c b/third_party/lua/lnotice.c new file mode 100644 index 000000000..9eb4e4e17 --- /dev/null +++ b/third_party/lua/lnotice.c @@ -0,0 +1,3 @@ +__notice(lua_notice, "\ +Lua 5.4.3 (MIT License)\n\ +Copyright 1994–2021 Lua.org, PUC-Rio."); diff --git a/third_party/lua/loadlib.c b/third_party/lua/loadlib.c index 0feae4d7f..43bf0168d 100644 --- a/third_party/lua/loadlib.c +++ b/third_party/lua/loadlib.c @@ -34,11 +34,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* ** This module contains an implementation of loadlib for Unix systems diff --git a/third_party/lua/lobject.c b/third_party/lua/lobject.c index 24e03a9d6..d855ac166 100644 --- a/third_party/lua/lobject.c +++ b/third_party/lua/lobject.c @@ -38,11 +38,7 @@ #include "third_party/lua/lstring.h" #include "third_party/lua/lua.h" #include "third_party/lua/lvm.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); static lua_Integer intarith (lua_State *L, int op, lua_Integer v1, diff --git a/third_party/lua/lopcodes.c b/third_party/lua/lopcodes.c index 75558af3a..90d726553 100644 --- a/third_party/lua/lopcodes.c +++ b/third_party/lua/lopcodes.c @@ -29,11 +29,7 @@ #define LUA_CORE #include "third_party/lua/lopcodes.h" #include "third_party/lua/lprefix.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* ORDER OP */ diff --git a/third_party/lua/loslib.c 
b/third_party/lua/loslib.c index 647e473b9..4f979c59f 100644 --- a/third_party/lua/loslib.c +++ b/third_party/lua/loslib.c @@ -41,11 +41,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/lparser.c b/third_party/lua/lparser.c index 96f89293e..79e598fc1 100644 --- a/third_party/lua/lparser.c +++ b/third_party/lua/lparser.c @@ -42,11 +42,7 @@ #include "third_party/lua/lstring.h" #include "third_party/lua/ltable.h" #include "third_party/lua/lua.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* maximum number of local variables per function (must be smaller diff --git a/third_party/lua/lrepl.c b/third_party/lua/lrepl.c index 9a4236ac5..ec65fd069 100644 --- a/third_party/lua/lrepl.c +++ b/third_party/lua/lrepl.c @@ -48,11 +48,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); static const char *const kKeywordHints[] = { diff --git a/third_party/lua/lstate.c b/third_party/lua/lstate.c index f2479b38c..e44a5cf31 100644 --- a/third_party/lua/lstate.c +++ b/third_party/lua/lstate.c @@ -42,11 +42,7 @@ #include "third_party/lua/ltable.h" #include "third_party/lua/ltm.h" #include "third_party/lua/lua.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/lstring.c b/third_party/lua/lstring.c index 
9aa128242..68efe07a0 100644 --- a/third_party/lua/lstring.c +++ b/third_party/lua/lstring.c @@ -36,11 +36,7 @@ #include "third_party/lua/lstate.h" #include "third_party/lua/lstring.h" #include "third_party/lua/lua.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/lstrlib.c b/third_party/lua/lstrlib.c index 45aecbdbc..60db68c4a 100644 --- a/third_party/lua/lstrlib.c +++ b/third_party/lua/lstrlib.c @@ -34,11 +34,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/ltable.c b/third_party/lua/ltable.c index a00128613..63f2867f8 100644 --- a/third_party/lua/ltable.c +++ b/third_party/lua/ltable.c @@ -38,11 +38,7 @@ #include "third_party/lua/ltable.h" #include "third_party/lua/lua.h" #include "third_party/lua/lvm.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/ltablib.c b/third_party/lua/ltablib.c index b7a524445..fe8a1fe62 100644 --- a/third_party/lua/ltablib.c +++ b/third_party/lua/ltablib.c @@ -34,11 +34,7 @@ #include "third_party/lua/lprefix.h" #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/ltests.c b/third_party/lua/ltests.c index b96967efb..10f18d0dc 100644 --- a/third_party/lua/ltests.c +++ b/third_party/lua/ltests.c @@ -43,11 +43,7 @@ #include "third_party/lua/ltable.h" 
#include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/ltm.c b/third_party/lua/ltm.c index 59b9b8dd4..b7a0d3914 100644 --- a/third_party/lua/ltm.c +++ b/third_party/lua/ltm.c @@ -38,11 +38,7 @@ #include "third_party/lua/ltm.h" #include "third_party/lua/lua.h" #include "third_party/lua/lvm.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); static const char udatatypename[] = "userdata"; diff --git a/third_party/lua/lua.main.c b/third_party/lua/lua.main.c index f7155e185..a49b7ae58 100644 --- a/third_party/lua/lua.main.c +++ b/third_party/lua/lua.main.c @@ -50,11 +50,7 @@ #include "third_party/lua/lualib.h" #include "third_party/lua/lunix.h" #include "tool/args/args.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); STATIC_STACK_ALIGN(GetStackSize()); diff --git a/third_party/lua/luac.main.c b/third_party/lua/luac.main.c index c5765d888..8b22aa237 100644 --- a/third_party/lua/luac.main.c +++ b/third_party/lua/luac.main.c @@ -43,11 +43,7 @@ #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" #include "third_party/lua/lundump.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); static void PrintFunction(const Proto* f, int full); #define luaU_print PrintFunction diff --git a/third_party/lua/lvm.c b/third_party/lua/lvm.c index feaaf9cb4..36414ef32 100644 --- a/third_party/lua/lvm.c +++ b/third_party/lua/lvm.c @@ -41,11 +41,7 @@ #include "third_party/lua/ltm.h" #include 
"third_party/lua/lua.h" #include "third_party/lua/lvm.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); /* diff --git a/third_party/lua/lzio.c b/third_party/lua/lzio.c index da9e2f6c9..3c674495f 100644 --- a/third_party/lua/lzio.c +++ b/third_party/lua/lzio.c @@ -34,11 +34,7 @@ #include "third_party/lua/lstate.h" #include "third_party/lua/lua.h" #include "third_party/lua/lzio.h" - -asm(".ident\t\"\\n\\n\ -Lua 5.4.3 (MIT License)\\n\ -Copyright 1994–2021 Lua.org, PUC-Rio.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("lua_notice"); int luaZ_fill (ZIO *z) { diff --git a/third_party/lz4cli/BUILD.mk b/third_party/lz4cli/BUILD.mk index b1e6daf70..2440066db 100644 --- a/third_party/lz4cli/BUILD.mk +++ b/third_party/lz4cli/BUILD.mk @@ -58,9 +58,6 @@ o/$(MODE)/third_party/lz4cli/lz4cli.com.dbg: \ $(APE_NO_MODIFY_SELF) @$(APELINK) -o/$(MODE)/third_party/lz4cli/lz4cli.o: \ - third_party/lz4cli/COPYING - $(THIRD_PARTY_LZ4CLI_OBJS): \ $(BUILD_FILES) \ third_party/lz4cli/BUILD.mk diff --git a/third_party/lz4cli/COPYING b/third_party/lz4cli/COPYING index e5e0fa31d..c86990a2b 100644 --- a/third_party/lz4cli/COPYING +++ b/third_party/lz4cli/COPYING @@ -1,4 +1,3 @@ -.ident "\n LZ4cli - LZ4 Command Line Interface (GPL v2) Copyright (C) Yann Collet 2011-2016 @@ -18,4 +17,4 @@ with this program; if not, write to the Free Software Foundation, Inc., You can contact the author at : - LZ4 source repository : https://github.com/lz4/lz4 -- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c" +- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c diff --git a/third_party/lz4cli/lz4cli.c b/third_party/lz4cli/lz4cli.c index a2e5ecc57..55e327faa 100644 --- a/third_party/lz4cli/lz4cli.c +++ b/third_party/lz4cli/lz4cli.c @@ -30,7 +30,27 @@ The license of this compression CLI program is GPLv2. 
*/ -asm(".include \"third_party/lz4cli/COPYING\""); +__notice(lz4cli_notice, "\ +LZ4cli - LZ4 Command Line Interface (GPL v2)\n\ +Copyright (C) Yann Collet 2011-2016\n\ +\n\ +This program is free software; you can redistribute it and/or modify\n\ +it under the terms of the GNU General Public License as published by\n\ +the Free Software Foundation; either version 2 of the License, or\n\ +(at your option) any later version.\n\ +\n\ +This program is distributed in the hope that it will be useful,\n\ +but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ +GNU General Public License for more details.\n\ +\n\ +You should have received a copy of the GNU General Public License along\n\ +with this program; if not, write to the Free Software Foundation, Inc.,\n\ +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n\ +\n\ +You can contact the author at :\n\ +- LZ4 source repository : https://github.com/lz4/lz4\n\ +- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c"); /**************************** * Includes diff --git a/third_party/maxmind/maxminddb.c b/third_party/maxmind/maxminddb.c index a3d176253..a51ecc495 100644 --- a/third_party/maxmind/maxminddb.c +++ b/third_party/maxmind/maxminddb.c @@ -37,10 +37,9 @@ #include "libc/sysv/consts/sock.h" #include "tool/build/lib/case.h" -asm(".ident\t\"\\n\\n\ -libmaxminddb (Apache 2.0)\\n\ -Copyright 2013-2021 MaxMind Incorporated\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(libmaxminddb_notice, "\ +libmaxminddb (Apache 2.0)\n\ +Copyright 2013-2021 MaxMind Incorporated"); #define METADATA_MARKER "\xab\xcd\xefMaxMind.com" #define METADATA_BLOCK_MAX_SIZE 131072 /* This is 128kb */ diff --git a/third_party/mbedtls/aes.c b/third_party/mbedtls/aes.c index a36aa52e5..6af04dd7c 100644 --- a/third_party/mbedtls/aes.c +++ b/third_party/mbedtls/aes.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/common.h" #include 
"third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview FIPS-197 compliant AES implementation diff --git a/third_party/mbedtls/aesce.c b/third_party/mbedtls/aesce.c index 2866769f5..8ede1f1b8 100644 --- a/third_party/mbedtls/aesce.c +++ b/third_party/mbedtls/aesce.c @@ -18,12 +18,7 @@ #include "third_party/mbedtls/aesce.h" #include "libc/str/str.h" #include "third_party/aarch64/arm_neon.internal.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Armv8-A Cryptographic Extension support functions for Aarch64 diff --git a/third_party/mbedtls/aesni.c b/third_party/mbedtls/aesni.c index 13a913771..1babe33a7 100644 --- a/third_party/mbedtls/aesni.c +++ b/third_party/mbedtls/aesni.c @@ -19,12 +19,7 @@ #include "libc/serialize.h" #include "libc/str/str.h" #include "third_party/mbedtls/common.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set diff --git a/third_party/mbedtls/asn1parse.c b/third_party/mbedtls/asn1parse.c index 8e6458fd1..aa419b6f4 100644 --- a/third_party/mbedtls/asn1parse.c +++ b/third_party/mbedtls/asn1parse.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); 
+__static_yoink("mbedtls_notice"); #if defined(MBEDTLS_ASN1_PARSE_C) diff --git a/third_party/mbedtls/asn1write.c b/third_party/mbedtls/asn1write.c index 5196bd333..1e968fea3 100644 --- a/third_party/mbedtls/asn1write.c +++ b/third_party/mbedtls/asn1write.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview ASN.1 buffer writing functionality diff --git a/third_party/mbedtls/base64.c b/third_party/mbedtls/base64.c index 56f5182c5..c46fcc1de 100644 --- a/third_party/mbedtls/base64.c +++ b/third_party/mbedtls/base64.c @@ -19,12 +19,7 @@ #include "libc/str/str.h" #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); #define ENC "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" diff --git a/third_party/mbedtls/bigmul4.c b/third_party/mbedtls/bigmul4.c index 686828503..89fbd362d 100644 --- a/third_party/mbedtls/bigmul4.c +++ b/third_party/mbedtls/bigmul4.c @@ -30,13 +30,11 @@ */ void (*Mul4x4)(uint64_t C[16], const uint64_t A[8], const uint64_t B[8]); -static textstartup void Mul4x4Init() +__attribute__((__constructor__(10))) static textstartup void Mul4x4Init() { Mul4x4 = X86_HAVE(ADX) && X86_HAVE(BMI2) ? 
Mul4x4Adx : Mul4x4Pure; } -const void *const Mul4x4Ctor[] initarray = {Mul4x4Init}; - void Mul4x4Pure(uint64_t C[16], const uint64_t A[8], const uint64_t B[8]) { uint128_t t; diff --git a/third_party/mbedtls/bignum.c b/third_party/mbedtls/bignum.c index a5695e4fa..96f0eb1a1 100644 --- a/third_party/mbedtls/bignum.c +++ b/third_party/mbedtls/bignum.c @@ -33,12 +33,7 @@ #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/profile.h" #include "third_party/mbedtls/select.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Big Numbers. diff --git a/third_party/mbedtls/ccm.c b/third_party/mbedtls/ccm.c index bbe5fe76c..747c313f1 100644 --- a/third_party/mbedtls/ccm.c +++ b/third_party/mbedtls/ccm.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview NIST SP800-38C compliant CCM implementation diff --git a/third_party/mbedtls/certs.c b/third_party/mbedtls/certs.c index 3170df134..a946ad5a2 100644 --- a/third_party/mbedtls/certs.c +++ b/third_party/mbedtls/certs.c @@ -17,12 +17,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/mbedtls/certs.h" #include "third_party/mbedtls/common.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); #if defined(MBEDTLS_CERTS_C) diff --git a/third_party/mbedtls/chacha20.c b/third_party/mbedtls/chacha20.c index 12e3b052e..c7600c24e 100644 --- 
a/third_party/mbedtls/chacha20.c +++ b/third_party/mbedtls/chacha20.c @@ -22,12 +22,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* Parameter validation macros */ #define CHACHA20_VALIDATE_RET( cond ) \ diff --git a/third_party/mbedtls/chachapoly.c b/third_party/mbedtls/chachapoly.c index 84dc8fcd8..090b7ab4e 100644 --- a/third_party/mbedtls/chachapoly.c +++ b/third_party/mbedtls/chachapoly.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview ChaCha20-Poly1305 AEAD construction based on RFC 7539. 
diff --git a/third_party/mbedtls/cipher.c b/third_party/mbedtls/cipher.c index a031b054f..2390f1983 100644 --- a/third_party/mbedtls/cipher.c +++ b/third_party/mbedtls/cipher.c @@ -26,12 +26,7 @@ #include "third_party/mbedtls/gcm.h" #include "third_party/mbedtls/nist_kw.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); #if defined(MBEDTLS_CIPHER_C) diff --git a/third_party/mbedtls/cipher_wrap.c b/third_party/mbedtls/cipher_wrap.c index 176cca76f..c7926079f 100644 --- a/third_party/mbedtls/cipher_wrap.c +++ b/third_party/mbedtls/cipher_wrap.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/gcm.h" #include "third_party/mbedtls/nist_kw.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); #if defined(MBEDTLS_CIPHER_C) diff --git a/third_party/mbedtls/ctr_drbg.c b/third_party/mbedtls/ctr_drbg.c index 052c741e1..01ab3b2c9 100644 --- a/third_party/mbedtls/ctr_drbg.c +++ b/third_party/mbedtls/ctr_drbg.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/ctr_drbg.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview CTR_DRBG implementation based on AES-256 (NIST SP 800-90) diff --git a/third_party/mbedtls/debug.c b/third_party/mbedtls/debug.c index 462d986a4..27b395b10 100644 --- a/third_party/mbedtls/debug.c +++ b/third_party/mbedtls/debug.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/debug.h" #include "third_party/mbedtls/error.h" #include 
"third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); char mbedtls_debug_threshold; diff --git a/third_party/mbedtls/des.c b/third_party/mbedtls/des.c index 1e412fdfc..33dc7f5bb 100644 --- a/third_party/mbedtls/des.c +++ b/third_party/mbedtls/des.c @@ -22,12 +22,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/endian.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview FIPS-46-3 compliant Triple-DES implementation diff --git a/third_party/mbedtls/dhm.c b/third_party/mbedtls/dhm.c index 4b9652b0c..222b2f1aa 100644 --- a/third_party/mbedtls/dhm.c +++ b/third_party/mbedtls/dhm.c @@ -24,12 +24,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/pem.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Diffie-Hellman-Merkle key exchange diff --git a/third_party/mbedtls/ecdh.c b/third_party/mbedtls/ecdh.c index a0d60e41c..76114dae9 100644 --- a/third_party/mbedtls/ecdh.c +++ b/third_party/mbedtls/ecdh.c @@ -19,12 +19,7 @@ #include "third_party/mbedtls/ecdh.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Elliptic curve Diffie-Hellman diff --git a/third_party/mbedtls/ecdh_everest.c 
b/third_party/mbedtls/ecdh_everest.c index 29034ce02..f0f865121 100644 --- a/third_party/mbedtls/ecdh_everest.c +++ b/third_party/mbedtls/ecdh_everest.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/everest.h" #if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED) #define KEYSIZE 32 - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * \brief This function sets up the ECDH context with the information diff --git a/third_party/mbedtls/ecdsa.c b/third_party/mbedtls/ecdsa.c index 0317da903..0d2528f25 100644 --- a/third_party/mbedtls/ecdsa.c +++ b/third_party/mbedtls/ecdsa.c @@ -23,12 +23,7 @@ #include "third_party/mbedtls/hmac_drbg.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/profile.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Elliptic curve Digital Signature Algorithm diff --git a/third_party/mbedtls/ecp.c b/third_party/mbedtls/ecp.c index 3ac9211f7..4b696ef83 100644 --- a/third_party/mbedtls/ecp.c +++ b/third_party/mbedtls/ecp.c @@ -32,12 +32,7 @@ #include "third_party/mbedtls/hmac_drbg.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/profile.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Elliptic curves over GF(p): generic functions diff --git a/third_party/mbedtls/ecp_curves.c b/third_party/mbedtls/ecp_curves.c index 06bef587a..7ab258e49 100644 --- a/third_party/mbedtls/ecp_curves.c +++ b/third_party/mbedtls/ecp_curves.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/ecp.h" #include 
"third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Elliptic curves over GF(p): curve-specific data and functions diff --git a/third_party/mbedtls/entropy.c b/third_party/mbedtls/entropy.c index 32f8658d7..5edc6f7af 100644 --- a/third_party/mbedtls/entropy.c +++ b/third_party/mbedtls/entropy.c @@ -23,12 +23,7 @@ #include "third_party/mbedtls/entropy_poll.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Entropy accumulator implementation diff --git a/third_party/mbedtls/error.c b/third_party/mbedtls/error.c index b53082847..b41282740 100644 --- a/third_party/mbedtls/error.c +++ b/third_party/mbedtls/error.c @@ -48,12 +48,7 @@ #include "third_party/mbedtls/sha512.h" #include "third_party/mbedtls/ssl.h" #include "third_party/mbedtls/x509.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Error message information diff --git a/third_party/mbedtls/everest.c b/third_party/mbedtls/everest.c index 75208b8d1..a4af39253 100644 --- a/third_party/mbedtls/everest.c +++ b/third_party/mbedtls/everest.c @@ -18,10 +18,11 @@ #include "libc/serialize.h" #include "third_party/mbedtls/endian.h" -asm(".ident\t\"\\n\\n\ -Everest (Apache 2.0)\\n\ -Copyright 2016-2018 INRIA and Microsoft Corporation\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(cosmo_everest_notice, "\ +Cosmopolitan Everest (Apache 2.0)\n\ +Copyright 2024 Justine Alexndra Roberts Tunney\n\ +Copyright 2016-2018 INRIA 
and Microsoft Corporation\n\ +Changes: Made C code look nice and not have pointers"); #define DW(x) (uint128_t)(x) #define EQ(x, y) ((((x ^ y) | (~(x ^ y) + 1)) >> 63) - 1) diff --git a/third_party/mbedtls/gcm.c b/third_party/mbedtls/gcm.c index 61df3d1cd..5f330eaca 100644 --- a/third_party/mbedtls/gcm.c +++ b/third_party/mbedtls/gcm.c @@ -29,12 +29,7 @@ #include "third_party/mbedtls/endian.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * NIST SP800-38D compliant GCM implementation diff --git a/third_party/mbedtls/hkdf.c b/third_party/mbedtls/hkdf.c index f0369c159..d5b76ea40 100644 --- a/third_party/mbedtls/hkdf.c +++ b/third_party/mbedtls/hkdf.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/hkdf.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview HKDF implementation (RFC 5869) diff --git a/third_party/mbedtls/hmac_drbg.c b/third_party/mbedtls/hmac_drbg.c index a2b3c531e..fa5284d1d 100644 --- a/third_party/mbedtls/hmac_drbg.c +++ b/third_party/mbedtls/hmac_drbg.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/hmac_drbg.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * HMAC_DRBG implementation (NIST SP 800-90) diff --git a/third_party/mbedtls/md.c b/third_party/mbedtls/md.c index 8942051a2..82f371cea 100644 --- a/third_party/mbedtls/md.c +++ 
b/third_party/mbedtls/md.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/sha1.h" #include "third_party/mbedtls/sha256.h" #include "third_party/mbedtls/sha512.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * \file md.c diff --git a/third_party/mbedtls/md5.c b/third_party/mbedtls/md5.c index 10deb3367..71674bbe8 100644 --- a/third_party/mbedtls/md5.c +++ b/third_party/mbedtls/md5.c @@ -22,12 +22,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/md.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * RFC 1321 compliant MD5 implementation diff --git a/third_party/mbedtls/md5t.c b/third_party/mbedtls/md5t.c index 4d050ddf9..1955eac68 100644 --- a/third_party/mbedtls/md5t.c +++ b/third_party/mbedtls/md5t.c @@ -18,12 +18,7 @@ #include "libc/str/str.h" #include "third_party/mbedtls/md5.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * RFC 1321 test vectors diff --git a/third_party/mbedtls/memory_buffer_alloc.c b/third_party/mbedtls/memory_buffer_alloc.c index b3cc7d128..283b92d0a 100644 --- a/third_party/mbedtls/memory_buffer_alloc.c +++ b/third_party/mbedtls/memory_buffer_alloc.c @@ -18,12 +18,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/memory_buffer_alloc.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); 
+__static_yoink("mbedtls_notice"); /* * Buffer-based memory allocator diff --git a/third_party/mbedtls/nist_kw.c b/third_party/mbedtls/nist_kw.c index 4523b6192..a95956301 100644 --- a/third_party/mbedtls/nist_kw.c +++ b/third_party/mbedtls/nist_kw.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/nist_kw.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Implementation of NIST SP 800-38F key wrapping, supporting KW and KWP modes diff --git a/third_party/mbedtls/notice.c b/third_party/mbedtls/notice.c new file mode 100644 index 000000000..efae1a05e --- /dev/null +++ b/third_party/mbedtls/notice.c @@ -0,0 +1,4 @@ +__notice(mbedtls_notice, "\ +Mbed TLS (Apache 2.0)\n\ +Copyright ARM Limited\n\ +Copyright The Mbed TLS Contributors"); diff --git a/third_party/mbedtls/oid.c b/third_party/mbedtls/oid.c index 88083c7ec..837e909b9 100644 --- a/third_party/mbedtls/oid.c +++ b/third_party/mbedtls/oid.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/oid.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/rsa.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Object Identifier (OID) database diff --git a/third_party/mbedtls/pem.c b/third_party/mbedtls/pem.c index 8b8e12d7b..6bc84f716 100644 --- a/third_party/mbedtls/pem.c +++ b/third_party/mbedtls/pem.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/md5.h" #include "third_party/mbedtls/pem.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); 
+__static_yoink("mbedtls_notice"); /** * @fileoverview Privacy Enhanced Mail (PEM) decoding diff --git a/third_party/mbedtls/pk.c b/third_party/mbedtls/pk.c index 736089ab0..92b3d3556 100644 --- a/third_party/mbedtls/pk.c +++ b/third_party/mbedtls/pk.c @@ -23,12 +23,7 @@ #include "third_party/mbedtls/pk_internal.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/rsa.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Public Key abstraction layer diff --git a/third_party/mbedtls/pk_wrap.c b/third_party/mbedtls/pk_wrap.c index 9aaba9947..a649523a1 100644 --- a/third_party/mbedtls/pk_wrap.c +++ b/third_party/mbedtls/pk_wrap.c @@ -25,12 +25,7 @@ #include "third_party/mbedtls/pk_internal.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/rsa.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview Public Key abstraction layer: wrapper functions diff --git a/third_party/mbedtls/pkcs5.c b/third_party/mbedtls/pkcs5.c index 10cc06f42..00910985b 100644 --- a/third_party/mbedtls/pkcs5.c +++ b/third_party/mbedtls/pkcs5.c @@ -22,12 +22,7 @@ #include "third_party/mbedtls/oid.h" #include "third_party/mbedtls/pkcs5.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview PKCS#5 functions, e.g. 
PBKDF2 diff --git a/third_party/mbedtls/pkparse.c b/third_party/mbedtls/pkparse.c index d657fd801..a5b16e2f3 100644 --- a/third_party/mbedtls/pkparse.c +++ b/third_party/mbedtls/pkparse.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/pkcs5.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/rsa.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Public Key layer for parsing key files and structures diff --git a/third_party/mbedtls/pkwrite.c b/third_party/mbedtls/pkwrite.c index 91c58252c..23d52818c 100644 --- a/third_party/mbedtls/pkwrite.c +++ b/third_party/mbedtls/pkwrite.c @@ -26,12 +26,7 @@ #include "third_party/mbedtls/pk.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/rsa.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Public Key layer for writing key files and structures diff --git a/third_party/mbedtls/poly1305.c b/third_party/mbedtls/poly1305.c index c3f543d5c..20f3f838c 100644 --- a/third_party/mbedtls/poly1305.c +++ b/third_party/mbedtls/poly1305.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * \file poly1305.c diff --git a/third_party/mbedtls/rsa.c b/third_party/mbedtls/rsa.c index 158c526ba..edc6ae58f 100644 --- a/third_party/mbedtls/rsa.c +++ b/third_party/mbedtls/rsa.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/profile.h" #include "third_party/mbedtls/rsa_internal.h" #include "third_party/mbedtls/sha1.h" 
- -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview The RSA public-key cryptosystem diff --git a/third_party/mbedtls/rsa_internal.c b/third_party/mbedtls/rsa_internal.c index aa9d03fb2..5359d5d80 100644 --- a/third_party/mbedtls/rsa_internal.c +++ b/third_party/mbedtls/rsa_internal.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/profile.h" #include "third_party/mbedtls/rsa.h" #include "third_party/mbedtls/rsa_internal.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Helper functions for the RSA module diff --git a/third_party/mbedtls/sha1.c b/third_party/mbedtls/sha1.c index 26806ab94..be933c939 100644 --- a/third_party/mbedtls/sha1.c +++ b/third_party/mbedtls/sha1.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/md.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview FIPS-180-1 compliant SHA-1 implementation diff --git a/third_party/mbedtls/sha1t.c b/third_party/mbedtls/sha1t.c index 641061b9c..fc553f34d 100644 --- a/third_party/mbedtls/sha1t.c +++ b/third_party/mbedtls/sha1t.c @@ -18,12 +18,7 @@ #include "libc/str/str.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/sha1.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * FIPS-180-1 test vectors diff --git a/third_party/mbedtls/sha256.c b/third_party/mbedtls/sha256.c index 
3261608f1..88e621b53 100644 --- a/third_party/mbedtls/sha256.c +++ b/third_party/mbedtls/sha256.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/endian.h" #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/md.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview FIPS-180-2 compliant SHA-256 implementation diff --git a/third_party/mbedtls/sha256t.c b/third_party/mbedtls/sha256t.c index 996c30019..f3b822447 100644 --- a/third_party/mbedtls/sha256t.c +++ b/third_party/mbedtls/sha256t.c @@ -18,12 +18,7 @@ #include "libc/str/str.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/sha256.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * FIPS-180-2 test vectors diff --git a/third_party/mbedtls/sha512.c b/third_party/mbedtls/sha512.c index a5e197e18..8f4494787 100644 --- a/third_party/mbedtls/sha512.c +++ b/third_party/mbedtls/sha512.c @@ -28,12 +28,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/md.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview FIPS-180-2 compliant SHA-384/512 implementation diff --git a/third_party/mbedtls/sha512t.c b/third_party/mbedtls/sha512t.c index d8349047f..5e4730831 100644 --- a/third_party/mbedtls/sha512t.c +++ b/third_party/mbedtls/sha512t.c @@ -19,12 +19,7 @@ #include "libc/str/str.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/sha512.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright 
Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * FIPS-180-2 test vectors diff --git a/third_party/mbedtls/shiftright.c b/third_party/mbedtls/shiftright.c index e3be94ccd..3162eae01 100644 --- a/third_party/mbedtls/shiftright.c +++ b/third_party/mbedtls/shiftright.c @@ -21,8 +21,7 @@ void (*ShiftRight)(uint64_t *, size_t, unsigned char); +__attribute__((__constructor__(10))) static textstartup void ShiftRightInit(void) { ShiftRight = 0 && X86_HAVE(AVX) ? ShiftRightAvx : ShiftRightPure; } - -const void *const ShiftRightCtor[] initarray = {ShiftRightInit}; diff --git a/third_party/mbedtls/ssl_cache.c b/third_party/mbedtls/ssl_cache.c index 453326702..c4d435ad9 100644 --- a/third_party/mbedtls/ssl_cache.c +++ b/third_party/mbedtls/ssl_cache.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/ssl_cache.h" #include "third_party/mbedtls/ssl_internal.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * SSL session cache implementation diff --git a/third_party/mbedtls/ssl_ciphersuites.c b/third_party/mbedtls/ssl_ciphersuites.c index a7a22a1ad..812fa4cce 100644 --- a/third_party/mbedtls/ssl_ciphersuites.c +++ b/third_party/mbedtls/ssl_ciphersuites.c @@ -25,12 +25,7 @@ #include "third_party/mbedtls/common.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/ssl.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * CRYPTOGRAPHY 101 diff --git a/third_party/mbedtls/ssl_cli.c b/third_party/mbedtls/ssl_cli.c index b8f2e79b7..002c4aba9 100644 --- a/third_party/mbedtls/ssl_cli.c +++ b/third_party/mbedtls/ssl_cli.c @@ -22,12 +22,7 @@ #include 
"third_party/mbedtls/platform.h" #include "third_party/mbedtls/ssl.h" #include "third_party/mbedtls/ssl_internal.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * SSLv3/TLSv1 client-side functions diff --git a/third_party/mbedtls/ssl_cookie.c b/third_party/mbedtls/ssl_cookie.c index 19eec8754..7ca95fcd4 100644 --- a/third_party/mbedtls/ssl_cookie.c +++ b/third_party/mbedtls/ssl_cookie.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/ssl_cookie.h" #include "third_party/mbedtls/ssl_internal.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * DTLS cookie callbacks implementation diff --git a/third_party/mbedtls/ssl_msg.c b/third_party/mbedtls/ssl_msg.c index 68ad70f49..41fb201d3 100644 --- a/third_party/mbedtls/ssl_msg.c +++ b/third_party/mbedtls/ssl_msg.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/ssl.h" #include "third_party/mbedtls/ssl_internal.h" #include "third_party/mbedtls/ssl_invasive.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * Generic SSL/TLS messaging layer functions diff --git a/third_party/mbedtls/ssl_srv.c b/third_party/mbedtls/ssl_srv.c index a51e54b6e..ef4e5c964 100644 --- a/third_party/mbedtls/ssl_srv.c +++ b/third_party/mbedtls/ssl_srv.c @@ -27,12 +27,7 @@ #include "third_party/mbedtls/profile.h" #include "third_party/mbedtls/ssl.h" #include "third_party/mbedtls/ssl_internal.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); 
+__static_yoink("mbedtls_notice"); /* * SSLv3/TLSv1 server-side functions diff --git a/third_party/mbedtls/ssl_ticket.c b/third_party/mbedtls/ssl_ticket.c index 01fee1941..70e4a4fa9 100644 --- a/third_party/mbedtls/ssl_ticket.c +++ b/third_party/mbedtls/ssl_ticket.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/ssl_internal.h" #include "third_party/mbedtls/ssl_ticket.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * TLS server tickets callbacks implementation diff --git a/third_party/mbedtls/ssl_tls.c b/third_party/mbedtls/ssl_tls.c index df248f2f7..5460a4d74 100644 --- a/third_party/mbedtls/ssl_tls.c +++ b/third_party/mbedtls/ssl_tls.c @@ -28,12 +28,7 @@ #include "third_party/mbedtls/ssl_ciphersuites.h" #include "third_party/mbedtls/ssl_internal.h" #include "third_party/mbedtls/version.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview SSLv3/TLSv1 shared functions diff --git a/third_party/mbedtls/ssl_tls13_keys.c b/third_party/mbedtls/ssl_tls13_keys.c index ae521c0c2..523959f03 100644 --- a/third_party/mbedtls/ssl_tls13_keys.c +++ b/third_party/mbedtls/ssl_tls13_keys.c @@ -19,12 +19,7 @@ #include "third_party/mbedtls/hkdf.h" #include "third_party/mbedtls/ssl_internal.h" #include "third_party/mbedtls/ssl_tls13_keys.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * TLS 1.3 key schedule diff --git a/third_party/mbedtls/test/everest_unravaged.c b/third_party/mbedtls/test/everest_unravaged.c index cc4737be1..9c24f1f42 100644 --- 
a/third_party/mbedtls/test/everest_unravaged.c +++ b/third_party/mbedtls/test/everest_unravaged.c @@ -7,10 +7,8 @@ #include "libc/serialize.h" #include "third_party/mbedtls/platform.h" -asm(".ident\t\"\\n\\n\ -Everest (Apache 2.0)\\n\ -Copyright 2016-2018 INRIA and Microsoft Corporation\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(everest_notice, "Everest (Apache 2.0)\n\ +Copyright 2016-2018 INRIA and Microsoft Corporation"); /* * ECDH with curve-optimized implementation multiplexing diff --git a/third_party/mbedtls/test/lib.c b/third_party/mbedtls/test/lib.c index e284a67b4..5e72cd6b6 100644 --- a/third_party/mbedtls/test/lib.c +++ b/third_party/mbedtls/test/lib.c @@ -50,12 +50,7 @@ #include "third_party/mbedtls/error.h" #include "libc/serialize.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); __static_yoink("zipos"); diff --git a/third_party/mbedtls/test/test.inc b/third_party/mbedtls/test/test.inc index 9412aac12..f4a992d4e 100644 --- a/third_party/mbedtls/test/test.inc +++ b/third_party/mbedtls/test/test.inc @@ -6,11 +6,6 @@ #include "libc/sysv/consts/exit.h" #include "third_party/mbedtls/config.h" #include "third_party/mbedtls/test/lib.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); __static_yoink("zipos"); diff --git a/third_party/mbedtls/x509.c b/third_party/mbedtls/x509.c index dab92a7dd..62949a890 100644 --- a/third_party/mbedtls/x509.c +++ b/third_party/mbedtls/x509.c @@ -28,12 +28,7 @@ #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/x509.h" #include "third_party/mbedtls/x509_crt.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS 
Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * X.509 common functions for parsing and verification diff --git a/third_party/mbedtls/x509_create.c b/third_party/mbedtls/x509_create.c index 2e2dbd8a0..ef28ec1ba 100644 --- a/third_party/mbedtls/x509_create.c +++ b/third_party/mbedtls/x509_create.c @@ -20,12 +20,7 @@ #include "third_party/mbedtls/error.h" #include "third_party/mbedtls/oid.h" #include "third_party/mbedtls/x509.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * X.509 base functions for creating certificates / CSRs diff --git a/third_party/mbedtls/x509_crl.c b/third_party/mbedtls/x509_crl.c index 807e63873..f9460fccb 100644 --- a/third_party/mbedtls/x509_crl.c +++ b/third_party/mbedtls/x509_crl.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/pem.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/x509_crl.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview X.509 Certidicate Revocation List (CRL) parsing diff --git a/third_party/mbedtls/x509_crt.c b/third_party/mbedtls/x509_crt.c index 47a4c103a..da53f29d2 100644 --- a/third_party/mbedtls/x509_crt.c +++ b/third_party/mbedtls/x509_crt.c @@ -32,12 +32,7 @@ #include "third_party/mbedtls/oid.h" #include "third_party/mbedtls/pem.h" #include "third_party/mbedtls/platform.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * X.509 certificate parsing and verification diff --git a/third_party/mbedtls/x509_csr.c b/third_party/mbedtls/x509_csr.c index ee8b4941c..4ebb8d39f 100644 
--- a/third_party/mbedtls/x509_csr.c +++ b/third_party/mbedtls/x509_csr.c @@ -21,12 +21,7 @@ #include "third_party/mbedtls/pem.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/x509_csr.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /** * @fileoverview X.509 Certificate Signing Request (CSR) parsing diff --git a/third_party/mbedtls/x509write_crt.c b/third_party/mbedtls/x509write_crt.c index 987ba9df8..7558b38ec 100644 --- a/third_party/mbedtls/x509write_crt.c +++ b/third_party/mbedtls/x509write_crt.c @@ -23,12 +23,7 @@ #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/sha1.h" #include "third_party/mbedtls/x509_crt.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * X.509 certificate writing diff --git a/third_party/mbedtls/x509write_csr.c b/third_party/mbedtls/x509write_csr.c index 3d22704ad..ffda801af 100644 --- a/third_party/mbedtls/x509write_csr.c +++ b/third_party/mbedtls/x509write_csr.c @@ -22,12 +22,7 @@ #include "third_party/mbedtls/pem.h" #include "third_party/mbedtls/platform.h" #include "third_party/mbedtls/x509_csr.h" - -asm(".ident\t\"\\n\\n\ -Mbed TLS (Apache 2.0)\\n\ -Copyright ARM Limited\\n\ -Copyright Mbed TLS Contributors\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("mbedtls_notice"); /* * X.509 Certificate Signing Request writing diff --git a/third_party/musl/crypt.c b/third_party/musl/crypt.c index 2bf39082e..a6a81de20 100644 --- a/third_party/musl/crypt.c +++ b/third_party/musl/crypt.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/musl/crypt.h" #include "third_party/musl/crypt.internal.h" - 
-asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /** * Encrypts password the old fashioned way. diff --git a/third_party/musl/crypt_blowfish.c b/third_party/musl/crypt_blowfish.c index 9dfec80c0..5586c53c6 100644 --- a/third_party/musl/crypt_blowfish.c +++ b/third_party/musl/crypt_blowfish.c @@ -32,11 +32,7 @@ #include "libc/mem/gc.h" #include "libc/str/str.h" #include "third_party/musl/crypt.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /* Modified by Rich Felker in for inclusion in musl libc, based on * Solar Designer's second size-optimized version sent to the musl diff --git a/third_party/musl/crypt_des.c b/third_party/musl/crypt_des.c index f301f8b22..564059ac7 100644 --- a/third_party/musl/crypt_des.c +++ b/third_party/musl/crypt_des.c @@ -31,14 +31,10 @@ #include "libc/str/str.h" #include "third_party/musl/crypt.internal.h" #include "third_party/musl/crypt_des.internal.h" +__static_yoink("musl_libc_notice"); #pragma GCC diagnostic ignored "-Wstringop-overflow" -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); - /* * This version has been further modified by Rich Felker, primary author * and maintainer of musl libc, to remove table generation code and diff --git a/third_party/musl/crypt_md5.c b/third_party/musl/crypt_md5.c index 1c00345b0..5bf75c1eb 100644 --- a/third_party/musl/crypt_md5.c +++ b/third_party/musl/crypt_md5.c @@ -30,14 +30,10 @@ #include "libc/literal.h" #include "libc/str/str.h" #include "third_party/musl/crypt.internal.h" +__static_yoink("musl_libc_notice"); #pragma GCC diagnostic ignored "-Wmissing-braces" -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); - /* * md5 crypt implementation * diff --git a/third_party/musl/crypt_r.c b/third_party/musl/crypt_r.c index e90411ff5..f0155b8c6 100644 --- a/third_party/musl/crypt_r.c +++ b/third_party/musl/crypt_r.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/musl/crypt.h" #include "third_party/musl/crypt.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /** * Encrypts password the old fashioned way. diff --git a/third_party/musl/crypt_sha256.c b/third_party/musl/crypt_sha256.c index ec699090d..d5e2f8059 100644 --- a/third_party/musl/crypt_sha256.c +++ b/third_party/musl/crypt_sha256.c @@ -40,14 +40,10 @@ #include "libc/sysv/consts/exit.h" #include "third_party/gdtoa/gdtoa.h" #include "third_party/musl/crypt.internal.h" +__static_yoink("musl_libc_notice"); #pragma GCC diagnostic ignored "-Wmissing-braces" -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); - /* * public domain sha256 crypt implementation * diff --git a/third_party/musl/crypt_sha512.c b/third_party/musl/crypt_sha512.c index 122a64cff..7d3b4414b 100644 --- a/third_party/musl/crypt_sha512.c +++ b/third_party/musl/crypt_sha512.c @@ -40,14 +40,10 @@ #include "libc/sysv/consts/exit.h" #include "third_party/gdtoa/gdtoa.h" #include "third_party/musl/crypt.internal.h" +__static_yoink("musl_libc_notice"); #pragma GCC diagnostic ignored "-Wmissing-braces" -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); - /* * public domain sha512 crypt implementation * diff --git a/third_party/musl/encrypt.c b/third_party/musl/encrypt.c index 6474c6a84..722ea2650 100644 --- a/third_party/musl/encrypt.c +++ b/third_party/musl/encrypt.c @@ -41,11 +41,7 @@ #include "libc/sysv/consts/ok.h" #include "third_party/gdtoa/gdtoa.h" #include "third_party/musl/crypt_des.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static struct expanded_key __encrypt_key; diff --git a/third_party/musl/fnmatch.c b/third_party/musl/fnmatch.c index ed04f5c21..d2f32e259 100644 --- a/third_party/musl/fnmatch.c +++ b/third_party/musl/fnmatch.c @@ -28,6 +28,7 @@ #include "libc/limits.h" #include "libc/str/str.h" #include "third_party/musl/fnmatch.h" +__static_yoink("musl_libc_notice"); /* * An implementation of what I call the "Sea of Stars" algorithm for @@ -44,11 +45,6 @@ * - Rich Felker, April 2012 */ -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); - #define END 0 #define UNMATCHABLE -2 #define BRACKET -3 diff --git a/third_party/musl/forkpty.c b/third_party/musl/forkpty.c index f53984a51..ea881b0fb 100644 --- a/third_party/musl/forkpty.c +++ b/third_party/musl/forkpty.c @@ -35,11 +35,7 @@ #include "libc/runtime/runtime.h" #include "libc/errno.h" #include "libc/calls/termios.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int forkpty(int *pm, char *name, const struct termios *tio, const struct winsize *ws) { diff --git a/third_party/musl/freeaddrinfo.c b/third_party/musl/freeaddrinfo.c index 9fda19caa..e9bcfbd7b 100644 --- a/third_party/musl/freeaddrinfo.c +++ b/third_party/musl/freeaddrinfo.c @@ -29,11 +29,7 @@ #include "libc/mem/mem.h" #include "third_party/musl/lookup.internal.h" #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); void freeaddrinfo(struct addrinfo *p) { diff --git a/third_party/musl/gai_strerror.c b/third_party/musl/gai_strerror.c index 2b6e85850..7e9a83eed 100644 --- a/third_party/musl/gai_strerror.c +++ b/third_party/musl/gai_strerror.c @@ -26,11 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static const char msgs[] = "Invalid flags\0" diff --git a/third_party/musl/getaddrinfo.c b/third_party/musl/getaddrinfo.c index 7c418d518..9c8d0a926 100644 --- a/third_party/musl/getaddrinfo.c +++ b/third_party/musl/getaddrinfo.c @@ -40,11 +40,7 @@ #include "third_party/musl/lookup.internal.h" #include "libc/intrin/atomic.h" #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int getaddrinfo(const char *host, const char *serv, diff --git a/third_party/musl/gethostbyaddr_r.c b/third_party/musl/gethostbyaddr_r.c index e4934b94c..51572bb1d 100644 --- a/third_party/musl/gethostbyaddr_r.c +++ b/third_party/musl/gethostbyaddr_r.c @@ -32,11 +32,7 @@ #include "libc/str/str.h" #include "libc/sysv/consts/af.h" #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); errno_t gethostbyaddr_r(const void *a, socklen_t l, int af, struct hostent *h, char *buf, size_t buflen, diff --git a/third_party/musl/gethostbyname2_r.c b/third_party/musl/gethostbyname2_r.c index 9ae8c0841..e95bf9535 100644 --- a/third_party/musl/gethostbyname2_r.c +++ b/third_party/musl/gethostbyname2_r.c @@ -30,11 +30,7 @@ #include "libc/sysv/consts/af.h" #include "third_party/musl/lookup.internal.h" #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); errno_t gethostbyname2_r(const char *name, int af, struct hostent *h, char *buf, size_t buflen, diff --git a/third_party/musl/getnameinfo.c b/third_party/musl/getnameinfo.c index 62f20e009..ffced1dd3 100644 --- a/third_party/musl/getnameinfo.c +++ b/third_party/musl/getnameinfo.c @@ -37,11 +37,7 @@ #include "third_party/musl/lookup.internal.h" #include "third_party/musl/netdb.h" #include "third_party/musl/resolv.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); #define PTR_MAX (64 + sizeof ".in-addr.arpa") #define RR_PTR 12 diff --git a/third_party/musl/getservbyname_r.c b/third_party/musl/getservbyname_r.c index cd088ac29..d6e238fba 100644 --- a/third_party/musl/getservbyname_r.c +++ b/third_party/musl/getservbyname_r.c @@ -32,11 +32,7 @@ #include "libc/sysv/consts/ipproto.h" #include "third_party/musl/lookup.internal.h" #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); #define ALIGN (sizeof(struct { char a; char *b; }) - sizeof(char *)) diff --git a/third_party/musl/getservbyport_r.c b/third_party/musl/getservbyport_r.c index 3749ef393..0e98a8eaf 100644 --- a/third_party/musl/getservbyport_r.c +++ b/third_party/musl/getservbyport_r.c @@ -31,11 +31,7 @@ #include "libc/str/str.h" #include "libc/sysv/consts/af.h" #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); errno_t getservbyport_r(int port, const char *prots, struct servent *se, char *buf, size_t buflen, struct servent **res) diff --git a/third_party/musl/getspnam_r.c b/third_party/musl/getspnam_r.c index 3b08b1642..dae46748f 100644 --- a/third_party/musl/getspnam_r.c +++ b/third_party/musl/getspnam_r.c @@ -31,11 +31,7 @@ #include #include #include "pwf.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /* This implementation support Openwall-style TCB passwords in place of * traditional shadow, if the appropriate directories and files exist. diff --git a/third_party/musl/glob.c b/third_party/musl/glob.c index c7156ccb3..655532d9a 100644 --- a/third_party/musl/glob.c +++ b/third_party/musl/glob.c @@ -36,14 +36,10 @@ #include "libc/sysv/consts/dt.h" #include "libc/sysv/consts/s.h" #include "third_party/musl/fnmatch.h" +__static_yoink("musl_libc_notice"); #define MAXPATH 1024 -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); - struct GlobList { struct GlobList *next; char name[]; diff --git a/third_party/musl/grp.c b/third_party/musl/grp.c index 7a788c7f0..17d777ef7 100644 --- a/third_party/musl/grp.c +++ b/third_party/musl/grp.c @@ -34,11 +34,7 @@ #include "libc/sysv/consts/limits.h" #include "libc/thread/thread.h" #include "third_party/musl/passwd.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static unsigned atou(char **s) { unsigned x; diff --git a/third_party/musl/hsearch.c b/third_party/musl/hsearch.c index 883fec34f..e4140f404 100644 --- a/third_party/musl/hsearch.c +++ b/third_party/musl/hsearch.c @@ -28,11 +28,7 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" #include "third_party/musl/search.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /* open addressing hash table with 2^n table size diff --git a/third_party/musl/insque.c b/third_party/musl/insque.c index 63c00539b..cec746ec2 100644 --- a/third_party/musl/insque.c +++ b/third_party/musl/insque.c @@ -26,11 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/musl/search.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); struct node { struct node *next; diff --git a/third_party/musl/lockf.c b/third_party/musl/lockf.c index 33c57e3d6..a0ec7e670 100644 --- a/third_party/musl/lockf.c +++ b/third_party/musl/lockf.c @@ -32,11 +32,7 @@ #include "libc/errno.h" #include "libc/sysv/consts/f.h" #include "libc/sysv/errfuns.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /** * Locks file. 
diff --git a/third_party/musl/lookup_ipliteral.c b/third_party/musl/lookup_ipliteral.c index 1ba9a2d5e..a02b0239b 100644 --- a/third_party/musl/lookup_ipliteral.c +++ b/third_party/musl/lookup_ipliteral.c @@ -32,11 +32,7 @@ #include "libc/limits.h" #include "libc/sock/sock.h" #include "third_party/musl/lookup.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int __lookup_ipliteral(struct address buf[static 1], const char *name, int family) { diff --git a/third_party/musl/lookup_name.c b/third_party/musl/lookup_name.c index 37214b72a..291178213 100644 --- a/third_party/musl/lookup_name.c +++ b/third_party/musl/lookup_name.c @@ -40,11 +40,7 @@ #include "third_party/musl/lookup.internal.h" #include "third_party/musl/netdb.h" #include "third_party/musl/resolv.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static int is_valid_hostname(const char *host) { diff --git a/third_party/musl/lookup_serv.c b/third_party/musl/lookup_serv.c index 8eb45abc7..cc7fbffb2 100644 --- a/third_party/musl/lookup_serv.c +++ b/third_party/musl/lookup_serv.c @@ -33,11 +33,7 @@ #include "libc/errno.h" #include "libc/calls/sysdir.internal.h" #include "third_party/musl/lookup.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int __lookup_serv(struct service buf[static MAXSERVS], const char *name, diff --git a/third_party/musl/lsearch.c b/third_party/musl/lsearch.c index 1d1ee6a5c..c4b871206 100644 --- a/third_party/musl/lsearch.c +++ b/third_party/musl/lsearch.c @@ -28,11 +28,7 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" #include "third_party/musl/search.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); void *lsearch(const void *key, void *base, size_t *nelp, size_t width, diff --git a/third_party/musl/mntent.c b/third_party/musl/mntent.c index cc010090a..275ba586d 100644 --- a/third_party/musl/mntent.c +++ b/third_party/musl/mntent.c @@ -31,11 +31,7 @@ #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "third_party/musl/mntent.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static char *internal_buf; static size_t internal_bufsize; diff --git a/third_party/musl/ns_parse.c b/third_party/musl/ns_parse.c index e3964e2fb..dad9dd71c 100644 --- a/third_party/musl/ns_parse.c +++ b/third_party/musl/ns_parse.c @@ -28,11 +28,7 @@ #include "libc/errno.h" #include "third_party/musl/resolv.h" #include "third_party/musl/nameser.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); const struct _ns_flagdata _ns_flagdata[16] = { { 0x8000, 15 }, diff --git a/third_party/musl/proto.c b/third_party/musl/proto.c index 86edea22b..6e616dbf0 100644 --- a/third_party/musl/proto.c +++ b/third_party/musl/proto.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" #include "third_party/musl/netdb.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); /* do we really need all these?? */ diff --git a/third_party/musl/pwd.c b/third_party/musl/pwd.c index a1dedd141..fc54b77cf 100644 --- a/third_party/musl/pwd.c +++ b/third_party/musl/pwd.c @@ -38,11 +38,7 @@ #include "libc/str/str.h" #include "libc/thread/thread.h" #include "third_party/musl/passwd.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); #ifdef FTRACE // if the default mode debugging tools are enabled, and we're linking diff --git a/third_party/musl/rand48.c b/third_party/musl/rand48.c index fa0a94db3..5c46adf7d 100644 --- a/third_party/musl/rand48.c +++ b/third_party/musl/rand48.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/musl/rand48.h" #include "libc/str/str.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static unsigned short __seed48[7] = { 0, 0, 0, 0xe66d, 0xdeec, 0x5, 0xb }; diff --git a/third_party/musl/res_mkquery.c b/third_party/musl/res_mkquery.c index f10f2148d..f955d6cfb 100644 --- a/third_party/musl/res_mkquery.c +++ b/third_party/musl/res_mkquery.c @@ -29,11 +29,7 @@ #include "libc/sysv/consts/clock.h" #include "libc/stdio/rand.h" #include "third_party/musl/resolv.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int __res_mkquery(int op, const char *dname, int class, int type, const unsigned char *data, int datalen, diff --git a/third_party/musl/res_msend.c b/third_party/musl/res_msend.c index 705a0e5b4..1a65bee68 100644 --- a/third_party/musl/res_msend.c +++ b/third_party/musl/res_msend.c @@ -45,11 +45,7 @@ #include "libc/sysv/consts/tcp.h" #include "libc/thread/thread.h" #include "lookup.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static void cleanup(void *p) { diff --git a/third_party/musl/res_query.c b/third_party/musl/res_query.c index f324e58bb..04d7427f7 100644 --- a/third_party/musl/res_query.c +++ b/third_party/musl/res_query.c @@ -28,11 +28,7 @@ #include "third_party/musl/resolv.h" #include "third_party/musl/netdb.h" #include "third_party/musl/resolv.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int res_query(const char *name, int class, int type, unsigned char *dest, int len) { diff --git a/third_party/musl/res_querydomain.c b/third_party/musl/res_querydomain.c index 809bc6877..e91149944 100644 --- a/third_party/musl/res_querydomain.c +++ b/third_party/musl/res_querydomain.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" #include "third_party/musl/resolv.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int res_querydomain(const char *name, const char *domain, int class, int type, unsigned char *dest, int len) { diff --git a/third_party/musl/res_send.c b/third_party/musl/res_send.c index 1ef881370..cd28d0706 100644 --- a/third_party/musl/res_send.c +++ b/third_party/musl/res_send.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" #include "third_party/musl/resolv.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); int __res_send(const unsigned char *msg, int msglen, unsigned char *answer, int anslen) { diff --git a/third_party/musl/resolvconf.c b/third_party/musl/resolvconf.c index bdcfbc719..ddc44b363 100644 --- a/third_party/musl/resolvconf.c +++ b/third_party/musl/resolvconf.c @@ -38,11 +38,7 @@ #include "libc/sysv/consts/af.h" #include "libc/sock/sock.h" #include "third_party/musl/lookup.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); // TODO(jart): ipv6 nameservers on windows // TODO(jart): can we polyfill `search` on windows diff --git a/third_party/musl/strfmon.c b/third_party/musl/strfmon.c index 82bd8c0a9..95e590612 100644 --- a/third_party/musl/strfmon.c +++ b/third_party/musl/strfmon.c @@ -30,11 +30,7 @@ #include "libc/str/locale.h" #include "libc/str/str.h" #include "libc/thread/tls.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static ssize_t vstrfmon_l(char *s, size_t n, locale_t loc, const char *fmt, va_list ap) { diff --git a/third_party/musl/tdelete.c b/third_party/musl/tdelete.c index 6023062fc..52e871341 100644 --- a/third_party/musl/tdelete.c +++ b/third_party/musl/tdelete.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/mem/mem.h" #include "third_party/musl/tsearch.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); void *tdelete(const void *restrict key, void **restrict rootp, diff --git a/third_party/musl/tdestroy.c b/third_party/musl/tdestroy.c index 51a681669..17904479c 100644 --- a/third_party/musl/tdestroy.c +++ b/third_party/musl/tdestroy.c @@ -27,11 +27,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/mem/mem.h" #include "third_party/musl/tsearch.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. 
al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); void tdestroy(void *root, void (*freekey)(void *)) { diff --git a/third_party/musl/tempnam.c b/third_party/musl/tempnam.c index c72761ddd..93d8a6856 100644 --- a/third_party/musl/tempnam.c +++ b/third_party/musl/tempnam.c @@ -37,14 +37,10 @@ #include "libc/sysv/consts/at.h" #include "libc/sysv/consts/clock.h" #include "libc/time/time.h" +__static_yoink("musl_libc_notice"); #define MAXTRIES 100 -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); - static char * __randname(char *template) { diff --git a/third_party/musl/tfind.c b/third_party/musl/tfind.c index 0735306b9..b4a668bcd 100644 --- a/third_party/musl/tfind.c +++ b/third_party/musl/tfind.c @@ -26,11 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/musl/tsearch.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); void *tfind(const void *key, void *const *rootp, int(*cmp)(const void *, const void *)) diff --git a/third_party/musl/tsearch.c b/third_party/musl/tsearch.c index 1c9ceecd8..fdfd0c75e 100644 --- a/third_party/musl/tsearch.c +++ b/third_party/musl/tsearch.c @@ -28,11 +28,7 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" #include "third_party/musl/tsearch.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static inline int height(struct node *n) { return n ? 
n->h : 0; } diff --git a/third_party/musl/twalk.c b/third_party/musl/twalk.c index 48ec2ab19..ed5bdbffc 100644 --- a/third_party/musl/twalk.c +++ b/third_party/musl/twalk.c @@ -26,11 +26,7 @@ │ │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/musl/tsearch.internal.h" - -asm(".ident\t\"\\n\\n\ -Musl libc (MIT License)\\n\ -Copyright 2005-2020 Rich Felker, et. al.\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("musl_libc_notice"); static void walk(const struct node *r, void (*action)(const void *, VISIT, int), int d) diff --git a/third_party/nsync/common.c b/third_party/nsync/common.c index 93a9cd4db..a50b6f041 100644 --- a/third_party/nsync/common.c +++ b/third_party/nsync/common.c @@ -34,11 +34,7 @@ #include "third_party/nsync/mu_semaphore.h" #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* This package provides a mutex nsync_mu and a Mesa-style condition * variable nsync_cv. */ diff --git a/third_party/nsync/mem/nsync_counter.c b/third_party/nsync/mem/nsync_counter.c index d974977d8..c508797fc 100644 --- a/third_party/nsync/mem/nsync_counter.c +++ b/third_party/nsync/mem/nsync_counter.c @@ -26,11 +26,7 @@ #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" #include "third_party/nsync/waiter.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* Internal details of nsync_counter. 
*/ struct nsync_counter_s_ { diff --git a/third_party/nsync/mem/nsync_cv.c b/third_party/nsync/mem/nsync_cv.c index fb081094a..926379340 100644 --- a/third_party/nsync/mem/nsync_cv.c +++ b/third_party/nsync/mem/nsync_cv.c @@ -26,16 +26,12 @@ #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" #include "third_party/nsync/waiter.h" +__static_yoink("nsync_notice"); // once we're paying the cost of nsync we might as well get the benefit // of a better pthread_once(), since no other component pulls it in now __static_yoink("nsync_run_once"); -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); - /* Initialize *cv. */ void nsync_cv_init (nsync_cv *cv) { bzero ((void *) cv, sizeof (*cv)); diff --git a/third_party/nsync/mem/nsync_debug.c b/third_party/nsync/mem/nsync_debug.c index e677c7ea0..1b72c1175 100644 --- a/third_party/nsync/mem/nsync_debug.c +++ b/third_party/nsync/mem/nsync_debug.c @@ -21,11 +21,7 @@ #include "third_party/nsync/mu_semaphore.h" #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* Routines for debugging. */ diff --git a/third_party/nsync/mem/nsync_mu_wait.c b/third_party/nsync/mem/nsync_mu_wait.c index c7839950a..3c48ea599 100644 --- a/third_party/nsync/mem/nsync_mu_wait.c +++ b/third_party/nsync/mem/nsync_mu_wait.c @@ -22,11 +22,7 @@ #include "third_party/nsync/mu_semaphore.h" #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* Attempt to remove waiter *w from *mu's waiter queue. 
If successful, leave the lock held in mode *l_type, and diff --git a/third_party/nsync/mem/nsync_note.c b/third_party/nsync/mem/nsync_note.c index 8e7276112..bdf8e9ad0 100644 --- a/third_party/nsync/mem/nsync_note.c +++ b/third_party/nsync/mem/nsync_note.c @@ -25,11 +25,7 @@ #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" #include "third_party/nsync/waiter.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* Locking discipline for the nsync_note implementation: diff --git a/third_party/nsync/mem/nsync_once.c b/third_party/nsync/mem/nsync_once.c index 8320a6266..163923359 100644 --- a/third_party/nsync/mem/nsync_once.c +++ b/third_party/nsync/mem/nsync_once.c @@ -22,11 +22,7 @@ #include "third_party/nsync/once.h" #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* An once_sync_s struct contains a lock, and a condition variable on which threads may wait for an nsync_once to be initialized by another thread. diff --git a/third_party/nsync/mem/nsync_sem_wait.c b/third_party/nsync/mem/nsync_sem_wait.c index c32a2842e..62507d686 100644 --- a/third_party/nsync/mem/nsync_sem_wait.c +++ b/third_party/nsync/mem/nsync_sem_wait.c @@ -23,11 +23,7 @@ #include "third_party/nsync/common.internal.h" #include "third_party/nsync/mu_semaphore.h" #include "third_party/nsync/wait_s.internal.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* Wait until one of: w->sem is non-zero----decrement it and return 0. 
diff --git a/third_party/nsync/mem/nsync_wait.c b/third_party/nsync/mem/nsync_wait.c index f669730ac..9d8e95b7d 100644 --- a/third_party/nsync/mem/nsync_wait.c +++ b/third_party/nsync/mem/nsync_wait.c @@ -25,11 +25,7 @@ #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" #include "third_party/nsync/waiter.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); int nsync_wait_n (void *mu, void (*lock) (void *), void (*unlock) (void *), nsync_time abs_deadline, diff --git a/third_party/nsync/mu.c b/third_party/nsync/mu.c index 354c38621..4cae68328 100644 --- a/third_party/nsync/mu.c +++ b/third_party/nsync/mu.c @@ -23,11 +23,7 @@ #include "third_party/nsync/mu_semaphore.h" #include "third_party/nsync/races.internal.h" #include "third_party/nsync/wait_s.internal.h" - -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); +__static_yoink("nsync_notice"); /* Initialize *mu. */ void nsync_mu_init (nsync_mu *mu) { diff --git a/third_party/nsync/mu_semaphore.c b/third_party/nsync/mu_semaphore.c index 43f2b5ccc..493efa2c4 100644 --- a/third_party/nsync/mu_semaphore.c +++ b/third_party/nsync/mu_semaphore.c @@ -19,17 +19,13 @@ #include "libc/calls/cp.internal.h" #include "libc/dce.h" #include "third_party/nsync/mu_semaphore.internal.h" +__static_yoink("nsync_notice"); /* Apple's ulock (part by Cosmo futexes) is an internal API, but: 1. Unlike GCD it's cancellable, i.e. can be EINTR'd by signals 2. We currently always use ulock anyway for joining threads */ #define PREFER_GCD_OVER_ULOCK 1 -asm(".ident\t\"\\n\\n\ -*NSYNC (Apache 2.0)\\n\ -Copyright 2016 Google, Inc.\\n\ -https://github.com/google/nsync\""); - /* Initialize *s; the initial value is 0. 
*/ void nsync_mu_semaphore_init (nsync_semaphore *s) { if (PREFER_GCD_OVER_ULOCK && IsXnuSilicon ()) { diff --git a/third_party/nsync/notice.c b/third_party/nsync/notice.c new file mode 100644 index 000000000..614e3f62d --- /dev/null +++ b/third_party/nsync/notice.c @@ -0,0 +1,4 @@ +__notice(nsync_notice, "\ +*NSYNC (Apache 2.0)\n\ +Copyright 2016 Google, Inc.\n\ +https://github.com/google/nsync"); diff --git a/third_party/pcre/pcre2_compile.c b/third_party/pcre/pcre2_compile.c index fb910ed8e..4fc63cd57 100644 --- a/third_party/pcre/pcre2_compile.c +++ b/third_party/pcre/pcre2_compile.c @@ -1,9 +1,5 @@ #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -PCRE2 (PCRE2 License)\\n\ -Copyright (c) 1997-2022 University of Cambridge\""); - /************************************************* * Perl-Compatible Regular Expressions * *************************************************/ diff --git a/third_party/pcre/pcre2_match.c b/third_party/pcre/pcre2_match.c index 606a850c3..897c88b29 100644 --- a/third_party/pcre/pcre2_match.c +++ b/third_party/pcre/pcre2_match.c @@ -1,8 +1,8 @@ #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -PCRE2 (PCRE2 License)\\n\ -Copyright (c) 1997-2022 University of Cambridge\""); +__notice(pcre2_notice, "\ +PCRE2 (PCRE2 License)\n\ +Copyright (c) 1997-2022 University of Cambridge"); /************************************************* * Perl-Compatible Regular Expressions * diff --git a/third_party/puff/puff.c b/third_party/puff/puff.c index 94593db28..21bc92ec4 100644 --- a/third_party/puff/puff.c +++ b/third_party/puff/puff.c @@ -25,10 +25,9 @@ #include "third_party/puff/puff.h" #include "libc/runtime/runtime.h" -asm(".ident\t\"\\n\\n\ -puff (zlib License)\\n\ -Copyright 2002-203 Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(puff_notice, "\ +puff (zlib License)\n\ +Copyright 2002-2013 Mark Adler"); // Origin: git@github.com:madler/zlib.git // Commit: 03614c56ad299f9b238c75aa1e66f0c08fc4fc8b diff --git 
a/third_party/python/BUILD.mk b/third_party/python/BUILD.mk index 33ef97a47..81868c829 100644 --- a/third_party/python/BUILD.mk +++ b/third_party/python/BUILD.mk @@ -534,6 +534,7 @@ THIRD_PARTY_PYTHON_STAGE2_A_SRCS = \ third_party/python/Modules/_csv.c \ third_party/python/Modules/_datetimemodule.c \ third_party/python/Modules/_decimal/_decimal.c \ + third_party/python/Modules/_decimal/libmpdec/notice.c \ third_party/python/Modules/_decimal/libmpdec/basearith.c \ third_party/python/Modules/_decimal/libmpdec/constants.c \ third_party/python/Modules/_decimal/libmpdec/context.c \ diff --git a/third_party/python/Modules/_decimal/_decimal.c b/third_party/python/Modules/_decimal/_decimal.c index 0c0903a70..3940663c9 100644 --- a/third_party/python/Modules/_decimal/_decimal.c +++ b/third_party/python/Modules/_decimal/_decimal.c @@ -96,11 +96,6 @@ PYTHON_PROVIDE("_decimal.setcontext"); PYTHON_YOINK("numbers"); PYTHON_YOINK("collections"); -asm(".ident\t\"\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); - #if !defined(MPD_VERSION_HEX) || MPD_VERSION_HEX < 0x02040100 #error "libmpdec version >= 2.4.1 required" #endif diff --git a/third_party/python/Modules/_decimal/libmpdec/basearith.c b/third_party/python/Modules/_decimal/libmpdec/basearith.c index 893268e06..94c5835f6 100644 --- a/third_party/python/Modules/_decimal/libmpdec/basearith.c +++ b/third_party/python/Modules/_decimal/libmpdec/basearith.c @@ -31,11 +31,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/constants.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/typearith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /*********************************************************************/ /* Calculations in base MPD_RADIX */ diff --git 
a/third_party/python/Modules/_decimal/libmpdec/constants.c b/third_party/python/Modules/_decimal/libmpdec/constants.c index 587482f19..7f85e4f2a 100644 --- a/third_party/python/Modules/_decimal/libmpdec/constants.c +++ b/third_party/python/Modules/_decimal/libmpdec/constants.c @@ -29,11 +29,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/python/Modules/_decimal/libmpdec/constants.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* number-theory.c */ const mpd_uint_t mpd_moduli[3] = { diff --git a/third_party/python/Modules/_decimal/libmpdec/context.c b/third_party/python/Modules/_decimal/libmpdec/context.c index fff1b2dae..d57742f03 100644 --- a/third_party/python/Modules/_decimal/libmpdec/context.c +++ b/third_party/python/Modules/_decimal/libmpdec/context.c @@ -30,11 +30,7 @@ #include "libc/calls/calls.h" #include "libc/sysv/consts/sig.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); void mpd_dflt_traphandler(mpd_context_t *ctx) diff --git a/third_party/python/Modules/_decimal/libmpdec/convolute.c b/third_party/python/Modules/_decimal/libmpdec/convolute.c index 25b85d128..89cd5db13 100644 --- a/third_party/python/Modules/_decimal/libmpdec/convolute.c +++ b/third_party/python/Modules/_decimal/libmpdec/convolute.c @@ -36,11 +36,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h" #include "third_party/python/Modules/_decimal/libmpdec/sixstep.h" #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include 
\"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Bignum: Fast convolution using the Number Theoretic Transform. diff --git a/third_party/python/Modules/_decimal/libmpdec/crt.c b/third_party/python/Modules/_decimal/libmpdec/crt.c index e20acadc5..70db6f24f 100644 --- a/third_party/python/Modules/_decimal/libmpdec/crt.c +++ b/third_party/python/Modules/_decimal/libmpdec/crt.c @@ -31,11 +31,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h" #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Bignum: Chinese Remainder Theorem, extends the maximum transform length. */ diff --git a/third_party/python/Modules/_decimal/libmpdec/difradix2.c b/third_party/python/Modules/_decimal/libmpdec/difradix2.c index ac59f54a6..c5f7b4fae 100644 --- a/third_party/python/Modules/_decimal/libmpdec/difradix2.c +++ b/third_party/python/Modules/_decimal/libmpdec/difradix2.c @@ -32,11 +32,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h" #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Bignum: The actual transform routine (decimation in frequency). 
*/ diff --git a/third_party/python/Modules/_decimal/libmpdec/fnt.c b/third_party/python/Modules/_decimal/libmpdec/fnt.c index 4c9b6147e..cad1f4abf 100644 --- a/third_party/python/Modules/_decimal/libmpdec/fnt.c +++ b/third_party/python/Modules/_decimal/libmpdec/fnt.c @@ -33,11 +33,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/fnt.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Bignum: Fast transform for medium-sized coefficients. */ diff --git a/third_party/python/Modules/_decimal/libmpdec/fourstep.c b/third_party/python/Modules/_decimal/libmpdec/fourstep.c index 60912059c..36d3bb815 100644 --- a/third_party/python/Modules/_decimal/libmpdec/fourstep.c +++ b/third_party/python/Modules/_decimal/libmpdec/fourstep.c @@ -33,11 +33,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/sixstep.h" #include "third_party/python/Modules/_decimal/libmpdec/transpose.h" #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Cache Efficient Matrix Fourier Transform diff --git a/third_party/python/Modules/_decimal/libmpdec/io.c b/third_party/python/Modules/_decimal/libmpdec/io.c index bb8a92fc6..00a7af879 100644 --- a/third_party/python/Modules/_decimal/libmpdec/io.c +++ b/third_party/python/Modules/_decimal/libmpdec/io.c @@ -35,16 +35,12 @@ #include "third_party/python/Modules/_decimal/libmpdec/io.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/typearith.h" +__static_yoink("libmpdec_notice"); #if __GNUC__ >= 11 #pragma GCC diagnostic ignored 
"-Wmisleading-indentation" #endif -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); - /* This file contains functions for decimal <-> string conversions, including PEP-3101 formatting for numeric types. */ diff --git a/third_party/python/Modules/_decimal/libmpdec/memory.c b/third_party/python/Modules/_decimal/libmpdec/memory.c index e194159fb..98914f293 100644 --- a/third_party/python/Modules/_decimal/libmpdec/memory.c +++ b/third_party/python/Modules/_decimal/libmpdec/memory.c @@ -31,11 +31,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/mpalloc.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/typearith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Guaranteed minimum allocation for a coefficient. May be changed once at program start using mpd_setminalloc(). 
*/ diff --git a/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c b/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c index e1a818f36..e93eee8a2 100644 --- a/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c +++ b/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c @@ -36,11 +36,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/mpalloc.h" #include "third_party/python/Modules/_decimal/libmpdec/typearith.h" #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); #define MPD_NEWTONDIV_CUTOFF 1024L diff --git a/third_party/python/Modules/_decimal/libmpdec/notice.c b/third_party/python/Modules/_decimal/libmpdec/notice.c new file mode 100644 index 000000000..fd4f0fbe1 --- /dev/null +++ b/third_party/python/Modules/_decimal/libmpdec/notice.c @@ -0,0 +1,3 @@ +__notice(libmpdec_notice, "\ +libmpdec (BSD-2)\n\ +Copyright 2008-2016 Stefan Krah"); diff --git a/third_party/python/Modules/_decimal/libmpdec/numbertheory.c b/third_party/python/Modules/_decimal/libmpdec/numbertheory.c index 1551a5968..604d29f5c 100644 --- a/third_party/python/Modules/_decimal/libmpdec/numbertheory.c +++ b/third_party/python/Modules/_decimal/libmpdec/numbertheory.c @@ -31,11 +31,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h" #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Bignum: Initialize the Number Theoretic Transform. 
*/ diff --git a/third_party/python/Modules/_decimal/libmpdec/sixstep.c b/third_party/python/Modules/_decimal/libmpdec/sixstep.c index 06e8ba071..16d3e3f28 100644 --- a/third_party/python/Modules/_decimal/libmpdec/sixstep.c +++ b/third_party/python/Modules/_decimal/libmpdec/sixstep.c @@ -34,11 +34,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/sixstep.h" #include "third_party/python/Modules/_decimal/libmpdec/transpose.h" #include "third_party/python/Modules/_decimal/libmpdec/umodarith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); /* Cache Efficient Matrix Fourier Transform diff --git a/third_party/python/Modules/_decimal/libmpdec/transpose.c b/third_party/python/Modules/_decimal/libmpdec/transpose.c index 4419b6063..ceb3d9f63 100644 --- a/third_party/python/Modules/_decimal/libmpdec/transpose.c +++ b/third_party/python/Modules/_decimal/libmpdec/transpose.c @@ -34,11 +34,7 @@ #include "third_party/python/Modules/_decimal/libmpdec/constants.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/typearith.h" - -asm(".ident\t\"\\n\\n\ -libmpdec (BSD-2)\\n\ -Copyright 2008-2016 Stefan Krah\""); -asm(".include \"libc/disclaimer.inc\""); +__static_yoink("libmpdec_notice"); #define BUFSIZE 4096 #define SIDE 128 diff --git a/third_party/python/Modules/_randommodule.c b/third_party/python/Modules/_randommodule.c index 51fea4586..bafdf9fae 100644 --- a/third_party/python/Modules/_randommodule.c +++ b/third_party/python/Modules/_randommodule.c @@ -25,10 +25,8 @@ PYTHON_PROVIDE("_random"); PYTHON_PROVIDE("_random.Random"); -asm(".ident\t\"\\n\\n\ -mt19937 (BSD-3)\\n\ -Copyright 1997-2004 Makoto Matsumoto and Takuji Nishimura\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(python_mt19937_notice, "Python mt19937 (BSD-3)\n\ +Copyright 1997-2004 Makoto Matsumoto and Takuji 
Nishimura"); /* ------------------------------------------------------------------ The code in this module was based on a download from: diff --git a/third_party/python/Modules/_sqlite/cache.c b/third_party/python/Modules/_sqlite/cache.c index 3076e68f8..f88969fde 100644 --- a/third_party/python/Modules/_sqlite/cache.c +++ b/third_party/python/Modules/_sqlite/cache.c @@ -25,11 +25,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/python/Modules/_sqlite/cache.h" -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - /* only used internally */ pysqlite_Node* pysqlite_new_node(PyObject* key, PyObject* data) { diff --git a/third_party/python/Modules/_sqlite/connection.c b/third_party/python/Modules/_sqlite/connection.c index b1c3bb638..38651adbb 100644 --- a/third_party/python/Modules/_sqlite/connection.c +++ b/third_party/python/Modules/_sqlite/connection.c @@ -36,11 +36,6 @@ PYTHON_YOINK("sqlite3.dump"); -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - #define ACTION_FINALIZE 1 #define ACTION_RESET 2 diff --git a/third_party/python/Modules/_sqlite/cursor.c b/third_party/python/Modules/_sqlite/cursor.c index cc2c5e8f3..c4fc23f91 100644 --- a/third_party/python/Modules/_sqlite/cursor.c +++ b/third_party/python/Modules/_sqlite/cursor.c @@ -27,11 +27,6 @@ #include "third_party/python/Modules/_sqlite/module.h" #include "third_party/python/Modules/_sqlite/util.h" -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - PyObject* pysqlite_cursor_iternext(pysqlite_Cursor* self); static const char errmsg_fetch_across_rollback[] = "Cursor needed to be reset because of commit/rollback and can no longer be fetched from."; diff --git 
a/third_party/python/Modules/_sqlite/microprotocols.c b/third_party/python/Modules/_sqlite/microprotocols.c index 168b931be..eecada5b3 100644 --- a/third_party/python/Modules/_sqlite/microprotocols.c +++ b/third_party/python/Modules/_sqlite/microprotocols.c @@ -27,11 +27,6 @@ #include "third_party/python/Modules/_sqlite/microprotocols.h" #include "third_party/python/Modules/_sqlite/prepare_protocol.h" -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - /** the adapters registry **/ PyObject *psyco_adapters; diff --git a/third_party/python/Modules/_sqlite/module.c b/third_party/python/Modules/_sqlite/module.c index bf031a97f..63926be5c 100644 --- a/third_party/python/Modules/_sqlite/module.c +++ b/third_party/python/Modules/_sqlite/module.c @@ -32,12 +32,11 @@ #include "third_party/python/Modules/_sqlite/row.h" #include "third_party/python/Modules/_sqlite/statement.h" -PYTHON_PROVIDE("_sqlite3"); +__notice(pysqlite_notice, "\ +pysqlite (zlib license)\n\ +Copyright (C) 2005-2010 Gerhard Häring "); -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); +PYTHON_PROVIDE("_sqlite3"); /* #if SQLITE_VERSION_NUMBER >= 3003003 */ /* #define HAVE_SHARED_CACHE */ diff --git a/third_party/python/Modules/_sqlite/prepare_protocol.c b/third_party/python/Modules/_sqlite/prepare_protocol.c index eafb29baf..5b110e9fd 100644 --- a/third_party/python/Modules/_sqlite/prepare_protocol.c +++ b/third_party/python/Modules/_sqlite/prepare_protocol.c @@ -25,11 +25,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/python/Modules/_sqlite/prepare_protocol.h" -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - int pysqlite_prepare_protocol_init(pysqlite_PrepareProtocol* self, 
PyObject* args, PyObject* kwargs) { return 0; diff --git a/third_party/python/Modules/_sqlite/row.c b/third_party/python/Modules/_sqlite/row.c index b23f1afc3..be06b9213 100644 --- a/third_party/python/Modules/_sqlite/row.c +++ b/third_party/python/Modules/_sqlite/row.c @@ -26,11 +26,6 @@ #include "third_party/python/Modules/_sqlite/cursor.h" #include "third_party/python/Modules/_sqlite/row.h" -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - void pysqlite_row_dealloc(pysqlite_Row* self) { Py_XDECREF(self->data); diff --git a/third_party/python/Modules/_sqlite/statement.c b/third_party/python/Modules/_sqlite/statement.c index 066a6e870..381d97d63 100644 --- a/third_party/python/Modules/_sqlite/statement.c +++ b/third_party/python/Modules/_sqlite/statement.c @@ -30,11 +30,6 @@ #include "third_party/python/Modules/_sqlite/statement.h" #include "third_party/python/Modules/_sqlite/util.h" -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - /* prototypes */ static int pysqlite_check_remaining_sql(const char* tail); diff --git a/third_party/python/Modules/_sqlite/util.c b/third_party/python/Modules/_sqlite/util.c index 7eae8f8a3..ab53adb08 100644 --- a/third_party/python/Modules/_sqlite/util.c +++ b/third_party/python/Modules/_sqlite/util.c @@ -26,11 +26,6 @@ #include "third_party/python/Modules/_sqlite/connection.h" #include "third_party/python/Modules/_sqlite/module.h" -asm(".ident\t\"\\n\\n\ -pysqlite (zlib license)\\n\ -Copyright (C) 2005-2010 Gerhard Häring \""); -asm(".include \"libc/disclaimer.inc\""); - int pysqlite_step(sqlite3_stmt* statement, pysqlite_Connection* connection) { int rc; diff --git a/third_party/python/Modules/_sre.c b/third_party/python/Modules/_sre.c index da31f9cdd..a786dbdb4 100644 --- a/third_party/python/Modules/_sre.c +++ b/third_party/python/Modules/_sre.c 
@@ -39,10 +39,9 @@ PYTHON_PROVIDE("_sre.compile"); PYTHON_PROVIDE("_sre.getcodesize"); PYTHON_PROVIDE("_sre.getlower"); -asm(".ident\t\"\\n\\n\ -SRE 2.2.2 (Python license)\\n\ -Copyright 1997-2002 Secret Labs AB\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(python_sre_notice, "\ +SRE 2.2.2 (Python license)\n\ +Copyright 1997-2002 Secret Labs AB"); /* * Secret Labs' Regular Expression Engine diff --git a/third_party/python/Modules/expat/xmlparse.c b/third_party/python/Modules/expat/xmlparse.c index 6bfce3613..a6ba69a5b 100644 --- a/third_party/python/Modules/expat/xmlparse.c +++ b/third_party/python/Modules/expat/xmlparse.c @@ -46,11 +46,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ -asm(".ident\t\"\\n\\n\ -expat (MIT License)\\n\ -Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\\n\ -Copyright (c) 2000-2017 Expat development team\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(expat_notice, "expat (MIT License)\n\ +Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\n\ +Copyright (c) 2000-2017 Expat development team"); #define XML_BUILDING_EXPAT 1 diff --git a/third_party/python/Modules/expat/xmlrole.c b/third_party/python/Modules/expat/xmlrole.c index 6ce8885db..401329321 100644 --- a/third_party/python/Modules/expat/xmlrole.c +++ b/third_party/python/Modules/expat/xmlrole.c @@ -30,12 +30,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -asm(".ident\t\"\\n\\n\ -expat (MIT License)\\n\ -Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\\n\ -Copyright (c) 2000-2017 Expat development team\""); -asm(".include \"libc/disclaimer.inc\""); - #include "third_party/python/Modules/expat/expat_config.h" #include "third_party/python/Modules/expat/expat_external.h" #include "third_party/python/Modules/expat/internal.inc" diff --git a/third_party/python/Modules/expat/xmltok.c b/third_party/python/Modules/expat/xmltok.c index 149b12e10..6f1e6df95 100644 --- a/third_party/python/Modules/expat/xmltok.c +++ b/third_party/python/Modules/expat/xmltok.c @@ -30,12 +30,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ -asm(".ident\t\"\\n\\n\ -expat (MIT License)\\n\ -Copyright (c) 1997-2000 Thai Open Source Software Center Ltd\\n\ -Copyright (c) 2000-2017 Expat development team\""); -asm(".include \"libc/disclaimer.inc\""); - #include "third_party/python/Modules/expat/expat_config.h" #include "third_party/python/Modules/expat/expat_external.h" #include "third_party/python/Modules/expat/internal.inc" diff --git a/third_party/python/Modules/hashtable.c b/third_party/python/Modules/hashtable.c index fbed762c3..335ed3b17 100644 --- a/third_party/python/Modules/hashtable.c +++ b/third_party/python/Modules/hashtable.c @@ -8,10 +8,9 @@ #include "third_party/python/Include/pymem.h" #include "third_party/python/Modules/hashtable.h" -asm(".ident\t\"\\n\\n\ -cfuhash (bsd-3)\\n\ -Copyright (c) 2005 Don Owens\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(cfuhash_notice, "\ +cfuhash (bsd-3)\n\ +Copyright (c) 2005 Don Owens"); /* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash project: diff --git a/third_party/python/Modules/parsermodule.c b/third_party/python/Modules/parsermodule.c index 001054863..cd9c7fa22 100644 --- a/third_party/python/Modules/parsermodule.c +++ b/third_party/python/Modules/parsermodule.c @@ -40,12 +40,13 @@ PYTHON_PROVIDE("parser.st2tuple"); 
PYTHON_PROVIDE("parser.suite"); PYTHON_PROVIDE("parser.tuple2st"); -asm(".ident\t\"\\n\\n\ -parsermodule (Python license)\\n\ -Copyright 1995-1996 by Virginia Polytechnic Institute & State\\n\ -University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\\n\ -Virginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\\n\ -Centrum, Amsterdam, The Netherlands.\""); +__notice(parsermodule_notice, "\ +parsermodule (Python license)\n\ +Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\ +University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\ +Virginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\n\ +Centrum, Amsterdam, The Netherlands."); + static const char parser_copyright_string[] = "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\ University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\ diff --git a/third_party/python/Modules/syslogmodule.c b/third_party/python/Modules/syslogmodule.c index 6d2e4a916..31c66ba9b 100644 --- a/third_party/python/Modules/syslogmodule.c +++ b/third_party/python/Modules/syslogmodule.c @@ -59,11 +59,10 @@ PYTHON_PROVIDE("syslog.openlog"); PYTHON_PROVIDE("syslog.setlogmask"); PYTHON_PROVIDE("syslog.syslog"); -asm(".ident\t\"\\n\\n\ -syslogmodule (mit)\\n\ -Copyright 1994 by Lance Ellinghouse\\n\ -Cathedral City, California Republic, United States of America\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(python_syslogmodule_notice, "\ +syslogmodule (MIT License)\n\ +Copyright 1994 by Lance Ellinghouse\n\ +Cathedral City, California Republic, United States of America"); /*********************************************************** Copyright 1994 by Lance Ellinghouse, diff --git a/third_party/python/Modules/unicodedata_3.2.0.c b/third_party/python/Modules/unicodedata_3.2.0.c index 49e4df3a5..b2d967577 100644 --- a/third_party/python/Modules/unicodedata_3.2.0.c +++ b/third_party/python/Modules/unicodedata_3.2.0.c @@ -432,12 +432,10 @@ 
static const unsigned char _PyUnicode_ChangeIndex_3_2_0_rodata[365+1][2] = { /* { 2, 0x02}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_ChangeIndex_3_2_0_init(void) { rldecode2(_PyUnicode_ChangeIndex_3_2_0, (void *)_PyUnicode_ChangeIndex_3_2_0_rodata); } -const void *const _PyUnicode_ChangeIndex_3_2_0_ctor[] initarray = { - _PyUnicode_ChangeIndex_3_2_0_init, -}; unsigned char _PyUnicode_ChangeData_3_2_0[31360]; static const unsigned char _PyUnicode_ChangeData_3_2_0_rodata[1390+1][2] = { /* 8.8648% profit */ @@ -1833,12 +1831,10 @@ static const unsigned char _PyUnicode_ChangeData_3_2_0_rodata[1390+1][2] = { /* { 16, 0x00}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_ChangeData_3_2_0_init(void) { rldecode2(_PyUnicode_ChangeData_3_2_0, (void *)_PyUnicode_ChangeData_3_2_0_rodata); } -const void *const _PyUnicode_ChangeData_3_2_0_ctor[] initarray = { - _PyUnicode_ChangeData_3_2_0_init, -}; const _PyUnicode_ChangeRecord *_PyUnicode_GetChange_3_2_0(Py_UCS4 n) { diff --git a/third_party/python/Modules/unicodedata_comp.c b/third_party/python/Modules/unicodedata_comp.c index 9b2c142fe..95706fd0c 100644 --- a/third_party/python/Modules/unicodedata_comp.c +++ b/third_party/python/Modules/unicodedata_comp.c @@ -1152,6 +1152,7 @@ static const unsigned short _PyUnicode_CompIndex_rodata[1146+1][2] = { /* 38.598 { 1, 0x2a9}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_CompIndex_init(void) { int i, j, k; for (k = i = 0; i < 1146; ++i) { @@ -1160,9 +1161,6 @@ static textstartup void _PyUnicode_CompIndex_init(void) { } } } -const void *const _PyUnicode_CompIndex_ctor[] initarray = { - _PyUnicode_CompIndex_init, -}; const unsigned int _PyUnicode_CompData[1449] = { 0, 0, 0, 922746880, 17, 9011200, 3758096384, 1101, 0, 50594176, diff --git a/third_party/python/Modules/unicodedata_decomp.c b/third_party/python/Modules/unicodedata_decomp.c index ca8b27add..8fbcf188f 100644 --- 
a/third_party/python/Modules/unicodedata_decomp.c +++ b/third_party/python/Modules/unicodedata_decomp.c @@ -1446,12 +1446,10 @@ static const unsigned char _PyUnicode_DecompIndex1_rodata[159+1][2] = { /* 3.653 { 39, 0x00}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_DecompIndex1_init(void) { rldecode2(_PyUnicode_DecompIndex1, (void *)_PyUnicode_DecompIndex1_rodata); } -const void *const _PyUnicode_DecompIndex1_ctor[] initarray = { - _PyUnicode_DecompIndex1_init, -}; const unsigned short _PyUnicode_DecompIndex2[13312] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/third_party/python/Modules/unicodedata_phrasebook.c b/third_party/python/Modules/unicodedata_phrasebook.c index dfa0630fc..702ef6089 100644 --- a/third_party/python/Modules/unicodedata_phrasebook.c +++ b/third_party/python/Modules/unicodedata_phrasebook.c @@ -11647,12 +11647,10 @@ static const unsigned char _PyUnicode_PhrasebookOffset1_rodata[206+1][2] = { /* {253, 0x34}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_PhrasebookOffset1_init(void) { rldecode2(_PyUnicode_PhrasebookOffset1, (void *)_PyUnicode_PhrasebookOffset1_rodata); } -const void *const _PyUnicode_PhrasebookOffset1_ctor[] initarray = { - _PyUnicode_PhrasebookOffset1_init, -}; const unsigned int _PyUnicode_PhrasebookOffset2[25056] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 786433, 37748832, diff --git a/third_party/python/Modules/unicodedata_records.c b/third_party/python/Modules/unicodedata_records.c index 9c7b8ba51..7c08e5974 100644 --- a/third_party/python/Modules/unicodedata_records.c +++ b/third_party/python/Modules/unicodedata_records.c @@ -674,6 +674,7 @@ static const unsigned short _PyUnicode_RecordsIndex1_rodata[323+1][2] = { /* 7.4 { 1, 0x10a}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_RecordsIndex1_init(void) { int i, j, k; for (k = i = 0; i < 323; ++i) { @@ -682,9 
+683,6 @@ static textstartup void _PyUnicode_RecordsIndex1_init(void) { } } } -const void *const _PyUnicode_RecordsIndex1_ctor[] initarray = { - _PyUnicode_RecordsIndex1_init, -}; unsigned short _PyUnicode_RecordsIndex2[34176]; static const unsigned short _PyUnicode_RecordsIndex2_rodata[4990+1][2] = { /* 29.2018% profit */ @@ -5680,6 +5678,7 @@ static const unsigned short _PyUnicode_RecordsIndex2_rodata[4990+1][2] = { /* 29 { 2, 0x00}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_RecordsIndex2_init(void) { int i, j, k; for (k = i = 0; i < 4990; ++i) { @@ -5688,7 +5687,3 @@ static textstartup void _PyUnicode_RecordsIndex2_init(void) { } } } -const void *const _PyUnicode_RecordsIndex2_ctor[] initarray = { - _PyUnicode_RecordsIndex2_init, -}; - diff --git a/third_party/python/Modules/unicodedata_typerecords.c b/third_party/python/Modules/unicodedata_typerecords.c index afa05895c..864ba3b4c 100644 --- a/third_party/python/Modules/unicodedata_typerecords.c +++ b/third_party/python/Modules/unicodedata_typerecords.c @@ -881,6 +881,7 @@ static const unsigned short _PyUnicode_TypeRecordsIndex1_rodata[370+1][2] = { /* { 1, 0x112}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_TypeRecordsIndex1_init(void) { int i, j, k; for (k = i = 0; i < 370; ++i) { @@ -889,9 +890,6 @@ static textstartup void _PyUnicode_TypeRecordsIndex1_init(void) { } } } -const void *const _PyUnicode_TypeRecordsIndex1_ctor[] initarray = { - _PyUnicode_TypeRecordsIndex1_init, -}; unsigned short _PyUnicode_TypeRecordsIndex2[35200]; static const unsigned short _PyUnicode_TypeRecordsIndex2_rodata[4889+1][2] = { /* 27.7784% profit */ @@ -5786,6 +5784,7 @@ static const unsigned short _PyUnicode_TypeRecordsIndex2_rodata[4889+1][2] = { / { 2, 0x00}, {0}, }; +__attribute__((__constructor__(99))) static textstartup void _PyUnicode_TypeRecordsIndex2_init(void) { int i, j, k; for (k = i = 0; i < 4889; ++i) { @@ -5794,7 +5793,3 @@ static 
textstartup void _PyUnicode_TypeRecordsIndex2_init(void) { } } } -const void *const _PyUnicode_TypeRecordsIndex2_ctor[] initarray = { - _PyUnicode_TypeRecordsIndex2_init, -}; - diff --git a/third_party/python/Python/getcopyright.c b/third_party/python/Python/getcopyright.c index 9ec5beadc..233148714 100644 --- a/third_party/python/Python/getcopyright.c +++ b/third_party/python/Python/getcopyright.c @@ -10,16 +10,16 @@ #include "libc/str/str.h" #include "third_party/python/Include/pylifecycle.h" -asm(".ident\t\"\\n\\n\ -Python 3.6 (https://docs.python.org/3/license.html)\\n\ -Copyright (c) 2001-2021 Python Software Foundation.\\n\ -All Rights Reserved.\\n\ -Copyright (c) 2000 BeOpen.com.\\n\ -All Rights Reserved.\\n\ -Copyright (c) 1995-2001 Corporation for National Research Initiatives.\\n\ -All Rights Reserved.\\n\ -Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.\\n\ -All Rights Reserved.\""); +__notice(python_notice, "\ +Python 3.6 (https://docs.python.org/3/license.html)\n\ +Copyright (c) 2001-2021 Python Software Foundation.\n\ +All Rights Reserved.\n\ +Copyright (c) 2000 BeOpen.com.\n\ +All Rights Reserved.\n\ +Copyright (c) 1995-2001 Corporation for National Research Initiatives.\n\ +All Rights Reserved.\n\ +Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.\n\ +All Rights Reserved."); const char * Py_GetCopyright(void) @@ -29,7 +29,7 @@ Py_GetCopyright(void) char *r = 0; const char *p; appends(&r, ""); - for (p = __comment_start; *p; p += strlen(p) + 1) { + for (p = __notices; *p; p += strlen(p) + 1) { appends(&r, p); } res = r; diff --git a/third_party/python/Python/getopt.c b/third_party/python/Python/getopt.c index c54c7aa68..24aa6b49b 100644 --- a/third_party/python/Python/getopt.c +++ b/third_party/python/Python/getopt.c @@ -8,9 +8,9 @@ #include "libc/str/str.h" #include "third_party/python/Include/pygetopt.h" -asm(".ident\t\"\\n\\n\ -python getopt (isc license)\\n\ -Copyright 1992-1994 David Gottner\""); 
+__notice(python_getopt_notice, "\ +python getopt (isc license)\n\ +Copyright 1992-1994 David Gottner"); /*---------------------------------------------------------------------------* * diff --git a/third_party/python/Python/sysmodule.c b/third_party/python/Python/sysmodule.c index 9cf5f5cf1..8b0c6edc7 100644 --- a/third_party/python/Python/sysmodule.c +++ b/third_party/python/Python/sysmodule.c @@ -2013,6 +2013,7 @@ _PySys_Init(void) PyBool_FromLong(Py_DontWriteBytecodeFlag)); SET_SYS_FROM_STRING("api_version", PyLong_FromLong(PYTHON_API_VERSION)); + // asm("int3"); SET_SYS_FROM_STRING("copyright", PyUnicode_FromString(Py_GetCopyright())); SET_SYS_FROM_STRING("platform", diff --git a/third_party/regex/notice.inc b/third_party/regex/notice.inc deleted file mode 100644 index d154d1146..000000000 --- a/third_party/regex/notice.inc +++ /dev/null @@ -1,10 +0,0 @@ -asm(".ident\t\"\\n\ -Musl Libc (MIT License)\\n\ -Copyright 2005-2014 Rich Felker\""); -asm(".include \"libc/disclaimer.inc\""); - -asm(".ident\t\"\\n\ -TRE regex (BSD-2 License)\\n\ -Copyright 2001-2009 Ville Laurikari \\n\ -Copyright 2016 Szabolcs Nagy\""); -asm(".include \"libc/disclaimer.inc\""); diff --git a/third_party/regex/regexec.c b/third_party/regex/regexec.c index 81f722087..fd4b4446f 100644 --- a/third_party/regex/regexec.c +++ b/third_party/regex/regexec.c @@ -60,6 +60,11 @@ #include "libc/limits.h" #include "third_party/regex/tre.inc" +__notice(tre_regex, "\ +TRE regex (BSD-2 License)\n\ +Copyright 2001-2009 Ville Laurikari \n\ +Copyright 2016 Szabolcs Nagy"); + static void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo); diff --git a/third_party/regex/tre.inc b/third_party/regex/tre.inc index 2c26396d9..2f6e39854 100644 --- a/third_party/regex/tre.inc +++ b/third_party/regex/tre.inc @@ -60,7 +60,6 @@ #include "libc/mem/alg.h" #include "libc/mem/mem.h" #include "libc/str/str.h" -#include "third_party/regex/notice.inc" 
#include "third_party/regex/regex.h" #undef TRE_MBSTATE diff --git a/third_party/stb/stb_image.c b/third_party/stb/stb_image.c index 6852c3adc..f796a034b 100644 --- a/third_party/stb/stb_image.c +++ b/third_party/stb/stb_image.c @@ -36,10 +36,10 @@ #include "third_party/aarch64/arm_neon.internal.h" #include "third_party/intel/ammintrin.internal.h" -asm(".ident\t\"\\n\\n\ -stb_image (Public Domain)\\n\ -Credit: Sean Barrett, et al.\\n\ -http://nothings.org/stb\""); +__notice(stb_image_notice, "\ +stb_image (Public Domain)\n\ +Credit: Sean Barrett, et al.\n\ +http://nothings.org/stb"); #ifdef __x86_64__ #define STBI_SSE2 diff --git a/third_party/stb/stb_image_resize.c b/third_party/stb/stb_image_resize.c index 24cb373e5..7fc71a33e 100644 --- a/third_party/stb/stb_image_resize.c +++ b/third_party/stb/stb_image_resize.c @@ -23,10 +23,10 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -stb_image_resize (Public Domain)\\n\ -Credit: Jorge L Rodriguez (@VinoBS), Sean Barrett, et al.\\n\ -http://nothings.org/stb\""); +__notice(stb_image_resize_notice, "\ +stb_image_resize (Public Domain)\n\ +Credit: Jorge L Rodriguez (@VinoBS), Sean Barrett, et al.\n\ +http://nothings.org/stb"); #define STBIR_ASSERT(x) assert(x) #define STBIR_MALLOC(size, c) ((void)(c), malloc(size)) diff --git a/third_party/stb/stb_image_write.c b/third_party/stb/stb_image_write.c index 1f984bc78..9af55ae36 100644 --- a/third_party/stb/stb_image_write.c +++ b/third_party/stb/stb_image_write.c @@ -29,10 +29,10 @@ #include "libc/str/str.h" #include "third_party/zlib/zlib.h" -asm(".ident\t\"\\n\\n\ -stb_image_write (Public Domain)\\n\ -Credit: Sean Barrett, et al.\\n\ -http://nothings.org/stb\""); +__notice(stb_image_write_notice, "\ +stb_image_write (Public Domain)\n\ +Credit: Sean Barrett, et al.\n\ +http://nothings.org/stb"); #define STBIW_UCHAR(x) (unsigned char)((x)&0xff) diff --git a/third_party/stb/stb_rect_pack.c b/third_party/stb/stb_rect_pack.c index 
65fab7afa..eb07e1e0d 100644 --- a/third_party/stb/stb_rect_pack.c +++ b/third_party/stb/stb_rect_pack.c @@ -21,10 +21,9 @@ #include "libc/dce.h" #include "libc/mem/alg.h" -asm(".ident\t\"\\n\\n\ -stb_rect_pack (MIT License)\\n\ -Copyright 2017 Sean Barrett\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(stb_rect_pack_notice, "\ +stb_rect_pack (MIT License)\n\ +Copyright 2017 Sean Barrett"); // stb_rect_pack.h - v1.01 - public domain - rectangle packing // Sean Barrett 2014 diff --git a/third_party/stb/stb_truetype.c b/third_party/stb/stb_truetype.c index a59346df3..e1449c11b 100644 --- a/third_party/stb/stb_truetype.c +++ b/third_party/stb/stb_truetype.c @@ -36,10 +36,9 @@ #include "libc/str/str.h" #include "third_party/stb/stb_rect_pack.h" -asm(".ident\t\"\\n\\n\ -stb_truetype (MIT License)\\n\ -Copyright 2017 Sean Barrett\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(stb_truetype_notice, "\ +stb_truetype (MIT License)\n\ +Copyright 2017 Sean Barrett"); // stb_truetype.h - v1.26 - public domain // authored from 2009-2021 by Sean Barrett / RAD Game Tools diff --git a/third_party/stb/stb_vorbis.c b/third_party/stb/stb_vorbis.c index 3b3da48f3..221dee242 100644 --- a/third_party/stb/stb_vorbis.c +++ b/third_party/stb/stb_vorbis.c @@ -46,10 +46,10 @@ #include "libc/mem/mem.h" #include "libc/str/str.h" -asm(".ident\t\"\\n\\n\ -stb_vorbis (Public Domain)\\n\ -Credit: Sean Barrett, et al.\\n\ -http://nothings.org/stb\""); +__notice(stb_vorbis_notice, "\ +stb_vorbis (Public Domain)\n\ +Credit: Sean Barrett, et al.\n\ +http://nothings.org/stb"); // STB_VORBIS_NO_PUSHDATA_API // does not compile the code for the various stb_vorbis_*_pushdata() diff --git a/third_party/xed/x86features.c b/third_party/xed/x86features.c index 3d8fca26c..12cebdf6e 100644 --- a/third_party/xed/x86features.c +++ b/third_party/xed/x86features.c @@ -18,13 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "third_party/xed/x86isa.h" 
-asm(".ident\t\"\\n\\n\ -Xed (Apache 2.0)\\n\ -Copyright 2018 Intel Corporation\\n\ -Copyright 2019 Justine Alexandra Roberts Tunney\\n\ -Modifications: Trimmed down to 3kb [2019-03-22 jart]\""); -asm(".include \"libc/disclaimer.inc\""); - /** * Mapping of enum XedChip -> bitset. * diff --git a/third_party/xed/x86ild.greg.c b/third_party/xed/x86ild.greg.c index 3e76b71af..5bcb2d927 100644 --- a/third_party/xed/x86ild.greg.c +++ b/third_party/xed/x86ild.greg.c @@ -28,12 +28,11 @@ #include "third_party/xed/private.h" #include "third_party/xed/x86.h" -asm(".ident\t\"\\n\\n\ -Xed (Apache 2.0)\\n\ -Copyright 2018 Intel Corporation\\n\ -Copyright 2019 Justine Alexandra Roberts Tunney\\n\ -Modifications: Trimmed down to 3kb [2019-03-22 jart]\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(xed_notice, "\ +Xed (Apache 2.0)\n\ +Copyright 2018 Intel Corporation\n\ +Copyright 2019 Justine Alexandra Roberts Tunney\n\ +Changes: Trimmed Intel's assembler down to 3kb [2019-03-22 jart]"); #define XED_ILD_HASMODRM_IGNORE_MOD 2 diff --git a/third_party/xed/x86isa.c b/third_party/xed/x86isa.c index 9cc96240e..48b3af6bd 100644 --- a/third_party/xed/x86isa.c +++ b/third_party/xed/x86isa.c @@ -19,13 +19,6 @@ #include "third_party/xed/x86.h" #include "third_party/xed/x86isa.h" -asm(".ident\t\"\\n\\n\ -Xed (Apache 2.0)\\n\ -Copyright 2018 Intel Corporation\\n\ -Copyright 2019 Justine Alexandra Roberts Tunney\\n\ -Modifications: Trimmed down to 3kb [2019-03-22 jart]\""); -asm(".include \"libc/disclaimer.inc\""); - bool xed_isa_set_is_valid_for_chip(int isa_set, int chip) { unsigned n, r; n = isa_set / 64; diff --git a/third_party/zlib/adler32_simd.c b/third_party/zlib/adler32_simd.c index f767c49bd..b3d310fbb 100644 --- a/third_party/zlib/adler32_simd.c +++ b/third_party/zlib/adler32_simd.c @@ -1,6 +1,4 @@ -asm(".ident\t\"\\n\\n\ -Chromium (BSD-3 License)\\n\ -Copyright 2017 The Chromium Authors\""); +__static_yoink("chromium_notice"); /* adler32_simd.c * diff --git 
a/third_party/zlib/crc32_simd.c b/third_party/zlib/crc32_simd.c index cf38bb94b..a1b35ab56 100644 --- a/third_party/zlib/crc32_simd.c +++ b/third_party/zlib/crc32_simd.c @@ -5,10 +5,7 @@ * found in the Chromium source repository LICENSE file. */ -asm(".ident\t\"\\n\\n\ -Chromium (BSD-3 License)\\n\ -Copyright 2017 The Chromium Authors\""); - +__static_yoink("chromium_notice"); #include "third_party/intel/x86gprintrin.internal.h" #include "third_party/zlib/crc32_simd.internal.h" #if defined(CRC32_SIMD_AVX512_PCLMUL) diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c index 6db7b0c28..58f9474e7 100644 --- a/third_party/zlib/deflate.c +++ b/third_party/zlib/deflate.c @@ -11,11 +11,7 @@ #include "third_party/zlib/deflate.internal.h" #include "third_party/zlib/internal.h" #include "third_party/zlib/zutil.internal.h" - -asm(".ident\t\"\\n\\n\ -zlib 1.2.13 (zlib License)\\n\ -Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\ -Invented 1990 Phillip Walter Katz\""); +__static_yoink("zlib_notice"); /* * ALGORITHM diff --git a/third_party/zlib/inffast_chunk.c b/third_party/zlib/inffast_chunk.c index 7ab74c011..a5302a49b 100644 --- a/third_party/zlib/inffast_chunk.c +++ b/third_party/zlib/inffast_chunk.c @@ -7,10 +7,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -asm(".ident\t\"\\n\\n\ -Chromium (BSD-3 License)\\n\ -Copyright 2017 The Chromium Authors\""); - +__static_yoink("chromium_notice"); #include "third_party/zlib/zutil.internal.h" #include "third_party/zlib/inftrees.internal.h" #include "third_party/zlib/inflate.internal.h" diff --git a/third_party/zlib/inflate.c b/third_party/zlib/inflate.c index 2d5a8c424..75fa6b56e 100644 --- a/third_party/zlib/inflate.c +++ b/third_party/zlib/inflate.c @@ -6,10 +6,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -asm(".ident\t\"\\n\\n\ -zlib 1.2.13 (zlib License)\\n\ -Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\ -Invented 1990 Phillip 
Walter Katz\""); +__static_yoink("zlib_notice"); /* * Change history: diff --git a/third_party/zlib/inftrees.c b/third_party/zlib/inftrees.c index 4a628af76..c089e77b0 100644 --- a/third_party/zlib/inftrees.c +++ b/third_party/zlib/inftrees.c @@ -5,14 +5,10 @@ * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ +__static_yoink("zlib_notice"); #include "third_party/zlib/inftrees.internal.h" #include "third_party/zlib/zutil.internal.h" -asm(".ident\t\"\\n\\n\ -zlib 1.2.13 (zlib License)\\n\ -Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\ -Invented 1990 Phillip Walter Katz\""); - #define MAXBITS 15 /* diff --git a/third_party/zlib/notice.c b/third_party/zlib/notice.c new file mode 100644 index 000000000..5f6ca928d --- /dev/null +++ b/third_party/zlib/notice.c @@ -0,0 +1,4 @@ +__notice(zlib_notice, "\ +zlib 1.2.13 (zlib License)\n\ +Copyright 1995-2022 Jean-loup Gailly and Mark Adler\n\ +Invented 1990 Phillip Walter Katz"); diff --git a/tool/build/fixupobj.c b/tool/build/fixupobj.c index 7cb391bce..a5b713b47 100644 --- a/tool/build/fixupobj.c +++ b/tool/build/fixupobj.c @@ -29,7 +29,6 @@ #include "libc/errno.h" #include "libc/fmt/itoa.h" #include "libc/fmt/magnumstrs.internal.h" -#include "libc/intrin/kprintf.h" #include "libc/limits.h" #include "libc/log/log.h" #include "libc/macros.internal.h" @@ -226,8 +225,56 @@ static void CheckPrivilegedCrossReferences(void) { } } +// Change AMD code to use %gs:0x30 instead of %fs:0 +// We assume -mno-tls-direct-seg-refs has been used +static void ChangeTlsFsToGs(unsigned char *p, size_t n) { + unsigned char *e = p + n - 9; + while (p <= e) { + // we're checking for the following expression: + // 0144 == p[0] && // %fs + // 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r) + // (0213 == p[2] || // mov reg/mem → reg (word-sized) + // 0003 == p[2]) && // add reg/mem → reg (word-sized) + // 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg + // 0045 
== p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32 + // 0000 == p[5] && // displacement (von Neumann endian) + // 0000 == p[6] && // displacement + // 0000 == p[7] && // displacement + // 0000 == p[8] // displacement + uint64_t w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377"); + if ((w == READ64LE("\144\110\213\004\045\000\000\000") || + w == READ64LE("\144\110\003\004\045\000\000\000")) && + !p[8]) { + p[0] = 0145; // change %fs to %gs + p[5] = 0x30; // change 0 to 0x30 + p += 9; + } else { + ++p; + } + } +} + +static void RewriteTlsCodeAmd64(void) { + int i; + uint8_t *p; + Elf64_Shdr *shdr; + for (i = 0; i < elf->e_shnum; ++i) { + if (!(shdr = GetElfSectionHeaderAddress(elf, esize, i))) { + Die("elf header overflow #1"); + } + if (shdr->sh_type == SHT_PROGBITS && // + (shdr->sh_flags & SHF_ALLOC) && // + (shdr->sh_flags & SHF_EXECINSTR)) { + if (!(p = GetElfSectionAddress(elf, esize, shdr))) { + Die("elf header overflow #2"); + } + ChangeTlsFsToGs(p, shdr->sh_size); + } + } +} + // Modify ARM64 code to use x28 for TLS rather than tpidr_el0. -static void RewriteTlsCode(void) { +static void RewriteTlsCodeArm64(void) { int i; Elf64_Shdr *shdr; uint32_t *p, *pe; @@ -594,10 +641,11 @@ static void FixupObject(void) { CheckPrivilegedCrossReferences(); if (mode == O_RDWR) { if (elf->e_machine == EM_NEXGEN32E) { + RewriteTlsCodeAmd64(); OptimizePatchableFunctionEntries(); GenerateIfuncInit(); } else if (elf->e_machine == EM_AARCH64) { - RewriteTlsCode(); + RewriteTlsCodeArm64(); if (elf->e_type != ET_REL) { UseFreebsdOsAbi(); } diff --git a/tool/cosmocc/README.md b/tool/cosmocc/README.md index 1b3aa124a..d55b9717e 100644 --- a/tool/cosmocc/README.md +++ b/tool/cosmocc/README.md @@ -9,12 +9,13 @@ reach a broader audience from the platform(s) of your choosing. ## What's Included -This toolchain bundles GCC 11.2.0, Cosmopolitan Libc, LLVM LIBCXX, and -LLVM compiler-rt. Additional libraries were provided by Musl Libc, and -the venerable BSDs OSes. 
This lets you benefit from the awesome modern -GCC compiler with the strongest GPL barrier possible. The preprocessor -advertises cross compilers as both `__COSMOCC__` and `__COSMOPOLITAN__` -whereas `cosmocc` additionally defines `__FATCOSMOCC__`. +This toolchain bundles GCC 12.3.0, Cosmopolitan Libc, LLVM LIBCXX, LLVM +compiler-rt, and LLVM OpenMP. Additional libraries were provided by Musl +Libc, and the venerable BSDs OSes. This lets you benefit from the +awesome modern GCC compiler with the strongest GPL barrier possible. The +preprocessor advertises cross compilers as both `__COSMOCC__` and +`__COSMOPOLITAN__` whereas `cosmocc` additionally defines +`__FATCOSMOCC__`. ## Getting Started @@ -296,9 +297,9 @@ EINVAL: ... }` in cases where constants like `EINVAL` are linkable symbols. Your code will be rewritten in such cases to use a series of if statements instead, so that Cosmopolitan Libc's system constants will work as expected. Our modifications to GNU GCC are published under the -ISC license at . The +ISC license at . The binaries you see here were first published at - which + which is regularly updated. 
## Legal diff --git a/tool/cosmocc/bin/cosmocc b/tool/cosmocc/bin/cosmocc index 719d0b5d6..af8f38f67 100755 --- a/tool/cosmocc/bin/cosmocc +++ b/tool/cosmocc/bin/cosmocc @@ -239,7 +239,7 @@ PLATFORM="-D__COSMOPOLITAN__ -D__COSMOCC__ -D__FATCOSMOCC__" PREDEF="-include libc/integral/normalize.inc" CPPFLAGS="-fno-pie -nostdinc -fno-math-errno -isystem $BIN/../include" CFLAGS="-fportcosmo -fno-dwarf2-cfi-asm -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-semantic-interposition" -LDFLAGS="-static -nostdlib -no-pie -fuse-ld=bfd -Wl,-z,norelro -Wl,--gc-sections" +LDFLAGS="-static -nostdlib -no-pie -fuse-ld=bfd -Wl,-z,noexecstack -Wl,-z,norelro -Wl,--gc-sections" PRECIOUS="-fno-omit-frame-pointer" if [ x"$OPT" != x"-Os" ] && [ x"$MODE" != x"tiny" ]; then diff --git a/tool/cosmocc/bin/cosmocross b/tool/cosmocc/bin/cosmocross index e1f3afd04..ba6d3e54a 100755 --- a/tool/cosmocc/bin/cosmocross +++ b/tool/cosmocc/bin/cosmocross @@ -49,7 +49,7 @@ PLATFORM="-D__COSMOPOLITAN__ -D__COSMOCC__" PREDEF="-include libc/integral/normalize.inc" CFLAGS="-fportcosmo -fno-dwarf2-cfi-asm -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-semantic-interposition" CPPFLAGS="-fno-pie -nostdinc -fno-math-errno -isystem $BIN/../include" -LDFLAGS="-static -no-pie -nostdlib -fuse-ld=bfd" +LDFLAGS="-static -no-pie -nostdlib -fuse-ld=bfd -Wl,-z,noexecstack" APEFLAGS="-Wl,--gc-sections" PRECIOUS="-fno-omit-frame-pointer" diff --git a/tool/cosmocc/package.sh b/tool/cosmocc/package.sh index 63e6bf191..8f43c94fb 100755 --- a/tool/cosmocc/package.sh +++ b/tool/cosmocc/package.sh @@ -19,6 +19,7 @@ OUTDIR=${1:-cosmocc} APELINK=o/$(mode)/tool/build/apelink.com AMD64=${2:-x86_64} ARM64=${3:-aarch64} +GCCVER=12.3.0 make -j32 m= \ $APELINK @@ -89,10 +90,10 @@ fetch() { OLD=$PWD cd "$OUTDIR/" if [ ! 
-x bin/x86_64-linux-cosmo-gcc ]; then - fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.30/aarch64-gcc.zip + fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.32/aarch64-gcc.zip unzip aarch64-gcc.zip rm -f aarch64-gcc.zip - fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.30/x86_64-gcc.zip + fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.32/x86_64-gcc.zip unzip x86_64-gcc.zip rm -f x86_64-gcc.zip fi @@ -113,14 +114,14 @@ for arch in aarch64 x86_64; do ln -sf $arch-linux-cosmo-objdump bin/$arch-unknown-cosmo-objdump ln -sf $arch-linux-cosmo-readelf bin/$arch-unknown-cosmo-readelf ln -sf $arch-linux-cosmo-strip bin/$arch-unknown-cosmo-strip - cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd libexec/gcc/$arch-linux-cosmo/11.2.0/ld - ln -sf ld.bfd libexec/gcc/$arch-linux-cosmo/11.2.0/ld - cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld - ln -sf ../libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld - cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/as bin/$arch-linux-cosmo-as - ln -sf ../libexec/gcc/$arch-linux-cosmo/11.2.0/as bin/$arch-linux-cosmo-as - cmp -s libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld.bfd - ln -sf ../libexec/gcc/$arch-linux-cosmo/11.2.0/ld.bfd bin/$arch-linux-cosmo-ld.bfd + cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd libexec/gcc/$arch-linux-cosmo/$GCCVER/ld + ln -sf ld.bfd libexec/gcc/$arch-linux-cosmo/$GCCVER/ld + cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld + ln -sf ../libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld + cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/as bin/$arch-linux-cosmo-as + ln -sf ../libexec/gcc/$arch-linux-cosmo/$GCCVER/as bin/$arch-linux-cosmo-as + cmp -s libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld.bfd + ln -sf ../libexec/gcc/$arch-linux-cosmo/$GCCVER/ld.bfd bin/$arch-linux-cosmo-ld.bfd 
done cd "$OLD" diff --git a/tool/emacs/c.lang b/tool/emacs/c.lang index 99f7d7c08..a2dad3636 100644 --- a/tool/emacs/c.lang +++ b/tool/emacs/c.lang @@ -121,7 +121,6 @@ Keywords={ "hasatleast", "nodebuginfo", "noreturn", -"initarray", "mayalias", "dontinstrument", "interruptfn", diff --git a/tool/emacs/cosmo-asm-mode.el b/tool/emacs/cosmo-asm-mode.el index 70fd0edd2..b948e12af 100644 --- a/tool/emacs/cosmo-asm-mode.el +++ b/tool/emacs/cosmo-asm-mode.el @@ -65,6 +65,7 @@ "pltoff" "gotpcrel" "progbits" + "note" "nobits" "init_array" "fini_array" diff --git a/tool/emacs/cosmo-c-builtins.el b/tool/emacs/cosmo-c-builtins.el index 1a9a69330..3dee199be 100644 --- a/tool/emacs/cosmo-c-builtins.el +++ b/tool/emacs/cosmo-c-builtins.el @@ -204,6 +204,7 @@ '("DebugBreak" "__veil" "__conceal" + "__notice" "__expropriate" "__yoink" "__dll_import" diff --git a/tool/emacs/cosmo-c-keywords.el b/tool/emacs/cosmo-c-keywords.el index 7fd7cdd30..3f4f691c5 100644 --- a/tool/emacs/cosmo-c-keywords.el +++ b/tool/emacs/cosmo-c-keywords.el @@ -87,7 +87,6 @@ "nomsan" "dontubsan" "nostackprotector" - "initarray" "mayalias" "dontinstrument" "interruptfn" diff --git a/tool/emacs/key.py b/tool/emacs/key.py index 43a8b7c73..0d1a56ca1 100644 --- a/tool/emacs/key.py +++ b/tool/emacs/key.py @@ -335,7 +335,6 @@ cosmo_kws = frozenset([ "forcealignargpointer", "forceinline", "hasatleast", - "initarray", "interruptfn", "mallocesque", "mayalias", @@ -394,7 +393,6 @@ cosmo_kws = frozenset([ "forcealignargpointer", "forceinline", "hasatleast", - "initarray", "interruptfn", "mallocesque", "mayalias", diff --git a/tool/hello/BUILD.mk b/tool/hello/BUILD.mk index 3d2ac96e9..6047047bf 100644 --- a/tool/hello/BUILD.mk +++ b/tool/hello/BUILD.mk @@ -39,7 +39,7 @@ o/$(MODE)/tool/hello/hello.com.dbg: \ # uses apelink to turn it into an ape executable # support vector is set to all operating systems -o/$(MODE)/tool/hello/hello.com: \ +o/$(MODE)/tool/hello/hello.ape: \ o/$(MODE)/tool/hello/hello.com.dbg \ 
o/$(MODE)/tool/build/apelink.com \ o/$(MODE)/tool/build/pecheck.com \ @@ -49,7 +49,7 @@ o/$(MODE)/tool/hello/hello.com: \ # uses apelink to generate elf-only executable # support vector = linux/freebsd/openbsd/netbsd/metal -o/$(MODE)/tool/hello/hello-elf.com: \ +o/$(MODE)/tool/hello/hello-elf.ape: \ o/$(MODE)/tool/hello/hello.com.dbg \ o/$(MODE)/tool/build/apelink.com \ o/$(MODE)/ape/ape.elf @@ -59,7 +59,7 @@ o/$(MODE)/tool/hello/hello-elf.com: \ # support vector = macos/linux/freebsd/openbsd/netbsd # - great way to avoid attention from bad virus scanners # - creates tinier executable by reducing alignment requirement -o/$(MODE)/tool/hello/hello-unix.com: \ +o/$(MODE)/tool/hello/hello-unix.ape: \ o/$(MODE)/tool/hello/hello.com.dbg \ o/$(MODE)/tool/build/apelink.com \ o/$(MODE)/ape/ape.elf @@ -71,7 +71,7 @@ o/$(MODE)/tool/hello/hello-unix.com: \ o/$(MODE)/tool/hello/hello-pe.com.dbg: \ o/$(MODE)/tool/hello/hello-pe.o @$(COMPILE) -ALINK.elf $(LINK) $(LINKARGS) $(OUTPUT_OPTION) -q -e WinMain -o/$(MODE)/tool/hello/hello-pe.com: \ +o/$(MODE)/tool/hello/hello-pe.ape: \ o/$(MODE)/tool/hello/hello-pe.com.dbg \ o/$(MODE)/tool/build/elf2pe.com @$(COMPILE) -AELF2PE o/$(MODE)/tool/build/elf2pe.com -o $@ $< @@ -80,7 +80,7 @@ o/$(MODE)/tool/hello/hello-pe.com: \ o/$(MODE)/tool/hello/life-pe.com.dbg: \ o/$(MODE)/tool/hello/life-pe.o @$(COMPILE) -ALINK.elf $(LINK) $(LINKARGS) $(OUTPUT_OPTION) -q -e WinMain -o/$(MODE)/tool/hello/life-pe.com: \ +o/$(MODE)/tool/hello/life-pe.ape: \ o/$(MODE)/tool/hello/life-pe.com.dbg \ o/$(MODE)/tool/build/elf2pe.com @$(COMPILE) -AELF2PE o/$(MODE)/tool/build/elf2pe.com -o $@ $< @@ -89,7 +89,7 @@ o/$(MODE)/tool/hello/life-pe.com: \ o/$(MODE)/tool/hello/wait-pe.com.dbg: \ o/$(MODE)/tool/hello/wait-pe.o @$(COMPILE) -ALINK.elf $(LINK) $(LINKARGS) $(OUTPUT_OPTION) -q -e WinMain -o/$(MODE)/tool/hello/wait-pe.com: \ +o/$(MODE)/tool/hello/wait-pe.ape: \ o/$(MODE)/tool/hello/wait-pe.com.dbg \ o/$(MODE)/tool/build/elf2pe.com @$(COMPILE) -AELF2PE 
o/$(MODE)/tool/build/elf2pe.com -R 64kb -S 4kb -o $@ $< diff --git a/tool/net/largon2.c b/tool/net/largon2.c index e4a6a5c6f..6b5cd51c6 100644 --- a/tool/net/largon2.c +++ b/tool/net/largon2.c @@ -32,10 +32,9 @@ #include "third_party/lua/lua.h" #include "third_party/lua/lualib.h" -asm(".ident\t\"\\n\\n\ -largon2 (MIT License)\\n\ -Copyright 2016 Thibault Charbonnier\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(largon2_notice, "\ +largon2 (MIT License)\n\ +Copyright 2016 Thibault Charbonnier"); // clang-format off /*** diff --git a/tool/net/lsqlite3.c b/tool/net/lsqlite3.c index 945560538..f46eee72e 100644 --- a/tool/net/lsqlite3.c +++ b/tool/net/lsqlite3.c @@ -38,10 +38,9 @@ #include "third_party/sqlite3/sqlite3.h" // clang-format off -asm(".ident\t\"\\n\\n\ -lsqlite3 (MIT License)\\n\ -Copyright 2002-2016 Tiago Dionizio, Doug Currie\""); -asm(".include \"libc/disclaimer.inc\""); +__notice(lsqlite3_notice, "\ +lsqlite3 (MIT License)\n\ +Copyright 2002-2016 Tiago Dionizio, Doug Currie"); // LOCAL CHANGES // diff --git a/usr/share/zoneinfo/Anchorage b/usr/share/zoneinfo/Anchorage deleted file mode 100644 index cdf0572be..000000000 Binary files a/usr/share/zoneinfo/Anchorage and /dev/null differ diff --git a/usr/share/zoneinfo/Anchorage b/usr/share/zoneinfo/Anchorage new file mode 120000 index 000000000..cafb24b4f --- /dev/null +++ b/usr/share/zoneinfo/Anchorage @@ -0,0 +1 @@ +US/Alaska \ No newline at end of file diff --git a/usr/share/zoneinfo/Boulder b/usr/share/zoneinfo/Boulder deleted file mode 100644 index 7fc669171..000000000 Binary files a/usr/share/zoneinfo/Boulder and /dev/null differ diff --git a/usr/share/zoneinfo/Boulder b/usr/share/zoneinfo/Boulder new file mode 120000 index 000000000..8b727a113 --- /dev/null +++ b/usr/share/zoneinfo/Boulder @@ -0,0 +1 @@ +US/Mountain \ No newline at end of file diff --git a/usr/share/zoneinfo/Chicago b/usr/share/zoneinfo/Chicago deleted file mode 100644 index 3dd8f0fa8..000000000 Binary files 
a/usr/share/zoneinfo/Chicago and /dev/null differ diff --git a/usr/share/zoneinfo/Chicago b/usr/share/zoneinfo/Chicago new file mode 120000 index 000000000..0c6fef91f --- /dev/null +++ b/usr/share/zoneinfo/Chicago @@ -0,0 +1 @@ +US/Central \ No newline at end of file diff --git a/usr/share/zoneinfo/GMT b/usr/share/zoneinfo/GMT deleted file mode 100644 index c63474664..000000000 Binary files a/usr/share/zoneinfo/GMT and /dev/null differ diff --git a/usr/share/zoneinfo/GMT b/usr/share/zoneinfo/GMT new file mode 120000 index 000000000..1ed082089 --- /dev/null +++ b/usr/share/zoneinfo/GMT @@ -0,0 +1 @@ +UTC \ No newline at end of file diff --git a/usr/share/zoneinfo/GST b/usr/share/zoneinfo/GST deleted file mode 100644 index c0ce4402f..000000000 Binary files a/usr/share/zoneinfo/GST and /dev/null differ diff --git a/usr/share/zoneinfo/GST b/usr/share/zoneinfo/GST new file mode 120000 index 000000000..e2ebd1750 --- /dev/null +++ b/usr/share/zoneinfo/GST @@ -0,0 +1 @@ +US/Pacific \ No newline at end of file diff --git a/usr/share/zoneinfo/Honolulu b/usr/share/zoneinfo/Honolulu deleted file mode 100644 index d0c2595f9..000000000 Binary files a/usr/share/zoneinfo/Honolulu and /dev/null differ diff --git a/usr/share/zoneinfo/Honolulu b/usr/share/zoneinfo/Honolulu new file mode 120000 index 000000000..16c5c6023 --- /dev/null +++ b/usr/share/zoneinfo/Honolulu @@ -0,0 +1 @@ +US/Hawaii \ No newline at end of file diff --git a/usr/share/zoneinfo/Israel b/usr/share/zoneinfo/Israel index 4992a7929..4c49bbf52 100644 Binary files a/usr/share/zoneinfo/Israel and b/usr/share/zoneinfo/Israel differ diff --git a/usr/share/zoneinfo/New_York b/usr/share/zoneinfo/New_York deleted file mode 100644 index 7553fee37..000000000 Binary files a/usr/share/zoneinfo/New_York and /dev/null differ diff --git a/usr/share/zoneinfo/New_York b/usr/share/zoneinfo/New_York new file mode 120000 index 000000000..b8d5363be --- /dev/null +++ b/usr/share/zoneinfo/New_York @@ -0,0 +1 @@ +US/Eastern \ No newline 
at end of file diff --git a/usr/share/zoneinfo/Singapore b/usr/share/zoneinfo/Singapore index 785836666..dbbdea3c8 100644 Binary files a/usr/share/zoneinfo/Singapore and b/usr/share/zoneinfo/Singapore differ diff --git a/usr/share/zoneinfo/US/Alaska b/usr/share/zoneinfo/US/Alaska new file mode 100644 index 000000000..cdf0572be Binary files /dev/null and b/usr/share/zoneinfo/US/Alaska differ diff --git a/usr/share/zoneinfo/US/Aleutian b/usr/share/zoneinfo/US/Aleutian new file mode 100644 index 000000000..b1497bda6 Binary files /dev/null and b/usr/share/zoneinfo/US/Aleutian differ diff --git a/usr/share/zoneinfo/US/Arizona b/usr/share/zoneinfo/US/Arizona new file mode 100644 index 000000000..c2bd2f949 Binary files /dev/null and b/usr/share/zoneinfo/US/Arizona differ diff --git a/usr/share/zoneinfo/US/Central b/usr/share/zoneinfo/US/Central new file mode 100644 index 000000000..b01688065 Binary files /dev/null and b/usr/share/zoneinfo/US/Central differ diff --git a/usr/share/zoneinfo/US/East-Indiana b/usr/share/zoneinfo/US/East-Indiana new file mode 100644 index 000000000..6b08d15bd Binary files /dev/null and b/usr/share/zoneinfo/US/East-Indiana differ diff --git a/usr/share/zoneinfo/US/Eastern b/usr/share/zoneinfo/US/Eastern new file mode 100644 index 000000000..2b6c2eea1 Binary files /dev/null and b/usr/share/zoneinfo/US/Eastern differ diff --git a/usr/share/zoneinfo/US/Hawaii b/usr/share/zoneinfo/US/Hawaii new file mode 100644 index 000000000..40e3d492e Binary files /dev/null and b/usr/share/zoneinfo/US/Hawaii differ diff --git a/usr/share/zoneinfo/US/Indiana-Starke b/usr/share/zoneinfo/US/Indiana-Starke new file mode 100644 index 000000000..b187d5f8c Binary files /dev/null and b/usr/share/zoneinfo/US/Indiana-Starke differ diff --git a/usr/share/zoneinfo/US/Michigan b/usr/share/zoneinfo/US/Michigan new file mode 100644 index 000000000..6eb3ac46e Binary files /dev/null and b/usr/share/zoneinfo/US/Michigan differ diff --git a/usr/share/zoneinfo/US/Mountain 
b/usr/share/zoneinfo/US/Mountain new file mode 100644 index 000000000..09e54e5c7 Binary files /dev/null and b/usr/share/zoneinfo/US/Mountain differ diff --git a/usr/share/zoneinfo/US/Pacific b/usr/share/zoneinfo/US/Pacific new file mode 100644 index 000000000..aaf07787a Binary files /dev/null and b/usr/share/zoneinfo/US/Pacific differ diff --git a/usr/share/zoneinfo/US/Samoa b/usr/share/zoneinfo/US/Samoa new file mode 100644 index 000000000..001289cee Binary files /dev/null and b/usr/share/zoneinfo/US/Samoa differ diff --git a/usr/share/zoneinfo/UTC b/usr/share/zoneinfo/UTC index c3b97f1a1..157573b1d 100644 Binary files a/usr/share/zoneinfo/UTC and b/usr/share/zoneinfo/UTC differ