Make improvements

- Let OpenMP be usable via cosmocc
- Let libunwind be usable via cosmocc
- Make X86_HAVE(AVXVNNI) work correctly
- Avoid using MAP_GROWSDOWN on qemu-aarch64
- Introduce in6addr_any and in6addr_loopback
- Have thread stacks use MAP_GROWSDOWN by default
- Ask OpenMP to not use filesystem to manage threads
- Make NI_MAXHOST and NI_MAXSERV available w/o _GNU_SOURCE
This commit is contained in:
Justine Tunney 2024-01-29 15:45:10 -08:00
parent 5f8e9f14c1
commit 369aebfc48
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
36 changed files with 416 additions and 80 deletions

View file

@ -524,6 +524,7 @@ COSMOCC_PKGS = \
THIRD_PARTY_AARCH64 \ THIRD_PARTY_AARCH64 \
THIRD_PARTY_LIBCXX \ THIRD_PARTY_LIBCXX \
THIRD_PARTY_LIBCXXABI \ THIRD_PARTY_LIBCXXABI \
THIRD_PARTY_LIBUNWIND \
THIRD_PARTY_OPENMP \ THIRD_PARTY_OPENMP \
THIRD_PARTY_INTEL THIRD_PARTY_INTEL

View file

@ -251,6 +251,7 @@ libc/isystem/uio.h \
libc/isystem/unistd.h \ libc/isystem/unistd.h \
libc/isystem/unordered_map \ libc/isystem/unordered_map \
libc/isystem/unordered_set \ libc/isystem/unordered_set \
libc/isystem/unwind.h \
libc/isystem/utility \ libc/isystem/utility \
libc/isystem/utime.h \ libc/isystem/utime.h \
libc/isystem/utmp.h \ libc/isystem/utmp.h \

View file

@ -24,11 +24,15 @@
/** /**
* Returns true if process is running under qemu-x86_64 or qemu-aarch64. * Returns true if process is running under qemu-x86_64 or qemu-aarch64.
*/ */
int IsQemu(void) { int IsQemuUser(void) {
static char rplus1;
if (!rplus1) {
// qemu doesn't validate the advice argument // qemu doesn't validate the advice argument
// we could also check if __getcwd(0, 0) raises efault // we could also check if __getcwd(0, 0) raises efault
int e = errno; int e = errno;
int r = !sys_madvise(__executable_start, 16384, 127); int r = !sys_madvise(__executable_start, 16384, 127);
errno = e; errno = e;
return r; rplus1 = r + 1;
}
return rplus1 - 1;
} }

View file

@ -121,7 +121,7 @@ COSMOPOLITAN_C_START_
extern const int __hostos; extern const int __hostos;
int IsQemu(void); int IsQemuUser(void);
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -649,8 +649,9 @@ void abort(void) wontreturn;
#pragma GCC diagnostic ignored "-Wformat-extra-args" /* todo: patch gcc */ #pragma GCC diagnostic ignored "-Wformat-extra-args" /* todo: patch gcc */
#pragma GCC diagnostic ignored "-Wunused-function" /* contradicts dce */ #pragma GCC diagnostic ignored "-Wunused-function" /* contradicts dce */
#pragma GCC diagnostic ignored "-Wunused-const-variable" /* sooo ridiculous */ #pragma GCC diagnostic ignored "-Wunused-const-variable" /* sooo ridiculous */
#pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch"
#ifndef __cplusplus #ifndef __cplusplus
#pragma GCC diagnostic ignored "-Wold-style-definition" /* orwellian bulls */ #pragma GCC diagnostic ignored "-Wold-style-definition" /* orwellian bullsh */
#endif #endif
#ifndef __STRICT_ANSI__ #ifndef __STRICT_ANSI__

View file

@ -103,7 +103,7 @@ void __get_main_stack(void **out_addr, size_t *out_size, int *out_guardsize) {
if (IsWindows()) { if (IsWindows()) {
*out_addr = (void *)GetStaticStackAddr(0); *out_addr = (void *)GetStaticStackAddr(0);
*out_size = GetStaticStackSize(); *out_size = GetStaticStackSize();
*out_guardsize = GetGuardSize(); *out_guardsize = getauxval(AT_PAGESZ);
return; return;
} }
int pagesz = getauxval(AT_PAGESZ); int pagesz = getauxval(AT_PAGESZ);

4
libc/isystem/unwind.h Normal file
View file

@ -0,0 +1,4 @@
#ifndef _UNWIND_H
#define _UNWIND_H
#include "third_party/libunwind/include/unwind.h"
#endif /* _UNWIND_H */

View file

@ -31,28 +31,38 @@
// are quite toilsome. // are quite toilsome.
// //
// @see www.felixcloutier.com/x86/cpuid // @see www.felixcloutier.com/x86/cpuid
kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info) kCpuids:.long 0,0,0,0 // EAX=0 (Basic Processor Info)
.long 0,0,0,0 # EAX=1 (Processor Info) .long 0,0,0,0 // EAX=1 (Processor Info)
.long 0,0,0,0 # EAX=2 .long 0,0,0,0 // EAX=2
.long 0,0,0,0 # EAX=7 (Extended Features) .long 0,0,0,0 // EAX=7 (Extended Features)
.long 0,0,0,0 # EAX=0x80000001 (NexGen32e) .long 0,0,0,0 // EAX=0x80000001 (NexGen32e)
.long 0,0,0,0 # EAX=0x80000007 (APM) .long 0,0,0,0 // EAX=0x80000007 (APM)
.long 0,0,0,0 # EAX=16h (CPU Frequency) .long 0,0,0,0 // EAX=16h (CPU Frequency)
.long 0,0,0,0 // EAX=7 ECX=1 (Extended Feats)
.endobj kCpuids,globl .endobj kCpuids,globl
.previous .previous
.init.start 201,_init_kCpuids .init.start 201,_init_kCpuids
push %rbx push %rbx
push $0 push $0
push $0x16 push $1
push $0xffffffff80000007
push $0xffffffff80000001
push $7 push $7
push $0
push $0x16
push $0
push $0xffffffff80000007
push $0
push $0xffffffff80000001
push $0
push $7
push $0
push $2 push $2
push $0
push $1 push $1
mov %rdi,%r8 mov %rdi,%r8
xor %eax,%eax xor %eax,%eax
1: xor %ecx,%ecx xor %ecx,%ecx
1: nop
#ifdef FEATURELESS #ifdef FEATURELESS
// It's been reported that GDB reverse debugging doesn't // It's been reported that GDB reverse debugging doesn't
// understand VEX encoding. The workaround is to put: // understand VEX encoding. The workaround is to put:
@ -62,6 +72,7 @@ kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info)
// Inside your ~/.cosmo.mk file. // Inside your ~/.cosmo.mk file.
xor %eax,%eax xor %eax,%eax
xor %ebx,%ebx xor %ebx,%ebx
xor %ecx,%ecx
xor %edx,%edx xor %edx,%edx
#else #else
cpuid cpuid
@ -74,10 +85,11 @@ kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info)
xchg %eax,%edx xchg %eax,%edx
stosl stosl
2: pop %rax 2: pop %rax
test %eax,%eax # EAX = stacklist->pop() test %eax,%eax // EAX = stacklist->pop()
jz 3f # EAX 0 (EOL sentinel) jz 3f // EAX 0 (EOL sentinel)
cmp KCPUIDS(0H,EAX)(%r8),%al # EAX CPUID.0 max leaf pop %rcx // HERE WE GO AGAIN CPUID
jbe 1b # CPUID too new to probe cmp KCPUIDS(0H,EAX)(%r8),%al // EAX CPUID.0 max leaf
jbe 1b // CPUID too new to probe
add $4*4,%rdi add $4*4,%rdi
jmp 2b jmp 2b
3: nop 3: nop

View file

@ -8,7 +8,8 @@
#define KCPUIDS_80000001H 4 #define KCPUIDS_80000001H 4
#define KCPUIDS_80000007H 5 #define KCPUIDS_80000007H 5
#define KCPUIDS_16H 6 #define KCPUIDS_16H 6
#define KCPUIDS_LEN 7 #define KCPUIDS_7H_1H 7
#define KCPUIDS_LEN 8
#define KCPUIDS_6H -1 /* TBD: Thermal and Power Management */ #define KCPUIDS_6H -1 /* TBD: Thermal and Power Management */
#define KCPUIDS_DH -1 /* TBD: Extended state features */ #define KCPUIDS_DH -1 /* TBD: Extended state features */
#define KCPUIDS_80000008H -1 /* TBD: AMD Miscellaneous */ #define KCPUIDS_80000008H -1 /* TBD: AMD Miscellaneous */

View file

@ -37,6 +37,18 @@
#define _X86_CC_AVXVNNI 0 #define _X86_CC_AVXVNNI 0
#endif #endif
#ifdef __AVXVNNIINT8__
#define _X86_CC_AVXVNNIINT8 1
#else
#define _X86_CC_AVXVNNIINT8 0
#endif
#ifdef __AVXVNNIINT16__
#define _X86_CC_AVXVNNIINT16 1
#else
#define _X86_CC_AVXVNNIINT16 0
#endif
#ifdef __AVX512F__ #ifdef __AVX512F__
#define _X86_CC_AVX512F 1 #define _X86_CC_AVX512F 1
#else #else

View file

@ -28,7 +28,9 @@
#define X86_ARCH_CAPABILITIES 7H, EDX, 29, 0 #define X86_ARCH_CAPABILITIES 7H, EDX, 29, 0
#define X86_AVX 1H, ECX, 28, _X86_CC_AVX /* sandybridge c. 2012 */ #define X86_AVX 1H, ECX, 28, _X86_CC_AVX /* sandybridge c. 2012 */
#define X86_AVX2 7H, EBX, 5, _X86_CC_AVX2 /* haswell c. 2013 */ #define X86_AVX2 7H, EBX, 5, _X86_CC_AVX2 /* haswell c. 2013 */
#define X86_AVXVNNI 7H, EAX, 4, _X86_CC_AVXVNNI #define X86_AVXVNNI 7H_1H, EAX, 4, _X86_CC_AVXVNNI
#define X86_AVXVNNIINT8 7H_1H, EDX, 4, _X86_CC_AVXVNNIINT8
#define X86_AVXVNNIINT16 7H_1H, EDX, 10, _X86_CC_AVXVNNIINT16
#define X86_AVX512BW 7H, EBX, 30, 0 #define X86_AVX512BW 7H, EBX, 30, 0
#define X86_AVX512CD 7H, EBX, 28, 0 #define X86_AVX512CD 7H, EBX, 28, 0
#define X86_AVX512DQ 7H, EBX, 17, 0 #define X86_AVX512DQ 7H, EBX, 17, 0

View file

@ -23,6 +23,7 @@
#include "libc/runtime/memtrack.internal.h" #include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h" #include "libc/runtime/stack.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/prot.h"
@ -42,10 +43,13 @@
void *NewCosmoStack(void) { void *NewCosmoStack(void) {
char *p; char *p;
if ((p = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, if ((p = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)) != MAP_FAILED) { MAP_ANONYMOUS |
(IsAarch64() && IsLinux() && IsQemuUser() ? MAP_PRIVATE
: MAP_STACK),
-1, 0)) != MAP_FAILED) {
if (IsAsan()) { if (IsAsan()) {
__asan_poison(p + GetStackSize() - 16, 16, kAsanStackOverflow); __asan_poison(p + GetStackSize() - 16, 16, kAsanStackOverflow);
__asan_poison(p, GetGuardSize(), kAsanStackOverflow); __asan_poison(p, getauxval(AT_PAGESZ), kAsanStackOverflow);
} }
return p; return p;
} else { } else {

22
libc/sock/in6addr_any.c Normal file
View file

@ -0,0 +1,22 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/sock/struct/sockaddr6.h"
#include "libc/sysv/consts/inaddr.h"
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;

View file

@ -0,0 +1,22 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/sock/struct/sockaddr6.h"
#include "libc/sysv/consts/inaddr.h"
const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;

View file

@ -17,4 +17,7 @@ struct sockaddr_in6 { /* Linux+NT ABI */
uint32_t sin6_scope_id; /* rfc2553 */ uint32_t sin6_scope_id; /* rfc2553 */
}; };
extern const struct in6_addr in6addr_any;
extern const struct in6_addr in6addr_loopback;
#endif /* COSMOPOLITAN_LIBC_CALLS_STRUCT_SOCKADDR6_H_ */ #endif /* COSMOPOLITAN_LIBC_CALLS_STRUCT_SOCKADDR6_H_ */

View file

@ -38,7 +38,7 @@
errno_t pthread_attr_init(pthread_attr_t *attr) { errno_t pthread_attr_init(pthread_attr_t *attr) {
*attr = (pthread_attr_t){ *attr = (pthread_attr_t){
.__stacksize = GetStackSize(), .__stacksize = GetStackSize(),
.__guardsize = GetGuardSize(), .__guardsize = getauxval(AT_PAGESZ),
}; };
return 0; return 0;
} }

View file

@ -216,7 +216,9 @@ static errno_t pthread_create_impl(pthread_t *thread,
_pthread_free(pt, false); _pthread_free(pt, false);
return EINVAL; return EINVAL;
} }
if (pt->pt_attr.__guardsize == pagesize) { if (pt->pt_attr.__guardsize == pagesize &&
!(IsAarch64() && IsLinux() && IsQemuUser())) {
// MAP_GROWSDOWN doesn't work very well on qemu-aarch64
pt->pt_attr.__stackaddr = pt->pt_attr.__stackaddr =
mmap(0, pt->pt_attr.__stacksize, PROT_READ | PROT_WRITE, mmap(0, pt->pt_attr.__stacksize, PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0); MAP_STACK | MAP_ANONYMOUS, -1, 0);

View file

@ -33,7 +33,7 @@ void SetUpOnce(void) {
} }
TEST(__getcwd, zero) { TEST(__getcwd, zero) {
if (IsQemu()) return; if (IsQemuUser()) return;
ASSERT_SYS(ERANGE, -1, __getcwd(0, 0)); ASSERT_SYS(ERANGE, -1, __getcwd(0, 0));
} }

View file

@ -122,7 +122,7 @@ TEST(GetProramExecutableName, weirdArgv0NullEnv) {
TEST(GetProgramExecutableName, movedSelf) { TEST(GetProgramExecutableName, movedSelf) {
if (skiptests) return; if (skiptests) return;
if (IsAarch64() && IsQemu()) { if (IsAarch64() && IsQemuUser()) {
// clang-format off // clang-format off
// TODO(mrdomino): fix: make -j8 m=aarch64 o/aarch64/test/libc/calls/getprogramexecutablename_test.com.ok // TODO(mrdomino): fix: make -j8 m=aarch64 o/aarch64/test/libc/calls/getprogramexecutablename_test.com.ok
// possibly related to the intersection of binfmt_misc and qemu-aarch64 // possibly related to the intersection of binfmt_misc and qemu-aarch64

View file

@ -70,7 +70,7 @@ TEST(madvise, subPages) {
TEST(madvise, misalign) { TEST(madvise, misalign) {
char *p; char *p;
if (!IsLinux()) return; // most platforms don't care if (!IsLinux()) return; // most platforms don't care
if (IsQemu()) return; // qemu claims to be linux but doesn't care if (IsQemuUser()) return; // qemu claims to be linux but doesn't care
ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE, ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0))); MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
ASSERT_SYS(EINVAL, -1, madvise(p + 1, FRAMESIZE - 1, MADV_WILLNEED)); ASSERT_SYS(EINVAL, -1, madvise(p + 1, FRAMESIZE - 1, MADV_WILLNEED));
@ -79,7 +79,7 @@ TEST(madvise, misalign) {
TEST(madvise, badAdvice) { TEST(madvise, badAdvice) {
char *p; char *p;
if (IsAarch64() && IsQemu()) return; // qemu doesn't validate advice if (IsAarch64() && IsQemuUser()) return; // qemu doesn't validate advice
ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE, ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0))); MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)));
ASSERT_SYS(EINVAL, -1, madvise(p, FRAMESIZE, 127)); ASSERT_SYS(EINVAL, -1, madvise(p, FRAMESIZE, 127));
@ -88,7 +88,7 @@ TEST(madvise, badAdvice) {
TEST(madvise, missingMemory) { TEST(madvise, missingMemory) {
if (!IsLinux()) return; // most platforms don't care if (!IsLinux()) return; // most platforms don't care
if (IsQemu()) return; // qemu claims to be linux but doesn't care if (IsQemuUser()) return; // qemu claims to be linux but doesn't care
ASSERT_SYS(ENOMEM, -1, ASSERT_SYS(ENOMEM, -1,
madvise((char *)0x83483838000, FRAMESIZE, MADV_WILLNEED)); madvise((char *)0x83483838000, FRAMESIZE, MADV_WILLNEED));
} }

View file

@ -21,6 +21,7 @@
#include "libc/calls/struct/siginfo.h" #include "libc/calls/struct/siginfo.h"
#include "libc/calls/struct/ucontext.internal.h" #include "libc/calls/struct/ucontext.internal.h"
#include "libc/calls/ucontext.h" #include "libc/calls/ucontext.h"
#include "libc/dce.h"
#include "libc/intrin/kprintf.h" #include "libc/intrin/kprintf.h"
#include "libc/limits.h" #include "libc/limits.h"
#include "libc/mem/gc.h" #include "libc/mem/gc.h"

View file

@ -23,7 +23,6 @@
#include "libc/limits.h" #include "libc/limits.h"
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
#include "libc/mem/gc.h" #include "libc/mem/gc.h"
#include "libc/mem/gc.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/sock/sock.h" #include "libc/sock/sock.h"
@ -126,7 +125,7 @@ TEST(writev, empty_stillPerformsIoOperation) {
ASSERT_NE(-1, (fd = open("file", O_RDONLY))); ASSERT_NE(-1, (fd = open("file", O_RDONLY)));
errno = 0; errno = 0;
EXPECT_SYS(EBADF, -1, writev(fd, iov, ARRAYLEN(iov))); EXPECT_SYS(EBADF, -1, writev(fd, iov, ARRAYLEN(iov)));
if (!(IsAarch64() && IsQemu())) { if (!(IsAarch64() && IsQemuUser())) {
EXPECT_EQ(-1, writev(fd, NULL, 0)); EXPECT_EQ(-1, writev(fd, NULL, 0));
} }
EXPECT_NE(-1, close(fd)); EXPECT_NE(-1, close(fd));

View file

@ -19,6 +19,7 @@
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/calls/ucontext.h" #include "libc/calls/ucontext.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/intrin/kprintf.h"
#include "libc/limits.h" #include "libc/limits.h"
#include "libc/mem/gc.h" #include "libc/mem/gc.h"
#include "libc/nt/createfile.h" #include "libc/nt/createfile.h"

View file

@ -18,7 +18,6 @@ TEST_LIBCXX_DIRECTDEPS = \
LIBC_RUNTIME \ LIBC_RUNTIME \
LIBC_STDIO \ LIBC_STDIO \
THIRD_PARTY_LIBCXX \ THIRD_PARTY_LIBCXX \
THIRD_PARTY_DOUBLECONVERSION \
THIRD_PARTY_OPENMP THIRD_PARTY_OPENMP
TEST_LIBCXX_DEPS := \ TEST_LIBCXX_DEPS := \

View file

@ -68,7 +68,7 @@ void matmul(long m, long n, long k, const T *A, long sa, const T *B, long sb,
} }
template <long BM, long BN, typename T> template <long BM, long BN, typename T>
void sgemmk(long k, const T *A, long sa, const T *B, long sb, T *C, long sc) { void gemmk(long k, const T *A, long sa, const T *B, long sb, T *C, long sc) {
T S[BM][BN] = {0}; T S[BM][BN] = {0};
for (long l = 0; l < k; ++l) { for (long l = 0; l < k; ++l) {
for (long i = 0; i < BM; ++i) { for (long i = 0; i < BM; ++i) {
@ -86,12 +86,12 @@ void sgemmk(long k, const T *A, long sa, const T *B, long sb, T *C, long sc) {
// (m×k)ᵀ * k×n → m×n // (m×k)ᵀ * k×n → m×n
template <long BM, long BN, typename T> template <long BM, long BN, typename T>
void sgemm(long m, long n, long k, const T *A, long sa, const T *B, long sb, void gemm(long m, long n, long k, const T *A, long sa, const T *B, long sb,
T *C, long sc) { T *C, long sc) {
#pragma omp parallel for collapse(2) #pragma omp parallel for collapse(2)
for (long i = 0; i < m; i += BM) { for (long i = 0; i < m; i += BM) {
for (long j = 0; j < n; j += BN) { for (long j = 0; j < n; j += BN) {
sgemmk<BM, BN>(k, A + i, sa, B + j, sb, C + sc * i + j, sc); gemmk<BM, BN>(k, A + i, sa, B + j, sb, C + sc * i + j, sc);
} }
} }
} }
@ -221,7 +221,7 @@ void check_transposed_blocking_gemm_is_ok(void) {
bench(matmul(m, n, k, A, k, B, n, C, n)); bench(matmul(m, n, k, A, k, B, n, C, n));
float *At = new float[k * m]; float *At = new float[k * m];
bench(transpose(m, k, A, k, At, m)); bench(transpose(m, k, A, k, At, m));
bench((sgemm<8, 4>(m, n, k, At, m, B, n, D, n))); bench((gemm<8, 4>(m, n, k, At, m, B, n, D, n)));
check(FLAWLESS, m, n, C, n, D, n); check(FLAWLESS, m, n, C, n, D, n);
delete[] At; delete[] At;
delete[] D; delete[] D;

View file

@ -37,7 +37,8 @@ THIRD_PARTY_LIBUNWIND_A_SRCS_CC = \
THIRD_PARTY_LIBUNWIND_A_SRCS_C = \ THIRD_PARTY_LIBUNWIND_A_SRCS_C = \
third_party/libunwind/Unwind-sjlj.c \ third_party/libunwind/Unwind-sjlj.c \
third_party/libunwind/UnwindLevel1-gcc-ext.c \ third_party/libunwind/UnwindLevel1-gcc-ext.c \
third_party/libunwind/UnwindLevel1.c third_party/libunwind/UnwindLevel1.c \
third_party/libunwind/gcc_personality_v0.c
THIRD_PARTY_LIBUNWIND_A_SRCS = \ THIRD_PARTY_LIBUNWIND_A_SRCS = \
$(THIRD_PARTY_LIBUNWIND_A_SRCS_C) \ $(THIRD_PARTY_LIBUNWIND_A_SRCS_C) \
@ -70,6 +71,7 @@ $(THIRD_PARTY_LIBUNWIND_A).pkg: \
$(THIRD_PARTY_LIBUNWIND_A_OBJS): private \ $(THIRD_PARTY_LIBUNWIND_A_OBJS): private \
CFLAGS += \ CFLAGS += \
-fexceptions \
-fno-sanitize=all \ -fno-sanitize=all \
-ffunction-sections \ -ffunction-sections \
-fdata-sections \ -fdata-sections \
@ -77,6 +79,7 @@ $(THIRD_PARTY_LIBUNWIND_A_OBJS): private \
$(THIRD_PARTY_LIBUNWIND_A_OBJS): private \ $(THIRD_PARTY_LIBUNWIND_A_OBJS): private \
CXXFLAGS += \ CXXFLAGS += \
-fexceptions \
-fno-sanitize=all \ -fno-sanitize=all \
-ffunction-sections \ -ffunction-sections \
-fdata-sections \ -fdata-sections \

View file

@ -0,0 +1,236 @@
//===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "third_party/libunwind/include/unwind.h"
#include "third_party/compiler_rt/int_lib.h"
// Pointer encodings documented at:
// http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
#define DW_EH_PE_omit 0xff // no data follows
#define DW_EH_PE_absptr 0x00
#define DW_EH_PE_uleb128 0x01
#define DW_EH_PE_udata2 0x02
#define DW_EH_PE_udata4 0x03
#define DW_EH_PE_udata8 0x04
#define DW_EH_PE_sleb128 0x09
#define DW_EH_PE_sdata2 0x0A
#define DW_EH_PE_sdata4 0x0B
#define DW_EH_PE_sdata8 0x0C
#define DW_EH_PE_pcrel 0x10
#define DW_EH_PE_textrel 0x20
#define DW_EH_PE_datarel 0x30
#define DW_EH_PE_funcrel 0x40
#define DW_EH_PE_aligned 0x50
#define DW_EH_PE_indirect 0x80 // gcc extension
// read a uleb128 encoded value and advance pointer
static size_t readULEB128(const uint8_t **data) {
size_t result = 0;
size_t shift = 0;
unsigned char byte;
const uint8_t *p = *data;
do {
byte = *p++;
result |= (byte & 0x7f) << shift;
shift += 7;
} while (byte & 0x80);
*data = p;
return result;
}
// read a pointer encoded value and advance pointer
static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
const uint8_t *p = *data;
uintptr_t result = 0;
if (encoding == DW_EH_PE_omit)
return 0;
// first get value
switch (encoding & 0x0F) {
case DW_EH_PE_absptr:
result = *((const uintptr_t *)p);
p += sizeof(uintptr_t);
break;
case DW_EH_PE_uleb128:
result = readULEB128(&p);
break;
case DW_EH_PE_udata2:
result = *((const uint16_t *)p);
p += sizeof(uint16_t);
break;
case DW_EH_PE_udata4:
result = *((const uint32_t *)p);
p += sizeof(uint32_t);
break;
case DW_EH_PE_udata8:
result = *((const uint64_t *)p);
p += sizeof(uint64_t);
break;
case DW_EH_PE_sdata2:
result = *((const int16_t *)p);
p += sizeof(int16_t);
break;
case DW_EH_PE_sdata4:
result = *((const int32_t *)p);
p += sizeof(int32_t);
break;
case DW_EH_PE_sdata8:
result = *((const int64_t *)p);
p += sizeof(int64_t);
break;
case DW_EH_PE_sleb128:
default:
// not supported
compilerrt_abort();
break;
}
// then add relative offset
switch (encoding & 0x70) {
case DW_EH_PE_absptr:
// do nothing
break;
case DW_EH_PE_pcrel:
result += (uintptr_t)(*data);
break;
case DW_EH_PE_textrel:
case DW_EH_PE_datarel:
case DW_EH_PE_funcrel:
case DW_EH_PE_aligned:
default:
// not supported
compilerrt_abort();
break;
}
// then apply indirection
if (encoding & DW_EH_PE_indirect) {
result = *((const uintptr_t *)result);
}
*data = p;
return result;
}
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
!defined(__ARM_DWARF_EH__) && !defined(__SEH__)
#define USING_ARM_EHABI 1
_Unwind_Reason_Code __gnu_unwind_frame(struct _Unwind_Exception *,
struct _Unwind_Context *);
#endif
static inline _Unwind_Reason_Code
continueUnwind(struct _Unwind_Exception *exceptionObject,
struct _Unwind_Context *context) {
#if USING_ARM_EHABI
// On ARM EHABI the personality routine is responsible for actually
// unwinding a single stack frame before returning (ARM EHABI Sec. 6.1).
if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK)
return _URC_FAILURE;
#endif
return _URC_CONTINUE_UNWIND;
}
// The C compiler makes references to __gcc_personality_v0 in
// the dwarf unwind information for translation units that use
// __attribute__((cleanup(xx))) on local variables.
// This personality routine is called by the system unwinder
// on each frame as the stack is unwound during a C++ exception
// throw through a C function compiled with -fexceptions.
#if __USING_SJLJ_EXCEPTIONS__
// the setjump-longjump based exceptions personality routine has a
// different name
COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_sj0(
int version, _Unwind_Action actions, uint64_t exceptionClass,
struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context)
#elif USING_ARM_EHABI
// The ARM EHABI personality routine has a different signature.
COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0(
_Unwind_State state, struct _Unwind_Exception *exceptionObject,
struct _Unwind_Context *context)
#elif defined(__SEH__)
static _Unwind_Reason_Code __gcc_personality_imp(
int version, _Unwind_Action actions, uint64_t exceptionClass,
struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context)
#else
COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0(
int version, _Unwind_Action actions, uint64_t exceptionClass,
struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context)
#endif
{
// Since C does not have catch clauses, there is nothing to do during
// phase 1 (the search phase).
#if USING_ARM_EHABI
// After resuming from a cleanup we should also continue on to the next
// frame straight away.
if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING)
#else
if (actions & _UA_SEARCH_PHASE)
#endif
return continueUnwind(exceptionObject, context);
// There is nothing to do if there is no LSDA for this frame.
const uint8_t *lsda = (uint8_t *)_Unwind_GetLanguageSpecificData(context);
if (lsda == (uint8_t *)0)
return continueUnwind(exceptionObject, context);
uintptr_t pc = (uintptr_t)_Unwind_GetIP(context) - 1;
uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context);
uintptr_t pcOffset = pc - funcStart;
// Parse LSDA header.
uint8_t lpStartEncoding = *lsda++;
if (lpStartEncoding != DW_EH_PE_omit) {
readEncodedPointer(&lsda, lpStartEncoding);
}
uint8_t ttypeEncoding = *lsda++;
if (ttypeEncoding != DW_EH_PE_omit) {
readULEB128(&lsda);
}
// Walk call-site table looking for range that includes current PC.
uint8_t callSiteEncoding = *lsda++;
size_t callSiteTableLength = readULEB128(&lsda);
const uint8_t *callSiteTableStart = lsda;
const uint8_t *callSiteTableEnd = callSiteTableStart + callSiteTableLength;
const uint8_t *p = callSiteTableStart;
while (p < callSiteTableEnd) {
uintptr_t start = readEncodedPointer(&p, callSiteEncoding);
size_t length = readEncodedPointer(&p, callSiteEncoding);
size_t landingPad = readEncodedPointer(&p, callSiteEncoding);
readULEB128(&p); // action value not used for C code
if (landingPad == 0)
continue; // no landing pad for this entry
if ((start <= pcOffset) && (pcOffset < (start + length))) {
// Found landing pad for the PC.
// Set Instruction Pointer to so we re-enter function
// at landing pad. The landing pad is created by the compiler
// to take two parameters in registers.
_Unwind_SetGR(context, __builtin_eh_return_data_regno(0),
(uintptr_t)exceptionObject);
_Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0);
_Unwind_SetIP(context, (funcStart + landingPad));
return _URC_INSTALL_CONTEXT;
}
}
// No landing pad found, continue unwinding.
return continueUnwind(exceptionObject, context);
}
#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
COMPILER_RT_ABI EXCEPTION_DISPOSITION
__gcc_personality_seh0(PEXCEPTION_RECORD ms_exc, void *this_frame,
PCONTEXT ms_orig_context, PDISPATCHER_CONTEXT ms_disp) {
return _GCC_specific_handler(ms_exc, this_frame, ms_orig_context, ms_disp,
__gcc_personality_imp);
}
#endif

View file

@ -99,6 +99,9 @@ struct protoent *getprotoent (void);
struct protoent *getprotobyname (const char *); struct protoent *getprotobyname (const char *);
struct protoent *getprotobynumber (int); struct protoent *getprotobynumber (int);
#define NI_MAXHOST 255
#define NI_MAXSERV 32
#if defined(_COSMO_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_SOURCE) \ #if defined(_COSMO_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_SOURCE) \
|| (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE+0 < 200809L) \ || (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE+0 < 200809L) \
|| (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE+0 < 700) || (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE+0 < 700)
@ -130,8 +133,6 @@ int getservbyname_r(const char *, const char *, struct servent *, char *, size_t
#define EAI_ALLDONE -103 #define EAI_ALLDONE -103
#define EAI_INTR -104 #define EAI_INTR -104
#define EAI_IDN_ENCODE -105 #define EAI_IDN_ENCODE -105
#define NI_MAXHOST 255
#define NI_MAXSERV 32
#endif #endif
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_

View file

@ -14,3 +14,4 @@ LOCAL CHANGES
- Ran third_party/openmp/generate.sh - Ran third_party/openmp/generate.sh
- Removed usage of syscall() function - Removed usage of syscall() function
- Turned off quad floating point support (why does openmp have it?) - Turned off quad floating point support (why does openmp have it?)
- Remove bloat for checking if multiple OpenMP libraries are linked

View file

@ -18,12 +18,14 @@
#include "libc/stdio/syscall.h" #include "libc/stdio/syscall.h"
#endif #endif
#if IsModeDbg()
#define KMP_DEBUG 1
#endif
#define KMP_USE_FUTEX 0 #define KMP_USE_FUTEX 0
#define KMP_FTN_ENTRIES KMP_FTN_PLAIN #define KMP_FTN_ENTRIES KMP_FTN_PLAIN
#define syscall {{openmp_shall_not_use_syscall}} #define syscall {{openmp_shall_not_use_syscall}}
#define DEBUG_BUILD IsModeDbg()
#define RELWITHDEBINFO_BUILD (IsOptimized() && !IsTiny())
#define LIBOMP_USE_ITT_NOTIFY 0 #define LIBOMP_USE_ITT_NOTIFY 0
#define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY #define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY
#if ! LIBOMP_USE_ITT_NOTIFY #if ! LIBOMP_USE_ITT_NOTIFY
@ -152,9 +154,6 @@
#if STUBS_LIBRARY #if STUBS_LIBRARY
# define KMP_STUB 1 # define KMP_STUB 1
#endif #endif
#if DEBUG_BUILD || RELWITHDEBINFO_BUILD
# define KMP_DEBUG 1
#endif
#if KMP_OS_WINDOWS #if KMP_OS_WINDOWS
# define KMP_WIN_CDECL # define KMP_WIN_CDECL
@ -169,8 +168,4 @@
#define KMP_USE_SHM #define KMP_USE_SHM
#endif #endif
#ifdef __COSMOPOLITAN__
#define KMP_USE_SHM
#endif
#endif // KMP_CONFIG_H #endif // KMP_CONFIG_H

View file

@ -6699,10 +6699,12 @@ void __kmp_internal_end_thread(int gtid_req) {
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Library registration stuff. // Library registration stuff.
#ifndef __COSMOPOLITAN__
static long __kmp_registration_flag = 0; static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization. // Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL; static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>. // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
#endif // __COSMOPOLITAN__
static inline char *__kmp_reg_status_name() { static inline char *__kmp_reg_status_name() {
/* On RHEL 3u5 if linked statically, getpid() returns different values in /* On RHEL 3u5 if linked statically, getpid() returns different values in
@ -6726,6 +6728,7 @@ char *temp_reg_status_file_name = nullptr;
#endif #endif
void __kmp_register_library_startup(void) { void __kmp_register_library_startup(void) {
#ifndef __COSMOPOLITAN__
char *name = __kmp_reg_status_name(); // Name of the environment variable. char *name = __kmp_reg_status_name(); // Name of the environment variable.
int done = 0; int done = 0;
@ -6937,9 +6940,11 @@ void __kmp_register_library_startup(void) {
} // while } // while
KMP_INTERNAL_FREE((void *)name); KMP_INTERNAL_FREE((void *)name);
#endif // __COSMOPOLITAN__
} // func __kmp_register_library_startup } // func __kmp_register_library_startup
void __kmp_unregister_library(void) { void __kmp_unregister_library(void) {
#ifndef __COSMOPOLITAN__
char *name = __kmp_reg_status_name(); char *name = __kmp_reg_status_name();
char *value = NULL; char *value = NULL;
@ -7006,6 +7011,7 @@ void __kmp_unregister_library(void) {
__kmp_registration_flag = 0; __kmp_registration_flag = 0;
__kmp_registration_str = NULL; __kmp_registration_str = NULL;
#endif // __COSMOPOLITAN__
} // __kmp_unregister_library } // __kmp_unregister_library
// End of Library registration stuff. // End of Library registration stuff.

View file

@ -2054,6 +2054,7 @@ void __kmp_initialize_system_tick() {
} }
#endif #endif
#ifndef __COSMOPOLITAN__
/* Determine whether the given address is mapped into the current address /* Determine whether the given address is mapped into the current address
space. */ space. */
@ -2062,12 +2063,7 @@ int __kmp_is_address_mapped(void *addr) {
int found = 0; int found = 0;
int rc; int rc;
#if defined(__COSMOPOLITAN__) #if KMP_OS_LINUX || KMP_OS_HURD
(void)rc;
found = kisdangerous(addr);
#elif KMP_OS_LINUX || KMP_OS_HURD
/* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the
address ranges mapped into the address space. */ address ranges mapped into the address space. */
@ -2236,6 +2232,7 @@ int __kmp_is_address_mapped(void *addr) {
return found; return found;
} // __kmp_is_address_mapped } // __kmp_is_address_mapped
#endif // __COSMOPOLITAN__
#ifdef USE_LOAD_BALANCE #ifdef USE_LOAD_BALANCE

View file

@ -11,6 +11,14 @@
////===----------------------------------------------------------------------===// ////===----------------------------------------------------------------------===//
// //
#ifdef __COSMOPOLITAN__
.macro no.comm name:req size:req align:req
.globl \name
\name: .align \align
.byte \size
.endm
#endif
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
// macros // macros
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
@ -180,11 +188,11 @@ KMP_PREFIX_UNDERSCORE(\proc):
.macro COMMON name, size, align_power .macro COMMON name, size, align_power
#if KMP_OS_DARWIN #if KMP_OS_DARWIN
.comm \name, \size no.comm \name, \size
#elif KMP_OS_WINDOWS #elif KMP_OS_WINDOWS
.comm \name, \size, \align_power no.comm \name, \size, \align_power
#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS #else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
.comm \name, \size, (1<<(\align_power)) no.comm \name, \size, (1<<(\align_power))
#endif #endif
.endm .endm
@ -202,14 +210,14 @@ KMP_PREFIX_UNDERSCORE(\proc):
# if KMP_ARCH_X86 # if KMP_ARCH_X86
# if KMP_OS_DARWIN # if KMP_OS_DARWIN
.data .data
.comm .gomp_critical_user_,32 no.comm .gomp_critical_user_,32
.data .data
.globl ___kmp_unnamed_critical_addr .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr: ___kmp_unnamed_critical_addr:
.long .gomp_critical_user_ .long .gomp_critical_user_
# else /* Linux* OS */ # else /* Linux* OS */
.data .data
.comm .gomp_critical_user_,32,8 no.comm .gomp_critical_user_,32,8
.data .data
ALIGN 4 ALIGN 4
.global __kmp_unnamed_critical_addr .global __kmp_unnamed_critical_addr
@ -223,21 +231,14 @@ __kmp_unnamed_critical_addr:
# if KMP_ARCH_X86_64 # if KMP_ARCH_X86_64
# if KMP_OS_DARWIN # if KMP_OS_DARWIN
.data .data
.comm .gomp_critical_user_,32 no.comm .gomp_critical_user_,32
.data .data
.globl ___kmp_unnamed_critical_addr .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr: ___kmp_unnamed_critical_addr:
.quad .gomp_critical_user_ .quad .gomp_critical_user_
# else /* Linux* OS */ # else /* Linux* OS */
.data .data
#ifdef __COSMOPOLITAN__ no.comm .gomp_critical_user_,32,8
.globl .gomp_critical_user_
".gomp_critical_user_":
.align 8
.byte 32
#else
.comm .gomp_critical_user_,32,8
#endif
.data .data
ALIGN 8 ALIGN 8
.global __kmp_unnamed_critical_addr .global __kmp_unnamed_critical_addr

View file

@ -129,9 +129,11 @@ for arch in $AMD64 $ARM64; do
cp -f o/$arch/libc/crt/crt.o "$OUTDIR/$arch-linux-cosmo/lib/" cp -f o/$arch/libc/crt/crt.o "$OUTDIR/$arch-linux-cosmo/lib/"
cp -f o/$arch/cosmopolitan.a "$OUTDIR/$arch-linux-cosmo/lib/libcosmo.a" cp -f o/$arch/cosmopolitan.a "$OUTDIR/$arch-linux-cosmo/lib/libcosmo.a"
cp -f o/$arch/third_party/libcxx/libcxx.a "$OUTDIR/$arch-linux-cosmo/lib/" cp -f o/$arch/third_party/libcxx/libcxx.a "$OUTDIR/$arch-linux-cosmo/lib/"
for lib in c dl gcc_s m pthread resolv rt dl z stdc++; do for lib in c dl gcc_s m pthread resolv rt dl unwind gomp stdc++; do
printf '\041\074\141\162\143\150\076\012' >"$OUTDIR/$arch-linux-cosmo/lib/lib$lib.a" printf '\041\074\141\162\143\150\076\012' >"$OUTDIR/$arch-linux-cosmo/lib/lib$lib.a"
done done
mkdir -p "$OUTDIR/lib/gcc/"
touch "$OUTDIR/lib/gcc/libgomp.spec" # needed if user passes -fopenmp but not -lgomp
done done
cp -f o/$AMD64/ape/ape.o "$OUTDIR/x86_64-linux-cosmo/lib/" cp -f o/$AMD64/ape/ape.o "$OUTDIR/x86_64-linux-cosmo/lib/"
cp -f o/$AMD64/ape/ape.lds "$OUTDIR/x86_64-linux-cosmo/lib/" cp -f o/$AMD64/ape/ape.lds "$OUTDIR/x86_64-linux-cosmo/lib/"

View file

@ -59,7 +59,8 @@
"__AVX512VBMI__" "__AVX512VBMI__"
"__AVX512VNNI__" "__AVX512VNNI__"
"__AVXVNNI__" "__AVXVNNI__"
"__AVXVNNI__" "__AVXVNNIINT8__"
"__AVXVNNIINT16__"
"__ABM__" "__ABM__"
"__BMI__" "__BMI__"
"__BMI2__" "__BMI2__"

View file

@ -175,6 +175,8 @@ int main(int argc, char *argv[]) {
: ""); : "");
CANIUSE(AVXVNNI); CANIUSE(AVXVNNI);
CANIUSE(AVXVNNIINT8);
CANIUSE(AVXVNNIINT16);
CANIUSE(AVX512BW); CANIUSE(AVX512BW);
CANIUSE(AVX512CD); CANIUSE(AVX512CD);
CANIUSE(AVX512DQ); CANIUSE(AVX512DQ);