diff --git a/examples/BUILD.mk b/examples/BUILD.mk index a6965d922..3b868d186 100644 --- a/examples/BUILD.mk +++ b/examples/BUILD.mk @@ -154,7 +154,8 @@ o/$(MODE)/examples/picol.dbg: \ @$(APELINK) o/$(MODE)/usr/share/dict/words.zip.o: private ZIPOBJ_FLAGS += -C2 -o/$(MODE)/examples/wut.o: private COPTS += -fopenmp + +o/$(MODE)/examples/blas.o: private COPTS += -O3 -fopenmp $(EXAMPLES_OBJS): examples/BUILD.mk diff --git a/examples/blas.cc b/examples/blas.cc new file mode 100644 index 000000000..5bcc4b2ba --- /dev/null +++ b/examples/blas.cc @@ -0,0 +1,221 @@ +// Copyright 2024 Justine Alexandra Roberts Tunney +// +// Permission to use, copy, modify, and/or distribute this software for +// any purpose with or without fee is hereby granted, provided that the +// above copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE +// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL +// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THIS SOFTWARE. + +#include +#include +#include +#include +#include +#include +#include "libc/assert.h" + +// high performance high accuracy matrix multiplication in ansi c + +#define MATH __target_clones("avx512f,fma") + +namespace { +namespace ansiBLAS { + +static constexpr int KN = 8; + +struct Vector { + double v[KN]; +}; + +Vector load(const float *p) { + Vector x; + for (int i = 0; i < KN; ++i) + x.v[i] = p[i]; + return x; +} + +Vector madd(Vector x, Vector y, Vector s) { + for (int i = 0; i < KN; ++i) + s.v[i] = fma(x.v[i], y.v[i], s.v[i]); + return s; +} + +float hsum(Vector x) { + double s = 0; + for (int i = 0; i < KN; ++i) + s += x.v[i]; + return s; +} + +struct ansiBLAS { + public: + ansiBLAS(int k, const float *A, int lda, const float *B, int ldb, float *C, + int ldc, int ith, int nth) + : k(k), + A(A), + lda(lda), + B(B), + ldb(ldb), + C(C), + ldc(ldc), + ith(ith), + nth(nth) { + } + + void matmul(int m, int n) { + mnpack(0, m, 0, n); + } + + private: + void mnpack(int m0, int m, int n0, int n) { + int mc, nc, mp, np; + if (m - m0 <= 0 || n - n0 <= 0) + return; + if (m - m0 >= 4 && n - n0 >= 3) { + mc = 4; + nc = 3; + gemm<4, 3>(m0, m, n0, n); + } else { + mc = 1; + nc = 1; + gemm<1, 1>(m0, m, n0, n); + } + mp = m0 + (m - m0) / mc * mc; + np = n0 + (n - n0) / nc * nc; + mnpack(mp, m, n0, np); + mnpack(m0, m, np, n); + } + + template + MATH void gemm(int m0, int m, int n0, int n) { + int ytiles = (m - m0) / RM; + int xtiles = (n - n0) / RN; + int tiles = xtiles * ytiles; + int duty = (tiles + nth - 1) / nth; + int start = duty * ith; + int end = start + duty; + if (end > tiles) + end = tiles; + for (int job = start; job < end; ++job) { + int ii = m0 + job / xtiles * RM; + int jj = n0 + job % xtiles * RN; + Vector Cv[RN][RM] = {}; + for (int l = 0; l < k; l += KN) + for (int j = 0; j < RN; ++j) + for (int i = 0; i < RM; ++i) + Cv[j][i] = madd(load(A + lda * (ii + i) + l), // + load(B + ldb * (jj + j) + l), // + Cv[j][i]); + for (int j = 0; j < RN; ++j) + for (int i = 0; i < RM; ++i) + C[ldc * (jj + j) + (ii + i)] = hsum(Cv[j][i]); + } + } + + const int k; + const float *const A; + const int lda; + const float *const B; + const int ldb; + float *const C; + const int ldc; + const int ith; + const int nth; +}; + +void sgemm(int m, int n, int k, // + const float *A, int lda, // + const float *B, int ldb, // + float *C, int ldc) { + int nth = sysconf(_SC_NPROCESSORS_ONLN); +#pragma omp parallel for + for (int ith = 0; ith < nth; ++ith) { + ansiBLAS tb{k, A, lda, B, ldb, C, ldc, ith, nth}; + tb.matmul(m, n); + } +} + +} // namespace ansiBLAS + +long micros(void) { + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return ts.tv_sec * 1000000 + (ts.tv_nsec + 999) / 1000; +} + +unsigned rand32(void) { + /* Knuth, D.E., "The Art of Computer Programming," Vol 2, + Seminumerical Algorithms, Third Edition, Addison-Wesley, 1998, + p. 106 (line 26) & p. 108 */ + static unsigned long long lcg = 1; + lcg *= 6364136223846793005; + lcg += 1442695040888963407; + return lcg >> 32; +} + +float float01(unsigned x) { // (0,1) + return 1.f / 8388608 * ((x >> 9) + .5f); +} + +float numba(void) { // (-1,1) + return float01(rand32()) * 2 - 1; +} + +void fill(int m, int n, float *A, int lda) { + for (int j = 0; j < n; ++j) + for (int i = 0; i < m; ++i) + A[lda * j + i] = numba(); +} + +float *new_matrix(int m, int n, int *lda) { + void *ptr = 0; + int b = 64 / sizeof(float); + *lda = (n + b - 1) & -b; + posix_memalign(&ptr, 4096, sizeof(float) * m * *lda); + return (float *)ptr; +} + +} // namespace + +void barrier(void) { +} +void (*pBarrier)(void) = barrier; + +#define BENCH(x) \ + do { \ + x; \ + int N = 10; \ + long t1 = micros(); \ + for (long i = 0; i < N; ++i) { \ + pBarrier(); \ + x; \ + } \ + long t2 = micros(); \ + printf("%8" PRId64 " µs %s %g gigaflops\n", (t2 - t1 + N - 1) / N, #x, \ + 1e6 / ((t2 - t1 + N - 1) / N) * m * n * k * 2 * 1e-9); \ + } while (0) + +int main() { + int m = 1024; + int n = 1024; + int k = 1024; + int lda, ldb, ldc; + float *A = new_matrix(m, k, &lda); + float *B = new_matrix(n, k, &ldb); + float *C = new_matrix(n, m, &ldc); + fill(k, n, A, lda); + fill(k, m, B, ldb); + BENCH(ansiBLAS::sgemm(m, n, k, A, lda, B, ldb, C, ldc)); + assert(C[0] == -0x1.20902ap+4); + assert(C[1] == -0x1.bf7726p+4); + free(C); + free(B); + free(A); +} diff --git a/libc/intrin/x86.c b/libc/intrin/x86.c index 4328f9c82..0ee34f136 100644 --- a/libc/intrin/x86.c +++ b/libc/intrin/x86.c @@ -12,135 +12,9 @@ // //===----------------------------------------------------------------------===// #if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) +#include "libc/intrin/x86.h" -enum VendorSignatures { - SIG_INTEL = 0x756e6547, // Genu - SIG_AMD = 0x68747541, // Auth -}; - -enum ProcessorVendors { - VENDOR_INTEL = 1, - VENDOR_AMD, - VENDOR_OTHER, - VENDOR_MAX -}; - -enum ProcessorTypes { - INTEL_BONNELL = 1, - INTEL_CORE2, - INTEL_COREI7, - AMDFAM10H, - AMDFAM15H, - INTEL_SILVERMONT, - INTEL_KNL, - AMD_BTVER1, - AMD_BTVER2, - AMDFAM17H, - INTEL_KNM, - INTEL_GOLDMONT, - INTEL_GOLDMONT_PLUS, - INTEL_TREMONT, - AMDFAM19H, - ZHAOXIN_FAM7H, - INTEL_SIERRAFOREST, - INTEL_GRANDRIDGE, - INTEL_CLEARWATERFOREST, - CPU_TYPE_MAX -}; - -enum ProcessorSubtypes { - INTEL_COREI7_NEHALEM = 1, - INTEL_COREI7_WESTMERE, - INTEL_COREI7_SANDYBRIDGE, - AMDFAM10H_BARCELONA, - AMDFAM10H_SHANGHAI, - AMDFAM10H_ISTANBUL, - AMDFAM15H_BDVER1, - AMDFAM15H_BDVER2, - AMDFAM15H_BDVER3, - AMDFAM15H_BDVER4, - AMDFAM17H_ZNVER1, - INTEL_COREI7_IVYBRIDGE, - INTEL_COREI7_HASWELL, - INTEL_COREI7_BROADWELL, - INTEL_COREI7_SKYLAKE, - INTEL_COREI7_SKYLAKE_AVX512, - INTEL_COREI7_CANNONLAKE, - INTEL_COREI7_ICELAKE_CLIENT, - INTEL_COREI7_ICELAKE_SERVER, - AMDFAM17H_ZNVER2, - INTEL_COREI7_CASCADELAKE, - INTEL_COREI7_TIGERLAKE, - INTEL_COREI7_COOPERLAKE, - INTEL_COREI7_SAPPHIRERAPIDS, - INTEL_COREI7_ALDERLAKE, - AMDFAM19H_ZNVER3, - INTEL_COREI7_ROCKETLAKE, - ZHAOXIN_FAM7H_LUJIAZUI, - AMDFAM19H_ZNVER4, - INTEL_COREI7_GRANITERAPIDS, - INTEL_COREI7_GRANITERAPIDS_D, - INTEL_COREI7_ARROWLAKE, - INTEL_COREI7_ARROWLAKE_S, - INTEL_COREI7_PANTHERLAKE, - CPU_SUBTYPE_MAX -}; - -enum ProcessorFeatures { - FEATURE_CMOV = 0, - FEATURE_MMX, - FEATURE_POPCNT, - FEATURE_SSE, - FEATURE_SSE2, - FEATURE_SSE3, - FEATURE_SSSE3, - FEATURE_SSE4_1, - FEATURE_SSE4_2, - FEATURE_AVX, - FEATURE_AVX2, - FEATURE_SSE4_A, - FEATURE_FMA4, - FEATURE_XOP, - FEATURE_FMA, - FEATURE_AVX512F, - FEATURE_BMI, - FEATURE_BMI2, - FEATURE_AES, - FEATURE_PCLMUL, - FEATURE_AVX512VL, - FEATURE_AVX512BW, - FEATURE_AVX512DQ, - FEATURE_AVX512CD, - FEATURE_AVX512ER, - FEATURE_AVX512PF, - FEATURE_AVX512VBMI, - FEATURE_AVX512IFMA, - FEATURE_AVX5124VNNIW, - FEATURE_AVX5124FMAPS, - FEATURE_AVX512VPOPCNTDQ, - FEATURE_AVX512VBMI2, - FEATURE_GFNI, - FEATURE_VPCLMULQDQ, - FEATURE_AVX512VNNI, - FEATURE_AVX512BITALG, - FEATURE_AVX512BF16, - FEATURE_AVX512VP2INTERSECT, - - FEATURE_CMPXCHG16B = 46, - FEATURE_F16C = 49, - FEATURE_LAHF_LM = 54, - FEATURE_LM, - FEATURE_WP, - FEATURE_LZCNT, - FEATURE_MOVBE, - - FEATURE_AVX512FP16 = 94, - FEATURE_X86_64_BASELINE, - FEATURE_X86_64_V2, - FEATURE_X86_64_V3, - FEATURE_X86_64_V4, - CPU_FEATURE_MAX -}; +struct __processor_model __cpu_model; // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID @@ -782,13 +656,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, #undef setFeature } -struct __processor_model { - unsigned int __cpu_vendor; - unsigned int __cpu_type; - unsigned int __cpu_subtype; - unsigned int __cpu_features[1]; -} __cpu_model = {0, 0, 0, {0}}; - unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32]; // A constructor function that is sets __cpu_model and __cpu_features2 with diff --git a/libc/intrin/x86.h b/libc/intrin/x86.h new file mode 100644 index 000000000..26093109e --- /dev/null +++ b/libc/intrin/x86.h @@ -0,0 +1,147 @@ +#ifndef COSMOPOLITAN_LIBC_INTRIN_X86_H_ +#define COSMOPOLITAN_LIBC_INTRIN_X86_H_ +COSMOPOLITAN_C_START_ + +enum VendorSignatures { + SIG_INTEL = 0x756e6547, // Genu + SIG_AMD = 0x68747541, // Auth +}; + +enum ProcessorVendors { + VENDOR_INTEL = 1, + VENDOR_AMD, + VENDOR_OTHER, + VENDOR_MAX +}; + +enum ProcessorTypes { + INTEL_BONNELL = 1, + INTEL_CORE2, + INTEL_COREI7, + AMDFAM10H, + AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + INTEL_KNM, + INTEL_GOLDMONT, + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, + AMDFAM19H, + ZHAOXIN_FAM7H, + INTEL_SIERRAFOREST, + INTEL_GRANDRIDGE, + INTEL_CLEARWATERFOREST, + CPU_TYPE_MAX +}; + +enum ProcessorSubtypes { + INTEL_COREI7_NEHALEM = 1, + INTEL_COREI7_WESTMERE, + INTEL_COREI7_SANDYBRIDGE, + AMDFAM10H_BARCELONA, + AMDFAM10H_SHANGHAI, + AMDFAM10H_ISTANBUL, + AMDFAM15H_BDVER1, + AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, + INTEL_COREI7_BROADWELL, + INTEL_COREI7_SKYLAKE, + INTEL_COREI7_SKYLAKE_AVX512, + INTEL_COREI7_CANNONLAKE, + INTEL_COREI7_ICELAKE_CLIENT, + INTEL_COREI7_ICELAKE_SERVER, + AMDFAM17H_ZNVER2, + INTEL_COREI7_CASCADELAKE, + INTEL_COREI7_TIGERLAKE, + INTEL_COREI7_COOPERLAKE, + INTEL_COREI7_SAPPHIRERAPIDS, + INTEL_COREI7_ALDERLAKE, + AMDFAM19H_ZNVER3, + INTEL_COREI7_ROCKETLAKE, + ZHAOXIN_FAM7H_LUJIAZUI, + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, + INTEL_COREI7_GRANITERAPIDS_D, + INTEL_COREI7_ARROWLAKE, + INTEL_COREI7_ARROWLAKE_S, + INTEL_COREI7_PANTHERLAKE, + CPU_SUBTYPE_MAX +}; + +enum ProcessorFeatures { + FEATURE_CMOV = 0, + FEATURE_MMX, + FEATURE_POPCNT, + FEATURE_SSE, + FEATURE_SSE2, + FEATURE_SSE3, + FEATURE_SSSE3, + FEATURE_SSE4_1, + FEATURE_SSE4_2, + FEATURE_AVX, + FEATURE_AVX2, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + FEATURE_AVX512VBMI2, + FEATURE_GFNI, + FEATURE_VPCLMULQDQ, + FEATURE_AVX512VNNI, + FEATURE_AVX512BITALG, + FEATURE_AVX512BF16, + FEATURE_AVX512VP2INTERSECT, + + FEATURE_CMPXCHG16B = 46, + FEATURE_F16C = 49, + FEATURE_LAHF_LM = 54, + FEATURE_LM, + FEATURE_WP, + FEATURE_LZCNT, + FEATURE_MOVBE, + + FEATURE_AVX512FP16 = 94, + FEATURE_X86_64_BASELINE, + FEATURE_X86_64_V2, + FEATURE_X86_64_V3, + FEATURE_X86_64_V4, + CPU_FEATURE_MAX +}; + +struct __processor_model { + unsigned __cpu_vendor; + unsigned __cpu_type; + unsigned __cpu_subtype; + unsigned __cpu_features[1]; + const char *__cpu_march; +}; + +struct __processor_model __cpu_model; + +const char *__cpu_march(unsigned); + +COSMOPOLITAN_C_END_ +#endif /* COSMOPOLITAN_LIBC_INTRIN_X86_H_ */ diff --git a/libc/intrin/x86march.c b/libc/intrin/x86march.c new file mode 100644 index 000000000..cafc142fa --- /dev/null +++ b/libc/intrin/x86march.c @@ -0,0 +1,94 @@ +// Copyright 2024 Justine Alexandra Roberts Tunney +// +// Permission to use, copy, modify, and/or distribute this software for +// any purpose with or without fee is hereby granted, provided that the +// above copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE +// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL +// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THIS SOFTWARE. +#include "libc/intrin/x86.h" + +/** + * Returns microarchitecture name, e.g. + * + * puts(__cpu_march(__cpu_model.__cpu_subtype)); + * + */ +const char *__cpu_march(unsigned subtype) { + switch (subtype) { + case INTEL_COREI7_NEHALEM: + return "nehalem"; + case INTEL_COREI7_WESTMERE: + return "westmere"; + case INTEL_COREI7_SANDYBRIDGE: + return "sandybridge"; + case AMDFAM10H_BARCELONA: + return "amdfam10"; + case AMDFAM10H_SHANGHAI: + return "amdfam10"; + case AMDFAM10H_ISTANBUL: + return "amdfam10"; + case AMDFAM15H_BDVER1: + return "bdver2"; + case AMDFAM15H_BDVER2: + return "bdver2"; + case AMDFAM15H_BDVER3: + return "bdver3"; + case AMDFAM15H_BDVER4: + return "bdver4"; + case AMDFAM17H_ZNVER1: + return "znver2"; + case INTEL_COREI7_IVYBRIDGE: + return "ivybridge"; + case INTEL_COREI7_HASWELL: + return "haswell"; + case INTEL_COREI7_BROADWELL: + return "broadwell"; + case INTEL_COREI7_SKYLAKE: + return "skylake"; + case INTEL_COREI7_SKYLAKE_AVX512: + return "skylake-avx512"; + case INTEL_COREI7_CANNONLAKE: + return "cannonlake"; + case INTEL_COREI7_ICELAKE_CLIENT: + return "icelake-client"; + case INTEL_COREI7_ICELAKE_SERVER: + return "icelake-server"; + case AMDFAM17H_ZNVER2: + return "znver2"; + case INTEL_COREI7_CASCADELAKE: + return "cascadelake"; + case INTEL_COREI7_TIGERLAKE: + return "tigerlake"; + case INTEL_COREI7_COOPERLAKE: + return "cooperlake"; + case INTEL_COREI7_SAPPHIRERAPIDS: + return "sapphirerapids"; + case INTEL_COREI7_ALDERLAKE: + return "alderlake"; + case AMDFAM19H_ZNVER3: + return "znver3"; + case INTEL_COREI7_ROCKETLAKE: + return "rocketlake"; + case AMDFAM19H_ZNVER4: + return "znver4"; + case INTEL_COREI7_GRANITERAPIDS: + return "graniterapids"; + case INTEL_COREI7_GRANITERAPIDS_D: + return "graniterapids-d"; + case INTEL_COREI7_ARROWLAKE: + return "arrowlake"; + case INTEL_COREI7_ARROWLAKE_S: + return "arrowlake-s"; + case INTEL_COREI7_PANTHERLAKE: + return "pantherlake"; + default: + return 0; + } +} diff --git a/libc/nexgen32e/kcpuids.S b/libc/nexgen32e/kcpuids.S index 0c5fdea05..adc6ef5d1 100644 --- a/libc/nexgen32e/kcpuids.S +++ b/libc/nexgen32e/kcpuids.S @@ -93,18 +93,37 @@ kCpuids:.long 0,0,0,0 // EAX=0 (Basic Processor Info) add $4*4,%rdi jmp 2b 3: nop -#if !X86_NEED(AVX2) + +// test if cpu supports avx testb X86_HAVE(AVX)(%r8) - jz 5f + jz 7f testb X86_HAVE(OSXSAVE)(%r8) - jz 4f + jz 5f xor %ecx,%ecx xgetbv + mov %eax,%ecx + +// test if operating system saves avx registers and $XCR0_SSE|XCR0_AVX,%eax cmp $XCR0_SSE|XCR0_AVX,%eax - je 5f -4: btr $X86_BIT(AVX),X86_WORD(AVX)(%r8) + jne 5f + +// test if operating system saves avx512 registers + and $XCR0_OPMASK|XCR0_ZMM_HI256|XCR0_HI16_ZMM,%ecx + cmp $XCR0_OPMASK|XCR0_ZMM_HI256|XCR0_HI16_ZMM,%ecx + jne 6f + je 7f + +// operating system doesn't support avx +5: btr $X86_BIT(AVX),X86_WORD(AVX)(%r8) btr $X86_BIT(AVX2),X86_WORD(AVX2)(%r8) -#endif -5: pop %rbx + +// operating system supports avx but not avx512 +6: andl $~(1<<30|1<<28|1<<17|1<<27|1<<16|1<<21|1<<26|1<<31),KCPUIDS(7H, EBX)(%r8) + andl $~(1<<1|1<<12|1<<6|1<<11|1<<14),KCPUIDS(7H, ECX)(%r8) + andl $~(1<<2|1<<3|1<<8),KCPUIDS(7H, EDX)(%r8) + andl $~(1<<5),KCPUIDS(7H_1H, EAX)(%r8) + +// we're done +7: pop %rbx .init.end 201,_init_kCpuids diff --git a/libc/nt/enum/memflags.h b/libc/nt/enum/memflags.h index f2eb173f6..fffb09ef9 100644 --- a/libc/nt/enum/memflags.h +++ b/libc/nt/enum/memflags.h @@ -17,6 +17,5 @@ #define kNtMem4mbPages 0x80000000 #define kNtMemReplacePlaceholder 0x00004000 -#define kNtMemLargePages 0x20000000 #endif /* COSMOPOLITAN_LIBC_NT_ENUM_MEMFLAGS_H_ */