Fold LIBC_BITS into LIBC_INTRIN

Justine Tunney 2022-08-11 12:13:18 -07:00
parent 625aa365f1
commit 05b8f82371
603 changed files with 1071 additions and 1211 deletions

libc/bits/abs.c
@@ -1,27 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/fmt/conv.h"
#include "libc/macros.internal.h"
/**
* Returns absolute value of x.
*/
int abs(int x) {
return ABS(x);
}

libc/bits/asmflag.h
@@ -1,34 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_ASMFLAG_H_
#define COSMOPOLITAN_LIBC_BITS_ASMFLAG_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*
* Constraints for virtual machine flags.
* @note we beseech clang devs for flag constraints
*/
#ifdef __GCC_ASM_FLAG_OUTPUTS__ /* GCC6+ CLANG10+ */
#define CFLAG_CONSTRAINT "=@ccc"
#define CFLAG_ASM(OP) OP
#define ZFLAG_CONSTRAINT "=@ccz"
#define ZFLAG_ASM(OP) OP
#define OFLAG_CONSTRAINT "=@cco"
#define OFLAG_ASM(OP) OP
#define SFLAG_CONSTRAINT "=@ccs"
#define SFLAG_ASM(SP) SP
#define ABOVE_CONSTRAINT "=@cca" /* i.e. !ZF && !CF */
#define ABOVEFLAG_ASM(OP) OP
#else
#define CFLAG_CONSTRAINT "=q"
#define CFLAG_ASM(OP) OP "\n\tsetc\t%b0"
#define ZFLAG_CONSTRAINT "=q"
#define ZFLAG_ASM(OP) OP "\n\tsetz\t%b0"
#define OFLAG_CONSTRAINT "=q"
#define OFLAG_ASM(OP) OP "\n\tseto\t%b0"
#define SFLAG_CONSTRAINT "=q"
#define SFLAG_ASM(SP) SP "\n\tsets\t%b0"
#define ABOVE_CONSTRAINT "=q"
#define ABOVEFLAG_ASM(OP) OP "\n\tseta\t%b0"
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_ASMFLAG_H_ */
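
A minimal sketch of how these constraint macros compose (function name
illustrative, not from the repo): on GCC 6+/Clang 10+ the carry flag is
captured directly as an asm output, elsewhere the wrapper appends a setc.

static inline _Bool AddWouldCarry(unsigned char a, unsigned char b) {
  _Bool cf;
  asm(CFLAG_ASM("addb\t%2,%1")
      : CFLAG_CONSTRAINT(cf), "+q"(a)
      : "q"(b)
      : "cc");
  return cf; /* true iff a + b wrapped past 255 */
}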

libc/bits/atomic.h
@@ -1,170 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
#define COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
#include "libc/atomic.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
* @fileoverview Cosmopolitan C11 Atomics Library
*
* - Forty-two different ways to say MOV.
* - Fourteen different ways to say XCHG.
* - Twenty different ways to say LOCK CMPXCHG.
*
* It's a lower level programming language than assembly!
*
* @see libc/atomic.h
*/
#define memory_order int
#define memory_order_relaxed 0
#define memory_order_consume 1
#define memory_order_acquire 2
#define memory_order_release 3
#define memory_order_acq_rel 4
#define memory_order_seq_cst 5
#define ATOMIC_VAR_INIT(value) (value)
#define atomic_is_lock_free(obj) ((void)(obj), sizeof(obj) <= sizeof(void *))
#define atomic_flag atomic_bool
#define ATOMIC_FLAG_INIT ATOMIC_VAR_INIT(0)
#define atomic_flag_test_and_set_explicit(x, order) \
atomic_exchange_explicit(x, 1, order)
#define atomic_flag_clear_explicit(x, order) atomic_store_explicit(x, 0, order)
#define atomic_compare_exchange_strong(pObject, pExpected, desired) \
atomic_compare_exchange_strong_explicit( \
pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst)
#define atomic_compare_exchange_weak(pObject, pExpected, desired) \
atomic_compare_exchange_weak_explicit( \
pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst)
#define atomic_exchange(pObject, desired) \
atomic_exchange_explicit(pObject, desired, memory_order_seq_cst)
#define atomic_fetch_add(pObject, operand) \
atomic_fetch_add_explicit(pObject, operand, memory_order_seq_cst)
#define atomic_fetch_and(pObject, operand) \
atomic_fetch_and_explicit(pObject, operand, memory_order_seq_cst)
#define atomic_fetch_or(pObject, operand) \
atomic_fetch_or_explicit(pObject, operand, memory_order_seq_cst)
#define atomic_fetch_sub(pObject, operand) \
atomic_fetch_sub_explicit(pObject, operand, memory_order_seq_cst)
#define atomic_fetch_xor(pObject, operand) \
atomic_fetch_xor_explicit(pObject, operand, memory_order_seq_cst)
#define atomic_load(pObject) atomic_load_explicit(pObject, memory_order_seq_cst)
#define atomic_store(pObject, desired) \
atomic_store_explicit(pObject, desired, memory_order_seq_cst)
#define atomic_flag_test_and_set(x) \
atomic_flag_test_and_set_explicit(x, memory_order_seq_cst)
#define atomic_flag_clear(x) atomic_flag_clear_explicit(x, memory_order_seq_cst)
#if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE)
#define atomic_init(obj, value) __c11_atomic_init(obj, value)
#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)
#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)
#define atomic_compare_exchange_strong_explicit(object, expected, desired, \
success, failure) \
__c11_atomic_compare_exchange_strong(object, expected, desired, success, \
failure)
#define atomic_compare_exchange_weak_explicit(object, expected, desired, \
success, failure) \
__c11_atomic_compare_exchange_weak(object, expected, desired, success, \
failure)
#define atomic_exchange_explicit(object, desired, order) \
__c11_atomic_exchange(object, desired, order)
#define atomic_fetch_add_explicit(object, operand, order) \
__c11_atomic_fetch_add(object, operand, order)
#define atomic_fetch_and_explicit(object, operand, order) \
__c11_atomic_fetch_and(object, operand, order)
#define atomic_fetch_or_explicit(object, operand, order) \
__c11_atomic_fetch_or(object, operand, order)
#define atomic_fetch_sub_explicit(object, operand, order) \
__c11_atomic_fetch_sub(object, operand, order)
#define atomic_fetch_xor_explicit(object, operand, order) \
__c11_atomic_fetch_xor(object, operand, order)
#define atomic_load_explicit(object, order) __c11_atomic_load(object, order)
#define atomic_store_explicit(object, desired, order) \
__c11_atomic_store(object, desired, order)
#elif (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407
#define atomic_init(obj, value) ((void)(*(obj) = (value)))
#define atomic_thread_fence(order) __atomic_thread_fence(order)
#define atomic_signal_fence(order) __atomic_signal_fence(order)
#define atomic_compare_exchange_strong_explicit(pObject, pExpected, desired, \
success, failure) \
__atomic_compare_exchange_n(pObject, pExpected, desired, 0, success, failure)
#define atomic_compare_exchange_weak_explicit(pObject, pExpected, desired, \
success, failure) \
__atomic_compare_exchange_n(pObject, pExpected, desired, 1, success, failure)
#define atomic_exchange_explicit(pObject, desired, order) \
__atomic_exchange_n(pObject, desired, order)
#define atomic_fetch_add_explicit(pObject, operand, order) \
__atomic_fetch_add(pObject, operand, order)
#define atomic_fetch_and_explicit(pObject, operand, order) \
__atomic_fetch_and(pObject, operand, order)
#define atomic_fetch_or_explicit(pObject, operand, order) \
__atomic_fetch_or(pObject, operand, order)
#define atomic_fetch_sub_explicit(pObject, operand, order) \
__atomic_fetch_sub(pObject, operand, order)
#define atomic_fetch_xor_explicit(pObject, operand, order) \
__atomic_fetch_xor(pObject, operand, order)
#define atomic_load_explicit(pObject, order) __atomic_load_n(pObject, order)
#define atomic_store_explicit(pObject, desired, order) \
__atomic_store_n(pObject, desired, order)
#else
#define atomic_init(obj, value) ((void)(*(obj) = (value)))
#define atomic_thread_fence(order) __sync_synchronize()
#define atomic_signal_fence(order) __asm__ volatile("" ::: "memory")
#define __atomic_val(object) (*(object)) /* helper assumed; no definition appears in this file */
#define __atomic_apply_stride(object, operand) \
(((__typeof__(__atomic_val(object)))0) + (operand))
#define atomic_compare_exchange_strong_explicit(object, expected, desired, \
success, failure) \
__extension__({ \
__typeof__(expected) __ep = (expected); \
__typeof__(*__ep) __e = *__ep; \
(void)(success); \
(void)(failure); \
(_Bool)((*__ep = __sync_val_compare_and_swap(object, __e, desired)) == \
__e); \
})
#define atomic_compare_exchange_weak_explicit(object, expected, desired, \
success, failure) \
atomic_compare_exchange_strong_explicit(object, expected, desired, success, \
failure)
#if __has_builtin(__sync_swap)
#define atomic_exchange_explicit(object, desired, order) \
((void)(order), __sync_swap(object, desired))
#else
#define atomic_exchange_explicit(object, desired, order) \
__extension__({ \
__typeof__(object) __o = (object); \
__typeof__(desired) __d = (desired); \
(void)(order); \
__sync_synchronize(); \
__sync_lock_test_and_set(&__atomic_val(__o), __d); \
})
#endif
#define atomic_fetch_add_explicit(object, operand, order) \
((void)(order), \
__sync_fetch_and_add(object, __atomic_apply_stride(object, operand)))
#define atomic_fetch_and_explicit(object, operand, order) \
((void)(order), __sync_fetch_and_and(object, operand))
#define atomic_fetch_or_explicit(object, operand, order) \
((void)(order), __sync_fetch_and_or(object, operand))
#define atomic_fetch_sub_explicit(object, operand, order) \
((void)(order), \
__sync_fetch_and_sub(object, __atomic_apply_stride(object, operand)))
#define atomic_fetch_xor_explicit(object, operand, order) \
((void)(order), __sync_fetch_and_xor(object, operand))
#define atomic_load_explicit(object, order) \
((void)(order), __sync_fetch_and_add(object, 0))
#define atomic_store_explicit(object, desired, order) \
((void)atomic_exchange_explicit(object, desired, order))
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_ATOMIC_H_ */
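
A usage sketch (assuming atomic_int from libc/atomic.h): the same call lowers
to __c11_atomic_fetch_add on Clang, __atomic_fetch_add on GCC 4.7+, and
__sync_fetch_and_add on the legacy fallback path.

atomic_int g_counter;

int BumpCounter(void) {
  /* returns the value held before the increment */
  return atomic_fetch_add_explicit(&g_counter, 1, memory_order_relaxed);
}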

libc/bits/avx2intrin.internal.h
@@ -1,133 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
#include "libc/bits/avxintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define _mm256_min_epi16(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pminsw256((__v16hi)(M256I_0), (__v16hi)(M256I_1)))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » avx2 » simd ops
*/
#define _mm256_add_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) + (__v8sf)(M256_1)))
#define _mm256_sub_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) - (__v8sf)(M256_1)))
#define _mm256_mul_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) * (__v8sf)(M256_1)))
#define _mm256_div_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) / (__v8sf)(M256_1)))
#define _mm256_and_ps(M256_0, M256_1) \
((__m256)((__v8su)(M256_0) & (__v8su)(M256_1)))
#define _mm256_or_ps(M256_0, M256_1) \
((__m256)((__v8su)(M256_0) | (__v8su)(M256_1)))
#define _mm256_xor_ps(M256_0, M256_1) /* XORPD [u32 simd xor] */ \
((__m256)((__v8su)(M256_0) ^ (__v8su)(M256_1)))
#define _mm256_andnot_ps(M256_0, M256_1) /* ANDNPS [u32 simd nand] */ \
((__m256)(~(__v8su)(M256_0) & (__v8su)(M256_1)))
#define _mm256_rcp_ps(M256) __builtin_ia32_rcpps256((__v8sf)(M256))
#define _mm256_sqrt_ps(M256) __builtin_ia32_sqrtps256((__v8sf)(M256))
#define _mm256_rsqrt_ps(M256) __builtin_ia32_rsqrtps256((__v8sf)(M256))
#define _mm256_round_ps(M256, IMM) \
((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(M256), IMM))
#define _mm256_add_epi32(M256I_0, M256I_1) \
((__m256i)((__v8su)(M256I_0) + (__v8su)(M256I_1)))
#define _mm256_cmpgt_epi32(M256I_0, M256I_1) \
((__m256i)((__v8si)(M256I_0) > (__v8si)(M256I_1)))
#define _mm256_min_epi32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pminsd256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_min_epu32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pminud256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_max_epi32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pmaxsd256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_max_epu32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pmaxud256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_blendv_epi8(M256I_0, M256I_1, M256I_2) \
((__m256i)__builtin_ia32_pblendvb256((__v32qi)(M256I_0), (__v32qi)(M256I_1), \
(__v32qi)(M256I_2)))
#define _mm256_min_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_minps256((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_max_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_maxps256((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpneq_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpneqps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmplt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpltps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpnlt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnltps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmple_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpleps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpnle_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnleps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpgt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpltps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpngt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnltps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpge_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpleps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpnge_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnleps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpord_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpordps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpunord_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpunordps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § avx2 » memory ops
*/
struct thatispacked PackedMayaliasIntyYmm {
__m256i Ymm;
} mayalias;
#define _mm256_set_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
((__m256)(__v8sf){(float)(FLT_0), (float)(FLT_1), (float)(FLT_2), \
(float)(FLT_3), (float)(FLT_4), (float)(FLT_5), \
(float)(FLT_6), (float)(FLT_7)})
#define _mm256_set1_ps(FLT_0) \
_mm256_set_ps(FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0)
#define _mm256_setr_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
_mm256_set_ps(FLT_7, FLT_6, FLT_5, FLT_4, FLT_3, FLT_2, FLT_1, FLT_0)
#define _mm256_set_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
INT_7) \
((__m256i)(__v8si){(int)(INT_0), (int)(INT_1), (int)(INT_2), (int)(INT_3), \
(int)(INT_4), (int)(INT_5), (int)(INT_6), (int)(INT_7)})
#define _mm256_set1_epi32(INT_0) \
_mm256_set_epi32(INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0)
#define _mm256_setr_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
INT_7) \
_mm256_set_epi32(INT_7, INT_6, INT_5, INT_4, INT_3, INT_2, INT_1, INT_0)
#define _mm256_loadu_si256(M256IP_0) \
({ \
const __m256i *Ymm = (M256IP_0); \
((struct PackedMayaliasIntyYmm *)Ymm)->Ymm; \
})
#define _mm256_storeu_si256(M256IP_0, M256I_1) \
({ \
__m256i *Ymm = (M256IP_0); \
((struct PackedMayaliasIntyYmm *)Ymm)->Ymm = M256I_1; \
})
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_ */
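
A sketch of the deleted intrinsics in use (assumes an -mavx2 build; the
unaligned load/store forms come from the companion avxintrin header):

void AddVec8(float *c, const float *a, const float *b) {
  __m256 x = _mm256_loadu_ps(a); /* eight floats, any alignment */
  __m256 y = _mm256_loadu_ps(b);
  _mm256_storeu_ps(c, _mm256_add_ps(x, y)); /* lane-wise c = a + b */
}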

libc/bits/avxintrin.internal.h
@@ -1,51 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
typedef float __m256 _Vector_size(32) mayalias;
typedef double __m256d _Vector_size(32) mayalias;
typedef long long __m256i _Vector_size(32) mayalias;
typedef float __m256_u _Vector_size(32) forcealign(1) mayalias;
typedef double __m256d_u _Vector_size(32) forcealign(1) mayalias;
typedef long long __m256i_u _Vector_size(32) forcealign(1) mayalias;
typedef double __v4df _Vector_size(32);
typedef float __v8sf _Vector_size(32);
typedef long long __v4di _Vector_size(32);
typedef unsigned long long __v4du _Vector_size(32);
typedef int __v8si _Vector_size(32);
typedef unsigned __v8su _Vector_size(32);
typedef short __v16hi _Vector_size(32);
typedef unsigned short __v16hu _Vector_size(32);
typedef char __v32qi _Vector_size(32);
typedef unsigned char __v32qu _Vector_size(32);
#define _mm256_setzero_ps() ((__m256)(__v8sf){0})
#define _mm256_load_ps(FLOATPTR) (*(__m256 *)(FLOATPTR))
#define _mm256_loadu_ps(FLOATPTR) (*(__m256_u *)(FLOATPTR))
#define _mm256_store_ps(FLOATPTR, M256_0) \
(*(__m256 *)(FLOATPTR) = (__m256)(M256_0))
#define _mm256_storeu_ps(FLOATPTR, M256_0) \
(*(__m256_u *)(FLOATPTR) = (__m256)(M256_0))
#define _mm256_extractf128_ps(M256_0, INT_1) \
((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(M256_0), \
(int)(INT_1)))
#define _mm256_insertf128_ps(M256_0, M128_1, IMM_2) \
((__m256)__builtin_ia32_vinsertf128_ps256( \
(__v8sf)(__m256)(M256_0), (__v4sf)(__m128)(M128_1), (int)(IMM_2)))
#ifdef __llvm__
#define _mm256_castps128_ps256(M128_0) \
((__m256)__builtin_shufflevector((__v4sf)(__m128)(M128_0), \
(__v4sf)(__m128)(M128_0), 0, 1, 2, 3, -1, \
-1, -1, -1))
#else
#define _mm256_castps128_ps256(M128_0) \
((__m256)__builtin_ia32_ps256_ps((__v4sf)(__m128)(M128_0)))
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_ */

libc/bits/bextra.c
@@ -1,43 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
/**
* Extracts bit field from array.
*/
unsigned bextra(const unsigned *p, size_t i, char b) {
unsigned k, r, w;
w = sizeof(unsigned) * CHAR_BIT;
if (b) {
b &= w - 1;
i *= b;
k = i & (w - 1);
i /= w;
if (k <= w - b) {
return (p[i] >> k) & ((1u << (b - 1)) | ((1u << (b - 1)) - 1));
} else {
r = p[i] >> k;
r |= p[i + 1] << (w - k);
r &= (1ul << b) - 1;
return r;
}
} else {
return 0;
}
}
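
For example, with 32-bit words and b = 5, field six occupies bits 30..34 and
straddles the word seam, exercising the second branch (values illustrative):

unsigned words[2] = {0xC0000000, 0x00000005};
unsigned v = bextra(words, 6, 5); /* 0b11 from words[0], 0b101 from words[1] → 0x17 */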

libc/bits/bigword.h
@@ -1,16 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
#define COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
#ifndef BIGWORD
#if __AVX512F__ + 0
#define BIGWORD 64
#elif __AVX2__ + 0
#define BIGWORD 32
#elif __SSE2__ + 0
#define BIGWORD 16
#else
#define BIGWORD __BIGGEST_ALIGNMENT__
#endif
#endif /*BIGWORD*/
#endif /* COSMOPOLITAN_LIBC_BITS_BIGWORD_H_ */

libc/bits/bitop.h
@@ -1,45 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_BITOP_H_
#define COSMOPOLITAN_LIBC_BITS_BITOP_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define bts(MEM, BIT) __BitOp("bts", BIT, MEM) /** bit test and set */
#define btr(MEM, BIT) __BitOp("btr", BIT, MEM) /** bit test and reset */
#define btc(MEM, BIT) __BitOp("btc", BIT, MEM) /** bit test and complement */
#define lockbts(MEM, BIT) __BitOp("lock bts", BIT, MEM)
#define lockbtr(MEM, BIT) __BitOp("lock btr", BIT, MEM)
#define lockbtc(MEM, BIT) __BitOp("lock btc", BIT, MEM)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define __BitOp(OP, BIT, MEM) \
({ \
bool OldBit; \
if (__builtin_constant_p(BIT)) { \
asm(CFLAG_ASM(OP "%z1\t%2,%1") \
: CFLAG_CONSTRAINT(OldBit), \
"+m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]) \
: "J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 2) { \
asm(CFLAG_ASM(OP "\t%w2,%1") \
: CFLAG_CONSTRAINT(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 4) { \
asm(CFLAG_ASM(OP "\t%k2,%1") \
: CFLAG_CONSTRAINT(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 8) { \
asm(CFLAG_ASM(OP "\t%q2,%1") \
: CFLAG_CONSTRAINT(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} \
OldBit; \
})
#endif /* __GNUC__ && !__STRICT_ANSI__ */
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_BITOP_H_ */
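
A usage sketch (asmflag.h must be included as well, since __BitOp expands its
CFLAG_ASM/CFLAG_CONSTRAINT macros):

uint32_t bitmap[2] = {0, 0};
_Bool old = bts(bitmap, 37); /* sets bit 5 of bitmap[1]; old == 0 */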

libc/bits/bitreverse16.c
@@ -1,26 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
/**
* Reverses bits in 16-bit word.
*/
int bitreverse16(int x) {
return BITREVERSE16(x);
}

libc/bits/bitreverse32.c
@@ -1,31 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/bits/bswap.h"
/**
* Reverses bits in 32-bit word.
*/
uint32_t bitreverse32(uint32_t x) {
x = bswap_32(x);
x = (x & 0xaaaaaaaa) >> 1 | (x & 0x55555555) << 1;
x = (x & 0xcccccccc) >> 2 | (x & 0x33333333) << 2;
x = (x & 0xf0f0f0f0) >> 4 | (x & 0x0f0f0f0f) << 4;
return x;
}
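
bswap_32() handles the byte-granular reversal, leaving only the three in-byte
swaps above; for instance:

uint32_t hi = bitreverse32(1);          /* 0x80000000 */
uint32_t lo = bitreverse32(0x80000000); /* 1: the transform is an involution */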

libc/bits/bitreverse64.c
@@ -1,31 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/bits/bswap.h"
/**
* Reverses bits in 64-bit word.
*/
uint64_t bitreverse64(uint64_t x) {
x = bswap_64(x);
x = (x & 0xaaaaaaaaaaaaaaaa) >> 1 | (x & 0x5555555555555555) << 1;
x = (x & 0xcccccccccccccccc) >> 2 | (x & 0x3333333333333333) << 2;
x = (x & 0xf0f0f0f0f0f0f0f0) >> 4 | (x & 0x0f0f0f0f0f0f0f0f) << 4;
return x;
}

libc/bits/bitreverse8.c
@@ -1,26 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
/**
* Reverses bits in 8-bit word.
*/
int bitreverse8(int x) {
return BITREVERSE8(x);
}

libc/bits/bits.h
@@ -1,164 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_H_
#define COSMOPOLITAN_LIBC_BITS_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define CheckUnsigned(x) ((x) / !((typeof(x))(-1) < 0))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits
*/
extern const uint8_t kReverseBits[256];
uint32_t gray(uint32_t) pureconst;
uint32_t ungray(uint32_t) pureconst;
int bitreverse8(int) libcesque pureconst;
int bitreverse16(int) libcesque pureconst;
uint32_t bitreverse32(uint32_t) libcesque pureconst;
uint64_t bitreverse64(uint64_t) libcesque pureconst;
unsigned long roundup2pow(unsigned long) libcesque pureconst;
unsigned long roundup2log(unsigned long) libcesque pureconst;
unsigned long rounddown2pow(unsigned long) libcesque pureconst;
unsigned long hamming(unsigned long, unsigned long) pureconst;
unsigned bextra(const unsigned *, size_t, char);
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits » no assembly required
*/
#define BITREVERSE8(X) (kReverseBits[255 & (X)])
#define BITREVERSE16(X) \
(kReverseBits[0x00FF & (X)] << 8 | kReverseBits[(0xFF00 & (X)) >> 8])
#ifdef __STRICT_ANSI__
#define READ16LE(S) ((255 & (S)[1]) << 8 | (255 & (S)[0]))
#define READ16BE(S) ((255 & (S)[0]) << 8 | (255 & (S)[1]))
#define READ32LE(S) \
((uint32_t)(255 & (S)[3]) << 030 | (uint32_t)(255 & (S)[2]) << 020 | \
(uint32_t)(255 & (S)[1]) << 010 | (uint32_t)(255 & (S)[0]) << 000)
#define READ32BE(S) \
((uint32_t)(255 & (S)[0]) << 030 | (uint32_t)(255 & (S)[1]) << 020 | \
(uint32_t)(255 & (S)[2]) << 010 | (uint32_t)(255 & (S)[3]) << 000)
#define READ64LE(S) \
((uint64_t)(255 & (S)[7]) << 070 | (uint64_t)(255 & (S)[6]) << 060 | \
(uint64_t)(255 & (S)[5]) << 050 | (uint64_t)(255 & (S)[4]) << 040 | \
(uint64_t)(255 & (S)[3]) << 030 | (uint64_t)(255 & (S)[2]) << 020 | \
(uint64_t)(255 & (S)[1]) << 010 | (uint64_t)(255 & (S)[0]) << 000)
#define READ64BE(S) \
((uint64_t)(255 & (S)[0]) << 070 | (uint64_t)(255 & (S)[1]) << 060 | \
(uint64_t)(255 & (S)[2]) << 050 | (uint64_t)(255 & (S)[3]) << 040 | \
(uint64_t)(255 & (S)[4]) << 030 | (uint64_t)(255 & (S)[5]) << 020 | \
(uint64_t)(255 & (S)[6]) << 010 | (uint64_t)(255 & (S)[7]) << 000)
#else /* gcc needs help knowing above are mov if s isn't a variable */
#define READ16LE(S) \
({ \
const uint8_t *Ptr = (const uint8_t *)(S); \
Ptr[1] << 8 | Ptr[0]; \
})
#define READ16BE(S) \
({ \
const uint8_t *Ptr = (const uint8_t *)(S); \
Ptr[0] << 8 | Ptr[1]; \
})
#define READ32LE(S) \
({ \
const uint8_t *Ptr = (const uint8_t *)(S); \
((uint32_t)Ptr[3] << 030 | (uint32_t)Ptr[2] << 020 | \
(uint32_t)Ptr[1] << 010 | (uint32_t)Ptr[0] << 000); \
})
#define READ32BE(S) \
({ \
const uint8_t *Ptr = (const uint8_t *)(S); \
((uint32_t)Ptr[0] << 030 | (uint32_t)Ptr[1] << 020 | \
(uint32_t)Ptr[2] << 010 | (uint32_t)Ptr[3] << 000); \
})
#define READ64LE(S) \
({ \
const uint8_t *Ptr = (const uint8_t *)(S); \
((uint64_t)Ptr[7] << 070 | (uint64_t)Ptr[6] << 060 | \
(uint64_t)Ptr[5] << 050 | (uint64_t)Ptr[4] << 040 | \
(uint64_t)Ptr[3] << 030 | (uint64_t)Ptr[2] << 020 | \
(uint64_t)Ptr[1] << 010 | (uint64_t)Ptr[0] << 000); \
})
#define READ64BE(S) \
({ \
const uint8_t *Ptr = (const uint8_t *)(S); \
((uint64_t)Ptr[0] << 070 | (uint64_t)Ptr[1] << 060 | \
(uint64_t)Ptr[2] << 050 | (uint64_t)Ptr[3] << 040 | \
(uint64_t)Ptr[4] << 030 | (uint64_t)Ptr[5] << 020 | \
(uint64_t)Ptr[6] << 010 | (uint64_t)Ptr[7] << 000); \
})
#endif
#define WRITE16LE(P, V) \
((P)[0] = (0x00000000000000FF & (V)) >> 000, \
(P)[1] = (0x000000000000FF00 & (V)) >> 010, (P) + 2)
#define WRITE16BE(P, V) \
((P)[0] = (0x000000000000FF00 & (V)) >> 010, \
(P)[1] = (0x00000000000000FF & (V)) >> 000, (P) + 2)
#define WRITE32LE(P, V) \
((P)[0] = (0x00000000000000FF & (V)) >> 000, \
(P)[1] = (0x000000000000FF00 & (V)) >> 010, \
(P)[2] = (0x0000000000FF0000 & (V)) >> 020, \
(P)[3] = (0x00000000FF000000 & (V)) >> 030, (P) + 4)
#define WRITE32BE(P, V) \
((P)[0] = (0x00000000FF000000 & (V)) >> 030, \
(P)[1] = (0x0000000000FF0000 & (V)) >> 020, \
(P)[2] = (0x000000000000FF00 & (V)) >> 010, \
(P)[3] = (0x00000000000000FF & (V)) >> 000, (P) + 4)
#define WRITE64LE(P, V) \
((P)[0] = (0x00000000000000FF & (V)) >> 000, \
(P)[1] = (0x000000000000FF00 & (V)) >> 010, \
(P)[2] = (0x0000000000FF0000 & (V)) >> 020, \
(P)[3] = (0x00000000FF000000 & (V)) >> 030, \
(P)[4] = (0x000000FF00000000 & (V)) >> 040, \
(P)[5] = (0x0000FF0000000000 & (V)) >> 050, \
(P)[6] = (0x00FF000000000000 & (V)) >> 060, \
(P)[7] = (0xFF00000000000000 & (V)) >> 070, (P) + 8)
#define WRITE64BE(P, V) \
((P)[0] = (0xFF00000000000000 & (V)) >> 070, \
(P)[1] = (0x00FF000000000000 & (V)) >> 060, \
(P)[2] = (0x0000FF0000000000 & (V)) >> 050, \
(P)[3] = (0x000000FF00000000 & (V)) >> 040, \
(P)[4] = (0x00000000FF000000 & (V)) >> 030, \
(P)[5] = (0x0000000000FF0000 & (V)) >> 020, \
(P)[6] = (0x000000000000FF00 & (V)) >> 010, \
(P)[7] = (0x00000000000000FF & (V)) >> 000, (P) + 8)
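
For illustration, a sketch using the serialization macros above: the WRITE*
forms are comma expressions whose value is the advanced pointer, so they chain.

unsigned char buf[4], *p;
p = WRITE32LE(buf, 0xDEADBEEF); /* buf = {0xEF,0xBE,0xAD,0xDE}, p = buf + 4 */
uint32_t v = READ32LE(buf);     /* 0xDEADBEEF on any host byte order */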
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits » some assembly required
*/
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define lockinc(MEM) __ArithmeticOp1("lock inc", MEM)
#define lockdec(MEM) __ArithmeticOp1("lock dec", MEM)
#define locknot(MEM) __ArithmeticOp1("lock not", MEM)
#define lockneg(MEM) __ArithmeticOp1("lock neg", MEM)
#define lockaddeq(MEM, VAL) __ArithmeticOp2("lock add", VAL, MEM)
#define locksubeq(MEM, VAL) __ArithmeticOp2("lock sub", VAL, MEM)
#define lockxoreq(MEM, VAL) __ArithmeticOp2("lock xor", VAL, MEM)
#define lockandeq(MEM, VAL) __ArithmeticOp2("lock and", VAL, MEM)
#define lockoreq(MEM, VAL) __ArithmeticOp2("lock or", VAL, MEM)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits » implementation details
*/
#define __ArithmeticOp1(OP, MEM) \
({ \
asm(OP "%z0\t%0" : "+m"(*(MEM)) : /* no inputs */ : "cc"); \
MEM; \
})
#define __ArithmeticOp2(OP, VAL, MEM) \
({ \
asm(OP "%z0\t%1,%0" : "+m,m"(*(MEM)) : "i,r"(VAL) : "cc"); \
MEM; \
})
#endif /* __GNUC__ && !__STRICT_ANSI__ */
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_H_ */

libc/bits/bits.mk
@@ -1,50 +0,0 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
PKGS += LIBC_BITS
LIBC_BITS_ARTIFACTS += LIBC_BITS_A
LIBC_BITS = $(LIBC_BITS_A_DEPS) $(LIBC_BITS_A)
LIBC_BITS_A = o/$(MODE)/libc/bits/bits.a
LIBC_BITS_A_FILES := $(wildcard libc/bits/*)
LIBC_BITS_A_HDRS = $(filter %.h,$(LIBC_BITS_A_FILES))
LIBC_BITS_A_SRCS_S = $(filter %.S,$(LIBC_BITS_A_FILES))
LIBC_BITS_A_SRCS_C = $(filter %.c,$(LIBC_BITS_A_FILES))
LIBC_BITS_A_SRCS = \
$(LIBC_BITS_A_SRCS_S) \
$(LIBC_BITS_A_SRCS_C)
LIBC_BITS_A_OBJS = \
$(LIBC_BITS_A_SRCS_S:%.S=o/$(MODE)/%.o) \
$(LIBC_BITS_A_SRCS_C:%.c=o/$(MODE)/%.o)
LIBC_BITS_A_CHECKS = \
$(LIBC_BITS_A).pkg \
$(LIBC_BITS_A_HDRS:%=o/$(MODE)/%.ok)
LIBC_BITS_A_DIRECTDEPS = \
LIBC_STUBS \
LIBC_INTRIN \
LIBC_NEXGEN32E
LIBC_BITS_A_DEPS := \
$(call uniq,$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x))))
$(LIBC_BITS_A): libc/bits/ \
$(LIBC_BITS_A).pkg \
$(LIBC_BITS_A_OBJS)
$(LIBC_BITS_A).pkg: \
$(LIBC_BITS_A_OBJS) \
$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x)_A).pkg)
LIBC_BITS_LIBS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)))
LIBC_BITS_SRCS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_SRCS))
LIBC_BITS_HDRS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_HDRS))
LIBC_BITS_CHECKS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_CHECKS))
LIBC_BITS_OBJS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_OBJS))
$(LIBC_BITS_OBJS): $(BUILD_FILES) libc/bits/bits.mk
.PHONY: o/$(MODE)/libc/bits
o/$(MODE)/libc/bits: $(LIBC_BITS_CHECKS)

libc/bits/bswap.h
@@ -1,18 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_BSWAP_H_
#define COSMOPOLITAN_LIBC_BITS_BSWAP_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
uint16_t bswap_16(uint16_t) pureconst;
uint32_t bswap_32(uint32_t) pureconst;
uint64_t bswap_64(uint64_t) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define bswap_16(x) __builtin_bswap16(x)
#define bswap_32(x) __builtin_bswap32(x)
#define bswap_64(x) __builtin_bswap64(x)
#endif /* defined(__GNUC__) && !defined(__STRICT_ANSI__) */
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_BSWAP_H_ */

libc/bits/countbits.c
@@ -1,84 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/bits/bits.h"
#include "libc/dce.h"
#include "libc/nexgen32e/x86feature.h"
/**
* Returns population count of array.
*
* @param a is byte sequence
* @return number of bits set to one
* @note 30gbps on Nehalem (Intel 2008+) otherwise 3gbps
*/
size_t _countbits(const void *a, size_t n) {
int i;
size_t t;
unsigned b;
uint64_t x;
long Ai, Bi, Ci, Di;
long Ao, Bo, Co, Do;
const char *p, *e;
t = 0;
p = a;
e = p + n;
if (!IsTiny()) {
if (X86_HAVE(POPCNT)) {
while (p + sizeof(long) * 4 <= e) {
__builtin_memcpy(&Ai, p + 000, sizeof(long));
__builtin_memcpy(&Bi, p + 010, sizeof(long));
__builtin_memcpy(&Ci, p + 020, sizeof(long));
__builtin_memcpy(&Di, p + 030, sizeof(long));
asm("popcnt\t%1,%0" : "=r"(Ao) : "r"(Ai) : "cc");
asm("popcnt\t%1,%0" : "=r"(Bo) : "r"(Bi) : "cc");
asm("popcnt\t%1,%0" : "=r"(Co) : "r"(Ci) : "cc");
asm("popcnt\t%1,%0" : "=r"(Do) : "r"(Di) : "cc");
t += Ao + Bo + Co + Do;
p += sizeof(long) * 4;
}
while (p + sizeof(long) <= e) {
__builtin_memcpy(&Ai, p, 8);
asm("popcnt\t%1,%0" : "=r"(Ao) : "rm"(Ai) : "cc");
p += sizeof(long);
t += Ao;
}
} else {
while (p + 8 <= e) {
__builtin_memcpy(&x, p, 8);
x = x - ((x >> 1) & 0x5555555555555555);
x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
x = (x + (x >> 32)) & 0xffffffff;
x = x + (x >> 16);
x = (x + (x >> 8)) & 0x7f;
t += x;
p += 8;
}
}
}
while (p < e) {
b = *p++ & 255;
b = b - ((b >> 1) & 0x55);
b = ((b >> 2) & 0x33) + (b & 0x33);
b = (b + (b >> 4)) & 15;
t += b;
}
return t;
}
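
Usage is a single call over any byte buffer; a sketch:

static const unsigned char kData[] = {0xFF, 0x0F, 0x01};
size_t bits = _countbits(kData, sizeof(kData)); /* 8 + 4 + 1 = 13 */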

libc/bits/emmintrin.internal.h
@@ -1,242 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
#include "libc/bits/xmmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse2
*/
typedef char __v16qi _Vector_size(16);
typedef unsigned char __v16qu _Vector_size(16);
typedef signed char __v16qs _Vector_size(16);
typedef short __v8hi _Vector_size(16);
typedef unsigned short __v8hu _Vector_size(16);
typedef double __v2df _Vector_size(16);
typedef double __m128d _Vector_size(16) forcealign(16);
typedef double __m128d_u _Vector_size(16) forcealign(1);
typedef long long __v2di _Vector_size(16);
typedef long long __m128i _Vector_size(16) forcealign(16);
typedef long long __m128i_u _Vector_size(16) forcealign(1);
typedef unsigned long long __v2du _Vector_size(16);
struct thatispacked mayalias __usi128ma {
__m128i_u __v;
};
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse2 » memory ops
*/
#define _mm_loadu_si128(M128IP) ((struct __usi128ma *)(M128IP))->__v
#define _mm_storeu_si128(M128IP, M128I) \
(((struct __usi128ma *)(M128IP))->__v = (M128I))
#define _mm_set_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
((__m128i)(__v16qi){I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, \
I8_9, I8_10, I8_11, I8_12, I8_13, I8_14, I8_15})
#define _mm_set_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
((__m128i)(__v8hi){I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7})
#define _mm_set_epi32(I32_3, I32_2, I32_1, I32_0) \
((__m128i)(__v4si){I32_0, I32_1, I32_2, I32_3})
#define _mm_set_epi64x(I64_1, I64_0) ((__m128i)(__v2di){I64_0, I64_1})
#define _mm_setr_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
_mm_set_epi8(I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, I8_9, \
I8_10, I8_11, I8_12, I8_13, I8_14, I8_15)
#define _mm_setr_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
_mm_set_epi16(I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7)
#define _mm_setr_epi32(I32_3, I32_2, I32_1, I32_0) \
_mm_set_epi32(I32_0, I32_1, I32_2, I32_3)
#define _mm_setr_epi64x(I64_1, I64_0) _mm_set_epi64x(I64_0, I64_1)
#define _mm_set1_epi8(I8) \
_mm_set_epi8(I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8)
#define _mm_set1_epi16(I16) \
_mm_set_epi16(I16, I16, I16, I16, I16, I16, I16, I16)
#define _mm_set1_epi32(I32) _mm_set_epi32(I32, I32, I32, I32)
#define _mm_set1_epi64x(I64) _mm_set_epi64x(I64, I64)
#define _mm_cvtsi128_si32(M128I) ((__v4si)(M128I))[0]
#define _mm_cvtsi32_si128(I32) ((__m128i)(__v4si){(I32), 0, 0, 0})
#define _mm_setzero_si128() ((__m128i)(__v2di){0LL, 0LL})
#define _mm_castsi128_ps(M128I) ((__m128)(M128I))
#define _mm_castps_si128(M128) ((__m128i)(M128))
#define _mm_load_si128(M128I) (*(M128I))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse2 » simd ops
*/
#define _mm_and_si128(M128I_0, M128I_1) \
((__m128i)((__v2du)(M128I_0) & (__v2du)(M128I_1)))
#define _mm_or_si128(M128I_0, M128I_1) \
((__m128i)((__v2du)(M128I_0) | (__v2du)(M128I_1)))
#define _mm_xor_si128(M128I_0, M128I_1) \
((__m128i)((__v2du)(M128I_0) ^ (__v2du)(M128I_1)))
#define _mm_andnot_si128(M128I_0, M128I_1) \
((__m128i)(~(__v2du)(M128I_0) & (__v2du)(M128I_1)))
#define _mm_add_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) + (__v2df)(M128D_1))
#define _mm_sub_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) - (__v2df)(M128D_1))
#define _mm_mul_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) * (__v2df)(M128D_1))
#define _mm_div_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) / (__v2df)(M128D_1))
#define _mm_and_pd(M128D_0, M128D_1) /* bitwise ops need integer lanes */ \
(__m128d)((__v2du)(M128D_0) & (__v2du)(M128D_1))
#define _mm_or_pd(M128D_0, M128D_1) \
(__m128d)((__v2du)(M128D_0) | (__v2du)(M128D_1))
#define _mm_xor_pd(M128D_0, M128D_1) \
(__m128d)((__v2du)(M128D_0) ^ (__v2du)(M128D_1))
#define _mm_andnot_pd(M128D_0, M128D_1) \
(__m128d)(~(__v2du)(M128D_0) & (__v2du)(M128D_1))
#define _mm_sqrt_pd(M128D) __builtin_ia32_sqrtpd((__v2df)(M128D))
#define _mm_min_pd(M128D_0, M128D_1) \
__builtin_ia32_minpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_max_pd(M128D_0, M128D_1) \
__builtin_ia32_maxpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpeq_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpeqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpneq_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpneqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmplt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnlt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmple_pd(M128D_0, M128D_1) \
__builtin_ia32_cmplepd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnle_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlepd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpgt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpngt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpge_pd(M128D_0, M128D_1) \
__builtin_ia32_cmplepd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpnge_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlepd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpord_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpunord_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpunordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_sad_epu8(M128I_0, M128I_1) \
__builtin_ia32_psadbw128((__v16qi)(M128I_0), (__v16qi)(M128I_1))
#define _mm_subs_epi8(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubsb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#define _mm_subs_epu8(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubusb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#define _mm_subs_epi16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubsw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
#define _mm_subs_epu16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubusw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
#define _mm_add_epi32(M128I_0, M128I_1) \
((__m128i)((__v4su)(M128I_0) + (__v4su)(M128I_1)))
#define _mm_sub_epi32(M128I_0, M128I_1) \
((__m128i)((__v4su)(M128I_0) - (__v4su)(M128I_1)))
#define _mm_madd_epi16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_pmaddwd128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
#define _mm_shuffle_epi32(V, IMM) \
((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(V), (int)(IMM)))
#define _mm_slli_epi32(M128I, COUNT) \
((__m128i)__builtin_ia32_pslldi128((__v4si)(M128I), (COUNT)))
#define _mm_slli_si128(M128I, IMM) \
((__m128i)__builtin_ia32_pslldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
#define _mm_srli_si128(M128I, IMM) \
((__m128i)__builtin_ia32_psrldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse2 » scalar ops
*/
#define _mm_sqrt_sd(M128D_0, M128D_1) \
({ \
__m128d M128d2 = __builtin_ia32_sqrtsd((__v2df)(M128D_1)); \
(__m128d){M128d2[0], (M128D_0)[1]}; \
})
#define _mm_add_sd(M128D_0, M128D_1) \
({ \
(M128D_0)[0] += (M128D_1)[0]; \
(M128D_0); \
})
#define _mm_sub_sd(M128D_0, M128D_1) \
({ \
(M128D_0)[0] -= (M128D_1)[0]; \
(M128D_0); \
})
#define _mm_mul_sd(M128D_0, M128D_1) \
({ \
(M128D_0)[0] *= (M128D_1)[0]; \
(M128D_0); \
})
#define _mm_div_sd(M128D_0, M128D_1) \
({ \
(M128D_0)[0] /= (M128D_1)[0]; \
(M128D_0); \
})
#define _mm_min_sd(M128D_0, M128D_1) \
__builtin_ia32_minsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_max_sd(M128D_0, M128D_1) \
__builtin_ia32_maxsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpeq_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpeqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpneq_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpneqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmplt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnlt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmple_sd(M128D_0, M128D_1) \
__builtin_ia32_cmplesd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnle_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlesd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpgt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpngt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpge_sd(M128D_0, M128D_1) \
__builtin_ia32_cmplesd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpnge_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlesd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpord_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpunord_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpunordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_SSE2(op, A, B) \
({ \
__m128i R = A; \
asm(#op " %1, %0" \
: "+x"(R) : "xm"(B)); \
R; \
})
#define _mm_mul_epu32(A, B) _mm_SSE2(pmuludq, A, B)
#define _mm_add_epi64(A, B) _mm_SSE2(paddq, A, B)
#define _mm_srli_epi64(A, B) _mm_SSE2(psrlq, A, B)
#define _mm_slli_epi64(A, B) _mm_SSE2(psllq, A, B)
#define _mm_unpacklo_epi64(A, B) _mm_SSE2(punpcklqdq, A, B)
#define _mm_unpackhi_epi64(A, B) _mm_SSE2(punpckhqdq, A, B)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse2 » miscellaneous
*/
#define _mm_pause() asm("rep nop")
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_ */
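
A sketch of the C-operator wrappers above in combination (assumes an SSE2
build; function name illustrative):

__m128i MaskedSum(__m128i a, __m128i b, __m128i m) {
  return _mm_and_si128(_mm_add_epi32(a, b), m); /* (a + b) & m, lane-wise */
}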

libc/bits/ezlea.h
@@ -1,12 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_EZLEA_H_
#define COSMOPOLITAN_LIBC_BITS_EZLEA_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#if __pic__ + __pie__ + __code_model_medium__ + __code_model_large__ + 0 > 1
#define ezlea(symbol) "lea\t" symbol "(%%rip),%"
#else
#define ezlea(symbol) "mov\t$" symbol ",%k"
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_EZLEA_H_ */

libc/bits/gray.c
@@ -1,28 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
/**
* Returns gray code for x.
* @see https://en.wikipedia.org/wiki/Gray_code
* @see ungray()
*/
uint32_t gray(uint32_t x) {
return x ^ (x >> 1);
}
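
Consecutive integers therefore map to codewords differing in exactly one bit,
e.g. 0,1,2,3 → 0b00,0b01,0b11,0b10:

uint32_t g = gray(2);         /* 0b11 */
uint32_t x = ungray(gray(7)); /* 7: ungray() inverts the transform */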

libc/bits/hamming.c
@@ -1,27 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/popcnt.h"
/**
* Counts number of different bits.
* @see https://en.wikipedia.org/wiki/Hamming_code
*/
unsigned long hamming(unsigned long x, unsigned long y) {
return popcnt(x ^ y);
}

libc/bits/hilbert.c
@@ -1,76 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/hilbert.h"
static axdx_t RotateQuadrant(long n, long y, long x, long ry, long rx) {
long t;
if (ry == 0) {
if (rx == 1) {
y = n - 1 - y;
x = n - 1 - x;
}
t = x;
x = y;
y = t;
}
return (axdx_t){y, x};
}
/**
* Generates Hilbert space-filling curve.
*
* @see https://en.wikipedia.org/wiki/Hilbert_curve
* @see unhilbert()
*/
long hilbert(long n, long y, long x) {
axdx_t m;
long d, s, ry, rx;
d = 0;
for (s = n / 2; s > 0; s /= 2) {
ry = (y & s) > 0;
rx = (x & s) > 0;
d += s * s * ((3 * rx) ^ ry);
m = RotateQuadrant(n, y, x, ry, rx);
y = m.ax;
x = m.dx;
}
return d;
}
/**
* Decodes Hilbert space-filling curve.
*
* @see https://en.wikipedia.org/wiki/Hilbert_curve
* @see hilbert()
*/
axdx_t unhilbert(long n, long i) {
axdx_t m;
long s, t, y, x, ry, rx;
t = i;
x = y = 0;
for (s = 1; s < n; s *= 2) {
rx = (t / 2) & 1;
ry = (t ^ rx) & 1;
m = RotateQuadrant(s, y, x, ry, rx);
x = m.dx + s * rx;
y = m.ax + s * ry;
t /= 4;
}
return (axdx_t){y, x};
}
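
The two functions are inverses for power-of-two n and coordinates below n;
a round-trip sketch:

long d = hilbert(16, 5, 9);  /* curve index of (y=5, x=9) on a 16×16 grid */
axdx_t p = unhilbert(16, d); /* p.ax == 5, p.dx == 9 */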

libc/bits/hilbert.h
@@ -1,11 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_HILBERT_H_
#define COSMOPOLITAN_LIBC_BITS_HILBERT_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
long hilbert(long, long, long) pureconst;
axdx_t unhilbert(long, long) pureconst;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_HILBERT_H_ */

libc/bits/initializer.h
@@ -1,22 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_
#define COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* TODO: DELETE */
/**
* Teleports code fragment inside _init().
*/
#ifndef INITIALIZER
#define INITIALIZER(PRI, NAME, CODE) \
asm(".section .init." #PRI "." #NAME ",\"ax\",@progbits\n\t" \
"call\t" #NAME "\n\t" \
".previous"); \
textstartup optimizesize void NAME(char *rdi, const char *rsi) { \
CODE; \
asm volatile("" : /* no outputs */ : "D"(rdi), "S"(rsi)); \
}
#endif /* INITIALIZER */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_ */

libc/bits/likely.h
@@ -1,21 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_LIKELY_H_
#define COSMOPOLITAN_LIBC_BITS_LIKELY_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#if __GNUC__ + 0 >= 9 && !defined(__chibicc__)
#define VERY_LIKELY(x) __builtin_expect_with_probability(!!(x), 1, 0.999)
#else
#define VERY_LIKELY(x) LIKELY(x)
#endif
#if __GNUC__ + 0 >= 9 && !defined(__chibicc__)
#define VERY_UNLIKELY(x) __builtin_expect_with_probability(!!(x), 0, 0.999)
#else
#define VERY_UNLIKELY(x) UNLIKELY(x)
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_LIKELY_H_ */

libc/bits/midpoint.h
@@ -1,32 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_MIDPOINT_H_
#define COSMOPOLITAN_LIBC_BITS_MIDPOINT_H_
#include "libc/assert.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && defined(__x86__)
/**
* Computes `(a + b) / 2` assuming unsigned.
*
* This implementation is the fastest on AMD Zen architecture.
*/
#define _midpoint(a, b) \
({ \
typeof((a) + (b)) a_ = (a); \
typeof(a_) b_ = (b); \
assert(a_ >= 0); \
assert(b_ >= 0); \
asm("add\t%1,%0\n\t" \
"rcr\t%0" \
: "+r"(a_) \
: "r"(b_)); \
a_; \
})
#else
/**
* Computes `(a + b) / 2` assuming unsigned.
*/
#define _midpoint(a, b) (((a) & (b)) + ((a) ^ (b)) / 2)
#endif /* __GNUC__ && !__STRICT_ANSI__ && x86 */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_MIDPOINT_H_ */
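
The portable form avoids overflow because a & b keeps the bits both operands
share while (a ^ b) / 2 halves the differing ones, so the sum never exceeds
the larger operand; for example:

unsigned a = 0xFFFFFFFE, b = 0xFFFFFFFC;
unsigned m = _midpoint(a, b); /* 0xFFFFFFFD; naive (a + b) / 2 would wrap */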

libc/bits/morton.c
@@ -1,38 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/morton.h"
/**
* Interleaves bits.
* @see https://en.wikipedia.org/wiki/Z-order_curve
* @see unmorton()
*/
unsigned long(morton)(unsigned long y, unsigned long x) {
x = (x | x << 020) & 0x0000FFFF0000FFFF;
x = (x | x << 010) & 0x00FF00FF00FF00FF;
x = (x | x << 004) & 0x0F0F0F0F0F0F0F0F;
x = (x | x << 002) & 0x3333333333333333;
x = (x | x << 001) & 0x5555555555555555;
y = (y | y << 020) & 0x0000FFFF0000FFFF;
y = (y | y << 010) & 0x00FF00FF00FF00FF;
y = (y | y << 004) & 0x0F0F0F0F0F0F0F0F;
y = (y | y << 002) & 0x3333333333333333;
y = (y | y << 001) & 0x5555555555555555;
return x | y << 1;
}
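
Each shift-and-mask pass spreads an operand's low 32 bits apart, so x lands in
the even bit positions and y in the odd ones, e.g.:

unsigned long z = morton(3, 1); /* y=0b11, x=0b01 → 0b1011 == 11 */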

libc/bits/morton.h
@@ -1,24 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_MORTON_H_
#define COSMOPOLITAN_LIBC_BITS_MORTON_H_
#include "libc/intrin/pdep.h"
#include "libc/intrin/pext.h"
#include "libc/nexgen32e/x86feature.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
unsigned long morton(unsigned long, unsigned long) libcesque;
axdx_t unmorton(unsigned long) libcesque;
#ifndef __STRICT_ANSI__
#define morton(Y, X) \
(X86_NEED(BMI2) ? pdep(X, 0x5555555555555555) | pdep(Y, 0xAAAAAAAAAAAAAAAA) \
: morton(Y, X))
#define unmorton(I) \
(X86_NEED(BMI2) \
? (axdx_t){pext(I, 0xAAAAAAAAAAAAAAAA), pext(I, 0x5555555555555555)} \
: unmorton(I))
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_MORTON_H_ */

libc/bits/newbie.h
@@ -1,55 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_NEWBIE_H_
#define COSMOPOLITAN_LIBC_BITS_NEWBIE_H_
#include "libc/bits/bswap.h"
/*
* Macros for newbies.
* https://justine.lol/endian.html
*/
#define BYTE_ORDER __BYTE_ORDER__
#define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
#define BIG_ENDIAN __ORDER_BIG_ENDIAN__
#define PDP_ENDIAN __ORDER_PDP_ENDIAN__
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define htobe16(x) bswap_16(x)
#define be16toh(x) bswap_16(x)
#define betoh16(x) bswap_16(x)
#define htobe32(x) bswap_32(x)
#define be32toh(x) bswap_32(x)
#define betoh32(x) bswap_32(x)
#define htobe64(x) bswap_64(x)
#define be64toh(x) bswap_64(x)
#define betoh64(x) bswap_64(x)
#define htole16(x) (uint16_t)(x)
#define le16toh(x) (uint16_t)(x)
#define letoh16(x) (uint16_t)(x)
#define htole32(x) (uint32_t)(x)
#define le32toh(x) (uint32_t)(x)
#define letoh32(x) (uint32_t)(x)
#define htole64(x) (uint64_t)(x)
#define le64toh(x) (uint64_t)(x)
#define letoh64(x) (uint64_t)(x)
#else
#define htobe16(x) (uint16_t)(x)
#define be16toh(x) (uint16_t)(x)
#define betoh16(x) (uint16_t)(x)
#define htobe32(x) (uint32_t)(x)
#define be32toh(x) (uint32_t)(x)
#define betoh32(x) (uint32_t)(x)
#define htobe64(x) (uint64_t)(x)
#define be64toh(x) (uint64_t)(x)
#define betoh64(x) (uint64_t)(x)
#define htole16(x) bswap_16(x)
#define le16toh(x) bswap_16(x)
#define letoh16(x) bswap_16(x)
#define htole32(x) bswap_32(x)
#define le32toh(x) bswap_32(x)
#define letoh32(x) bswap_32(x)
#define htole64(x) bswap_64(x)
#define le64toh(x) bswap_64(x)
#define letoh64(x) bswap_64(x)
#endif
#endif /* COSMOPOLITAN_LIBC_BITS_NEWBIE_H_ */

libc/bits/pmmintrin.internal.h
@@ -1,14 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse3
*/
#define _mm_hadd_ps(M128_0, M128_1) \
  ((__m128)__builtin_ia32_haddps((__v4sf)(__m128)(M128_0), \
                                 (__v4sf)(__m128)(M128_1)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_ */
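For reference, HADDPS sums adjacent lane pairs; a sketch (assuming _mm_set_ps from the xmmintrin header later in this diff):
__m128 lo = _mm_set_ps(4, 3, 2, 1); /* lanes {1,2,3,4} */
__m128 hi = _mm_set_ps(8, 7, 6, 5); /* lanes {5,6,7,8} */
__m128 r = _mm_hadd_ps(lo, hi);     /* lanes {1+2, 3+4, 5+6, 7+8} == {3,7,11,15} */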

View file

@ -1,25 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_POPCNT_H_
#define COSMOPOLITAN_LIBC_BITS_POPCNT_H_
#include "libc/nexgen32e/x86feature.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
size_t _countbits(const void *, size_t);
unsigned long popcnt(unsigned long) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define popcnt(X) \
(__builtin_constant_p(X) ? __builtin_popcountll(X) : ({ \
unsigned long PoP = (X); \
if (X86_HAVE(POPCNT)) { \
asm("popcnt\t%0,%0" : "+r"(PoP) : /* no inputs */ : "cc"); \
} else { \
PoP = (popcnt)(PoP); \
} \
PoP; \
}))
#endif /* GNUC && !ANSI */
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_POPCNT_H_ */
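A sketch of what the macro computes, however it gets routed (POPCNT instruction, builtin, or the library fallback):
unsigned long a = popcnt(0b1011);              /* 3  */
unsigned long b = popcnt(0xffffffffffffffff);  /* 64 */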

View file

@ -1,55 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
#define COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
#include "libc/macros.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushpop(x) (x)
#else
/**
* PushPop
* An elegant weapon for a more civilized age.
*/
#define pushpop(x) \
({ \
typeof(x) Popped; \
if (__builtin_constant_p(x) && \
(TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
if (x) { \
asm("push\t%1\n\t" \
"pop\t%q0" \
: "=r"(Popped) \
: "ir"(x)); \
} else { \
asm("xor\t%k0,%k0" : "=r"(Popped)); \
} \
} else { \
asm("" : "=r"(Popped) : "0"(x)); \
} \
Popped; \
})
#endif
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushmov(d, x) (*(d) = (x))
#else
#define pushmov(d, x) \
({ \
typeof(*(d)) Popped = (x); \
if (__builtin_constant_p(x) && \
(TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
asm("pushq\t%1\n\t" \
"popq\t%0" \
: "=m"(*(d)) \
: "ir"(Popped)); \
} else { \
*(d) = Popped; \
} \
Popped; \
})
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_ */
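What the macro buys, sketched (the encodings are illustrative; actual register choice is up to the compiler): PUSH imm8 + POP reg is three bytes versus five or more for MOV reg,imm32, and zero becomes a two-byte XOR.
int a = pushpop(0);    /* xor %eax,%eax */
int b = pushpop(12);   /* push $12; pop %rax */
int c = pushpop(9000); /* immediate too wide for imm8: ordinary move */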

View file

@ -1,30 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"
/**
* Returns 𝑥 rounded down to previous two power.
*
* @define (𝑥>0 → 2^⌊log₂𝑥⌋, 𝑥=0 → 0, 𝑇)
* @see roundup2pow()
*/
unsigned long rounddown2pow(unsigned long x) {
return x ? 1ul << bsrl(x) : 0;
}

View file

@ -1,28 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"
/**
* Returns ⌈log₂𝑥⌉, i.e. 𝑥 rounded up to the next two power, log'd.
* @see roundup2pow()
*/
unsigned long roundup2log(unsigned long x) {
return x > 1 ? (bsrl(x - 1) + 1) : x ? 1 : 0;
}

View file

@ -1,30 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"
/**
* Returns 𝑥 rounded up to next two power.
*
* @define (𝑥>0 → 2^⌈log₂𝑥⌉, 𝑥=0 → 0, 𝑇)
* @see rounddown2pow()
*/
unsigned long roundup2pow(unsigned long x) {
return x > 1 ? 2ul << bsrl(x - 1) : x ? 1 : 0;
}

71
libc/bits/safemacros.internal.h Normal file → Executable file
View file

@ -1,71 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
#define COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
#include "libc/macros.internal.h"
#include "libc/runtime/runtime.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define min(x, y) \
({ \
autotype(x) MinX = (x); \
autotype(y) MinY = (y); \
MinX < MinY ? MinX : MinY; \
})
#define max(x, y) \
({ \
autotype(x) MaxX = (x); \
autotype(y) MaxY = (y); \
MaxX > MaxY ? MaxX : MaxY; \
})
#define roundup(x, k) \
({ \
autotype(x) RoundupX = (x); \
autotype(k) RoundupK = (k); \
ROUNDUP(RoundupX, RoundupK); \
})
#define rounddown(x, k) \
({ \
autotype(x) RounddownX = (x); \
autotype(k) RounddownK = (k); \
ROUNDDOWN(RounddownX, RounddownK); \
})
#define isempty(s) \
({ \
autotype(s) IsEmptyS = (s); \
!IsEmptyS || !(*IsEmptyS); \
})
#define nulltoempty(s) \
({ \
autotype(s) NullToEmptyS = (s); \
NullToEmptyS ? NullToEmptyS : ""; \
})
#define firstnonnull(a, b) \
({ \
autotype(a) FirstNonNullA = (a); \
autotype(a) FirstNonNullB = (b); \
if (!FirstNonNullA && !FirstNonNullB) abort(); \
FirstNonNullA ? FirstNonNullA : FirstNonNullB; \
})
#define emptytonull(s) \
({ \
autotype(s) EmptyToNullS = (s); \
EmptyToNullS && !(*EmptyToNullS) ? NULL : EmptyToNullS; \
})
#define unsignedsubtract(a, b) \
({ \
uint64_t UnsubA = (a); \
uint64_t UnsubB = (b); \
UnsubA >= UnsubB ? UnsubA - UnsubB : ~UnsubB + UnsubA + 1; \
})
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_ */
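The statement expressions exist so each argument is evaluated exactly once; a sketch of why that matters:
int i = 1, j = 5;
int m = min(i++, j); /* m == 1, i == 2; a naive ((x)<(y)?(x):(y)) would bump i twice */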

View file

@ -1,25 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_
#define COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
/**
* Reads scalar from memory, offset by segment.
*
* @return *(MEM) relative to segment
* @see arch_prctl()
* @see pushpop()
*/
#define fs(MEM) __peek("fs", MEM)
#define gs(MEM) __peek("gs", MEM)
#define __peek(SEGMENT, ADDRESS) \
({ \
typeof(*(ADDRESS)) Pk; \
asm("mov\t%%" SEGMENT ":%1,%0" : "=r"(Pk) : "m"(*(ADDRESS))); \
Pk; \
})
#endif /* __GNUC__ && !__STRICT_ANSI__ */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_ */
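A hedged sketch: on WIN64 the TEB keeps a self pointer at %gs:0x30, so one could peek it like this (the offset is that platform's convention, not something this header promises):
uintptr_t *teb = gs((uintptr_t **)0x30); /* assumption: WIN64 TEB self pointer */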

View file

@ -1,37 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#include "libc/bits/xmmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define _mm_sha1rnds4_epu32(M128I_0, M128I_1, MEM) \
__builtin_ia32_sha1rnds4((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1), (MEM))
#define _mm_sha1nexte_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha1nexte((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha1msg1_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha1msg1((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha1msg2_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha1msg2((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha256rnds2_epu32(M128I_0, M128I_1, M128I_2) \
((__m128i)__builtin_ia32_sha256rnds2((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1), \
(__v4si)(__m128i)(M128I_2)))
#define _mm_sha256msg1_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha256msg1((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha256msg2_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha256msg2((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_ */

View file

@ -1,31 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_
/**
* @fileoverview SSE4 intrinsics.
*/
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CUR_DIRECTION 4
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NO_EXC 8
#define _MM_FROUND_RAISE_EXC 0
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TO_NEAREST_INT 0
#define _MM_FROUND_TO_NEG_INF 1
#define _MM_FROUND_TO_POS_INF 2
#define _MM_FROUND_TO_ZERO 3
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define _mm_extract_epi32(M128I, I32) \
((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(M128I), (int)(I32)))
#define _mm_minpos_epu16(M128I) \
  ((__m128i)__builtin_ia32_phminposuw128((__v8hi)(__m128i)(M128I)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_ */

View file

@ -1,17 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » ssse3
*/
#define _mm_maddubs_epi16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_pmaddubsw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#define _mm_shuffle_epi8(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_pshufb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_ */

View file

@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
/**
* Decodes gray code.
* @see https://en.wikipedia.org/wiki/Gray_code
* @see gray()
*/
uint32_t ungray(uint32_t x) {
x ^= x >> 16;
x ^= x >> 8;
x ^= x >> 4;
x ^= x >> 2;
x ^= x >> 1;
return x;
}
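Round-trip sketch (illustrative): gray encoding is x ^ (x >> 1), which ungray() inverts.
uint32_t g = 2 ^ (2 >> 1); /* gray(2) == 3 */
uint32_t x = ungray(g);    /* 2 */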

View file

@ -1,41 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/morton.h"
static unsigned long GetOddBits(unsigned long x) {
x = (x | x >> 000) & 0x5555555555555555;
x = (x | x >> 001) & 0x3333333333333333;
x = (x | x >> 002) & 0x0F0F0F0F0F0F0F0F;
x = (x | x >> 004) & 0x00FF00FF00FF00FF;
x = (x | x >> 010) & 0x0000FFFF0000FFFF;
x = (x | x >> 020) & 0x00000000FFFFFFFF;
return x;
}
/**
* Deinterleaves bits.
*
* @param 𝑖 is interleaved index
* @return deinterleaved coordinate {ax := 𝑦, dx := 𝑥}
* @see en.wikipedia.org/wiki/Z-order_curve
* @see morton()
*/
axdx_t(unmorton)(unsigned long i) {
return (axdx_t){GetOddBits(i >> 1), GetOddBits(i)};
}

28
libc/bits/weaken.h Normal file → Executable file
View file

@ -1,28 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_WEAKEN_H_
#define COSMOPOLITAN_LIBC_BITS_WEAKEN_H_
#include "libc/bits/ezlea.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef __STRICT_ANSI__
#define weaken(symbol) ((const typeof(&(symbol)))weakaddr(#symbol))
#define strongaddr(symbolstr) \
({ \
intptr_t waddr; \
asm(ezlea(symbolstr) "0" : "=r"(waddr)); \
waddr; \
})
#define weakaddr(symbolstr) \
({ \
intptr_t waddr; \
asm(".weak\t" symbolstr "\n\t" ezlea(symbolstr) "0" : "=r"(waddr)); \
waddr; \
})
#else
#define weaken(symbol) symbol
#define weakaddr(symbolstr) &(symbolstr)
#endif /* __STRICT_ANSI__ */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_WEAKEN_H_ */
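The usual idiom, sketched: reference a symbol without forcing it to link, then call it only if something else pulled it in.
static void MaybeFree(void *p) {
  if (weaken(free)) {
    weaken(free)(p); /* runs only when free() actually got linked */
  }
}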

View file

@ -1,29 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define _mm_clmulepi64_si128(X, Y, IMM) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
(__v2di)(__m128i)(Y), (char)(IMM)))
#define _mm_aesenc_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesenc128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesenclast_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesenclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesdec_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesdec128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesdeclast_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesdeclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesimc_si128(M128I) \
((__m128i)__builtin_ia32_aesimc128((__v2di)(M128I)))
#define _mm_aeskeygenassist_si128(X, Y) \
((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(X), (int)(Y)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_ */

View file

@ -1,16 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XADD_H_
#define COSMOPOLITAN_LIBC_BITS_XADD_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define _xadd(p, v) \
({ \
typeof(*(p)) Res; \
autotype(Res) Val = (v); \
asm volatile("xadd\t%0,%1" : "=r"(Res), "+m"(*(p)) : "0"(Val)); \
Res + Val; \
})
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XADD_H_ */
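Sketch: unlike C11 atomic_fetch_add(), which hands back the old value, _xadd() yields the new one.
int counter = 5;
int now = _xadd(&counter, 3); /* counter == 8, now == 8 */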

25
libc/bits/xchg.internal.h Normal file → Executable file
View file

@ -1,25 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XCHG_H_
#define COSMOPOLITAN_LIBC_BITS_XCHG_H_
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
* Exchanges *MEMORY and *LOCALVAR.
*
* @return old value of *MEMORY
* @see lockcmpxchg()
* todo(jart): what's the point of this?
*/
#define xchg(MEMORY, LOCALVAR) \
({ \
autotype(MEMORY) Memory = (MEMORY); \
typeof(Memory) LocalVar = (LOCALVAR); \
typeof(*Memory) Temp; \
memcpy(&Temp, Memory, sizeof(Temp)); \
memcpy(Memory, LocalVar, sizeof(Temp)); \
memcpy(LocalVar, &Temp, sizeof(Temp)); \
Temp; \
})
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XCHG_H_ */
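Sketch of the swap:
int mem = 1, var = 2;
int old = xchg(&mem, &var); /* mem == 2, var == 1, old == 1 */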

View file

@ -1,243 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#include "libc/dce.h"
#define _MM_EXCEPT_MASK 0x003f
#define _MM_EXCEPT_INVALID 0x0001
#define _MM_EXCEPT_DENORM 0x0002
#define _MM_EXCEPT_DIV_ZERO 0x0004
#define _MM_EXCEPT_OVERFLOW 0x0008
#define _MM_EXCEPT_UNDERFLOW 0x0010
#define _MM_EXCEPT_INEXACT 0x0020
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_INVALID 0x0080
#define _MM_MASK_DENORM 0x0100
#define _MM_MASK_DIV_ZERO 0x0200
#define _MM_MASK_OVERFLOW 0x0400
#define _MM_MASK_UNDERFLOW 0x0800
#define _MM_MASK_INEXACT 0x1000
#define _MM_ROUND_MASK 0x6000
#define _MM_ROUND_NEAREST 0x0000
#define _MM_ROUND_DOWN 0x2000
#define _MM_ROUND_UP 0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
#define _MM_SHUFFLE(A, B, C, D) (((A) << 6) | ((B) << 4) | ((C) << 2) | (D))
#if !(__ASSEMBLER__ + __LINKER__ + 0)
typedef int __v4si _Vector_size(16);
typedef unsigned int __v4su _Vector_size(16);
typedef float __v4sf _Vector_size(16);
typedef float __m128 _Vector_size(16) forcealign(16) mayalias;
typedef float __m128_u _Vector_size(16) forcealign(1) mayalias;
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse » simd ops
*/
#define _mm_add_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) + (__v4sf)(M128_1)))
#define _mm_sub_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) - (__v4sf)(M128_1)))
#define _mm_mul_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) * (__v4sf)(M128_1)))
#define _mm_div_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) / (__v4sf)(M128_1)))
#define _mm_and_ps(M128_0, M128_1) \
((__m128)((__v4su)(M128_0) & (__v4su)(M128_1)))
#define _mm_or_ps(M128_0, M128_1) \
((__m128)((__v4su)(M128_0) | (__v4su)(M128_1)))
#define _mm_xor_ps(M128_0, M128_1) /* XORPD [u32 simd xor] */ \
((__m128)((__v4su)(M128_0) ^ (__v4su)(M128_1)))
#define _mm_andnot_ps(M128_0, M128_1) /* ANDNPS [u32 simd nand] */ \
((__m128)(~(__v4su)(M128_0) & (__v4su)(M128_1)))
#define _mm_rcp_ps(M128) __builtin_ia32_rcpps((__v4sf)(M128))
#define _mm_sqrt_ps(M128) __builtin_ia32_sqrtps((__v4sf)(M128))
#define _mm_rsqrt_ps(M128) __builtin_ia32_rsqrtps((__v4sf)(M128))
#define _mm_min_ps(M128_0, M128_1) \
__builtin_ia32_minps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ps(M128_0, M128_1) \
__builtin_ia32_maxps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_min_ss(M128_0, M128_1) \
__builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ss(M128_0, M128_1) \
__builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpeq_ps(M128_0, M128_1) \
__builtin_ia32_cmpeqps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpneq_ps(M128_0, M128_1) \
__builtin_ia32_cmpneqps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmplt_ps(M128_0, M128_1) \
__builtin_ia32_cmpltps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnlt_ps(M128_0, M128_1) \
__builtin_ia32_cmpnltps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmple_ps(M128_0, M128_1) \
__builtin_ia32_cmpleps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnle_ps(M128_0, M128_1) \
__builtin_ia32_cmpnleps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpgt_ps(M128_0, M128_1) \
__builtin_ia32_cmpltps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpngt_ps(M128_0, M128_1) \
__builtin_ia32_cmpnltps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpge_ps(M128_0, M128_1) \
__builtin_ia32_cmpleps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpnge_ps(M128_0, M128_1) \
__builtin_ia32_cmpnleps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpord_ps(M128_0, M128_1) \
__builtin_ia32_cmpordps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpunord_ps(M128_0, M128_1) \
__builtin_ia32_cmpunordps((__v4sf)(M128_0), (__v4sf)(M128_1))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse » scalar ops
*/
#define _mm_add_ss(m128_0, m128_1) \
({ \
__m128 a = m128_0; \
__m128 b = m128_1; \
a[0] += b[0]; \
a; \
})
#define _mm_sub_ss(m128_0, m128_1) \
({ \
__m128 a = m128_0; \
__m128 b = m128_1; \
a[0] -= b[0]; \
a; \
})
#define _mm_mul_ss(m128_0, m128_1) \
({ \
__m128 a = m128_0; \
__m128 b = m128_1; \
a[0] *= b[0]; \
a; \
})
#define _mm_div_ss(m128_0, m128_1) \
({ \
__m128 a = m128_0; \
__m128 b = m128_1; \
a[0] /= b[0]; \
a; \
})
#define _mm_rcp_ss(M128) __builtin_ia32_rcpss((__v4sf)(M128)) /*~1/x*/
#define _mm_sqrt_ss(M128) __builtin_ia32_sqrtss((__v4sf)(M128)) /*√𝑥*/
#define _mm_rsqrt_ss(M128) __builtin_ia32_rsqrtss((__v4sf)(M128)) /*~1/√𝑥*/
#define _mm_min_ss(M128_0, M128_1) \
__builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ss(M128_0, M128_1) \
__builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpeq_ss(M128_0, M128_1) \
__builtin_ia32_cmpeqss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpneq_ss(M128_0, M128_1) \
__builtin_ia32_cmpneqss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmplt_ss(M128_0, M128_1) \
__builtin_ia32_cmpltss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnlt_ss(M128_0, M128_1) \
__builtin_ia32_cmpnltss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmple_ss(M128_0, M128_1) \
__builtin_ia32_cmpless((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnle_ss(M128_0, M128_1) \
__builtin_ia32_cmpnless((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpgt_ss(M128_0, M128_1) \
__builtin_ia32_cmpltss((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpngt_ss(M128_0, M128_1) \
__builtin_ia32_cmpnltss((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpge_ss(M128_0, M128_1) \
__builtin_ia32_cmpless((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpnge_ss(M128_0, M128_1) \
__builtin_ia32_cmpnless((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpord_ss(M128_0, M128_1) \
__builtin_ia32_cmpordss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpunord_ss(M128_0, M128_1) \
__builtin_ia32_cmpunordss((__v4sf)(M128_0), (__v4sf)(M128_1))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse » memory ops
*/
#define _mm_set1_ps(M128_0) ((__m128)(__v4sf){M128_0, M128_0, M128_0, M128_0})
#define _mm_setzero_ps() ((__m128)(__v4sf){0})
#define _mm_cvtss_f32(M128_0) (((__v4sf)(M128_0))[0])
#define _mm_load_ps(FLOATPTR) (*(__m128 *)(FLOATPTR))
#define _mm_loadu_ps(FLOATPTR) (*(__m128_u *)(FLOATPTR))
#define _mm_set_ps(WHO, DESIGNED, THIS, SHEESH) \
((__m128)(__v4sf){SHEESH, THIS, DESIGNED, WHO})
#define _mm_set_ss(FLOAT) ((__m128)(__v4sf){FLOAT, 0, 0, 0})
#define _mm_load_ss(FLOATPTR) _mm_set_ss(*(FLOATPTR))
#define _mm_store_ss(FLOATPTR, M128_0) ((FLOATPTR)[0] = ((__v4sf)(M128_0))[0])
#define _mm_store_ps(FLOATPTR, M128_0) (*(__m128 *)(FLOATPTR) = (M128_0))
#define _mm_storeu_ps(FLOATPTR, M128_0) (*(__m128_u *)(FLOATPTR) = (M128_0))
#define _mm_shuffle_ps(M128_0, M128_1, MASK) \
((__m128)__builtin_ia32_shufps((__v4sf)(M128_0), (__v4sf)(M128_1), (MASK)))
#ifdef __llvm__
#define _mm_movehl_ps(M128_0, M128_1) \
((__m128)__builtin_shufflevector((__v4sf)(__m128)(M128_0), \
(__v4sf)(__m128)(M128_1), 6, 7, 2, 3))
/* intrinsics unstable & constantly breaking, consider ansi c or asm. */
/* each version of llvm has a different incompatible impl for this one */
#else
#define _mm_movehl_ps(M128_0, M128_1) \
((__m128)__builtin_ia32_movhlps((__v4sf)(__m128)(M128_0), \
(__v4sf)(__m128)(M128_1)))
#define _mm_storel_pi(M64PTR, M128_0) \
__builtin_ia32_storelps((__v2sf *)(M64PTR), (__v4sf)(M128_0))
#endif
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse » cast ops
*/
#define _mm_cvtps_epi32(M128_0) \
((__m128i)__builtin_ia32_cvtps2dq((__v4sf)(M128_0)))
#ifdef __llvm__
#define _mm_cvtepi32_ps(M128I_0) \
((__m128) __builtin_convertvector((__v4si)(__m128i)(M128I_0), __v4sf))
#else
#define _mm_cvtepi32_ps(M128I_0) \
((__m128)__builtin_ia32_cvtdq2ps((__v4si)(M128I_0)))
#endif
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § it's a trap! » sse » misc
*/
#define _mm_getcsr() (__builtin_ia32_stmxcsr())
#define _mm_setcsr(U32CONF) (__builtin_ia32_ldmxcsr(U32CONF))
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)
#define _MM_SET_ROUNDING_MODE(MODE) \
(_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (MODE)))
#define XMM_DESTROY(VAR) \
do { \
if (!IsTrustworthy()) { \
asm volatile("xorps\t%1,%0" : "=x"(VAR) : "0"(VAR)); \
} \
} while (0)
/*
** Ternary:
**
** Integer: _mm_or_si128(_mm_and_si128(a, cond), _mm_andnot_si128(cond, b))
** 32-bit float: _mm_or_ps(_mm_and_ps(a, cond), _mm_andnot_ps(cond, b))
** 64-bit float: _mm_or_pd(_mm_and_pd(a, cond), _mm_andnot_pd(cond, b))
** Integer (SSE4.1+): _mm_blendv_epi8(a, b, cond)
** 32-bit float (SSE4.1+): _mm_blendv_ps(a, b, cond)
** 64-bit float (SSE4.1+): _mm_blendv_pd(a, b, cond)
*/
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_ */
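A sketch of the branchless ternary from the comment above, computing an elementwise maximum (assumes the comparison and logic macros defined earlier in this header):
__m128 a = _mm_set_ps(4, 3, 2, 1);
__m128 b = _mm_set_ps(1, 5, 2, 7);
__m128 cond = _mm_cmpgt_ps(a, b); /* all-ones lanes where a > b */
__m128 m = _mm_or_ps(_mm_and_ps(a, cond), _mm_andnot_ps(cond, b)); /* max(a, b) */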