mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-03 09:48:29 +00:00
Fold LIBC_BITS into LIBC_INTRIN
This commit is contained in:
parent
625aa365f1
commit
05b8f82371
603 changed files with 1071 additions and 1211 deletions
|
@ -1,27 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/fmt/conv.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
/**
|
||||
* Returns absolute value of x.
|
||||
*/
|
||||
int abs(int x) {
|
||||
return ABS(x);
|
||||
}
|
34
libc/bits/asmflag.h
Normal file → Executable file
34
libc/bits/asmflag.h
Normal file → Executable file
|
@ -1,34 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_ASMFLAG_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_ASMFLAG_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/*
|
||||
* Constraints for virtual machine flags.
|
||||
* @note we beseech clang devs for flag constraints
|
||||
*/
|
||||
#ifdef __GCC_ASM_FLAG_OUTPUTS__ /* GCC6+ CLANG10+ */
|
||||
#define CFLAG_CONSTRAINT "=@ccc"
|
||||
#define CFLAG_ASM(OP) OP
|
||||
#define ZFLAG_CONSTRAINT "=@ccz"
|
||||
#define ZFLAG_ASM(OP) OP
|
||||
#define OFLAG_CONSTRAINT "=@cco"
|
||||
#define OFLAG_ASM(OP) OP
|
||||
#define SFLAG_CONSTRAINT "=@ccs"
|
||||
#define SFLAG_ASM(SP) SP
|
||||
#define ABOVE_CONSTRAINT "=@cca" /* i.e. !ZF && !CF */
|
||||
#define ABOVEFLAG_ASM(OP) OP
|
||||
#else
|
||||
#define CFLAG_CONSTRAINT "=q"
|
||||
#define CFLAG_ASM(OP) OP "\n\tsetc\t%b0"
|
||||
#define ZFLAG_CONSTRAINT "=q"
|
||||
#define ZFLAG_ASM(OP) OP "\n\tsetz\t%b0"
|
||||
#define OFLAG_CONSTRAINT "=q"
|
||||
#define OFLAG_ASM(OP) OP "\n\tseto\t%b0"
|
||||
#define SFLAG_CONSTRAINT "=q"
|
||||
#define SFLAG_ASM(SP) OP "\n\tsets\t%b0"
|
||||
#define ABOVE_CONSTRAINT "=@cca"
|
||||
#define ABOVEFLAG_ASM(OP) OP "\n\tseta\t%b0"
|
||||
#endif
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_ASMFLAG_H_ */
|
|
@ -1,170 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
|
||||
#include "libc/atomic.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
|
||||
* @fileoverview Cosmopolitan C11 Atomics Library
|
||||
*
|
||||
* - Forty-two different ways to say MOV.
|
||||
* - Fourteen different ways to say XCHG.
|
||||
* - Twenty different ways to say LOCK CMPXCHG.
|
||||
*
|
||||
* It's a lower level programming language than assembly!
|
||||
*
|
||||
* @see libc/atomic.h
|
||||
*/
|
||||
|
||||
#define memory_order int
|
||||
#define memory_order_relaxed 0
|
||||
#define memory_order_consume 1
|
||||
#define memory_order_acquire 2
|
||||
#define memory_order_release 3
|
||||
#define memory_order_acq_rel 4
|
||||
#define memory_order_seq_cst 5
|
||||
|
||||
#define ATOMIC_VAR_INIT(value) (value)
|
||||
#define atomic_is_lock_free(obj) ((void)(obj), sizeof(obj) <= sizeof(void *))
|
||||
|
||||
#define atomic_flag atomic_bool
|
||||
#define ATOMIC_FLAG_INIT ATOMIC_VAR_INIT(0)
|
||||
#define atomic_flag_test_and_set_explicit(x, order) \
|
||||
atomic_exchange_explicit(x, 1, order)
|
||||
#define atomic_flag_clear_explicit(x, order) atomic_store_explicit(x, 0, order)
|
||||
|
||||
#define atomic_compare_exchange_strong(pObject, pExpected, desired) \
|
||||
atomic_compare_exchange_strong_explicit( \
|
||||
pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst)
|
||||
#define atomic_compare_exchange_weak(pObject, pExpected, desired) \
|
||||
atomic_compare_exchange_weak_explicit( \
|
||||
pObject, pExpected, desired, memory_order_seq_cst, memory_order_seq_cst)
|
||||
#define atomic_exchange(pObject, desired) \
|
||||
atomic_exchange_explicit(pObject, desired, memory_order_seq_cst)
|
||||
#define atomic_fetch_add(pObject, operand) \
|
||||
atomic_fetch_add_explicit(pObject, operand, memory_order_seq_cst)
|
||||
#define atomic_fetch_and(pObject, operand) \
|
||||
atomic_fetch_and_explicit(pObject, operand, memory_order_seq_cst)
|
||||
#define atomic_fetch_or(pObject, operand) \
|
||||
atomic_fetch_or_explicit(pObject, operand, memory_order_seq_cst)
|
||||
#define atomic_fetch_sub(pObject, operand) \
|
||||
atomic_fetch_sub_explicit(pObject, operand, memory_order_seq_cst)
|
||||
#define atomic_fetch_xor(pObject, operand) \
|
||||
atomic_fetch_xor_explicit(pObject, operand, memory_order_seq_cst)
|
||||
#define atomic_load(pObject) atomic_load_explicit(pObject, memory_order_seq_cst)
|
||||
#define atomic_store(pObject, desired) \
|
||||
atomic_store_explicit(pObject, desired, memory_order_seq_cst)
|
||||
#define atomic_flag_test_and_set(x) \
|
||||
atomic_flag_test_and_set_explicit(x, memory_order_seq_cst)
|
||||
#define atomic_flag_clear(x) atomic_flag_clear_explicit(x, memory_order_seq_cst)
|
||||
|
||||
#if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE)
|
||||
|
||||
#define atomic_init(obj, value) __c11_atomic_init(obj, value)
|
||||
#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)
|
||||
#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)
|
||||
#define atomic_compare_exchange_strong_explicit(object, expected, desired, \
|
||||
success, failure) \
|
||||
__c11_atomic_compare_exchange_strong(object, expected, desired, success, \
|
||||
failure)
|
||||
#define atomic_compare_exchange_weak_explicit(object, expected, desired, \
|
||||
success, failure) \
|
||||
__c11_atomic_compare_exchange_weak(object, expected, desired, success, \
|
||||
failure)
|
||||
#define atomic_exchange_explicit(object, desired, order) \
|
||||
__c11_atomic_exchange(object, desired, order)
|
||||
#define atomic_fetch_add_explicit(object, operand, order) \
|
||||
__c11_atomic_fetch_add(object, operand, order)
|
||||
#define atomic_fetch_and_explicit(object, operand, order) \
|
||||
__c11_atomic_fetch_and(object, operand, order)
|
||||
#define atomic_fetch_or_explicit(object, operand, order) \
|
||||
__c11_atomic_fetch_or(object, operand, order)
|
||||
#define atomic_fetch_sub_explicit(object, operand, order) \
|
||||
__c11_atomic_fetch_sub(object, operand, order)
|
||||
#define atomic_fetch_xor_explicit(object, operand, order) \
|
||||
__c11_atomic_fetch_xor(object, operand, order)
|
||||
#define atomic_load_explicit(object, order) __c11_atomic_load(object, order)
|
||||
#define atomic_store_explicit(object, desired, order) \
|
||||
__c11_atomic_store(object, desired, order)
|
||||
|
||||
#elif (__GNUC__ + 0) * 100 + (__GNUC_MINOR__ + 0) >= 407
|
||||
|
||||
#define atomic_init(obj, value) ((void)(*(obj) = (value)))
|
||||
#define atomic_thread_fence(order) __atomic_thread_fence(order)
|
||||
#define atomic_signal_fence(order) __atomic_signal_fence(order)
|
||||
#define atomic_compare_exchange_strong_explicit(pObject, pExpected, desired, \
|
||||
success, failure) \
|
||||
__atomic_compare_exchange_n(pObject, pExpected, desired, 0, success, failure)
|
||||
#define atomic_compare_exchange_weak_explicit(pObject, pExpected, desired, \
|
||||
success, failure) \
|
||||
__atomic_compare_exchange_n(pObject, pExpected, desired, 1, success, failure)
|
||||
#define atomic_exchange_explicit(pObject, desired, order) \
|
||||
__atomic_exchange_n(pObject, desired, order)
|
||||
#define atomic_fetch_add_explicit(pObject, operand, order) \
|
||||
__atomic_fetch_add(pObject, operand, order)
|
||||
#define atomic_fetch_and_explicit(pObject, operand, order) \
|
||||
__atomic_fetch_and(pObject, operand, order)
|
||||
#define atomic_fetch_or_explicit(pObject, operand, order) \
|
||||
__atomic_fetch_or(pObject, operand, order)
|
||||
#define atomic_fetch_sub_explicit(pObject, operand, order) \
|
||||
__atomic_fetch_sub(pObject, operand, order)
|
||||
#define atomic_fetch_xor_explicit(pObject, operand, order) \
|
||||
__atomic_fetch_xor(pObject, operand, order)
|
||||
#define atomic_load_explicit(pObject, order) __atomic_load_n(pObject, order)
|
||||
#define atomic_store_explicit(pObject, desired, order) \
|
||||
__atomic_store_n(pObject, desired, order)
|
||||
|
||||
#else
|
||||
|
||||
#define atomic_init(obj, value) ((void)(*(obj) = (value)))
|
||||
#define atomic_thread_fence(order) __sync_synchronize()
|
||||
#define atomic_signal_fence(order) __asm__ volatile("" ::: "memory")
|
||||
#define __atomic_apply_stride(object, operand) \
|
||||
(((__typeof__(__atomic_val(object)))0) + (operand))
|
||||
#define atomic_compare_exchange_strong_explicit(object, expected, desired, \
|
||||
success, failure) \
|
||||
__extension__({ \
|
||||
__typeof__(expected) __ep = (expected); \
|
||||
__typeof__(*__ep) __e = *__ep; \
|
||||
(void)(success); \
|
||||
(void)(failure); \
|
||||
(_Bool)((*__ep = __sync_val_compare_and_swap(object, __e, desired)) == \
|
||||
__e); \
|
||||
})
|
||||
#define atomic_compare_exchange_weak_explicit(object, expected, desired, \
|
||||
success, failure) \
|
||||
atomic_compare_exchange_strong_explicit(object, expected, desired, success, \
|
||||
failure)
|
||||
#if __has_builtin(__sync_swap)
|
||||
#define atomic_exchange_explicit(object, desired, order) \
|
||||
((void)(order), __sync_swap(object, desired))
|
||||
#else
|
||||
#define atomic_exchange_explicit(object, desired, order) \
|
||||
__extension__({ \
|
||||
__typeof__(object) __o = (object); \
|
||||
__typeof__(desired) __d = (desired); \
|
||||
(void)(order); \
|
||||
__sync_synchronize(); \
|
||||
__sync_lock_test_and_set(&__atomic_val(__o), __d); \
|
||||
})
|
||||
#endif
|
||||
#define atomic_fetch_add_explicit(object, operand, order) \
|
||||
((void)(order), \
|
||||
__sync_fetch_and_add(object, __atomic_apply_stride(object, operand)))
|
||||
#define atomic_fetch_and_explicit(object, operand, order) \
|
||||
((void)(order), __sync_fetch_and_and(object, operand))
|
||||
#define atomic_fetch_or_explicit(object, operand, order) \
|
||||
((void)(order), __sync_fetch_and_or(object, operand))
|
||||
#define atomic_fetch_sub_explicit(object, operand, order) \
|
||||
((void)(order), \
|
||||
__sync_fetch_and_sub(object, __atomic_apply_stride(object, operand)))
|
||||
#define atomic_fetch_xor_explicit(object, operand, order) \
|
||||
((void)(order), __sync_fetch_and_xor(object, operand))
|
||||
#define atomic_load_explicit(object, order) \
|
||||
((void)(order), __sync_fetch_and_add(object, 0))
|
||||
#define atomic_store_explicit(object, desired, order) \
|
||||
((void)atomic_exchange_explicit(object, desired, order))
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_ATOMIC_H_ */
|
|
@ -1,133 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
|
||||
#include "libc/bits/avxintrin.internal.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define _mm256_min_epi16(M256_0, M256_1) \
|
||||
((__m256i)__builtin_ia32_minps((__v16hi)(M256_0), (__v16hi)(M256_1)))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § it's a trap! » avx2 » simd ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm256_add_ps(M256_0, M256_1) \
|
||||
((__m256)((__v8sf)(M256_0) + (__v8sf)(M256_1)))
|
||||
#define _mm256_sub_ps(M256_0, M256_1) \
|
||||
((__m256)((__v8sf)(M256_0) - (__v8sf)(M256_1)))
|
||||
#define _mm256_mul_ps(M256_0, M256_1) \
|
||||
((__m256)((__v8sf)(M256_0) * (__v8sf)(M256_1)))
|
||||
#define _mm256_div_ps(M256_0, M256_1) \
|
||||
((__m256)((__v8sf)(M256_0) / (__v8sf)(M256_1)))
|
||||
#define _mm256_and_ps(M256_0, M256_1) \
|
||||
((__m256)((__v8su)(M256_0) & (__v8su)(M256_1)))
|
||||
#define _mm256_or_ps(M256_0, M256_1) \
|
||||
((__m256)((__v8su)(M256_0) | (__v8su)(M256_1)))
|
||||
#define _mm256_xor_ps(M256_0, M256_1) /* XORPD [u32 simd xor] */ \
|
||||
((__m256)((__v8su)(M256_0) ^ (__v8su)(M256_1)))
|
||||
#define _mm256_andnot_ps(M256_0, M256_1) /* ANDNPS [u32 simd nand] */ \
|
||||
((__m256)(~(__v8su)(M256_0) & (__v8su)(M256_1)))
|
||||
#define _mm256_rcp_ps(M256) __builtin_ia32_rcpps256((__v8sf)(M256))
|
||||
#define _mm256_sqrt_ps(M256) __builtin_ia32_sqrtps256((__v8sf)(M256))
|
||||
#define _mm256_rsqrt_ps(M256) __builtin_ia32_rsqrtps256((__v8sf)(M256))
|
||||
#define _mm256_round_ps(M256, IMM) \
|
||||
((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(M256), IMM))
|
||||
|
||||
#define _mm256_add_epi32(M256I_0, M256I_1) \
|
||||
((__m256i)((__v8su)(M256I_0) + (__v8su)(M256I_1)))
|
||||
#define _mm256_cmpgt_epi32(M256I_0, M256I_1) \
|
||||
((__m256i)((__v8si)(M256I_0) > (__v8si)(M256I_1)))
|
||||
#define _mm256_min_epi32(M256I_0, M256I_1) \
|
||||
((__m256i)__builtin_ia32_pminsd256((__v8si)(M256I_0), (__v8si)(M256I_1)))
|
||||
#define _mm256_min_epu32(M256I_0, M256I_1) \
|
||||
((__m256i)__builtin_ia32_pminud256((__v8si)(M256I_0), (__v8si)(M256I_1)))
|
||||
#define _mm256_max_epi32(M256I_0, M256I_1) \
|
||||
((__m256i)__builtin_ia32_pmaxsd256((__v8si)(M256I_0), (__v8si)(M256I_1)))
|
||||
#define _mm256_max_epu32(M256I_0, M256I_1) \
|
||||
((__m256i)__builtin_ia32_pmaxud256((__v8si)(M256I_0), (__v8si)(M256I_1)))
|
||||
#define _mm256_blendv_epi8(M256I_0, M256I_1, M256I_2) \
|
||||
((__m256i)__builtin_ia32_pblendvb256((__v32qi)(M256I_0), (__v32qi)(M256I_1), \
|
||||
(__v32qi)(M256I_2)))
|
||||
|
||||
#define _mm256_min_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_minps256((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_max_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_maxps256((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_cmpneq_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpneqps((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_cmplt_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpltps((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_cmpnlt_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpnltps((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_cmple_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpleps((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_cmpnle_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpnleps((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_cmpgt_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpltps((__v8sf)(__m256)(M256_1), \
|
||||
(__v8sf)(__m256)(M256_0)))
|
||||
#define _mm256_cmpngt_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpnltps((__v8sf)(__m256)(M256_1), \
|
||||
(__v8sf)(__m256)(M256_0)))
|
||||
#define _mm256_cmpge_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpleps((__v8sf)(__m256)(M256_1), \
|
||||
(__v8sf)(__m256)(M256_0)))
|
||||
#define _mm256_cmpnge_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpnleps((__v8sf)(__m256)(M256_1), \
|
||||
(__v8sf)(__m256)(M256_0)))
|
||||
#define _mm256_cmpord_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpordps((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
#define _mm256_cmpunord_ps(M256_0, M256_1) \
|
||||
((__m256)__builtin_ia32_cmpunordps((__v8sf)(__m256)(M256_0), \
|
||||
(__v8sf)(__m256)(M256_1)))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § avx2 » memory ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
struct thatispacked PackedMayaliasIntyYmm {
|
||||
__m256i Ymm;
|
||||
} mayalias;
|
||||
|
||||
#define _mm256_set_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
|
||||
((__m256)(__v8sf){(float)(FLT_0), (float)(FLT_1), (float)(FLT_2), \
|
||||
(float)(FLT_3), (float)(FLT_4), (float)(FLT_5), \
|
||||
(float)(FLT_6), (float)(FLT_7)})
|
||||
#define _mm256_set1_ps(FLT_0) \
|
||||
_mm256_set_ps(FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0)
|
||||
#define _mm256_setr_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
|
||||
_mm256_set_ps(FLT_7, FLT_6, FLT_5, FLT_4, FLT_3, FLT_2, FLT_1, FLT_0)
|
||||
|
||||
#define _mm256_set_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
|
||||
INT_7) \
|
||||
((__m256i)(__v8si){(int)(INT_0), (int)(INT_1), (int)(INT_2), (int)(INT_3), \
|
||||
(int)(INT_4), (int)(INT_5), (int)(INT_6), (int)(INT_7)})
|
||||
#define _mm256_set1_epi32(INT_0) \
|
||||
_mm256_set_epi32(INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0)
|
||||
#define _mm256_setr_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
|
||||
INT_7) \
|
||||
_mm256_set_epi32(INT_7, INT_6, INT_5, INT_4, INT_3, INT_2, INT_1, INT_0)
|
||||
|
||||
#define _mm256_loadu_si256(M256IP_0) \
|
||||
({ \
|
||||
const __m256i *Ymm = (M256IP_0); \
|
||||
((struct PackedMayaliasIntyYmm *)Ymm)->Ymm; \
|
||||
})
|
||||
|
||||
#define _mm256_storeu_si256(M256IP_0, M256I_1) \
|
||||
({ \
|
||||
__m256i *Ymm = (M256IP_0); \
|
||||
((struct PackedMayaliasIntyYmm *)Ymm)->Ymm = M256I_1; \
|
||||
})
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_ */
|
|
@ -1,51 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
typedef float __m256 _Vector_size(32) mayalias;
|
||||
typedef double __m256d _Vector_size(32) mayalias;
|
||||
typedef long long __m256i _Vector_size(32) mayalias;
|
||||
|
||||
typedef float __m256_u _Vector_size(32) forcealign(1) mayalias;
|
||||
typedef double __m256d_u _Vector_size(32) forcealign(1) mayalias;
|
||||
typedef long long __m256i_u _Vector_size(32) forcealign(1) mayalias;
|
||||
|
||||
typedef double __v4df _Vector_size(32);
|
||||
typedef float __v8sf _Vector_size(32);
|
||||
typedef long long __v4di _Vector_size(32);
|
||||
typedef unsigned long long __v4du _Vector_size(32);
|
||||
typedef int __v8si _Vector_size(32);
|
||||
typedef unsigned __v8su _Vector_size(32);
|
||||
typedef short __v16hi _Vector_size(32);
|
||||
typedef unsigned short __v16hu _Vector_size(32);
|
||||
typedef char __v32qi _Vector_size(32);
|
||||
typedef unsigned char __v32qu _Vector_size(32);
|
||||
|
||||
#define _mm256_setzero_ps() ((__m256)(__v8sf){0})
|
||||
#define _mm256_load_ps(FLOATPTR) (*(__m256 *)(FLOATPTR))
|
||||
#define _mm256_loadu_ps(FLOATPTR) (*(__m256_u *)(FLOATPTR))
|
||||
#define _mm256_store_ps(FLOATPTR, M256_0) \
|
||||
(*(__m256 *)(FLOATPTR) = (__m256)(M256_0))
|
||||
#define _mm256_storeu_ps(FLOATPTR, M256_0) \
|
||||
(*(__m256_u *)(FLOATPTR) = (__m256)(M256_0))
|
||||
#define _mm256_extractf128_ps(M256_0, INT_1) \
|
||||
((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(M256_0), \
|
||||
(int)(INT_1)))
|
||||
#define _mm256_insertf128_ps(M256_0, M128_1, IMM_2) \
|
||||
((__m256)__builtin_ia32_vinsertf128_ps256( \
|
||||
(__v8sf)(__m256)(M256_0), (__v4sf)(__m128)(M128_1), (int)(IMM_2)))
|
||||
|
||||
#ifdef __llvm__
|
||||
#define _mm256_castps128_ps256(M128_0) \
|
||||
((__m256)__builtin_shufflevector((__v4sf)(__m128)(M128_0), \
|
||||
(__v4sf)(__m128)(M128_0), 0, 1, 2, 3, -1, \
|
||||
-1, -1, -1))
|
||||
#else
|
||||
#define _mm256_castps128_ps256(M128_0) \
|
||||
((__m256)__builtin_ia32_ps256_ps((__v4sf)(__m128)(M128_0)))
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_ */
|
|
@ -1,43 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
|
||||
/**
|
||||
* Extracts bit field from array.
|
||||
*/
|
||||
unsigned bextra(const unsigned *p, size_t i, char b) {
|
||||
unsigned k, r, w;
|
||||
w = sizeof(unsigned) * CHAR_BIT;
|
||||
if (b) {
|
||||
b &= w - 1;
|
||||
i *= b;
|
||||
k = i & (w - 1);
|
||||
i /= w;
|
||||
if (k <= w - b) {
|
||||
return (p[i] >> k) & ((1u << (b - 1)) | ((1u << (b - 1)) - 1));
|
||||
} else {
|
||||
r = p[i] >> k;
|
||||
r |= p[i + 1] << (w - k);
|
||||
r &= (1ul << b) - 1;
|
||||
return r;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
|
@ -1,16 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
|
||||
|
||||
#ifndef BIGWORD
|
||||
#if __AVX512F__ + 0
|
||||
#define BIGWORD 64
|
||||
#elif __AVX2__ + 0
|
||||
#define BIGWORD 32
|
||||
#elif __SSE2__ + 0
|
||||
#define BIGWORD 16
|
||||
#else
|
||||
#define BIGWORD __BIGGEST_ALIGNMENT__
|
||||
#endif
|
||||
#endif /*BIGWORD*/
|
||||
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_BIGWORD_H_ */
|
|
@ -1,45 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_BITOP_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_BITOP_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define bts(MEM, BIT) __BitOp("bts", BIT, MEM) /** bit test and set */
|
||||
#define btr(MEM, BIT) __BitOp("btr", BIT, MEM) /** bit test and reset */
|
||||
#define btc(MEM, BIT) __BitOp("btc", BIT, MEM) /** bit test and complement */
|
||||
#define lockbts(MEM, BIT) __BitOp("lock bts", BIT, MEM)
|
||||
#define lockbtr(MEM, BIT) __BitOp("lock btr", BIT, MEM)
|
||||
#define lockbtc(MEM, BIT) __BitOp("lock btc", BIT, MEM)
|
||||
|
||||
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
#define __BitOp(OP, BIT, MEM) \
|
||||
({ \
|
||||
bool OldBit; \
|
||||
if (__builtin_constant_p(BIT)) { \
|
||||
asm(CFLAG_ASM(OP "%z1\t%2,%1") \
|
||||
: CFLAG_CONSTRAINT(OldBit), \
|
||||
"+m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]) \
|
||||
: "J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
|
||||
: "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 2) { \
|
||||
asm(CFLAG_ASM(OP "\t%w2,%1") \
|
||||
: CFLAG_CONSTRAINT(OldBit), "+m"((MEM)[0]) \
|
||||
: "r"(BIT) \
|
||||
: "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 4) { \
|
||||
asm(CFLAG_ASM(OP "\t%k2,%1") \
|
||||
: CFLAG_CONSTRAINT(OldBit), "+m"((MEM)[0]) \
|
||||
: "r"(BIT) \
|
||||
: "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 8) { \
|
||||
asm(CFLAG_ASM(OP "\t%q2,%1") \
|
||||
: CFLAG_CONSTRAINT(OldBit), "+m"((MEM)[0]) \
|
||||
: "r"(BIT) \
|
||||
: "cc"); \
|
||||
} \
|
||||
OldBit; \
|
||||
})
|
||||
#endif /* __GNUC__ && !__STRICT_ANSI__ */
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_BITOP_H_ */
|
|
@ -1,26 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
|
||||
/**
|
||||
* Reverses bits in 16-bit word.
|
||||
*/
|
||||
int bitreverse16(int x) {
|
||||
return BITREVERSE16(x);
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/bswap.h"
|
||||
|
||||
/**
|
||||
* Reverses bits in 32-bit word.
|
||||
*/
|
||||
uint32_t bitreverse32(uint32_t x) {
|
||||
x = bswap_32(x);
|
||||
x = (x & 0xaaaaaaaa) >> 1 | (x & 0x55555555) << 1;
|
||||
x = (x & 0xcccccccc) >> 2 | (x & 0x33333333) << 2;
|
||||
x = (x & 0xf0f0f0f0) >> 4 | (x & 0x0f0f0f0f) << 4;
|
||||
return x;
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/bswap.h"
|
||||
|
||||
/**
|
||||
* Reverses bits in 64-bit word.
|
||||
*/
|
||||
uint64_t bitreverse64(uint64_t x) {
|
||||
x = bswap_64(x);
|
||||
x = (x & 0xaaaaaaaaaaaaaaaa) >> 1 | (x & 0x5555555555555555) << 1;
|
||||
x = (x & 0xcccccccccccccccc) >> 2 | (x & 0x3333333333333333) << 2;
|
||||
x = (x & 0xf0f0f0f0f0f0f0f0) >> 4 | (x & 0x0f0f0f0f0f0f0f0f) << 4;
|
||||
return x;
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
|
||||
/**
|
||||
* Reverses bits in 8-bit word.
|
||||
*/
|
||||
int bitreverse8(int x) {
|
||||
return BITREVERSE8(x);
|
||||
}
|
164
libc/bits/bits.h
Normal file → Executable file
164
libc/bits/bits.h
Normal file → Executable file
|
@ -1,164 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define CheckUnsigned(x) ((x) / !((typeof(x))(-1) < 0))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
extern const uint8_t kReverseBits[256];
|
||||
|
||||
uint32_t gray(uint32_t) pureconst;
|
||||
uint32_t ungray(uint32_t) pureconst;
|
||||
int bitreverse8(int) libcesque pureconst;
|
||||
int bitreverse16(int) libcesque pureconst;
|
||||
uint32_t bitreverse32(uint32_t) libcesque pureconst;
|
||||
uint64_t bitreverse64(uint64_t) libcesque pureconst;
|
||||
unsigned long roundup2pow(unsigned long) libcesque pureconst;
|
||||
unsigned long roundup2log(unsigned long) libcesque pureconst;
|
||||
unsigned long rounddown2pow(unsigned long) libcesque pureconst;
|
||||
unsigned long hamming(unsigned long, unsigned long) pureconst;
|
||||
unsigned bextra(const unsigned *, size_t, char);
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » no assembly required ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define BITREVERSE8(X) (kReverseBits[255 & (X)])
|
||||
#define BITREVERSE16(X) \
|
||||
(kReverseBits[0x00FF & (X)] << 8 | kReverseBits[(0xFF00 & (X)) >> 8])
|
||||
|
||||
#ifdef __STRICT_ANSI__
|
||||
#define READ16LE(S) ((255 & (S)[1]) << 8 | (255 & (S)[0]))
|
||||
#define READ16BE(S) ((255 & (S)[0]) << 8 | (255 & (S)[1]))
|
||||
#define READ32LE(S) \
|
||||
((uint32_t)(255 & (S)[3]) << 030 | (uint32_t)(255 & (S)[2]) << 020 | \
|
||||
(uint32_t)(255 & (S)[1]) << 010 | (uint32_t)(255 & (S)[0]) << 000)
|
||||
#define READ32BE(S) \
|
||||
((uint32_t)(255 & (S)[0]) << 030 | (uint32_t)(255 & (S)[1]) << 020 | \
|
||||
(uint32_t)(255 & (S)[2]) << 010 | (uint32_t)(255 & (S)[3]) << 000)
|
||||
#define READ64LE(S) \
|
||||
((uint64_t)(255 & (S)[7]) << 070 | (uint64_t)(255 & (S)[6]) << 060 | \
|
||||
(uint64_t)(255 & (S)[5]) << 050 | (uint64_t)(255 & (S)[4]) << 040 | \
|
||||
(uint64_t)(255 & (S)[3]) << 030 | (uint64_t)(255 & (S)[2]) << 020 | \
|
||||
(uint64_t)(255 & (S)[1]) << 010 | (uint64_t)(255 & (S)[0]) << 000)
|
||||
#define READ64BE(S) \
|
||||
((uint64_t)(255 & (S)[0]) << 070 | (uint64_t)(255 & (S)[1]) << 060 | \
|
||||
(uint64_t)(255 & (S)[2]) << 050 | (uint64_t)(255 & (S)[3]) << 040 | \
|
||||
(uint64_t)(255 & (S)[4]) << 030 | (uint64_t)(255 & (S)[5]) << 020 | \
|
||||
(uint64_t)(255 & (S)[6]) << 010 | (uint64_t)(255 & (S)[7]) << 000)
|
||||
#else /* gcc needs help knowing above are mov if s isn't a variable */
|
||||
#define READ16LE(S) \
|
||||
({ \
|
||||
const uint8_t *Ptr = (const uint8_t *)(S); \
|
||||
Ptr[1] << 8 | Ptr[0]; \
|
||||
})
|
||||
#define READ16BE(S) \
|
||||
({ \
|
||||
const uint8_t *Ptr = (const uint8_t *)(S); \
|
||||
Ptr[0] << 8 | Ptr[1]; \
|
||||
})
|
||||
#define READ32LE(S) \
|
||||
({ \
|
||||
const uint8_t *Ptr = (const uint8_t *)(S); \
|
||||
((uint32_t)Ptr[3] << 030 | (uint32_t)Ptr[2] << 020 | \
|
||||
(uint32_t)Ptr[1] << 010 | (uint32_t)Ptr[0] << 000); \
|
||||
})
|
||||
#define READ32BE(S) \
|
||||
({ \
|
||||
const uint8_t *Ptr = (const uint8_t *)(S); \
|
||||
((uint32_t)Ptr[0] << 030 | (uint32_t)Ptr[1] << 020 | \
|
||||
(uint32_t)Ptr[2] << 010 | (uint32_t)Ptr[3] << 000); \
|
||||
})
|
||||
#define READ64LE(S) \
|
||||
({ \
|
||||
const uint8_t *Ptr = (const uint8_t *)(S); \
|
||||
((uint64_t)Ptr[7] << 070 | (uint64_t)Ptr[6] << 060 | \
|
||||
(uint64_t)Ptr[5] << 050 | (uint64_t)Ptr[4] << 040 | \
|
||||
(uint64_t)Ptr[3] << 030 | (uint64_t)Ptr[2] << 020 | \
|
||||
(uint64_t)Ptr[1] << 010 | (uint64_t)Ptr[0] << 000); \
|
||||
})
|
||||
#define READ64BE(S) \
|
||||
({ \
|
||||
const uint8_t *Ptr = (const uint8_t *)(S); \
|
||||
((uint64_t)Ptr[0] << 070 | (uint64_t)Ptr[1] << 060 | \
|
||||
(uint64_t)Ptr[2] << 050 | (uint64_t)Ptr[3] << 040 | \
|
||||
(uint64_t)Ptr[4] << 030 | (uint64_t)Ptr[5] << 020 | \
|
||||
(uint64_t)Ptr[6] << 010 | (uint64_t)Ptr[7] << 000); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#define WRITE16LE(P, V) \
|
||||
((P)[0] = (0x00000000000000FF & (V)) >> 000, \
|
||||
(P)[1] = (0x000000000000FF00 & (V)) >> 010, (P) + 2)
|
||||
#define WRITE16BE(P, V) \
|
||||
((P)[0] = (0x000000000000FF00 & (V)) >> 010, \
|
||||
(P)[1] = (0x00000000000000FF & (V)) >> 000, (P) + 2)
|
||||
#define WRITE32LE(P, V) \
|
||||
((P)[0] = (0x00000000000000FF & (V)) >> 000, \
|
||||
(P)[1] = (0x000000000000FF00 & (V)) >> 010, \
|
||||
(P)[2] = (0x0000000000FF0000 & (V)) >> 020, \
|
||||
(P)[3] = (0x00000000FF000000 & (V)) >> 030, (P) + 4)
|
||||
#define WRITE32BE(P, V) \
|
||||
((P)[0] = (0x00000000FF000000 & (V)) >> 030, \
|
||||
(P)[1] = (0x0000000000FF0000 & (V)) >> 020, \
|
||||
(P)[2] = (0x000000000000FF00 & (V)) >> 010, \
|
||||
(P)[3] = (0x00000000000000FF & (V)) >> 000, (P) + 4)
|
||||
#define WRITE64LE(P, V) \
|
||||
((P)[0] = (0x00000000000000FF & (V)) >> 000, \
|
||||
(P)[1] = (0x000000000000FF00 & (V)) >> 010, \
|
||||
(P)[2] = (0x0000000000FF0000 & (V)) >> 020, \
|
||||
(P)[3] = (0x00000000FF000000 & (V)) >> 030, \
|
||||
(P)[4] = (0x000000FF00000000 & (V)) >> 040, \
|
||||
(P)[5] = (0x0000FF0000000000 & (V)) >> 050, \
|
||||
(P)[6] = (0x00FF000000000000 & (V)) >> 060, \
|
||||
(P)[7] = (0xFF00000000000000 & (V)) >> 070, (P) + 8)
|
||||
#define WRITE64BE(P, V) \
|
||||
((P)[0] = (0xFF00000000000000 & (V)) >> 070, \
|
||||
(P)[1] = (0x00FF000000000000 & (V)) >> 060, \
|
||||
(P)[2] = (0x0000FF0000000000 & (V)) >> 050, \
|
||||
(P)[3] = (0x000000FF00000000 & (V)) >> 040, \
|
||||
(P)[4] = (0x00000000FF000000 & (V)) >> 030, \
|
||||
(P)[5] = (0x0000000000FF0000 & (V)) >> 020, \
|
||||
(P)[6] = (0x000000000000FF00 & (V)) >> 010, \
|
||||
(P)[7] = (0x00000000000000FF & (V)) >> 000, (P) + 8)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » some assembly required ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
|
||||
#define lockinc(MEM) __ArithmeticOp1("lock inc", MEM)
|
||||
#define lockdec(MEM) __ArithmeticOp1("lock dec", MEM)
|
||||
#define locknot(MEM) __ArithmeticOp1("lock not", MEM)
|
||||
#define lockneg(MEM) __ArithmeticOp1("lock neg", MEM)
|
||||
|
||||
#define lockaddeq(MEM, VAL) __ArithmeticOp2("lock add", VAL, MEM)
|
||||
#define locksubeq(MEM, VAL) __ArithmeticOp2("lock sub", VAL, MEM)
|
||||
#define lockxoreq(MEM, VAL) __ArithmeticOp2("lock xor", VAL, MEM)
|
||||
#define lockandeq(MEM, VAL) __ArithmeticOp2("lock and", VAL, MEM)
|
||||
#define lockoreq(MEM, VAL) __ArithmeticOp2("lock or", VAL, MEM)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » implementation details ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define __ArithmeticOp1(OP, MEM) \
|
||||
({ \
|
||||
asm(OP "%z0\t%0" : "+m"(*(MEM)) : /* no inputs */ : "cc"); \
|
||||
MEM; \
|
||||
})
|
||||
|
||||
#define __ArithmeticOp2(OP, VAL, MEM) \
|
||||
({ \
|
||||
asm(OP "%z0\t%1,%0" : "+m,m"(*(MEM)) : "i,r"(VAL) : "cc"); \
|
||||
MEM; \
|
||||
})
|
||||
|
||||
#endif /* __GNUC__ && !__STRICT_ANSI__ */
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_H_ */
|
50
libc/bits/bits.mk
Normal file → Executable file
50
libc/bits/bits.mk
Normal file → Executable file
|
@ -1,50 +0,0 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||
|
||||
PKGS += LIBC_BITS
|
||||
|
||||
LIBC_BITS_ARTIFACTS += LIBC_BITS_A
|
||||
LIBC_BITS = $(LIBC_BITS_A_DEPS) $(LIBC_BITS_A)
|
||||
LIBC_BITS_A = o/$(MODE)/libc/bits/bits.a
|
||||
LIBC_BITS_A_FILES := $(wildcard libc/bits/*)
|
||||
LIBC_BITS_A_HDRS = $(filter %.h,$(LIBC_BITS_A_FILES))
|
||||
LIBC_BITS_A_SRCS_S = $(filter %.S,$(LIBC_BITS_A_FILES))
|
||||
LIBC_BITS_A_SRCS_C = $(filter %.c,$(LIBC_BITS_A_FILES))
|
||||
|
||||
LIBC_BITS_A_SRCS = \
|
||||
$(LIBC_BITS_A_SRCS_S) \
|
||||
$(LIBC_BITS_A_SRCS_C)
|
||||
|
||||
LIBC_BITS_A_OBJS = \
|
||||
$(LIBC_BITS_A_SRCS_S:%.S=o/$(MODE)/%.o) \
|
||||
$(LIBC_BITS_A_SRCS_C:%.c=o/$(MODE)/%.o)
|
||||
|
||||
LIBC_BITS_A_CHECKS = \
|
||||
$(LIBC_BITS_A).pkg \
|
||||
$(LIBC_BITS_A_HDRS:%=o/$(MODE)/%.ok)
|
||||
|
||||
LIBC_BITS_A_DIRECTDEPS = \
|
||||
LIBC_STUBS \
|
||||
LIBC_INTRIN \
|
||||
LIBC_NEXGEN32E
|
||||
|
||||
LIBC_BITS_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
$(LIBC_BITS_A): libc/bits/ \
|
||||
$(LIBC_BITS_A).pkg \
|
||||
$(LIBC_BITS_A_OBJS)
|
||||
|
||||
$(LIBC_BITS_A).pkg: \
|
||||
$(LIBC_BITS_A_OBJS) \
|
||||
$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
LIBC_BITS_LIBS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)))
|
||||
LIBC_BITS_SRCS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_SRCS))
|
||||
LIBC_BITS_HDRS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_HDRS))
|
||||
LIBC_BITS_CHECKS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_CHECKS))
|
||||
LIBC_BITS_OBJS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_OBJS))
|
||||
$(LIBC_BITS_OBJS): $(BUILD_FILES) libc/bits/bits.mk
|
||||
|
||||
.PHONY: o/$(MODE)/libc/bits
|
||||
o/$(MODE)/libc/bits: $(LIBC_BITS_CHECKS)
|
18
libc/bits/bswap.h
Normal file → Executable file
18
libc/bits/bswap.h
Normal file → Executable file
|
@ -1,18 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_BSWAP_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_BSWAP_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
uint16_t bswap_16(uint16_t) pureconst;
|
||||
uint32_t bswap_32(uint32_t) pureconst;
|
||||
uint32_t bswap_64(uint32_t) pureconst;
|
||||
|
||||
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
#define bswap_16(x) __builtin_bswap16(x)
|
||||
#define bswap_32(x) __builtin_bswap32(x)
|
||||
#define bswap_64(x) __builtin_bswap64(x)
|
||||
#endif /* defined(__GNUC__) && !defined(__STRICT_ANSI__) */
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_BSWAP_H_ */
|
|
@ -1,84 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
|
||||
/**
|
||||
* Returns population count of array.
|
||||
*
|
||||
* @param a is byte sequence
|
||||
* @return number of bits set to one
|
||||
* @note 30gbps on Nehalem (Intel 2008+) otherwise 3gbps
|
||||
*/
|
||||
size_t _countbits(const void *a, size_t n) {
|
||||
int i;
|
||||
size_t t;
|
||||
unsigned b;
|
||||
uint64_t x;
|
||||
long Ai, Bi, Ci, Di;
|
||||
long Ao, Bo, Co, Do;
|
||||
const char *p, *e;
|
||||
t = 0;
|
||||
p = a;
|
||||
e = p + n;
|
||||
if (!IsTiny()) {
|
||||
if (X86_HAVE(POPCNT)) {
|
||||
while (p + sizeof(long) * 4 <= e) {
|
||||
__builtin_memcpy(&Ai, p + 000, sizeof(long));
|
||||
__builtin_memcpy(&Bi, p + 010, sizeof(long));
|
||||
__builtin_memcpy(&Ci, p + 020, sizeof(long));
|
||||
__builtin_memcpy(&Di, p + 030, sizeof(long));
|
||||
asm("popcnt\t%1,%0" : "=r"(Ao) : "r"(Ai) : "cc");
|
||||
asm("popcnt\t%1,%0" : "=r"(Bo) : "r"(Bi) : "cc");
|
||||
asm("popcnt\t%1,%0" : "=r"(Co) : "r"(Ci) : "cc");
|
||||
asm("popcnt\t%1,%0" : "=r"(Do) : "r"(Di) : "cc");
|
||||
t += Ao + Bo + Co + Do;
|
||||
p += sizeof(long) * 4;
|
||||
}
|
||||
while (p + sizeof(long) <= e) {
|
||||
__builtin_memcpy(&Ai, p, 8);
|
||||
asm("popcnt\t%1,%0" : "=r"(Ao) : "rm"(Ai) : "cc");
|
||||
p += sizeof(long);
|
||||
t += Ao;
|
||||
}
|
||||
} else {
|
||||
while (p + 8 <= e) {
|
||||
__builtin_memcpy(&x, p, 8);
|
||||
x = x - ((x >> 1) & 0x5555555555555555);
|
||||
x = ((x >> 2) & 0x3333333333333333) + (x & 0x3333333333333333);
|
||||
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
|
||||
x = (x + (x >> 32)) & 0xffffffff;
|
||||
x = x + (x >> 16);
|
||||
x = (x + (x >> 8)) & 0x7f;
|
||||
t += x;
|
||||
p += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (p < e) {
|
||||
b = *p++ & 255;
|
||||
b = b - ((b >> 1) & 0x55);
|
||||
b = ((b >> 2) & 0x33) + (b & 0x33);
|
||||
b = (b + (b >> 4)) & 15;
|
||||
t += b;
|
||||
}
|
||||
return t;
|
||||
}
|
|
@ -1,242 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
|
||||
#include "libc/bits/xmmintrin.internal.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § it's a trap! » sse2 ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
typedef char __v16qi _Vector_size(16);
|
||||
typedef unsigned char __v16qu _Vector_size(16);
|
||||
typedef signed char __v16qs _Vector_size(16);
|
||||
|
||||
typedef short __v8hi _Vector_size(16);
|
||||
typedef unsigned short __v8hu _Vector_size(16);
|
||||
|
||||
typedef double __v2df _Vector_size(16);
|
||||
typedef double __m128d _Vector_size(16) forcealign(16);
|
||||
typedef double __m128d_u _Vector_size(16) forcealign(1);
|
||||
|
||||
typedef long long __v2di _Vector_size(16);
|
||||
typedef long long __m128i _Vector_size(16) forcealign(16);
|
||||
typedef long long __m128i_u _Vector_size(16) forcealign(1);
|
||||
typedef unsigned long long __v2du _Vector_size(16);
|
||||
|
||||
struct thatispacked mayalias __usi128ma {
|
||||
__m128i_u __v;
|
||||
};
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § it's a trap! » sse2 » memory ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_loadu_si128(M128IP) ((struct __usi128ma *)(M128IP))->__v
|
||||
#define _mm_storeu_si128(M128IP, M128I) \
|
||||
(((struct __usi128ma *)(M128IP))->__v = (M128I))
|
||||
|
||||
#define _mm_set_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
|
||||
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
|
||||
((__m128i)(__v16qi){I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, \
|
||||
I8_9, I8_10, I8_11, I8_12, I8_13, I8_14, I8_15})
|
||||
#define _mm_set_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
|
||||
((__m128i)(__v8hi){I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7})
|
||||
#define _mm_set_epi32(I32_3, I32_2, I32_1, I32_0) \
|
||||
((__m128i)(__v4si){I32_0, I32_1, I32_2, I32_3})
|
||||
#define _mm_set_epi64x(I64_1, I64_0) ((__m128i)(__v2di){I64_0, I64_1})
|
||||
|
||||
#define _mm_setr_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
|
||||
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
|
||||
_mm_set_epi8(I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, I8_9, \
|
||||
I8_10, I8_11, I8_12, I8_13, I8_14, I8_15)
|
||||
#define _mm_setr_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
|
||||
_mm_set_epi16(I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7)
|
||||
#define _mm_setr_epi32(I32_3, I32_2, I32_1, I32_0) \
|
||||
_mm_set_epi32(I32_0, I32_1, I32_2, I32_3)
|
||||
#define _mm_setr_epi64x(I64_1, I64_0) _mm_set_epi64x(I64_0, I64_1)
|
||||
|
||||
#define _mm_set1_epi8(I8) \
|
||||
_mm_set_epi8(I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8)
|
||||
#define _mm_set1_epi16(I16) \
|
||||
_mm_set_epi16(I16, I16, I16, I16, I16, I16, I16, I16)
|
||||
#define _mm_set1_epi32(I32) _mm_set_epi32(I32, I32, I32, I32)
|
||||
#define _mm_set1_epi64x(I64) _mm_set_epi64x(I64, I64)
|
||||
|
||||
#define _mm_cvtsi128_si32(M128I) ((__v4si)(M128I))[0]
|
||||
#define _mm_cvtsi32_si128(I32) ((__m128i)(__v4si){(I32), 0, 0, 0})
|
||||
#define _mm_setzero_si128() ((__m128i)(__v2di){0LL, 0LL})
|
||||
#define _mm_castsi128_ps(M128I) ((__m128)(M128I))
|
||||
#define _mm_castps_si128(M128) ((__m128i)(M128))
|
||||
#define _mm_load_si128(M128I) (*(M128I))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § it's a trap! » sse2 » simd ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_and_si128(M128I_0, M128I_1) \
|
||||
((__m128i)((__v2du)(M128I_0) & (__v2du)(M128I_1)))
|
||||
#define _mm_or_si128(M128I_0, M128I_1) \
|
||||
((__m128i)((__v2du)(M128I_0) | (__v2du)(M128I_1)))
|
||||
#define _mm_xor_si128(M128I_0, M128I_1) \
|
||||
((__m128i)((__v2du)(M128I_0) ^ (__v2du)(M128I_1)))
|
||||
#define _mm_andnot_si128(M128I_0, M128I_1) \
|
||||
((__m128i)(~(__v2du)(M128I_0) & (__v2du)(M128I_1)))
|
||||
|
||||
#define _mm_add_pd(M128D_0, M128D_1) \
|
||||
(__m128d)((__v2df)(M128D_0) + (__v2df)(M128D_1))
|
||||
#define _mm_sub_pd(M128D_0, M128D_1) \
|
||||
(__m128d)((__v2df)(M128D_0) - (__v2df)(M128D_1))
|
||||
#define _mm_mul_pd(M128D_0, M128D_1) \
|
||||
(__m128d)((__v2df)(M128D_0) * (__v2df)(M128D_1))
|
||||
#define _mm_div_pd(M128D_0, M128D_1) \
|
||||
(__m128d)((__v2df)(M128D_0) / (__v2df)(M128D_1))
|
||||
#define _mm_and_pd(M128D_0, M128D_1) \
|
||||
(__m128d)((__v2df)(M128D_0) & (__v2df)(M128D_1))
|
||||
#define _mm_or_pd(M128D_0, M128D_1) \
|
||||
(__m128d)((__v2df)(M128D_0) | (__v2df)(M128D_1))
|
||||
#define _mm_xor_pd(M128D_0, M128D_1) \
|
||||
(__m128d)((__v2df)(M128D_0) ^ (__v2df)(M128D_1))
|
||||
#define _mm_andnot_pd(M128D_0, M128D_1) \
|
||||
(__m128d)(~(__v2df)(M128D_0) & (__v2df)(M128D_1))
|
||||
#define _mm_sqrt_pd(M128D) __builtin_ia32_sqrtpd((__v2df)(M128D))
|
||||
|
||||
#define _mm_min_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_minpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_max_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_maxpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpeq_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpeqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpneq_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpneqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmplt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnlt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmple_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplepd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnle_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlepd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpgt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpngt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpge_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplepd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpnge_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlepd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpord_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpunord_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpunordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
|
||||
#define _mm_sad_epu8(M128I_0, M128I_1) \
|
||||
__builtin_ia32_psadbw128((__v16qi)(M128I_0), (__v16qi)(M128I_1))
|
||||
|
||||
#define _mm_subs_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psubsb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
#define _mm_subs_epu8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psubusw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
#define _mm_subs_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psubsw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
#define _mm_subs_epu16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psubusw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_add_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)((__v4su)(M128I_0) + (__v4su)(M128I_1)))
|
||||
#define _mm_sub_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)((__v4su)(M128I_0) - (__v4su)(M128I_1)))
|
||||
#define _mm_madd_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pmaddwd128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
#define _mm_shuffle_epi32(V, IMM) \
|
||||
((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(V), (int)(IMM)))
|
||||
|
||||
#define _mm_slli_epi32(M128I, COUNT) \
|
||||
((__m128i)__builtin_ia32_pslldi128((__v4si)(M128I), (COUNT)))
|
||||
|
||||
#define _mm_slli_si128(M128I, IMM) \
|
||||
((__m128i)__builtin_ia32_pslldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
|
||||
#define _mm_srli_si128(M128I, IMM) \
|
||||
((__m128i)__builtin_ia32_psrldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § it's a trap! » sse2 » scalar ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_sqrt_sd(M128D_0, M128D_1) \
|
||||
({ \
|
||||
__m128d M128d2 = __builtin_ia32_sqrtsd((__v2df)(M128D_1)); \
|
||||
(__m128d){M128d2[0], (M128D_0)[1]}; \
|
||||
})
|
||||
|
||||
#define _mm_add_sd(M128D_0, M128D_1) \
|
||||
({ \
|
||||
(M128D_0)[0] += (M128D_1)[0]; \
|
||||
(M128D_0); \
|
||||
})
|
||||
#define _mm_sub_sd(M128D_0, M128D_1) \
|
||||
({ \
|
||||
(M128D_0)[0] -= (M128D_1)[0]; \
|
||||
(M128D_0); \
|
||||
})
|
||||
#define _mm_mul_sd(M128D_0, M128D_1) \
|
||||
({ \
|
||||
(M128D_0)[0] *= (M128D_1)[0]; \
|
||||
(M128D_0); \
|
||||
})
|
||||
#define _mm_div_sd(M128D_0, M128D_1) \
|
||||
({ \
|
||||
(M128D_0)[0] /= (M128D_1)[0]; \
|
||||
(M128D_0); \
|
||||
})
|
||||
|
||||
#define _mm_min_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_minsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_max_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_maxsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpeq_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpeqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpneq_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpneqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmplt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnlt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmple_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplesd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnle_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlesd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpgt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpngt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpge_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplesd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpnge_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlesd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpord_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpunord_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpunordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
|
||||
#define _mm_SSE2(op, A, B) \
|
||||
({ \
|
||||
__m128i R = A; \
|
||||
asm(#op " %1, %0" \
|
||||
: "+x"(R) : "xm"(B)); \
|
||||
R; \
|
||||
})
|
||||
#define _mm_mul_epu32(A, B) _mm_SSE2(pmuludq, A, B)
|
||||
#define _mm_add_epi64(A, B) _mm_SSE2(paddq, A, B)
|
||||
#define _mm_srli_epi64(A, B) _mm_SSE2(psrlq, A, B)
|
||||
#define _mm_slli_epi64(A, B) _mm_SSE2(psllq, A, B)
|
||||
#define _mm_unpacklo_epi64(A, B) _mm_SSE2(punpcklqdq, A, B)
|
||||
#define _mm_unpackhi_epi64(A, B) _mm_SSE2(punpckhqdq, A, B)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § it's a trap! » sse2 » miscellaneous ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_pause() asm("rep nop")
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_ */
|
|
@@ -1,12 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_EZLEA_H_
#define COSMOPOLITAN_LIBC_BITS_EZLEA_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)

#if __pic__ + __pie__ + __code_model_medium__ + __code_model_large__ + 0 > 1
#define ezlea(symbol) "lea\t" symbol "(%%rip),%"
#else
#define ezlea(symbol) "mov\t$" symbol ",%k"
#endif

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_EZLEA_H_ */
@@ -1,28 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/bits.h"

/**
 * Returns gray code for x.
 * @see https://en.wikipedia.org/wiki/Gray_code
 * @see ungray()
 */
uint32_t gray(uint32_t x) {
  return x ^ (x >> 1);
}
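A minimal usage sketch, assuming the gray() prototype above together with the ungray() decoder shown later in this commit; it checks that decoding inverts encoding and that consecutive codes differ in exactly one bit:

#include <assert.h>
#include <stdint.h>

uint32_t gray(uint32_t);   /* assumed prototypes, normally from libc/bits */
uint32_t ungray(uint32_t);

int main(void) {
  for (uint32_t x = 0; x < 1000; ++x) {
    uint32_t d = gray(x) ^ gray(x + 1);
    assert(d && !(d & (d - 1)));   /* adjacent codes differ in one bit */
    assert(ungray(gray(x)) == x);  /* decoding inverts encoding */
  }
  return 0;
}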
@@ -1,27 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/popcnt.h"

/**
 * Counts number of different bits.
 * @see https://en.wikipedia.org/wiki/Hamming_code
 */
unsigned long hamming(unsigned long x, unsigned long y) {
  return popcnt(x ^ y);
}
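A small sketch of what this function computes, assuming the hamming() prototype above:

#include <assert.h>

unsigned long hamming(unsigned long, unsigned long);  /* assumed prototype */

int main(void) {
  assert(hamming(0xFF, 0x0F) == 4);  /* upper four bits differ */
  assert(hamming(11, 2) == 2);       /* 0b1011 vs 0b0010: two bits differ */
  assert(hamming(42, 42) == 0);      /* identical words have distance zero */
  return 0;
}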
@@ -1,76 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/hilbert.h"

static axdx_t RotateQuadrant(long n, long y, long x, long ry, long rx) {
  long t;
  if (ry == 0) {
    if (rx == 1) {
      y = n - 1 - y;
      x = n - 1 - x;
    }
    t = x;
    x = y;
    y = t;
  }
  return (axdx_t){y, x};
}

/**
 * Generates Hilbert space-filling curve.
 *
 * @see https://en.wikipedia.org/wiki/Hilbert_curve
 * @see unhilbert()
 */
long hilbert(long n, long y, long x) {
  axdx_t m;
  long d, s, ry, rx;
  d = 0;
  for (s = n / 2; s > 0; s /= 2) {
    ry = (y & s) > 0;
    rx = (x & s) > 0;
    d += s * s * ((3 * rx) ^ ry);
    m = RotateQuadrant(n, y, x, ry, rx);
    y = m.ax;
    x = m.dx;
  }
  return d;
}

/**
 * Decodes Hilbert space-filling curve.
 *
 * @see https://en.wikipedia.org/wiki/Hilbert_curve
 * @see hilbert()
 */
axdx_t unhilbert(long n, long i) {
  axdx_t m;
  long s, t, y, x, ry, rx;
  t = i;
  x = y = 0;
  for (s = 1; s < n; s *= 2) {
    rx = (t / 2) & 1;
    ry = (t ^ rx) & 1;
    m = RotateQuadrant(s, y, x, ry, rx);
    x = m.dx + s * rx;
    y = m.ax + s * ry;
    t /= 4;
  }
  return (axdx_t){y, x};
}
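A round-trip sketch for the pair above; it assumes the declarations in libc/bits/hilbert.h (the axdx_t layout below is an assumption made so the sketch is self-contained), and n must be a power of two for the mapping to cover the n×n grid:

#include <assert.h>

typedef struct { long ax, dx; } axdx_t;  /* assumed layout matching hilbert.h */
long hilbert(long n, long y, long x);
axdx_t unhilbert(long n, long i);

int main(void) {
  long n = 8;                      /* 8x8 grid, curve indices 0..63 */
  for (long y = 0; y < n; ++y) {
    for (long x = 0; x < n; ++x) {
      long d = hilbert(n, y, x);   /* distance along the curve */
      axdx_t p = unhilbert(n, d);  /* inverse mapping */
      assert(p.ax == y && p.dx == x);
    }
  }
  return 0;
}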
@@ -1,11 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_HILBERT_H_
#define COSMOPOLITAN_LIBC_BITS_HILBERT_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

long hilbert(long, long, long) pureconst;
axdx_t unhilbert(long, long) pureconst;

COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_HILBERT_H_ */
@@ -1,22 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_
#define COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)

/* TODO: DELETE */

/**
 * Teleports code fragment inside _init().
 */
#ifndef INITIALIZER
#define INITIALIZER(PRI, NAME, CODE) \
  asm(".section .init." #PRI "." #NAME ",\"ax\",@progbits\n\t" \
      "call\t" #NAME "\n\t" \
      ".previous"); \
  textstartup optimizesize void NAME(char *rdi, const char *rsi) { \
    CODE; \
    asm volatile("" : /* no outputs */ : "D"(rdi), "S"(rsi)); \
  }
#endif /* INITIALIZER */

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_ */
21
libc/bits/likely.h
Normal file → Executable file
@@ -1,21 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_LIKELY_H_
#define COSMOPOLITAN_LIBC_BITS_LIKELY_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)

#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)

#if __GNUC__ + 0 >= 9 && !defined(__chibicc__)
#define VERY_LIKELY(x) __builtin_expect_with_probability(!!(x), 1, 0.999)
#else
#define VERY_LIKELY(x) LIKELY(x)
#endif

#if __GNUC__ + 0 >= 9 && !defined(__chibicc__)
#define VERY_UNLIKELY(x) __builtin_expect_with_probability(!!(x), 0, 0.999)
#else
#define VERY_UNLIKELY(x) UNLIKELY(x)
#endif

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_LIKELY_H_ */
32
libc/bits/midpoint.h
Normal file → Executable file
@@ -1,32 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_MIDPOINT_H_
#define COSMOPOLITAN_LIBC_BITS_MIDPOINT_H_
#include "libc/assert.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)

#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && defined(__x86__)
/**
 * Computes `(a + b) / 2` assuming unsigned.
 *
 * This implementation is the fastest on AMD Zen architecture.
 */
#define _midpoint(a, b) \
  ({ \
    typeof((a) + (b)) a_ = (a); \
    typeof(a_) b_ = (b); \
    assert(a_ >= 0); \
    assert(b_ >= 0); \
    asm("add\t%1,%0\n\t" \
        "rcr\t%0" \
        : "+r"(a_) \
        : "r"(b_)); \
    a_; \
  })
#else
/**
 * Computes `(a + b) / 2` assuming unsigned.
 */
#define _midpoint(a, b) (((a) & (b)) + ((a) ^ (b)) / 2)
#endif /* __GNUC__ && !__STRICT_ANSI__ && x86 */

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_MIDPOINT_H_ */
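The point of the bit trick in the fallback branch is that it cannot overflow, unlike the naive (a + b) / 2. A worked example under that assumption (the MIDPOINT macro below is a portable restatement of the fallback for unsigned operands, not the header's own _midpoint):

#include <assert.h>
#include <stdint.h>

/* portable restatement of the fallback branch above, for unsigned operands */
#define MIDPOINT(a, b) (((a) & (b)) + (((a) ^ (b)) >> 1))

int main(void) {
  uint32_t a = 4000000000u, b = 4000000002u;
  /* a + b wraps around in uint32_t, so (a + b) / 2 would be wrong */
  assert(MIDPOINT(a, b) == 4000000001u);
  assert(MIDPOINT(6u, 9u) == 7u);  /* floor((6 + 9) / 2) == 7 */
  return 0;
}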
@@ -1,38 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/morton.h"

/**
 * Interleaves bits.
 * @see https://en.wikipedia.org/wiki/Z-order_curve
 * @see unmorton()
 */
unsigned long(morton)(unsigned long y, unsigned long x) {
  x = (x | x << 020) & 0x0000FFFF0000FFFF;
  x = (x | x << 010) & 0x00FF00FF00FF00FF;
  x = (x | x << 004) & 0x0F0F0F0F0F0F0F0F;
  x = (x | x << 002) & 0x3333333333333333;
  x = (x | x << 001) & 0x5555555555555555;
  y = (y | y << 020) & 0x0000FFFF0000FFFF;
  y = (y | y << 010) & 0x00FF00FF00FF00FF;
  y = (y | y << 004) & 0x0F0F0F0F0F0F0F0F;
  y = (y | y << 002) & 0x3333333333333333;
  y = (y | y << 001) & 0x5555555555555555;
  return x | y << 1;
}
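A usage sketch, assuming the morton() prototype above: x lands in the even bit positions of the result and y in the odd ones.

#include <assert.h>

unsigned long morton(unsigned long y, unsigned long x);  /* assumed prototype */

int main(void) {
  assert(morton(0, 0xFFFFFFFF) == 0x5555555555555555);  /* x fills the even bits */
  assert(morton(0xFFFFFFFF, 0) == 0xAAAAAAAAAAAAAAAA);  /* y fills the odd bits */
  assert(morton(3, 5) == 0x1b);  /* 0b11011: y=0b11 interleaved with x=0b101 */
  return 0;
}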
@@ -1,24 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_MORTON_H_
#define COSMOPOLITAN_LIBC_BITS_MORTON_H_
#include "libc/intrin/pdep.h"
#include "libc/intrin/pext.h"
#include "libc/nexgen32e/x86feature.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

unsigned long morton(unsigned long, unsigned long) libcesque;
axdx_t unmorton(unsigned long) libcesque;

#ifndef __STRICT_ANSI__
#define morton(Y, X) \
  (X86_NEED(BMI2) ? pdep(X, 0x5555555555555555) | pdep(Y, 0xAAAAAAAAAAAAAAAA) \
                  : morton(Y, X))
#define unmorton(I) \
  (X86_NEED(BMI2) \
       ? (axdx_t){pext(I, 0xAAAAAAAAAAAAAAAA), pext(I, 0x5555555555555555)} \
       : unmorton(I))
#endif

COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_MORTON_H_ */
@@ -1,55 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_NEWBIE_H_
#define COSMOPOLITAN_LIBC_BITS_NEWBIE_H_
#include "libc/bits/bswap.h"

/*
 * Macros for newbies.
 * https://justine.lol/endian.html
 */

#define BYTE_ORDER __BYTE_ORDER__
#define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
#define BIG_ENDIAN __ORDER_BIG_ENDIAN__
#define PDP_ENDIAN __ORDER_PDP_ENDIAN__

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define htobe16(x) bswap_16(x)
#define be16toh(x) bswap_16(x)
#define betoh16(x) bswap_16(x)
#define htobe32(x) bswap_32(x)
#define be32toh(x) bswap_32(x)
#define betoh32(x) bswap_32(x)
#define htobe64(x) bswap_64(x)
#define be64toh(x) bswap_64(x)
#define betoh64(x) bswap_64(x)
#define htole16(x) (uint16_t)(x)
#define le16toh(x) (uint16_t)(x)
#define letoh16(x) (uint16_t)(x)
#define htole32(x) (uint32_t)(x)
#define le32toh(x) (uint32_t)(x)
#define letoh32(x) (uint32_t)(x)
#define htole64(x) (uint64_t)(x)
#define le64toh(x) (uint64_t)(x)
#define letoh64(x) (uint64_t)(x)
#else
#define htobe16(x) (uint16_t)(x)
#define be16toh(x) (uint16_t)(x)
#define betoh16(x) (uint16_t)(x)
#define htobe32(x) (uint32_t)(x)
#define be32toh(x) (uint32_t)(x)
#define betoh32(x) (uint32_t)(x)
#define htobe64(x) (uint64_t)(x)
#define be64toh(x) (uint64_t)(x)
#define betoh64(x) (uint64_t)(x)
#define htole16(x) bswap_16(x)
#define le16toh(x) bswap_16(x)
#define letoh16(x) bswap_16(x)
#define htole32(x) bswap_32(x)
#define le32toh(x) bswap_32(x)
#define letoh32(x) bswap_32(x)
#define htole64(x) bswap_64(x)
#define le64toh(x) bswap_64(x)
#define letoh64(x) bswap_64(x)
#endif

#endif /* COSMOPOLITAN_LIBC_BITS_NEWBIE_H_ */
@@ -1,14 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)

/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § it's a trap! » sse3 ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/

#define _mm_hadd_ps(M128_0, M128_1) \
  ((__m128)__builtin_ia32_haddps((__v4sf)(__m128)(M128_0), \
                                 (__v4sf)(__m128)(M128_1)))

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_ */
@@ -1,25 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_POPCNT_H_
#define COSMOPOLITAN_LIBC_BITS_POPCNT_H_
#include "libc/nexgen32e/x86feature.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

size_t _countbits(const void *, size_t);
unsigned long popcnt(unsigned long) pureconst;

#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define popcnt(X) \
  (__builtin_constant_p(X) ? __builtin_popcountll(X) : ({ \
    unsigned long PoP = (X); \
    if (X86_HAVE(POPCNT)) { \
      asm("popcnt\t%0,%0" : "+r"(PoP) : /* no inputs */ : "cc"); \
    } else { \
      PoP = (popcnt)(PoP); \
    } \
    PoP; \
  }))
#endif /* GNUC && !ANSI */

COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_POPCNT_H_ */
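For reference, a couple of concrete values the macro/function pair above should agree on (a sketch assuming the popcnt() prototype; the macro dispatches to the POPCNT instruction only when CPUID advertises it):

#include <assert.h>

unsigned long popcnt(unsigned long);  /* assumed prototype */

int main(void) {
  assert(popcnt(0) == 0);
  assert(popcnt(0xFF) == 8);
  assert(popcnt(0x8000000000000001ul) == 2);  /* highest and lowest bit set */
  return 0;
}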
@@ -1,55 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
#define COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
#include "libc/macros.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)

#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushpop(x) (x)
#else
/**
 * PushPop
 * An elegant weapon for a more civilized age.
 */
#define pushpop(x) \
  ({ \
    typeof(x) Popped; \
    if (__builtin_constant_p(x) && \
        (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
                                : (intptr_t)(x) < 128)) { \
      if (x) { \
        asm("push\t%1\n\t" \
            "pop\t%q0" \
            : "=r"(Popped) \
            : "ir"(x)); \
      } else { \
        asm("xor\t%k0,%k0" : "=r"(Popped)); \
      } \
    } else { \
      asm("" : "=r"(Popped) : "0"(x)); \
    } \
    Popped; \
  })
#endif

#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushmov(d, x) (*(d) = (x))
#else
#define pushmov(d, x) \
  ({ \
    typeof(*(d)) Popped = (x); \
    if (__builtin_constant_p(x) && \
        (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
                                : (intptr_t)(x) < 128)) { \
      asm("pushq\t%1\n\t" \
          "popq\t%0" \
          : "=m"(*(d)) \
          : "ir"(Popped)); \
    } else { \
      *(d) = Popped; \
    } \
    Popped; \
  })
#endif

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_ */
@@ -1,30 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"

/**
 * Returns 𝑥 rounded down to previous two power.
 *
 * @define (𝑥>0→2^⌊log₂𝑥⌋, x=0→0, 𝑇→⊥)
 * @see roundup2pow()
 */
unsigned long rounddown2pow(unsigned long x) {
  return x ? 1ul << bsrl(x) : 0;
}
@@ -1,28 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"

/**
 * Returns 𝑥 rounded up to next two power and log'd.
 * @see roundup2pow()
 */
unsigned long roundup2log(unsigned long x) {
  return x > 1 ? (bsrl(x - 1) + 1) : x ? 1 : 0;
}
@@ -1,30 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"

/**
 * Returns 𝑥 rounded up to next two power.
 *
 * @define (𝑥>0→2^⌈log₂x⌉, x=0→0, 𝑇→⊥)
 * @see rounddown2pow()
 */
unsigned long roundup2pow(unsigned long x) {
  return x > 1 ? 2ul << bsrl(x - 1) : x ? 1 : 0;
}
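Concrete values tying the three rounding helpers above together (a sketch assuming their prototypes):

#include <assert.h>

unsigned long roundup2pow(unsigned long);
unsigned long rounddown2pow(unsigned long);
unsigned long roundup2log(unsigned long);

int main(void) {
  assert(rounddown2pow(37) == 32);  /* 2**5 is the largest power <= 37 */
  assert(roundup2pow(37) == 64);    /* 2**6 is the smallest power >= 37 */
  assert(roundup2log(37) == 6);     /* log2 of roundup2pow(37) */
  assert(roundup2pow(64) == 64);    /* powers of two are fixed points */
  assert(roundup2pow(0) == 0 && rounddown2pow(0) == 0);
  return 0;
}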
71
libc/bits/safemacros.internal.h
Normal file → Executable file
@@ -1,71 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
#define COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
#include "libc/macros.internal.h"
#include "libc/runtime/runtime.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

#define min(x, y) \
  ({ \
    autotype(x) MinX = (x); \
    autotype(y) MinY = (y); \
    MinX < MinY ? MinX : MinY; \
  })

#define max(x, y) \
  ({ \
    autotype(x) MaxX = (x); \
    autotype(y) MaxY = (y); \
    MaxX > MaxY ? MaxX : MaxY; \
  })

#define roundup(x, k) \
  ({ \
    autotype(x) RoundupX = (x); \
    autotype(k) RoundupK = (k); \
    ROUNDUP(RoundupX, RoundupK); \
  })

#define rounddown(x, k) \
  ({ \
    autotype(x) RounddownX = (x); \
    autotype(k) RounddownK = (k); \
    ROUNDDOWN(RounddownX, RounddownK); \
  })

#define isempty(s) \
  ({ \
    autotype(s) IsEmptyS = (s); \
    !IsEmptyS || !(*IsEmptyS); \
  })

#define nulltoempty(s) \
  ({ \
    autotype(s) NullToEmptyS = (s); \
    NullToEmptyS ? NullToEmptyS : ""; \
  })

#define firstnonnull(a, b) \
  ({ \
    autotype(a) FirstNonNullA = (a); \
    autotype(a) FirstNonNullB = (b); \
    if (!FirstNonNullA && !FirstNonNullB) abort(); \
    FirstNonNullA ? FirstNonNullA : FirstNonNullB; \
  })

#define emptytonull(s) \
  ({ \
    autotype(s) EmptyToNullS = (s); \
    EmptyToNullS && !(*EmptyToNullS) ? NULL : EmptyToNullS; \
  })

#define unsignedsubtract(a, b) \
  ({ \
    uint64_t UnsubA = (a); \
    uint64_t UnsubB = (b); \
    UnsubA >= UnsubB ? UnsubA - UnsubB : ~UnsubB + UnsubA + 1; \
  })

COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_ */
@@ -1,25 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_
#define COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)

/**
 * Reads scalar from memory, offset by segment.
 *
 * @return *(MEM) relative to segment
 * @see arch_prctl()
 * @see pushpop()
 */
#define fs(MEM) __peek("fs", MEM)
#define gs(MEM) __peek("gs", MEM)

#define __peek(SEGMENT, ADDRESS) \
  ({ \
    typeof(*(ADDRESS)) Pk; \
    asm("mov\t%%" SEGMENT ":%1,%0" : "=r"(Pk) : "m"(*(ADDRESS))); \
    Pk; \
  })

#endif /* __GNUC__ && !__STRICT_ANSI__ */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SEGMENTATION_H_ */
@@ -1,37 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#include "libc/bits/xmmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)

#define _mm_sha1rnds4_epu32(M128I_0, M128I_1, MEM) \
  __builtin_ia32_sha1rnds4((__v4si)(__m128i)(M128I_0), \
                           (__v4si)(__m128i)(M128I_1), (MEM))

#define _mm_sha1nexte_epu32(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_sha1nexte((__v4si)(__m128i)(M128I_0), \
                                     (__v4si)(__m128i)(M128I_1)))

#define _mm_sha1msg1_epu32(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_sha1msg1((__v4si)(__m128i)(M128I_0), \
                                    (__v4si)(__m128i)(M128I_1)))

#define _mm_sha1msg2_epu32(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_sha1msg2((__v4si)(__m128i)(M128I_0), \
                                    (__v4si)(__m128i)(M128I_1)))

#define _mm_sha256rnds2_epu32(M128I_0, M128I_1, M128I_2) \
  ((__m128i)__builtin_ia32_sha256rnds2((__v4si)(__m128i)(M128I_0), \
                                       (__v4si)(__m128i)(M128I_1), \
                                       (__v4si)(__m128i)(M128I_2)))

#define _mm_sha256msg1_epu32(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_sha256msg1((__v4si)(__m128i)(M128I_0), \
                                      (__v4si)(__m128i)(M128I_1)))

#define _mm_sha256msg2_epu32(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_sha256msg2((__v4si)(__m128i)(M128I_0), \
                                      (__v4si)(__m128i)(M128I_1)))

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_ */
@@ -1,31 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_

/**
 * @fileoverview SSE4 intrinsics.
 */

#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CUR_DIRECTION 4
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NO_EXC 8
#define _MM_FROUND_RAISE_EXC 0
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TO_NEAREST_INT 0
#define _MM_FROUND_TO_NEG_INF 1
#define _MM_FROUND_TO_POS_INF 2
#define _MM_FROUND_TO_ZERO 3
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)

#if !(__ASSEMBLER__ + __LINKER__ + 0)

#define _mm_extract_epi32(M128I, I32) \
  ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(M128I), (int)(I32)))

#define _mm_minpos_epu16(M128I) \
  ((__m128i)__builtin_ia32_phminposuw128((__v8hi)(__m128i)(M128I)))

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_ */
@@ -1,17 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)

/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § it's a trap! » ssse3 ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/

#define _mm_maddubs_epi16(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_pmaddubsw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))

#define _mm_shuffle_epi8(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_pshufb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_ */
@@ -1,33 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/bits.h"

/**
 * Decodes gray code.
 * @see https://en.wikipedia.org/wiki/Gray_code
 * @see gray()
 */
uint32_t ungray(uint32_t x) {
  x ^= x >> 16;
  x ^= x >> 8;
  x ^= x >> 4;
  x ^= x >> 2;
  x ^= x >> 1;
  return x;
}
@@ -1,41 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/bits/morton.h"

static unsigned long GetOddBits(unsigned long x) {
  x = (x | x >> 000) & 0x5555555555555555;
  x = (x | x >> 001) & 0x3333333333333333;
  x = (x | x >> 002) & 0x0F0F0F0F0F0F0F0F;
  x = (x | x >> 004) & 0x00FF00FF00FF00FF;
  x = (x | x >> 010) & 0x0000FFFF0000FFFF;
  x = (x | x >> 020) & 0x00000000FFFFFFFF;
  return x;
}

/**
 * Deinterleaves bits.
 *
 * @param 𝑖 is interleaved index
 * @return deinterleaved coordinate {ax := 𝑦, dx := 𝑥}
 * @see en.wikipedia.org/wiki/Z-order_curve
 * @see morton()
 */
axdx_t(unmorton)(unsigned long i) {
  return (axdx_t){GetOddBits(i >> 1), GetOddBits(i)};
}
28
libc/bits/weaken.h
Normal file → Executable file
@@ -1,28 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_WEAKEN_H_
#define COSMOPOLITAN_LIBC_BITS_WEAKEN_H_
#include "libc/bits/ezlea.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef __STRICT_ANSI__

#define weaken(symbol) ((const typeof(&(symbol)))weakaddr(#symbol))

#define strongaddr(symbolstr) \
  ({ \
    intptr_t waddr; \
    asm(ezlea(symbolstr) "0" : "=r"(waddr)); \
    waddr; \
  })

#define weakaddr(symbolstr) \
  ({ \
    intptr_t waddr; \
    asm(".weak\t" symbolstr "\n\t" ezlea(symbolstr) "0" : "=r"(waddr)); \
    waddr; \
  })

#else
#define weaken(symbol) symbol
#define weakaddr(symbolstr) &(symbolstr)
#endif /* __STRICT_ANSI__ */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_WEAKEN_H_ */
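A sketch of the usual pattern this macro enables, assuming this header is included and that expensive_logger is a hypothetical symbol which may or may not be linked into the binary:

#include <stdio.h>

void expensive_logger(const char *);  /* hypothetical optional dependency */

void log_event(const char *msg) {
  if (weaken(expensive_logger)) {     /* evaluates to NULL when not linked in */
    weaken(expensive_logger)(msg);
  } else {
    fputs(msg, stderr);
  }
}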
@@ -1,29 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)

#define _mm_clmulepi64_si128(X, Y, IMM) \
  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
                                        (__v2di)(__m128i)(Y), (char)(IMM)))

#define _mm_aesenc_si128(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_aesenc128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesenclast_si128(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_aesenclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))

#define _mm_aesdec_si128(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_aesdec128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesdeclast_si128(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_aesdeclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))

#define _mm_aesimc_si128(M128I) \
  ((__m128i)__builtin_ia32_aesimc128((__v2di)(M128I)))
#define _mm_aesimclast_si128(M128I) \
  ((__m128i)__builtin_ia32_aesimclast128((__v2di)(M128I)))

#define _mm_aeskeygenassist_si128(X, Y) \
  ((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(X), (int)(Y)))

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_ */
@@ -1,16 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XADD_H_
#define COSMOPOLITAN_LIBC_BITS_XADD_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

#define _xadd(p, v) \
  ({ \
    typeof(*(p)) Res; \
    autotype(Res) Val = (v); \
    asm volatile("xadd\t%0,%1" : "=r"(Res), "+m"(*(p)) : "0"(Val)); \
    Res + Val; \
  })

COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XADD_H_ */
25
libc/bits/xchg.internal.h
Normal file → Executable file
@@ -1,25 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XCHG_H_
#define COSMOPOLITAN_LIBC_BITS_XCHG_H_
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)

/**
 * Exchanges *MEMORY into *LOCALVAR.
 *
 * @return *MEMORY
 * @see lockcmpxchg()
 * todo(jart): what's the point of this?
 */
#define xchg(MEMORY, LOCALVAR) \
  ({ \
    autotype(MEMORY) Memory = (MEMORY); \
    typeof(Memory) LocalVar = (LOCALVAR); \
    typeof(*Memory) Temp; \
    memcpy(&Temp, Memory, sizeof(Temp)); \
    memcpy(Memory, LocalVar, sizeof(Temp)); \
    memcpy(LocalVar, &Temp, sizeof(Temp)); \
    Temp; \
  })

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XCHG_H_ */
@@ -1,243 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
#include "libc/bits/emmintrin.internal.h"
#include "libc/dce.h"

#define _MM_EXCEPT_MASK 0x003f
#define _MM_EXCEPT_INVALID 0x0001
#define _MM_EXCEPT_DENORM 0x0002
#define _MM_EXCEPT_DIV_ZERO 0x0004
#define _MM_EXCEPT_OVERFLOW 0x0008
#define _MM_EXCEPT_UNDERFLOW 0x0010
#define _MM_EXCEPT_INEXACT 0x0020
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_INVALID 0x0080
#define _MM_MASK_DENORM 0x0100
#define _MM_MASK_DIV_ZERO 0x0200
#define _MM_MASK_OVERFLOW 0x0400
#define _MM_MASK_UNDERFLOW 0x0800
#define _MM_MASK_INEXACT 0x1000
#define _MM_ROUND_MASK 0x6000
#define _MM_ROUND_NEAREST 0x0000
#define _MM_ROUND_DOWN 0x2000
#define _MM_ROUND_UP 0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000

#define _MM_SHUFFLE(A, B, C, D) (((A) << 6) | ((B) << 4) | ((C) << 2) | (D))

#if !(__ASSEMBLER__ + __LINKER__ + 0)

typedef int __v4si _Vector_size(16);
typedef unsigned int __v4su _Vector_size(16);
typedef float __v4sf _Vector_size(16);
typedef float __m128 _Vector_size(16) forcealign(16) mayalias;
typedef float __m128_u _Vector_size(16) forcealign(1) mayalias;

/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § it's a trap! » sse » simd ops ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/

#define _mm_add_ps(M128_0, M128_1) \
  ((__m128)((__v4sf)(M128_0) + (__v4sf)(M128_1)))
#define _mm_sub_ps(M128_0, M128_1) \
  ((__m128)((__v4sf)(M128_0) - (__v4sf)(M128_1)))
#define _mm_mul_ps(M128_0, M128_1) \
  ((__m128)((__v4sf)(M128_0) * (__v4sf)(M128_1)))
#define _mm_div_ps(M128_0, M128_1) \
  ((__m128)((__v4sf)(M128_0) / (__v4sf)(M128_1)))
#define _mm_and_ps(M128_0, M128_1) \
  ((__m128)((__v4su)(M128_0) & (__v4su)(M128_1)))
#define _mm_or_ps(M128_0, M128_1) \
  ((__m128)((__v4su)(M128_0) | (__v4su)(M128_1)))
#define _mm_xor_ps(M128_0, M128_1) /* XORPD [u32 simd xor] */ \
  ((__m128)((__v4su)(M128_0) ^ (__v4su)(M128_1)))
#define _mm_andnot_ps(M128_0, M128_1) /* ANDNPS [u32 simd nand] */ \
  ((__m128)(~(__v4su)(M128_0) & (__v4su)(M128_1)))
#define _mm_rcp_ps(M128) __builtin_ia32_rcpps((__v4sf)(M128))
#define _mm_sqrt_ps(M128) __builtin_ia32_sqrtps((__v4sf)(M128))
#define _mm_rsqrt_ps(M128) __builtin_ia32_rsqrtps((__v4sf)(M128))

#define _mm_min_ps(M128_0, M128_1) \
  __builtin_ia32_minps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ps(M128_0, M128_1) \
  __builtin_ia32_maxps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_min_ss(M128_0, M128_1) \
  __builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ss(M128_0, M128_1) \
  __builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpeq_ps(M128_0, M128_1) \
  __builtin_ia32_cmpeqps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpneq_ps(M128_0, M128_1) \
  __builtin_ia32_cmpneqps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmplt_ps(M128_0, M128_1) \
  __builtin_ia32_cmpltps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnlt_ps(M128_0, M128_1) \
  __builtin_ia32_cmpnltps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmple_ps(M128_0, M128_1) \
  __builtin_ia32_cmpleps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnle_ps(M128_0, M128_1) \
  __builtin_ia32_cmpnleps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpgt_ps(M128_0, M128_1) \
  __builtin_ia32_cmpltps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpngt_ps(M128_0, M128_1) \
  __builtin_ia32_cmpnltps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpge_ps(M128_0, M128_1) \
  __builtin_ia32_cmpleps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpnge_ps(M128_0, M128_1) \
  __builtin_ia32_cmpnleps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpord_ps(M128_0, M128_1) \
  __builtin_ia32_cmpordps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpunord_ps(M128_0, M128_1) \
  __builtin_ia32_cmpunordps((__v4sf)(M128_0), (__v4sf)(M128_1))

/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § it's a trap! » sse » scalar ops ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/

#define _mm_add_ss(m128_0, m128_1) \
  ({ \
    __m128 a = m128_0; \
    __m128 b = m128_1; \
    a[0] += b[0]; \
    a; \
  })

#define _mm_sub_ss(m128_0, m128_1) \
  ({ \
    __m128 a = m128_0; \
    __m128 b = m128_1; \
    a[0] -= b[0]; \
    a; \
  })

#define _mm_mul_ss(m128_0, m128_1) \
  ({ \
    __m128 a = m128_0; \
    __m128 b = m128_1; \
    a[0] *= b[0]; \
    a; \
  })

#define _mm_div_ss(m128_0, m128_1) \
  ({ \
    __m128 a = m128_0; \
    __m128 b = m128_1; \
    a[0] /= b[0]; \
    a; \
  })

#define _mm_rcp_ss(M128) __builtin_ia32_rcpss((__v4sf)(M128)) /*~1/x*/
#define _mm_sqrt_ss(M128) __builtin_ia32_sqrtss((__v4sf)(M128)) /*sqrt𝑥*/
#define _mm_rsqrt_ss(M128) __builtin_ia32_rsqrtss((__v4sf)(M128)) /*~1/sqrt𝑥*/

#define _mm_min_ss(M128_0, M128_1) \
  __builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ss(M128_0, M128_1) \
  __builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpeq_ss(M128_0, M128_1) \
  __builtin_ia32_cmpeqss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpneq_ss(M128_0, M128_1) \
  __builtin_ia32_cmpneqss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmplt_ss(M128_0, M128_1) \
  __builtin_ia32_cmpltss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnlt_ss(M128_0, M128_1) \
  __builtin_ia32_cmpnltss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmple_ss(M128_0, M128_1) \
  __builtin_ia32_cmpless((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnle_ss(M128_0, M128_1) \
  __builtin_ia32_cmpnless((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpgt_ss(M128_0, M128_1) \
  __builtin_ia32_cmpltss((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpngt_ss(M128_0, M128_1) \
  __builtin_ia32_cmpnltss((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpge_ss(M128_0, M128_1) \
  __builtin_ia32_cmpless((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpnge_ss(M128_0, M128_1) \
  __builtin_ia32_cmpnless((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpord_ss(M128_0, M128_1) \
  __builtin_ia32_cmpordss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpunord_ss(M128_0, M128_1) \
  __builtin_ia32_cmpunordss((__v4sf)(M128_0), (__v4sf)(M128_1))

/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § it's a trap! » sse » memory ops ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/

#define _mm_set1_ps(M128_0) ((__m128)(__v4sf){M128_0, M128_0, M128_0, M128_0})
#define _mm_setzero_ps() ((__m128)(__v4sf){0})
#define _mm_cvtss_f32(M128_0) (((__v4sf)(M128_0))[0])
#define _mm_load_ps(FLOATPTR) (*(__m128 *)(FLOATPTR))
#define _mm_loadu_ps(FLOATPTR) (*(__m128_u *)(FLOATPTR))
#define _mm_set_ps(WHO, DESIGNED, THIS, SHEESH) \
  ((__m128)(__v4sf){SHEESH, THIS, DESIGNED, WHO})
#define _mm_set_ss(FLOAT) ((__m128)(__v4sf){FLOAT, 0, 0, 0})
#define _mm_load_ss(FLOATPTR) _mm_set_ss(*(FLOATPTR))
#define _mm_store_ss(FLOATPTR, M128_0) ((FLOATPTR)[0] = ((__v4sf)(M128_0))[0])
#define _mm_store_ps(FLOATPTR, M128_0) (*(__m128 *)(FLOATPTR) = (M128_0))
#define _mm_storeu_ps(FLOATPTR, M128_0) (*(__m128_u *)(FLOATPTR) = (M128_0))
#define _mm_shuffle_ps(M128_0, M128_1, MASK) \
  ((__m128)__builtin_ia32_shufps((__v4sf)(M128_0), (__v4sf)(M128_1), (MASK)))

#ifdef __llvm__
#define _mm_movehl_ps(M128_0, M128_1) \
  ((__m128)__builtin_shufflevector((__v4sf)(__m128)(M128_0), \
                                   (__v4sf)(__m128)(M128_1), 6, 7, 2, 3))
/* intrinsics unstable & constantly breaking, consider ansi c or asm. */
/* each version of llvm has a different incompatible impl for this one */
#else
#define _mm_movehl_ps(M128_0, M128_1) \
  ((__m128)__builtin_ia32_movhlps((__v4sf)(__m128)(M128_0), \
                                  (__v4sf)(__m128)(M128_1)))
#define _mm_storel_pi(M64PTR, M128_0) \
  __builtin_ia32_storelps((__v2sf *)(M64PTR), (__v4sf)(M128_0))
#endif

/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § it's a trap! » sse » cast ops ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/

#define _mm_cvtps_epi32(M128_0) \
  ((__m128i)__builtin_ia32_cvtps2dq((__v4sf)(M128_0)))

#ifdef __llvm__
#define _mm_cvtepi32_ps(M128I_0) \
  ((__m128) __builtin_convertvector((__v4si)(__m128i)(M128I_0), __v4sf))
#else
#define _mm_cvtepi32_ps(M128I_0) \
  ((__m128)__builtin_ia32_cvtdq2ps((__v4si)(M128I_0)))
#endif

/*───────────────────────────────────────────────────────────────────────────│─╗
│ cosmopolitan § it's a trap! » sse » misc ─╬─│┼
╚────────────────────────────────────────────────────────────────────────────│*/

#define _mm_getcsr() (__builtin_ia32_stmxcsr())
#define _mm_setcsr(U32CONF) (__builtin_ia32_ldmxcsr(U32CONF))

#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)
#define _MM_SET_ROUNDING_MODE(MODE) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (MODE)))

#define XMM_DESTROY(VAR) \
  do { \
    if (!IsTrustworthy()) { \
      asm volatile("xorps\t%1,%0" : "=x"(VAR) : "0"(VAR)); \
    } \
  } while (0)

/*
** Ternary:
**
**   Integer:                 _mm_or_si128(_mm_and_si128(a, cond), _mm_andnot_si128(cond, b))
**   32-bit float:            _mm_or_ps(_mm_and_ps(a, cond), _mm_andnot_ps(cond, b))
**   64-bit float:            _mm_or_pd(_mm_and_pd(a, cond), _mm_andnot_pd(cond, b))
**   Integer (SSE4.1+):       _mm_blendv_epi8(a, b, cond)
**   32-bit float (SSE4.1+):  _mm_blendv_ps(a, b, cond)
**   64-bit float (SSE4.1+):  _mm_blendv_pd(a, b, cond)
*/

#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_ */
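Following the recipe in the closing comment, a branch-free per-lane select can be written with the _ps macros defined above (a sketch; cond is expected to be all-ones or all-zeros per lane, e.g. the result of one of the _mm_cmp*_ps macros):

static inline __m128 select_ps(__m128 cond, __m128 a, __m128 b) {
  /* picks a where cond is set, b elsewhere */
  return _mm_or_ps(_mm_and_ps(a, cond), _mm_andnot_ps(cond, b));
}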