mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-23 05:42:29 +00:00
Initial import
This commit is contained in:
commit
c91b3c5006
14915 changed files with 590219 additions and 0 deletions
23
libc/bits/abs.c
Normal file
23
libc/bits/abs.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/conv/conv.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Returns ABS(x), where ABS comes from libc/macros.h (not visible here).
 *
 * The function name is parenthesized so that a function-like macro named
 * abs, should one be in scope, is not expanded at this definition.
 *
 * NOTE(review): presumably the most negative int has no representable
 * result; confirm against the ABS macro.
 */
int(abs)(int x) { return ABS(x); }
|
52
libc/bits/atomic.h
Normal file
52
libc/bits/atomic.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
|
||||
#include "libc/bits/bits.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
|
||||
* @fileoverview C11 version of The Cosmopolitan Atomics Library.
|
||||
*
|
||||
* - Forty-two different ways to say MOV.
|
||||
* - Fourteen different ways to say XCHG.
|
||||
* - Twenty different ways to say LOCK CMPXCHG.
|
||||
*
|
||||
* Living proof high-level languages can be lower-level than assembly.
|
||||
*/
|
||||
|
||||
/**
 * Memory orderings are plain ints in this header, numbered 0 through 5
 * from relaxed to seq_cst. The *_explicit macros defined in this file
 * accept an ORDER argument but never expand it.
 */
#define memory_order int
|
||||
#define memory_order_relaxed 0
|
||||
#define memory_order_consume 1
|
||||
#define memory_order_acquire 2
|
||||
#define memory_order_release 3
|
||||
#define memory_order_acq_rel 4
|
||||
#define memory_order_seq_cst 5
|
||||
|
||||
/**
 * Flag type used by the atomic_flag_*() macros.
 */
#define atomic_flag struct AtomicFlag

/**
 * Clears flag by storing zero at the start of its __cacheline array.
 */
#define atomic_flag_clear(PTR) atomic_store((PTR)->__cacheline, 0)

/**
 * Attempts to move flag from 0 to 1 with lockcmpxchg().
 *
 * @return whatever lockcmpxchg() yields for an expected value of zero
 * @note NOTE(review): C11 atomic_flag_test_and_set() returns the prior
 *     value of the flag; confirm lockcmpxchg() has that polarity
 */
#define atomic_flag_test_and_set(PTR)        \
  ({                                         \
    uint32_t ax = 0;                         \
    lockcmpxchg((PTR)->__cacheline, &ax, 1); \
  })

/* VAL of atomic_exchange() must be an lvalue: its address is passed on. */
#define atomic_init(PTR, VAL)                   atomic_store(PTR, VAL)
#define atomic_exchange(PTR, VAL)               lockxchg(PTR, &(VAL))
#define atomic_compare_exchange_strong(X, Y, Z) lockcmpxchg(X, Y, Z)
#define atomic_compare_exchange_weak(X, Y, Z)   lockcmpxchg(X, Y, Z)

/* The explicit forms accept memory order arguments and ignore them. */
#define atomic_load_explicit(PTR, ORDER)          atomic_load(PTR)
#define atomic_store_explicit(PTR, VAL, ORDER)    atomic_store(PTR, VAL)
#define atomic_exchange_explicit(PTR, VAL, ORDER) lockxchg(PTR, &(VAL))

/* Flag forms delegate so they act on (PTR)->__cacheline like their
   non-explicit twins, rather than on the struct pointer itself. */
#define atomic_flag_clear_explicit(PTR, ORDER) atomic_flag_clear(PTR)
#define atomic_flag_test_and_set_explicit(PTR, ORDER) \
  atomic_flag_test_and_set(PTR)

#define atomic_compare_exchange_strong_explicit(X, Y, Z, S, F) \
  lockcmpxchg(X, Y, Z)
#define atomic_compare_exchange_weak_explicit(X, Y, Z, S, F) \
  lockcmpxchg(X, Y, Z)
|
||||
|
||||
/**
 * Storage behind atomic_flag.
 *
 * Holds sixteen 32-bit words (64 bytes) and is declared aligned(64).
 * The atomic_flag_clear() and atomic_flag_test_and_set() macros in this
 * header address it through __cacheline, i.e. its first word.
 */
struct AtomicFlag {
|
||||
uint32_t __cacheline[16]; /* Intel V.O §9.4.6 */
|
||||
} aligned(64);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_ATOMIC_H_ */
|
133
libc/bits/avx2intrin.h
Normal file
133
libc/bits/avx2intrin.h
Normal file
|
@ -0,0 +1,133 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
|
||||
#include "libc/bits/avxintrin.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
 * Computes lane-wise minimum of sixteen signed 16-bit integers.
 *
 * Uses the 256-bit PMINSW builtin, which takes the __v16hi operands
 * this macro casts to; the float MINPS builtin does not.
 */
#define _mm256_min_epi16(M256_0, M256_1) \
  ((__m256i)__builtin_ia32_pminsw256((__v16hi)(M256_0), (__v16hi)(M256_1)))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § avx2 » simd ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
/* lane-wise float arithmetic spelled with GNU vector operators */
#define _mm256_add_ps(A, B) ((__m256)((__v8sf)(A) + (__v8sf)(B)))
#define _mm256_sub_ps(A, B) ((__m256)((__v8sf)(A) - (__v8sf)(B)))
#define _mm256_mul_ps(A, B) ((__m256)((__v8sf)(A) * (__v8sf)(B)))
#define _mm256_div_ps(A, B) ((__m256)((__v8sf)(A) / (__v8sf)(B)))

/* bitwise ops with the lanes reinterpreted as unsigned 32-bit words */
#define _mm256_and_ps(A, B) ((__m256)((__v8su)(A) & (__v8su)(B)))
#define _mm256_or_ps(A, B)  ((__m256)((__v8su)(A) | (__v8su)(B)))
#define _mm256_xor_ps(A, B) /* u32 simd xor */ \
  ((__m256)((__v8su)(A) ^ (__v8su)(B)))
#define _mm256_andnot_ps(A, B) /* u32 simd (~A) & B */ \
  ((__m256)(~(__v8su)(A) & (__v8su)(B)))

/* unary ops forwarded to the 256-bit compiler builtins */
#define _mm256_rcp_ps(V)   __builtin_ia32_rcpps256((__v8sf)(V))
#define _mm256_sqrt_ps(V)  __builtin_ia32_sqrtps256((__v8sf)(V))
#define _mm256_rsqrt_ps(V) __builtin_ia32_rsqrtps256((__v8sf)(V))
#define _mm256_round_ps(V, IMM) \
  ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), IMM))
|
||||
|
||||
/* 32-bit integer lanes: add wraps as unsigned, compare is signed */
#define _mm256_add_epi32(A, B)   ((__m256i)((__v8su)(A) + (__v8su)(B)))
#define _mm256_cmpgt_epi32(A, B) ((__m256i)((__v8si)(A) > (__v8si)(B)))

/* signed (epi32) and unsigned (epu32) lane-wise min / max */
#define _mm256_min_epi32(A, B) \
  ((__m256i)__builtin_ia32_pminsd256((__v8si)(A), (__v8si)(B)))
#define _mm256_min_epu32(A, B) \
  ((__m256i)__builtin_ia32_pminud256((__v8si)(A), (__v8si)(B)))
#define _mm256_max_epi32(A, B) \
  ((__m256i)__builtin_ia32_pmaxsd256((__v8si)(A), (__v8si)(B)))
#define _mm256_max_epu32(A, B) \
  ((__m256i)__builtin_ia32_pmaxud256((__v8si)(A), (__v8si)(B)))

/* byte-granular blend of A and B under control of MASK */
#define _mm256_blendv_epi8(A, B, MASK)                            \
  ((__m256i)__builtin_ia32_pblendvb256((__v32qi)(A), (__v32qi)(B), \
                                       (__v32qi)(MASK)))

/* lane-wise float min / max */
#define _mm256_min_ps(A, B)                             \
  ((__m256)__builtin_ia32_minps256((__v8sf)(__m256)(A), \
                                   (__v8sf)(__m256)(B)))
#define _mm256_max_ps(A, B)                             \
  ((__m256)__builtin_ia32_maxps256((__v8sf)(__m256)(A), \
                                   (__v8sf)(__m256)(B)))
|
||||
/**
 * Lane-wise float comparisons on eight-float vectors.
 *
 * The cmpneqps/cmpltps/... builtins are the 128-bit (__v4sf) forms, so
 * the 256-bit operands here go through __builtin_ia32_cmpps256() with
 * a VCMPPS predicate immediate instead:
 *
 *     1 = LT   2 = LE   3 = UNORD   4 = NEQ
 *     5 = NLT  6 = NLE  7 = ORD
 *
 * The greater-than family reuses the less-than predicates with the
 * operands swapped, as the original macros did.
 */
#define _mm256_cmpneq_ps(M256_0, M256_1)                    \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_0), \
                                   (__v8sf)(__m256)(M256_1), 4))
#define _mm256_cmplt_ps(M256_0, M256_1)                     \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_0), \
                                   (__v8sf)(__m256)(M256_1), 1))
#define _mm256_cmpnlt_ps(M256_0, M256_1)                    \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_0), \
                                   (__v8sf)(__m256)(M256_1), 5))
#define _mm256_cmple_ps(M256_0, M256_1)                     \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_0), \
                                   (__v8sf)(__m256)(M256_1), 2))
#define _mm256_cmpnle_ps(M256_0, M256_1)                    \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_0), \
                                   (__v8sf)(__m256)(M256_1), 6))
#define _mm256_cmpgt_ps(M256_0, M256_1)                     \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_1), \
                                   (__v8sf)(__m256)(M256_0), 1))
#define _mm256_cmpngt_ps(M256_0, M256_1)                    \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_1), \
                                   (__v8sf)(__m256)(M256_0), 5))
#define _mm256_cmpge_ps(M256_0, M256_1)                     \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_1), \
                                   (__v8sf)(__m256)(M256_0), 2))
#define _mm256_cmpnge_ps(M256_0, M256_1)                    \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_1), \
                                   (__v8sf)(__m256)(M256_0), 6))
#define _mm256_cmpord_ps(M256_0, M256_1)                    \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_0), \
                                   (__v8sf)(__m256)(M256_1), 7))
#define _mm256_cmpunord_ps(M256_0, M256_1)                  \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(M256_0), \
                                   (__v8sf)(__m256)(M256_1), 3))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § avx2 » memory ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
/**
 * Single-member wrapper around __m256i carrying the thatispacked and
 * mayalias attribute macros. The _mm256_loadu_si256() and
 * _mm256_storeu_si256() macros in this header cast their pointer to it.
 */
struct thatispacked PackedMayaliasIntyYmm {
|
||||
__m256i Ymm;
|
||||
} mayalias;
|
||||
|
||||
/**
 * Builds a vector of eight floats from scalars.
 *
 * _mm256_set_ps() stores its first argument in lane 0 and its last in
 * lane 7; _mm256_setr_ps() passes its arguments on in reverse.
 *
 * NOTE(review): Intel documents _mm256_set_ps(e7, ..., e0) as placing
 * its LAST argument in lane 0 and _mm256_setr_ps() as placing its FIRST
 * argument in lane 0, i.e. the opposite of what is coded here. Confirm
 * whether callers depend on the current order before changing it.
 */
#define _mm256_set_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
|
||||
((__m256)(__v8sf){(float)(FLT_0), (float)(FLT_1), (float)(FLT_2), \
|
||||
(float)(FLT_3), (float)(FLT_4), (float)(FLT_5), \
|
||||
(float)(FLT_6), (float)(FLT_7)})
|
||||
#define _mm256_set1_ps(FLT_0) \
|
||||
_mm256_set_ps(FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0)
|
||||
#define _mm256_setr_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
|
||||
_mm256_set_ps(FLT_7, FLT_6, FLT_5, FLT_4, FLT_3, FLT_2, FLT_1, FLT_0)
|
||||
|
||||
/**
 * Integer counterparts: eight 32-bit lanes, first argument in lane 0
 * for _mm256_set_epi32() and reversed for _mm256_setr_epi32().
 *
 * NOTE(review): same lane-order question as the float versions applies.
 */
#define _mm256_set_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
|
||||
INT_7) \
|
||||
((__m256i)(__v8si){(int)(INT_0), (int)(INT_1), (int)(INT_2), (int)(INT_3), \
|
||||
(int)(INT_4), (int)(INT_5), (int)(INT_6), (int)(INT_7)})
|
||||
#define _mm256_set1_epi32(INT_0) \
|
||||
_mm256_set_epi32(INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0)
|
||||
#define _mm256_setr_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
|
||||
INT_7) \
|
||||
_mm256_set_epi32(INT_7, INT_6, INT_5, INT_4, INT_3, INT_2, INT_1, INT_0)
|
||||
|
||||
/**
 * Loads 256 bits through the packed, may-alias wrapper struct.
 *
 * @param M256IP_0 is a pointer to __m256i, evaluated once
 * @return the __m256i found there
 */
#define _mm256_loadu_si256(M256IP_0)            \
  ({                                            \
    const __m256i *Src = (M256IP_0);            \
    ((struct PackedMayaliasIntyYmm *)Src)->Ymm; \
  })

/**
 * Stores 256 bits through the packed, may-alias wrapper struct.
 *
 * @param M256IP_0 is a pointer to __m256i, evaluated once
 * @param M256I_1 is the vector to store
 * @return the stored value (result of the assignment)
 */
#define _mm256_storeu_si256(M256IP_0, M256I_1)              \
  ({                                                        \
    __m256i *Dst = (M256IP_0);                              \
    ((struct PackedMayaliasIntyYmm *)Dst)->Ymm = (M256I_1); \
  })
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_ */
|
51
libc/bits/avxintrin.h
Normal file
51
libc/bits/avxintrin.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
typedef float __m256 _Vector_size(32) mayalias;
|
||||
typedef double __m256d _Vector_size(32) mayalias;
|
||||
typedef long long __m256i _Vector_size(32) mayalias;
|
||||
|
||||
typedef float __m256_u _Vector_size(32) aligned(1) mayalias;
|
||||
typedef double __m256d_u _Vector_size(32) aligned(1) mayalias;
|
||||
typedef long long __m256i_u _Vector_size(32) aligned(1) mayalias;
|
||||
|
||||
typedef double __v4df _Vector_size(32);
|
||||
typedef float __v8sf _Vector_size(32);
|
||||
typedef long long __v4di _Vector_size(32);
|
||||
typedef unsigned long long __v4du _Vector_size(32);
|
||||
typedef int __v8si _Vector_size(32);
|
||||
typedef unsigned __v8su _Vector_size(32);
|
||||
typedef short __v16hi _Vector_size(32);
|
||||
typedef unsigned short __v16hu _Vector_size(32);
|
||||
typedef char __v32qi _Vector_size(32);
|
||||
typedef unsigned char __v32qu _Vector_size(32);
|
||||
|
||||
/* vector of eight floats initialized from {0} */
#define _mm256_setzero_ps() ((__m256)(__v8sf){0})

/* loads and stores through a cast pointer; the *u forms go through the
   aligned(1) __m256_u typedef */
#define _mm256_load_ps(PTR)      (*(__m256 *)(PTR))
#define _mm256_loadu_ps(PTR)     (*(__m256_u *)(PTR))
#define _mm256_store_ps(PTR, V)  (*(__m256 *)(PTR) = (__m256)(V))
#define _mm256_storeu_ps(PTR, V) (*(__m256_u *)(PTR) = (__m256)(V))

/* 128-bit half extraction / insertion selected by an int immediate */
#define _mm256_extractf128_ps(V, IMM) \
  ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(IMM)))
#define _mm256_insertf128_ps(V, HALF, IMM)                        \
  ((__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V),  \
                                            (__v4sf)(__m128)(HALF), \
                                            (int)(IMM)))

/* widens a 128-bit float vector to 256 bits; llvm does it with a
   shuffle whose upper four lanes use index -1 */
#ifdef __llvm__
#define _mm256_castps128_ps256(V)                                          \
  ((__m256)__builtin_shufflevector((__v4sf)(__m128)(V), (__v4sf)(__m128)(V), \
                                   0, 1, 2, 3, -1, -1, -1, -1))
#else
#define _mm256_castps128_ps256(V) \
  ((__m256)__builtin_ia32_ps256_ps((__v4sf)(__m128)(V)))
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_ */
|
42
libc/bits/bcd2i.S
Normal file
42
libc/bits/bcd2i.S
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Converts binary-coded-decimal to integer.
/
/ Walks %rdi four bits at a time from the low end, scaling each digit
/ by a multiplier in %rcx that grows tenfold per step. The result is
/ zeroed before the emptiness check so that every path, including the
/ loop exit, leaves through .leafepilogue and stays paired with
/ .leafprologue.
/
/ @param rdi is the string copied into a word, one digit per nibble
/ @return rax is the decoded value, or zero if rdi is zero
bcd2i:	.leafprologue
	.profilable
	xor	%eax,%eax
	test	%rdi,%rdi
	je	2f
	mov	$1,%ecx
1:	mov	%edi,%edx
	and	$15,%edx
	imul	%rcx,%rdx
	add	%rdx,%rax
	add	%rcx,%rcx
	lea	(%rcx,%rcx,4),%rcx
	shr	$4,%rdi
	jne	1b
2:	.leafepilogue
	.endfn	bcd2i,globl
|
38
libc/bits/bcdadd.S
Normal file
38
libc/bits/bcdadd.S
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Performs addition on binary-coded decimals.
/* Operands arrive in %edi and %esi; the sum is returned in %eax.

   Each digit of the first operand is biased by 6 so that a decimal
   carry becomes a binary carry out of the nibble. Comparing the real
   sum against the carry-less sum (xor) reveals which nibbles received
   a carry; the bias is then subtracted again from every nibble that
   did NOT produce one.

   NOTE(review): the constants 0x6666666 and 0x11111110 cover seven
   digits, so the topmost nibble appears to get neither bias nor
   correction; confirm the intended operand range. */
|
||||
bcdadd: .leafprologue
|
||||
.profilable
|
||||
/* ecx = (a + bias) ^ b, i.e. the sum as it would be with no carries */
lea 0x6666666(%rdi),%ecx
|
||||
xor %esi,%ecx
|
||||
/* eax = a + b + bias, the real binary sum */
lea (%rdi,%rsi),%eax
|
||||
add $0x6666666,%eax
|
||||
/* bits that differ are carry-ins; invert and keep nibble boundaries */
xor %eax,%ecx
|
||||
not %ecx
|
||||
and $0x11111110,%ecx
|
||||
/* turn each boundary bit into a 6 in the nibble below it */
mov %ecx,%edx
|
||||
shr $2,%edx
|
||||
shr $3,%ecx
|
||||
orl %edx,%ecx
|
||||
sub %ecx,%eax
|
||||
.leafepilogue
|
||||
.endfn bcdadd,globl
|
54
libc/bits/bcxcpy.S
Normal file
54
libc/bits/bcxcpy.S
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/* Writes the sixteen hexadecimal digits of %rsi into the 16-byte
   buffer at %rdi, most significant digit first, using the characters
   0-9 and A-F. No terminator is stored.

   The first loop emits digits lowest nibble first into a 16-byte
   scratch area at -64(%rbp); the second loop copies them to the
   destination back to front.

   NOTE(review): after the sub below %rsp is %rbp-32, so the scratch
   bytes at %rbp-64..%rbp-49 sit below the stack pointer; this appears
   to rely on the red zone. Confirm. */
bcxcpy: push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
push %rbx
|
||||
xor %ebx,%ebx
|
||||
lea -64(%rbp),%rdx
|
||||
sub $24,%rsp
|
||||
/* loop 3: rbx counts nibbles 0..15; cl = 4*rbx is the shift amount */
3: lea (,%rbx,4),%ecx
|
||||
mov %rsi,%rax
|
||||
shr %cl,%rax
|
||||
and $15,%eax
|
||||
/* values above 9 get 7 extra so that adding 48 lands on 'A'..'F' */
cmp $9,%eax
|
||||
lea 7(%rax),%ecx
|
||||
cmova %ecx,%eax
|
||||
add $48,%eax
|
||||
mov %al,(%rdx,%rbx)
|
||||
add $1,%rbx
|
||||
cmp $16,%rbx
|
||||
jne 3b
|
||||
/* loop 4: rax walks scratch upward, rdx walks destination downward */
mov %rdx,%rax
|
||||
lea -48(%rbp),%rcx
|
||||
lea 15(%rdi),%rdx
|
||||
4: movzbl (%rax),%ebx
|
||||
add $1,%rax
|
||||
sub $1,%rdx
|
||||
mov %bl,1(%rdx)
|
||||
cmp %rcx,%rax
|
||||
jne 4b
|
||||
add $24,%rsp
|
||||
pop %rbx
|
||||
pop %rbp
|
||||
ret
|
||||
.endfn bcxcpy,globl
|
28
libc/bits/bigword.h
Normal file
28
libc/bits/bigword.h
Normal file
|
@ -0,0 +1,28 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
|
||||
|
||||
#if 0
|
||||
/**
|
||||
* Let BIGWORD be the number of bytes in the largest cpu register
|
||||
* available within the instruction set architecture requirements chosen
|
||||
* at compile-time.
|
||||
*
|
||||
* In plainer terms, if you tune with flags like -mavx, you're not just
|
||||
* giving the compiler permission to generate code that's incompatible
|
||||
* with older computers; you're also asking Cosmopolitan to systemically
|
||||
* change alignment, vectoring, buffering, ABIs, memory allocation, etc.
|
||||
*/
|
||||
#endif
|
||||
#ifndef BIGWORD
|
||||
#if __AVX512F__ + 0
|
||||
#define BIGWORD 64
|
||||
#elif __AVX2__ + 0
|
||||
#define BIGWORD 32
|
||||
#elif __SSE2__ + 0
|
||||
#define BIGWORD 16
|
||||
#else
|
||||
#define BIGWORD __BIGGEST_ALIGNMENT__
|
||||
#endif
|
||||
#endif /*BIGWORD*/
|
||||
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_BIGWORD_H_ */
|
31
libc/bits/bitreverse16.S
Normal file
31
libc/bits/bitreverse16.S
Normal file
|
@ -0,0 +1,31 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/* Reverses the bits of the 16-bit value in %di; result is left in %ax.

   %rbx is saved and loaded with the address of kReverseBits (as a
   32-bit immediate) because xlat indexes the table at %rbx with %al.
   The low byte is translated, the two bytes are exchanged, and the
   former high byte is translated, so each reversed byte ends up in the
   opposite half of %ax. */
bitreverse16:
|
||||
push %rbx
|
||||
mov %edi,%eax
|
||||
mov $kReverseBits,%ebx
|
||||
xlat
|
||||
xchg %al,%ah
|
||||
xlat
|
||||
pop %rbx
|
||||
ret
|
||||
.endfn bitreverse16,globl
|
28
libc/bits/bitreverse32.c
Normal file
28
libc/bits/bitreverse32.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
|
||||
/**
 * Reverses the bit order of a 32-bit word.
 *
 * The bytes are first reordered by bswap_32() (declared in
 * libc/bits/bits.h; presumably a byte swap, body not shown here), then
 * three shift-and-mask rounds exchange neighbouring bits, bit pairs and
 * nibbles inside every byte.
 *
 * @param x is the word to mirror
 * @return x with its bits in the opposite order
 */
uint32_t(bitreverse32)(uint32_t x) {
  uint32_t v = bswap_32(x);
  v = ((v >> 1) & 0x55555555) | ((v << 1) & 0xaaaaaaaa);
  v = ((v >> 2) & 0x33333333) | ((v << 2) & 0xcccccccc);
  v = ((v >> 4) & 0x0f0f0f0f) | ((v << 4) & 0xf0f0f0f0);
  return v;
}
|
28
libc/bits/bitreverse64.c
Normal file
28
libc/bits/bitreverse64.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
|
||||
/**
 * Reverses the bit order of a 64-bit word.
 *
 * Every step, including the byte reordering, is done inline with
 * 64-bit masks so the full width of x is carried through: bits, bit
 * pairs and nibbles are exchanged inside each byte, then bytes, 16-bit
 * halves and finally the two 32-bit halves trade places.
 *
 * @param x is the word to mirror
 * @return x with bit 0 moved to bit 63, bit 1 to bit 62, and so on
 */
uint64_t bitreverse64(uint64_t x) {
  x = ((x & 0xaaaaaaaaaaaaaaaaull) >> 1) | ((x & 0x5555555555555555ull) << 1);
  x = ((x & 0xccccccccccccccccull) >> 2) | ((x & 0x3333333333333333ull) << 2);
  x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4) | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
  x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8);
  x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16);
  return (x >> 32) | (x << 32);
}
|
31
libc/bits/bitreverse8.S
Normal file
31
libc/bits/bitreverse8.S
Normal file
|
@ -0,0 +1,31 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/* Reverses the bits of the byte in %dil; result is left in %al.

   %rbx is saved and pointed at kReverseBits (address loaded as a
   32-bit immediate) so that xlat can replace %al with the table entry
   it indexes. */
bitreverse8:
|
||||
.leafprologue
|
||||
.profilable
|
||||
push %rbx
|
||||
mov %edi,%eax
|
||||
mov $kReverseBits,%ebx
|
||||
xlat
|
||||
pop %rbx
|
||||
.leafepilogue
|
||||
.endfn bitreverse8,globl
|
531
libc/bits/bits.h
Normal file
531
libc/bits/bits.h
Normal file
|
@ -0,0 +1,531 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
 * Yields the value of x when its type is unsigned.
 *
 * For a signed type (typeof(x))(-1) < 0 is true, the divisor becomes
 * !1 == 0, and the expression turns into a division by zero.
 *
 * @note x is expanded twice, but only the first use is evaluated
 */
#define CheckUnsigned(x) ((x) / !((typeof(x))(-1) < 0))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
extern const bool kTrue;
|
||||
extern const bool kFalse;
|
||||
extern const uint8_t kReverseBits[256];
|
||||
|
||||
uint16_t bswap_16(uint16_t) pureconst;
|
||||
uint32_t bswap_32(uint32_t) pureconst;
|
||||
uint64_t bswap_64(uint64_t) pureconst; /* 64 bits in, 64 bits out */
|
||||
unsigned long popcount(unsigned long) pureconst;
|
||||
uint32_t gray(uint32_t) pureconst;
|
||||
uint32_t ungray(uint32_t) pureconst;
|
||||
unsigned bcdadd(unsigned, unsigned) pureconst;
|
||||
unsigned long bcd2i(unsigned long) pureconst;
|
||||
unsigned long i2bcd(unsigned long) pureconst;
|
||||
void bcxcpy(unsigned char (*)[16], unsigned long);
|
||||
int ffs(int) pureconst;
|
||||
int ffsl(long int) pureconst;
|
||||
int ffsll(long long int) pureconst;
|
||||
int fls(int) pureconst;
|
||||
int flsl(long int) pureconst;
|
||||
int flsll(long long int) pureconst;
|
||||
uint8_t bitreverse8(uint8_t) libcesque pureconst;
|
||||
uint16_t bitreverse16(uint16_t) libcesque pureconst;
|
||||
uint32_t bitreverse32(uint32_t) libcesque pureconst;
|
||||
uint64_t bitreverse64(uint64_t) libcesque pureconst;
|
||||
unsigned long roundup2pow(unsigned long) libcesque pureconst;
|
||||
unsigned long roundup2log(unsigned long) libcesque pureconst;
|
||||
unsigned long rounddown2pow(unsigned long) libcesque pureconst;
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » no assembly required ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
/**
 * Undocumented incantations for ROR, ROL, and SAR.
 *
 * ROR and ROL rotate w right / left by k bits. Both shift counts are
 * reduced modulo the bit width of w, so k == 0 (or a multiple of the
 * width) no longer shifts by the full width, which C leaves undefined.
 * For 0 < k < width the result is the same as before.
 *
 * @param w must have unsigned type, enforced through CheckUnsigned()
 * @param k is the bit count
 * @note w and k are evaluated more than once
 * @note NOTE(review): SAR is left exactly as found. Its ~0u mask is
 *     32 bits wide and it ORs in the top k bits of w rather than copies
 *     of the sign bit; confirm the intended semantics.
 */
#define ROR(w, k)                                    \
  (CheckUnsigned(w) >> ((k) & (sizeof(w) * 8 - 1)) | \
   (w) << (-(k) & (sizeof(w) * 8 - 1)))
#define ROL(w, k)                       \
  ((w) << ((k) & (sizeof(w) * 8 - 1)) | \
   CheckUnsigned(w) >> (-(k) & (sizeof(w) * 8 - 1)))
#define SAR(w, k) (((w) & ~(~0u >> (k))) | ((w) >> ((k) & (sizeof(w) * 8 - 1))))
|
||||
|
||||
/**
 * Table-driven bit reversal through kReverseBits.
 *
 * These function-like macros take over calls written as
 * bitreverse8(x) / bitreverse16(x), shadowing the prototypes declared
 * earlier in this header. bitreverse16() reverses each byte and swaps
 * the two; it expands X twice.
 */
#define bitreverse8(X) (kReverseBits[(X)&0xff])
|
||||
#define bitreverse16(X) \
|
||||
((uint16_t)kReverseBits[(X)&0xff] << 010 | \
|
||||
kReverseBits[((uint16_t)(X) >> 010) & 0xff])
|
||||
|
||||
#ifndef __GNUC__
|
||||
/*
 * Portable little-endian readers for compilers without GNU extensions.
 *
 * P must be indexable by byte. Every byte is masked with 0xff before
 * widening, so buffers of plain (possibly signed) char decode the same
 * as unsigned ones. READ64LE() reads all eight bytes.
 *
 * P is evaluated once per byte read.
 */
#define READ16LE(P) \
  ((unsigned)((P)[1] & 0xff) << 010 | (unsigned)((P)[0] & 0xff))
#define READ32LE(P)                             \
  ((unsigned long)((P)[3] & 0xff) << 030 |      \
   (unsigned long)((P)[2] & 0xff) << 020 |      \
   (unsigned long)((P)[1] & 0xff) << 010 |      \
   (unsigned long)((P)[0] & 0xff))
#define READ64LE(P)                             \
  ((unsigned long long)((P)[7] & 0xff) << 070 | \
   (unsigned long long)((P)[6] & 0xff) << 060 | \
   (unsigned long long)((P)[5] & 0xff) << 050 | \
   (unsigned long long)((P)[4] & 0xff) << 040 | \
   (unsigned long long)((P)[3] & 0xff) << 030 | \
   (unsigned long long)((P)[2] & 0xff) << 020 | \
   (unsigned long long)((P)[1] & 0xff) << 010 | \
   (unsigned long long)((P)[0] & 0xff))
|
||||
#else
|
||||
/* with GNU extensions the upper-case readers alias the lower-case ones */
#define READ16LE(P) read16le(P)
#define READ32LE(P) read32le(P)
#define READ64LE(P) read64le(P)

/**
 * Little-endian readers.
 *
 * P may be any pointer; it is evaluated once and converted to a byte
 * pointer, then the value is assembled least significant byte first.
 */
#define read16le(P)                                        \
  ({                                                       \
    const unsigned char *Src = (const unsigned char *)(P); \
    (uint16_t) Src[1] << 8 | (uint16_t)Src[0];             \
  })
#define read32le(P)                                        \
  ({                                                       \
    const unsigned char *Src = (const unsigned char *)(P); \
    ((uint32_t)Src[0] | (uint32_t)Src[1] << 8 |            \
     (uint32_t)Src[2] << 16 | (uint32_t)Src[3] << 24);     \
  })
#define read64le(P)                                        \
  ({                                                       \
    const unsigned char *Src = (const unsigned char *)(P); \
    ((uint64_t)Src[0] | (uint64_t)Src[1] << 8 |            \
     (uint64_t)Src[2] << 16 | (uint64_t)Src[3] << 24 |     \
     (uint64_t)Src[4] << 32 | (uint64_t)Src[5] << 40 |     \
     (uint64_t)Src[6] << 48 | (uint64_t)Src[7] << 56);     \
  })
|
||||
#endif
|
||||
|
||||
/**
 * Little-endian writers.
 *
 * P is a uint8_t pointer and V the value; each is evaluated once, P
 * first. Bytes are stored least significant first.
 */
#define WRITE16LE(P, V)              \
  do {                               \
    uint8_t *Out = (P);              \
    uint16_t Word = (V);             \
    Out[0] = (uint8_t)(Word >> 0);   \
    Out[1] = (uint8_t)(Word >> 8);   \
  } while (0)

#define WRITE32LE(P, V)              \
  do {                               \
    uint8_t *Out = (P);              \
    uint32_t Word = (V);             \
    Out[0] = (uint8_t)(Word >> 0);   \
    Out[1] = (uint8_t)(Word >> 8);   \
    Out[2] = (uint8_t)(Word >> 16);  \
    Out[3] = (uint8_t)(Word >> 24);  \
  } while (0)

#define WRITE64LE(P, V)              \
  do {                               \
    uint8_t *Out = (P);              \
    uint64_t Word = (V);             \
    Out[0] = (uint8_t)(Word >> 0);   \
    Out[1] = (uint8_t)(Word >> 8);   \
    Out[2] = (uint8_t)(Word >> 16);  \
    Out[3] = (uint8_t)(Word >> 24);  \
    Out[4] = (uint8_t)(Word >> 32);  \
    Out[5] = (uint8_t)(Word >> 40);  \
    Out[6] = (uint8_t)(Word >> 48);  \
    Out[7] = (uint8_t)(Word >> 56);  \
  } while (0)
|
||||
|
||||
/**
 * Big-endian readers, plus the 128-bit little-endian reader.
 *
 * Bytes are fetched through a const unsigned char pointer, so buffers
 * of plain (possibly signed) char no longer sign-extend into the high
 * bits, and the +2 / +4 / +8 offsets are always byte offsets whatever
 * the pointer type of P.
 *
 * @note P is evaluated more than once
 * @note the 128-bit forms need uint128_t and read64le() from elsewhere
 */
#define read16be(P)                                        \
  ((uint16_t)(((const unsigned char *)(P))[0] << 010) |    \
   (uint16_t)(((const unsigned char *)(P))[1]))
#define read32be(P)                                        \
  ((uint32_t)read16be(P) << 020 |                          \
   (uint32_t)read16be((const unsigned char *)(P) + 2))
#define read64be(P)                                        \
  ((uint64_t)read32be(P) << 040 |                          \
   read32be((const unsigned char *)(P) + 4))
#define read128le(P)                                               \
  ((uint128_t)read64le((const unsigned char *)(P) + 8) << 0100 |   \
   read64le(P))
#define read128be(P)                                       \
  ((uint128_t)read64be(P) << 0100 |                        \
   read64be((const unsigned char *)(P) + 8))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » some assembly required ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
/**
|
||||
* Constraints for virtual machine flags.
|
||||
* @note we beseech clang devs for flag constraints
|
||||
*/
|
||||
#ifdef __GCC_ASM_FLAG_OUTPUTS__ /* GCC6+ CLANG10+ */
|
||||
#define CF "=@ccc"
|
||||
#define CFLAG(OP) OP
|
||||
#define ZF "=@ccz"
|
||||
#define ZFLAG(OP) OP
|
||||
#define OF "=@cco"
|
||||
#define OFLAG(OP) OP
|
||||
#define SF "=@ccs"
|
||||
#define SFLAG(SP) SP
|
||||
#define ABOVEF "=@cca" /* i.e. !ZF && !CF */
|
||||
#define ABOVEFLAG(OP) OP
|
||||
#else
|
||||
#define CF "=q"
|
||||
#define CFLAG(OP) OP "\n\tsetc\t%b0"
|
||||
#define ZF "=q"
|
||||
#define ZFLAG(OP) OP "\n\tsetz\t%b0"
|
||||
#define OF "=q"
|
||||
#define OFLAG(OP) OP "\n\tseto\t%b0"
|
||||
#define SF "=q"
|
||||
/* parameter must be spelled OP because that's the name the expansion pastes */
#define SFLAG(OP) OP "\n\tsets\t%b0"
|
||||
/* no flag outputs in this branch: seta stores into a byte register instead */
#define ABOVEF "=q"
|
||||
#define ABOVEFLAG(OP) OP "\n\tseta\t%b0"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Reads scalar from memory, offset by segment.
|
||||
*
|
||||
* @return *(MEM) relative to segment
|
||||
* @see arch_prctl()
|
||||
* @see pushpop()
|
||||
*/
|
||||
#define fs(MEM) __peek("fs", MEM)
|
||||
#define gs(MEM) __peek("gs", MEM)
|
||||
|
||||
/**
|
||||
* Reads scalar from memory w/ one operation.
|
||||
*
|
||||
* @param MEM is alignas(𝑘) uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
|
||||
* @return *(MEM)
|
||||
* @note defeats compiler load tearing optimizations
|
||||
* @note alignas(𝑘) is implied if compiler knows type
|
||||
* @note alignas(𝑘) only avoids multi-core / cross-page edge cases
|
||||
* @see Intel's Six-Thousand Page Manual V.3A §8.2.3.1
|
||||
* @see atomic_store()
|
||||
*/
|
||||
#define atomic_load(MEM) \
|
||||
({ \
|
||||
autotype(MEM) Mem = (MEM); \
|
||||
typeof(*Mem) Reg; \
|
||||
asm("mov\t%1,%0" : "=r"(Reg) : "m"(*Mem)); \
|
||||
Reg; \
|
||||
})
|
||||
|
||||
/**
|
||||
* Saves scalar to memory w/ one operation.
|
||||
*
|
||||
* This is guaranteed to happen in either one or zero operations,
|
||||
* depending on whether or not it's possible for *(MEM) to be read
|
||||
* afterwards. This macro only forbids compiler from using >1 ops.
|
||||
*
|
||||
* @param MEM is alignas(𝑘) uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
|
||||
* @param VAL is uint𝑘_t w/ better encoding for immediates (constexpr)
|
||||
* @return VAL
|
||||
* @note alignas(𝑘) on nexgen32e only needed for end of page gotcha
|
||||
* @note alignas(𝑘) is implied if compiler knows type
|
||||
* @note needed to defeat store tearing optimizations
|
||||
* @see Intel's Six-Thousand Page Manual V.3A §8.2.3.1
|
||||
* @see atomic_load()
|
||||
*/
|
||||
#define atomic_store(MEM, VAL) \
|
||||
({ \
|
||||
autotype(VAL) Val = (VAL); \
|
||||
typeof(&Val) Mem = (MEM); \
|
||||
asm("mov%z1\t%1,%0" : "=m,m"(*Mem) : "i,r"(Val)); \
|
||||
Val; \
|
||||
})
|
||||
|
||||
/**
|
||||
* Returns true if bit is set in memory.
|
||||
*
|
||||
* This is a generically-typed Bitset<T> ∀ RAM. This macro is intended
|
||||
* to be container-like with optimal machine instruction encoding, cf.
|
||||
* machine-agnostic container abstractions. Memory accesses are words.
|
||||
* Register allocation can be avoided if BIT is known. Be careful when
|
||||
* casting character arrays since that should cause a page fault.
|
||||
*
|
||||
* @param MEM is uint𝑘_t[] where 𝑘 ∈ {16,32,64} base address
|
||||
* @param BIT ∈ [-(2**(𝑘-1)),2**(𝑘-1)) is zero-based index
|
||||
* @return true if bit is set, otherwise false
|
||||
* @see Intel's Six Thousand Page Manual V.2A 3-113
|
||||
* @see bts(), btr(), btc()
|
||||
*/
|
||||
#define bt(MEM, BIT) \
|
||||
({ \
|
||||
bool OldBit; \
|
||||
if (isconstant(BIT)) { \
|
||||
asm(CFLAG("bt%z1\t%2,%1") \
|
||||
: CF(OldBit) \
|
||||
: "m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]), \
|
||||
"J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
|
||||
: "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 2) { \
|
||||
asm(CFLAG("bt\t%w2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 4) { \
|
||||
asm(CFLAG("bt\t%k2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 8) { \
|
||||
asm(CFLAG("bt\t%q2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
|
||||
} \
|
||||
OldBit; \
|
||||
})
|
||||
|
||||
#define bts(MEM, BIT) __BitOp("bts", BIT, MEM) /** bit test and set */
|
||||
#define btr(MEM, BIT) __BitOp("btr", BIT, MEM) /** bit test and reset */
|
||||
#define btc(MEM, BIT) __BitOp("btc", BIT, MEM) /** bit test and complement */
|
||||
#define lockbts(MEM, BIT) __BitOp("lock bts", BIT, MEM)
|
||||
#define lockbtr(MEM, BIT) __BitOp("lock btr", BIT, MEM)
|
||||
#define lockbtc(MEM, BIT) __BitOp("lock btc", BIT, MEM)
|
||||
|
||||
#define lockinc(MEM) __ArithmeticOp1("lock inc", MEM)
|
||||
#define lockdec(MEM) __ArithmeticOp1("lock dec", MEM)
|
||||
#define locknot(MEM) __ArithmeticOp1("lock not", MEM)
|
||||
#define lockneg(MEM) __ArithmeticOp1("lock neg", MEM)
|
||||
|
||||
#define lockaddeq(MEM, VAL) __ArithmeticOp2("lock add", VAL, MEM)
|
||||
#define locksubeq(MEM, VAL) __ArithmeticOp2("lock sub", VAL, MEM)
|
||||
#define lockxoreq(MEM, VAL) __ArithmeticOp2("lock xor", VAL, MEM)
|
||||
#define lockandeq(MEM, VAL) __ArithmeticOp2("lock and", VAL, MEM)
|
||||
#define lockoreq(MEM, VAL) __ArithmeticOp2("lock or", VAL, MEM)
|
||||
|
||||
/**
|
||||
* Exchanges *MEMORY into *LOCALVAR w/ one operation.
|
||||
*
|
||||
* @param MEMORY is uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
|
||||
* @param LOCALVAR is uint𝑘_t[hasatleast 1]
|
||||
* @return LOCALVAR[0]
|
||||
* @see xchg()
|
||||
*/
|
||||
#define lockxchg(MEMORY, LOCALVAR) \
|
||||
({ \
|
||||
static_assert(typescompatible(typeof(*(MEMORY)), typeof(*(LOCALVAR)))); \
|
||||
asm("xchg\t%0,%1" : "+%m"(*(MEMORY)), "+r"(*(LOCALVAR))); \
|
||||
*(LOCALVAR); \
|
||||
})
|
||||
|
||||
/**
|
||||
* Compares and exchanges.
|
||||
*
|
||||
* @param IFTHING is uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
|
||||
* @return true if value was exchanged, otherwise false
|
||||
* @see lockcmpxchg()
|
||||
*/
|
||||
#define cmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
|
||||
({ \
|
||||
bool DidIt; \
|
||||
asm(ZFLAG("cmpxchg\t%3,%1") \
|
||||
: ZF(DidIt), "+m"(*(IFTHING)), "+a"(*(ISEQUALTOME)) \
|
||||
: "r"((typeof(*(IFTHING)))(REPLACEITWITHME)) \
|
||||
: "cc"); \
|
||||
DidIt; \
|
||||
})
|
||||
|
||||
#define ezcmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
|
||||
({ \
|
||||
bool DidIt; \
|
||||
autotype(IFTHING) IfThing = (IFTHING); \
|
||||
typeof(*IfThing) IsEqualToMe = (ISEQUALTOME); \
|
||||
typeof(*IfThing) ReplaceItWithMe = (REPLACEITWITHME); \
|
||||
asm(ZFLAG("cmpxchg\t%3,%1") \
|
||||
: ZF(DidIt), "+m"(*IfThing), "+a"(IsEqualToMe) \
|
||||
: "r"(ReplaceItWithMe) \
|
||||
: "cc"); \
|
||||
DidIt; \
|
||||
})
|
||||
|
||||
/**
|
||||
* Compares and exchanges w/ one operation.
|
||||
*
|
||||
* @param IFTHING is uint𝑘_t[hasatleast 1] where 𝑘 ∈ {8,16,32,64}
|
||||
* @return true if value was exchanged, otherwise false
|
||||
* @see cmpxchg()
|
||||
*/
|
||||
#define lockcmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
|
||||
({ \
|
||||
bool DidIt; \
|
||||
asm(ZFLAG("lock cmpxchg\t%3,%1") \
|
||||
: ZF(DidIt), "+m"(*(IFTHING)), "+a"(*(ISEQUALTOME)) \
|
||||
: "r"((typeof(*(IFTHING)))(REPLACEITWITHME)) \
|
||||
: "cc"); \
|
||||
DidIt; \
|
||||
})
|
||||
|
||||
/**
 * Gets value of extended control register.
 *
 * @param xcr_register_num is passed to the instruction via the "c" register
 * @return 64-bit value built from the "d" (high) and "a" (low) outputs
 */
#define xgetbv(xcr_register_num)                                \
  ({                                                            \
    unsigned hi, lo;                                            \
    asm("xgetbv" : "=d"(hi), "=a"(lo) : "c"(xcr_register_num)); \
    (uint64_t) hi << 32 | lo;                                   \
  })
|
||||
|
||||
/**
|
||||
* Reads model-specific register.
|
||||
* @note programs running as guests won't have authorization
|
||||
*/
|
||||
/* "a" output is the low half of the result and "d" output is the high half */
#define rdmsr(msr)                                           \
  ({                                                         \
    uint32_t Eax, Edx;                                       \
    asm volatile("rdmsr" : "=a"(Eax), "=d"(Edx) : "c"(msr)); \
    ((uint64_t)Edx << 32) | Eax;                             \
  })
|
||||
|
||||
/**
|
||||
* Writes model-specific register.
|
||||
* @note programs running as guests won't have authorization
|
||||
*/
|
||||
/* val is split into 32-bit halves: low goes in via "a", high goes in via "d" */
#define wrmsr(msr, val)                             \
  do {                                              \
    uint64_t Val64 = (val);                         \
    uint32_t Lo32 = (uint32_t)Val64;                \
    uint32_t Hi32 = (uint32_t)(Val64 >> 32);        \
    asm volatile("wrmsr"                            \
                 : /* no outputs */                 \
                 : "c"(msr), "a"(Lo32), "d"(Hi32)); \
  } while (0)
|
||||
|
||||
/**
|
||||
* Tells CPU page tables changed for virtual address.
|
||||
* @note programs running as guests won't have authorization
|
||||
*/
|
||||
#define invlpg(MEM) \
|
||||
asm volatile("invlpg\t(%0)" : /* no outputs */ : "r"(MEM) : "memory")
|
||||
|
||||
/**
|
||||
* Teleports code fragment inside _init().
|
||||
*/
|
||||
#define INITIALIZER(PRI, NAME, CODE) \
|
||||
asm(".pushsection .init." #PRI "." #NAME ",\"ax\",@progbits\n\t" \
|
||||
"call\t" #NAME "\n\t" \
|
||||
".popsection"); \
|
||||
textstartup optimizesize void NAME(char *rdi, const char *rsi) { \
|
||||
CODE; \
|
||||
asm volatile("" : /* no outputs */ : "D"(rdi), "S"(rsi)); \
|
||||
}
|
||||
|
||||
#ifndef __STRICT_ANSI__
|
||||
#if __PIC__ + __code_model_medium__ + __code_model_large__ + 0 > 1
|
||||
#define __EZLEA(SYMBOL) "lea\t" SYMBOL "(%%rip),%"
|
||||
#else
|
||||
#define __EZLEA(SYMBOL) "mov\t$" SYMBOL ",%k"
|
||||
#endif
|
||||
#define weaken(symbol) ((const typeof(&(symbol)))weakaddr(#symbol))
|
||||
#define strongaddr(symbolstr) \
|
||||
({ \
|
||||
intptr_t waddr; \
|
||||
asm(__EZLEA(symbolstr) "0" : "=r"(waddr)); \
|
||||
waddr; \
|
||||
})
|
||||
#define weakaddr(symbolstr) \
|
||||
({ \
|
||||
intptr_t waddr; \
|
||||
asm(".weak\t" symbolstr "\n\t" __EZLEA(symbolstr) "0" : "=r"(waddr)); \
|
||||
waddr; \
|
||||
})
|
||||
#else
|
||||
#define weaken(symbol) symbol
|
||||
#define weakaddr(symbolstr) &(symbolstr)
|
||||
#endif
|
||||
|
||||
#define slowcall(fn, arg1, arg2, arg3, arg4, arg5, arg6) \
|
||||
({ \
|
||||
void *ax; \
|
||||
asm volatile("push\t%7\n\t" \
|
||||
"push\t%6\n\t" \
|
||||
"push\t%5\n\t" \
|
||||
"push\t%4\n\t" \
|
||||
"push\t%3\n\t" \
|
||||
"push\t%2\n\t" \
|
||||
"push\t%1\n\t" \
|
||||
"call\tslowcall" \
|
||||
: "=a"(ax) \
|
||||
: "g"(fn), "g"(arg1), "g"(arg2), "g"(arg3), "g"(arg4), \
|
||||
"g"(arg5), "g"(arg6) \
|
||||
: "memory"); \
|
||||
ax; \
|
||||
})
|
||||
|
||||
/**
 * Returns true if P, taken as a signed integer, lies within [-2^47,2^47).
 *
 * Both bounds are written as signed constants. A literal spelled
 * 0xffff800000000000l doesn't fit in long, so it would be unsigned and
 * turn the lower-bound check into an unsigned comparison that rejects
 * every address in the lower half.
 */
#define IsAddressCanonicalForm(P)                              \
  ({                                                           \
    intptr_t p2 = (intptr_t)(P);                               \
    (-0x0000800000000000l <= p2 && p2 <= 0x00007fffffffffffl); \
  })
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » optimizations ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
|
||||
/* __builtin_popcount() takes unsigned int, so wider constants need the ll form */
#define popcount(X)                                                   \
  (isconstant(X) ? (sizeof(X) > sizeof(int) ? __builtin_popcountll(X) \
                                            : __builtin_popcount(X))  \
                 : __popcount(X))
|
||||
#define popcount$nehalem(X) \
|
||||
({ \
|
||||
typeof(X) BitCount; \
|
||||
asm("popcnt\t%1,%0" : "=r,r"(BitCount) : "r,m"(X) : "cc"); \
|
||||
BitCount; \
|
||||
})
|
||||
#ifdef __POPCNT__
|
||||
#define __popcount(X) popcount$nehalem(X)
|
||||
#else
|
||||
#define __popcount(X) (popcount)(X)
|
||||
#endif
|
||||
|
||||
#define bswap_16(U16) \
|
||||
(isconstant(U16) ? ((((U16)&0xff00) >> 010) | (((U16)&0x00ff) << 010)) : ({ \
|
||||
uint16_t Swapped16, Werd16 = (U16); \
|
||||
asm("xchg\t%b0,%h0" : "=Q"(Swapped16) : "0"(Werd16)); \
|
||||
Swapped16; \
|
||||
}))
|
||||
|
||||
#define bswap_32(U32) \
|
||||
(isconstant(U32) \
|
||||
? ((((U32)&0xff000000) >> 030) | (((U32)&0x000000ff) << 030) | \
|
||||
(((U32)&0x00ff0000) >> 010) | (((U32)&0x0000ff00) << 010)) \
|
||||
: ({ \
|
||||
uint32_t Swapped32, Werd32 = (U32); \
|
||||
asm("bswap\t%0" : "=r"(Swapped32) : "0"(Werd32)); \
|
||||
Swapped32; \
|
||||
}))
|
||||
|
||||
#define bswap_64(U64) \
|
||||
(isconstant(U64) ? ((((U64)&0xff00000000000000ul) >> 070) | \
|
||||
(((U64)&0x00000000000000fful) << 070) | \
|
||||
(((U64)&0x00ff000000000000ul) >> 050) | \
|
||||
(((U64)&0x000000000000ff00ul) << 050) | \
|
||||
(((U64)&0x0000ff0000000000ul) >> 030) | \
|
||||
(((U64)&0x0000000000ff0000ul) << 030) | \
|
||||
(((U64)&0x000000ff00000000ul) >> 010) | \
|
||||
(((U64)&0x00000000ff000000ul) << 010)) \
|
||||
: ({ \
|
||||
uint64_t Swapped64, Werd64 = (U64); \
|
||||
asm("bswap\t%0" : "=r"(Swapped64) : "0"(Werd64)); \
|
||||
Swapped64; \
|
||||
}))
|
||||
|
||||
#endif /* defined(__GNUC__) && !defined(__STRICT_ANSI__) */
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § bits » implementation details ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define __peek(SEGMENT, ADDRESS) \
|
||||
({ \
|
||||
typeof(*(ADDRESS)) Pk; \
|
||||
asm("mov\t%%" SEGMENT ":%1,%0" : "=r"(Pk) : "m"(*(ADDRESS))); \
|
||||
Pk; \
|
||||
})
|
||||
|
||||
#define __ArithmeticOp1(OP, MEM) \
|
||||
({ \
|
||||
asm(OP "%z0\t%0" : "+m"(*(MEM)) : /* no inputs */ : "cc"); \
|
||||
MEM; \
|
||||
})
|
||||
|
||||
#define __ArithmeticOp2(OP, VAL, MEM) \
|
||||
({ \
|
||||
asm(OP "%z0\t%1,%0" : "+m,m"(*(MEM)) : "i,r"(VAL) : "cc"); \
|
||||
MEM; \
|
||||
})
|
||||
|
||||
#define __BitOp(OP, BIT, MEM) \
|
||||
({ \
|
||||
bool OldBit; \
|
||||
if (isconstant(BIT)) { \
|
||||
asm(CFLAG(OP "%z1\t%2,%1") \
|
||||
: CF(OldBit), "+m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]) \
|
||||
: "J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
|
||||
: "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 2) { \
|
||||
asm(CFLAG(OP "\t%w2,%1") \
|
||||
: CF(OldBit), "+m"((MEM)[0]) \
|
||||
: "r"(BIT) \
|
||||
: "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 4) { \
|
||||
asm(CFLAG(OP "\t%k2,%1") \
|
||||
: CF(OldBit), "+m"((MEM)[0]) \
|
||||
: "r"(BIT) \
|
||||
: "cc"); \
|
||||
} else if (sizeof((MEM)[0]) == 8) { \
|
||||
asm(CFLAG(OP "\t%q2,%1") \
|
||||
: CF(OldBit), "+m"((MEM)[0]) \
|
||||
: "r"(BIT) \
|
||||
: "cc"); \
|
||||
} \
|
||||
OldBit; \
|
||||
})
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_H_ */
|
53
libc/bits/bits.mk
Normal file
53
libc/bits/bits.mk
Normal file
|
@ -0,0 +1,53 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||
|
||||
PKGS += LIBC_BITS
|
||||
|
||||
LIBC_BITS_ARTIFACTS += LIBC_BITS_A
|
||||
LIBC_BITS = $(LIBC_BITS_A_DEPS) $(LIBC_BITS_A)
|
||||
LIBC_BITS_A = o/$(MODE)/libc/bits/bits.a
|
||||
LIBC_BITS_A_FILES := $(wildcard libc/bits/*)
|
||||
LIBC_BITS_A_HDRS = $(filter %.h,$(LIBC_BITS_A_FILES))
|
||||
LIBC_BITS_A_SRCS_S = $(filter %.S,$(LIBC_BITS_A_FILES))
|
||||
LIBC_BITS_A_SRCS_C = $(filter %.c,$(LIBC_BITS_A_FILES))
|
||||
|
||||
LIBC_BITS_A_SRCS = \
|
||||
$(LIBC_BITS_A_SRCS_S) \
|
||||
$(LIBC_BITS_A_SRCS_C)
|
||||
|
||||
LIBC_BITS_A_OBJS = \
|
||||
$(LIBC_BITS_A_SRCS:%=o/$(MODE)/%.zip.o) \
|
||||
$(LIBC_BITS_A_SRCS_S:%.S=o/$(MODE)/%.o) \
|
||||
$(LIBC_BITS_A_SRCS_C:%.c=o/$(MODE)/%.o)
|
||||
|
||||
LIBC_BITS_A_CHECKS = \
|
||||
$(LIBC_BITS_A).pkg \
|
||||
$(LIBC_BITS_A_HDRS:%=o/$(MODE)/%.ok)
|
||||
|
||||
LIBC_BITS_A_DIRECTDEPS = \
|
||||
LIBC_STUBS \
|
||||
LIBC_NEXGEN32E
|
||||
|
||||
LIBC_BITS_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
$(LIBC_BITS_A): libc/bits/ \
|
||||
$(LIBC_BITS_A).pkg \
|
||||
$(LIBC_BITS_A_OBJS)
|
||||
|
||||
$(LIBC_BITS_A).pkg: \
|
||||
$(LIBC_BITS_A_OBJS) \
|
||||
$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
#o/$(MODE)/libc/bits/bsf.o: CC = clang-10
|
||||
#o/$(MODE)/libc/bits/bsf.o: CC = /opt/cross9cc/bin/x86_64-linux-musl-cc
|
||||
|
||||
LIBC_BITS_LIBS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)))
|
||||
LIBC_BITS_SRCS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_SRCS))
|
||||
LIBC_BITS_HDRS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_HDRS))
|
||||
LIBC_BITS_CHECKS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_CHECKS))
|
||||
LIBC_BITS_OBJS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_OBJS))
|
||||
$(LIBC_BITS_OBJS): $(BUILD_FILES) libc/bits/bits.mk
|
||||
|
||||
.PHONY: o/$(MODE)/libc/bits
|
||||
o/$(MODE)/libc/bits: $(LIBC_BITS_CHECKS)
|
217
libc/bits/emmintrin.h
Normal file
217
libc/bits/emmintrin.h
Normal file
|
@ -0,0 +1,217 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
|
||||
#include "libc/bits/progn.h"
|
||||
#include "libc/bits/xmmintrin.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse2 ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
typedef char __v16qi _Vector_size(16);
|
||||
typedef unsigned char __v16qu _Vector_size(16);
|
||||
typedef signed char __v16qs _Vector_size(16);
|
||||
|
||||
typedef short __v8hi _Vector_size(16);
|
||||
typedef unsigned short __v8hu _Vector_size(16);
|
||||
|
||||
typedef double __v2df _Vector_size(16);
|
||||
typedef double __m128d _Vector_size(16) aligned(16);
|
||||
typedef double __m128d_u _Vector_size(16) aligned(1);
|
||||
|
||||
typedef long long __v2di _Vector_size(16);
|
||||
typedef long long __m128i _Vector_size(16) aligned(16);
|
||||
typedef long long __m128i_u _Vector_size(16) aligned(1);
|
||||
typedef unsigned long long __v2du _Vector_size(16);
|
||||
|
||||
struct thatispacked mayalias __usi128ma {
|
||||
__m128i_u __v;
|
||||
};
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse2 » memory ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_loadu_si128(M128IP) ((struct __usi128ma *)(M128IP))->__v
|
||||
#define _mm_storeu_si128(M128IP, M128I) \
|
||||
(((struct __usi128ma *)(M128IP))->__v = (M128I))
|
||||
|
||||
#define _mm_set_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
|
||||
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
|
||||
((__m128i)(__v16qi){I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, \
|
||||
I8_9, I8_10, I8_11, I8_12, I8_13, I8_14, I8_15})
|
||||
#define _mm_set_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
|
||||
((__m128i)(__v8hi){I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7})
|
||||
#define _mm_set_epi32(I32_3, I32_2, I32_1, I32_0) \
|
||||
((__m128i)(__v4si){I32_0, I32_1, I32_2, I32_3})
|
||||
#define _mm_set_epi64x(I64_1, I64_0) ((__m128i)(__v2di){I64_0, I64_1})
|
||||
|
||||
#define _mm_setr_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
|
||||
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
|
||||
_mm_set_epi8(I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, I8_9, \
|
||||
I8_10, I8_11, I8_12, I8_13, I8_14, I8_15)
|
||||
#define _mm_setr_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
|
||||
_mm_set_epi16(I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7)
|
||||
#define _mm_setr_epi32(I32_3, I32_2, I32_1, I32_0) \
|
||||
_mm_set_epi32(I32_0, I32_1, I32_2, I32_3)
|
||||
#define _mm_setr_epi64x(I64_1, I64_0) _mm_set_epi64x(I64_0, I64_1)
|
||||
|
||||
#define _mm_set1_epi8(I8) \
|
||||
_mm_set_epi8(I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8)
|
||||
#define _mm_set1_epi16(I16) \
|
||||
_mm_set_epi16(I16, I16, I16, I16, I16, I16, I16, I16)
|
||||
#define _mm_set1_epi32(I32) _mm_set_epi32(I32, I32, I32, I32)
|
||||
#define _mm_set1_epi64x(I64) _mm_set_epi64x(I64, I64)
|
||||
|
||||
#define _mm_cvtsi128_si32(M128I) ((__v4si)(M128I))[0]
|
||||
#define _mm_cvtsi32_si128(I32) ((__m128i)(__v4si){(I32), 0, 0, 0})
|
||||
#define _mm_setzero_si128() ((__m128i)(__v2di){0LL, 0LL})
|
||||
#define _mm_castsi128_ps(M128I) ((__m128)(M128I))
|
||||
#define _mm_castps_si128(M128) ((__m128i)(M128))
|
||||
#define _mm_load_si128(M128I) (*(M128I))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse2 » simd ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_and_si128(M128I_0, M128I_1) \
|
||||
((__m128i)((__v2du)(M128I_0) & (__v2du)(M128I_1)))
|
||||
#define _mm_or_si128(M128I_0, M128I_1) \
|
||||
((__m128i)((__v2du)(M128I_0) | (__v2du)(M128I_1)))
|
||||
#define _mm_xor_si128(M128I_0, M128I_1) \
|
||||
((__m128i)((__v2du)(M128I_0) ^ (__v2du)(M128I_1)))
|
||||
#define _mm_andnot_si128(M128I_0, M128I_1) \
|
||||
((__m128i)(~(__v2du)(M128I_0) & (__v2du)(M128I_1)))
|
||||
|
||||
/* outer parens stop the cast from applying to whatever follows the macro */
#define _mm_add_pd(M128D_0, M128D_1) \
  ((__m128d)((__v2df)(M128D_0) + (__v2df)(M128D_1)))
#define _mm_sub_pd(M128D_0, M128D_1) \
  ((__m128d)((__v2df)(M128D_0) - (__v2df)(M128D_1)))
#define _mm_mul_pd(M128D_0, M128D_1) \
  ((__m128d)((__v2df)(M128D_0) * (__v2df)(M128D_1)))
#define _mm_div_pd(M128D_0, M128D_1) \
  ((__m128d)((__v2df)(M128D_0) / (__v2df)(M128D_1)))
|
||||
/* bitwise operators aren't defined for double vectors, so the lanes are */
/* reinterpreted as unsigned 64-bit integers for the op and cast back    */
#define _mm_and_pd(M128D_0, M128D_1) \
  ((__m128d)((__v2du)(M128D_0) & (__v2du)(M128D_1)))
#define _mm_or_pd(M128D_0, M128D_1) \
  ((__m128d)((__v2du)(M128D_0) | (__v2du)(M128D_1)))
#define _mm_xor_pd(M128D_0, M128D_1) \
  ((__m128d)((__v2du)(M128D_0) ^ (__v2du)(M128D_1)))
#define _mm_andnot_pd(M128D_0, M128D_1) \
  ((__m128d)(~(__v2du)(M128D_0) & (__v2du)(M128D_1)))
|
||||
#define _mm_sqrt_pd(M128D) __builtin_ia32_sqrtpd((__v2df)(M128D))
|
||||
|
||||
#define _mm_min_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_minpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_max_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_maxpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpeq_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpeqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpneq_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpneqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmplt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnlt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmple_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplepd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnle_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlepd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpgt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpngt_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpge_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplepd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpnge_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlepd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpord_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpunord_pd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpunordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
|
||||
#define _mm_sad_epu8(M128I_0, M128I_1) \
|
||||
__builtin_ia32_psadbw128((__v16qi)(M128I_0), (__v16qi)(M128I_1))
|
||||
|
||||
#define _mm_subs_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psubsb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
/* byte lanes need the psubusb builtin; psubusw operates on 16-bit lanes */
#define _mm_subs_epu8(M128I_0, M128I_1) \
  ((__m128i)__builtin_ia32_psubusb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
#define _mm_subs_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psubsw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
#define _mm_subs_epu16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psubusw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_add_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)((__v4su)(M128I_0) + (__v4su)(M128I_1)))
|
||||
#define _mm_sub_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)((__v4su)(M128I_0) - (__v4su)(M128I_1)))
|
||||
#define _mm_madd_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pmaddwd128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
#define _mm_shuffle_epi32(V, IMM) \
|
||||
((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(V), (int)(IMM)))
|
||||
|
||||
#define _mm_slli_epi32(M128I, COUNT) \
|
||||
((__m128i)__builtin_ia32_pslldi128((__v4si)(M128I), (COUNT)))
|
||||
|
||||
#define _mm_slli_si128(M128I, IMM) \
|
||||
((__m128i)__builtin_ia32_pslldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
|
||||
#define _mm_srli_si128(M128I, IMM) \
|
||||
((__m128i)__builtin_ia32_psrldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse2 » scalar ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_sqrt_sd(M128D_0, M128D_1) \
|
||||
({ \
|
||||
__m128d M128d2 = __builtin_ia32_sqrtsd((__v2df)(M128D_1)); \
|
||||
(__m128d){M128d2[0], (M128D_0)[1]}; \
|
||||
})
|
||||
|
||||
/*
 * Each macro copies its operands, applies the op to lane 0 of the copy,
 * and yields the copy. Lane 1 comes from M128D_0. The caller's M128D_0
 * is never written to, so it may be an rvalue.
 */
#define _mm_add_sd(M128D_0, M128D_1) \
  ({                                 \
    __m128d AddSd0 = (M128D_0);      \
    __m128d AddSd1 = (M128D_1);      \
    AddSd0[0] += AddSd1[0];          \
    AddSd0;                          \
  })
#define _mm_sub_sd(M128D_0, M128D_1) \
  ({                                 \
    __m128d SubSd0 = (M128D_0);      \
    __m128d SubSd1 = (M128D_1);      \
    SubSd0[0] -= SubSd1[0];          \
    SubSd0;                          \
  })
#define _mm_mul_sd(M128D_0, M128D_1) \
  ({                                 \
    __m128d MulSd0 = (M128D_0);      \
    __m128d MulSd1 = (M128D_1);      \
    MulSd0[0] *= MulSd1[0];          \
    MulSd0;                          \
  })
#define _mm_div_sd(M128D_0, M128D_1) \
  ({                                 \
    __m128d DivSd0 = (M128D_0);      \
    __m128d DivSd1 = (M128D_1);      \
    DivSd0[0] /= DivSd1[0];          \
    DivSd0;                          \
  })
|
||||
|
||||
#define _mm_min_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_minsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_max_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_maxsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpeq_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpeqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpneq_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpneqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmplt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnlt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmple_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplesd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpnle_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlesd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpgt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpngt_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpge_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmplesd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpnge_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpnlesd((__v2df)(M128D_1), (__v2df)(M128D_0))
|
||||
#define _mm_cmpord_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
#define _mm_cmpunord_sd(M128D_0, M128D_1) \
|
||||
__builtin_ia32_cmpunordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse2 » miscellaneous ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_pause() asm("rep nop")
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_ */
|
21
libc/bits/emptytonull.c
Normal file
21
libc/bits/emptytonull.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
|
||||
/**
 * Returns null pointer if s points at an empty string, otherwise s.
 */
const char *emptytonull(const char *s) {
  if (s && *s == '\0') return 0;
  return s;
}
|
26
libc/bits/firstnonnull.c
Normal file
26
libc/bits/firstnonnull.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
||||
/**
 * Returns a if it's non-null, otherwise b; aborts if both are null.
 */
const char *(firstnonnull)(const char *a, const char *b) {
  const char *pick = a ? a : b;
  if (!pick) abort();
  return pick;
}
|
30
libc/bits/gray.c
Normal file
30
libc/bits/gray.c
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
|
||||
/**
 * Returns x xor'd with itself shifted right by one bit.
 */
uint32_t gray(uint32_t x) {
  uint32_t halved = x >> 1;
  return halved ^ x;
}
|
||||
|
||||
/**
 * Folds x onto itself with xor at shifts of 16, 8, 4, 2, then 1.
 */
uint32_t ungray(uint32_t x) {
  unsigned shift;
  for (shift = 16; shift != 0; shift >>= 1) {
    x ^= x >> shift;
  }
  return x;
}
|
24
libc/bits/hamming.c
Normal file
24
libc/bits/hamming.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
|
||||
/**
 * Counts the bit positions at which x and y differ.
 *
 * The function name is parenthesized so a same-named function-like
 * macro cannot expand at the definition.
 */
unsigned long(hamming)(unsigned long x, unsigned long y) {
  unsigned long differing;
  differing = x ^ y;
  return popcount(differing);
}
|
75
libc/bits/hilbert.c
Normal file
75
libc/bits/hilbert.c
Normal file
|
@ -0,0 +1,75 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/hilbert.h"
|
||||
|
||||
static axdx_t RotateQuadrant(long n, long y, long x, long ry, long rx) {
|
||||
long t;
|
||||
if (ry == 0) {
|
||||
if (rx == 1) {
|
||||
y = n - 1 - y;
|
||||
x = n - 1 - x;
|
||||
}
|
||||
t = x;
|
||||
x = y;
|
||||
y = t;
|
||||
}
|
||||
return (axdx_t){y, x};
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates Hilbert space-filling curve.
|
||||
*
|
||||
* @see morton()
|
||||
*/
|
||||
long hilbert(long n, long y, long x) {
|
||||
axdx_t m;
|
||||
long d, s, ry, rx;
|
||||
d = 0;
|
||||
for (s = n / 2; s > 0; s /= 2) {
|
||||
rx = (x & s) > 0;
|
||||
ry = (y & s) > 0;
|
||||
d += s * s * ((3 * rx) ^ ry);
|
||||
m = RotateQuadrant(n, y, x, ry, rx);
|
||||
x = m.dx;
|
||||
y = m.ax;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes Hilbert space-filling curve.
|
||||
*
|
||||
* @see unmorton()
|
||||
*/
|
||||
axdx_t unhilbert(long n, long i) {
|
||||
axdx_t m;
|
||||
long s, t, y, x, ry, rx;
|
||||
t = i;
|
||||
x = y = 0;
|
||||
for (s = 1; s < n; s *= 2) {
|
||||
rx = (t / 2) & 1;
|
||||
ry = (t ^ rx) & 1;
|
||||
m = RotateQuadrant(s, y, x, ry, rx);
|
||||
x = m.dx + s * rx;
|
||||
y = m.ax + s * ry;
|
||||
t /= 4;
|
||||
}
|
||||
return (axdx_t){y, x};
|
||||
}
|
11
libc/bits/hilbert.h
Normal file
11
libc/bits/hilbert.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_HILBERT_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_HILBERT_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
long hilbert(long, long, long) pureconst;
|
||||
axdx_t unhilbert(long, long) pureconst;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_HILBERT_H_ */
|
48
libc/bits/i2bcd.S
Normal file
48
libc/bits/i2bcd.S
Normal file
|
@ -0,0 +1,48 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Converts integer to binary-coded decimal.
/
/ Repeatedly divides the input by ten; each remainder becomes one
/ four-bit digit, with the least significant decimal digit landing
/ in bits 0-3 of the result.
/
/ @param rdi is the integer to convert (divided with unsigned MUL)
/ @return rax holds the packed digits; zero input yields zero
/ NOTE(review): a 64-bit result has room for sixteen digits, so
/ inputs of 10^16 or more presumably do not encode cleanly. Confirm
/ what callers expect.
|
||||
i2bcd: .leafprologue
|
||||
.profilable
|
||||
/ Zero input skips the loop and returns zero.
test %rdi,%rdi
|
||||
je 2f
|
||||
mov %rdi,%rsi
|
||||
xor %r9d,%r9d
|
||||
/ 0xcccccccccccccccd is ceil(2^67/10): the high half of the product
/ shifted right by three is the quotient by ten.
mov $0xcccccccccccccccd,%r8
|
||||
xor %ecx,%ecx
|
||||
1: mov %rsi,%rax
|
||||
mul %r8
|
||||
shr $3,%rdx
|
||||
/ rax = quotient times ten, built as (q*2)*5.
lea (%rdx,%rdx),%rax
|
||||
lea (%rax,%rax,4),%rax
|
||||
/ rdi = current value minus quotient*10, i.e. the decimal digit.
mov %rsi,%rdi
|
||||
sub %rax,%rdi
|
||||
/ Move the digit to its nibble position and accumulate it.
shl %cl,%rdi
|
||||
add %rdi,%r9
|
||||
add $4,%rcx
|
||||
/ The compare looks at the value just processed; the following MOV
/ loads the quotient without touching flags, so JA still sees it.
cmp $9,%rsi
|
||||
mov %rdx,%rsi
|
||||
ja 1b
|
||||
jmp 3f
|
||||
2: xor %r9d,%r9d
|
||||
3: mov %r9,%rax
|
||||
.leafepilogue
|
||||
.endfn i2bcd,globl
|
21
libc/bits/isempty.c
Normal file
21
libc/bits/isempty.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
|
||||
/**
 * Tells whether a string is NULL or has zero length.
 *
 * @param s may be NULL
 * @return true if s is NULL or points at a NUL byte
 */
bool isempty(const char *s) {
  if (!s) {
    return 1;
  }
  return s[0] == '\0';
}
|
22
libc/bits/max.c
Normal file
22
libc/bits/max.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Function form of MAX() for two signed longs.
 *
 * The name is parenthesized so a function-like max() macro cannot
 * expand at the definition.
 */
long(max)(long x, long y) {
  long chosen;
  chosen = MAX(x, y);
  return chosen;
}
|
22
libc/bits/min.c
Normal file
22
libc/bits/min.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Function form of MIN() for two signed longs.
 *
 * The name is parenthesized so a function-like min() macro cannot
 * expand at the definition.
 */
long(min)(long x, long y) {
  long chosen;
  chosen = MIN(x, y);
  return chosen;
}
|
14
libc/bits/mmintrin.h
Normal file
14
libc/bits/mmintrin.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
typedef long long __m64 _Vector_size(8);
|
||||
typedef float __v2sf _Vector_size(8);
|
||||
typedef int __v2si _Vector_size(8);
|
||||
typedef short __v4hi _Vector_size(8);
|
||||
typedef char __v8qi _Vector_size(8);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_ */
|
37
libc/bits/morton.c
Normal file
37
libc/bits/morton.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/morton.h"
|
||||
|
||||
/* Spreads the low 32 bits of v so that bit k lands on bit 2k. Higher
   input bits are discarded first; otherwise they would survive the
   first mask and alias onto positions owned by valid bits. */
static unsigned long MortonSpread32(unsigned long v) {
  v &= 0x00000000FFFFFFFF;
  v = (v | v << 020) & 0x0000FFFF0000FFFF;
  v = (v | v << 010) & 0x00FF00FF00FF00FF;
  v = (v | v << 004) & 0x0F0F0F0F0F0F0F0F;
  v = (v | v << 002) & 0x3333333333333333;
  v = (v | v << 001) & 0x5555555555555555;
  return v;
}

/**
 * Interleaves bits.
 *
 * Bits of x occupy the even positions of the result and bits of y the
 * odd positions. Only the low 32 bits of each argument participate,
 * which is also what a deposit under the masks 0x5555555555555555 and
 * 0xAAAAAAAAAAAAAAAA consumes.
 *
 * @param y supplies the odd result bits
 * @param x supplies the even result bits
 * @return 64-bit interleaved code
 */
unsigned long(morton)(unsigned long y, unsigned long x) {
  return MortonSpread32(x) | MortonSpread32(y) << 1;
}
|
25
libc/bits/morton.h
Normal file
25
libc/bits/morton.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_MORTON_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_MORTON_H_
|
||||
#include "libc/intrin/pdep.h"
|
||||
#include "libc/intrin/pext.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
unsigned long morton(unsigned long, unsigned long) libcesque pureconst;
|
||||
axdx_t unmorton(unsigned long) libcesque pureconst;
|
||||
|
||||
#ifndef __STRICT_ANSI__
|
||||
/* Inline morton(): when X86_NEED(BMI2) holds, X is deposited under the
   even-bit mask and Y under the odd-bit mask with pdep(); otherwise
   the out-of-line morton() function is called. */
#define morton(Y, X)                         \
  (!(X86_NEED(BMI2))                         \
       ? morton(Y, X)                        \
       : (pdep(X, 0x5555555555555555ul) |    \
          pdep(Y, 0xAAAAAAAAAAAAAAAAul)))
|
||||
/* Inline unmorton(): when X86_NEED(BMI2) holds, the odd bits of I are
   extracted into the first member and the even bits into the second
   with pext(); otherwise the out-of-line unmorton() is called. */
#define unmorton(I)                                  \
  (!(X86_NEED(BMI2))                                 \
       ? unmorton(I)                                 \
       : (axdx_t){pext(I, 0xAAAAAAAAAAAAAAAAul),     \
                  pext(I, 0x5555555555555555ul)})
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_MORTON_H_ */
|
21
libc/bits/nulltoempty.c
Normal file
21
libc/bits/nulltoempty.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
|
||||
/**
 * Substitutes the empty string for a NULL pointer.
 *
 * @param s may be NULL
 * @return s itself when non-NULL, otherwise a pointer to ""
 */
const char *nulltoempty(const char *s) {
  if (!s) {
    return "";
  }
  return s;
}
|
14
libc/bits/pmmintrin.h
Normal file
14
libc/bits/pmmintrin.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse3 ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
/* Wraps __builtin_ia32_haddps (SSE3 horizontal add of packed floats).
   Both operands are cast to __v4sf and the result back to __m128.
   The second builtin argument must be M128_1; passing M128_0 twice
   silently ignored the caller's second vector. */
#define _mm_hadd_ps(M128_0, M128_1)                        \
  ((__m128)__builtin_ia32_haddps((__v4sf)(__m128)(M128_0), \
                                 (__v4sf)(__m128)(M128_1)))
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_ */
|
42
libc/bits/popcount.c
Normal file
42
libc/bits/popcount.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
|
||||
/* Counts set bits in a 32-bit word without a lookup table: sums
   adjacent bits into 2-bit fields, then 4-bit fields, then bytes, and
   finally adds the four bytes with one multiply whose top byte holds
   the total. */
static noinline uint32_t popcount$swar32(uint32_t x) {
  uint32_t pairs, nibbles, bytes;
  pairs = x - ((x >> 1) & 0x55555555);
  nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
  bytes = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;
  return (bytes * 0x1010101) >> 24;
}
|
||||
|
||||
unsigned long(popcount)(unsigned long x) {
|
||||
size_t i;
|
||||
unsigned long r;
|
||||
if (X86_HAVE(POPCNT)) {
|
||||
return popcount$nehalem(x);
|
||||
} else {
|
||||
r = 0;
|
||||
for (i = 0; i < sizeof(x); i += 4) {
|
||||
r |= popcount$swar32(x);
|
||||
x >>= 32;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
}
|
15
libc/bits/progn.h
Normal file
15
libc/bits/progn.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_PROGN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_PROGN_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
 * Evaluates args, returning value of last one.
 *
 * This API comes from LISP. The expansion is a brace group wrapped in
 * parentheses, i.e. a GNU statement expression, so the arguments end
 * up joined by comma operators inside a single expression statement.
 */
|
||||
#define PROGN(...) ({ __VA_ARGS__; })
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_PROGN_H_ */
|
53
libc/bits/pushpop.h
Normal file
53
libc/bits/pushpop.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
|
||||
#include "libc/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * PushPop
 * An elegant weapon for a more civilized age.
 *
 * Evaluates to x. Without GNU extensions this is just (x). With them,
 * a compile-time constant that fits a sign-extended 8-bit immediate is
 * materialized with push/pop (or xor for zero); anything else passes
 * through an empty asm statement.
 *
 * The immediate test is an explicit two-sided range. The previous form
 * added 128 in signed arithmetic and compared against 256, which every
 * negative constant passed, as did unsigned constants whose top bit is
 * set once cast to intptr_t.
 */
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushpop(x) (x)
#else
#define pushpop(x)                                                        \
  ({                                                                      \
    typeof(x) Popped;                                                     \
    if (isconstant(x) &&                                                  \
        (TYPE_SIGNED(typeof(x))                                           \
             ? ((intptr_t)(x) >= -128 && (intptr_t)(x) < 128)             \
             : ((intptr_t)(x) >= 0 && (intptr_t)(x) < 128))) {            \
      if (x) {                                                            \
        asm("push\t%1\n\t"                                                \
            "pop\t%q0"                                                    \
            : "=r"(Popped)                                                \
            : "ir"(x));                                                   \
      } else {                                                            \
        asm("xor\t%k0,%k0" : "=r"(Popped));                               \
      }                                                                   \
    } else {                                                              \
      asm("" : "=r"(Popped) : "0"(x));                                    \
    }                                                                     \
    Popped;                                                               \
  })
#endif
|
||||
|
||||
/* pushmov(d, x) stores x through the pointer d and evaluates to the
   stored value. The portable fallback dereferences d, matching the GNU
   variant; it previously assigned to d itself, so the two variants
   disagreed on whether d is a pointer. With GNU extensions, constants
   that fit a sign-extended 8-bit immediate are stored with pushq/popq.
   NOTE(review): popq writes eight bytes, so *(d) is presumably expected
   to be a 64-bit object. Confirm against callers. */
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushmov(d, x) (*(d) = (x))
#else
#define pushmov(d, x)                                                     \
  ({                                                                      \
    typeof(*(d)) Popped = (x);                                            \
    if (isconstant(x) &&                                                  \
        (TYPE_SIGNED(typeof(x))                                           \
             ? ((intptr_t)(x) >= -128 && (intptr_t)(x) < 128)             \
             : ((intptr_t)(x) >= 0 && (intptr_t)(x) < 128))) {            \
      asm("pushq\t%1\n\t"                                                 \
          "popq\t%0"                                                      \
          : "=m"(*(d))                                                    \
          : "ir"(Popped));                                                \
    } else {                                                              \
      *(d) = Popped;                                                      \
    }                                                                     \
    Popped;                                                               \
  })
#endif
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_ */
|
22
libc/bits/rounddown.c
Normal file
22
libc/bits/rounddown.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Function form of ROUNDDOWN() for signed longs.
 *
 * The name is parenthesized so a function-like rounddown() macro
 * cannot expand at the definition.
 */
long(rounddown)(long w, long k) {
  long rounded;
  rounded = ROUNDDOWN(w, k);
  return rounded;
}
|
31
libc/bits/rounddown2pow.c
Normal file
31
libc/bits/rounddown2pow.c
Normal file
|
@ -0,0 +1,31 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/nexgen32e/bsr.h"
|
||||
|
||||
/**
 * Returns 𝑥 rounded down to previous two power.
 *
 * Zero is handled before bsrl() is consulted and maps to zero.
 *
 * @define (𝑥>0→2^⌊log₂𝑥⌋, x=0→0, 𝑇→⊥)
 * @see roundup2pow()
 */
unsigned long rounddown2pow(unsigned long x) {
  if (!x) {
    return 0;
  }
  return 1ul << bsrl(x);
}
|
22
libc/bits/roundup.c
Normal file
22
libc/bits/roundup.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Function form of ROUNDUP() for signed longs.
 *
 * The name is parenthesized so a function-like roundup() macro cannot
 * expand at the definition.
 */
long(roundup)(long w, long k) {
  long rounded;
  rounded = ROUNDUP(w, k);
  return rounded;
}
|
29
libc/bits/roundup2log.c
Normal file
29
libc/bits/roundup2log.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/nexgen32e/bsr.h"
|
||||
|
||||
/**
 * Returns 𝑥 rounded up to next two power and log'd.
 *
 * For 𝑥≥1 the result 𝑟 satisfies 2^𝑟 = roundup2pow(𝑥). In particular
 * 𝑥=1 yields 0, since one is already 2^0; it previously yielded 1,
 * which disagreed with roundup2pow(). Zero also maps to 0.
 *
 * @see roundup2pow()
 */
unsigned long roundup2log(unsigned long x) {
  return x > 1 ? (bsrl(x - 1) + 1) : 0;
}
|
31
libc/bits/roundup2pow.c
Normal file
31
libc/bits/roundup2pow.c
Normal file
|
@ -0,0 +1,31 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/nexgen32e/bsr.h"
|
||||
|
||||
/**
 * Returns 𝑥 rounded up to next two power.
 *
 * Zero and one are returned unchanged; larger values use the index of
 * the highest set bit of 𝑥-1.
 *
 * @define (𝑥>0→2^⌈log₂x⌉, x=0→0, 𝑇→⊥)
 * @see rounddown2pow()
 */
unsigned long roundup2pow(unsigned long x) {
  if (x > 1) {
    return 1ul << (bsrl(x - 1) + 1);
  }
  return x ? 1 : 0;
}
|
86
libc/bits/safemacros.h
Normal file
86
libc/bits/safemacros.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
long min(long x, long y);
|
||||
long max(long x, long y);
|
||||
long roundup(long w, long k);
|
||||
long rounddown(long w, long k);
|
||||
bool isempty(const char *s);
|
||||
const char *nulltoempty(const char *s);
|
||||
const char *emptytonull(const char *s);
|
||||
const char *firstnonnull(const char *a, const char *b);
|
||||
uint64_t(unsignedsubtract)(uint64_t x, uint64_t y) pureconst;
|
||||
|
||||
#if !defined(__STRICT_ANSI__) && defined(__GNUC__)
|
||||
|
||||
/* Type-generic min() that evaluates each argument exactly once by
   copying it into a local of its own type. When neither compares less
   than the other, the second argument is the result. */
#define min(x, y)                          \
  ({                                       \
    autotype(x) MinLhs = (x);              \
    autotype(y) MinRhs = (y);              \
    MinRhs > MinLhs ? MinLhs : MinRhs;     \
  })
|
||||
|
||||
/* Type-generic max() that evaluates each argument exactly once by
   copying it into a local of its own type. When neither compares
   greater than the other, the second argument is the result. */
#define max(x, y)                          \
  ({                                       \
    autotype(x) MaxLhs = (x);              \
    autotype(y) MaxRhs = (y);              \
    MaxRhs < MaxLhs ? MaxLhs : MaxRhs;     \
  })
|
||||
|
||||
/* Single-evaluation front end for ROUNDUP(): both arguments are copied
   into locals first, so side effects in x or k happen once. */
#define roundup(x, k)                          \
  ({                                           \
    autotype(x) RoundupValue = (x);            \
    autotype(k) RoundupUnit = (k);             \
    ROUNDUP(RoundupValue, RoundupUnit);        \
  })
|
||||
|
||||
/* Single-evaluation front end for ROUNDDOWN(): both arguments are
   copied into locals first, so side effects in x or k happen once. */
#define rounddown(x, k)                          \
  ({                                             \
    autotype(x) RounddownValue = (x);            \
    autotype(k) RounddownUnit = (k);             \
    ROUNDDOWN(RounddownValue, RounddownUnit);    \
  })
|
||||
|
||||
/* Inline isempty(): nonzero when s is null or its first element is
   zero. The argument is evaluated once. */
#define isempty(s)                          \
  ({                                        \
    autotype(s) IsEmptyStr = (s);           \
    !(IsEmptyStr && *IsEmptyStr);           \
  })
|
||||
|
||||
/* Inline nulltoempty(): yields "" in place of a null pointer and the
   pointer itself otherwise. The argument is evaluated once. */
#define nulltoempty(s)                          \
  ({                                            \
    autotype(s) NullToEmptyStr = (s);           \
    !NullToEmptyStr ? "" : NullToEmptyStr;      \
  })
|
||||
|
||||
/* Inline firstnonnull(): yields a when it is non-null, else b, and
   calls abort() when both are null. Each argument is evaluated once;
   b is stored in a local declared with the type of a. */
#define firstnonnull(a, b)                               \
  ({                                                     \
    autotype(a) FirstNonNullLhs = (a);                   \
    autotype(a) FirstNonNullRhs = (b);                   \
    if (!(FirstNonNullLhs || FirstNonNullRhs)) abort();  \
    !FirstNonNullLhs ? FirstNonNullRhs : FirstNonNullLhs; \
  })
|
||||
|
||||
/* Inline emptytonull(): yields NULL for a non-null pointer whose first
   element is zero; any other pointer, null included, is passed
   through. The argument is evaluated once. */
#define emptytonull(s)                                             \
  ({                                                               \
    autotype(s) EmptyToNullStr = (s);                              \
    (!EmptyToNullStr || *EmptyToNullStr) ? EmptyToNullStr : NULL;  \
  })
|
||||
|
||||
/* Inline unsignedsubtract(): difference a - b in 64-bit modular
   arithmetic. When a is smaller than b the result is formed as a plus
   the two's complement of b. Each argument is evaluated once. */
#define unsignedsubtract(a, b)                            \
  ({                                                      \
    uint64_t UnsubLhs = (a);                              \
    uint64_t UnsubRhs = (b);                              \
    UnsubLhs < UnsubRhs ? ~UnsubRhs + UnsubLhs + 1        \
                        : UnsubLhs - UnsubRhs;            \
  })
|
||||
|
||||
#endif /* GNU && !ANSI */
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_ */
|
37
libc/bits/shaintrin.h
Normal file
37
libc/bits/shaintrin.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
|
||||
#include "libc/bits/emmintrin.h"
|
||||
#include "libc/bits/xmmintrin.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* Wraps __builtin_ia32_sha1rnds4. Both vector operands are cast to
   __v4si and MEM is forwarded unchanged as the third builtin argument.
   The result is cast to __m128i and the expansion is fully
   parenthesized, as in the other SHA wrappers in this header. */
#define _mm_sha1rnds4_epu32(M128I_0, M128I_1, MEM)                \
  ((__m128i)__builtin_ia32_sha1rnds4((__v4si)(__m128i)(M128I_0),  \
                                     (__v4si)(__m128i)(M128I_1), (MEM)))
|
||||
|
||||
/* Wraps __builtin_ia32_sha1nexte: both operands are cast to __v4si and
   the result is cast back to __m128i. */
#define _mm_sha1nexte_epu32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_sha1nexte((__v4si)(__m128i)(M128I_0), \
|
||||
(__v4si)(__m128i)(M128I_1)))
|
||||
|
||||
/* Wraps __builtin_ia32_sha1msg1: both operands are cast to __v4si and
   the result is cast back to __m128i. */
#define _mm_sha1msg1_epu32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_sha1msg1((__v4si)(__m128i)(M128I_0), \
|
||||
(__v4si)(__m128i)(M128I_1)))
|
||||
|
||||
/* Wraps __builtin_ia32_sha1msg2: both operands are cast to __v4si and
   the result is cast back to __m128i. */
#define _mm_sha1msg2_epu32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_sha1msg2((__v4si)(__m128i)(M128I_0), \
|
||||
(__v4si)(__m128i)(M128I_1)))
|
||||
|
||||
/* Wraps __builtin_ia32_sha256rnds2: all three operands are cast to
   __v4si and the result is cast back to __m128i. */
#define _mm_sha256rnds2_epu32(M128I_0, M128I_1, M128I_2) \
|
||||
((__m128i)__builtin_ia32_sha256rnds2((__v4si)(__m128i)(M128I_0), \
|
||||
(__v4si)(__m128i)(M128I_1), \
|
||||
(__v4si)(__m128i)(M128I_2)))
|
||||
|
||||
/* Wraps __builtin_ia32_sha256msg1: both operands are cast to __v4si
   and the result is cast back to __m128i. */
#define _mm_sha256msg1_epu32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_sha256msg1((__v4si)(__m128i)(M128I_0), \
|
||||
(__v4si)(__m128i)(M128I_1)))
|
||||
|
||||
/* Wraps __builtin_ia32_sha256msg2: both operands are cast to __v4si
   and the result is cast back to __m128i. */
#define _mm_sha256msg2_epu32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_sha256msg2((__v4si)(__m128i)(M128I_0), \
|
||||
(__v4si)(__m128i)(M128I_1)))
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_ */
|
31
libc/bits/smmintrin.h
Normal file
31
libc/bits/smmintrin.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_
|
||||
|
||||
/**
|
||||
* @fileoverview SSE4 intrinsics.
|
||||
*/
|
||||
|
||||
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_CUR_DIRECTION 4
|
||||
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
|
||||
#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_NO_EXC 8
|
||||
#define _MM_FROUND_RAISE_EXC 0
|
||||
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_TO_NEAREST_INT 0
|
||||
#define _MM_FROUND_TO_NEG_INF 1
|
||||
#define _MM_FROUND_TO_POS_INF 2
|
||||
#define _MM_FROUND_TO_ZERO 3
|
||||
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
|
||||
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* Wraps __builtin_ia32_vec_ext_v4si: the vector is cast to __v4si, the
   index I32 to int, and the selected element is returned as int. */
#define _mm_extract_epi32(M128I, I32) \
|
||||
((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(M128I), (int)(I32)))
|
||||
|
||||
/* Wraps __builtin_ia32_phminposuw128 (SSE4.1 PHMINPOSUW). The builtin
   takes a single vector of eight 16-bit lanes and returns a vector, so
   the operand is cast to __v8hi and the result to __m128i. The earlier
   body referenced an I32 parameter this macro does not have and cast
   the result to int.
   NOTE(review): assumes __v8hi is declared alongside __v4si and
   __m128i (presumably in emmintrin.h). Confirm. */
#define _mm_minpos_epu16(M128I) \
  ((__m128i)__builtin_ia32_phminposuw128((__v8hi)(__m128i)(M128I)))
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_ */
|
17
libc/bits/tmmintrin.h
Normal file
17
libc/bits/tmmintrin.h
Normal file
|
@ -0,0 +1,17 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
|
||||
#include "libc/bits/emmintrin.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § ssse3 ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_maddubs_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pmaddubsw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_shuffle_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pshufb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_ */
|
14
libc/bits/typecheck.h
Normal file
14
libc/bits/typecheck.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_TYPECHECK_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_TYPECHECK_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Checks at compile time that expression X has type T.
 *
 * One dummy object of each type is declared and their addresses are
 * compared. The comparison result is discarded; it exists only so the
 * compiler complains about mismatched pointer types when T and the
 * type of X differ. __typeof__ is spelled with underscores so the macro
 * also works in strict ISO modes (e.g. -std=c11) where the plain
 * `typeof` keyword is not available.
 *
 * @param T is the expected type
 * @param X is the expression to check; it is evaluated once
 * @return value of X
 */
#define TYPECHECK(T, X)                         \
  ({                                            \
    T TypecheckWant_;                           \
    __typeof__(X) TypecheckGot_;                \
    (void)(&TypecheckWant_ == &TypecheckGot_);  \
    X;                                          \
  })
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_TYPECHECK_H_ */
|
41
libc/bits/unmorton.c
Normal file
41
libc/bits/unmorton.c
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/morton.h"
|
||||
|
||||
/**
 * Packs bits 0, 2, 4, … of x into the low 32 bits of the result.
 *
 * Bits at the other positions are masked off first. Each later pass
 * shifts by twice the previous distance and merges neighboring groups,
 * so that result bit k ends up holding input bit 2k.
 */
static unsigned long GetOddBits(unsigned long x) {
  static const unsigned long kGroupMasks[] = {
      0x3333333333333333, 0x0F0F0F0F0F0F0F0F, 0x00FF00FF00FF00FF,
      0x0000FFFF0000FFFF, 0x00000000FFFFFFFF,
  };
  unsigned pass;
  x &= 0x5555555555555555;
  for (pass = 0; pass < sizeof(kGroupMasks) / sizeof(kGroupMasks[0]); ++pass) {
    /* shift distances are 1, 2, 4, 8, 16 */
    x = (x | x >> (1u << pass)) & kGroupMasks[pass];
  }
  return x;
}
|
||||
|
||||
/**
|
||||
* Deinterleaves bits.
|
||||
*
|
||||
* @param 𝑖 is interleaved index
|
||||
* @return deinterleaved coordinate {ax := 𝑦, dx := 𝑥}
|
||||
* @see en.wikipedia.org/wiki/Z-order_curve
|
||||
*/
|
||||
axdx_t(unmorton)(unsigned long i) {
|
||||
return (axdx_t){GetOddBits(i >> 1), GetOddBits(i)};
|
||||
}
|
27
libc/bits/unsignedsubtract.c
Normal file
27
libc/bits/unsignedsubtract.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/safemacros.h"
|
||||
|
||||
/**
 * Subtracts unsigned integers w/ wraparound.
 *
 * The name is parenthesized in the definition so that a function-like
 * macro called unsignedsubtract is not expanded there; the call in the
 * body is not parenthesized and therefore would be.
 *
 * NOTE(review): presumably libc/bits/safemacros.h supplies that macro
 * and this function is its out-of-line counterpart. If no such macro
 * is in scope the body is unconditional self-recursion — confirm.
 *
 * @param x is the minuend
 * @param y is the subtrahend
 */
|
||||
uint64_t(unsignedsubtract)(uint64_t x, uint64_t y) {
|
||||
return unsignedsubtract(x, y);
|
||||
}
|
29
libc/bits/wmmintrin.h
Normal file
29
libc/bits/wmmintrin.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
|
||||
#include "libc/bits/emmintrin.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
#define _mm_clmulepi64_si128(X, Y, IMM) \
|
||||
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
|
||||
(__v2di)(__m128i)(Y), (char)(IMM)))
|
||||
|
||||
#define _mm_aesenc_si128(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_aesenc128((__v2di)(M128I_0), (__v2di)(M128I_1)))
|
||||
#define _mm_aesenclast_si128(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_aesenclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))
|
||||
|
||||
#define _mm_aesdec_si128(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_aesdec128((__v2di)(M128I_0), (__v2di)(M128I_1)))
|
||||
#define _mm_aesdeclast_si128(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_aesdeclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))
|
||||
|
||||
#define _mm_aesimc_si128(M128I) \
|
||||
((__m128i)__builtin_ia32_aesimc128((__v2di)(M128I)))
|
||||
/* NOTE(review): AES-NI appears to provide AESIMC only, and GCC's list of
   x86 built-ins has no __builtin_ia32_aesimclast128, so expanding this
   macro presumably fails to build. Confirm, then fix or remove. */
#define _mm_aesimclast_si128(M128I) \
|
||||
((__m128i)__builtin_ia32_aesimclast128((__v2di)(M128I)))
|
||||
|
||||
#define _mm_aeskeygenassist_si128(X, Y) \
|
||||
((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(X), (int)(Y)))
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_ */
|
25
libc/bits/xchg.h
Normal file
25
libc/bits/xchg.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_XCHG_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_XCHG_H_
|
||||
#include "libc/str/str.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Swaps the object MEMORY points to with the one LOCALVAR points to.
 *
 * Both arguments are pointers. The contents are moved with memcpy(),
 * sizeof(*MEMORY) bytes at a time, by way of a temporary.
 *
 * @param MEMORY points to the first object
 * @param LOCALVAR points to the second object
 * @return value *MEMORY held before the exchange
 * @see lockcmpxchg()
 * todo(jart): what's the point of this?
 */
#define xchg(MEMORY, LOCALVAR)             \
  ({                                       \
    autotype(MEMORY) Dst_ = (MEMORY);      \
    typeof(Dst_) Src_ = (LOCALVAR);        \
    typeof(*Dst_) Old_;                    \
    memcpy(&Old_, Dst_, sizeof(Old_));     \
    memcpy(Dst_, Src_, sizeof(Old_));      \
    memcpy(Src_, &Old_, sizeof(Old_));     \
    Old_;                                  \
  })
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_XCHG_H_ */
|
233
libc/bits/xmmintrin.h
Normal file
233
libc/bits/xmmintrin.h
Normal file
|
@ -0,0 +1,233 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
|
||||
#include "libc/bits/emmintrin.h"
|
||||
#include "libc/bits/mmintrin.h"
|
||||
#include "libc/bits/progn.h"
|
||||
#include "libc/dce.h"
|
||||
|
||||
#define _MM_EXCEPT_MASK 0x003f
|
||||
#define _MM_EXCEPT_INVALID 0x0001
|
||||
#define _MM_EXCEPT_DENORM 0x0002
|
||||
#define _MM_EXCEPT_DIV_ZERO 0x0004
|
||||
#define _MM_EXCEPT_OVERFLOW 0x0008
|
||||
#define _MM_EXCEPT_UNDERFLOW 0x0010
|
||||
#define _MM_EXCEPT_INEXACT 0x0020
|
||||
#define _MM_MASK_MASK 0x1f80
|
||||
#define _MM_MASK_INVALID 0x0080
|
||||
#define _MM_MASK_DENORM 0x0100
|
||||
#define _MM_MASK_DIV_ZERO 0x0200
|
||||
#define _MM_MASK_OVERFLOW 0x0400
|
||||
#define _MM_MASK_UNDERFLOW 0x0800
|
||||
#define _MM_MASK_INEXACT 0x1000
|
||||
#define _MM_ROUND_MASK 0x6000
|
||||
#define _MM_ROUND_NEAREST 0x0000
|
||||
#define _MM_ROUND_DOWN 0x2000
|
||||
#define _MM_ROUND_UP 0x4000
|
||||
#define _MM_ROUND_TOWARD_ZERO 0x6000
|
||||
#define _MM_FLUSH_ZERO_MASK 0x8000
|
||||
#define _MM_FLUSH_ZERO_ON 0x8000
|
||||
#define _MM_FLUSH_ZERO_OFF 0x0000
|
||||
|
||||
#define _MM_SHUFFLE(A, B, C, D) (((A) << 6) | ((B) << 4) | ((C) << 2) | (D))
|
||||
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
typedef int __v4si _Vector_size(16);
|
||||
typedef unsigned int __v4su _Vector_size(16);
|
||||
typedef float __v4sf _Vector_size(16);
|
||||
typedef float __m128 _Vector_size(16) aligned(16) mayalias;
|
||||
typedef float __m128_u _Vector_size(16) aligned(1) mayalias;
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse » simd ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_add_ps(M128_0, M128_1) \
|
||||
((__m128)((__v4sf)(M128_0) + (__v4sf)(M128_1)))
|
||||
#define _mm_sub_ps(M128_0, M128_1) \
|
||||
((__m128)((__v4sf)(M128_0) - (__v4sf)(M128_1)))
|
||||
#define _mm_mul_ps(M128_0, M128_1) \
|
||||
((__m128)((__v4sf)(M128_0) * (__v4sf)(M128_1)))
|
||||
#define _mm_div_ps(M128_0, M128_1) \
|
||||
((__m128)((__v4sf)(M128_0) / (__v4sf)(M128_1)))
|
||||
#define _mm_and_ps(M128_0, M128_1) \
|
||||
((__m128)((__v4su)(M128_0) & (__v4su)(M128_1)))
|
||||
#define _mm_or_ps(M128_0, M128_1) \
|
||||
((__m128)((__v4su)(M128_0) | (__v4su)(M128_1)))
|
||||
#define _mm_xor_ps(M128_0, M128_1) /* XORPS [u32 simd xor] */ \
|
||||
((__m128)((__v4su)(M128_0) ^ (__v4su)(M128_1)))
|
||||
#define _mm_andnot_ps(M128_0, M128_1) /* ANDNPS [u32 simd and-not: ~a & b] */ \
|
||||
((__m128)(~(__v4su)(M128_0) & (__v4su)(M128_1)))
|
||||
#define _mm_rcp_ps(M128) __builtin_ia32_rcpps((__v4sf)(M128))
|
||||
#define _mm_sqrt_ps(M128) __builtin_ia32_sqrtps((__v4sf)(M128))
|
||||
#define _mm_rsqrt_ps(M128) __builtin_ia32_rsqrtps((__v4sf)(M128))
|
||||
|
||||
#define _mm_min_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_minps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_max_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_maxps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_min_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_max_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpeq_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpeqps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpneq_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpneqps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmplt_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpltps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpnlt_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnltps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmple_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpleps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpnle_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnleps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpgt_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpltps((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpngt_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnltps((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpge_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpleps((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpnge_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnleps((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpord_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpordps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpunord_ps(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpunordps((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse » scalar ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
/**
 * Adds the lowest lanes of two vectors.
 *
 * @return copy of m128_0 w/ lane 0 replaced by m128_0[0] + m128_1[0];
 *     lanes 1..3 are carried over from m128_0
 */
forceinline __m128 _mm_add_ss(__m128 m128_0, __m128 m128_1) {
  __m128 res = m128_0;
  res[0] = m128_0[0] + m128_1[0];
  return res;
}
|
||||
|
||||
/**
 * Subtracts the lowest lane of m128_1 from the lowest lane of m128_0.
 *
 * @return copy of m128_0 w/ lane 0 replaced by m128_0[0] - m128_1[0];
 *     lanes 1..3 are carried over from m128_0
 */
forceinline __m128 _mm_sub_ss(__m128 m128_0, __m128 m128_1) {
  __m128 res = m128_0;
  res[0] = m128_0[0] - m128_1[0];
  return res;
}
|
||||
|
||||
/**
 * Multiplies the lowest lanes of two vectors.
 *
 * @return copy of m128_0 w/ lane 0 replaced by m128_0[0] * m128_1[0];
 *     lanes 1..3 are carried over from m128_0
 */
forceinline __m128 _mm_mul_ss(__m128 m128_0, __m128 m128_1) {
  __m128 res = m128_0;
  res[0] = m128_0[0] * m128_1[0];
  return res;
}
|
||||
|
||||
/**
 * Divides the lowest lane of m128_0 by the lowest lane of m128_1.
 *
 * @return copy of m128_0 w/ lane 0 replaced by m128_0[0] / m128_1[0];
 *     lanes 1..3 are carried over from m128_0
 */
forceinline __m128 _mm_div_ss(__m128 m128_0, __m128 m128_1) {
  __m128 res = m128_0;
  res[0] = m128_0[0] / m128_1[0];
  return res;
}
|
||||
|
||||
#define _mm_rcp_ss(M128) __builtin_ia32_rcpss((__v4sf)(M128)) /*~1/x*/
|
||||
#define _mm_sqrt_ss(M128) __builtin_ia32_sqrtss((__v4sf)(M128)) /*sqrt𝑥*/
|
||||
#define _mm_rsqrt_ss(M128) __builtin_ia32_rsqrtss((__v4sf)(M128)) /*~1/sqrt𝑥*/
|
||||
|
||||
#define _mm_min_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_max_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpeq_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpeqss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpneq_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpneqss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmplt_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpltss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpnlt_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnltss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmple_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpless((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpnle_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnless((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpgt_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpltss((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpngt_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnltss((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpge_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpless((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpnge_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpnless((__v4sf)(M128_1), (__v4sf)(M128_0))
|
||||
#define _mm_cmpord_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpordss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
#define _mm_cmpunord_ss(M128_0, M128_1) \
|
||||
__builtin_ia32_cmpunordss((__v4sf)(M128_0), (__v4sf)(M128_1))
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse » memory ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_set1_ps(M128_0) ((__m128)(__v4sf){M128_0, M128_0, M128_0, M128_0})
|
||||
#define _mm_setzero_ps() ((__m128)(__v4sf){0})
|
||||
#define _mm_cvtss_f32(M128_0) (((__v4sf)(M128_0))[0])
|
||||
#define _mm_load_ps(FLOATPTR) (*(__m128 *)(FLOATPTR))
|
||||
#define _mm_loadu_ps(FLOATPTR) (*(__m128_u *)(FLOATPTR))
|
||||
#define _mm_set_ps(WHO, DESIGNED, THIS, SHEESH) \
|
||||
((__m128)(__v4sf){SHEESH, THIS, DESIGNED, WHO})
|
||||
#define _mm_set_ss(FLOAT) ((__m128)(__v4sf){FLOAT, 0, 0, 0})
|
||||
#define _mm_load_ss(FLOATPTR) _mm_set_ss(*(FLOATPTR))
|
||||
#define _mm_store_ss(FLOATPTR, M128_0) ((FLOATPTR)[0] = ((__v4sf)(M128_0))[0])
|
||||
#define _mm_store_ps(FLOATPTR, M128_0) (*(__m128 *)(FLOATPTR) = (M128_0))
|
||||
#define _mm_storeu_ps(FLOATPTR, M128_0) (*(__m128_u *)(FLOATPTR) = (M128_0))
|
||||
#define _mm_shuffle_ps(M128_0, M128_1, MASK) \
|
||||
((__m128)__builtin_ia32_shufps((__v4sf)(M128_0), (__v4sf)(M128_1), (MASK)))
|
||||
|
||||
/*
 * _mm_movehl_ps(a, b) yields {b[2], b[3], a[2], a[3]}.
 * _mm_storel_pi(p, a) stores the two lowest lanes of a (8 bytes) at p.
 *
 * Both are defined for either compiler family; the llvm branch avoids
 * the __builtin_ia32_* forms used by the other branch.
 */
#ifdef __llvm__
/* intrinsics unstable & constantly breaking, consider ansi c or asm. */
/* each version of llvm has a different incompatible impl for this one */
#define _mm_movehl_ps(M128_0, M128_1)                        \
  ((__m128)__builtin_shufflevector((__v4sf)(__m128)(M128_0), \
                                   (__v4sf)(__m128)(M128_1), 6, 7, 2, 3))
/* copies the low half through a named temporary so its address can be
   taken; memcpy keeps the store free of alignment/aliasing assumptions */
#define _mm_storel_pi(M64PTR, M128_0)            \
  ({                                             \
    __v4sf StorelPi_ = (__v4sf)(M128_0);         \
    __builtin_memcpy((M64PTR), &StorelPi_, 8);   \
    (void)0;                                     \
  })
#else
#define _mm_movehl_ps(M128_0, M128_1)                 \
  ((__m128)__builtin_ia32_movhlps((__v4sf)(__m128)(M128_0), \
                                  (__v4sf)(__m128)(M128_1)))
#define _mm_storel_pi(M64PTR, M128_0) \
  __builtin_ia32_storelps((__v2sf *)(M64PTR), (__v4sf)(M128_0))
#endif
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse » cast ops ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_cvtps_epi32(M128_0) \
|
||||
((__m128i)__builtin_ia32_cvtps2dq((__v4sf)(M128_0)))
|
||||
|
||||
#ifdef __llvm__
|
||||
#define _mm_cvtepi32_ps(M128I_0) \
|
||||
((__m128) __builtin_convertvector((__v4si)(__m128i)(M128I_0), __v4sf))
|
||||
#else
|
||||
#define _mm_cvtepi32_ps(M128I_0) \
|
||||
((__m128)__builtin_ia32_cvtdq2ps((__v4si)(M128I_0)))
|
||||
#endif
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § sse » misc ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
#define _mm_getcsr() (__builtin_ia32_stmxcsr())
|
||||
#define _mm_setcsr(U32CONF) (__builtin_ia32_ldmxcsr(U32CONF))
|
||||
|
||||
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)
|
||||
#define _MM_SET_ROUNDING_MODE(MODE) \
|
||||
(_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (MODE)))
|
||||
|
||||
/**
 * Zeroes VAR, held in an SSE register, unless IsTrustworthy() is true.
 *
 * The "0" input constraint ties operand %1 to the same register as
 * output %0, so the XORPS computes VAR ^ VAR. The asm is volatile so
 * the compiler keeps the instruction even if VAR is never read again.
 *
 * NOTE(review): the name suggests this scrubs sensitive values from
 * registers; confirm intent against IsTrustworthy() in libc/dce.h.
 */
#define XMM_DESTROY(VAR) \
|
||||
do { \
|
||||
if (!IsTrustworthy()) { \
|
||||
asm volatile("xorps\t%1,%0" : "=x"(VAR) : "0"(VAR)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
** Ternary:
|
||||
**
|
||||
** Integer: _mm_or_si128(_mm_and_si128(a, cond), _mm_andnot_si128(cond, b))
|
||||
** 32-bit float: _mm_or_ps(_mm_and_ps(a, cond), _mm_andnot_ps(cond, b))
|
||||
** 64-bit float: _mm_or_pd(_mm_and_pd(a, cond), _mm_andnot_pd(cond, b))
|
||||
** Integer (SSE4.1+): _mm_blendv_epi8(a, b, cond)
|
||||
** 32-bit float (SSE4.1+): _mm_blendv_ps(a, b, cond)
|
||||
** 64-bit float (SSE4.1+): _mm_blendv_pd(a, b, cond)
|
||||
*/
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_ */
|
Loading…
Add table
Add a link
Reference in a new issue