Initial import

This commit is contained in:
Justine Tunney 2020-06-15 07:18:57 -07:00
commit c91b3c5006
14915 changed files with 590219 additions and 0 deletions

23
libc/bits/abs.c Normal file
View file

@ -0,0 +1,23 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/conv/conv.h"
#include "libc/macros.h"
int(abs)(int x) { return ABS(x); }

52
libc/bits/atomic.h Normal file
View file

@ -0,0 +1,52 @@
#ifndef COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
#define COSMOPOLITAN_LIBC_BITS_ATOMIC_H_
#include "libc/bits/bits.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
* @fileoverview C11 version of The Cosmopolitan Atomics Library.
*
* - Forty-two different ways to say MOV.
* - Fourteen different ways to say XCHG.
* - Twenty different ways to say LOCK CMPXCHG.
*
* Living proof high-level languages can be lower-level than assembly.
*/
#define memory_order int
#define memory_order_relaxed 0
#define memory_order_consume 1
#define memory_order_acquire 2
#define memory_order_release 3
#define memory_order_acq_rel 4
#define memory_order_seq_cst 5
#define atomic_flag struct AtomicFlag
#define atomic_flag_clear(PTR) atomic_store((PTR)->__cacheline, 0)
#define atomic_flag_test_and_set(PTR) \
({ \
uint32_t ax = 0; \
lockcmpxchg((PTR)->__cacheline, &ax, 1); \
})
#define atomic_init(PTR, VAL) atomic_store(PTR, VAL)
#define atomic_exchange(PTR, VAL) lockxchg(PTR, &(VAL))
#define atomic_compare_exchange_strong(X, Y, Z) lockcmpxchg(X, Y, Z)
#define atomic_compare_exchange_weak(X, Y, Z) lockcmpxchg(X, Y, Z)
#define atomic_load_explicit(PTR, ORDER) atomic_load(PTR)
#define atomic_store_explicit(PTR, VAL, ORDER) atomic_store(PTR, VAL)
#define atomic_flag_clear_explicit(PTR, ORDER) atomic_store(PTR, 0)
#define atomic_exchange_explicit(PTR, VAL, ORDER) lockxchg(PTR, &(VAL))
#define atomic_flag_test_and_set_explicit(PTR, ORDER) lockcmpxchg(PTR, 0, 1)
#define atomic_compare_exchange_strong_explicit(X, Y, Z, S, F) \
lockcmpxchg(X, Y, Z)
#define atomic_compare_exchange_weak_explicit(X, Y, Z, S, F) \
lockcmpxchg(X, Y, Z)
struct AtomicFlag {
uint32_t __cacheline[16]; /* Intel V.O §9.4.6 */
} aligned(64);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_ATOMIC_H_ */

133
libc/bits/avx2intrin.h Normal file
View file

@ -0,0 +1,133 @@
#ifndef COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_
#include "libc/bits/avxintrin.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define _mm256_min_epi16(M256_0, M256_1) \
((__m256i)__builtin_ia32_minps((__v16hi)(M256_0), (__v16hi)(M256_1)))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § avx2 » simd ops
*/
#define _mm256_add_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) + (__v8sf)(M256_1)))
#define _mm256_sub_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) - (__v8sf)(M256_1)))
#define _mm256_mul_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) * (__v8sf)(M256_1)))
#define _mm256_div_ps(M256_0, M256_1) \
((__m256)((__v8sf)(M256_0) / (__v8sf)(M256_1)))
#define _mm256_and_ps(M256_0, M256_1) \
((__m256)((__v8su)(M256_0) & (__v8su)(M256_1)))
#define _mm256_or_ps(M256_0, M256_1) \
((__m256)((__v8su)(M256_0) | (__v8su)(M256_1)))
#define _mm256_xor_ps(M256_0, M256_1) /* XORPD [u32 simd xor] */ \
((__m256)((__v8su)(M256_0) ^ (__v8su)(M256_1)))
#define _mm256_andnot_ps(M256_0, M256_1) /* ANDNPS [u32 simd nand] */ \
((__m256)(~(__v8su)(M256_0) & (__v8su)(M256_1)))
#define _mm256_rcp_ps(M256) __builtin_ia32_rcpps256((__v8sf)(M256))
#define _mm256_sqrt_ps(M256) __builtin_ia32_sqrtps256((__v8sf)(M256))
#define _mm256_rsqrt_ps(M256) __builtin_ia32_rsqrtps256((__v8sf)(M256))
#define _mm256_round_ps(M256, IMM) \
((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(M256), IMM))
#define _mm256_add_epi32(M256I_0, M256I_1) \
((__m256i)((__v8su)(M256I_0) + (__v8su)(M256I_1)))
#define _mm256_cmpgt_epi32(M256I_0, M256I_1) \
((__m256i)((__v8si)(M256I_0) > (__v8si)(M256I_1)))
#define _mm256_min_epi32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pminsd256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_min_epu32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pminud256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_max_epi32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pmaxsd256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_max_epu32(M256I_0, M256I_1) \
((__m256i)__builtin_ia32_pmaxud256((__v8si)(M256I_0), (__v8si)(M256I_1)))
#define _mm256_blendv_epi8(M256I_0, M256I_1, M256I_2) \
((__m256i)__builtin_ia32_pblendvb256((__v32qi)(M256I_0), (__v32qi)(M256I_1), \
(__v32qi)(M256I_2)))
#define _mm256_min_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_minps256((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_max_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_maxps256((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpneq_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpneqps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmplt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpltps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpnlt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnltps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmple_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpleps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpnle_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnleps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpgt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpltps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpngt_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnltps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpge_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpleps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpnge_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpnleps((__v8sf)(__m256)(M256_1), \
(__v8sf)(__m256)(M256_0)))
#define _mm256_cmpord_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpordps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
#define _mm256_cmpunord_ps(M256_0, M256_1) \
((__m256)__builtin_ia32_cmpunordps((__v8sf)(__m256)(M256_0), \
(__v8sf)(__m256)(M256_1)))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § avx2 » memory ops
*/
struct thatispacked PackedMayaliasIntyYmm {
__m256i Ymm;
} mayalias;
#define _mm256_set_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
((__m256)(__v8sf){(float)(FLT_0), (float)(FLT_1), (float)(FLT_2), \
(float)(FLT_3), (float)(FLT_4), (float)(FLT_5), \
(float)(FLT_6), (float)(FLT_7)})
#define _mm256_set1_ps(FLT_0) \
_mm256_set_ps(FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0, FLT_0)
#define _mm256_setr_ps(FLT_0, FLT_1, FLT_2, FLT_3, FLT_4, FLT_5, FLT_6, FLT_7) \
_mm256_set_ps(FLT_7, FLT_6, FLT_5, FLT_4, FLT_3, FLT_2, FLT_1, FLT_0)
#define _mm256_set_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
INT_7) \
((__m256i)(__v8si){(int)(INT_0), (int)(INT_1), (int)(INT_2), (int)(INT_3), \
(int)(INT_4), (int)(INT_5), (int)(INT_6), (int)(INT_7)})
#define _mm256_set1_epi32(INT_0) \
_mm256_set_epi32(INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0, INT_0)
#define _mm256_setr_epi32(INT_0, INT_1, INT_2, INT_3, INT_4, INT_5, INT_6, \
INT_7) \
_mm256_set_epi32(INT_7, INT_6, INT_5, INT_4, INT_3, INT_2, INT_1, INT_0)
#define _mm256_loadu_si256(M256IP_0) \
({ \
const __m256i *Ymm = (M256IP_0); \
((struct PackedMayaliasIntyYmm *)Ymm)->Ymm; \
})
#define _mm256_storeu_si256(M256IP_0, M256I_1) \
({ \
__m256i *Ymm = (M256IP_0); \
((struct PackedMayaliasIntyYmm *)Ymm)->Ymm = M256I_1; \
})
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_AVX2INTRIN_H_ */

51
libc/bits/avxintrin.h Normal file
View file

@ -0,0 +1,51 @@
#ifndef COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
typedef float __m256 _Vector_size(32) mayalias;
typedef double __m256d _Vector_size(32) mayalias;
typedef long long __m256i _Vector_size(32) mayalias;
typedef float __m256_u _Vector_size(32) aligned(1) mayalias;
typedef double __m256d_u _Vector_size(32) aligned(1) mayalias;
typedef long long __m256i_u _Vector_size(32) aligned(1) mayalias;
typedef double __v4df _Vector_size(32);
typedef float __v8sf _Vector_size(32);
typedef long long __v4di _Vector_size(32);
typedef unsigned long long __v4du _Vector_size(32);
typedef int __v8si _Vector_size(32);
typedef unsigned __v8su _Vector_size(32);
typedef short __v16hi _Vector_size(32);
typedef unsigned short __v16hu _Vector_size(32);
typedef char __v32qi _Vector_size(32);
typedef unsigned char __v32qu _Vector_size(32);
#define _mm256_setzero_ps() ((__m256)(__v8sf){0})
#define _mm256_load_ps(FLOATPTR) (*(__m256 *)(FLOATPTR))
#define _mm256_loadu_ps(FLOATPTR) (*(__m256_u *)(FLOATPTR))
#define _mm256_store_ps(FLOATPTR, M256_0) \
(*(__m256 *)(FLOATPTR) = (__m256)(M256_0))
#define _mm256_storeu_ps(FLOATPTR, M256_0) \
(*(__m256_u *)(FLOATPTR) = (__m256)(M256_0))
#define _mm256_extractf128_ps(M256_0, INT_1) \
((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(M256_0), \
(int)(INT_1)))
#define _mm256_insertf128_ps(M256_0, M128_1, IMM_2) \
((__m256)__builtin_ia32_vinsertf128_ps256( \
(__v8sf)(__m256)(M256_0), (__v4sf)(__m128)(M128_1), (int)(IMM_2)))
#ifdef __llvm__
#define _mm256_castps128_ps256(M128_0) \
((__m256)__builtin_shufflevector((__v4sf)(__m128)(M128_0), \
(__v4sf)(__m128)(M128_0), 0, 1, 2, 3, -1, \
-1, -1, -1))
#else
#define _mm256_castps128_ps256(M128_0) \
((__m256)__builtin_ia32_ps256_ps((__v4sf)(__m128)(M128_0)))
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_AVXINTRIN_H_ */

42
libc/bits/bcd2i.S Normal file
View file

@ -0,0 +1,42 @@
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
/ Converts binary-coded-decimal to integer.
/
/ @param rdi is the string copied into a word
bcd2i: .leafprologue
.profilable
test %rdi,%rdi
je 2f
mov $1,%ecx
xor %eax,%eax
1: mov %edi,%edx
and $15,%edx
imul %rcx,%rdx
add %rdx,%rax
add %rcx,%rcx
lea (%rcx,%rcx,4),%rcx
shr $4,%rdi
jne 1b
ret
2: xor %eax,%eax
.leafepilogue
.endfn bcd2i,globl

38
libc/bits/bcdadd.S Normal file
View file

@ -0,0 +1,38 @@
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
/ Performs addition on binary-coded decimals.
bcdadd: .leafprologue
.profilable
lea 0x6666666(%rdi),%ecx
xor %esi,%ecx
lea (%rdi,%rsi),%eax
add $0x6666666,%eax
xor %eax,%ecx
not %ecx
and $0x11111110,%ecx
mov %ecx,%edx
shr $2,%edx
shr $3,%ecx
orl %edx,%ecx
sub %ecx,%eax
.leafepilogue
.endfn bcdadd,globl

54
libc/bits/bcxcpy.S Normal file
View file

@ -0,0 +1,54 @@
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
bcxcpy: push %rbp
mov %rsp,%rbp
.profilable
push %rbx
xor %ebx,%ebx
lea -64(%rbp),%rdx
sub $24,%rsp
3: lea (,%rbx,4),%ecx
mov %rsi,%rax
shr %cl,%rax
and $15,%eax
cmp $9,%eax
lea 7(%rax),%ecx
cmova %ecx,%eax
add $48,%eax
mov %al,(%rdx,%rbx)
add $1,%rbx
cmp $16,%rbx
jne 3b
mov %rdx,%rax
lea -48(%rbp),%rcx
lea 15(%rdi),%rdx
4: movzbl (%rax),%ebx
add $1,%rax
sub $1,%rdx
mov %bl,1(%rdx)
cmp %rcx,%rax
jne 4b
add $24,%rsp
pop %rbx
pop %rbp
ret
.endfn bcxcpy,globl

28
libc/bits/bigword.h Normal file
View file

@ -0,0 +1,28 @@
#ifndef COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
#define COSMOPOLITAN_LIBC_BITS_BIGWORD_H_
#if 0
/**
* Let BIGWORD be the the number of bytes in the largest cpu register
* available within the instruction set architecture requirements chosen
* at compile-time.
*
* In plainer terms, if you tune with flags like -mavx, you're not just
* giving the compiler permission to generate code that's incompatible
* with older computers; you're also asking Cosmopolitan to systemically
* change alignment, vectoring, buffering, ABIs, memory allocation, etc.
*/
#endif
#ifndef BIGWORD
#if __AVX512F__ + 0
#define BIGWORD 64
#elif __AVX2__ + 0
#define BIGWORD 32
#elif __SSE2__ + 0
#define BIGWORD 16
#else
#define BIGWORD __BIGGEST_ALIGNMENT__
#endif
#endif /*BIGWORD*/
#endif /* COSMOPOLITAN_LIBC_BITS_BIGWORD_H_ */

31
libc/bits/bitreverse16.S Normal file
View file

@ -0,0 +1,31 @@
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
bitreverse16:
push %rbx
mov %edi,%eax
mov $kReverseBits,%ebx
xlat
xchg %al,%ah
xlat
pop %rbx
ret
.endfn bitreverse16,globl

28
libc/bits/bitreverse32.c Normal file
View file

@ -0,0 +1,28 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
uint32_t(bitreverse32)(uint32_t x) {
x = bswap_32(x);
x = ((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1);
x = ((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2);
x = ((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4);
return x;
}

28
libc/bits/bitreverse64.c Normal file
View file

@ -0,0 +1,28 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
uint64_t bitreverse64(uint64_t x) {
x = bswap_64(x);
x = ((x & 0xaaaaaaaaaaaaaaaa) >> 1) | ((x & 0x5555555555555555) << 1);
x = ((x & 0xcccccccccccccccc) >> 2) | ((x & 0x3333333333333333) << 2);
x = ((x & 0xf0f0f0f0f0f0f0f0) >> 4) | ((x & 0x0f0f0f0f0f0f0f0f) << 4);
return x;
}

31
libc/bits/bitreverse8.S Normal file
View file

@ -0,0 +1,31 @@
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
bitreverse8:
.leafprologue
.profilable
push %rbx
mov %edi,%eax
mov $kReverseBits,%ebx
xlat
pop %rbx
.leafepilogue
.endfn bitreverse8,globl

531
libc/bits/bits.h Normal file
View file

@ -0,0 +1,531 @@
#ifndef COSMOPOLITAN_LIBC_BITS_H_
#define COSMOPOLITAN_LIBC_BITS_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define CheckUnsigned(x) ((x) / !((typeof(x))(-1) < 0))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits
*/
extern const bool kTrue;
extern const bool kFalse;
extern const uint8_t kReverseBits[256];
uint16_t bswap_16(uint16_t) pureconst;
uint32_t bswap_32(uint32_t) pureconst;
uint32_t bswap_64(uint32_t) pureconst;
unsigned long popcount(unsigned long) pureconst;
uint32_t gray(uint32_t) pureconst;
uint32_t ungray(uint32_t) pureconst;
unsigned bcdadd(unsigned, unsigned) pureconst;
unsigned long bcd2i(unsigned long) pureconst;
unsigned long i2bcd(unsigned long) pureconst;
void bcxcpy(unsigned char (*)[16], unsigned long);
int ffs(int) pureconst;
int ffsl(long int) pureconst;
int ffsll(long long int) pureconst;
int fls(int) pureconst;
int flsl(long int) pureconst;
int flsll(long long int) pureconst;
uint8_t bitreverse8(uint8_t) libcesque pureconst;
uint16_t bitreverse16(uint16_t) libcesque pureconst;
uint32_t bitreverse32(uint32_t) libcesque pureconst;
uint64_t bitreverse64(uint64_t) libcesque pureconst;
unsigned long roundup2pow(unsigned long) libcesque pureconst;
unsigned long roundup2log(unsigned long) libcesque pureconst;
unsigned long rounddown2pow(unsigned long) libcesque pureconst;
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits » no assembly required
*/
/**
* Undocumented incantations for ROR, ROL, and SAR.
*/
#define ROR(w, k) (CheckUnsigned(w) >> (k) | (w) << (sizeof(w) * 8 - (k)))
#define ROL(w, k) ((w) << (k) | CheckUnsigned(w) >> (sizeof(w) * 8 - (k)))
#define SAR(w, k) (((w) & ~(~0u >> (k))) | ((w) >> ((k) & (sizeof(w) * 8 - 1))))
#define bitreverse8(X) (kReverseBits[(X)&0xff])
#define bitreverse16(X) \
((uint16_t)kReverseBits[(X)&0xff] << 010 | \
kReverseBits[((uint16_t)(X) >> 010) & 0xff])
#ifndef __GNUC__
#define READ16LE(P) ((unsigned)(P)[1] << 010 | (unsigned)(P)[0])
#define READ32LE(P) \
((unsigned long)(P)[3] << 030 | (unsigned long)(P)[2] << 020 | \
(unsigned long)(P)[1] << 010 | (unsigned long)(P)[0])
#define READ64LE(P) \
((unsigned long long)(P)[3] << 030 | (unsigned long)(P)[2] << 020 | \
(unsigned long long)(P)[1] << 010 | (unsigned long)(P)[0])
#else
#define READ16LE(P) read16le(P)
#define READ32LE(P) read32le(P)
#define READ64LE(P) read64le(P)
#define read16le(P) \
({ \
const unsigned char *Pu = (const unsigned char *)(P); \
(uint16_t) Pu[1] << 010 | (uint16_t)Pu[0]; \
})
#define read32le(P) \
({ \
const unsigned char *Pu = (const unsigned char *)(P); \
((uint32_t)Pu[3] << 030 | (uint32_t)Pu[2] << 020 | \
(uint32_t)Pu[1] << 010 | (uint32_t)Pu[0] << 000); \
})
#define read64le(P) \
({ \
const unsigned char *Pu = (const unsigned char *)(P); \
((uint64_t)Pu[7] << 070 | (uint64_t)Pu[6] << 060 | \
(uint64_t)Pu[5] << 050 | (uint64_t)Pu[4] << 040 | \
(uint64_t)Pu[3] << 030 | (uint64_t)Pu[2] << 020 | \
(uint64_t)Pu[1] << 010 | (uint64_t)Pu[0] << 000); \
})
#endif
#define WRITE16LE(P, V) \
do { \
uint8_t *Ple = (P); \
uint16_t Vle = (V); \
Ple[0] = (uint8_t)(Vle >> 000); \
Ple[1] = (uint8_t)(Vle >> 010); \
} while (0)
#define WRITE32LE(P, V) \
do { \
uint8_t *Ple = (P); \
uint32_t Vle = (V); \
Ple[0] = (uint8_t)(Vle >> 000); \
Ple[1] = (uint8_t)(Vle >> 010); \
Ple[2] = (uint8_t)(Vle >> 020); \
Ple[3] = (uint8_t)(Vle >> 030); \
} while (0)
#define WRITE64LE(P, V) \
do { \
uint8_t *Ple = (P); \
uint64_t Vle = (V); \
Ple[0] = (uint8_t)(Vle >> 000); \
Ple[1] = (uint8_t)(Vle >> 010); \
Ple[2] = (uint8_t)(Vle >> 020); \
Ple[3] = (uint8_t)(Vle >> 030); \
Ple[4] = (uint8_t)(Vle >> 040); \
Ple[5] = (uint8_t)(Vle >> 050); \
Ple[6] = (uint8_t)(Vle >> 060); \
Ple[7] = (uint8_t)(Vle >> 070); \
} while (0)
/* TODO(jart): these ones aren't coded correctly */
#define read128le(P) ((uint128_t)read64le((P) + 8) << 0100 | read64le(P))
#define read16be(P) ((uint16_t)(*(P) << 010) | (uint16_t)(*((P) + 1)))
#define read32be(P) ((uint32_t)read16be(P) << 020 | (uint32_t)read16be((P) + 2))
#define read64be(P) ((uint64_t)read32be(P) << 040 | read32be((P) + 4))
#define read128be(P) ((uint128_t)read64be(P) << 0100 | read64be((P) + 8))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits » some assembly required
*/
/**
* Constraints for virtual machine flags.
* @note we beseech clang devs for flag constraints
*/
#ifdef __GCC_ASM_FLAG_OUTPUTS__ /* GCC6+ CLANG10+ */
#define CF "=@ccc"
#define CFLAG(OP) OP
#define ZF "=@ccz"
#define ZFLAG(OP) OP
#define OF "=@cco"
#define OFLAG(OP) OP
#define SF "=@ccs"
#define SFLAG(SP) SP
#define ABOVEF "=@cca" /* i.e. !ZF && !CF */
#define ABOVEFLAG(OP) OP
#else
#define CF "=q"
#define CFLAG(OP) OP "\n\tsetc\t%b0"
#define ZF "=q"
#define ZFLAG(OP) OP "\n\tsetz\t%b0"
#define OF "=q"
#define OFLAG(OP) OP "\n\tseto\t%b0"
#define SF "=q"
#define SFLAG(SP) OP "\n\tsets\t%b0"
#define ABOVEF "=@cca"
#define ABOVEFLAG(OP) OP "\n\tseta\t%b0"
#endif
/**
* Reads scalar from memory, offset by segment.
*
* @return *(MEM) relative to segment
* @see arch_prctl()
* @see pushpop()
*/
#define fs(MEM) __peek("fs", MEM)
#define gs(MEM) __peek("gs", MEM)
/**
* Reads scalar from memory w/ one operation.
*
* @param MEM is alignas(𝑘) uint𝑘_t[hasatleast 1] where 𝑘 {8,16,32,64}
* @return *(MEM)
* @note defeats compiler load tearing optimizations
* @note alignas(𝑘) is implied if compiler knows type
* @note alignas(𝑘) only avoids multi-core / cross-page edge cases
* @see Intel's Six-Thousand Page Manual V.3A §8.2.3.1
* @see atomic_store()
*/
#define atomic_load(MEM) \
({ \
autotype(MEM) Mem = (MEM); \
typeof(*Mem) Reg; \
asm("mov\t%1,%0" : "=r"(Reg) : "m"(*Mem)); \
Reg; \
})
/**
* Saves scalar to memory w/ one operation.
*
* This is guaranteed to happen in either one or zero operations,
* depending on whether or not it's possible for *(MEM) to be read
* afterwards. This macro only forbids compiler from using >1 ops.
*
* @param MEM is alignas(𝑘) uint𝑘_t[hasatleast 1] where 𝑘 {8,16,32,64}
* @param VAL is uint𝑘_t w/ better encoding for immediates (constexpr)
* @return VAL
* @note alignas(𝑘) on nexgen32e only needed for end of page gotcha
* @note alignas(𝑘) is implied if compiler knows type
* @note needed to defeat store tearing optimizations
* @see Intel Six-Thousand Page Manual Manual V.3A §8.2.3.1
* @see atomic_load()
*/
#define atomic_store(MEM, VAL) \
({ \
autotype(VAL) Val = (VAL); \
typeof(&Val) Mem = (MEM); \
asm("mov%z1\t%1,%0" : "=m,m"(*Mem) : "i,r"(Val)); \
Val; \
})
/**
* Returns true if bit is set in memory.
*
* This is a generically-typed Bitset<T> RAM. This macro is intended
* to be container-like with optimal machine instruction encoding, cf.
* machine-agnostic container abstractions. Memory accesses are words.
* Register allocation can be avoided if BIT is known. Be careful when
* casting character arrays since that should cause a page fault.
*
* @param MEM is uint𝑘_t[] where 𝑘 {16,32,64} base address
* @param BIT [-(2**(𝑘-1)),2**(𝑘-1)) is zero-based index
* @return true if bit is set, otherwise false
* @see Intel's Six Thousand Page Manual V.2A 3-113
* @see bts(), btr(), btc()
*/
#define bt(MEM, BIT) \
({ \
bool OldBit; \
if (isconstant(BIT)) { \
asm(CFLAG("bt%z1\t%2,%1") \
: CF(OldBit) \
: "m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]), \
"J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 2) { \
asm(CFLAG("bt\t%w2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
} else if (sizeof((MEM)[0]) == 4) { \
asm(CFLAG("bt\t%k2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
} else if (sizeof((MEM)[0]) == 8) { \
asm(CFLAG("bt\t%q2,%1") : CF(OldBit) : "m"((MEM)[0]), "r"(BIT) : "cc"); \
} \
OldBit; \
})
#define bts(MEM, BIT) __BitOp("bts", BIT, MEM) /** bit test and set */
#define btr(MEM, BIT) __BitOp("btr", BIT, MEM) /** bit test and reset */
#define btc(MEM, BIT) __BitOp("btc", BIT, MEM) /** bit test and complement */
#define lockbts(MEM, BIT) __BitOp("lock bts", BIT, MEM)
#define lockbtr(MEM, BIT) __BitOp("lock btr", BIT, MEM)
#define lockbtc(MEM, BIT) __BitOp("lock btc", BIT, MEM)
#define lockinc(MEM) __ArithmeticOp1("lock inc", MEM)
#define lockdec(MEM) __ArithmeticOp1("lock dec", MEM)
#define locknot(MEM) __ArithmeticOp1("lock not", MEM)
#define lockneg(MEM) __ArithmeticOp1("lock neg", MEM)
#define lockaddeq(MEM, VAL) __ArithmeticOp2("lock add", VAL, MEM)
#define locksubeq(MEM, VAL) __ArithmeticOp2("lock sub", VAL, MEM)
#define lockxoreq(MEM, VAL) __ArithmeticOp2("lock xor", VAL, MEM)
#define lockandeq(MEM, VAL) __ArithmeticOp2("lock and", VAL, MEM)
#define lockoreq(MEM, VAL) __ArithmeticOp2("lock or", VAL, MEM)
/**
* Exchanges *MEMORY into *LOCALVAR w/ one operation.
*
* @param MEMORY is uint𝑘_t[hasatleast 1] where 𝑘 {8,16,32,64}
* @param LOCALVAR is uint𝑘_t[hasatleast 1]
* @return LOCALVAR[0]
* @see xchg()
*/
#define lockxchg(MEMORY, LOCALVAR) \
({ \
static_assert(typescompatible(typeof(*(MEMORY)), typeof(*(LOCALVAR)))); \
asm("xchg\t%0,%1" : "+%m"(*(MEMORY)), "+r"(*(LOCALVAR))); \
*(LOCALVAR); \
})
/**
* Compares and exchanges.
*
* @param IFTHING is uint𝑘_t[hasatleast 1] where 𝑘 {8,16,32,64}
* @return true if value was exchanged, otherwise false
* @see lockcmpxchg()
*/
#define cmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
({ \
bool DidIt; \
asm(ZFLAG("cmpxchg\t%3,%1") \
: ZF(DidIt), "+m"(*(IFTHING)), "+a"(*(ISEQUALTOME)) \
: "r"((typeof(*(IFTHING)))(REPLACEITWITHME)) \
: "cc"); \
DidIt; \
})
#define ezcmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
({ \
bool DidIt; \
autotype(IFTHING) IfThing = (IFTHING); \
typeof(*IfThing) IsEqualToMe = (ISEQUALTOME); \
typeof(*IfThing) ReplaceItWithMe = (REPLACEITWITHME); \
asm(ZFLAG("cmpxchg\t%3,%1") \
: ZF(DidIt), "+m"(*IfThing), "+a"(IsEqualToMe) \
: "r"(ReplaceItWithMe) \
: "cc"); \
DidIt; \
})
/**
* Compares and exchanges w/ one operation.
*
* @param IFTHING is uint𝑘_t[hasatleast 1] where 𝑘 {8,16,32,64}
* @return true if value was exchanged, otherwise false
* @see lockcmpxchg()
*/
#define lockcmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
({ \
bool DidIt; \
asm(ZFLAG("lock cmpxchg\t%3,%1") \
: ZF(DidIt), "+m"(*(IFTHING)), "+a"(*(ISEQUALTOME)) \
: "r"((typeof(*(IFTHING)))(REPLACEITWITHME)) \
: "cc"); \
DidIt; \
})
/**
* Gets value of extended control register.
*/
#define xgetbv(xcr_register_num) \
({ \
unsigned hi, lo; \
asm("xgetbv" : "=d"(hi), "=a"(lo) : "c"(cr_register_num)); \
(uint64_t) hi << 32 | lo; \
})
/**
* Reads model-specific register.
* @note programs running as guests won't have authorization
*/
#define rdmsr(msr) \
({ \
uint32_t lo, hi; \
asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr)); \
(uint64_t) hi << 32 | lo; \
})
/**
* Writes model-specific register.
* @note programs running as guests won't have authorization
*/
#define wrmsr(msr, val) \
do { \
uint64_t val_ = (val); \
asm volatile("wrmsr" \
: /* no outputs */ \
: "c"(msr), "a"((uint32_t)val_), \
"d"((uint32_t)(val_ >> 32))); \
} while (0)
/**
* Tells CPU page tables changed for virtual address.
* @note programs running as guests won't have authorization
*/
#define invlpg(MEM) \
asm volatile("invlpg\t(%0)" : /* no outputs */ : "r"(MEM) : "memory")
/**
* Teleports code fragment inside _init().
*/
#define INITIALIZER(PRI, NAME, CODE) \
asm(".pushsection .init." #PRI "." #NAME ",\"ax\",@progbits\n\t" \
"call\t" #NAME "\n\t" \
".popsection"); \
textstartup optimizesize void NAME(char *rdi, const char *rsi) { \
CODE; \
asm volatile("" : /* no outputs */ : "D"(rdi), "S"(rsi)); \
}
#ifndef __STRICT_ANSI__
#if __PIC__ + __code_model_medium__ + __code_model_large__ + 0 > 1
#define __EZLEA(SYMBOL) "lea\t" SYMBOL "(%%rip),%"
#else
#define __EZLEA(SYMBOL) "mov\t$" SYMBOL ",%k"
#endif
#define weaken(symbol) ((const typeof(&(symbol)))weakaddr(#symbol))
#define strongaddr(symbolstr) \
({ \
intptr_t waddr; \
asm(__EZLEA(symbolstr) "0" : "=r"(waddr)); \
waddr; \
})
#define weakaddr(symbolstr) \
({ \
intptr_t waddr; \
asm(".weak\t" symbolstr "\n\t" __EZLEA(symbolstr) "0" : "=r"(waddr)); \
waddr; \
})
#else
#define weaken(symbol) symbol
#define weakaddr(symbolstr) &(symbolstr)
#endif
#define slowcall(fn, arg1, arg2, arg3, arg4, arg5, arg6) \
({ \
void *ax; \
asm volatile("push\t%7\n\t" \
"push\t%6\n\t" \
"push\t%5\n\t" \
"push\t%4\n\t" \
"push\t%3\n\t" \
"push\t%2\n\t" \
"push\t%1\n\t" \
"call\tslowcall" \
: "=a"(ax) \
: "g"(fn), "g"(arg1), "g"(arg2), "g"(arg3), "g"(arg4), \
"g"(arg5), "g"(arg6) \
: "memory"); \
ax; \
})
#define IsAddressCanonicalForm(P) \
({ \
intptr_t p2 = (intptr_t)(P); \
(0xffff800000000000l <= p2 && p2 <= 0x00007fffffffffffl); \
})
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits » optimizations
*/
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define popcount(X) (isconstant(X) ? __builtin_popcount(X) : __popcount(X))
#define popcount$nehalem(X) \
({ \
typeof(X) BitCount; \
asm("popcnt\t%1,%0" : "=r,r"(BitCount) : "r,m"(X) : "cc"); \
BitCount; \
})
#ifdef __POPCNT__
#define __popcount(X) popcount$nehalem(X)
#else
#define __popcount(X) (popcount)(X)
#endif
#define bswap_16(U16) \
(isconstant(U16) ? ((((U16)&0xff00) >> 010) | (((U16)&0x00ff) << 010)) : ({ \
uint16_t Swapped16, Werd16 = (U16); \
asm("xchg\t%b0,%h0" : "=Q"(Swapped16) : "0"(Werd16)); \
Swapped16; \
}))
#define bswap_32(U32) \
(isconstant(U32) \
? ((((U32)&0xff000000) >> 030) | (((U32)&0x000000ff) << 030) | \
(((U32)&0x00ff0000) >> 010) | (((U32)&0x0000ff00) << 010)) \
: ({ \
uint32_t Swapped32, Werd32 = (U32); \
asm("bswap\t%0" : "=r"(Swapped32) : "0"(Werd32)); \
Swapped32; \
}))
#define bswap_64(U64) \
(isconstant(U64) ? ((((U64)&0xff00000000000000ul) >> 070) | \
(((U64)&0x00000000000000fful) << 070) | \
(((U64)&0x00ff000000000000ul) >> 050) | \
(((U64)&0x000000000000ff00ul) << 050) | \
(((U64)&0x0000ff0000000000ul) >> 030) | \
(((U64)&0x0000000000ff0000ul) << 030) | \
(((U64)&0x000000ff00000000ul) >> 010) | \
(((U64)&0x00000000ff000000ul) << 010)) \
: ({ \
uint64_t Swapped64, Werd64 = (U64); \
asm("bswap\t%0" : "=r"(Swapped64) : "0"(Werd64)); \
Swapped64; \
}))
#endif /* defined(__GNUC__) && !defined(__STRICT_ANSI__) */
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § bits » implementation details
*/
#define __peek(SEGMENT, ADDRESS) \
({ \
typeof(*(ADDRESS)) Pk; \
asm("mov\t%%" SEGMENT ":%1,%0" : "=r"(Pk) : "m"(*(ADDRESS))); \
Pk; \
})
#define __ArithmeticOp1(OP, MEM) \
({ \
asm(OP "%z0\t%0" : "+m"(*(MEM)) : /* no inputs */ : "cc"); \
MEM; \
})
#define __ArithmeticOp2(OP, VAL, MEM) \
({ \
asm(OP "%z0\t%1,%0" : "+m,m"(*(MEM)) : "i,r"(VAL) : "cc"); \
MEM; \
})
#define __BitOp(OP, BIT, MEM) \
({ \
bool OldBit; \
if (isconstant(BIT)) { \
asm(CFLAG(OP "%z1\t%2,%1") \
: CF(OldBit), "+m"((MEM)[(BIT) / (sizeof((MEM)[0]) * CHAR_BIT)]) \
: "J"((BIT) % (sizeof((MEM)[0]) * CHAR_BIT)) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 2) { \
asm(CFLAG(OP "\t%w2,%1") \
: CF(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 4) { \
asm(CFLAG(OP "\t%k2,%1") \
: CF(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} else if (sizeof((MEM)[0]) == 8) { \
asm(CFLAG(OP "\t%q2,%1") \
: CF(OldBit), "+m"((MEM)[0]) \
: "r"(BIT) \
: "cc"); \
} \
OldBit; \
})
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_H_ */

53
libc/bits/bits.mk Normal file
View file

@ -0,0 +1,53 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
PKGS += LIBC_BITS
LIBC_BITS_ARTIFACTS += LIBC_BITS_A
LIBC_BITS = $(LIBC_BITS_A_DEPS) $(LIBC_BITS_A)
LIBC_BITS_A = o/$(MODE)/libc/bits/bits.a
LIBC_BITS_A_FILES := $(wildcard libc/bits/*)
LIBC_BITS_A_HDRS = $(filter %.h,$(LIBC_BITS_A_FILES))
LIBC_BITS_A_SRCS_S = $(filter %.S,$(LIBC_BITS_A_FILES))
LIBC_BITS_A_SRCS_C = $(filter %.c,$(LIBC_BITS_A_FILES))
LIBC_BITS_A_SRCS = \
$(LIBC_BITS_A_SRCS_S) \
$(LIBC_BITS_A_SRCS_C)
LIBC_BITS_A_OBJS = \
$(LIBC_BITS_A_SRCS:%=o/$(MODE)/%.zip.o) \
$(LIBC_BITS_A_SRCS_S:%.S=o/$(MODE)/%.o) \
$(LIBC_BITS_A_SRCS_C:%.c=o/$(MODE)/%.o)
LIBC_BITS_A_CHECKS = \
$(LIBC_BITS_A).pkg \
$(LIBC_BITS_A_HDRS:%=o/$(MODE)/%.ok)
LIBC_BITS_A_DIRECTDEPS = \
LIBC_STUBS \
LIBC_NEXGEN32E
LIBC_BITS_A_DEPS := \
$(call uniq,$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x))))
$(LIBC_BITS_A): libc/bits/ \
$(LIBC_BITS_A).pkg \
$(LIBC_BITS_A_OBJS)
$(LIBC_BITS_A).pkg: \
$(LIBC_BITS_A_OBJS) \
$(foreach x,$(LIBC_BITS_A_DIRECTDEPS),$($(x)_A).pkg)
#o/$(MODE)/libc/bits/bsf.o: CC = clang-10
#o/$(MODE)/libc/bits/bsf.o: CC = /opt/cross9cc/bin/x86_64-linux-musl-cc
LIBC_BITS_LIBS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)))
LIBC_BITS_SRCS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_SRCS))
LIBC_BITS_HDRS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_HDRS))
LIBC_BITS_CHECKS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_CHECKS))
LIBC_BITS_OBJS = $(foreach x,$(LIBC_BITS_ARTIFACTS),$($(x)_OBJS))
$(LIBC_BITS_OBJS): $(BUILD_FILES) libc/bits/bits.mk
.PHONY: o/$(MODE)/libc/bits
o/$(MODE)/libc/bits: $(LIBC_BITS_CHECKS)

217
libc/bits/emmintrin.h Normal file
View file

@ -0,0 +1,217 @@
#ifndef COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_
#include "libc/bits/progn.h"
#include "libc/bits/xmmintrin.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse2
*/
typedef char __v16qi _Vector_size(16);
typedef unsigned char __v16qu _Vector_size(16);
typedef signed char __v16qs _Vector_size(16);
typedef short __v8hi _Vector_size(16);
typedef unsigned short __v8hu _Vector_size(16);
typedef double __v2df _Vector_size(16);
typedef double __m128d _Vector_size(16) aligned(16);
typedef double __m128d_u _Vector_size(16) aligned(1);
typedef long long __v2di _Vector_size(16);
typedef long long __m128i _Vector_size(16) aligned(16);
typedef long long __m128i_u _Vector_size(16) aligned(1);
typedef unsigned long long __v2du _Vector_size(16);
struct thatispacked mayalias __usi128ma {
__m128i_u __v;
};
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse2 » memory ops
*/
#define _mm_loadu_si128(M128IP) ((struct __usi128ma *)(M128IP))->__v
#define _mm_storeu_si128(M128IP, M128I) \
(((struct __usi128ma *)(M128IP))->__v = (M128I))
#define _mm_set_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
((__m128i)(__v16qi){I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, \
I8_9, I8_10, I8_11, I8_12, I8_13, I8_14, I8_15})
#define _mm_set_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
((__m128i)(__v8hi){I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7})
#define _mm_set_epi32(I32_3, I32_2, I32_1, I32_0) \
((__m128i)(__v4si){I32_0, I32_1, I32_2, I32_3})
#define _mm_set_epi64x(I64_1, I64_0) ((__m128i)(__v2di){I64_0, I64_1})
#define _mm_setr_epi8(I8_15, I8_14, I8_13, I8_12, I8_11, I8_10, I8_9, I8_8, \
I8_7, I8_6, I8_5, I8_4, I8_3, I8_2, I8_1, I8_0) \
_mm_set_epi8(I8_0, I8_1, I8_2, I8_3, I8_4, I8_5, I8_6, I8_7, I8_8, I8_9, \
I8_10, I8_11, I8_12, I8_13, I8_14, I8_15)
#define _mm_setr_epi16(I16_7, I16_6, I16_5, I16_4, I16_3, I16_2, I16_1, I16_0) \
_mm_set_epi16(I16_0, I16_1, I16_2, I16_3, I16_4, I16_5, I16_6, I16_7)
#define _mm_setr_epi32(I32_3, I32_2, I32_1, I32_0) \
_mm_set_epi32(I32_0, I32_1, I32_2, I32_3)
#define _mm_setr_epi64x(I64_1, I64_0) _mm_set_epi64x(I64_0, I64_1)
#define _mm_set1_epi8(I8) \
_mm_set_epi8(I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8, I8)
#define _mm_set1_epi16(I16) \
_mm_set_epi16(I16, I16, I16, I16, I16, I16, I16, I16)
#define _mm_set1_epi32(I32) _mm_set_epi32(I32, I32, I32, I32)
#define _mm_set1_epi64x(I64) _mm_set_epi64x(I64, I64)
#define _mm_cvtsi128_si32(M128I) ((__v4si)(M128I))[0]
#define _mm_cvtsi32_si128(I32) ((__m128i)(__v4si){(I32), 0, 0, 0})
#define _mm_setzero_si128() ((__m128i)(__v2di){0LL, 0LL})
#define _mm_castsi128_ps(M128I) ((__m128)(M128I))
#define _mm_castps_si128(M128) ((__m128i)(M128))
#define _mm_load_si128(M128I) (*(M128I))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse2 » simd ops
*/
#define _mm_and_si128(M128I_0, M128I_1) \
((__m128i)((__v2du)(M128I_0) & (__v2du)(M128I_1)))
#define _mm_or_si128(M128I_0, M128I_1) \
((__m128i)((__v2du)(M128I_0) | (__v2du)(M128I_1)))
#define _mm_xor_si128(M128I_0, M128I_1) \
((__m128i)((__v2du)(M128I_0) ^ (__v2du)(M128I_1)))
#define _mm_andnot_si128(M128I_0, M128I_1) \
((__m128i)(~(__v2du)(M128I_0) & (__v2du)(M128I_1)))
#define _mm_add_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) + (__v2df)(M128D_1))
#define _mm_sub_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) - (__v2df)(M128D_1))
#define _mm_mul_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) * (__v2df)(M128D_1))
#define _mm_div_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) / (__v2df)(M128D_1))
#define _mm_and_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) & (__v2df)(M128D_1))
#define _mm_or_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) | (__v2df)(M128D_1))
#define _mm_xor_pd(M128D_0, M128D_1) \
(__m128d)((__v2df)(M128D_0) ^ (__v2df)(M128D_1))
#define _mm_andnot_pd(M128D_0, M128D_1) \
(__m128d)(~(__v2df)(M128D_0) & (__v2df)(M128D_1))
#define _mm_sqrt_pd(M128D) __builtin_ia32_sqrtpd((__v2df)(M128D))
#define _mm_min_pd(M128D_0, M128D_1) \
__builtin_ia32_minpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_max_pd(M128D_0, M128D_1) \
__builtin_ia32_maxpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpeq_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpeqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpneq_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpneqpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmplt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnlt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmple_pd(M128D_0, M128D_1) \
__builtin_ia32_cmplepd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnle_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlepd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpgt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpngt_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltpd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpge_pd(M128D_0, M128D_1) \
__builtin_ia32_cmplepd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpnge_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlepd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpord_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpunord_pd(M128D_0, M128D_1) \
__builtin_ia32_cmpunordpd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_sad_epu8(M128I_0, M128I_1) \
__builtin_ia32_psadbw128((__v16qi)(M128I_0), (__v16qi)(M128I_1))
#define _mm_subs_epi8(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubsb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#define _mm_subs_epu8(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubusw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#define _mm_subs_epi16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubsw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
#define _mm_subs_epu16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_psubusw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
#define _mm_add_epi32(M128I_0, M128I_1) \
((__m128i)((__v4su)(M128I_0) + (__v4su)(M128I_1)))
#define _mm_sub_epi32(M128I_0, M128I_1) \
((__m128i)((__v4su)(M128I_0) - (__v4su)(M128I_1)))
#define _mm_madd_epi16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_pmaddwd128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
#define _mm_shuffle_epi32(V, IMM) \
((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(V), (int)(IMM)))
#define _mm_slli_epi32(M128I, COUNT) \
((__m128i)__builtin_ia32_pslldi128((__v4si)(M128I), (COUNT)))
#define _mm_slli_si128(M128I, IMM) \
((__m128i)__builtin_ia32_pslldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
#define _mm_srli_si128(M128I, IMM) \
((__m128i)__builtin_ia32_psrldqi128((__v2di)(__m128i)(M128I), (int)(IMM)*8))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse2 » scalar ops
*/
#define _mm_sqrt_sd(M128D_0, M128D_1) \
({ \
__m128d M128d2 = __builtin_ia32_sqrtsd((__v2df)(M128D_1)); \
(__m128d){M128d2[0], (M128D_0)[1]}; \
})
#define _mm_add_sd(M128D_0, M128D_1) \
PROGN((M128D_0)[0] += (M128D_1)[0], (M128D_0))
#define _mm_sub_sd(M128D_0, M128D_1) \
PROGN((M128D_0)[0] -= (M128D_1)[0], (M128D_0))
#define _mm_mul_sd(M128D_0, M128D_1) \
PROGN((M128D_0)[0] *= (M128D_1)[0], (M128D_0))
#define _mm_div_sd(M128D_0, M128D_1) \
PROGN((M128D_0)[0] /= (M128D_1)[0], (M128D_0))
#define _mm_min_sd(M128D_0, M128D_1) \
__builtin_ia32_minsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_max_sd(M128D_0, M128D_1) \
__builtin_ia32_maxsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpeq_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpeqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpneq_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpneqsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmplt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnlt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmple_sd(M128D_0, M128D_1) \
__builtin_ia32_cmplesd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpnle_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlesd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpgt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpngt_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnltsd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpge_sd(M128D_0, M128D_1) \
__builtin_ia32_cmplesd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpnge_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpnlesd((__v2df)(M128D_1), (__v2df)(M128D_0))
#define _mm_cmpord_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
#define _mm_cmpunord_sd(M128D_0, M128D_1) \
__builtin_ia32_cmpunordsd((__v2df)(M128D_0), (__v2df)(M128D_1))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse2 » miscellaneous
*/
#define _mm_pause() asm("rep nop")
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_ */

21
libc/bits/emptytonull.c Normal file
View file

@ -0,0 +1,21 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
const char *emptytonull(const char *s) { return s && !*s ? 0 : s; }

26
libc/bits/firstnonnull.c Normal file
View file

@ -0,0 +1,26 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/runtime/runtime.h"
const char *(firstnonnull)(const char *a, const char *b) {
if (a) return a;
if (b) return b;
abort();
}

30
libc/bits/gray.c Normal file
View file

@ -0,0 +1,30 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
uint32_t gray(uint32_t x) { return x ^ (x >> 1); }
uint32_t ungray(uint32_t x) {
x ^= x >> 16;
x ^= x >> 8;
x ^= x >> 4;
x ^= x >> 2;
x ^= x >> 1;
return x;
}

24
libc/bits/hamming.c Normal file
View file

@ -0,0 +1,24 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
unsigned long(hamming)(unsigned long x, unsigned long y) {
return popcount(x ^ y);
}

75
libc/bits/hilbert.c Normal file
View file

@ -0,0 +1,75 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/hilbert.h"
static axdx_t RotateQuadrant(long n, long y, long x, long ry, long rx) {
long t;
if (ry == 0) {
if (rx == 1) {
y = n - 1 - y;
x = n - 1 - x;
}
t = x;
x = y;
y = t;
}
return (axdx_t){y, x};
}
/**
* Generates Hilbert space-filling curve.
*
* @see morton()
*/
long hilbert(long n, long y, long x) {
axdx_t m;
long d, s, ry, rx;
d = 0;
for (s = n / 2; s > 0; s /= 2) {
rx = (x & s) > 0;
ry = (y & s) > 0;
d += s * s * ((3 * rx) ^ ry);
m = RotateQuadrant(n, y, x, ry, rx);
x = m.dx;
y = m.ax;
}
return d;
}
/**
* Decodes Hilbert space-filling curve.
*
* @see unmorton()
*/
axdx_t unhilbert(long n, long i) {
axdx_t m;
long s, t, y, x, ry, rx;
t = i;
x = y = 0;
for (s = 1; s < n; s *= 2) {
rx = (t / 2) & 1;
ry = (t ^ rx) & 1;
m = RotateQuadrant(s, y, x, ry, rx);
x = m.dx + s * rx;
y = m.ax + s * ry;
t /= 4;
}
return (axdx_t){y, x};
}

11
libc/bits/hilbert.h Normal file
View file

@ -0,0 +1,11 @@
#ifndef COSMOPOLITAN_LIBC_BITS_HILBERT_H_
#define COSMOPOLITAN_LIBC_BITS_HILBERT_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
long hilbert(long, long, long) pureconst;
axdx_t unhilbert(long, long) pureconst;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_HILBERT_H_ */

48
libc/bits/i2bcd.S Normal file
View file

@ -0,0 +1,48 @@
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
/ Converts integer to binary-coded decimal.
i2bcd: .leafprologue
.profilable
test %rdi,%rdi
je 2f
mov %rdi,%rsi
xor %r9d,%r9d
mov $0xcccccccccccccccd,%r8
xor %ecx,%ecx
1: mov %rsi,%rax
mul %r8
shr $3,%rdx
lea (%rdx,%rdx),%rax
lea (%rax,%rax,4),%rax
mov %rsi,%rdi
sub %rax,%rdi
shl %cl,%rdi
add %rdi,%r9
add $4,%rcx
cmp $9,%rsi
mov %rdx,%rsi
ja 1b
jmp 3f
2: xor %r9d,%r9d
3: mov %r9,%rax
.leafepilogue
.endfn i2bcd,globl

21
libc/bits/isempty.c Normal file
View file

@ -0,0 +1,21 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
bool isempty(const char *s) { return !s || !*s; }

22
libc/bits/max.c Normal file
View file

@ -0,0 +1,22 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
long(max)(long x, long y) { return MAX(x, y); }

22
libc/bits/min.c Normal file
View file

@ -0,0 +1,22 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
long(min)(long x, long y) { return MIN(x, y); }

14
libc/bits/mmintrin.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
typedef long long __m64 _Vector_size(8);
typedef float __v2sf _Vector_size(8);
typedef int __v2si _Vector_size(8);
typedef short __v4hi _Vector_size(8);
typedef char __v8qi _Vector_size(8);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_MMINTRIN_H_ */

37
libc/bits/morton.c Normal file
View file

@ -0,0 +1,37 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/morton.h"
/**
* Interleaves bits.
*/
unsigned long(morton)(unsigned long y, unsigned long x) {
x = (x | x << 020) & 0x0000FFFF0000FFFF;
x = (x | x << 010) & 0x00FF00FF00FF00FF;
x = (x | x << 004) & 0x0F0F0F0F0F0F0F0F;
x = (x | x << 002) & 0x3333333333333333;
x = (x | x << 001) & 0x5555555555555555;
y = (y | y << 020) & 0x0000FFFF0000FFFF;
y = (y | y << 010) & 0x00FF00FF00FF00FF;
y = (y | y << 004) & 0x0F0F0F0F0F0F0F0F;
y = (y | y << 002) & 0x3333333333333333;
y = (y | y << 001) & 0x5555555555555555;
return x | y << 1;
}

25
libc/bits/morton.h Normal file
View file

@ -0,0 +1,25 @@
#ifndef COSMOPOLITAN_LIBC_BITS_MORTON_H_
#define COSMOPOLITAN_LIBC_BITS_MORTON_H_
#include "libc/intrin/pdep.h"
#include "libc/intrin/pext.h"
#include "libc/nexgen32e/x86feature.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
unsigned long morton(unsigned long, unsigned long) libcesque pureconst;
axdx_t unmorton(unsigned long) libcesque pureconst;
#ifndef __STRICT_ANSI__
#define morton(Y, X) \
(X86_NEED(BMI2) \
? pdep(X, 0x5555555555555555ul) | pdep(Y, 0xAAAAAAAAAAAAAAAAul) \
: morton(Y, X))
#define unmorton(I) \
(X86_NEED(BMI2) ? (axdx_t){pext(I, 0xAAAAAAAAAAAAAAAAul), \
pext(I, 0x5555555555555555ul)} \
: unmorton(I))
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_MORTON_H_ */

21
libc/bits/nulltoempty.c Normal file
View file

@ -0,0 +1,21 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
const char *nulltoempty(const char *s) { return s ? s : ""; }

14
libc/bits/pmmintrin.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse3
*/
#define _mm_hadd_ps(M128_0, M128_1) \
((__m128)__builtin_ia32_haddps((__v4sf)(__m128)(M128_0), \
(__v4sf)(__m128)(M128_0)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_ */

42
libc/bits/popcount.c Normal file
View file

@ -0,0 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/x86feature.h"
static noinline uint32_t popcount$swar32(uint32_t x) {
x -= (x >> 1) & 0x55555555;
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
return (((x + (x >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
}
unsigned long(popcount)(unsigned long x) {
size_t i;
unsigned long r;
if (X86_HAVE(POPCNT)) {
return popcount$nehalem(x);
} else {
r = 0;
for (i = 0; i < sizeof(x); i += 4) {
r |= popcount$swar32(x);
x >>= 32;
}
return r;
}
}

15
libc/bits/progn.h Normal file
View file

@ -0,0 +1,15 @@
#ifndef COSMOPOLITAN_LIBC_BITS_PROGN_H_
#define COSMOPOLITAN_LIBC_BITS_PROGN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
* Evaluates args, returning value of last one.
*
* This API comes from LISP.
*/
#define PROGN(...) ({ __VA_ARGS__; })
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_PROGN_H_ */

53
libc/bits/pushpop.h Normal file
View file

@ -0,0 +1,53 @@
#ifndef COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
#define COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_
#include "libc/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
* PushPop
* An elegant weapon for a more civilized age.
*/
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushpop(x) (x)
#else
#define pushpop(x) \
({ \
typeof(x) Popped; \
if (isconstant(x) && (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
if (x) { \
asm("push\t%1\n\t" \
"pop\t%q0" \
: "=r"(Popped) \
: "ir"(x)); \
} else { \
asm("xor\t%k0,%k0" : "=r"(Popped)); \
} \
} else { \
asm("" : "=r"(Popped) : "0"(x)); \
} \
Popped; \
})
#endif
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#define pushmov(d, x) ((d) = (x))
#else
#define pushmov(d, x) \
({ \
typeof(*(d)) Popped = (x); \
if (isconstant(x) && (TYPE_SIGNED(typeof(x)) ? (intptr_t)(x) + 128 < 256 \
: (intptr_t)(x) < 128)) { \
asm("pushq\t%1\n\t" \
"popq\t%0" \
: "=m"(*(d)) \
: "ir"(Popped)); \
} else { \
*(d) = Popped; \
} \
Popped; \
})
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_PUSHPOP_H_ */

22
libc/bits/rounddown.c Normal file
View file

@ -0,0 +1,22 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
long(rounddown)(long w, long k) { return ROUNDDOWN(w, k); }

31
libc/bits/rounddown2pow.c Normal file
View file

@ -0,0 +1,31 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"
/**
* Returns 𝑥 rounded down to previous two power.
*
* @define (𝑥>02^log𝑥, x=00, 𝑇)
* @see roundup2pow()
*/
unsigned long rounddown2pow(unsigned long x) {
return x ? 1ul << bsrl(x) : 0;
}

22
libc/bits/roundup.c Normal file
View file

@ -0,0 +1,22 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
long(roundup)(long w, long k) { return ROUNDUP(w, k); }

29
libc/bits/roundup2log.c Normal file
View file

@ -0,0 +1,29 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"
/**
* Returns 𝑥 rounded up to next two power and log'd.
* @see roundup2pow
*/
unsigned long roundup2log(unsigned long x) {
return x > 1 ? (bsrl(x - 1) + 1) : x ? 1 : 0;
}

31
libc/bits/roundup2pow.c Normal file
View file

@ -0,0 +1,31 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/bits.h"
#include "libc/nexgen32e/bsr.h"
/**
* Returns 𝑥 rounded up to next two power.
*
* @define (𝑥>02^logx, x=00, 𝑇)
* @see rounddown2pow)()
*/
unsigned long roundup2pow(unsigned long x) {
return x > 1 ? 1ul << (bsrl(x - 1) + 1) : x ? 1 : 0;
}

86
libc/bits/safemacros.h Normal file
View file

@ -0,0 +1,86 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
#define COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/runtime/runtime.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
long min(long x, long y);
long max(long x, long y);
long roundup(long w, long k);
long rounddown(long w, long k);
bool isempty(const char *s);
const char *nulltoempty(const char *s);
const char *emptytonull(const char *s);
const char *firstnonnull(const char *a, const char *b);
uint64_t(unsignedsubtract)(uint64_t x, uint64_t y) pureconst;
#if !defined(__STRICT_ANSI__) && defined(__GNUC__)
#define min(x, y) \
({ \
autotype(x) MinX = (x); \
autotype(y) MinY = (y); \
MinX < MinY ? MinX : MinY; \
})
#define max(x, y) \
({ \
autotype(x) MaxX = (x); \
autotype(y) MaxY = (y); \
MaxX > MaxY ? MaxX : MaxY; \
})
#define roundup(x, k) \
({ \
autotype(x) RoundupX = (x); \
autotype(k) RoundupK = (k); \
ROUNDUP(RoundupX, RoundupK); \
})
#define rounddown(x, k) \
({ \
autotype(x) RounddownX = (x); \
autotype(k) RounddownK = (k); \
ROUNDDOWN(RounddownX, RounddownK); \
})
#define isempty(s) \
({ \
autotype(s) IsEmptyS = (s); \
!IsEmptyS || !(*IsEmptyS); \
})
#define nulltoempty(s) \
({ \
autotype(s) NullToEmptyS = (s); \
NullToEmptyS ? NullToEmptyS : ""; \
})
#define firstnonnull(a, b) \
({ \
autotype(a) FirstNonNullA = (a); \
autotype(a) FirstNonNullB = (b); \
if (!FirstNonNullA && !FirstNonNullB) abort(); \
FirstNonNullA ? FirstNonNullA : FirstNonNullB; \
})
#define emptytonull(s) \
({ \
autotype(s) EmptyToNullS = (s); \
EmptyToNullS && !(*EmptyToNullS) ? NULL : EmptyToNullS; \
})
#define unsignedsubtract(a, b) \
({ \
uint64_t UnsubA = (a); \
uint64_t UnsubB = (b); \
UnsubA >= UnsubB ? UnsubA - UnsubB : ~UnsubB + UnsubA + 1; \
})
#endif /* GNU && !ANSI */
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SAFEMACROS_H_ */

37
libc/bits/shaintrin.h Normal file
View file

@ -0,0 +1,37 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_
#include "libc/bits/emmintrin.h"
#include "libc/bits/xmmintrin.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define _mm_sha1rnds4_epu32(M128I_0, M128I_1, MEM) \
__builtin_ia32_sha1rnds4((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1), (MEM))
#define _mm_sha1nexte_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha1nexte((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha1msg1_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha1msg1((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha1msg2_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha1msg2((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha256rnds2_epu32(M128I_0, M128I_1, M128I_2) \
((__m128i)__builtin_ia32_sha256rnds2((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1), \
(__v4si)(__m128i)(M128I_2)))
#define _mm_sha256msg1_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha256msg1((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#define _mm_sha256msg2_epu32(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_sha256msg2((__v4si)(__m128i)(M128I_0), \
(__v4si)(__m128i)(M128I_1)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SHAINTRIN_H_ */

31
libc/bits/smmintrin.h Normal file
View file

@ -0,0 +1,31 @@
#ifndef COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_
/**
* @fileoverview SSE4 intrinsics.
*/
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CUR_DIRECTION 4
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NO_EXC 8
#define _MM_FROUND_RAISE_EXC 0
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TO_NEAREST_INT 0
#define _MM_FROUND_TO_NEG_INF 1
#define _MM_FROUND_TO_POS_INF 2
#define _MM_FROUND_TO_ZERO 3
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define _mm_extract_epi32(M128I, I32) \
((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(M128I), (int)(I32)))
#define _mm_minpos_epu16(M128I) \
((int)__builtin_ia32_phminposuw128((__v4si)(__m128i)(M128I), (int)(I32)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_SMMINTRIN_H_ */

17
libc/bits/tmmintrin.h Normal file
View file

@ -0,0 +1,17 @@
#ifndef COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_
#include "libc/bits/emmintrin.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § ssse3
*/
#define _mm_maddubs_epi16(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_pmaddubsw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#define _mm_shuffle_epi8(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_pshufb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_TMMINTRIN_H_ */

14
libc/bits/typecheck.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef COSMOPOLITAN_LIBC_BITS_TYPECHECK_H_
#define COSMOPOLITAN_LIBC_BITS_TYPECHECK_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define TYPECHECK(T, X) \
({ \
T Lol1; \
typeof(X) Lol2; \
(void)(&Lol1 == &Lol2); \
X; \
})
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_TYPECHECK_H_ */

41
libc/bits/unmorton.c Normal file
View file

@ -0,0 +1,41 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/morton.h"
static unsigned long GetOddBits(unsigned long x) {
x = (x | x >> 000) & 0x5555555555555555;
x = (x | x >> 001) & 0x3333333333333333;
x = (x | x >> 002) & 0x0F0F0F0F0F0F0F0F;
x = (x | x >> 004) & 0x00FF00FF00FF00FF;
x = (x | x >> 010) & 0x0000FFFF0000FFFF;
x = (x | x >> 020) & 0x00000000FFFFFFFF;
return x;
}
/**
* Deinterleaves bits.
*
* @param 𝑖 is interleaved index
* @return deinterleaved coordinate {ax := 𝑦, dx := 𝑥}
* @see en.wikipedia.org/wiki/Z-order_curve
*/
axdx_t(unmorton)(unsigned long i) {
return (axdx_t){GetOddBits(i >> 1), GetOddBits(i)};
}

View file

@ -0,0 +1,27 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/bits/safemacros.h"
/**
* Subtracts unsigned integers w/ wraparound.
*/
uint64_t(unsignedsubtract)(uint64_t x, uint64_t y) {
return unsignedsubtract(x, y);
}

29
libc/bits/wmmintrin.h Normal file
View file

@ -0,0 +1,29 @@
#ifndef COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_
#include "libc/bits/emmintrin.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#define _mm_clmulepi64_si128(X, Y, IMM) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
(__v2di)(__m128i)(Y), (char)(IMM)))
#define _mm_aesenc_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesenc128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesenclast_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesenclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesdec_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesdec128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesdeclast_si128(M128I_0, M128I_1) \
((__m128i)__builtin_ia32_aesdeclast128((__v2di)(M128I_0), (__v2di)(M128I_1)))
#define _mm_aesimc_si128(M128I) \
((__m128i)__builtin_ia32_aesimc128((__v2di)(M128I)))
#define _mm_aesimclast_si128(M128I) \
((__m128i)__builtin_ia32_aesimclast128((__v2di)(M128I)))
#define _mm_aeskeygenassist_si128(X, Y) \
((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(X), (int)(Y)))
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_WMMINTRIN_H_ */

25
libc/bits/xchg.h Normal file
View file

@ -0,0 +1,25 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XCHG_H_
#define COSMOPOLITAN_LIBC_BITS_XCHG_H_
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
* Exchanges *MEMORY into *LOCALVAR.
*
* @return *MEMORY
* @see lockcmpxchg()
* todo(jart): what's the point of this?
*/
#define xchg(MEMORY, LOCALVAR) \
({ \
autotype(MEMORY) Memory = (MEMORY); \
typeof(Memory) LocalVar = (LOCALVAR); \
typeof(*Memory) Temp; \
memcpy(&Temp, Memory, sizeof(Temp)); \
memcpy(Memory, LocalVar, sizeof(Temp)); \
memcpy(LocalVar, &Temp, sizeof(Temp)); \
Temp; \
})
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XCHG_H_ */

233
libc/bits/xmmintrin.h Normal file
View file

@ -0,0 +1,233 @@
#ifndef COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
#define COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_
#include "libc/bits/emmintrin.h"
#include "libc/bits/mmintrin.h"
#include "libc/bits/progn.h"
#include "libc/dce.h"
#define _MM_EXCEPT_MASK 0x003f
#define _MM_EXCEPT_INVALID 0x0001
#define _MM_EXCEPT_DENORM 0x0002
#define _MM_EXCEPT_DIV_ZERO 0x0004
#define _MM_EXCEPT_OVERFLOW 0x0008
#define _MM_EXCEPT_UNDERFLOW 0x0010
#define _MM_EXCEPT_INEXACT 0x0020
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_INVALID 0x0080
#define _MM_MASK_DENORM 0x0100
#define _MM_MASK_DIV_ZERO 0x0200
#define _MM_MASK_OVERFLOW 0x0400
#define _MM_MASK_UNDERFLOW 0x0800
#define _MM_MASK_INEXACT 0x1000
#define _MM_ROUND_MASK 0x6000
#define _MM_ROUND_NEAREST 0x0000
#define _MM_ROUND_DOWN 0x2000
#define _MM_ROUND_UP 0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
#define _MM_SHUFFLE(A, B, C, D) (((A) << 6) | ((B) << 4) | ((C) << 2) | (D))
#if !(__ASSEMBLER__ + __LINKER__ + 0)
typedef int __v4si _Vector_size(16);
typedef unsigned int __v4su _Vector_size(16);
typedef float __v4sf _Vector_size(16);
typedef float __m128 _Vector_size(16) aligned(16) mayalias;
typedef float __m128_u _Vector_size(16) aligned(1) mayalias;
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse » simd ops
*/
#define _mm_add_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) + (__v4sf)(M128_1)))
#define _mm_sub_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) - (__v4sf)(M128_1)))
#define _mm_mul_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) * (__v4sf)(M128_1)))
#define _mm_div_ps(M128_0, M128_1) \
((__m128)((__v4sf)(M128_0) / (__v4sf)(M128_1)))
#define _mm_and_ps(M128_0, M128_1) \
((__m128)((__v4su)(M128_0) & (__v4su)(M128_1)))
#define _mm_or_ps(M128_0, M128_1) \
((__m128)((__v4su)(M128_0) | (__v4su)(M128_1)))
#define _mm_xor_ps(M128_0, M128_1) /* XORPD [u32 simd xor] */ \
((__m128)((__v4su)(M128_0) ^ (__v4su)(M128_1)))
#define _mm_andnot_ps(M128_0, M128_1) /* ANDNPS [u32 simd nand] */ \
((__m128)(~(__v4su)(M128_0) & (__v4su)(M128_1)))
#define _mm_rcp_ps(M128) __builtin_ia32_rcpps((__v4sf)(M128))
#define _mm_sqrt_ps(M128) __builtin_ia32_sqrtps((__v4sf)(M128))
#define _mm_rsqrt_ps(M128) __builtin_ia32_rsqrtps((__v4sf)(M128))
#define _mm_min_ps(M128_0, M128_1) \
__builtin_ia32_minps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ps(M128_0, M128_1) \
__builtin_ia32_maxps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_min_ss(M128_0, M128_1) \
__builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ss(M128_0, M128_1) \
__builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpeq_ps(M128_0, M128_1) \
__builtin_ia32_cmpeqps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpneq_ps(M128_0, M128_1) \
__builtin_ia32_cmpneqps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmplt_ps(M128_0, M128_1) \
__builtin_ia32_cmpltps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnlt_ps(M128_0, M128_1) \
__builtin_ia32_cmpnltps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmple_ps(M128_0, M128_1) \
__builtin_ia32_cmpleps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnle_ps(M128_0, M128_1) \
__builtin_ia32_cmpnleps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpgt_ps(M128_0, M128_1) \
__builtin_ia32_cmpltps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpngt_ps(M128_0, M128_1) \
__builtin_ia32_cmpnltps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpge_ps(M128_0, M128_1) \
__builtin_ia32_cmpleps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpnge_ps(M128_0, M128_1) \
__builtin_ia32_cmpnleps((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpord_ps(M128_0, M128_1) \
__builtin_ia32_cmpordps((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpunord_ps(M128_0, M128_1) \
__builtin_ia32_cmpunordps((__v4sf)(M128_0), (__v4sf)(M128_1))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse » scalar ops
*/
forceinline __m128 _mm_add_ss(__m128 m128_0, __m128 m128_1) {
m128_0[0] += m128_1[0];
return m128_0;
}
forceinline __m128 _mm_sub_ss(__m128 m128_0, __m128 m128_1) {
m128_0[0] -= m128_1[0];
return m128_0;
}
forceinline __m128 _mm_mul_ss(__m128 m128_0, __m128 m128_1) {
m128_0[0] *= m128_1[0];
return m128_0;
}
forceinline __m128 _mm_div_ss(__m128 m128_0, __m128 m128_1) {
m128_0[0] /= m128_1[0];
return m128_0;
}
#define _mm_rcp_ss(M128) __builtin_ia32_rcpss((__v4sf)(M128)) /*~1/x*/
#define _mm_sqrt_ss(M128) __builtin_ia32_sqrtss((__v4sf)(M128)) /*sqrt𝑥*/
#define _mm_rsqrt_ss(M128) __builtin_ia32_rsqrtss((__v4sf)(M128)) /*~1/sqrt𝑥*/
#define _mm_min_ss(M128_0, M128_1) \
__builtin_ia32_minss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_max_ss(M128_0, M128_1) \
__builtin_ia32_maxss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpeq_ss(M128_0, M128_1) \
__builtin_ia32_cmpeqss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpneq_ss(M128_0, M128_1) \
__builtin_ia32_cmpneqss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmplt_ss(M128_0, M128_1) \
__builtin_ia32_cmpltss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnlt_ss(M128_0, M128_1) \
__builtin_ia32_cmpnltss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmple_ss(M128_0, M128_1) \
__builtin_ia32_cmpless((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpnle_ss(M128_0, M128_1) \
__builtin_ia32_cmpnless((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpgt_ss(M128_0, M128_1) \
__builtin_ia32_cmpltss((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpngt_ss(M128_0, M128_1) \
__builtin_ia32_cmpnltss((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpge_ss(M128_0, M128_1) \
__builtin_ia32_cmpless((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpnge_ss(M128_0, M128_1) \
__builtin_ia32_cmpnless((__v4sf)(M128_1), (__v4sf)(M128_0))
#define _mm_cmpord_ss(M128_0, M128_1) \
__builtin_ia32_cmpordss((__v4sf)(M128_0), (__v4sf)(M128_1))
#define _mm_cmpunord_ss(M128_0, M128_1) \
__builtin_ia32_cmpunordss((__v4sf)(M128_0), (__v4sf)(M128_1))
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse » memory ops
*/
#define _mm_set1_ps(M128_0) ((__m128)(__v4sf){M128_0, M128_0, M128_0, M128_0})
#define _mm_setzero_ps() ((__m128)(__v4sf){0})
#define _mm_cvtss_f32(M128_0) (((__v4sf)(M128_0))[0])
#define _mm_load_ps(FLOATPTR) (*(__m128 *)(FLOATPTR))
#define _mm_loadu_ps(FLOATPTR) (*(__m128_u *)(FLOATPTR))
#define _mm_set_ps(WHO, DESIGNED, THIS, SHEESH) \
((__m128)(__v4sf){SHEESH, THIS, DESIGNED, WHO})
#define _mm_set_ss(FLOAT) ((__m128)(__v4sf){FLOAT, 0, 0, 0})
#define _mm_load_ss(FLOATPTR) _mm_set_ss(*(FLOATPTR))
#define _mm_store_ss(FLOATPTR, M128_0) ((FLOATPTR)[0] = ((__v4sf)(M128_0))[0])
#define _mm_store_ps(FLOATPTR, M128_0) (*(__m128 *)(FLOATPTR) = (M128_0))
#define _mm_storeu_ps(FLOATPTR, M128_0) (*(__m128_u *)(FLOATPTR) = (M128_0))
#define _mm_shuffle_ps(M128_0, M128_1, MASK) \
((__m128)__builtin_ia32_shufps((__v4sf)(M128_0), (__v4sf)(M128_1), (MASK)))
#ifdef __llvm__
#define _mm_movehl_ps(M128_0, M128_1) \
((__m128)__builtin_shufflevector((__v4sf)(__m128)(M128_0), \
(__v4sf)(__m128)(M128_1), 6, 7, 2, 3))
/* instrinsics unstable & constantly breaking, consider ansi c or asm. */
/* each version of llvm has a different incompatible impl for this one */
#else
#define _mm_movehl_ps(M128_0, M128_1) \
((__m128)__builtin_ia32_movhlps((__v4sf)(__m128)(M128_0), \
(__v4sf)(__m128)(M128_1)))
#define _mm_storel_pi(M64PTR, M128_0) \
__builtin_ia32_storelps((__v2sf *)(M64PTR), (__v4sf)(M128_0))
#endif
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse » cast ops
*/
#define _mm_cvtps_epi32(M128_0) \
((__m128i)__builtin_ia32_cvtps2dq((__v4sf)(M128_0)))
#ifdef __llvm__
#define _mm_cvtepi32_ps(M128I_0) \
((__m128) __builtin_convertvector((__v4si)(__m128i)(M128I_0), __v4sf))
#else
#define _mm_cvtepi32_ps(M128I_0) \
((__m128)__builtin_ia32_cvtdq2ps((__v4si)(M128I_0)))
#endif
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § sse » misc
*/
#define _mm_getcsr() (__builtin_ia32_stmxcsr())
#define _mm_setcsr(U32CONF) (__builtin_ia32_ldmxcsr(U32CONF))
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)
#define _MM_SET_ROUNDING_MODE(MODE) \
(_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (MODE)))
#define XMM_DESTROY(VAR) \
do { \
if (!IsTrustworthy()) { \
asm volatile("xorps\t%1,%0" : "=x"(VAR) : "0"(VAR)); \
} \
} while (0)
/*
** Ternary:
**
** Integer: _mm_or_si128(_mm_and_si128(a, cond), _mm_andnot_si128(cond, b))
** 32-bit float: _mm_or_ps(_mm_and_ps(a, cond), _mm_andnot_ps(cond, b))
** 64-bit float: _mm_or_pd(_mm_and_pd(a, cond), _mm_andnot_pd(cond, b))
** Integer (SSE4.1+): _mm_blendv_epi8(a, b, cond)
** 32-bit float (SSE4.1+): _mm_blendv_ps(a, b, cond)
** 64-bit float (SSE4.1+): _mm_blendv_pd(a, b, cond)
*/
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_XMMINTRIN_H_ */