Make progress towards aarch64 build

This commit is contained in:
Justine Tunney 2023-05-01 19:43:59 -07:00
parent 08ff26c817
commit ca2860947f
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
15428 changed files with 25694 additions and 23138 deletions

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥+𝑦, aborting on overflow.
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥+𝑦, aborting on overflow.
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥+𝑦, aborting on overflow.
//

View file

@ -9,6 +9,7 @@ int _bsfll(long long) pureconst;
int _bsf128(uintmax_t) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#ifdef __x86_64__
#define _bsf(u) \
({ \
unsigned BiTs; \
@ -22,6 +23,11 @@ int _bsf128(uintmax_t) pureconst;
(unsigned)BiTs; \
})
#define _bsfll(u) _bsfl(u)
#else
/* Fallbacks for targets other than x86-64: defer to the compiler's
   count-trailing-zeros builtins, one per operand width (int, long,
   long long). */
#define _bsf(x) __builtin_ctz(x)
#define _bsfl(x) __builtin_ctzl(x)
#define _bsfll(x) __builtin_ctzll(x)
#endif
#endif
COSMOPOLITAN_C_END_

View file

@ -7,7 +7,8 @@ int _bsr(int) pureconst;
int _bsrl(long) pureconst;
int _bsrll(long long) pureconst;
#if defined(__GNUC__) && defined(__x86_64__) && !defined(__STRICT_ANSI__)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#ifdef __x86_64__
int _bsr128(uint128_t) pureconst;
#define _bsr(u) \
({ \
@ -22,6 +23,11 @@ int _bsr128(uint128_t) pureconst;
(unsigned)BiTs; \
})
#define _bsrll(u) _bsrl(u)
#else
/* Fallbacks for targets other than x86-64: index of the highest set
   bit, computed from the compiler's count-leading-zeros builtins.
   XOR with (width - 1) equals (width - 1) - clz(x) because width - 1
   is an all-ones mask for power-of-two widths and clz(x) never
   exceeds it. The result is cast to int so the expression has the
   same type as the _bsr()/_bsrl()/_bsrll() prototypes; without the
   cast, sizeof would promote the whole expression to size_t and
   signed comparisons such as `_bsr(x) - k < 0` could never be true. */
#define _bsr(x) ((int)(__builtin_clz(x) ^ (sizeof(int) * CHAR_BIT - 1)))
#define _bsrl(x) ((int)(__builtin_clzl(x) ^ (sizeof(long) * CHAR_BIT - 1)))
#define _bsrll(x) \
  ((int)(__builtin_clzll(x) ^ (sizeof(long long) * CHAR_BIT - 1)))
#endif
#endif
COSMOPOLITAN_C_END_

View file

@ -43,6 +43,7 @@ static dontinline antiquity void bzero_sse(char *p, size_t n) {
}
}
#ifdef __x86_64__
microarchitecture("avx") static void bzero_avx(char *p, size_t n) {
xmm_t v = {0};
if (IsAsan()) __asan_verify(p, n);
@ -73,6 +74,7 @@ microarchitecture("avx") static void bzero_avx(char *p, size_t n) {
*(xmm_t *)p = v;
}
}
#endif
/**
* Sets memory to zero.
@ -134,7 +136,11 @@ void bzero(void *p, size_t n) {
char *b;
uint64_t x;
b = p;
#ifdef __x86_64__
asm("xorl\t%k0,%k0" : "=r"(x));
#else
x = 0;
#endif
if (n <= 16) {
if (n >= 8) {
__builtin_memcpy(b, &x, 8);
@ -148,11 +154,13 @@ void bzero(void *p, size_t n) {
b[--n] = x;
} while (n);
}
#ifdef __x86_64__
} else if (IsTiny()) {
asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "a"(0));
return;
} else if (X86_HAVE(AVX)) {
bzero_avx(b, n);
#endif
} else {
bzero_sse(b, n);
}

View file

@ -4,7 +4,7 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && !defined(__x86__)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && defined(__x86__)
#define _cmpxchg(IFTHING, ISEQUALTOME, REPLACEITWITHME) \
({ \
bool DidIt; \

View file

@ -40,6 +40,7 @@ size_t _countbits(const void *a, size_t n) {
p = a;
e = p + n;
if (!IsTiny()) {
#ifdef __x86_64__
if (X86_HAVE(POPCNT)) {
while (p + sizeof(long) * 4 <= e) {
__builtin_memcpy(&Ai, p + 000, sizeof(long));
@ -60,6 +61,7 @@ size_t _countbits(const void *a, size_t n) {
t += Ao;
}
} else {
#endif
while (p + 8 <= e) {
__builtin_memcpy(&x, p, 8);
x = x - ((x >> 1) & 0x5555555555555555);
@ -71,7 +73,9 @@ size_t _countbits(const void *a, size_t n) {
t += x;
p += 8;
}
#ifdef __x86_64__
}
#endif
}
while (p < e) {
b = *p++ & 255;

View file

@ -25,6 +25,7 @@
#include "libc/str/str.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
#ifdef __x86_64__
#define MAP_ANONYMOUS_linux 0x00000020
#define MAP_FIXED_linux 0x00000010
@ -109,3 +110,5 @@ noasan struct DirectMap sys_mmap_metal(void *vaddr, size_t size, int prot,
res.maphandle = -1;
return res;
}
#endif /* __x86_64__ */

View file

@ -39,6 +39,7 @@
privileged wontreturn void _Exit(int exitcode) {
int i;
STRACE("_Exit(%d)", exitcode);
#ifdef __x86_64__
if (!IsWindows() && !IsMetal()) {
// On Linux _Exit1 (exit) must be called in pledge("") mode. If we
// call _Exit (exit_group) when we haven't used pledge("stdio") then
@ -64,4 +65,13 @@ privileged wontreturn void _Exit(int exitcode) {
"cli\n\t"
"lidt\t(%rsp)");
for (;;) asm("ud2");
#elif defined(__aarch64__)
// Linux/aarch64: the system call number goes in x8 and the first
// argument in x0. This asm statement has no output operands, so
// operand %0 is the immediate 94 (exit_group) and %1 is the register
// holding exitcode. The number must therefore be loaded from %0;
// using %1 would copy the exit code into x8 and invoke whatever
// system call happens to have that number.
register long x0 asm("x0") = exitcode;
asm volatile("mov\tx8,%0\n"
"svc\t0"
: /* no outputs */
: "i"(94), "r"(x0)
: "x8", "memory");
notpossible;
#endif
}

View file

@ -43,6 +43,7 @@ __msabi extern typeof(ExitThread) *const __imp_ExitThread;
* @noreturn
*/
privileged wontreturn void _Exit1(int rc) {
#ifdef __x86_64__
char cf;
int ax, dx, di, si;
if (!IsWindows() && !IsMetal()) {
@ -72,4 +73,13 @@ privileged wontreturn void _Exit1(int rc) {
unreachable;
}
notpossible;
#elif defined(__aarch64__)
// Linux/aarch64: the system call number goes in x8 and the first
// argument in x0. This asm statement has no output operands, so
// operand %0 is the immediate 93 (exit, which ends only the calling
// thread) and %1 is the register holding rc. The number must
// therefore be loaded from %0; using %1 would copy rc into x8 and
// invoke whatever system call happens to have that number.
register long r0 asm("x0") = rc;
asm volatile("mov\tx8,%0\n"
"svc\t0"
: /* no outputs */
: "i"(93), "r"(r0)
: "x8", "memory");
notpossible;
#endif
}

119
libc/intrin/fenv.S Normal file
View file

@ -0,0 +1,119 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Musl Libc
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "libc/macros.internal.h"
// Clears floating point exception flags.
//
// Only the low six bits of the argument (mask 0x3f) are considered.
// If any requested flag is set in the x87 status word, fnclex wipes
// the x87 flags. The low six bits of the x87 status word, as read
// before that, are OR'd into a copy of MXCSR held at -8(%rsp). If
// that merged copy has any requested bit set, the requested bits are
// removed from it and the copy is loaded back into MXCSR; otherwise
// MXCSR is left as it was.
//
// @param edi is bitmask of exception flags to clear
// @return eax is always 0
// @note uses the slot at -8(%rsp) as scratch without moving %rsp
feclearexcept:
# maintain exceptions in the sse mxcsr, clear x87 exceptions
mov %edi,%ecx
and $0x3f,%ecx
fnstsw %ax
test %eax,%ecx
jz 1f
fnclex
1: stmxcsr -8(%rsp)
and $0x3f,%eax
or %eax,-8(%rsp)
test %ecx,-8(%rsp)
jz 1f
not %ecx
and %ecx,-8(%rsp)
ldmxcsr -8(%rsp)
1: xor %eax,%eax
ret
.endfn feclearexcept,globl
// Sets floating point exception flags in MXCSR.
//
// The argument is masked to its low six bits (0x3f) and OR'd into
// MXCSR by way of a scratch slot at -8(%rsp). The x87 status word
// isn't touched by this routine.
//
// @param edi is bitmask of exception flags to set
// @return eax is always 0
feraiseexcept:
and $0x3f,%edi
stmxcsr -8(%rsp)
or %edi,-8(%rsp)
ldmxcsr -8(%rsp)
xor %eax,%eax
ret
.endfn feraiseexcept,globl
// Installs rounding mode in both the x87 control word and MXCSR.
//
// The push reserves eight bytes of stack that serve as scratch for
// both control registers. Bits 10-11 of the x87 control word are
// cleared (andb $0xf3 on its second byte) and replaced with bits
// 8-15 of the argument (%ch). The same byte is then shifted left by
// three so it lands on bits 13-14 of MXCSR, which are cleared with
// andb $0x9f before being replaced.
//
// @param edi is new rounding mode, positioned at bits 10-11 (0xc00)
// @return eax is always 0
// @note the argument isn't masked here, so any other bit set in its
//     second byte is OR'd into the control registers as well;
//     presumably the caller validates it (TODO confirm)
__fesetround:
push %rax
xor %eax,%eax
mov %edi,%ecx
fnstcw (%rsp)
andb $0xf3,1(%rsp)
or %ch,1(%rsp)
fldcw (%rsp)
stmxcsr (%rsp)
shl $3,%ch
andb $0x9f,1(%rsp)
or %ch,1(%rsp)
ldmxcsr (%rsp)
pop %rcx
ret
.endfn __fesetround,globl,hidden
// Returns current rounding mode as stored in MXCSR.
//
// MXCSR is spilled to the stack and popped into %rax, shifted right
// by three, and masked with 0xc00. That moves MXCSR bits 13-14 down
// to bits 10-11, which is the position __fesetround() expects.
//
// @return eax has rounding mode in bits 10-11, all other bits zero
fegetround:
push %rax
stmxcsr (%rsp)
pop %rax
shr $3,%eax
and $0xc00,%eax
ret
.endfn fegetround,globl
// Saves floating point environment to memory.
//
// The x87 environment is stored at the start of the buffer using
// fnstenv and MXCSR is stored right after it at byte offset 28.
//
// @param rdi points to buffer receiving the environment
// @return eax is always 0
fegetenv:
xor %eax,%eax
fnstenv (%rdi)
stmxcsr 28(%rdi)
ret
.endfn fegetenv,globl
// Restores floating point environment from memory.
//
// The pointer is incremented so that an all-ones argument wraps to
// zero and takes the default path; presumably that's how the
// default-environment constant is encoded (TODO confirm against the
// fenv header). For any other pointer, the x87 environment is loaded
// from its start and MXCSR from byte offset 28; the -1 and 27
// displacements compensate for the increment.
//
// The default path builds an x87 environment image on the stack out
// of two zero quadwords, 0xffff, and 0x37f, loads it with fldenv,
// then pushes 0x1f80 and loads that into MXCSR. The five pushes are
// undone by adding 40 to %rsp.
//
// @param rdi points to saved environment, or is -1 for defaults
// @return eax is always 0
fesetenv:
xor %eax,%eax
inc %rdi
jz 1f
fldenv -1(%rdi)
ldmxcsr 27(%rdi)
ret
1: push %rax
push %rax
pushq $0xffff
pushq $0x37f
fldenv (%rsp)
pushq $0x1f80
ldmxcsr (%rsp)
add $40,%rsp
ret
.endfn fesetenv,globl
// Tests which floating point exception flags are currently set.
//
// MXCSR and the x87 status word are OR'd together and the result is
// filtered through the argument, which is first masked to its low
// six bits (0x3f). A flag is therefore reported if it's set in
// either unit.
//
// @param edi is bitmask of exception flags to query
// @return eax has the subset of requested flags that are set
fetestexcept:
and $0x3f,%edi
push %rax
stmxcsr (%rsp)
pop %rsi
fnstsw %ax
or %esi,%eax
and %edi,%eax
ret
.endfn fetestexcept,globl

View file

@ -9,7 +9,7 @@ void *_wrfsbase(void *);
void *_wrgsbase(void *);
int _have_fsgsbase(void);
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && defined(__x86_64__)
#define _rdfsbase() \
({ \
void *_p; \

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/runtime/runtime.h"
#ifdef __x86_64__
void GetCpuidBrand(char s[13], uint32_t leaf) {
int ax, cx;
@ -32,3 +33,5 @@ void GetCpuidBrand(char s[13], uint32_t leaf) {
: "rdx");
s[12] = 0;
}
#endif /* __x86_64__ */

View file

@ -20,6 +20,7 @@
#include "libc/errno.h"
#include "libc/intrin/fsgsbase.h"
#include "libc/nexgen32e/x86feature.h"
#ifdef __x86_64__
/**
* Returns true if FSGSBASE ISA can be used.
@ -61,3 +62,5 @@ privileged int _have_fsgsbase(void) {
return 0;
}
}
#endif /* __x86_64__ */

View file

@ -1,22 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_
#define COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* TODO: DELETE */
/**
 * Teleports code fragment inside _init().
 *
 * The macro expands to two things. First, a file-scope asm statement
 * that emits `call NAME` into a section named `.init.PRI.NAME`.
 * Second, the definition of `NAME` itself, whose body is `CODE`.
 *
 * The function receives two pointer arguments named `rdi` and `rsi`.
 * Its closing empty asm statement lists them as inputs under the "D"
 * and "S" constraints, so their values are placed back in those
 * registers after `CODE` has run.
 *
 * @param PRI is token pasted into the section name; presumably used
 *     by the linker script to order fragments (TODO confirm)
 * @param NAME is identifier of the generated function
 * @param CODE is statement(s) forming the function body
 */
#ifndef INITIALIZER
#define INITIALIZER(PRI, NAME, CODE) \
asm(".section .init." #PRI "." #NAME ",\"ax\",@progbits\n\t" \
"call\t" #NAME "\n\t" \
".previous"); \
textstartup optimizesize void NAME(char *rdi, const char *rsi) { \
CODE; \
asm volatile("" : /* no outputs */ : "D"(rdi), "S"(rsi)); \
}
#endif /* INITIALIZER */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_BITS_INITIALIZER_H_ */

View file

@ -89,7 +89,7 @@ o/$(MODE)/libc/intrin/kprintf.greg.o: private \
-fpie \
-fwrapv \
-x-no-pg \
-mno-fentry \
$(MNO_FENTRY) \
-ffreestanding \
-fno-sanitize=all \
-fno-stack-protector
@ -105,7 +105,7 @@ o/$(MODE)/libc/intrin/_spinlock_debug_4.o: private \
OVERRIDE_CFLAGS += \
-fwrapv \
-x-no-pg \
-mno-fentry \
$(MNO_FENTRY) \
-ffreestanding \
-fno-sanitize=all \
-mgeneral-regs-only \
@ -186,6 +186,10 @@ o/$(MODE)/libc/intrin/memmove.o: private \
OVERRIDE_CFLAGS += \
-fpie
# these assembly files are safe to build on aarch64
o/$(MODE)/libc/intrin/kclocknames.o: libc/intrin/kclocknames.S
@$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $<
LIBC_INTRIN_LIBS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)))
LIBC_INTRIN_HDRS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_HDRS))
LIBC_INTRIN_INCS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_INCS))

View file

@ -28,18 +28,18 @@
.endm
.section .rodata,"a",@progbits
.align 4
.balign 4
.underrun
kClockNames:
.e CLOCK_REALTIME,"REALTIME"
.e CLOCK_REALTIME_FAST,"REALTIME_FAST" # order matters
.e CLOCK_REALTIME_PRECISE,"REALTIME_PRECISE" # order matters
.e CLOCK_REALTIME_COARSE,"REALTIME_COARSE" # order matters
.e CLOCK_REALTIME_FAST,"REALTIME_FAST" // order matters
.e CLOCK_REALTIME_PRECISE,"REALTIME_PRECISE" // order matters
.e CLOCK_REALTIME_COARSE,"REALTIME_COARSE" // order matters
.e CLOCK_MONOTONIC,"MONOTONIC"
.e CLOCK_MONOTONIC_FAST,"MONOTONIC_FAST" # order matters
.e CLOCK_MONOTONIC_RAW,"MONOTONIC_RAW" # order matters
.e CLOCK_MONOTONIC_PRECISE,"MONOTONIC_PRECISE" # order matters
.e CLOCK_MONOTONIC_COARSE,"MONOTONIC_COARSE" # order matters
.e CLOCK_MONOTONIC_FAST,"MONOTONIC_FAST" // order matters
.e CLOCK_MONOTONIC_RAW,"MONOTONIC_RAW" // order matters
.e CLOCK_MONOTONIC_PRECISE,"MONOTONIC_PRECISE" // order matters
.e CLOCK_MONOTONIC_COARSE,"MONOTONIC_COARSE" // order matters
.e CLOCK_PROCESS_CPUTIME_ID,"PROCESS_CPUTIME_ID"
.e CLOCK_THREAD_CPUTIME_ID,"THREAD_CPUTIME_ID"
.e CLOCK_TAI,"TAI"

View file

@ -22,7 +22,7 @@
// @see libc/sysv/dos2errno.sh for the numbers
.section .sort.rodata.dos2errno.1,"a",@progbits
.align 8
.balign 8
kDos2Errno:/*
...decentralized content...
*/.endobj kDos2Errno,globl

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kErrnoDocs:
.e EINVAL,"Invalid argument"

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kErrnoNames:
.e EINVAL

View file

@ -28,7 +28,7 @@
.endm
.section .rodata,"a",@progbits
.align 4
.balign 4
.underrun
kFcntlCmds:
.e F_GETFD,"GETFD"

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kIpOptnames:
.e IP_TOS,"TOS" # int

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kOpenFlags:
.e O_RDWR,"RDWR" // order matters

View file

@ -165,6 +165,7 @@ privileged bool kisdangerous(const void *p) {
}
privileged static void klog(const char *b, size_t n) {
#ifdef __x86_64__
int e;
bool cf;
size_t i;
@ -196,6 +197,17 @@ privileged static void klog(const char *b, size_t n) {
: "0"(__NR_write), "1"(2), "2"(b), "3"(n)
: "rcx", "r8", "r9", "r10", "r11", "memory", "cc");
}
#else
// Non-x86 path: issue write(2, b, n) with a raw supervisor call.
// The arguments are pinned to x0, x1, and x2. Operand %0 is the x0
// result, so %1 is the immediate 64 (the write system call number),
// which is moved into x8 before `svc 0`. The value returned in x0 is
// not inspected.
register long r0 asm("x0") = (long)2;
register long r1 asm("x1") = (long)b;
register long r2 asm("x2") = (long)n;
register long res_x0 asm("x0");
asm volatile("mov\tx8,%1\n"
"svc\t0"
: "=r"(res_x0)
: "i"(64), "r"(r0), "r"(r1), "r"(r2)
: "x8", "memory");
#endif
}
privileged static size_t kformat(char *b, size_t n, const char *fmt,
@ -798,7 +810,6 @@ privileged size_t kvsnprintf(char *b, size_t n, const char *fmt, va_list v) {
privileged void kvprintf(const char *fmt, va_list v) {
size_t n;
char b[4000];
if (!v) return;
n = kformat(b, sizeof(b), fmt, v);
klog(b, MIN(n, sizeof(b) - 1));
}

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kRlimitNames:
.e RLIMIT_AS,"AS"

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kSignalNames:
.e SIGHUP,"SIGHUP"

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kSockOptnames:
.e SO_DEBUG,"DEBUG" # bool32

View file

@ -28,7 +28,7 @@
.endm
.section .rodata
.align 4
.balign 4
.underrun
kTcpOptnames:
.e TCP_NODELAY,"NODELAY" # bool32

View file

@ -25,7 +25,7 @@
.globl _leaky_start,_leaky_end
.hidden _leaky_start,_leaky_end
.byte 0
.align __SIZEOF_POINTER__
.balign __SIZEOF_POINTER__
.underrun
_leaky_start:
.previous/*

View file

@ -3,9 +3,11 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* TODO(jart): DELETE */
intptr_t lockxchg(void *, void *, size_t);
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && defined(__x86_64__)
/**
* Exchanges *MEMORY into *LOCALVAR w/ one operation.
*

View file

@ -25,6 +25,8 @@
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
#ifdef __x86_64__
static dontinline antiquity int memcmp_sse(const unsigned char *p,
const unsigned char *q, size_t n) {
unsigned u;
@ -99,6 +101,8 @@ microarchitecture("avx") static int memcmp_avx(const unsigned char *p,
}
}
#endif /* __x86_64__ */
/**
* Compares memory byte by byte.
*
@ -136,6 +140,7 @@ int memcmp(const void *a, const void *b, size_t n) {
const unsigned char *p, *q;
if ((p = a) == (q = b) || !n) return 0;
if ((c = *p - *q)) return c;
#ifdef __x86_64__
if (!IsTiny()) {
if (n <= 16) {
if (n >= 8) {
@ -187,6 +192,7 @@ int memcmp(const void *a, const void *b, size_t n) {
return memcmp_sse(p, q, n);
}
}
#endif /* __x86_64__ */
for (; n; ++p, ++q, --n) {
if ((c = *p - *q)) {
return c;

View file

@ -93,6 +93,8 @@ void *memmove(void *dst, const void *src, size_t n) {
xmm_t v, w, x, y, V, W, X, Y, wut;
d = dst;
s = src;
#ifdef __x86__
if (IsTiny()) {
uint16_t w1, w2;
uint32_t l1, l2;
@ -133,6 +135,8 @@ void *memmove(void *dst, const void *src, size_t n) {
}
return dst;
}
#endif
switch (n) {
case 0:
return d;
@ -208,6 +212,8 @@ void *memmove(void *dst, const void *src, size_t n) {
return d;
default:
if (d == s) return d;
#ifdef __x86__
if (n < kHalfCache3 || !kHalfCache3) {
if (d > s) {
if (IsAsan() || n < 900 || !X86_HAVE(ERMS)) {
@ -280,6 +286,31 @@ void *memmove(void *dst, const void *src, size_t n) {
}
asm("sfence");
}
#else
if (d > s) {
do {
n -= 32;
v = *(const xmm_t *)(s + n);
w = *(const xmm_t *)(s + n + 16);
*(xmm_t *)(d + n) = v;
*(xmm_t *)(d + n + 16) = w;
} while (n >= 32);
} else {
i = 0;
do {
v = *(const xmm_t *)(s + i);
w = *(const xmm_t *)(s + i + 16);
*(xmm_t *)(d + i) = v;
*(xmm_t *)(d + i + 16) = w;
} while ((i += 32) + 32 <= n);
d += i;
s += i;
n -= i;
}
#endif
if (n) {
if (n >= 16) {
v = *(const xmm_t *)s;
@ -305,6 +336,7 @@ void *memmove(void *dst, const void *src, size_t n) {
*d = *s;
}
}
return dst;
}
}

View file

@ -44,6 +44,7 @@ static dontinline antiquity void *memset_sse(char *p, char c, size_t n) {
return p;
}
#ifdef __x86_64__
microarchitecture("avx") static void *memset_avx(char *p, char c, size_t n) {
char *t;
xmm_t v = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
@ -76,6 +77,7 @@ microarchitecture("avx") static void *memset_avx(char *p, char c, size_t n) {
}
return p;
}
#endif /* __x86_64__ */
/**
* Sets memory.
@ -155,11 +157,13 @@ void *memset(void *p, int c, size_t n) {
} while (n);
}
return b;
#ifdef __x86_64__
} else if (IsTiny()) {
asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(c));
return p;
} else if (X86_HAVE(AVX)) {
return memset_avx(b, c, n);
#endif
} else {
return memset_sse(b, c, n);
}

View file

@ -42,6 +42,7 @@
#include "libc/runtime/metalprintf.internal.h"
#include "libc/runtime/pc.internal.h"
#include "libc/runtime/runtime.h"
#ifdef __x86_64__
#define INVERT(x) (BANE + PHYSICAL(x))
#define NOPAGE ((uint64_t)-1)
@ -313,3 +314,5 @@ noasan textreal void __reclaim_boot_pages(struct mman *mm, uint64_t skip_start,
}
mm->frp = p;
}
#endif /* __x86_64__ */

View file

@ -6,7 +6,7 @@ COSMOPOLITAN_C_START_
void mpsadbw(uint16_t[8], const uint8_t[16], const uint8_t[16], uint8_t);
#ifndef __STRICT_ANSI__
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
__intrin_xmm_t __mpsadbws(__intrin_xmm_t, __intrin_xmm_t);
#define mpsadbw(C, B, A, I) \
do { \

View file

@ -22,7 +22,7 @@
//
// @note needs sse4 cf. core c. 2006 cf. bulldozer c. 2011
// @see mpsadbw()
.align 8
.balign 8
__mpsadbws:
i = 0
.rept 8

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥*𝑦, aborting on overflow.
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥*𝑦, aborting on overflow.
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥*𝑦, aborting on overflow.
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns -𝑥, aborting on overflow (two's complement bane).
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns -𝑥, aborting on overflow (two's complement bane).
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns -𝑥, aborting on overflow.
//

View file

@ -1,7 +1,8 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
#define COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0) && defined(__GNUC__) && \
!defined(__llvm__) && !defined(__chibicc__) && !defined(__STRICT_ANSI__)
#if !(__ASSEMBLER__ + __LINKER__ + 0) && defined(__x86_64__) && \
defined(__GNUC__) && !defined(__llvm__) && !defined(__chibicc__) && \
!defined(__STRICT_ANSI__)
/**
* @fileoverview Turns CALLs into NOPs that are fixupable at runtime.

View file

@ -18,6 +18,7 @@
*/
#include "libc/nt/struct/teb.h"
#include "libc/runtime/runtime.h"
#ifdef __x86_64__
/**
* Returns New Technology version, e.g.
@ -29,3 +30,5 @@
textwindows noasan int NtGetVersion(void) {
return (NtGetPeb()->OSMajorVersion & 0xff) << 8 | NtGetPeb()->OSMinorVersion;
}
#endif /* __x86_64__ */

View file

@ -7,7 +7,7 @@ COSMOPOLITAN_C_START_
void palignr(void *, const void *, const void *, unsigned long);
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__) && defined(__x86_64__)
__intrin_xmm_t __palignrs(__intrin_xmm_t, __intrin_xmm_t);
#define palignr(C, B, A, I) \
do { \

View file

@ -22,7 +22,7 @@
//
// @note needs ssse3 cf. prescott c. 2004 cf. bulldozer c. 2011
// @see palignr()
.align 8
.balign 8
__palignrs:
palignr $0,%xmm1,%xmm0
ret

View file

@ -7,7 +7,7 @@ COSMOPOLITAN_C_START_
size_t _countbits(const void *, size_t);
unsigned long popcnt(unsigned long) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && defined(__x86_64__)
#define popcnt(X) \
(__builtin_constant_p(X) ? __builtin_popcountll(X) : ({ \
unsigned long PoP = (X); \
@ -18,6 +18,8 @@ unsigned long popcnt(unsigned long) pureconst;
} \
PoP; \
}))
#else
/* Fallback when the x86-64 inline-asm definition isn't selected:
   count set bits using the compiler builtin.
   NOTE(review): this #else is also reached when __GNUC__ is undefined
   or __STRICT_ANSI__ is defined, so it assumes the compiler provides
   __builtin_popcountll in those configurations too -- confirm. */
#define popcnt(x) __builtin_popcountll(x)
#endif /* GNUC && !ANSI */
COSMOPOLITAN_C_END_

View file

@ -6,7 +6,7 @@ COSMOPOLITAN_C_START_
void pslldq(uint8_t[16], const uint8_t[16], unsigned long);
#ifndef __STRICT_ANSI__
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
__intrin_xmm_t __pslldqs(__intrin_xmm_t);
#define pslldq(B, A, I) \
do { \

View file

@ -19,7 +19,7 @@
#include "libc/macros.internal.h"
// Jump table for pslldq() with non-constexpr immediate parameter.
.align 8
.balign 8
__pslldqs:
pslldq $0,%xmm0
ret

View file

@ -6,7 +6,7 @@ COSMOPOLITAN_C_START_
void psrldq(uint8_t[16], const uint8_t[16], unsigned long);
#ifndef __STRICT_ANSI__
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
__intrin_xmm_t __psrldqs(__intrin_xmm_t);
#define psrldq(B, A, I) \
do { \

View file

@ -19,7 +19,7 @@
#include "libc/macros.internal.h"
// Jump table for psrldq() with non-constexpr immediate parameter.
.align 8
.balign 8
__psrldqs:
psrldq $0,%xmm0
ret

View file

@ -3,7 +3,7 @@
#include "libc/macros.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#if !defined(__GNUC__) || defined(__STRICT_ANSI__) || !defined(__x86_64__)
#define pushpop(x) (x)
#else
/**
@ -31,7 +31,7 @@
})
#endif
#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
#if !defined(__GNUC__) || defined(__STRICT_ANSI__) || !defined(__x86_64__)
#define pushmov(d, x) (*(d) = (x))
#else
#define pushmov(d, x) \

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/fsgsbase.h"
#ifdef __x86_64__
/**
* Reads `%fs` base address.
@ -26,3 +27,5 @@
void *(_rdfsbase)(void) {
return _rdfsbase();
}
#endif /* __x86_64__ */

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/fsgsbase.h"
#ifdef __x86_64__
/**
* Reads `%gs` base address.
@ -26,3 +27,5 @@
void *(_rdgsbase)(void) {
return _rdgsbase();
}
#endif /* __x86_64__ */

View file

@ -18,13 +18,13 @@
*/
#include "libc/macros.internal.h"
.align 8
.balign 8
shufpdjt:
i=0
.rept 256
shufpd $i,%xmm1,%xmm0
ret
.align 8
.balign 8
i=i+1
.endr
.endfn shufpdjt,globl

View file

@ -18,13 +18,13 @@
*/
#include "libc/macros.internal.h"
.align 8
.balign 8
shufpsjt:
i=0
.rept 256
shufps $i,%xmm1,%xmm0
ret
.align 8
.balign 8
i=i+1
.endr
.endfn shufpsjt,globl

View file

@ -21,6 +21,7 @@
typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
#ifdef __x86_64__
static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) {
xmm_t v, z = {0};
for (;;) {
@ -34,6 +35,7 @@ static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) {
}
return i;
}
#endif
/**
* Copies bytes from 𝑠 to 𝑑 until a NUL is encountered.
@ -45,13 +47,15 @@ static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) {
* @asyncsignalsafe
*/
char *stpcpy(char *d, const char *s) {
size_t i;
for (i = 0; (uintptr_t)(s + i) & 15; ++i) {
size_t i = 0;
#ifdef __x86_64__
for (; (uintptr_t)(s + i) & 15; ++i) {
if (!(d[i] = s[i])) {
return d + i;
}
}
i = stpcpy_sse2(d, s, i);
#endif
for (;;) {
if (!(d[i] = s[i])) {
return d + i;

View file

@ -21,6 +21,7 @@
typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
#ifdef __x86_64__
static inline noasan size_t strcpy_sse2(char *d, const char *s, size_t i) {
xmm_t v, z = {0};
for (;;) {
@ -34,6 +35,7 @@ static inline noasan size_t strcpy_sse2(char *d, const char *s, size_t i) {
}
return i;
}
#endif
/**
* Copies bytes from 𝑠 to 𝑑 until a NUL is encountered.
@ -45,13 +47,15 @@ static inline noasan size_t strcpy_sse2(char *d, const char *s, size_t i) {
* @asyncsignalsafe
*/
char *strcpy(char *d, const char *s) {
size_t i;
for (i = 0; (uintptr_t)(s + i) & 15; ++i) {
size_t i = 0;
#ifdef __x86_64__
for (; (uintptr_t)(s + i) & 15; ++i) {
if (!(d[i] = s[i])) {
return d;
}
}
i = strcpy_sse2(d, s, i);
#endif
for (;;) {
if (!(d[i] = s[i])) {
return d;

View file

@ -30,6 +30,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
* @asyncsignalsafe
*/
noasan size_t strlen(const char *s) {
#ifdef __x86_64__
size_t n;
xmm_t z = {0};
unsigned m, k = (uintptr_t)s & 15;
@ -39,4 +40,9 @@ noasan size_t strlen(const char *s) {
while (!m) m = __builtin_ia32_pmovmskb128(*++p == z);
n = (const char *)p + __builtin_ctzl(m) - s;
return n;
#else
size_t n = 0;
while (*s++) ++n;
return n;
#endif
}

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥-𝑦, aborting on overflow.
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥-𝑦, aborting on overflow.
//

View file

@ -18,7 +18,7 @@
*/
#include "libc/macros.internal.h"
.privileged
.alignfunc
.balignfunc
// Returns 𝑥-𝑦, aborting on overflow.
//

View file

@ -25,6 +25,7 @@
__msabi extern typeof(GetCurrentThreadId) *const __imp_GetCurrentThreadId;
privileged int sys_gettid(void) {
#ifdef __x86_64__
int tid;
int64_t wut;
if (IsWindows()) {
@ -61,4 +62,13 @@ privileged int sys_gettid(void) {
tid = __pid;
}
return tid;
#elif defined(__aarch64__)
// Linux/aarch64: gettid takes no arguments. Operand %0 is the x0
// result, so %1 is the immediate 178 (the gettid system call
// number), which is moved into x8 before `svc 0`. The kernel's
// return value comes back in x0.
register long res_x0 asm("x0");
asm volatile("mov\tx8,%1\n"
"svc\t0"
: "=r"(res_x0)
: "i"(178)
: "x8", "memory");
return res_x0;
#endif
}

View file

@ -55,7 +55,7 @@ __syscall__:
.endfn __syscall__,globl,hidden
.bss
.align 8
.balign 8
.Lrcx: .quad 0 # clobbered by syscall
.Lrdi: .quad 0 # just in case
.Lrsi: .quad 0 # just in case

View file

@ -45,7 +45,7 @@ _tpenc: .leafprologue
.endfn _tpenc,globl
.rodata
.align 4
.balign 4
.underrun
kTpenc: .rept 4 # MSB10 (0x7FF)
.byte 1,0b11000000 # len,mark

View file

@ -4,10 +4,11 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef __STRICT_ANSI__
#define _weaken(symbol) \
({ \
asm(".weak\t" #symbol); \
&symbol; \
#define _weaken(symbol) \
({ \
typeof(&symbol) _p = &symbol; \
asm(".weak\t" #symbol : "+r"(_p)); \
_p; \
})
#define _strongaddr(symbolstr) \

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/fsgsbase.h"
#ifdef __x86_64__
/**
* Changes `%fs` base address.
@ -26,3 +27,5 @@
void *(_wrfsbase)(void *p) {
return _wrfsbase(p);
}
#endif /* __x86_64__ */