Port a lot more code to AARCH64

- Introduce epoll_pwait()
- Rewrite -ftrapv and ffs() libraries in C code
- Use more FreeBSD code in math function library
- Get significantly more tests passing on qemu-aarch64
- Fix many Musl long double functions that were broken on AARCH64
This commit is contained in:
Justine Tunney 2023-05-14 09:32:15 -07:00
parent 91791e9f38
commit 550b52abf6
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
158 changed files with 6018 additions and 3499 deletions

View file

@ -8,16 +8,8 @@ DSP_CORE = $(DSP_CORE_A_DEPS) $(DSP_CORE_A)
# Path of the core.a output for this package under o/$(MODE)/.
DSP_CORE_A = o/$(MODE)/dsp/core/core.a
# Every entry found directly inside dsp/core/ (expanded once, at parse time).
DSP_CORE_A_FILES := $(wildcard dsp/core/*)
# Subset of the file list ending in .h
DSP_CORE_A_HDRS = $(filter %.h,$(DSP_CORE_A_FILES))
# NOTE(review): this listing appears to interleave the old and new sides of
# a diff hunk, which is why DSP_CORE_A_SRCS and DSP_CORE_A_OBJS are each
# assigned twice below. If read as a single makefile, the later assignment
# of each variable is the one that takes effect -- confirm against the
# actual file.
#
# Subsets of the file list ending in .S and in .c respectively.
DSP_CORE_A_SRCS_S = $(filter %.S,$(DSP_CORE_A_FILES))
DSP_CORE_A_SRCS_C = $(filter %.c,$(DSP_CORE_A_FILES))
# Source list built from both the .S and .c subsets.
DSP_CORE_A_SRCS = \
$(DSP_CORE_A_SRCS_S) \
$(DSP_CORE_A_SRCS_C)
# Maps each .S / .c source path to an object path under o/$(MODE)/.
DSP_CORE_A_OBJS = \
$(DSP_CORE_A_SRCS_S:%.S=o/$(MODE)/%.o) \
$(DSP_CORE_A_SRCS_C:%.c=o/$(MODE)/%.o)
# Source list restricted to .c files only.
DSP_CORE_A_SRCS = $(filter %.c,$(DSP_CORE_A_FILES))
# Maps each .c source path to an object path under o/$(MODE)/.
DSP_CORE_A_OBJS = $(DSP_CORE_A_SRCS:%.c=o/$(MODE)/%.o)
DSP_CORE_A_CHECKS = \
$(DSP_CORE_A).pkg \

View file

@ -1,7 +1,7 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,24 +16,31 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "dsp/core/core.h"
#include "libc/limits.h"
#include "libc/macros.internal.h"
.balign 16
#include "third_party/aarch64/arm_neon.h"
#include "third_party/intel/emmintrin.internal.h"
// Mixes audio.
//
// Adds the samples at rdx into the samples at rsi using signed
// saturating 16-bit addition (paddsw), storing the result back to
// rsi. Chunks are walked from the last one down to the first.
//
// Syntax: GAS / AT&T (x86-64). Uses xmm0 as scratch and modifies rdi
// and the flags.
//
// @param rdi is # of chunks to process, where each chunk is eight
//     int16 samples (16 bytes); zero is a no-op
// @param rsi points to aligned pcm s16le input/output memory
// @param rdx points to aligned pcm s16le [0..1] input memory
// @note movdqa and the paddsw memory operand both require their
//     addresses to be 16-byte aligned
sad16x8n:
// NOTE(review): .leafprologue / .profilable / .leafepilogue / .endfn
// are macros defined elsewhere (presumably libc/macros.internal.h);
// their expansions are not visible here.
.leafprologue
.profilable
// nothing to do when the chunk count is zero
test %rdi,%rdi
jz 1f
// rdi = chunk count * 8 = total number of int16 elements
shl $3,%rdi
// step back one chunk (8 elements); ZF from this sub drives the jnz
// below, since the SSE instructions in between leave flags untouched
0: sub $8,%rdi
// index is scaled by 2 because each element is two bytes wide
movdqa (%rsi,%rdi,2),%xmm0
paddsw (%rdx,%rdi,2),%xmm0
movdqa %xmm0,(%rsi,%rdi,2)
// keep looping until the element index reaches zero
jnz 0b
1: .leafepilogue
.endfn sad16x8n,globl,hidden
/**
 * Mixes audio.
 *
 * This function performs saturated addition on an array of shorts. For
 * each of the `n` rows, all eight lanes of `x[i]` are replaced with
 * `x[i][j] + y[i][j]` clamped to the `[INT16_MIN, INT16_MAX]` range.
 *
 * @param n is the number of eight-sample rows to mix; zero is a no-op
 * @param x needs to be 16-byte aligned; it is read and overwritten
 * @param y needs to be 16-byte aligned; it is only read
 */
void sad16x8n(size_t n, short x[n][8], const short y[n][8]) {
  size_t i;
  for (i = 0; i < n; ++i) {
#ifdef __x86_64__
    // one 128-bit saturating add covers the whole eight-lane row
    *(__m128i *)x[i] =
        _mm_adds_epi16(*(__m128i *)x[i], *(const __m128i *)y[i]);
#elif defined(__aarch64__)
    // a row is eight lanes wide, so the 128-bit "q" form is required;
    // the 64-bit vqadd_s16() would only mix lanes 0..3 of each row
    vst1q_s16(x[i], vqaddq_s16(vld1q_s16(x[i]), vld1q_s16(y[i])));
#else
    // portable fallback: the sum is computed as int, then clamped
    size_t j;
    for (j = 0; j < 8; ++j) {
      x[i][j] = MIN(MAX(x[i][j] + y[i][j], INT16_MIN), INT16_MAX);
    }
#endif
  }
}