mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-28 07:18:30 +00:00
Port a lot more code to AARCH64
- Introduce epoll_pwait() - Rewrite -ftrapv and ffs() libraries in C code - Use more FreeBSD code in math function library - Get significantly more tests passing on qemu-aarch64 - Fix many Musl long double functions that were broken on AARCH64
This commit is contained in:
parent
91791e9f38
commit
550b52abf6
158 changed files with 6018 additions and 3499 deletions
|
@ -8,16 +8,8 @@ DSP_CORE = $(DSP_CORE_A_DEPS) $(DSP_CORE_A)
|
|||
DSP_CORE_A = o/$(MODE)/dsp/core/core.a
|
||||
DSP_CORE_A_FILES := $(wildcard dsp/core/*)
|
||||
DSP_CORE_A_HDRS = $(filter %.h,$(DSP_CORE_A_FILES))
|
||||
DSP_CORE_A_SRCS_S = $(filter %.S,$(DSP_CORE_A_FILES))
|
||||
DSP_CORE_A_SRCS_C = $(filter %.c,$(DSP_CORE_A_FILES))
|
||||
|
||||
DSP_CORE_A_SRCS = \
|
||||
$(DSP_CORE_A_SRCS_S) \
|
||||
$(DSP_CORE_A_SRCS_C)
|
||||
|
||||
DSP_CORE_A_OBJS = \
|
||||
$(DSP_CORE_A_SRCS_S:%.S=o/$(MODE)/%.o) \
|
||||
$(DSP_CORE_A_SRCS_C:%.c=o/$(MODE)/%.o)
|
||||
DSP_CORE_A_SRCS = $(filter %.c,$(DSP_CORE_A_FILES))
|
||||
DSP_CORE_A_OBJS = $(DSP_CORE_A_SRCS:%.c=o/$(MODE)/%.o)
|
||||
|
||||
DSP_CORE_A_CHECKS = \
|
||||
$(DSP_CORE_A).pkg \
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,24 +16,31 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "dsp/core/core.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.internal.h"
|
||||
.balign 16
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
||||
// Mixes audio.
|
||||
//
|
||||
// @param rdi is # aligned int16[8] sample chunks to process
|
||||
// @param rsi points to aligned pcm s16le input/output memory
|
||||
// @param rdx points to aligned pcm s16le [0..1] input memory
|
||||
// Saturating in-place add of packed int16 samples: for each 16-byte
// chunk, (%rsi) = sat16((%rsi) + (%rdx)). Chunks are visited from the
// last one down to the first. AT&T syntax; arguments arrive in
// %rdi (chunk count), %rsi (in/out), %rdx (in).
sad16x8n:
|
||||
.leafprologue // project macro (defined outside this view)
|
||||
.profilable // project macro (defined outside this view)
|
||||
test %rdi,%rdi // zero chunks requested?
|
||||
jz 1f // then skip the loop entirely
|
||||
shl $3,%rdi // chunks -> int16 element count (8 per chunk)
|
||||
0: sub $8,%rdi // step back one chunk; sets ZF once index hits 0
|
||||
movdqa (%rsi,%rdi,2),%xmm0 // aligned load of 8 words (index scaled by 2 bytes)
|
||||
paddsw (%rdx,%rdi,2),%xmm0 // packed signed-saturating 16-bit add
|
||||
movdqa %xmm0,(%rsi,%rdi,2) // aligned store of the result over the input
|
||||
jnz 0b // ZF is still from the sub: SSE ops leave flags alone
|
||||
1: .leafepilogue // project macro (defined outside this view)
|
||||
.endfn sad16x8n,globl,hidden
|
||||
/**
 * Mixes audio.
 *
 * This function performs saturated addition on an array of shorts. Each
 * sample of `y` is added to the matching sample of `x`, the sum is
 * clamped to the int16 range, and the result is stored back into `x`.
 *
 * @param n is the number of 8-sample (16-byte) chunks to process
 * @param x is the input/output memory; needs to be 16-byte aligned
 * @param y is the input memory; needs to be 16-byte aligned
 */
void sad16x8n(size_t n, short x[n][8], const short y[n][8]) {
  size_t i;
  for (i = 0; i < n; ++i) {
#ifdef __x86_64__
    *(__m128i *)x[i] = _mm_adds_epi16(*(__m128i *)x[i], *(__m128i *)y[i]);
#elif defined(__aarch64__)
    // a chunk holds eight lanes, so the 128-bit (q) form is required;
    // the 64-bit vqadd_s16 would only mix the first four samples
    *(int16x8_t *)x[i] = vqaddq_s16(*(int16x8_t *)x[i], *(int16x8_t *)y[i]);
#else
    size_t j;
    for (j = 0; j < 8; ++j) {
      // operands promote to int, so the sum can't wrap before clamping
      x[i][j] = MIN(MAX(x[i][j] + y[i][j], INT16_MIN), INT16_MAX);
    }
#endif
  }
}
|
Loading…
Add table
Add a link
Reference in a new issue