Make considerably more progress on AARCH64

- Utilities like pledge.com now build
- kprintf() will no longer balk at 48-bit addresses
- There's a new aarch64-dbg build mode that should work
- gc() and defer() are mostly pacified; avoid using them on aarch64
- THIRD_PARTY_STB now has Arm Neon intrinsics for fast image handling
This commit is contained in:
Justine Tunney 2023-05-12 22:42:57 -07:00
parent 1bfb3aab1b
commit fd34ef732d
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
91 changed files with 1288 additions and 1192 deletions

View file

@ -54,9 +54,16 @@ $(DSP_TTY_A).pkg: \
$(foreach x,$(DSP_TTY_A_DIRECTDEPS),$($(x)_A).pkg)
o/$(MODE)/dsp/tty/ttyraster.o: private \
OVERRIDE_CFLAGS += \
OVERRIDE_CFLAGS += \
$(MATHEMATICAL)
ifeq ($(ARCH), aarch64)
# takes 14 seconds to compile with aarch64 gcc
o/$(MODE)/dsp/tty/ttyraster.o: private \
OVERRIDE_CFLAGS += \
-O1
endif
DSP_TTY_LIBS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)))
DSP_TTY_SRCS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)_SRCS))
DSP_TTY_HDRS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)_HDRS))

View file

@ -1,7 +1,7 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,26 +16,26 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "dsp/tty/windex.h"
#include "dsp/tty/tty.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/macros.internal.h"
// Dispatches to fastest windex() implementation.
//
// `windex` is one quadword of storage whose initial value is zero.
// NOTE(review): .initbss and .endobj are project macros that are not
// visible here; presumably .initbss opens an init-time bss area keyed
// by `300,_init_windex` and .endobj closes the object and marks it
// global -- confirm against libc/macros.internal.h.
.initbss 300,_init_windex
windex: .quad 0
.endobj windex,globl
// Return to whichever section was active before .initbss switched away.
.previous
extern unsigned windex_avx2(uint16_t *, size_t);
extern unsigned windex_sse4(uint16_t *, size_t);
extern unsigned windex_k8(uint16_t *, size_t);
// Init-time fragment that picks a windex() implementation and stores
// its address with stosq.
//
// Selection works by starting with the most capable candidate in %rax
// and replacing it with a lesser one each time a feature test fails:
//   windex_avx2 -> windex_sse4 (no AVX2) -> windex_k8 (no SSE4.2)
//
// NOTE(review): .init.start/.init.end, ezlea, X86_HAVE and X86_NEED are
// project macros not visible here. This presumably relies on %rdi
// already pointing at the `windex` slot when the fragment runs, and on
// ezlea loading a symbol address into the named register -- confirm.
.init.start 300,_init_windex
// Candidate #1: the AVX2 implementation.
ezlea windex_avx2,ax
// The runtime AVX2 probe is only assembled when the build does not
// already require AVX2.
#if !X86_NEED(AVX2)
ezlea windex_sse4,dx
// testb sets ZF when the tested bit(s) are clear; cmovz then replaces
// the candidate in %rax with the fallback held in %rdx.
testb X86_HAVE(AVX2)+kCpuids(%rip)
cmovz %rdx,%rax
#endif /* AVX2 */
// Same pattern one tier down: fall back to windex_k8 without SSE4.2.
#if !X86_NEED(SSE4_2)
ezlea windex_k8,dx
testb X86_HAVE(SSE4_2)+kCpuids(%rip)
cmovz %rdx,%rax
#endif /* SSE4 */
// Store the chosen address from %rax to (%rdi); stosq also advances
// %rdi by 8 (direction flag clear).
stosq
.init.end 300,_init_windex
unsigned (*windex)(uint16_t *, size_t);
/**
 * Selects the windex() implementation and publishes it in `windex`.
 *
 * Declared with the constructor attribute, so it runs during program
 * startup rather than being called explicitly. The portable
 * windex_k8 routine is the default; on x86-64 builds it is replaced by
 * windex_avx2 when AVX2 is available, or else by windex_sse4 when
 * SSE4.2 is available. Other architectures always get windex_k8.
 */
__attribute__((__constructor__)) static void init_windex(void) {
  unsigned (*chosen)(uint16_t *, size_t) = windex_k8;
#ifdef __x86_64__
  if (X86_HAVE(AVX2)) {
    chosen = windex_avx2;
  } else if (X86_HAVE(SSE4_2)) {
    chosen = windex_sse4;
  }
#endif
  windex = chosen;
}

View file

@ -3,7 +3,7 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
extern unsigned (*const windex)(uint16_t *, size_t);
extern unsigned (*windex)(uint16_t *, size_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */