mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-14 06:59:10 +00:00
Make considerably more progress on AARCH64
- Utilities like pledge.com now build - kprintf() will no longer balk at 48-bit addresses - There's a new aarch64-dbg build mode that should work - gc() and defer() are mostly pacified; avoid using them on aarch64 - THIRD_PARTY_STB now has Arm Neon intrinsics for fast image handling
This commit is contained in:
parent
1bfb3aab1b
commit
fd34ef732d
91 changed files with 1288 additions and 1192 deletions
|
@ -54,9 +54,16 @@ $(DSP_TTY_A).pkg: \
|
|||
$(foreach x,$(DSP_TTY_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
o/$(MODE)/dsp/tty/ttyraster.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
OVERRIDE_CFLAGS += \
|
||||
$(MATHEMATICAL)
|
||||
|
||||
ifeq ($(ARCH), aarch64)
|
||||
# takes 14 seconds to compile with aarch64 gcc
|
||||
o/$(MODE)/dsp/tty/ttyraster.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-O1
|
||||
endif
|
||||
|
||||
DSP_TTY_LIBS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)))
|
||||
DSP_TTY_SRCS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)_SRCS))
|
||||
DSP_TTY_HDRS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)_HDRS))
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,26 +16,26 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "dsp/tty/windex.h"
|
||||
#include "dsp/tty/tty.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Dispatches to fastest windex() implementation.
|
||||
.initbss 300,_init_windex
|
||||
windex: .quad 0
|
||||
.endobj windex,globl
|
||||
.previous
|
||||
extern unsigned windex_avx2(uint16_t *, size_t);
|
||||
extern unsigned windex_sse4(uint16_t *, size_t);
|
||||
extern unsigned windex_k8(uint16_t *, size_t);
|
||||
|
||||
.init.start 300,_init_windex
|
||||
ezlea windex_avx2,ax
|
||||
#if !X86_NEED(AVX2)
|
||||
ezlea windex_sse4,dx
|
||||
testb X86_HAVE(AVX2)+kCpuids(%rip)
|
||||
cmovz %rdx,%rax
|
||||
#endif /* AVX2 */
|
||||
#if !X86_NEED(SSE4_2)
|
||||
ezlea windex_k8,dx
|
||||
testb X86_HAVE(SSE4_2)+kCpuids(%rip)
|
||||
cmovz %rdx,%rax
|
||||
#endif /* SSE4 */
|
||||
stosq
|
||||
.init.end 300,_init_windex
|
||||
unsigned (*windex)(uint16_t *, size_t);
|
||||
|
||||
__attribute__((__constructor__)) static void init_windex(void) {
|
||||
#ifdef __x86_64__
|
||||
if (X86_HAVE(AVX2)) {
|
||||
windex = windex_avx2;
|
||||
} else if (X86_HAVE(SSE4_2)) {
|
||||
windex = windex_sse4;
|
||||
} else {
|
||||
windex = windex_k8;
|
||||
}
|
||||
#else
|
||||
windex = windex_k8;
|
||||
#endif
|
||||
}
|
|
@ -3,7 +3,7 @@
|
|||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
extern unsigned (*const windex)(uint16_t *, size_t);
|
||||
extern unsigned (*windex)(uint16_t *, size_t);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue