Make improvements

- Emulator can now test the αcτµαlly pδrταblε εxεcµταblε bootloader

- Whipped up a webserver named redbean. It services 150k requests per
  second on a single core. Bundling assets inside zip enables extremely
  fast serving for two reasons. The first is that zip central directory
  lookups go faster than stat() system calls. The second is that both
  zip and gzip content-encoding use DEFLATE; therefore, compressed
  responses can be served via the sendfile() system call, which does an
  in-kernel copy directly from the zip executable structure. Also note
  that redbean zip executables can be deployed easily to all platforms,
  since these native executables work on Linux, Mac, BSD, and Windows.

- Address sanitizer now works very well
This commit is contained in:
Justine Tunney 2020-09-06 21:39:00 -07:00
parent 7327c345f9
commit 416fd86676
230 changed files with 9835 additions and 5682 deletions

45
libc/intrin/mpsadbw.c Normal file
View file

@ -0,0 +1,45 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/mpsadbw.h"
#include "libc/macros.h"
#include "libc/str/str.h"
/**
 * Computes eight sums of absolute differences over sliding windows.
 *
 * One four-byte block is taken from 𝑎 at byte offset `(control & 3) * 4`.
 * It is compared against eight overlapping four-byte windows of 𝑏: the
 * first window begins at byte offset `control & 4` and each following
 * window begins one byte later. Every 16-bit output lane holds the sum
 * of the four absolute byte differences for its window.
 *
 * This appears to be intended for video encoding motion estimation. It
 * can be combined with phminposuw, which lets us search for the
 * overlapping int inside 𝑏 that is nearest to an aligned int in 𝑎.
 *
 * @param c receives the eight 16-bit sums
 * @param b supplies the sliding windows
 * @param a supplies the reference block
 * @param control selects both offsets; only its low three bits are read
 * @note goes fast w/ sse4 cf. core c. 2006 cf. bulldozer c. 2011
 * @mayalias
 */
void(mpsadbw)(uint16_t c[8], const uint8_t b[16], const uint8_t a[16],
              uint8_t control) {
  uint16_t sums[8];
  const uint8_t *window = b + (control & 4);
  const uint8_t *block = a + (control & 3) * 4;
  unsigned lane, k;
  for (lane = 0; lane < 8; ++lane) {
    uint16_t acc = 0;
    for (k = 0; k < 4; ++k) {
      acc += ABS(window[lane + k] - block[k]);
    }
    sums[lane] = acc;
  }
  /* results are staged in a local array so that c may overlap a or b */
  memcpy(c, sums, 16);
}

41
libc/intrin/mpsadbw.h Normal file
View file

@ -0,0 +1,41 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_MPSADBW_H_
#define COSMOPOLITAN_LIBC_INTRIN_MPSADBW_H_
#include "libc/intrin/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
 * Computes multiple sums of absolute differences (function form).
 *
 * Arguments are, in order: the output array of eight 16-bit sums, the
 * 16-byte array that supplies eight overlapping four-byte windows, the
 * 16-byte array that supplies one four-byte reference block, and a
 * control byte. Per the C implementation in mpsadbw.c, `control & 4` is
 * the byte offset of the first window and `(control & 3) * 4` is the
 * byte offset of the reference block.
 */
void mpsadbw(uint16_t[8], const uint8_t[16], const uint8_t[16], uint8_t);
#ifndef __STRICT_ANSI__
/*
 * Base address of the stub table defined in mpsadbws.S. It is declared
 * with a function type so that the macro below can compute the address
 * of an individual stub and call it with two vector arguments.
 */
__intrin_xmm_t __mpsadbws(__intrin_xmm_t, __intrin_xmm_t);
/*
 * Statement-macro form of mpsadbw(). When __STRICT_ANSI__ is not
 * defined it shadows the function declared above, so a plain call
 * mpsadbw(C, B, A, I) expands to the dispatch below.
 *
 * 1. If `!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1)` holds, the
 *    hardware instruction is used and C, B, A are accessed through
 *    __intrin_xmm_t pointers.
 *    NOTE(review): presumably X86_NEED() is a build-time requirement
 *    and X86_HAVE() a feature check; confirm against their definitions.
 *
 *    a. isconstant(I) and !X86_NEED(AVX): emits `mpsadbw` with I as
 *       the immediate ("i" constraint). B is tied to the output
 *       register through the "0" constraint; A is the source operand.
 *
 *    b. isconstant(I) and X86_NEED(AVX): emits `vmpsadbw` with a
 *       separate output register, B and A as the two vector inputs,
 *       and I as the immediate.
 *
 *    c. I is not a constant, so it cannot satisfy an "i" constraint:
 *       calls the stub at `&__mpsadbws + (I & 7) * 8`, passing B then
 *       A by value, and stores the returned vector to C. The stride of
 *       8 must match the size of each stub in mpsadbws.S.
 *
 * 2. Otherwise calls mpsadbw(C, B, A, I). A macro name is not expanded
 *    again inside its own expansion, so this is the real function.
 *
 * The arguments appear in several branches of the expansion, but only
 * one branch runs for any given invocation.
 * NOTE(review): this assumes isconstant() does not itself evaluate its
 * argument; verify before passing an I with side effects.
 */
#define mpsadbw(C, B, A, I) \
do { \
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1))) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (isconstant(I)) { \
if (!X86_NEED(AVX)) { \
asm("mpsadbw\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vmpsadbw\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__mpsadbws) *Fn; \
Fn = (typeof(__mpsadbws) *)((uintptr_t)&__mpsadbws + (Vimm & 7) * 8); \
*Xmm0 = Fn(*Xmm1, *Xmm2); \
} \
} else { \
mpsadbw(C, B, A, I); \
} \
} while (0)
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_MPSADBW_H_ */

35
libc/intrin/mpsadbws.S Normal file
View file

@ -0,0 +1,35 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify │
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License. │
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of │
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software │
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
/ Jump table for mpsadbw() with non-constexpr immediate parameter.
/
/ Holds eight back-to-back stubs, one for each immediate value 0..7.
/ Stub i runs `mpsadbw $i,%xmm1,%xmm0` and returns, so both operands
/ are read from %xmm0 and %xmm1 and the result is left in %xmm0.
/
/ The mpsadbw() macro in mpsadbw.h reaches stub i by calling the
/ address `__mpsadbws + (i & 7) * 8`, so every stub has to assemble
/ to exactly eight bytes and the table has to start 8-byte aligned.
/
/ @note needs sse4 cf. core c. 2006 cf. bulldozer c. 2011
/ @see mpsadbw()
.align 8
__mpsadbws:
i = 0
.rept 8
mpsadbw $i,%xmm1,%xmm0
ret
/ NOTE(review): the nop presumably pads each stub to the 8-byte
/ stride used by the caller; confirm the encoded sizes add up.
nop
i = i + 1
.endr
.endfn __mpsadbws,globl

View file

@ -14,7 +14,7 @@ forceinline void *repstosb(void *dest, unsigned char al, size_t cx) {
void *Di = (DI); \
size_t Cx = (CX); \
unsigned char Al = (AL); \
asm("rep stosb" \
asm("rep stosb %b5,(%0)" \
: "=D"(Di), "=c"(Cx), "=m"(*(char(*)[Cx])Di) \
: "0"(Di), "1"(Cx), "a"(Al)); \
Di; \