Make AARCH64 harder, better, faster, stronger

- Perform some housekeeping on scalar math function code
- Import ARM's Optimized Routines for SIMD string processing
- Upgrade to latest Chromium zlib and enable more SIMD optimizations
This commit is contained in:
Justine Tunney 2023-05-15 01:51:29 -07:00
parent 550b52abf6
commit cc1732bc42
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
143 changed files with 15661 additions and 1329 deletions

View file

@ -5,15 +5,11 @@
* Copyright (C) 1995-2022 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "third_party/zlib/inffast.internal.h"
#include "third_party/zlib/inflate.internal.h"
#include "third_party/zlib/inftrees.internal.h"
#include "third_party/zlib/internal.h"
asm(".ident\t\"\\n\\n\
zlib (zlib License)\\n\
Copyright 1995-2017 Jean-loup Gailly and Mark Adler\"");
asm(".include \"libc/disclaimer.inc\"");
zlib 1.2.13 (zlib License)\\n\
Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\
Invented 1990 Phillip Walter Katz\"");
// clang-format off
/*
@ -93,6 +89,13 @@ asm(".include \"libc/disclaimer.inc\"");
* The history for versions after 1.2.0 are in ChangeLog in zlib distribution.
*/
#include "third_party/zlib/zutil.internal.h"
#include "third_party/zlib/inftrees.internal.h"
#include "third_party/zlib/inflate.internal.h"
#include "third_party/zlib/inffast_chunk.internal.h"
#include "third_party/zlib/internal.h"
#include "third_party/zlib/chunkcopy.inc"
#ifdef MAKEFIXED
# ifndef BUILDFIXED
# define BUILDFIXED
@ -176,6 +179,8 @@ int windowBits;
/* extract wrap request from windowBits parameter */
if (windowBits < 0) {
if (windowBits < -15)
return Z_STREAM_ERROR;
wrap = 0;
windowBits = -windowBits;
}
@ -322,7 +327,14 @@ struct inflate_state FAR *state;
}
#ifdef MAKEFIXED
#include <stdio.h>
#include "libc/calls/calls.h"
#include "libc/calls/dprintf.h"
#include "libc/calls/weirdtypes.h"
#include "libc/fmt/fmt.h"
#include "libc/mem/fmt.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/temp.h"
#include "third_party/musl/tempnam.h"
/*
Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also
@ -408,10 +420,20 @@ unsigned copy;
/* if it hasn't been done already, allocate space for the window */
if (state->window == Z_NULL) {
unsigned wsize = 1U << state->wbits;
state->window = (unsigned char FAR *)
ZALLOC(strm, 1U << state->wbits,
ZALLOC(strm, wsize + CHUNKCOPY_CHUNK_SIZE,
sizeof(unsigned char));
if (state->window == Z_NULL) return 1;
#ifdef INFLATE_CLEAR_UNUSED_UNDEFINED
/* Copies from the overflow portion of this buffer are undefined and
may cause analysis tools to raise a warning if we don't initialize
it. However, this undefined data overwrites other undefined data
and is subsequently either overwritten or left deliberately
undefined at the end of decode; so there's really no point.
*/
zmemzero(state->window + wsize, CHUNKCOPY_CHUNK_SIZE);
#endif
}
/* if window not in use yet, initialize */
@ -1065,7 +1087,7 @@ int flush;
if (have >= INFLATE_FAST_MIN_INPUT &&
left >= INFLATE_FAST_MIN_OUTPUT) {
RESTORE();
inflate_fast(strm, out);
inflate_fast_chunk_(strm, out);
LOAD();
if (state->mode == TYPE)
state->back = -1;
@ -1200,17 +1222,16 @@ int flush;
else
from = state->window + (state->wnext - copy);
if (copy > state->length) copy = state->length;
if (copy > left) copy = left;
put = chunkcopy_safe(put, from, copy, put + left);
}
else { /* copy from output */
from = put - state->offset;
copy = state->length;
if (copy > left) copy = left;
put = chunkcopy_lapped_safe(put, state->offset, copy, put + left);
}
if (copy > left) copy = left;
left -= copy;
state->length -= copy;
do {
*put++ = *from++;
} while (--copy);
if (state->length == 0) state->mode = LEN;
break;
case LIT:
@ -1279,6 +1300,29 @@ int flush;
Note: a memory error from inflate() is non-recoverable.
*/
inf_leave:
#if defined(ZLIB_DEBUG)
/* XXX(cavalcantii): I put this in place back in 2017 to help debug faulty
* client code relying on undefined behavior when chunk_copy first landed.
*
* It is save to say after all these years that Chromium code is well
* behaved and works fine with the optimization, therefore we can enable
* this only for DEBUG builds.
*
* We write a defined value in the unused space to help mark
* where the stream has ended. We don't use zeros as that can
* mislead clients relying on undefined behavior (i.e. assuming
* that the data is over when the buffer has a zero/null value).
*
* The basic idea is that if client code is not relying on the zlib context
* to inform the amount of decompressed data, but instead reads the output
* buffer until a zero/null is found, it will fail faster and harder
* when the remaining of the buffer is marked with a symbol (e.g. 0x55).
*/
if (left >= CHUNKCOPY_CHUNK_SIZE)
memset(put, 0x55, CHUNKCOPY_CHUNK_SIZE);
else
memset(put, 0x55, left);
#endif
RESTORE();
if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
(state->mode < CHECK || flush != Z_FINISH)))