Make zlib go faster

This commit is contained in:
Justine Tunney 2022-09-18 06:30:45 -07:00
parent be29b709b7
commit 224c12f54d
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
6 changed files with 16 additions and 43 deletions

View file

@ -108,12 +108,6 @@ forceinline pureconst bool IsArenaFrame(int x) {
return 0x5004 <= x && x <= 0x7ffb;
}
/**
 * Tells whether `x` lies within the static stack's range of frames.
 *
 * The address returned by GetStaticStackAddr(0) and the address
 * GetStackSize() - FRAMESIZE bytes above it are each shifted right by
 * 16 bits, and `x` is tested against that inclusive range.
 *
 * @param x is the value to test; presumably an address divided by
 *     65536 (TODO confirm against callers)
 * @return true if `x` is within the inclusive range
 */
forceinline pureconst bool IsKernelFrame(int x) {
  intptr_t base = GetStaticStackAddr(0);
  /* Below the lowest frame: reject before computing the upper bound. */
  if (x < (int)(base >> 16)) {
    return false;
  }
  /* Upper bound is derived from the address FRAMESIZE bytes short of
     the end of the stack region. */
  return x <= (int)((base + (GetStackSize() - FRAMESIZE)) >> 16);
}
forceinline pureconst bool IsStaticStackFrame(int x) {
intptr_t stack = GetStaticStackAddr(0);
return (int)(stack >> 16) <= x &&

View file

@ -26,4 +26,5 @@ ORIGIN
LOCAL CHANGES
- Changed Trace() calls to use kprintf()
- Use X86_HAVE() for runtime dispatching
- We use our own crc32() implementation from LIBC_STR

View file

@ -2,6 +2,7 @@
* Copyright (C) 1995-2011, 2016 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "libc/nexgen32e/x86feature.h"
#include "third_party/zlib/macros.internal.h"
#include "third_party/zlib/zconf.h"
#include "third_party/zlib/zutil.internal.h"
@ -62,10 +63,7 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
# define MOD63(a) a %= BASE
#endif
//# include "cpu_features.h"
#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON)
# include "adler32_simd.h"
#endif
uint32_t ZLIB_INTERNAL adler32_simd_(uint32_t, const unsigned char *, z_size_t);
/* ========================================================================= */
uLong ZEXPORT adler32_z(adler, buf, len)
@ -77,7 +75,7 @@ uLong ZEXPORT adler32_z(adler, buf, len)
unsigned n;
#if defined(ADLER32_SIMD_SSSE3)
if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3)
if (buf != Z_NULL && len >= 64 && X86_HAVE(SSSE3))
return adler32_simd_(adler, buf, len);
#elif defined(ADLER32_SIMD_NEON)
if (buf != Z_NULL && len >= 64)
@ -99,24 +97,9 @@ uLong ZEXPORT adler32_z(adler, buf, len)
return adler | (sum2 << 16);
}
#if defined(ADLER32_SIMD_SSSE3)
/*
* Use SSSE3 to compute the adler32. Since this routine can be
* freely used, check CPU features here. zlib convention is to
* call adler32(0, NULL, 0), before making calls to adler32().
* So this is a good early (and infrequent) place to cache CPU
* features for those later, more interesting adler32() calls.
*/
if (buf == Z_NULL) {
if (!len) /* Assume user is calling adler32(0, NULL, 0); */
cpu_check_features();
return 1L;
}
#else
/* initial Adler-32 value (deferred check for len == 1 speed) */
if (buf == Z_NULL)
return 1L;
#endif
/* in case short lengths are provided, keep it somewhat fast */
if (len < 16) {

View file

@ -243,14 +243,6 @@ int ZEXPORT deflateInit2(strm, level, method, windowBits, memLevel, strategy)
deflate_state *s;
int wrap = 1;
// Needed to activate optimized insert_string() that helps compression
// for all wrapper formats (e.g. RAW, ZLIB, GZIP).
// Feature detection is not triggered while using RAW mode (i.e. we never
// call crc32() with a NULL buffer).
#if defined(CRC32_ARMV8_CRC32) || defined(CRC32_SIMD_SSE42_PCLMUL)
cpu_check_features();
#endif
if (strm == Z_NULL) return Z_STREAM_ERROR;
strm->msg = Z_NULL;
@ -306,7 +298,7 @@ int ZEXPORT deflateInit2(strm, level, method, windowBits, memLevel, strategy)
s->chromium_zlib_hash = 0;
#if !defined(USE_ZLIB_RABIN_KARP_ROLLING_HASH)
#if defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd)
if (X86_HAVE(SSE4_2))
s->chromium_zlib_hash = 1;
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
if (arm_cpu_enable_crc32)

View file

@ -1,5 +1,6 @@
#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
#include "libc/nexgen32e/x86feature.h"
#include "third_party/zlib/deflate.internal.h"
#include "third_party/zlib/zutil.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
@ -16,7 +17,7 @@ COSMOPOLITAN_C_START_
// clang-format off
#if defined(CRC32_SIMD_SSE42_PCLMUL)
#include <smmintrin.h> /* Required to make MSVC bot build pass. */
// #include <smmintrin.h> /* Required to make MSVC bot build pass. */
#if defined(__clang__) || defined(__GNUC__)
#define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2")))
@ -50,7 +51,6 @@ COSMOPOLITAN_C_START_
#if defined(TARGET_CPU_WITH_CRC)
TARGET_CPU_WITH_CRC
local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
Pos ret;
unsigned *ip, val, h = 0;
@ -61,7 +61,7 @@ local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
if (s->level >= 6) val &= 0xFFFFFF;
/* Compute hash from the CRC32C of |val|. */
h = _cpu_crc32c_hash_u32(h, val);
asm("crc32\t%1,%0" : "+r"(h) : "rm"(val));
ret = s->head[h & s->hash_mask];
s->head[h & s->hash_mask] = str;
@ -129,7 +129,7 @@ local INLINE Pos insert_string(deflate_state* const s, const Pos str) {
* the Rabin-Karp hash instead.
*/ /* FALLTHROUGH Rabin-Karp */
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd) return insert_string_simd(s, str);
if (X86_HAVE(SSE4_2)) return insert_string_simd(s, str);
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
if (arm_cpu_enable_crc32) return insert_string_simd(s, str);
#endif

View file

@ -45,10 +45,13 @@ o/$(MODE)/third_party/zlib/adler32simd.o: private \
OVERRIDE_CFLAGS += \
-mssse3
o/$(MODE)/third_party/zlib/crcfold.o: private \
OVERRIDE_CFLAGS += \
-mpclmul \
-mssse3
o/$(MODE)/third_party/zlib/adler32.o: private \
OVERRIDE_CPPFLAGS += \
-DADLER32_SIMD_SSSE3
o/$(MODE)/third_party/zlib/deflate.o: private \
OVERRIDE_CPPFLAGS += \
-DCRC32_SIMD_SSE42_PCLMUL
$(THIRD_PARTY_ZLIB_A_OBJS): private \
OVERRIDE_CFLAGS += \