mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-03-03 07:29:23 +00:00
Make zlib go faster
This commit is contained in:
parent
be29b709b7
commit
224c12f54d
6 changed files with 16 additions and 43 deletions
|
@ -108,12 +108,6 @@ forceinline pureconst bool IsArenaFrame(int x) {
|
|||
return 0x5004 <= x && x <= 0x7ffb;
|
||||
}
|
||||
|
||||
forceinline pureconst bool IsKernelFrame(int x) {
|
||||
intptr_t stack = GetStaticStackAddr(0);
|
||||
return (int)(stack >> 16) <= x &&
|
||||
x <= (int)((stack + (GetStackSize() - FRAMESIZE)) >> 16);
|
||||
}
|
||||
|
||||
forceinline pureconst bool IsStaticStackFrame(int x) {
|
||||
intptr_t stack = GetStaticStackAddr(0);
|
||||
return (int)(stack >> 16) <= x &&
|
||||
|
|
1
third_party/zlib/README.cosmo
vendored
1
third_party/zlib/README.cosmo
vendored
|
@ -26,4 +26,5 @@ ORIGIN
|
|||
LOCAL CHANGES
|
||||
|
||||
- Changed Trace() calls to use kprintf()
|
||||
- Use X86_HAVE() for runtime dispatching
|
||||
- We use our own crc32() implementation from LIBC_STR
|
||||
|
|
23
third_party/zlib/adler32.c
vendored
23
third_party/zlib/adler32.c
vendored
|
@ -2,6 +2,7 @@
|
|||
* Copyright (C) 1995-2011, 2016 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "third_party/zlib/macros.internal.h"
|
||||
#include "third_party/zlib/zconf.h"
|
||||
#include "third_party/zlib/zutil.internal.h"
|
||||
|
@ -62,10 +63,7 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
|
|||
# define MOD63(a) a %= BASE
|
||||
#endif
|
||||
|
||||
//# include "cpu_features.h"
|
||||
#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON)
|
||||
# include "adler32_simd.h"
|
||||
#endif
|
||||
uint32_t ZLIB_INTERNAL adler32_simd_(uint32_t, const unsigned char *, z_size_t);
|
||||
|
||||
/* ========================================================================= */
|
||||
uLong ZEXPORT adler32_z(adler, buf, len)
|
||||
|
@ -77,7 +75,7 @@ uLong ZEXPORT adler32_z(adler, buf, len)
|
|||
unsigned n;
|
||||
|
||||
#if defined(ADLER32_SIMD_SSSE3)
|
||||
if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3)
|
||||
if (buf != Z_NULL && len >= 64 && X86_HAVE(SSSE3))
|
||||
return adler32_simd_(adler, buf, len);
|
||||
#elif defined(ADLER32_SIMD_NEON)
|
||||
if (buf != Z_NULL && len >= 64)
|
||||
|
@ -99,24 +97,9 @@ uLong ZEXPORT adler32_z(adler, buf, len)
|
|||
return adler | (sum2 << 16);
|
||||
}
|
||||
|
||||
#if defined(ADLER32_SIMD_SSSE3)
|
||||
/*
|
||||
* Use SSSE3 to compute the adler32. Since this routine can be
|
||||
* freely used, check CPU features here. zlib convention is to
|
||||
* call adler32(0, NULL, 0), before making calls to adler32().
|
||||
* So this is a good early (and infrequent) place to cache CPU
|
||||
* features for those later, more interesting adler32() calls.
|
||||
*/
|
||||
if (buf == Z_NULL) {
|
||||
if (!len) /* Assume user is calling adler32(0, NULL, 0); */
|
||||
cpu_check_features();
|
||||
return 1L;
|
||||
}
|
||||
#else
|
||||
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
||||
if (buf == Z_NULL)
|
||||
return 1L;
|
||||
#endif
|
||||
|
||||
/* in case short lengths are provided, keep it somewhat fast */
|
||||
if (len < 16) {
|
||||
|
|
10
third_party/zlib/deflate.c
vendored
10
third_party/zlib/deflate.c
vendored
|
@ -243,14 +243,6 @@ int ZEXPORT deflateInit2(strm, level, method, windowBits, memLevel, strategy)
|
|||
deflate_state *s;
|
||||
int wrap = 1;
|
||||
|
||||
// Needed to activate optimized insert_string() that helps compression
|
||||
// for all wrapper formats (e.g. RAW, ZLIB, GZIP).
|
||||
// Feature detection is not triggered while using RAW mode (i.e. we never
|
||||
// call crc32() with a NULL buffer).
|
||||
#if defined(CRC32_ARMV8_CRC32) || defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||
cpu_check_features();
|
||||
#endif
|
||||
|
||||
if (strm == Z_NULL) return Z_STREAM_ERROR;
|
||||
|
||||
strm->msg = Z_NULL;
|
||||
|
@ -306,7 +298,7 @@ int ZEXPORT deflateInit2(strm, level, method, windowBits, memLevel, strategy)
|
|||
s->chromium_zlib_hash = 0;
|
||||
#if !defined(USE_ZLIB_RABIN_KARP_ROLLING_HASH)
|
||||
#if defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||
if (x86_cpu_enable_simd)
|
||||
if (X86_HAVE(SSE4_2))
|
||||
s->chromium_zlib_hash = 1;
|
||||
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
|
||||
if (arm_cpu_enable_crc32)
|
||||
|
|
8
third_party/zlib/insert_string.internal.h
vendored
8
third_party/zlib/insert_string.internal.h
vendored
|
@ -1,5 +1,6 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "third_party/zlib/deflate.internal.h"
|
||||
#include "third_party/zlib/zutil.internal.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
@ -16,7 +17,7 @@ COSMOPOLITAN_C_START_
|
|||
|
||||
// clang-format off
|
||||
#if defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||
#include <smmintrin.h> /* Required to make MSVC bot build pass. */
|
||||
// #include <smmintrin.h> /* Required to make MSVC bot build pass. */
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2")))
|
||||
|
@ -50,7 +51,6 @@ COSMOPOLITAN_C_START_
|
|||
|
||||
#if defined(TARGET_CPU_WITH_CRC)
|
||||
|
||||
TARGET_CPU_WITH_CRC
|
||||
local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
|
||||
Pos ret;
|
||||
unsigned *ip, val, h = 0;
|
||||
|
@ -61,7 +61,7 @@ local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
|
|||
if (s->level >= 6) val &= 0xFFFFFF;
|
||||
|
||||
/* Compute hash from the CRC32C of |val|. */
|
||||
h = _cpu_crc32c_hash_u32(h, val);
|
||||
asm("crc32\t%1,%0" : "+r"(h) : "rm"(val));
|
||||
|
||||
ret = s->head[h & s->hash_mask];
|
||||
s->head[h & s->hash_mask] = str;
|
||||
|
@ -129,7 +129,7 @@ local INLINE Pos insert_string(deflate_state* const s, const Pos str) {
|
|||
* the Rabin-Karp hash instead.
|
||||
*/ /* FALLTHROUGH Rabin-Karp */
|
||||
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||
if (x86_cpu_enable_simd) return insert_string_simd(s, str);
|
||||
if (X86_HAVE(SSE4_2)) return insert_string_simd(s, str);
|
||||
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
|
||||
if (arm_cpu_enable_crc32) return insert_string_simd(s, str);
|
||||
#endif
|
||||
|
|
11
third_party/zlib/zlib.mk
vendored
11
third_party/zlib/zlib.mk
vendored
|
@ -45,10 +45,13 @@ o/$(MODE)/third_party/zlib/adler32simd.o: private \
|
|||
OVERRIDE_CFLAGS += \
|
||||
-mssse3
|
||||
|
||||
o/$(MODE)/third_party/zlib/crcfold.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-mpclmul \
|
||||
-mssse3
|
||||
o/$(MODE)/third_party/zlib/adler32.o: private \
|
||||
OVERRIDE_CPPFLAGS += \
|
||||
-DADLER32_SIMD_SSSE3
|
||||
|
||||
o/$(MODE)/third_party/zlib/deflate.o: private \
|
||||
OVERRIDE_CPPFLAGS += \
|
||||
-DCRC32_SIMD_SSE42_PCLMUL
|
||||
|
||||
$(THIRD_PARTY_ZLIB_A_OBJS): private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
|
|
Loading…
Add table
Reference in a new issue