mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-27 14:58:30 +00:00
Make zlib go faster
This commit is contained in:
parent
be29b709b7
commit
224c12f54d
6 changed files with 16 additions and 43 deletions
|
@ -108,12 +108,6 @@ forceinline pureconst bool IsArenaFrame(int x) {
|
||||||
return 0x5004 <= x && x <= 0x7ffb;
|
return 0x5004 <= x && x <= 0x7ffb;
|
||||||
}
|
}
|
||||||
|
|
||||||
forceinline pureconst bool IsKernelFrame(int x) {
|
|
||||||
intptr_t stack = GetStaticStackAddr(0);
|
|
||||||
return (int)(stack >> 16) <= x &&
|
|
||||||
x <= (int)((stack + (GetStackSize() - FRAMESIZE)) >> 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
forceinline pureconst bool IsStaticStackFrame(int x) {
|
forceinline pureconst bool IsStaticStackFrame(int x) {
|
||||||
intptr_t stack = GetStaticStackAddr(0);
|
intptr_t stack = GetStaticStackAddr(0);
|
||||||
return (int)(stack >> 16) <= x &&
|
return (int)(stack >> 16) <= x &&
|
||||||
|
|
1
third_party/zlib/README.cosmo
vendored
1
third_party/zlib/README.cosmo
vendored
|
@ -26,4 +26,5 @@ ORIGIN
|
||||||
LOCAL CHANGES
|
LOCAL CHANGES
|
||||||
|
|
||||||
- Changed Trace() calls to use kprintf()
|
- Changed Trace() calls to use kprintf()
|
||||||
|
- Use X86_HAVE() for runtime dispatching
|
||||||
- We use our own crc32() implementation from LIBC_STR
|
- We use our own crc32() implementation from LIBC_STR
|
||||||
|
|
23
third_party/zlib/adler32.c
vendored
23
third_party/zlib/adler32.c
vendored
|
@ -2,6 +2,7 @@
|
||||||
* Copyright (C) 1995-2011, 2016 Mark Adler
|
* Copyright (C) 1995-2011, 2016 Mark Adler
|
||||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||||
*/
|
*/
|
||||||
|
#include "libc/nexgen32e/x86feature.h"
|
||||||
#include "third_party/zlib/macros.internal.h"
|
#include "third_party/zlib/macros.internal.h"
|
||||||
#include "third_party/zlib/zconf.h"
|
#include "third_party/zlib/zconf.h"
|
||||||
#include "third_party/zlib/zutil.internal.h"
|
#include "third_party/zlib/zutil.internal.h"
|
||||||
|
@ -62,10 +63,7 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
|
||||||
# define MOD63(a) a %= BASE
|
# define MOD63(a) a %= BASE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//# include "cpu_features.h"
|
uint32_t ZLIB_INTERNAL adler32_simd_(uint32_t, const unsigned char *, z_size_t);
|
||||||
#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON)
|
|
||||||
# include "adler32_simd.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
uLong ZEXPORT adler32_z(adler, buf, len)
|
uLong ZEXPORT adler32_z(adler, buf, len)
|
||||||
|
@ -77,7 +75,7 @@ uLong ZEXPORT adler32_z(adler, buf, len)
|
||||||
unsigned n;
|
unsigned n;
|
||||||
|
|
||||||
#if defined(ADLER32_SIMD_SSSE3)
|
#if defined(ADLER32_SIMD_SSSE3)
|
||||||
if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3)
|
if (buf != Z_NULL && len >= 64 && X86_HAVE(SSSE3))
|
||||||
return adler32_simd_(adler, buf, len);
|
return adler32_simd_(adler, buf, len);
|
||||||
#elif defined(ADLER32_SIMD_NEON)
|
#elif defined(ADLER32_SIMD_NEON)
|
||||||
if (buf != Z_NULL && len >= 64)
|
if (buf != Z_NULL && len >= 64)
|
||||||
|
@ -99,24 +97,9 @@ uLong ZEXPORT adler32_z(adler, buf, len)
|
||||||
return adler | (sum2 << 16);
|
return adler | (sum2 << 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(ADLER32_SIMD_SSSE3)
|
|
||||||
/*
|
|
||||||
* Use SSSE3 to compute the adler32. Since this routine can be
|
|
||||||
* freely used, check CPU features here. zlib convention is to
|
|
||||||
* call adler32(0, NULL, 0), before making calls to adler32().
|
|
||||||
* So this is a good early (and infrequent) place to cache CPU
|
|
||||||
* features for those later, more interesting adler32() calls.
|
|
||||||
*/
|
|
||||||
if (buf == Z_NULL) {
|
|
||||||
if (!len) /* Assume user is calling adler32(0, NULL, 0); */
|
|
||||||
cpu_check_features();
|
|
||||||
return 1L;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
||||||
if (buf == Z_NULL)
|
if (buf == Z_NULL)
|
||||||
return 1L;
|
return 1L;
|
||||||
#endif
|
|
||||||
|
|
||||||
/* in case short lengths are provided, keep it somewhat fast */
|
/* in case short lengths are provided, keep it somewhat fast */
|
||||||
if (len < 16) {
|
if (len < 16) {
|
||||||
|
|
10
third_party/zlib/deflate.c
vendored
10
third_party/zlib/deflate.c
vendored
|
@ -243,14 +243,6 @@ int ZEXPORT deflateInit2(strm, level, method, windowBits, memLevel, strategy)
|
||||||
deflate_state *s;
|
deflate_state *s;
|
||||||
int wrap = 1;
|
int wrap = 1;
|
||||||
|
|
||||||
// Needed to activate optimized insert_string() that helps compression
|
|
||||||
// for all wrapper formats (e.g. RAW, ZLIB, GZIP).
|
|
||||||
// Feature detection is not triggered while using RAW mode (i.e. we never
|
|
||||||
// call crc32() with a NULL buffer).
|
|
||||||
#if defined(CRC32_ARMV8_CRC32) || defined(CRC32_SIMD_SSE42_PCLMUL)
|
|
||||||
cpu_check_features();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (strm == Z_NULL) return Z_STREAM_ERROR;
|
if (strm == Z_NULL) return Z_STREAM_ERROR;
|
||||||
|
|
||||||
strm->msg = Z_NULL;
|
strm->msg = Z_NULL;
|
||||||
|
@ -306,7 +298,7 @@ int ZEXPORT deflateInit2(strm, level, method, windowBits, memLevel, strategy)
|
||||||
s->chromium_zlib_hash = 0;
|
s->chromium_zlib_hash = 0;
|
||||||
#if !defined(USE_ZLIB_RABIN_KARP_ROLLING_HASH)
|
#if !defined(USE_ZLIB_RABIN_KARP_ROLLING_HASH)
|
||||||
#if defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
|
#if defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||||
if (x86_cpu_enable_simd)
|
if (X86_HAVE(SSE4_2))
|
||||||
s->chromium_zlib_hash = 1;
|
s->chromium_zlib_hash = 1;
|
||||||
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
|
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
|
||||||
if (arm_cpu_enable_crc32)
|
if (arm_cpu_enable_crc32)
|
||||||
|
|
8
third_party/zlib/insert_string.internal.h
vendored
8
third_party/zlib/insert_string.internal.h
vendored
|
@ -1,5 +1,6 @@
|
||||||
#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
|
#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
|
||||||
#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
|
#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_
|
||||||
|
#include "libc/nexgen32e/x86feature.h"
|
||||||
#include "third_party/zlib/deflate.internal.h"
|
#include "third_party/zlib/deflate.internal.h"
|
||||||
#include "third_party/zlib/zutil.internal.h"
|
#include "third_party/zlib/zutil.internal.h"
|
||||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||||
|
@ -16,7 +17,7 @@ COSMOPOLITAN_C_START_
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#if defined(CRC32_SIMD_SSE42_PCLMUL)
|
#if defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||||
#include <smmintrin.h> /* Required to make MSVC bot build pass. */
|
// #include <smmintrin.h> /* Required to make MSVC bot build pass. */
|
||||||
|
|
||||||
#if defined(__clang__) || defined(__GNUC__)
|
#if defined(__clang__) || defined(__GNUC__)
|
||||||
#define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2")))
|
#define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2")))
|
||||||
|
@ -50,7 +51,6 @@ COSMOPOLITAN_C_START_
|
||||||
|
|
||||||
#if defined(TARGET_CPU_WITH_CRC)
|
#if defined(TARGET_CPU_WITH_CRC)
|
||||||
|
|
||||||
TARGET_CPU_WITH_CRC
|
|
||||||
local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
|
local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
|
||||||
Pos ret;
|
Pos ret;
|
||||||
unsigned *ip, val, h = 0;
|
unsigned *ip, val, h = 0;
|
||||||
|
@ -61,7 +61,7 @@ local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
|
||||||
if (s->level >= 6) val &= 0xFFFFFF;
|
if (s->level >= 6) val &= 0xFFFFFF;
|
||||||
|
|
||||||
/* Compute hash from the CRC32C of |val|. */
|
/* Compute hash from the CRC32C of |val|. */
|
||||||
h = _cpu_crc32c_hash_u32(h, val);
|
asm("crc32\t%1,%0" : "+r"(h) : "rm"(val));
|
||||||
|
|
||||||
ret = s->head[h & s->hash_mask];
|
ret = s->head[h & s->hash_mask];
|
||||||
s->head[h & s->hash_mask] = str;
|
s->head[h & s->hash_mask] = str;
|
||||||
|
@ -129,7 +129,7 @@ local INLINE Pos insert_string(deflate_state* const s, const Pos str) {
|
||||||
* the Rabin-Karp hash instead.
|
* the Rabin-Karp hash instead.
|
||||||
*/ /* FALLTHROUGH Rabin-Karp */
|
*/ /* FALLTHROUGH Rabin-Karp */
|
||||||
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
|
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||||
if (x86_cpu_enable_simd) return insert_string_simd(s, str);
|
if (X86_HAVE(SSE4_2)) return insert_string_simd(s, str);
|
||||||
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
|
#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
|
||||||
if (arm_cpu_enable_crc32) return insert_string_simd(s, str);
|
if (arm_cpu_enable_crc32) return insert_string_simd(s, str);
|
||||||
#endif
|
#endif
|
||||||
|
|
11
third_party/zlib/zlib.mk
vendored
11
third_party/zlib/zlib.mk
vendored
|
@ -45,10 +45,13 @@ o/$(MODE)/third_party/zlib/adler32simd.o: private \
|
||||||
OVERRIDE_CFLAGS += \
|
OVERRIDE_CFLAGS += \
|
||||||
-mssse3
|
-mssse3
|
||||||
|
|
||||||
o/$(MODE)/third_party/zlib/crcfold.o: private \
|
o/$(MODE)/third_party/zlib/adler32.o: private \
|
||||||
OVERRIDE_CFLAGS += \
|
OVERRIDE_CPPFLAGS += \
|
||||||
-mpclmul \
|
-DADLER32_SIMD_SSSE3
|
||||||
-mssse3
|
|
||||||
|
o/$(MODE)/third_party/zlib/deflate.o: private \
|
||||||
|
OVERRIDE_CPPFLAGS += \
|
||||||
|
-DCRC32_SIMD_SSE42_PCLMUL
|
||||||
|
|
||||||
$(THIRD_PARTY_ZLIB_A_OBJS): private \
|
$(THIRD_PARTY_ZLIB_A_OBJS): private \
|
||||||
OVERRIDE_CFLAGS += \
|
OVERRIDE_CFLAGS += \
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue