mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-28 07:18:30 +00:00
Support avx512f + vpclmulqdq crc32() acceleration
Cosmo's _Cz_crc32() function now runs at 73 GiB/s on Threadripper. This will significantly improve performance when working with the PKZIP file format. This algorithm is also used by apelink to create deterministic IDs.
This commit is contained in:
parent
7c8df05042
commit
a05ce3ad9d
8 changed files with 385 additions and 8 deletions
23
third_party/zlib/BUILD.mk
vendored
23
third_party/zlib/BUILD.mk
vendored
|
@ -38,16 +38,31 @@ $(THIRD_PARTY_ZLIB_A).pkg: \
|
|||
ifeq ($(ARCH), x86_64)
|
||||
o/$(MODE)/third_party/zlib/adler32_simd.o: private \
|
||||
TARGET_ARCH += \
|
||||
-O3 \
|
||||
-mssse3
|
||||
o/$(MODE)/third_party/zlib/crc_folding.o \
|
||||
o/$(MODE)/third_party/zlib/crc32_simd.o: private \
|
||||
o/$(MODE)/third_party/zlib/crc32_simd_sse42.o: private \
|
||||
TARGET_ARCH += \
|
||||
-O3 \
|
||||
-msse4.2 \
|
||||
-mpclmul
|
||||
-mpclmul \
|
||||
-UCRC32_SIMD_AVX512_PCLMUL \
|
||||
-DCRC32_SIMD_SSE42_PCLMUL \
|
||||
-DBUILD_SSE42
|
||||
o/$(MODE)/third_party/zlib/crc32_simd_avx512.o: private \
|
||||
TARGET_ARCH += \
|
||||
-O3 \
|
||||
-mpclmul \
|
||||
-mavx512f \
|
||||
-mvpclmulqdq \
|
||||
-UCRC32_SIMD_SSE42_PCLMUL \
|
||||
-DCRC32_SIMD_AVX512_PCLMUL \
|
||||
-DBUILD_AVX512
|
||||
$(THIRD_PARTY_ZLIB_A_OBJS): private \
|
||||
CPPFLAGS += \
|
||||
-DADLER32_SIMD_SSSE3 \
|
||||
-DCRC32_SIMD_SSE42_PCLMUL \
|
||||
-DCRC32_SIMD_AVX512_PCLMUL \
|
||||
-DDEFLATE_SLIDE_HASH_SSE2 \
|
||||
-DINFLATE_CHUNK_SIMD_SSE2 \
|
||||
-DINFLATE_CHUNK_READ_64LE
|
||||
|
@ -55,8 +70,10 @@ endif
|
|||
|
||||
ifeq ($(ARCH), aarch64)
|
||||
o/$(MODE)/third_party/zlib/deflate.o \
|
||||
o/$(MODE)/third_party/zlib/crc32_simd.o: private \
|
||||
o/$(MODE)/third_party/zlib/crc32_simd_neon.o: private \
|
||||
TARGET_ARCH += \
|
||||
-O3 \
|
||||
-DBUILD_NEON \
|
||||
-march=armv8-a+aes+crc
|
||||
$(THIRD_PARTY_ZLIB_A_OBJS): private \
|
||||
CPPFLAGS += \
|
||||
|
|
2
third_party/zlib/cpu_features.internal.h
vendored
2
third_party/zlib/cpu_features.internal.h
vendored
|
@ -16,7 +16,7 @@ COSMOPOLITAN_C_START_
|
|||
#define x86_cpu_enable_sse2 X86_HAVE(SSE2)
|
||||
#define x86_cpu_enable_ssse3 X86_HAVE(SSSE3)
|
||||
#define x86_cpu_enable_simd (X86_HAVE(SSE4_2) && X86_HAVE(PCLMUL))
|
||||
#define x86_cpu_enable_avx512 X86_HAVE(AVX512F)
|
||||
#define x86_cpu_enable_avx512 (X86_HAVE(AVX512F) && X86_HAVE(PCLMUL) && X86_HAVE(VPCLMULQDQ))
|
||||
#define cpu_check_features() ((void)0)
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
|
|
5
third_party/zlib/crc32.c
vendored
5
third_party/zlib/crc32.c
vendored
|
@ -780,6 +780,7 @@ uint32_t ZEXPORT crc32_z(crc, buf_, len)
|
|||
}
|
||||
|
||||
#endif
|
||||
#if defined(__x86_64__)
|
||||
#if defined(CRC32_SIMD_AVX512_PCLMUL)
|
||||
if (x86_cpu_enable_avx512 && len >= Z_CRC32_AVX512_MINIMUM_LENGTH) {
|
||||
/* crc32 64-byte chunks */
|
||||
|
@ -792,7 +793,8 @@ uint32_t ZEXPORT crc32_z(crc, buf_, len)
|
|||
/* Fall into the default crc32 for the remaining data. */
|
||||
buf += chunk_size;
|
||||
}
|
||||
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||
#endif
|
||||
#if defined(CRC32_SIMD_SSE42_PCLMUL)
|
||||
if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) {
|
||||
/* crc32 16-byte chunks */
|
||||
z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK;
|
||||
|
@ -804,6 +806,7 @@ uint32_t ZEXPORT crc32_z(crc, buf_, len)
|
|||
/* Fall into the default crc32 for the remaining data. */
|
||||
buf += chunk_size;
|
||||
}
|
||||
#endif
|
||||
#elif defined(CRC32_ARMV8_CRC32)
|
||||
if (arm_cpu_enable_crc32) {
|
||||
#if defined(__aarch64__)
|
||||
|
|
3
third_party/zlib/crc32_simd_avx512.c
vendored
Normal file
3
third_party/zlib/crc32_simd_avx512.c
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
#ifdef BUILD_AVX512
|
||||
#include "third_party/zlib/crc32_simd.inc"
|
||||
#endif
|
3
third_party/zlib/crc32_simd_neon.c
vendored
Normal file
3
third_party/zlib/crc32_simd_neon.c
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
#ifdef BUILD_NEON
|
||||
#include "third_party/zlib/crc32_simd.inc"
|
||||
#endif
|
3
third_party/zlib/crc32_simd_sse42.c
vendored
Normal file
3
third_party/zlib/crc32_simd_sse42.c
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
#ifdef BUILD_SSE42
|
||||
#include "third_party/zlib/crc32_simd.inc"
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue