Add initial AVX512 support for dot product on Linux (#320)

* Update Makefile to detect AVX512 support and add compiler flags if it's available
 * Based on existing AVX2 implementation, dot product on one 32-value block of 4-bit quantized ints at a time
 * Perform 8 bit -> 16 bit sign extension and multiply+add on 32 values at time instead of 16
 * Use built-in AVX512 horizontal reduce add to get sum at the end
 * Manual unrolling on inner dot product loop to reduce loop counter overhead
This commit is contained in:
Casey Primozic 2023-03-21 07:35:42 -07:00 committed by GitHub
parent 8cf9f34edd
commit 2e664f1ff4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 109 additions and 3 deletions

View file

@ -95,6 +95,38 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
ifneq (,$(findstring sse3,$(SSE3_M)))
CFLAGS += -msse3
endif
AVX512F_M := $(shell grep "avx512f " /proc/cpuinfo)
ifneq (,$(findstring avx512f,$(AVX512F_M)))
CFLAGS += -mavx512f
endif
AVX512BW_M := $(shell grep "avx512bw " /proc/cpuinfo)
ifneq (,$(findstring avx512bw,$(AVX512BW_M)))
CFLAGS += -mavx512bw
endif
AVX512DQ_M := $(shell grep "avx512dq " /proc/cpuinfo)
ifneq (,$(findstring avx512dq,$(AVX512DQ_M)))
CFLAGS += -mavx512dq
endif
AVX512VL_M := $(shell grep "avx512vl " /proc/cpuinfo)
ifneq (,$(findstring avx512vl,$(AVX512VL_M)))
CFLAGS += -mavx512vl
endif
AVX512CD_M := $(shell grep "avx512cd " /proc/cpuinfo)
ifneq (,$(findstring avx512cd,$(AVX512CD_M)))
CFLAGS += -mavx512cd
endif
AVX512ER_M := $(shell grep "avx512er " /proc/cpuinfo)
ifneq (,$(findstring avx512er,$(AVX512ER_M)))
CFLAGS += -mavx512er
endif
AVX512IFMA_M := $(shell grep "avx512ifma " /proc/cpuinfo)
ifneq (,$(findstring avx512ifma,$(AVX512IFMA_M)))
CFLAGS += -mavx512ifma
endif
AVX512PF_M := $(shell grep "avx512pf " /proc/cpuinfo)
ifneq (,$(findstring avx512pf,$(AVX512PF_M)))
CFLAGS += -mavx512pf
endif
else ifeq ($(UNAME_S),Haiku)
AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
ifneq (,$(findstring avx,$(AVX1_M)))