mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-09-10 02:33:49 +00:00
Remove division from matrix multiplication
This change reduces the total CPU cycles consumed by llama.com by about 2.5%, as measured with the Linux `perf stat -Bddd` command.
This commit is contained in:
parent
a88290e595
commit
1f6f9e6701
7 changed files with 191 additions and 70 deletions
14
third_party/ggml/ggml.mk
vendored
14
third_party/ggml/ggml.mk
vendored
|
@ -58,6 +58,20 @@ $(THIRD_PARTY_GGML_A_OBJS): private \
|
|||
-mfma
|
||||
endif
|
||||
|
||||
o/rel/third_party/ggml/ggml.o \
|
||||
o/opt/third_party/ggml/ggml.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-fomit-frame-pointer \
|
||||
-x-no-pg
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
o/rel/third_party/ggml/ggml.o \
|
||||
o/opt/third_party/ggml/ggml.o: private \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-fschedule-insns2 \
|
||||
-mred-zone
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
# command for running inference on large language models
|
||||
# make -j8 o//third_party/ggml/llama.com
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue