interim merge. do not use

Commit 08810d5fee (parent e9caff1cda)
Author: Concedo
Date: 2023-05-13 00:33:55 +08:00

3 changed files with 188 additions and 10756 deletions

Makefile

@@ -219,18 +219,6 @@ ggml_clblast.o: ggml.c ggml.h
 ggml-opencl.o: ggml-opencl.c ggml-opencl.h
 	$(CC) $(CFLAGS) -c $< -o $@
-#old version llama compat
-ggml_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
-ggml_openblas_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(OPENBLAS_FLAGS) -c $< -o $@
-ggml_noavx2_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) -c $< -o $@
-ggml_openblas_noavx2_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
-ggml_clblast_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@
 #extreme old version compat
 ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
 	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
@@ -262,31 +250,31 @@ main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	@echo '==== Run ./main -h for help. ===='
 	@echo
-koboldcpp: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o
+koboldcpp: ggml.o ggml_v1.o expose.o common.o gpttype_adapter.o
 	$(DEFAULT_BUILD)
-koboldcpp_openblas: ggml_openblas.o ggml_openblas_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o
+koboldcpp_openblas: ggml_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o
 	$(OPENBLAS_BUILD)
-koboldcpp_noavx2: ggml_noavx2.o ggml_noavx2_v2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
+koboldcpp_noavx2: ggml_noavx2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
 	$(NOAVX2_BUILD)
-koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_openblas_noavx2_v2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
+koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
 	$(OPENBLAS_NOAVX2_BUILD)
-koboldcpp_clblast: ggml_clblast.o ggml_clblast_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-opencl.o
+koboldcpp_clblast: ggml_clblast.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-opencl.o
 	$(CLBLAST_BUILD)
 quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
-quantize_gptj: ggml.o ggml_v2.o llama.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
+quantize_gptj: ggml.o llama.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
-quantize_gpt2: ggml.o ggml_v2.o llama.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
+quantize_gpt2: ggml.o llama.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
-quantize_neox: ggml.o ggml_v2.o llama.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
+quantize_neox: ggml.o llama.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o $(OBJS)
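The removed *_v2 object rules and the *_v2.o inputs dropped from the link lines above are no longer needed because, as the ggml.c hunk below shows, the legacy code is now pulled into ggml.c itself with an #include and compiled into the same object. A minimal, self-contained sketch of that pattern; the file and function names here (legacy_impl.c, legacy_add, current_add) are placeholders for illustration, not the actual koboldcpp sources:

/* legacy_impl.c -- stands in for the legacy translation unit */
static int legacy_add(int a, int b) { return a + b; }

/* current.c -- stands in for the current translation unit */
#include <stdio.h>

static int current_add(int a, int b) { return a + b; }

/* Textually including the legacy .c file merges it into this translation
 * unit, so a single compiler invocation produces one object containing
 * both code paths and the build needs no separate legacy object rule. */
#include "legacy_impl.c"

int main(void) {
    printf("%d %d\n", current_add(1, 2), legacy_add(3, 4));
    return 0;
}

This only works if the two files avoid colliding symbol names, which is why the legacy copy typically carries renamed or static definitions.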

ggml.c (18 changed lines)

@@ -393,6 +393,8 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n) {
     }
 }
+//legacy functions
+#include "ggml_v2.c"
 //
 // timing
@@ -3064,6 +3066,8 @@ static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F16] = 1,
     [GGML_TYPE_Q4_0] = QK4_0,
     [GGML_TYPE_Q4_1] = QK4_1,
+    [GGML_TYPE_Q4_2] = QK4_2,
+    [GGML_TYPE_Q4_3] = QK4_3,
     [GGML_TYPE_Q5_0] = QK5_0,
     [GGML_TYPE_Q5_1] = QK5_1,
     [GGML_TYPE_Q8_0] = QK8_0,
@@ -3079,6 +3083,8 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F16] = sizeof(ggml_fp16_t),
     [GGML_TYPE_Q4_0] = sizeof(block_q4_0),
     [GGML_TYPE_Q4_1] = sizeof(block_q4_1),
+    [GGML_TYPE_Q4_2] = sizeof(block_q4_2),
+    [GGML_TYPE_Q4_3] = sizeof(block_q4_3),
     [GGML_TYPE_Q5_0] = sizeof(block_q5_0),
     [GGML_TYPE_Q5_1] = sizeof(block_q5_1),
     [GGML_TYPE_Q8_0] = sizeof(block_q8_0),
@@ -3095,6 +3101,8 @@ static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F16] = "f16",
     [GGML_TYPE_Q4_0] = "q4_0",
     [GGML_TYPE_Q4_1] = "q4_1",
+    [GGML_TYPE_Q4_2] = "q4_2",
+    [GGML_TYPE_Q4_3] = "q4_3",
     [GGML_TYPE_Q5_0] = "q5_0",
     [GGML_TYPE_Q5_1] = "q5_1",
     [GGML_TYPE_Q8_0] = "q8_0",
@@ -3110,6 +3118,8 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F16] = false,
     [GGML_TYPE_Q4_0] = true,
     [GGML_TYPE_Q4_1] = true,
+    [GGML_TYPE_Q4_2] = true,
+    [GGML_TYPE_Q4_3] = true,
     [GGML_TYPE_Q5_0] = true,
     [GGML_TYPE_Q5_1] = true,
     [GGML_TYPE_Q8_0] = true,
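The four table hunks above share one pattern: per-type metadata lives in arrays indexed by the type enum and populated with designated initializers, so registering a new quantization format means adding one entry to each table. A self-contained illustration of that pattern; the enum, names, and values below are invented for the example and are not ggml's actual definitions:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative type enum; DEMO_* names are placeholders, not ggml types. */
enum demo_type { DEMO_F32, DEMO_Q4_2, DEMO_Q4_3, DEMO_TYPE_COUNT };

/* Each table is indexed by the same enum, so adding a type means adding
 * one designated initializer per table, as the hunks above do for
 * GGML_TYPE_Q4_2 and GGML_TYPE_Q4_3. */
static const int   DEMO_BLCK_SIZE[DEMO_TYPE_COUNT]    = { [DEMO_F32] = 1,     [DEMO_Q4_2] = 16,     [DEMO_Q4_3] = 16 };
static const char *DEMO_TYPE_NAME[DEMO_TYPE_COUNT]    = { [DEMO_F32] = "f32", [DEMO_Q4_2] = "q4_2", [DEMO_Q4_3] = "q4_3" };
static const bool  DEMO_IS_QUANTIZED[DEMO_TYPE_COUNT] = { [DEMO_F32] = false, [DEMO_Q4_2] = true,   [DEMO_Q4_3] = true };

int main(void) {
    for (int t = 0; t < DEMO_TYPE_COUNT; ++t) {
        printf("%-4s block=%d quantized=%d\n",
               DEMO_TYPE_NAME[t], DEMO_BLCK_SIZE[t], DEMO_IS_QUANTIZED[t]);
    }
    return 0;
}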
@@ -6396,6 +6406,8 @@ static void ggml_compute_forward_add(
             } break;
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
+        case GGML_TYPE_Q4_2:
+        case GGML_TYPE_Q4_3:
         case GGML_TYPE_Q5_0:
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q8_0:
@@ -7950,6 +7962,8 @@ static void ggml_compute_forward_mul_mat(
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
+        case GGML_TYPE_Q4_2:
+        case GGML_TYPE_Q4_3:
         case GGML_TYPE_Q5_0:
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q8_0:
@@ -8180,6 +8194,8 @@ static void ggml_compute_forward_get_rows(
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
+        case GGML_TYPE_Q4_2:
+        case GGML_TYPE_Q4_3:
         case GGML_TYPE_Q5_0:
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q8_0:
@@ -8504,6 +8520,8 @@ static void ggml_compute_forward_alibi(
             } break;
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
+        case GGML_TYPE_Q4_2:
+        case GGML_TYPE_Q4_3:
         case GGML_TYPE_Q5_0:
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q8_0:
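Likewise, each compute function above dispatches on the source tensor's type with a switch, so every newly registered quantized type must also be listed in each of those switches (add, mul_mat, get_rows, alibi in this diff) or the op falls through to its unsupported-type branch. A minimal sketch of that dispatch style, with invented names standing in for the real ggml routines:

#include <stdio.h>

/* Placeholder names; not ggml's actual enum or functions. */
enum demo_type { DEMO_F32, DEMO_Q4_2, DEMO_Q4_3 };

static void forward_add_f32(void)       { printf("add: f32 path\n"); }
static void forward_add_quantized(void) { printf("add: quantized path\n"); }

static void forward_add(enum demo_type type) {
    switch (type) {
        case DEMO_F32:
            forward_add_f32();
            break;
        /* Each supported quantized type has to be listed here explicitly. */
        case DEMO_Q4_2:
        case DEMO_Q4_3:
            forward_add_quantized();
            break;
        default:
            printf("add: unsupported type\n");
            break;
    }
}

int main(void) {
    forward_add(DEMO_F32);
    forward_add(DEMO_Q4_3);
    return 0;
}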

ggml_v2.c (10898 changed lines)

File diff suppressed because it is too large.