interim merge. do not use
parent e9caff1cda
commit 08810d5fee
3 changed files with 188 additions and 10756 deletions
Makefile (28 changed lines)
@@ -219,18 +219,6 @@ ggml_clblast.o: ggml.c ggml.h
 ggml-opencl.o: ggml-opencl.c ggml-opencl.h
 	$(CC) $(CFLAGS) -c $< -o $@
 
-#old version llama compat
-ggml_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
-ggml_openblas_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(OPENBLAS_FLAGS) -c $< -o $@
-ggml_noavx2_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) -c $< -o $@
-ggml_openblas_noavx2_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
-ggml_clblast_v2.o: ggml_v2.c ggml.h
-	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@
-
 #extreme old version compat
 ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
 	$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
@@ -262,31 +250,31 @@ main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	@echo '==== Run ./main -h for help. ===='
 	@echo
 
-koboldcpp: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o
+koboldcpp: ggml.o ggml_v1.o expose.o common.o gpttype_adapter.o
 	$(DEFAULT_BUILD)
 
-koboldcpp_openblas: ggml_openblas.o ggml_openblas_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o
+koboldcpp_openblas: ggml_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o
 	$(OPENBLAS_BUILD)
 
-koboldcpp_noavx2: ggml_noavx2.o ggml_noavx2_v2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
+koboldcpp_noavx2: ggml_noavx2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
 	$(NOAVX2_BUILD)
 
-koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_openblas_noavx2_v2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
+koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o gpttype_adapter.o
 	$(OPENBLAS_NOAVX2_BUILD)
 
-koboldcpp_clblast: ggml_clblast.o ggml_clblast_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-opencl.o
+koboldcpp_clblast: ggml_clblast.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-opencl.o
 	$(CLBLAST_BUILD)
 
 quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-quantize_gptj: ggml.o ggml_v2.o llama.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
+quantize_gptj: ggml.o llama.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-quantize_gpt2: ggml.o ggml_v2.o llama.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
+quantize_gpt2: ggml.o llama.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-quantize_neox: ggml.o ggml_v2.o llama.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
+quantize_neox: ggml.o llama.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
 quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o $(OBJS)
ggml.c (18 changed lines)
@@ -393,6 +393,8 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n) {
     }
 }
 
+//legacy functions
+#include "ggml_v2.c"
 
 //
 // timing
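The hunk above folds the legacy implementation into ggml.c by directly including a .c file, which is presumably why the separate ggml_v2.o build rules were dropped from the Makefile. A minimal, self-contained sketch of that pattern (the file and function names here are hypothetical, not the real ggml_v2.c contents):

#include <stdio.h>

/* --- pretend this block lives in a separate file, legacy_v2.c ---------- */
static float legacy_scale_v2(float x) { return x * 0.5f; }
/* ----------------------------------------------------------------------- */
/* In the real tree the line is:  #include "ggml_v2.c"
   The preprocessor pastes that whole file here, so its functions become
   part of this translation unit and no separate object file is linked. */

int main(void) {
    printf("%f\n", legacy_scale_v2(2.0f)); /* legacy symbol, no extra link step */
    return 0;
}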
@@ -3064,6 +3066,8 @@ static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F16]  = 1,
     [GGML_TYPE_Q4_0] = QK4_0,
     [GGML_TYPE_Q4_1] = QK4_1,
+    [GGML_TYPE_Q4_2] = QK4_2,
+    [GGML_TYPE_Q4_3] = QK4_3,
     [GGML_TYPE_Q5_0] = QK5_0,
     [GGML_TYPE_Q5_1] = QK5_1,
     [GGML_TYPE_Q8_0] = QK8_0,
@@ -3079,6 +3083,8 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F16]  = sizeof(ggml_fp16_t),
     [GGML_TYPE_Q4_0] = sizeof(block_q4_0),
     [GGML_TYPE_Q4_1] = sizeof(block_q4_1),
+    [GGML_TYPE_Q4_2] = sizeof(block_q4_2),
+    [GGML_TYPE_Q4_3] = sizeof(block_q4_3),
     [GGML_TYPE_Q5_0] = sizeof(block_q5_0),
     [GGML_TYPE_Q5_1] = sizeof(block_q5_1),
     [GGML_TYPE_Q8_0] = sizeof(block_q8_0),
@@ -3095,6 +3101,8 @@ static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
    [GGML_TYPE_F16]  = "f16",
    [GGML_TYPE_Q4_0] = "q4_0",
    [GGML_TYPE_Q4_1] = "q4_1",
+   [GGML_TYPE_Q4_2] = "q4_2",
+   [GGML_TYPE_Q4_3] = "q4_3",
    [GGML_TYPE_Q5_0] = "q5_0",
    [GGML_TYPE_Q5_1] = "q5_1",
    [GGML_TYPE_Q8_0] = "q8_0",
@@ -3110,6 +3118,8 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
    [GGML_TYPE_F16]  = false,
    [GGML_TYPE_Q4_0] = true,
    [GGML_TYPE_Q4_1] = true,
+   [GGML_TYPE_Q4_2] = true,
+   [GGML_TYPE_Q4_3] = true,
    [GGML_TYPE_Q5_0] = true,
    [GGML_TYPE_Q5_1] = true,
    [GGML_TYPE_Q8_0] = true,
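The four hunks above extend ggml's per-type lookup tables (block size, element size, printable name, quantized flag), which are plain C arrays indexed by the type enum through designated initializers. A minimal sketch of the same pattern, with made-up types and sizes rather than the real ggml values:

/* Sketch of the enum-indexed lookup-table pattern used above.
   Type names and sizes are illustrative, not the real ggml values. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum toy_type { TOY_F32, TOY_F16, TOY_Q4, TOY_TYPE_COUNT };

/* Designated initializers index each array by enum value, so supporting a
   new type is a one-line change per table, as in the diff above. */
static const int    TOY_BLCK_SIZE[TOY_TYPE_COUNT]    = { [TOY_F32] = 1, [TOY_F16] = 1, [TOY_Q4] = 32 };
static const size_t TOY_TYPE_SIZE[TOY_TYPE_COUNT]    = { [TOY_F32] = 4, [TOY_F16] = 2, [TOY_Q4] = 20 };
static const bool   TOY_IS_QUANTIZED[TOY_TYPE_COUNT] = { [TOY_Q4]  = true };

/* Bytes needed for n elements: n / block_size blocks, each type_size bytes. */
static size_t toy_row_size(enum toy_type t, size_t n) {
    return (n / (size_t)TOY_BLCK_SIZE[t]) * TOY_TYPE_SIZE[t];
}

int main(void) {
    printf("q4 row of 64 elems: %zu bytes, quantized=%d\n",
           toy_row_size(TOY_Q4, 64), (int)TOY_IS_QUANTIZED[TOY_Q4]);
    return 0;
}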
@@ -6396,6 +6406,8 @@ static void ggml_compute_forward_add(
            } break;
        case GGML_TYPE_Q4_0:
        case GGML_TYPE_Q4_1:
+       case GGML_TYPE_Q4_2:
+       case GGML_TYPE_Q4_3:
        case GGML_TYPE_Q5_0:
        case GGML_TYPE_Q5_1:
        case GGML_TYPE_Q8_0:
@@ -7950,6 +7962,8 @@ static void ggml_compute_forward_mul_mat(
    switch (src0->type) {
        case GGML_TYPE_Q4_0:
        case GGML_TYPE_Q4_1:
+       case GGML_TYPE_Q4_2:
+       case GGML_TYPE_Q4_3:
        case GGML_TYPE_Q5_0:
        case GGML_TYPE_Q5_1:
        case GGML_TYPE_Q8_0:
@@ -8180,6 +8194,8 @@ static void ggml_compute_forward_get_rows(
    switch (src0->type) {
        case GGML_TYPE_Q4_0:
        case GGML_TYPE_Q4_1:
+       case GGML_TYPE_Q4_2:
+       case GGML_TYPE_Q4_3:
        case GGML_TYPE_Q5_0:
        case GGML_TYPE_Q5_1:
        case GGML_TYPE_Q8_0:
@@ -8504,6 +8520,8 @@ static void ggml_compute_forward_alibi(
            } break;
        case GGML_TYPE_Q4_0:
        case GGML_TYPE_Q4_1:
+       case GGML_TYPE_Q4_2:
+       case GGML_TYPE_Q4_3:
        case GGML_TYPE_Q5_0:
        case GGML_TYPE_Q5_1:
        case GGML_TYPE_Q8_0:
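The remaining hunks add the legacy Q4_2/Q4_3 case labels to the switches that dispatch each compute function on the tensor's type; the quantized labels fall through to a shared code path. A small self-contained sketch of that dispatch shape, with hypothetical names rather than the real ggml internals:

#include <stdio.h>

enum toy_type { TOY_F32, TOY_F16, TOY_Q4_A, TOY_Q4_B };

static void add_f32(void)   { puts("f32 path"); }
static void add_f16(void)   { puts("f16 path"); }
static void add_quant(void) { puts("shared quantized path"); }

static void compute_forward_add(enum toy_type t) {
    switch (t) {
        case TOY_F32:  add_f32();   break;
        case TOY_F16:  add_f16();   break;
        /* every quantized format falls through to the same handler,
           so supporting one more format is just another case label */
        case TOY_Q4_A:
        case TOY_Q4_B: add_quant(); break;
        default:       puts("unsupported type"); break;
    }
}

int main(void) {
    compute_forward_add(TOY_Q4_B);
    return 0;
}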