Cleanups
This commit is contained in:
parent
de7d1823ed
commit
5ac68ccacb
3 changed files with 4 additions and 16 deletions
|
@@ -32,8 +32,6 @@ bool ggml_vk_add_buffer(
|
||||||
void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
|
void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
|
||||||
void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
|
void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
|
||||||
|
|
||||||
void ggml_vk_dequantize_row_q4_0(const void * x, float * y, int k);
|
|
||||||
void ggml_vk_dequantize_row_q4_1(const void * x, float * y, int k);
|
|
||||||
void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
|
void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
6
ggml.c
6
ggml.c
|
@@ -161,8 +161,6 @@ inline static void* ggml_aligned_malloc(size_t size) {
|
||||||
#endif
|
#endif
|
||||||
#elif defined(GGML_USE_OPENBLAS)
|
#elif defined(GGML_USE_OPENBLAS)
|
||||||
#include <cblas.h>
|
#include <cblas.h>
|
||||||
#elif defined(GGML_USE_KOMPUTE)
|
|
||||||
#include "ggml-vulkan.h"
|
|
||||||
#elif defined(GGML_USE_CUBLAS)
|
#elif defined(GGML_USE_CUBLAS)
|
||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#elif defined(GGML_USE_CLBLAST)
|
||||||
|
@@ -1550,7 +1548,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
|
||||||
|
|
||||||
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
||||||
[GGML_TYPE_Q4_0] = {
|
[GGML_TYPE_Q4_0] = {
|
||||||
.dequantize_row_q = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_0,
|
.dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_0,
|
||||||
.quantize_row_q = quantize_row_q4_0,
|
.quantize_row_q = quantize_row_q4_0,
|
||||||
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
|
||||||
.quantize_row_q_dot = quantize_row_q8_0,
|
.quantize_row_q_dot = quantize_row_q8_0,
|
||||||
|
@@ -1558,7 +1556,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
||||||
.vec_dot_type = GGML_TYPE_Q8_0,
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q4_1] = {
|
[GGML_TYPE_Q4_1] = {
|
||||||
.dequantize_row_q = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_1,
|
.dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_1,
|
||||||
.quantize_row_q = quantize_row_q4_1,
|
.quantize_row_q = quantize_row_q4_1,
|
||||||
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
|
||||||
.quantize_row_q_dot = quantize_row_q8_1,
|
.quantize_row_q_dot = quantize_row_q8_1,
|
||||||
|
|
12
llama.cpp
12
llama.cpp
|
@@ -753,7 +753,7 @@ struct llama_model_loader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void load_all_data(llama_context & lctx, llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
|
void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
|
||||||
size_t data_size = 0;
|
size_t data_size = 0;
|
||||||
size_t prefetch_size = 0;
|
size_t prefetch_size = 0;
|
||||||
size_t lock_size = 0;
|
size_t lock_size = 0;
|
||||||
|
@@ -810,14 +810,6 @@ struct llama_model_loader {
|
||||||
free(lt.data);
|
free(lt.data);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
#elif defined(GGML_USE_KOMPUTE)
|
|
||||||
case GGML_BACKEND_GPU:
|
|
||||||
lt.ggml_tensor->data = lt.data;
|
|
||||||
ggml_vk_h2d_tensor(lctx.ctx_kompute, lt.ggml_tensor);
|
|
||||||
if (!use_mmap) {
|
|
||||||
free(lt.data);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
#endif
|
#endif
|
||||||
default:
|
default:
|
||||||
continue;
|
continue;
|
||||||
|
@@ -1323,7 +1315,7 @@ static void llama_model_load_internal(
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ml->load_all_data(lctx, progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
|
ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
|
||||||
|
|
||||||
if (progress_callback) {
|
if (progress_callback) {
|
||||||
progress_callback(1.0f, progress_callback_user_data);
|
progress_callback(1.0f, progress_callback_user_data);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue