Cleanups
parent de7d1823ed
commit 5ac68ccacb

3 changed files with 4 additions and 16 deletions
2 ggml-vulkan.h

@@ -32,8 +32,6 @@ bool ggml_vk_add_buffer(
 void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 
-void ggml_vk_dequantize_row_q4_0(const void * x, float * y, int k);
-void ggml_vk_dequantize_row_q4_1(const void * x, float * y, int k);
 void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
 
 #ifdef __cplusplus
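The header hunk drops the device-side dequantize declarations, since ggml.c switches back to the CPU dequantizers (see the ggml.c hunks below). For orientation, a minimal sketch of how the surviving transfer entry points fit together; the variable names (ctx, gf, input, output) are assumptions based only on the signatures above, not code from this commit:

// Hedged sketch (assumed usage, not from this commit): copy inputs to the
// device, run the graph on the Kompute backend, then read results back.
ggml_vk_h2d_tensor(ctx, input);    // host -> device upload
ggml_vk_graph_compute(ctx, gf);    // execute the ggml_cgraph
ggml_vk_d2h_tensor(ctx, output);   // device -> host readback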
6 ggml.c

@@ -161,8 +161,6 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
-#elif defined(GGML_USE_KOMPUTE)
-#include "ggml-vulkan.h"
 #elif defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
@@ -1550,7 +1548,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
 
 static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_0,
+        .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_0,
         .quantize_row_q = quantize_row_q4_0,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
         .quantize_row_q_dot = quantize_row_q8_0,
@@ -1558,7 +1556,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
         .vec_dot_type = GGML_TYPE_Q8_0,
     },
     [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_1,
+        .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_1,
         .quantize_row_q = quantize_row_q4_1,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
         .quantize_row_q_dot = quantize_row_q8_1,
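Both ggml.c table hunks retarget dequantize_row_q for Q4_0/Q4_1 from the removed ggml_vk_* functions back to the CPU implementations, which is what makes the #include "ggml-vulkan.h" in the first hunk unnecessary. A minimal sketch of the dispatch this table drives; the helper below is illustrative, not part of ggml:

// Illustrative helper (not in ggml): dispatch dequantization through the
// quantize_fns table. After this commit the Q4_0/Q4_1 entries resolve to
// the CPU dequantize_row_q4_0 / dequantize_row_q4_1.
static void dequantize_row(enum ggml_type type, const void * x, float * y, int k) {
    quantize_fns[type].dequantize_row_q(x, y, k);
}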
12 llama.cpp

@@ -753,7 +753,7 @@ struct llama_model_loader {
         }
     }
 
-    void load_all_data(llama_context & lctx, llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         size_t prefetch_size = 0;
         size_t lock_size = 0;
@@ -810,14 +810,6 @@ struct llama_model_loader {
                     free(lt.data);
                 }
                 break;
-#elif defined(GGML_USE_KOMPUTE)
-            case GGML_BACKEND_GPU:
-                lt.ggml_tensor->data = lt.data;
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, lt.ggml_tensor);
-                if (!use_mmap) {
-                    free(lt.data);
-                }
-                break;
 #endif
             default:
                 continue;
@@ -1323,7 +1315,7 @@ static void llama_model_load_internal(
     }
 #endif
 
-    ml->load_all_data(lctx, progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
+    ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
 
     if (progress_callback) {
         progress_callback(1.0f, progress_callback_user_data);
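Removing the GGML_USE_KOMPUTE branch deletes the only use of the llama_context inside load_all_data (it was needed to reach lctx.ctx_kompute for ggml_vk_h2d_tensor), which is why the lctx parameter disappears from both the signature and the call site. A rough sketch of the per-tensor placement loop that remains, with assumed surrounding code rather than verbatim lines from this commit:

// Rough sketch (assumed context): the tensor-placement loop after this
// commit. The CPU case aliases the (possibly mmapped) file data; other
// backends keep their own #ifdef'd cases.
for (llama_load_tensor & lt : tensors_map.tensors) {
    switch (lt.ggml_tensor->backend) {
        case GGML_BACKEND_CPU:
            lt.ggml_tensor->data = lt.data;  // point the tensor at mapped file memory
            break;
        default:
            continue;                        // non-CPU tensors handled elsewhere
    }
}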