niansa 2023-06-29 11:14:21 +02:00
parent de7d1823ed
commit 5ac68ccacb
3 changed files with 4 additions and 16 deletions

ggml-vulkan.h
@@ -32,8 +32,6 @@ bool ggml_vk_add_buffer(
 void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
-void ggml_vk_dequantize_row_q4_0(const void * x, float * y, int k);
-void ggml_vk_dequantize_row_q4_1(const void * x, float * y, int k);
 void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);

 #ifdef __cplusplus

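The two declarations removed above were Kompute/Vulkan ports of ggml's CPU reference dequantizers, which the ggml.c hunks below route back to. As a refresher on what dequantize_row_q4_0 has to do: Q4_0 stores weights in 32-value blocks, each holding one scale d and 32 packed 4-bit quants, and dequantization maps each quant q to (q - 8) * d. A minimal sketch, using an illustrative block layout rather than ggml's exact block_q4_0 (which packs the scale as fp16 and orders nibbles for SIMD):

#include <stdint.h>

#define QK4_0 32  /* values per Q4_0 block */

/* Illustrative stand-in for ggml's block_q4_0 -- layout is an assumption. */
typedef struct {
    float   d;              /* per-block scale */
    uint8_t qs[QK4_0 / 2];  /* 32 quants in [0,15], two per byte */
} demo_block_q4_0;

/* Dequantize k values (k is assumed to be a multiple of QK4_0). */
static void demo_dequantize_row_q4_0(const void * x, float * y, int k) {
    const demo_block_q4_0 * b = (const demo_block_q4_0 *) x;
    for (int i = 0; i < k / QK4_0; i++) {
        for (int j = 0; j < QK4_0 / 2; j++) {
            const uint8_t v = b[i].qs[j];
            /* each nibble is an unsigned quant with an implicit -8 offset */
            y[i*QK4_0 + 2*j + 0] = ((v & 0x0F) - 8) * b[i].d;
            y[i*QK4_0 + 2*j + 1] = ((v >> 4)   - 8) * b[i].d;
        }
    }
}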
ggml.c

@@ -161,8 +161,6 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
-#elif defined(GGML_USE_KOMPUTE)
-#include "ggml-vulkan.h"
 #elif defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
@@ -1550,7 +1548,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
 static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q         = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_0,
+        .dequantize_row_q         = (dequantize_row_q_t) dequantize_row_q4_0,
         .quantize_row_q           = quantize_row_q4_0,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
         .quantize_row_q_dot       = quantize_row_q8_0,
@@ -1558,7 +1556,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
         .vec_dot_type             = GGML_TYPE_Q8_0,
     },
     [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q         = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_1,
+        .dequantize_row_q         = (dequantize_row_q_t) dequantize_row_q4_1,
         .quantize_row_q           = quantize_row_q4_1,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
         .quantize_row_q_dot       = quantize_row_q8_1,

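quantize_fns is ggml's per-type dispatch table: every GGML_TYPE_* entry bundles the quantize/dequantize routines and dot-product kernels for that format, and callers go through the table rather than naming kernels directly. That is why swapping a single .dequantize_row_q pointer was enough to move Q4_0/Q4_1 dequantization onto (and now back off) the Vulkan path. A compilable sketch of the pattern, with hypothetical demo_* names rather than ggml's exact definitions:

/* Function-pointer signature matching the declarations in ggml-vulkan.h above. */
typedef void (*dequantize_row_q_t)(const void * x, float * y, int k);

typedef struct {
    dequantize_row_q_t dequantize_row_q;
    /* the real quantize_fns_t also carries quantize_row_q, vec_dot_q, ... */
} demo_quantize_fns_t;

enum { DEMO_TYPE_Q4_0, DEMO_TYPE_Q4_1, DEMO_TYPE_COUNT };

/* Stub kernels so the sketch links; the real ones unpack the block formats. */
static void demo_deq_q4_0(const void * x, float * y, int k) { (void)x; for (int i = 0; i < k; i++) y[i] = 0.0f; }
static void demo_deq_q4_1(const void * x, float * y, int k) { (void)x; for (int i = 0; i < k; i++) y[i] = 0.0f; }

/* Designated initializers index the table by type id, as ggml.c does. */
static const demo_quantize_fns_t demo_fns[DEMO_TYPE_COUNT] = {
    [DEMO_TYPE_Q4_0] = { .dequantize_row_q = demo_deq_q4_0 },
    [DEMO_TYPE_Q4_1] = { .dequantize_row_q = demo_deq_q4_1 },
};

/* Callers dispatch through the table, never through a concrete kernel name. */
static void demo_to_float(int type, const void * x, float * y, int k) {
    demo_fns[type].dequantize_row_q(x, y, k);
}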
llama.cpp

@@ -753,7 +753,7 @@ struct llama_model_loader {
         }
     }

-    void load_all_data(llama_context & lctx, llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         size_t prefetch_size = 0;
         size_t lock_size = 0;
@@ -810,14 +810,6 @@ struct llama_model_loader {
                     free(lt.data);
                 }
                 break;
-#elif defined(GGML_USE_KOMPUTE)
-            case GGML_BACKEND_GPU:
-                lt.ggml_tensor->data = lt.data;
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, lt.ggml_tensor);
-                if (!use_mmap) {
-                    free(lt.data);
-                }
-                break;
 #endif
             default:
                 continue;
@@ -1323,7 +1315,7 @@ static void llama_model_load_internal(
     }
 #endif

-    ml->load_all_data(lctx, progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
+    ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);

     if (progress_callback) {
         progress_callback(1.0f, progress_callback_user_data);
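With the Kompute branch gone, nothing in load_all_data touches the llama_context anymore, which is why the lctx parameter and its argument are dropped in the two hunks above. For reference, the deleted GGML_BACKEND_GPU case followed the usual host-to-device upload pattern; a self-contained paraphrase follows (the ggml_vk_h2d_tensor signature comes from the header in this diff, everything else is illustrative):

#include <stdbool.h>
#include <stdlib.h>

struct ggml_tensor;           /* opaque in this sketch */
struct ggml_kompute_context;  /* opaque in this sketch */

/* Declared in ggml-vulkan.h (first file in this diff). */
void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);

/* Paraphrase of the removed branch: mirror the host buffer into device
 * memory, then free the host copy -- unless it is mmap'd, in which case
 * the file mapping still owns it. (The real code also set
 * lt.ggml_tensor->data = lt.data before uploading; the tensor struct is
 * opaque here, so that step is only described.) */
static void demo_upload_tensor(struct ggml_kompute_context * ctx,
                               struct ggml_tensor * t,
                               void * host_data, bool use_mmap) {
    ggml_vk_h2d_tensor(ctx, t);
    if (!use_mmap) {
        free(host_data);
    }
}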