Fix Vulkan no kv offload incoherence

author 0cc4m
date   2024-03-09 07:41:36 +01:00
parent 6cdabe6526
commit 492ad4b0e0
3 changed files with 28 additions and 4 deletions

ggml-vulkan.cpp

@@ -5080,6 +5080,9 @@ GGML_CALL static void ggml_vk_get_device_description(int device, char * descript
 // CPU assist interface
 void ggml_vk_init_cpu_assist() {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_init_cpu_assist()" << std::endl;
+#endif
     ggml_vk_instance_init();
     std::cerr << "ggml_vulkan: Found " << ggml_vk_get_device_count() << " Vulkan devices:" << std::endl;
@@ -5092,6 +5095,9 @@ void ggml_vk_init_cpu_assist() {
 }
 void ggml_vk_preallocate_buffers_graph_cpu_assist(ggml_tensor * node) {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_preallocate_buffers_graph_cpu_assist()" << std::endl;
+#endif
     ggml_backend_vk_context * ctx = &vk_instance.contexts[0];
     if (!ctx->initialized) {
@@ -5102,6 +5108,9 @@ void ggml_vk_preallocate_buffers_graph_cpu_assist(ggml_tensor * node) {
 }
 void ggml_vk_preallocate_buffers_cpu_assist() {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_preallocate_buffers_cpu_assist()" << std::endl;
+#endif
     ggml_backend_vk_context * ctx = &vk_instance.contexts[0];
     if (!ctx->initialized) {
@@ -5111,17 +5120,23 @@ void ggml_vk_preallocate_buffers_cpu_assist() {
     ggml_vk_preallocate_buffers(ctx);
 }
-void ggml_vk_build_graph_cpu_assist(ggml_tensor * node, bool last_node) {
+void ggml_vk_build_graph_cpu_assist(ggml_tensor * node) {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_build_graph_cpu_assist()" << std::endl;
+#endif
     ggml_backend_vk_context * ctx = &vk_instance.contexts[0];
     if (!ctx->initialized) {
         return;
     }
-    ggml_vk_build_graph(ctx, node, last_node);
+    ggml_vk_build_graph(ctx, node, true);
 }
 bool ggml_vk_compute_forward_cpu_assist(ggml_compute_params * params, ggml_tensor * tensor){
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_compute_forward_cpu_assist()" << std::endl;
+#endif
     ggml_backend_vk_context * ctx = &vk_instance.contexts[0];
     if (!ctx->initialized) {
@@ -5132,6 +5147,9 @@ bool ggml_vk_compute_forward_cpu_assist(ggml_compute_params * params, ggml_tenso
 }
 void ggml_vk_graph_cleanup_cpu_assist() {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_graph_cleanup_cpu_assist()" << std::endl;
+#endif
     ggml_backend_vk_context * ctx = &vk_instance.contexts[0];
     if (!ctx->initialized) {
@@ -5142,6 +5160,9 @@ void ggml_vk_graph_cleanup_cpu_assist() {
 }
 void ggml_vk_free_cpu_assist() {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_init_cpu_assist()" << std::endl;
+#endif
     ggml_backend_vk_context * ctx = &vk_instance.contexts[0];
     if (!ctx->initialized || vk_instance.backends[0] == nullptr) {
@@ -5574,6 +5595,9 @@ GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) {
 }
 GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_backend_vk_graph_compute(" << cgraph->n_nodes << " nodes)" << std::endl;
+#endif
     ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
     for (int i = 0; i < cgraph->n_nodes; i++) {
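Taken together, the changes to this file add GGML_VULKAN_DEBUG tracing to every cpu-assist entry point and drop the last_node parameter from ggml_vk_build_graph_cpu_assist, which now always forwards true. Below is a minimal annotated sketch of the post-commit wrapper, mirroring the hunk above; the interpretation in the comments is an assumption based on the commit title, not text from the diff.

// Sketch: the wrapper as it reads after this commit (see the hunk above).
void ggml_vk_build_graph_cpu_assist(ggml_tensor * node) {
#ifdef GGML_VULKAN_DEBUG
    std::cerr << "ggml_vk_build_graph_cpu_assist()" << std::endl;
#endif
    ggml_backend_vk_context * ctx = &vk_instance.contexts[0];

    if (!ctx->initialized) {
        return;
    }

    // Every node is now built as if it were the last node of the graph,
    // presumably so that its Vulkan work is submitted immediately instead of
    // being batched until the true end of the graph. In the cpu-assist path,
    // CPU ops (e.g. a KV cache that is not offloaded) can be interleaved
    // between GPU-built nodes and need the preceding results to be coherent.
    ggml_vk_build_graph(ctx, node, true);
}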

ggml-vulkan.h

@@ -15,7 +15,7 @@ GGML_API void ggml_vk_init_cpu_assist(void);
 GGML_API void ggml_vk_preallocate_buffers_graph_cpu_assist(struct ggml_tensor * node);
 GGML_API void ggml_vk_preallocate_buffers_cpu_assist(void);
-GGML_API void ggml_vk_build_graph_cpu_assist(struct ggml_tensor * node, bool last_node);
+GGML_API void ggml_vk_build_graph_cpu_assist(struct ggml_tensor * node);
 GGML_API bool ggml_vk_compute_forward_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 #ifdef GGML_VULKAN_CHECK_RESULTS
 void ggml_vk_check_results_1_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);

ggml.c

@@ -18044,7 +18044,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
     ggml_vk_preallocate_buffers_cpu_assist();
     for (int i = 0; i < cgraph->n_nodes; i++) {
-        ggml_vk_build_graph_cpu_assist(cgraph->nodes[i], i == cgraph->n_nodes - 1);
+        ggml_vk_build_graph_cpu_assist(cgraph->nodes[i]);
     }
 #endif
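
For context, here is a hedged sketch of how these cpu-assist hooks end up being driven for a graph after this change. The helper name vk_cpu_assist_prepare_graph and the first loop are illustrative assumptions; only the preallocate call and the build-graph loop appear verbatim in the hunk above.

// Sketch only, not verbatim ggml.c. Assumes the public cpu-assist API
// declared in ggml-vulkan.h as shown above.
#include "ggml.h"
#include "ggml-vulkan.h"

static void vk_cpu_assist_prepare_graph(struct ggml_cgraph * cgraph) {
    // Per-node buffer sizing pass (assumed to precede the preallocation call).
    for (int i = 0; i < cgraph->n_nodes; i++) {
        ggml_vk_preallocate_buffers_graph_cpu_assist(cgraph->nodes[i]);
    }
    ggml_vk_preallocate_buffers_cpu_assist();

    for (int i = 0; i < cgraph->n_nodes; i++) {
        // After this commit the node index is irrelevant: each node is built
        // as if it were the last one, so its GPU work is not deferred past
        // CPU ops that may depend on it.
        ggml_vk_build_graph_cpu_assist(cgraph->nodes[i]);
    }
}

During execution, ggml_vk_compute_forward_cpu_assist() is then presumably given first shot at each op, with ggml_vk_graph_cleanup_cpu_assist() releasing per-graph resources afterwards, matching the entry points touched in ggml-vulkan.cpp above.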