From 309ef242095a6e0e0be26cefa7ba864faa3114ba Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Sat, 15 Jun 2024 18:23:07 +0200
Subject: [PATCH] Fix unnecessary high llama-3 VRAM use

---
 ggml-vulkan.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index fceaa1f85..56a97c0c7 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -5288,7 +5288,7 @@ static void ggml_vk_preallocate_buffers_graph(ggml_backend_vk_context * ctx, ggm
 
     bool mmp = (use_src0 && use_src1 && src1_type == GGML_TYPE_F32) ? ggml_vk_get_mul_mat_mat_pipeline(ctx, src0_type, y_non_contig ? GGML_TYPE_F16 : src1->type) != nullptr : false;
 
-    const bool qx_needs_dequant = use_src0 && (mmp || x_non_contig);
+    const bool qx_needs_dequant = use_src0 && (!mmp || x_non_contig);
     const bool qy_needs_dequant = use_src1 && ((src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig);
 
     int split_k;
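
Context for the one-character change: mmp records whether ggml_vk_get_mul_mat_mat_pipeline found a matrix-matrix pipeline for the given types, i.e. one that can consume src0 without a separate dequantization pass. A dequantization staging buffer for src0 is therefore only needed when no such pipeline exists (!mmp) or when src0 is non-contiguous; the old predicate allocated it precisely when a pipeline did exist, which is what inflated VRAM use on models like llama-3. Below is a minimal standalone sketch of the before/after logic; the literal flag values are hypothetical stand-ins for real graph-node state and are not taken from the patch.

// Sketch of the old vs. fixed predicate from ggml_vk_preallocate_buffers_graph.
// The three flags are hypothetical stand-ins for the real per-node state.
#include <cstdio>

int main() {
    const bool use_src0     = true;   // node reads a (possibly quantized) src0
    const bool mmp          = true;   // a mul_mat_mat pipeline exists for these types
    const bool x_non_contig = false;  // src0 is not laid out contiguously

    // Old predicate: allocated the dequant buffer when a pipeline existed,
    // i.e. exactly when the buffer was not needed.
    const bool qx_needs_dequant_old = use_src0 && (mmp || x_non_contig);

    // Fixed predicate: dequantize only when no pipeline can consume src0
    // directly, or when src0 must first be made contiguous.
    const bool qx_needs_dequant_new = use_src0 && (!mmp || x_non_contig);

    printf("old=%d new=%d\n", qx_needs_dequant_old, qx_needs_dequant_new);
    return 0;
}

With the values above, the old predicate reports a buffer is needed (and preallocates VRAM for it) while the fixed one correctly reports it is not; the two only agree when src0 is non-contiguous or no pipeline exists.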