From bcdd6531dbcb04f86fc87626d55d4a5c71f17ad3 Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Tue, 19 Mar 2024 19:50:29 +0100
Subject: [PATCH] Default to all dedicated GPUs

---
 ggml-vulkan.cpp | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index 082526781..b03207c42 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -1415,9 +1415,19 @@ void ggml_vk_instance_init() {
             vk_instance.device_indices.push_back(tmp);
         }
     } else {
-        vk_instance.device_indices.push_back(0);
+        // Default to using all dedicated GPUs
+        std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
+        for (size_t i = 0; i < devices.size(); i++) {
+            vk::PhysicalDeviceProperties props = devices[i].getProperties();
+
+            if (props.deviceType == vk::PhysicalDeviceType::eDiscreteGpu) {
+                vk_instance.device_indices.push_back(i);
+            }
+        }
     }
 
+    std::cerr << "ggml_vulkan: Found " << vk_instance.device_indices.size() << " Vulkan devices:" << std::endl;
+
     for (size_t i = 0; i < vk_instance.device_indices.size(); i++) {
         ggml_vk_print_gpu_info(i);
     }
@@ -2713,7 +2723,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context
     uint64_t x_buf_offset = 0;
     vk_buffer d_Y;
     uint64_t y_buf_offset = 0;
-    if(!src1_uma) {
+    if(!src0_uma) {
         d_Qx = extra_src0->buffer_gpu.lock();
         qx_buf_offset = extra_src0->offset;
         GGML_ASSERT(d_Qx != nullptr);
@@ -3633,6 +3643,8 @@ static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0
                     val = *((const float *) data + i2*ne1*ne0 + idx1*ne0 + idx0);
                 } else if (type == GGML_TYPE_F16) {
                     val = ggml_fp16_to_fp32(*((const ggml_fp16_t *) data + i2*ne1*ne0 + idx1*ne0 + idx0));
+                } else {
+                    GGML_ASSERT(false);
                 }
                 fprintf(stderr, "% 7.2f ", val);
             } else {
@@ -3934,6 +3946,8 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, int i0, int i1
                 val = *(float *) ((char *) tensor->data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]);
             } else if (tensor->type == GGML_TYPE_F16) {
                 val = ggml_fp16_to_fp32(*(ggml_fp16_t *) ((char *) tensor->data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]));
+            } else {
+                GGML_ASSERT(false);
             }
             fprintf(stderr, "% 7.2f ", val);
         } else {
@@ -4430,24 +4444,6 @@ static void ggml_vk_preallocate_buffers_graph(ggml_backend_vk_context * ctx, ggm
     const ggml_type src0_type = (use_src0 && src0->type == GGML_TYPE_F32) ? src0->type : GGML_TYPE_F16;
     const ggml_type src1_type = (use_src1 && src1->type == GGML_TYPE_F32) ? src1->type : GGML_TYPE_F16;
 
-    bool src0_uma = false;
-    bool src1_uma = false;
-
-    if (ctx->device->uma) {
-        vk_buffer buf;
-        size_t tmp;
-
-        if (use_src0) {
-            ggml_vk_host_get(ctx, src0->data, buf, tmp);
-            src0_uma = buf != nullptr;
-        }
-
-        if (use_src1) {
-            ggml_vk_host_get(ctx, src1->data, buf, tmp);
-            src1_uma = buf != nullptr;
-        }
-    }
-
     const bool x_non_contig = use_src0 && !ggml_vk_dim01_contiguous(src0);
     const bool y_non_contig = use_src1 && !ggml_vk_dim01_contiguous(src1);
 
@@ -4535,7 +4531,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
         return;
     }
 #ifdef GGML_VULKAN_DEBUG
-    std::cerr << "ggml_vk_preallocate_buffers(qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << ")" << std::endl;
+    std::cerr << "ggml_vk_preallocate_buffers(x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << ")" << std::endl;
 #endif
 #if defined(GGML_VULKAN_RUN_TESTS)
     ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul,
@@ -5282,7 +5278,7 @@ static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = {
 
 GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num) {
 #ifdef GGML_VULKAN_DEBUG
-    std::cerr << "ggml_backend_vk_buffer_type(" << idx << ")" << std::endl;
+    std::cerr << "ggml_backend_vk_buffer_type(" << dev_num << ")" << std::endl;
 #endif
     GGML_ASSERT(dev_num < vk_instance.device_indices.size());
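
Note: as a quick way to sanity-check the new default outside the backend, here is a minimal standalone sketch of the device-selection rule the first hunk introduces. It assumes Vulkan-Hpp with a default-constructed vk::InstanceCreateInfo; the backend's real instance setup (application info, layers, extensions) is more involved, so treat this as illustrative only:

    #include <cstdio>
    #include <vector>
    #include <vulkan/vulkan.hpp>

    int main() {
        // Minimal instance; illustrative only, not the backend's actual setup.
        vk::Instance instance = vk::createInstance(vk::InstanceCreateInfo{});

        // Same selection rule as the patch: keep the index of every discrete
        // (dedicated) GPU, skipping integrated, virtual, and CPU devices.
        std::vector<size_t> device_indices;
        std::vector<vk::PhysicalDevice> devices = instance.enumeratePhysicalDevices();
        for (size_t i = 0; i < devices.size(); i++) {
            vk::PhysicalDeviceProperties props = devices[i].getProperties();
            if (props.deviceType == vk::PhysicalDeviceType::eDiscreteGpu) {
                device_indices.push_back(i);
            }
        }

        fprintf(stderr, "Found %zu dedicated GPU(s)\n", device_indices.size());
        instance.destroy();
        return 0;
    }

As the hunk shows, an explicitly supplied index list (the branch that pushes parsed values into vk_instance.device_indices above the changed else) still takes precedence; the discrete-GPU scan only runs when no list was given.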