Parse graph early to pre-record command buffers

This commit is contained in:
0cc4m 2023-09-29 17:08:09 +02:00
parent 5ae5d2bd5b
commit 7f89e40e52
5 changed files with 491 additions and 332 deletions

View file

@@ -75,6 +75,7 @@ set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA k
option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
option(LLAMA_METAL "llama: use Metal" OFF)
option(LLAMA_MPI "llama: use MPI" OFF)
option(LLAMA_K_QUANTS "llama: use k-quants" ON)
@@ -355,12 +356,14 @@ if (LLAMA_CLBLAST)
endif()
if (LLAMA_VULKAN)
find_package(Vulkan COMPONENTS glslc SPIRV-Tools)
find_package(Vulkan COMPONENTS shaderc_combined)
find_package(glslang)
find_package(SPIRV-Tools-opt)
if (Vulkan_FOUND)
message(STATUS "Vulkan found")
add_library(ggml-vulkan STATIC ggml-vulkan.cpp ggml-vulkan.h)
target_link_libraries(ggml-vulkan PUBLIC Vulkan::Vulkan SPIRV SPIRV-Tools-opt SPIRV-Tools shaderc_combined)
target_link_libraries(ggml-vulkan PUBLIC Vulkan::Vulkan Vulkan::shaderc_combined)
add_compile_definitions(GGML_USE_VULKAN)

File diff suppressed because it is too large Load diff

View file

@@ -8,7 +8,11 @@ extern "C" {
void ggml_vk_init(void);
void ggml_vk_preallocate_buffers_graph(struct ggml_tensor * node);
void ggml_vk_preallocate_buffers(void);
void ggml_vk_build_graph(struct ggml_tensor * node);
bool ggml_vk_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
void ggml_vk_graph_cleanup(void);
void * ggml_vk_host_malloc(size_t size);
void ggml_vk_host_free(void * ptr);

2
ggml.c
View file

@@ -14817,7 +14817,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
#elif defined(GGML_USE_VULKAN)
bool skip_cpu = ggml_vk_compute_forward(params, tensor);
const bool skip_cpu = ggml_vk_compute_forward(params, tensor);
if (skip_cpu) {
return;
}

View file

@@ -110,7 +110,22 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
plan.work_data = buf.data();
}
#ifdef GGML_USE_VULKAN
for (int i = 0; i < graph->n_nodes; i++) {
ggml_vk_preallocate_buffers_graph(graph->nodes[i]);
}
ggml_vk_preallocate_buffers();
for (int i = 0; i < graph->n_nodes; i++) {
ggml_vk_build_graph(graph->nodes[i]);
}
#endif
ggml_graph_compute(graph, &plan);
#ifdef GGML_USE_VULKAN
ggml_vk_graph_cleanup();
#endif
}
//