Parse graph early to pre-record command buffers
parent 5ae5d2bd5b · commit 7f89e40e52
5 changed files with 491 additions and 332 deletions
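In short, as the hunks below show: instead of setting up Vulkan work inside each ggml_compute_forward() call, the compute graph is now walked before execution, so device buffers can be sized and allocated up front and each node's command buffer can be recorded ahead of time; a cleanup hook runs after the graph has been computed.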
CMakeLists.txt

```diff
@@ -75,6 +75,7 @@ set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA k
 option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
 set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
 option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
+option(LLAMA_VULKAN "llama: use Vulkan" OFF)
 option(LLAMA_METAL "llama: use Metal" OFF)
 option(LLAMA_MPI "llama: use MPI" OFF)
 option(LLAMA_K_QUANTS "llama: use k-quants" ON)
```
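With the option declared, the backend stays opt-in: it is enabled at configure time with `cmake -DLLAMA_VULKAN=ON`.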
```diff
@@ -355,12 +356,14 @@ if (LLAMA_CLBLAST)
 endif()

 if (LLAMA_VULKAN)
-    find_package(Vulkan COMPONENTS glslc SPIRV-Tools)
+    find_package(Vulkan COMPONENTS shaderc_combined)
+    find_package(glslang)
+    find_package(SPIRV-Tools-opt)
     if (Vulkan_FOUND)
         message(STATUS "Vulkan found")

         add_library(ggml-vulkan STATIC ggml-vulkan.cpp ggml-vulkan.h)
-        target_link_libraries(ggml-vulkan PUBLIC Vulkan::Vulkan SPIRV SPIRV-Tools-opt SPIRV-Tools shaderc_combined)
+        target_link_libraries(ggml-vulkan PUBLIC Vulkan::Vulkan Vulkan::shaderc_combined)

         add_compile_definitions(GGML_USE_VULKAN)

```
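The find_package changes presumably track how the backend compiles its GLSL compute shaders to SPIR-V at runtime: linking the Vulkan::shaderc_combined imported target (provided by CMake's FindVulkan module since CMake 3.24) replaces the previous hand-listed SPIRV/SPIRV-Tools/shaderc libraries.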
ggml-vulkan.cpp (795 changes)
File diff suppressed because it is too large.
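The recording logic itself lives in that suppressed diff. As a rough sketch of what pre-recording a node's work means at the Vulkan API level (illustrative only; `record_node_dispatch` is not from this commit, and the real backend also deals with staging copies, barriers, and descriptor updates):

```c
#include <vulkan/vulkan.h>

/* Record a compute dispatch for one graph node into `cmd` once, up front.
 * Executing the node later is then just a queue submission of this buffer. */
static void record_node_dispatch(VkCommandBuffer cmd, VkPipeline pipeline,
                                 VkPipelineLayout layout, VkDescriptorSet set,
                                 uint32_t gx, uint32_t gy, uint32_t gz) {
    const VkCommandBufferBeginInfo begin_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
    };
    vkBeginCommandBuffer(cmd, &begin_info);
    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout,
                            0 /* firstSet */, 1, &set, 0, NULL);
    vkCmdDispatch(cmd, gx, gy, gz);
    vkEndCommandBuffer(cmd);
}
```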
ggml-vulkan.h

```diff
@@ -8,7 +8,11 @@ extern "C" {

 void ggml_vk_init(void);

+void ggml_vk_preallocate_buffers_graph(struct ggml_tensor * node);
+void ggml_vk_preallocate_buffers(void);
+void ggml_vk_build_graph(struct ggml_tensor * node);
 bool ggml_vk_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+void ggml_vk_graph_cleanup(void);

 void * ggml_vk_host_malloc(size_t size);
 void ggml_vk_host_free(void * ptr);
```
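Read together with the llama.cpp hunk below, the new declarations define a per-graph lifecycle: size, allocate, record, compute, clean up. A minimal caller sketch, assuming a GGML_USE_VULKAN build and an already-constructed graph and plan (`compute_with_vulkan` is a hypothetical helper, not part of this commit):

```c
#include "ggml.h"
#include "ggml-vulkan.h"

/* Hypothetical helper showing the intended order of the new calls. */
static void compute_with_vulkan(struct ggml_cgraph * graph, struct ggml_cplan * plan) {
    // Pass 1: let the backend measure every node so it can allocate
    // its device buffers in one go instead of on demand.
    for (int i = 0; i < graph->n_nodes; i++) {
        ggml_vk_preallocate_buffers_graph(graph->nodes[i]);
    }
    ggml_vk_preallocate_buffers();

    // Pass 2: pre-record a command buffer for each node.
    for (int i = 0; i < graph->n_nodes; i++) {
        ggml_vk_build_graph(graph->nodes[i]);
    }

    // Nodes the backend claims are skipped on the CPU via
    // ggml_vk_compute_forward() inside ggml_compute_forward().
    ggml_graph_compute(graph, plan);

    // Release the per-graph Vulkan state.
    ggml_vk_graph_cleanup();
}
```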
ggml.c (2 changes)

```diff
@@ -14817,7 +14817,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
     GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
     GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
 #elif defined(GGML_USE_VULKAN)
-    bool skip_cpu = ggml_vk_compute_forward(params, tensor);
+    const bool skip_cpu = ggml_vk_compute_forward(params, tensor);
     if (skip_cpu) {
         return;
     }
```
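The only change here is the added const qualifier; ggml_vk_compute_forward() already returns whether the Vulkan backend handled the node, in which case the CPU implementation is skipped.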
|
15
llama.cpp
15
llama.cpp
|
@ -110,7 +110,22 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
|
|||
plan.work_data = buf.data();
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_VULKAN
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
ggml_vk_preallocate_buffers_graph(graph->nodes[i]);
|
||||
}
|
||||
ggml_vk_preallocate_buffers();
|
||||
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
ggml_vk_build_graph(graph->nodes[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
ggml_graph_compute(graph, &plan);
|
||||
|
||||
#ifdef GGML_USE_VULKAN
|
||||
ggml_vk_graph_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
|
|
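Note the structure: the graph is traversed twice before ggml_graph_compute() runs, once so ggml_vk_preallocate_buffers() can make a single bulk allocation and once so every node's command buffer is recorded before execution begins; ggml_vk_graph_cleanup() then tears down the per-graph state afterwards. This moves Vulkan setup cost out of the per-node compute path, which is what the commit title refers to.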