Add Vulkan debug and validate flags to Make and CMakeLists.txt
This commit is contained in:
parent
4e9091f11e
commit
0dcf96d437
3 changed files with 112 additions and 89 deletions
|
@ -100,6 +100,9 @@ option(LLAMA_HIPBLAS "llama: use hipBLAS"
|
||||||
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
|
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
|
||||||
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
||||||
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
|
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
|
||||||
|
option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
|
||||||
|
option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
|
||||||
|
option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
|
||||||
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
|
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
|
||||||
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
|
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
|
||||||
option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
|
option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
|
||||||
|
@ -431,6 +434,18 @@ if (LLAMA_VULKAN)
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_VULKAN)
|
add_compile_definitions(GGML_USE_VULKAN)
|
||||||
|
|
||||||
|
if (LLAMA_VULKAN_CHECK_RESULTS)
|
||||||
|
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_CHECK_RESULTS)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (LLAMA_VULKAN_DEBUG)
|
||||||
|
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_DEBUG)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (LLAMA_VULKAN_VALIDATE)
|
||||||
|
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_VALIDATE)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-vulkan)
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-vulkan)
|
||||||
else()
|
else()
|
||||||
message(WARNING "Vulkan not found")
|
message(WARNING "Vulkan not found")
|
||||||
|
|
8
Makefile
8
Makefile
|
@ -457,6 +457,14 @@ ifdef LLAMA_VULKAN_CHECK_RESULTS
|
||||||
MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
|
MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef LLAMA_VULKAN_DEBUG
|
||||||
|
MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef LLAMA_VULKAN_VALIDATE
|
||||||
|
MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
|
||||||
|
endif
|
||||||
|
|
||||||
ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
|
ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
endif # LLAMA_VULKAN
|
endif # LLAMA_VULKAN
|
||||||
|
|
178
ggml-vulkan.cpp
178
ggml-vulkan.cpp
|
@ -1,6 +1,6 @@
|
||||||
#include "ggml-vulkan.h"
|
#include "ggml-vulkan.h"
|
||||||
|
|
||||||
#ifdef VK_RUN_TESTS
|
#ifdef GGML_VULKAN_RUN_TESTS
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -264,7 +264,7 @@ size_t vk_output_tensor;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_size, const void* spv_data, const std::string& entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, std::vector<uint32_t>&& specialization_constants, uint32_t align) {
|
static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_size, const void* spv_data, const std::string& entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, std::vector<uint32_t>&& specialization_constants, uint32_t align) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_pipeline(" << name << ", " << entrypoint << ", " << parameter_count << ", " << push_constant_size << ", (" << wg_denoms[0] << "," << wg_denoms[1] << "," << wg_denoms[2] << "), specialization_constants, " << align << ")" << std::endl;
|
std::cerr << "ggml_vk_create_pipeline(" << name << ", " << entrypoint << ", " << parameter_count << ", " << push_constant_size << ", (" << wg_denoms[0] << "," << wg_denoms[1] << "," << wg_denoms[2] << "), specialization_constants, " << align << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT(parameter_count > 0);
|
GGML_ASSERT(parameter_count > 0);
|
||||||
|
@ -368,7 +368,7 @@ static vk_pipeline ggml_vk_create_pipeline(const std::string& name, size_t spv_s
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uint32_t n) {
|
static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uint32_t n) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_pipeline_allocate_descriptor_sets(" << pipeline.name << ", " << n << ")" << std::endl;
|
std::cerr << "ggml_vk_pipeline_allocate_descriptor_sets(" << pipeline.name << ", " << n << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
// Check if gc already contains pipeline before adding it
|
// Check if gc already contains pipeline before adding it
|
||||||
|
@ -413,14 +413,14 @@ static void ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline& pipeline, uin
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_pipeline_cleanup(vk_pipeline& pipeline) {
|
static void ggml_vk_pipeline_cleanup(vk_pipeline& pipeline) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_pipeline_cleanup(" << pipeline.name << ")" << std::endl;
|
std::cerr << "ggml_vk_pipeline_cleanup(" << pipeline.name << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
pipeline.descriptor_set_idx = 0;
|
pipeline.descriptor_set_idx = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) {
|
static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_cmd_buffer()" << std::endl;
|
std::cerr << "ggml_vk_create_cmd_buffer()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (q.cmd_buffers.size() > q.cmd_buffer_idx) {
|
if (q.cmd_buffers.size() > q.cmd_buffer_idx) {
|
||||||
|
@ -442,7 +442,7 @@ static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_queue& q) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_submission ggml_vk_create_submission(vk_queue& q, std::vector<vk_semaphore> wait_semaphores, std::vector<vk_semaphore> signal_semaphores) {
|
static vk_submission ggml_vk_create_submission(vk_queue& q, std::vector<vk_semaphore> wait_semaphores, std::vector<vk_semaphore> signal_semaphores) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_submission()" << std::endl;
|
std::cerr << "ggml_vk_create_submission()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_submission s;
|
vk_submission s;
|
||||||
|
@ -453,14 +453,14 @@ static vk_submission ggml_vk_create_submission(vk_queue& q, std::vector<vk_semap
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_sequence ggml_vk_create_sequence_1(vk_queue& q, std::vector<vk_semaphore> wait_semaphores, std::vector<vk_semaphore> signal_semaphores) {
|
static vk_sequence ggml_vk_create_sequence_1(vk_queue& q, std::vector<vk_semaphore> wait_semaphores, std::vector<vk_semaphore> signal_semaphores) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_sequence_1()" << std::endl;
|
std::cerr << "ggml_vk_create_sequence_1()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return { ggml_vk_create_submission(q, std::move(wait_semaphores), std::move(signal_semaphores)) };
|
return { ggml_vk_create_submission(q, std::move(wait_semaphores), std::move(signal_semaphores)) };
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_submit(vk_context * ctx, vk::Fence fence) {
|
static void ggml_vk_submit(vk_context * ctx, vk::Fence fence) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_submit(" << ctx->seqs.size() << ", " << fence << ")" << std::endl;
|
std::cerr << "ggml_vk_submit(" << ctx->seqs.size() << ", " << fence << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (ctx->seqs.empty()) {
|
if (ctx->seqs.empty()) {
|
||||||
|
@ -536,7 +536,7 @@ static void ggml_vk_submit(vk_context * ctx, vk::Fence fence) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t ggml_vk_find_queue_family_index(std::vector<vk::QueueFamilyProperties>& queue_family_props, const vk::QueueFlags& required, const vk::QueueFlags& avoid, int32_t compute_index, uint32_t min_num_queues) {
|
static uint32_t ggml_vk_find_queue_family_index(std::vector<vk::QueueFamilyProperties>& queue_family_props, const vk::QueueFlags& required, const vk::QueueFlags& avoid, int32_t compute_index, uint32_t min_num_queues) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_find_queue_family_index()" << std::endl;
|
std::cerr << "ggml_vk_find_queue_family_index()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
const uint32_t qfsize = queue_family_props.size();
|
const uint32_t qfsize = queue_family_props.size();
|
||||||
|
@ -578,7 +578,7 @@ static uint32_t ggml_vk_find_queue_family_index(std::vector<vk::QueueFamilyPrope
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_queue ggml_vk_create_queue(uint32_t queue_family_index, uint32_t queue_index, vk::PipelineStageFlags&& stage_flags) {
|
static vk_queue ggml_vk_create_queue(uint32_t queue_family_index, uint32_t queue_index, vk::PipelineStageFlags&& stage_flags) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_queue()" << std::endl;
|
std::cerr << "ggml_vk_create_queue()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_queue q;
|
vk_queue q;
|
||||||
|
@ -597,7 +597,7 @@ static vk_queue ggml_vk_create_queue(uint32_t queue_family_index, uint32_t queue
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_context * ggml_vk_create_context(vk_queue& q) {
|
static vk_context * ggml_vk_create_context(vk_queue& q) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_context()" << std::endl;
|
std::cerr << "ggml_vk_create_context()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_gc.contexts.emplace_back();
|
vk_gc.contexts.emplace_back();
|
||||||
|
@ -609,7 +609,7 @@ static vk_context * ggml_vk_create_context(vk_queue& q) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_semaphore * ggml_vk_create_binary_semaphore() {
|
static vk_semaphore * ggml_vk_create_binary_semaphore() {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_timeline_semaphore()" << std::endl;
|
std::cerr << "ggml_vk_create_timeline_semaphore()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eBinary, 0 };
|
vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eBinary, 0 };
|
||||||
|
@ -621,7 +621,7 @@ static vk_semaphore * ggml_vk_create_binary_semaphore() {
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_semaphore * ggml_vk_create_timeline_semaphore() {
|
static vk_semaphore * ggml_vk_create_timeline_semaphore() {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_timeline_semaphore()" << std::endl;
|
std::cerr << "ggml_vk_create_timeline_semaphore()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (vk_semaphore_idx >= vk_gc.tl_semaphores.size()) {
|
if (vk_semaphore_idx >= vk_gc.tl_semaphores.size()) {
|
||||||
|
@ -642,7 +642,7 @@ static vk::Event ggml_vk_create_event() {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_queue_cleanup(vk_queue& q) {
|
static void ggml_vk_queue_cleanup(vk_queue& q) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_queue_cleanup()" << std::endl;
|
std::cerr << "ggml_vk_queue_cleanup()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
// Requires command buffers to be done
|
// Requires command buffers to be done
|
||||||
|
@ -652,7 +652,7 @@ static void ggml_vk_queue_cleanup(vk_queue& q) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_buffer ggml_vk_create_buffer(size_t size, vk::MemoryPropertyFlags req_flags) {
|
static vk_buffer ggml_vk_create_buffer(size_t size, vk::MemoryPropertyFlags req_flags) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl;
|
std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT(size > 0);
|
GGML_ASSERT(size > 0);
|
||||||
|
@ -743,7 +743,7 @@ static void ggml_vk_destroy_buffer(vk_buffer& buf) {
|
||||||
if (buf.size == 0) {
|
if (buf.size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_destroy_buffer(" << buf.size << ")" << std::endl;
|
std::cerr << "ggml_vk_destroy_buffer(" << buf.size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -757,7 +757,7 @@ static vk_subbuffer ggml_vk_subbuffer(vk_buffer& buf) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_sync_buffers(vk_context * ctx) {
|
static void ggml_vk_sync_buffers(vk_context * ctx) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_sync_buffers()" << std::endl;
|
std::cerr << "ggml_vk_sync_buffers()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
const std::vector<vk::MemoryBarrier> mem_barriers{ { { vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite }, { vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite } } };
|
const std::vector<vk::MemoryBarrier> mem_barriers{ { { vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite }, { vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite } } };
|
||||||
|
@ -773,7 +773,7 @@ static void ggml_vk_sync_buffers(vk_context * ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_wait_events(vk::CommandBuffer& cmd_buffer, std::vector<vk::Event>&& events, vk::PipelineStageFlags src_stages, vk::PipelineStageFlags dst_stages) {
|
static void ggml_vk_wait_events(vk::CommandBuffer& cmd_buffer, std::vector<vk::Event>&& events, vk::PipelineStageFlags src_stages, vk::PipelineStageFlags dst_stages) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_wait_events()" << std::endl;
|
std::cerr << "ggml_vk_wait_events()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (events.empty()) {
|
if (events.empty()) {
|
||||||
|
@ -810,7 +810,7 @@ static bool ggml_vk_build_shader(ggml_type type) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_load_shaders() {
|
static void ggml_vk_load_shaders() {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_load_shaders()" << std::endl;
|
std::cerr << "ggml_vk_load_shaders()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -953,7 +953,7 @@ static void ggml_vk_load_shaders() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vk_init() {
|
void ggml_vk_init() {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_init()" << std::endl;
|
std::cerr << "ggml_vk_init()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
static bool initialized = false;
|
static bool initialized = false;
|
||||||
|
@ -969,17 +969,17 @@ void ggml_vk_init() {
|
||||||
|
|
||||||
vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, VK_API_VERSION };
|
vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, VK_API_VERSION };
|
||||||
const std::vector<const char*> layers = {
|
const std::vector<const char*> layers = {
|
||||||
#ifdef VK_VALIDATE
|
#ifdef GGML_VULKAN_VALIDATE
|
||||||
"VK_LAYER_KHRONOS_validation",
|
"VK_LAYER_KHRONOS_validation",
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
const std::vector<const char*> extensions = {
|
const std::vector<const char*> extensions = {
|
||||||
#ifdef VK_VALIDATE
|
#ifdef GGML_VULKAN_VALIDATE
|
||||||
"VK_EXT_validation_features",
|
"VK_EXT_validation_features",
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags(), &app_info, layers, extensions);
|
vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags(), &app_info, layers, extensions);
|
||||||
#ifdef VK_VALIDATE
|
#ifdef GGML_VULKAN_VALIDATE
|
||||||
const std::vector<vk::ValidationFeatureEnableEXT> features_enable = { vk::ValidationFeatureEnableEXT::eBestPractices };
|
const std::vector<vk::ValidationFeatureEnableEXT> features_enable = { vk::ValidationFeatureEnableEXT::eBestPractices };
|
||||||
vk::ValidationFeaturesEXT validation_features = {
|
vk::ValidationFeaturesEXT validation_features = {
|
||||||
features_enable,
|
features_enable,
|
||||||
|
@ -1090,7 +1090,7 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
|
||||||
|
|
||||||
device_extensions.push_back("VK_KHR_16bit_storage");
|
device_extensions.push_back("VK_KHR_16bit_storage");
|
||||||
|
|
||||||
#ifdef VK_VALIDATE
|
#ifdef GGML_VULKAN_VALIDATE
|
||||||
device_extensions.push_back("VK_KHR_shader_non_semantic_info");
|
device_extensions.push_back("VK_KHR_shader_non_semantic_info");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1136,7 +1136,7 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) {
|
static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_get_to_fp16()" << std::endl;
|
std::cerr << "ggml_vk_get_to_fp16()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
@ -1160,7 +1160,7 @@ static vk_pipeline* ggml_vk_get_to_fp16(ggml_type type) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) {
|
static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_get_dequantize_mul_mat_vec()" << std::endl;
|
std::cerr << "ggml_vk_get_dequantize_mul_mat_vec()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
@ -1189,7 +1189,7 @@ static vk_pipeline* ggml_vk_get_dequantize_mul_mat_vec(ggml_type type) {
|
||||||
static vk_buffer g_vk_buffer_pool[MAX_VK_BUFFERS];
|
static vk_buffer g_vk_buffer_pool[MAX_VK_BUFFERS];
|
||||||
|
|
||||||
static vk_buffer ggml_vk_pool_malloc(size_t size) {
|
static vk_buffer ggml_vk_pool_malloc(size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_pool_malloc(" << size << ")" << std::endl;
|
std::cerr << "ggml_vk_pool_malloc(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
int best_i = -1;
|
int best_i = -1;
|
||||||
|
@ -1223,7 +1223,7 @@ static vk_buffer ggml_vk_pool_malloc(size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_pool_free(vk_buffer& buffer) {
|
static void ggml_vk_pool_free(vk_buffer& buffer) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_pool_free(" << buffer.size << ")" << std::endl;
|
std::cerr << "ggml_vk_pool_free(" << buffer.size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < MAX_VK_BUFFERS; ++i) {
|
for (int i = 0; i < MAX_VK_BUFFERS; ++i) {
|
||||||
|
@ -1256,7 +1256,7 @@ static vk_buffer ggml_vk_create_buffer_temp(size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void * ggml_vk_host_malloc(size_t size) {
|
static void * ggml_vk_host_malloc(size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl;
|
std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_buffer buf = ggml_vk_create_buffer(size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
|
vk_buffer buf = ggml_vk_create_buffer(size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
|
||||||
|
@ -1279,7 +1279,7 @@ static void ggml_vk_host_free(void* ptr) {
|
||||||
if (ptr == nullptr) {
|
if (ptr == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_host_free(" << ptr << ")" << std::endl;
|
std::cerr << "ggml_vk_host_free(" << ptr << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_buffer* buf = nullptr;
|
vk_buffer* buf = nullptr;
|
||||||
|
@ -1333,7 +1333,7 @@ static void ggml_vk_dispatch_pipeline(vk_context * ctx, vk_pipeline& pipeline, s
|
||||||
const uint32_t wg0 = CEIL_DIV(elements[0], pipeline.wg_denoms[0]);
|
const uint32_t wg0 = CEIL_DIV(elements[0], pipeline.wg_denoms[0]);
|
||||||
const uint32_t wg1 = CEIL_DIV(elements[1], pipeline.wg_denoms[1]);
|
const uint32_t wg1 = CEIL_DIV(elements[1], pipeline.wg_denoms[1]);
|
||||||
const uint32_t wg2 = CEIL_DIV(elements[2], pipeline.wg_denoms[2]);
|
const uint32_t wg2 = CEIL_DIV(elements[2], pipeline.wg_denoms[2]);
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_dispatch_pipeline(" << pipeline.name << ", (" << wg0 << "," << wg1 << "," << wg2 << "))" << std::endl;
|
std::cerr << "ggml_vk_dispatch_pipeline(" << pipeline.name << ", (" << wg0 << "," << wg1 << "," << wg2 << "))" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
std::vector<vk::DescriptorBufferInfo> descriptor_buffer_infos;
|
std::vector<vk::DescriptorBufferInfo> descriptor_buffer_infos;
|
||||||
|
@ -1368,7 +1368,7 @@ static void ggml_vk_end_submission(vk_submission& s, std::vector<vk_semaphore> w
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_ctx_end(vk_context * ctx) {
|
static void ggml_vk_ctx_end(vk_context * ctx) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_ctx_end(" << ctx << ", " << ctx->seqs.size() << ")" << std::endl;
|
std::cerr << "ggml_vk_ctx_end(" << ctx << ", " << ctx->seqs.size() << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (ctx->s == nullptr) {
|
if (ctx->s == nullptr) {
|
||||||
|
@ -1380,7 +1380,7 @@ static void ggml_vk_ctx_end(vk_context * ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_ctx_begin(vk_context * ctx) {
|
static void ggml_vk_ctx_begin(vk_context * ctx) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_ctx_begin(" << ctx << ")" << std::endl;
|
std::cerr << "ggml_vk_ctx_begin(" << ctx << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (ctx->s != nullptr) {
|
if (ctx->s != nullptr) {
|
||||||
|
@ -1411,7 +1411,7 @@ static void ensure_sync_staging_buffer(size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size_t offset, const ggml_tensor * tensor, bool sync_staging = false) {
|
static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size_t offset, const ggml_tensor * tensor, bool sync_staging = false) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_write_nc_async(" << tensor << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_write_nc_async(" << tensor << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT(!ggml_is_contiguous(tensor));
|
GGML_ASSERT(!ggml_is_contiguous(tensor));
|
||||||
|
@ -1518,7 +1518,7 @@ static void ggml_vk_buffer_write_nc_async(vk_context * ctx, vk_buffer* dst, size
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height, bool sync_staging = false) {
|
static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height, bool sync_staging = false) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_write_2d_async(" << width << ", " << height << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_write_2d_async(" << width << ", " << height << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
// Buffer is already mapped
|
// Buffer is already mapped
|
||||||
|
@ -1552,7 +1552,7 @@ static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size
|
||||||
ctx->s->buffer.copyBuffer(buf->buffer, dst->buffer, slices);
|
ctx->s->buffer.copyBuffer(buf->buffer, dst->buffer, slices);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "STAGING" << std::endl;
|
std::cerr << "STAGING" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1589,14 +1589,14 @@ static void ggml_vk_buffer_write_2d_async(vk_context * ctx, vk_buffer* dst, size
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_write_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t size, bool sync_staging = false) {
|
static void ggml_vk_buffer_write_async(vk_context * ctx, vk_buffer* dst, size_t offset, const void * src, size_t size, bool sync_staging = false) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_write_async(" << size << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_write_async(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return ggml_vk_buffer_write_2d_async(ctx, dst, offset, src, size, size, 1, sync_staging);
|
return ggml_vk_buffer_write_2d_async(ctx, dst, offset, src, size, size, 1, sync_staging);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_write_2d(vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height) {
|
static void ggml_vk_buffer_write_2d(vk_buffer* dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_write_2d(" << width << ", " << height << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_write_2d(" << width << ", " << height << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
// Buffer is already mapped
|
// Buffer is already mapped
|
||||||
|
@ -1623,14 +1623,14 @@ static void ggml_vk_buffer_write_2d(vk_buffer* dst, size_t offset, const void *
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_write(vk_buffer* dst, size_t offset, const void * src, size_t size) {
|
static void ggml_vk_buffer_write(vk_buffer* dst, size_t offset, const void * src, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_write(" << size << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_write(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
ggml_vk_buffer_write_2d(dst, offset, src, 0, size, 1);
|
ggml_vk_buffer_write_2d(dst, offset, src, 0, size, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_read_2d_async(vk_context * ctx, vk_buffer* src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) {
|
static void ggml_vk_buffer_read_2d_async(vk_context * ctx, vk_buffer* src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_read_2d_async(offset=" << offset << ", width=" << width << ", height=" << height << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_read_2d_async(offset=" << offset << ", width=" << width << ", height=" << height << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT(width > 0);
|
GGML_ASSERT(width > 0);
|
||||||
|
@ -1663,7 +1663,7 @@ static void ggml_vk_buffer_read_2d_async(vk_context * ctx, vk_buffer* src, size_
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "STAGING" << std::endl;
|
std::cerr << "STAGING" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1692,7 +1692,7 @@ static void ggml_vk_buffer_read_async(vk_context * ctx, vk_buffer* src, size_t o
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_read(vk_buffer* src, size_t offset, void * dst, size_t size) {
|
static void ggml_vk_buffer_read(vk_buffer* src, size_t offset, void * dst, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_read(" << offset << ", " << size << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_read(" << offset << ", " << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if(src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
|
if(src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
|
||||||
|
@ -1716,7 +1716,7 @@ static void ggml_vk_buffer_read(vk_buffer* src, size_t offset, void * dst, size_
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_copy_async(vk_context * ctx, vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) {
|
static void ggml_vk_buffer_copy_async(vk_context * ctx, vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_copy_async(" << size << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_copy_async(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
VkBufferCopy bc{ src_offset, dst_offset, size };
|
VkBufferCopy bc{ src_offset, dst_offset, size };
|
||||||
|
@ -1725,7 +1725,7 @@ static void ggml_vk_buffer_copy_async(vk_context * ctx, vk_buffer * dst, size_t
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_copy(vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) {
|
static void ggml_vk_buffer_copy(vk_buffer * dst, size_t dst_offset, vk_buffer * src, size_t src_offset, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_copy(" << size << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_copy(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
VkBufferCopy bc{ src_offset, dst_offset, size };
|
VkBufferCopy bc{ src_offset, dst_offset, size };
|
||||||
|
@ -1741,7 +1741,7 @@ static void ggml_vk_buffer_copy(vk_buffer * dst, size_t dst_offset, vk_buffer *
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_buffer_memset(vk_buffer* dst, size_t offset, uint32_t c, size_t size) {
|
static void ggml_vk_buffer_memset(vk_buffer* dst, size_t offset, uint32_t c, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_buffer_memset(" << offset << ", " << c << ", " << size << ")" << std::endl;
|
std::cerr << "ggml_vk_buffer_memset(" << offset << ", " << c << ", " << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_context * ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
vk_context * ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
||||||
|
@ -1755,7 +1755,7 @@ static void ggml_vk_buffer_memset(vk_buffer* dst, size_t offset, uint32_t c, siz
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_h2d_tensor_2d(vk_context * ctx, vk_buffer * dst, size_t offset, const ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1) {
|
static void ggml_vk_h2d_tensor_2d(vk_context * ctx, vk_buffer * dst, size_t offset, const ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_h2d_tensor_2d(dst=" << dst << ", offset=" << offset << ", src=" << src << ", i3=" << i3 << ", i2=" << i2 << ", i1=" << i1 << ")" << std::endl;
|
std::cerr << "ggml_vk_h2d_tensor_2d(dst=" << dst << ", offset=" << offset << ", src=" << src << ", i3=" << i3 << ", i2=" << i2 << ", i1=" << i1 << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
const uint64_t ne0 = src->ne[0];
|
const uint64_t ne0 = src->ne[0];
|
||||||
|
@ -1785,7 +1785,7 @@ static void ggml_vk_h2d_tensor_2d(vk_context * ctx, vk_buffer * dst, size_t offs
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_d2h_tensor_2d(vk_context * ctx, vk_buffer * src, size_t offset, const ggml_tensor * dst) {
|
static void ggml_vk_d2h_tensor_2d(vk_context * ctx, vk_buffer * src, size_t offset, const ggml_tensor * dst) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_d2h_tensor_2d()" << std::endl;
|
std::cerr << "ggml_vk_d2h_tensor_2d()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
const uint64_t ne0 = dst->ne[0];
|
const uint64_t ne0 = dst->ne[0];
|
||||||
|
@ -1811,24 +1811,24 @@ static void ggml_vk_d2h_tensor_2d(vk_context * ctx, vk_buffer * src, size_t offs
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t ggml_vk_guess_split_k(int m, int n, int k) {
|
static uint32_t ggml_vk_guess_split_k(int m, int n, int k) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_guess_split_k(" << m << ", " << n << ", " << k << ")";
|
std::cerr << "ggml_vk_guess_split_k(" << m << ", " << n << ", " << k << ")";
|
||||||
#endif
|
#endif
|
||||||
if (k > 128 && (m < 128 || n < 128) && m > 2 && n > 2) {
|
if (k > 128 && (m < 128 || n < 128) && m > 2 && n > 2) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " = 4" << std::endl;
|
std::cerr << " = 4" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " = 1" << std::endl;
|
std::cerr << " = 1" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t ggml_vk_guess_matmul_pipeline_align(int m, int n) {
|
static uint32_t ggml_vk_guess_matmul_pipeline_align(int m, int n) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ")" << std::endl;
|
std::cerr << "ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (m <= 32 || n <= 32) {
|
if (m <= 32 || n <= 32) {
|
||||||
|
@ -1841,41 +1841,41 @@ static uint32_t ggml_vk_guess_matmul_pipeline_align(int m, int n) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_pipeline* ggml_vk_guess_matmul_pipeline(bool bit16_x, bool bit16_y, int m, int n, bool aligned) {
|
static vk_pipeline* ggml_vk_guess_matmul_pipeline(bool bit16_x, bool bit16_y, int m, int n, bool aligned) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")";
|
std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")";
|
||||||
#endif
|
#endif
|
||||||
if (bit16_x && bit16_y) {
|
if (bit16_x && bit16_y) {
|
||||||
if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
|
if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " S" << std::endl;
|
std::cerr << " S" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f16_aligned_s : &vk_pipeline_matmul_f16_s;
|
return aligned ? &vk_pipeline_matmul_f16_aligned_s : &vk_pipeline_matmul_f16_s;
|
||||||
}
|
}
|
||||||
if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) {
|
if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " M" << std::endl;
|
std::cerr << " M" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f16_aligned_m : &vk_pipeline_matmul_f16_m;
|
return aligned ? &vk_pipeline_matmul_f16_aligned_m : &vk_pipeline_matmul_f16_m;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " L" << std::endl;
|
std::cerr << " L" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f16_aligned_l : &vk_pipeline_matmul_f16_l;
|
return aligned ? &vk_pipeline_matmul_f16_aligned_l : &vk_pipeline_matmul_f16_l;
|
||||||
}
|
}
|
||||||
if (bit16_x && !bit16_y) {
|
if (bit16_x && !bit16_y) {
|
||||||
if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
|
if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " S" << std::endl;
|
std::cerr << " S" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f16_f32_aligned_s : &vk_pipeline_matmul_f16_f32_s;
|
return aligned ? &vk_pipeline_matmul_f16_f32_aligned_s : &vk_pipeline_matmul_f16_f32_s;
|
||||||
}
|
}
|
||||||
if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) {
|
if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " M" << std::endl;
|
std::cerr << " M" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f16_f32_aligned_m : &vk_pipeline_matmul_f16_f32_m;
|
return aligned ? &vk_pipeline_matmul_f16_f32_aligned_m : &vk_pipeline_matmul_f16_f32_m;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " L" << std::endl;
|
std::cerr << " L" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f16_f32_aligned_l : &vk_pipeline_matmul_f16_f32_l;
|
return aligned ? &vk_pipeline_matmul_f16_f32_aligned_l : &vk_pipeline_matmul_f16_f32_l;
|
||||||
|
@ -1885,25 +1885,25 @@ static vk_pipeline* ggml_vk_guess_matmul_pipeline(bool bit16_x, bool bit16_y, in
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
|
if (vk_device.vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " S" << std::endl;
|
std::cerr << " S" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f32_aligned_s : &vk_pipeline_matmul_f32_s;
|
return aligned ? &vk_pipeline_matmul_f32_aligned_s : &vk_pipeline_matmul_f32_s;
|
||||||
}
|
}
|
||||||
if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) {
|
if (vk_device.subgroup_size == 64 || m <= 64 || n <= 64) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " M" << std::endl;
|
std::cerr << " M" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f32_aligned_m : &vk_pipeline_matmul_f32_m;
|
return aligned ? &vk_pipeline_matmul_f32_aligned_m : &vk_pipeline_matmul_f32_m;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " L" << std::endl;
|
std::cerr << " L" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &vk_pipeline_matmul_f32_aligned_l : &vk_pipeline_matmul_f32_l;
|
return aligned ? &vk_pipeline_matmul_f32_aligned_l : &vk_pipeline_matmul_f32_l;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_matmul(vk_context * ctx, vk_pipeline& pipeline, vk_subbuffer&& a, vk_subbuffer&& b, vk_subbuffer&& d, vk_subbuffer&& split_k_buffer, uint32_t m, uint32_t n, uint32_t k, uint32_t stride_a, uint32_t stride_b, uint32_t stride_d, uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3, uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d) {
|
static void ggml_vk_matmul(vk_context * ctx, vk_pipeline& pipeline, vk_subbuffer&& a, vk_subbuffer&& b, vk_subbuffer&& d, vk_subbuffer&& split_k_buffer, uint32_t m, uint32_t n, uint32_t k, uint32_t stride_a, uint32_t stride_b, uint32_t stride_d, uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3, uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_matmul(a: (" << a.buffer.buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer.buffer << ", " << b.offset << ", " << b.size << "), c: (" << d.buffer.buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << split_k_buffer.buffer.buffer << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ")" << std::endl;
|
std::cerr << "ggml_vk_matmul(a: (" << a.buffer.buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer.buffer << ", " << b.offset << ", " << b.size << "), c: (" << d.buffer.buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << split_k_buffer.buffer.buffer << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
ggml_vk_sync_buffers(ctx);
|
ggml_vk_sync_buffers(ctx);
|
||||||
|
@ -1946,7 +1946,7 @@ static vk_pipeline * ggml_vk_get_cpy_pipeline(ggml_type from, ggml_type to) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_cpy_to_contiguous(vk_context * ctx, vk_pipeline * pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out, ggml_type buffer_type, bool aligned=true) {
|
static void ggml_vk_cpy_to_contiguous(vk_context * ctx, vk_pipeline * pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out, ggml_type buffer_type, bool aligned=true) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_cpy_to_contiguous((" << tensor << ", type=" << tensor->type << ", backend=" << tensor->backend << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << "), ";
|
std::cerr << "ggml_vk_cpy_to_contiguous((" << tensor << ", type=" << tensor->type << ", backend=" << tensor->backend << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << "), ";
|
||||||
std::cerr << "buffer in size=" << in.buffer.size << ", buffer out size=" << out.buffer.size << ")" << std::endl;
|
std::cerr << "buffer in size=" << in.buffer.size << ", buffer out size=" << out.buffer.size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
@ -1968,7 +1968,7 @@ static void ggml_vk_cpy_to_contiguous(vk_context * ctx, vk_pipeline * pipeline,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_mul_mat_q_f16((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
std::cerr << "ggml_vk_mul_mat_q_f16((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
||||||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
||||||
|
@ -2152,7 +2152,7 @@ static void ggml_vk_mul_mat_q_f16(vk_context * ctx, const ggml_tensor * src0, co
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_mul_mat_vec_q_f16((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
std::cerr << "ggml_vk_mul_mat_vec_q_f16((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
||||||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
||||||
|
@ -2332,7 +2332,7 @@ static void ggml_vk_mul_mat_vec_q_f16(vk_context * ctx, const ggml_tensor * src0
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_mul_mat_p021_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
std::cerr << "ggml_vk_mul_mat_p021_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
||||||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
||||||
|
@ -2421,7 +2421,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(vk_context * ctx, const ggml_tensor
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_mul_mat_vec_nc_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_vk_mul_mat_vec_nc_f16_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_mul_mat_nc_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
std::cerr << "ggml_vk_mul_mat_nc_f16_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
||||||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
||||||
|
@ -2527,7 +2527,7 @@ static bool ggml_vk_can_mul_mat(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_mul_mat(vk_context * ctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
|
static void ggml_vk_mul_mat(vk_context * ctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_mul_mat(" << src0 << ", " << src1 << ", " << dst << ")" << std::endl;
|
std::cerr << "ggml_vk_mul_mat(" << src0 << ", " << src1 << ", " << dst << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if (src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
|
if (src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
|
||||||
|
@ -2740,7 +2740,7 @@ static void ggml_vk_check_results_0(ggml_compute_params * params, ggml_tensor *
|
||||||
|
|
||||||
template<typename PC>
|
template<typename PC>
|
||||||
static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_op op, const PC&& pc) {
|
static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_op op, const PC&& pc) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_op_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
std::cerr << "ggml_vk_op_f32((" << src0 << ", name=" << src0->name << ", type=" << src0->type << ", backend=" << src0->backend << ", ne0=" << src0->ne[0] << ", ne1=" << src0->ne[1] << ", ne2=" << src0->ne[2] << ", ne3=" << src0->ne[3] << ", nb0=" << src0->nb[0] << ", nb1=" << src0->nb[1] << ", nb2=" << src0->nb[2] << ", nb3=" << src0->nb[3];
|
||||||
if (src1 != nullptr) {
|
if (src1 != nullptr) {
|
||||||
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", backend=" << src1->backend << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3];
|
||||||
|
@ -3061,7 +3061,7 @@ static void ggml_vk_nop(vk_context * ctx, const ggml_tensor * src0, ggml_tensor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef VK_RUN_TESTS
|
#ifdef GGML_VULKAN_RUN_TESTS
|
||||||
static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0, int ne1, int i0, int i1, int i2) {
|
static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0, int ne1, int i0, int i1, int i2) {
|
||||||
if (type != GGML_TYPE_F32 && type != GGML_TYPE_F16) {
|
if (type != GGML_TYPE_F32 && type != GGML_TYPE_F16) {
|
||||||
return;
|
return;
|
||||||
|
@ -3095,7 +3095,7 @@ static void ggml_vk_print_matrix_area(const void * data, ggml_type type, int ne0
|
||||||
|
|
||||||
template <typename X_TYPE, typename Y_TYPE>
|
template <typename X_TYPE, typename Y_TYPE>
|
||||||
static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size_t num_it, int split_k, int shader_size) {
|
static void ggml_vk_test_matmul(size_t m, size_t n, size_t k, size_t batch, size_t num_it, int split_k, int shader_size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_test_matmul(" << m << ", " << n << ", " << k << ", " << batch << ", " << num_it << ", " << split_k << ", " << shader_size << ")" << std::endl;
|
std::cerr << "ggml_vk_test_matmul(" << m << ", " << n << ", " << k << ", " << batch << ", " << num_it << ", " << split_k << ", " << shader_size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
const size_t x_ne = m * k * batch;
|
const size_t x_ne = m * k * batch;
|
||||||
|
@ -3486,7 +3486,7 @@ static void ggml_vk_test_h2d_nc(size_t ne0, size_t ne1, size_t ne2, size_t ne3)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_test_transfer(size_t ne, bool pinned) {
|
static void ggml_vk_test_transfer(size_t ne, bool pinned) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_test_transfer(" << ne << ")" << std::endl;
|
std::cerr << "ggml_vk_test_transfer(" << ne << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
// Check transfers are correct
|
// Check transfers are correct
|
||||||
|
@ -3568,7 +3568,7 @@ static void ggml_vk_test_transfer(size_t ne, bool pinned) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_test_dequant(size_t ne, ggml_type quant) {
|
static void ggml_vk_test_dequant(size_t ne, ggml_type quant) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_test_dequant(" << ne << ")" << std::endl;
|
std::cerr << "ggml_vk_test_dequant(" << ne << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
const size_t x_sz = sizeof(float) * ne;
|
const size_t x_sz = sizeof(float) * ne;
|
||||||
|
@ -3662,7 +3662,7 @@ static void ggml_vk_test_dequant(size_t ne, ggml_type quant) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor) {
|
static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_create_extra(" << tensor << " (" << tensor->name << ", " << ggml_op_name(tensor->op) << "))" << std::endl;
|
std::cerr << "ggml_vk_create_extra(" << tensor << " (" << tensor->name << ", " << ggml_op_name(tensor->op) << "))" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
|
ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
|
||||||
|
@ -3686,7 +3686,7 @@ static ggml_tensor * ggml_vk_find_last_use(const ggml_tensor * node, ggml_cgraph
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vk_preallocate_buffers_graph(ggml_tensor * node){
|
void ggml_vk_preallocate_buffers_graph(ggml_tensor * node){
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl;
|
std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
const bool any_on_device = node->backend == GGML_BACKEND_GPU
|
const bool any_on_device = node->backend == GGML_BACKEND_GPU
|
||||||
|
@ -3805,11 +3805,11 @@ void ggml_vk_preallocate_buffers() {
|
||||||
if (vk_disable) {
|
if (vk_disable) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_preallocate_buffers()" << std::endl;
|
std::cerr << "ggml_vk_preallocate_buffers()" << std::endl;
|
||||||
std::cerr << "qx_size: " << vk_prealloc_size_qx << " qy_size: " << vk_prealloc_size_qy << " x_size: " << vk_prealloc_size_x << " y_size: " << vk_prealloc_size_y << " split_k_size: " << vk_prealloc_size_split_k << std::endl;
|
std::cerr << "qx_size: " << vk_prealloc_size_qx << " qy_size: " << vk_prealloc_size_qy << " x_size: " << vk_prealloc_size_x << " y_size: " << vk_prealloc_size_y << " split_k_size: " << vk_prealloc_size_split_k << std::endl;
|
||||||
#endif
|
#endif
|
||||||
#if defined(VK_RUN_TESTS)
|
#if defined(GGML_VULKAN_RUN_TESTS)
|
||||||
vk_staging = ggml_vk_create_buffer_check(100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
|
vk_staging = ggml_vk_create_buffer_check(100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
|
||||||
ggml_vk_test_transfer(8192 * 1000, false);
|
ggml_vk_test_transfer(8192 * 1000, false);
|
||||||
ggml_vk_test_transfer(8192 * 1000, true);
|
ggml_vk_test_transfer(8192 * 1000, true);
|
||||||
|
@ -3915,7 +3915,7 @@ void ggml_vk_build_graph(ggml_tensor * node, bool last_node){
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_build_graph(" << node << ", " << ggml_op_name(node->op) << ")" << std::endl;
|
std::cerr << "ggml_vk_build_graph(" << node << ", " << ggml_op_name(node->op) << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_semaphore_idx = 0;
|
vk_semaphore_idx = 0;
|
||||||
|
@ -4138,7 +4138,7 @@ bool ggml_vk_compute_forward(ggml_compute_params * params, ggml_tensor * tensor)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_compute_forward(" << tensor << ", name=" << tensor->name << ", op=" << ggml_op_name(tensor->op) << ", type=" << tensor->type << ", backend=" << tensor->backend << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << ", view_src=" << tensor->view_src << ", view_offs=" << tensor->view_offs << ")" << std::endl;
|
std::cerr << "ggml_vk_compute_forward(" << tensor << ", name=" << tensor->name << ", op=" << ggml_op_name(tensor->op) << ", type=" << tensor->type << ", backend=" << tensor->backend << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << ", view_src=" << tensor->view_src << ", view_offs=" << tensor->view_offs << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -4181,7 +4181,7 @@ void ggml_vk_graph_cleanup() {
|
||||||
if (vk_disable) {
|
if (vk_disable) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_graph_cleanup()" << std::endl;
|
std::cerr << "ggml_vk_graph_cleanup()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
for (auto& buffer : vk_gc.temp_buffers) {
|
for (auto& buffer : vk_gc.temp_buffers) {
|
||||||
|
@ -4220,7 +4220,7 @@ void ggml_vk_graph_cleanup() {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_cleanup() {
|
static void ggml_vk_cleanup() {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_vk_cleanup()" << std::endl;
|
std::cerr << "ggml_vk_cleanup()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
ggml_vk_destroy_buffer(vk_prealloc_x);
|
ggml_vk_destroy_buffer(vk_prealloc_x);
|
||||||
|
@ -4304,7 +4304,7 @@ GGML_CALL static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t bu
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
|
GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")" << std::endl;
|
std::cerr << "ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
||||||
|
@ -4324,7 +4324,7 @@ GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t b
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
std::cerr << "ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
||||||
|
@ -4337,7 +4337,7 @@ GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t bu
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
std::cerr << "ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
||||||
|
@ -4393,7 +4393,7 @@ GGML_CALL static const char * ggml_backend_vk_buffer_type_name(ggml_backend_buff
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static ggml_backend_buffer_t ggml_backend_vk_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
GGML_CALL static ggml_backend_buffer_t ggml_backend_vk_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_buffer_type_alloc_buffer(" << size << ")" << std::endl;
|
std::cerr << "ggml_backend_vk_buffer_type_alloc_buffer(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
vk_buffer dev_buffer = ggml_vk_create_buffer_device(size);
|
vk_buffer dev_buffer = ggml_vk_create_buffer_device(size);
|
||||||
|
@ -4537,7 +4537,7 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_vk_get_default_buffer_t
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_set_tensor_async(" << size << ")" << std::endl;
|
std::cerr << "ggml_backend_vk_set_tensor_async(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
||||||
|
@ -4557,7 +4557,7 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_get_tensor_async(" << size << ")" << std::endl;
|
std::cerr << "ggml_backend_vk_get_tensor_async(" << size << ")" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type() || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
||||||
|
@ -4577,7 +4577,7 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) {
|
GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_cpy_tensor_async()" << std::endl;
|
std::cerr << "ggml_backend_vk_cpy_tensor_async()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if ((dst->buffer->buft == ggml_backend_vk_buffer_type() || dst->buffer->buft == ggml_backend_vk_host_buffer_type()) && ggml_backend_buffer_is_vk(src->buffer)) {
|
if ((dst->buffer->buft == ggml_backend_vk_buffer_type() || dst->buffer->buft == ggml_backend_vk_host_buffer_type()) && ggml_backend_buffer_is_vk(src->buffer)) {
|
||||||
|
@ -4600,7 +4600,7 @@ GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, c
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) {
|
GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) {
|
||||||
#ifdef VK_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << "ggml_backend_vk_synchronize()" << std::endl;
|
std::cerr << "ggml_backend_vk_synchronize()" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
if(vk_ctx == nullptr) {
|
if(vk_ctx == nullptr) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue