Enable asynchronous transfers in Vulkan backend
This commit is contained in:
parent
0dcf96d437
commit
f04dc725d6
2 changed files with 25 additions and 23 deletions
|
@ -1437,7 +1437,7 @@ static void sched_compute_splits(ggml_backend_sched_t sched) {
|
|||
// this is important to avoid copying constants such as KQ_mask and inp_pos multiple times
|
||||
ggml_backend_tensor_copy_async(split_backend, input, input_cpy);
|
||||
}
|
||||
//ggml_backend_synchronize(split_backend); // necessary to measure copy time
|
||||
ggml_backend_synchronize(split_backend);
|
||||
int64_t copy_end_us = ggml_time_us();
|
||||
copy_us[split_backend_id] += copy_end_us - copy_start_us;
|
||||
|
||||
|
|
|
@ -255,6 +255,7 @@ static size_t vk_staging_offset;
|
|||
static vk_buffer vk_sync_staging;
|
||||
|
||||
static vk_context * vk_ctx;
|
||||
static vk_context * vk_transfer_ctx;
|
||||
|
||||
static bool vk_disable;
|
||||
|
||||
|
@ -1124,6 +1125,7 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
|
|||
vk_fence = vk_device.device.createFence({});
|
||||
|
||||
vk_ctx = nullptr;
|
||||
vk_transfer_ctx = nullptr;
|
||||
|
||||
vk_disable = false;
|
||||
|
||||
|
@ -4545,13 +4547,13 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g
|
|||
|
||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
||||
|
||||
if (vk_ctx == nullptr) {
|
||||
if (vk_transfer_ctx == nullptr) {
|
||||
// Initialize new transfer context
|
||||
vk_ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
||||
ggml_vk_ctx_begin(vk_ctx);
|
||||
vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
||||
ggml_vk_ctx_begin(vk_transfer_ctx);
|
||||
}
|
||||
|
||||
ggml_vk_buffer_write_async(vk_ctx, &extra->buffer_gpu, extra->offset + offset, data, size);
|
||||
ggml_vk_buffer_write_async(vk_transfer_ctx, &extra->buffer_gpu, extra->offset + offset, data, size);
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
@ -4565,13 +4567,13 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c
|
|||
|
||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
||||
|
||||
if (vk_ctx == nullptr) {
|
||||
if (vk_transfer_ctx == nullptr) {
|
||||
// Initialize new transfer context
|
||||
vk_ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
||||
ggml_vk_ctx_begin(vk_ctx);
|
||||
vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
||||
ggml_vk_ctx_begin(vk_transfer_ctx);
|
||||
}
|
||||
|
||||
ggml_vk_buffer_read_async(vk_ctx, &extra->buffer_gpu, extra->offset + offset, data, size);
|
||||
ggml_vk_buffer_read_async(vk_transfer_ctx, &extra->buffer_gpu, extra->offset + offset, data, size);
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
@ -4584,13 +4586,13 @@ GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, c
|
|||
ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra;
|
||||
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
|
||||
|
||||
if (vk_ctx == nullptr) {
|
||||
if (vk_transfer_ctx == nullptr) {
|
||||
// Initialize new transfer context
|
||||
vk_ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
||||
ggml_vk_ctx_begin(vk_ctx);
|
||||
vk_transfer_ctx = ggml_vk_create_context(vk_device.transfer_queue);
|
||||
ggml_vk_ctx_begin(vk_transfer_ctx);
|
||||
}
|
||||
|
||||
ggml_vk_buffer_copy_async(vk_ctx, &src_extra->buffer_gpu, src_extra->offset, &dst_extra->buffer_gpu, dst_extra->offset, ggml_nbytes(src));
|
||||
ggml_vk_buffer_copy_async(vk_transfer_ctx, &src_extra->buffer_gpu, src_extra->offset, &dst_extra->buffer_gpu, dst_extra->offset, ggml_nbytes(src));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -4603,25 +4605,25 @@ GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) {
|
|||
#ifdef GGML_VULKAN_DEBUG
|
||||
std::cerr << "ggml_backend_vk_synchronize()" << std::endl;
|
||||
#endif
|
||||
if(vk_ctx == nullptr) {
|
||||
if(vk_transfer_ctx == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
ggml_vk_ctx_end(vk_ctx);
|
||||
ggml_vk_ctx_end(vk_transfer_ctx);
|
||||
|
||||
for (auto& cpy : vk_ctx->in_memcpys) {
|
||||
for (auto& cpy : vk_transfer_ctx->in_memcpys) {
|
||||
memcpy(cpy.dst, cpy.src, cpy.n);
|
||||
}
|
||||
|
||||
ggml_vk_submit(vk_ctx, vk_fence);
|
||||
ggml_vk_submit(vk_transfer_ctx, vk_fence);
|
||||
VK_CHECK(vk_device.device.waitForFences({ vk_fence }, true, UINT64_MAX), "ggml_backend_vk_synchronize waitForFences");
|
||||
vk_device.device.resetFences({ vk_fence });
|
||||
|
||||
for (auto& cpy : vk_ctx->out_memcpys) {
|
||||
for (auto& cpy : vk_transfer_ctx->out_memcpys) {
|
||||
memcpy(cpy.dst, cpy.src, cpy.n);
|
||||
}
|
||||
|
||||
vk_ctx = nullptr;
|
||||
vk_transfer_ctx = nullptr;
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
@ -4773,10 +4775,10 @@ static ggml_backend_i ggml_backend_vk_interface = {
|
|||
/* .get_name = */ ggml_backend_vk_name,
|
||||
/* .free = */ ggml_backend_vk_free,
|
||||
/* .get_default_buffer_type = */ ggml_backend_vk_get_default_buffer_type,
|
||||
/* .set_tensor_async = */ NULL, // ggml_backend_vk_set_tensor_async,
|
||||
/* .get_tensor_async = */ NULL, // ggml_backend_vk_get_tensor_async,
|
||||
/* .cpy_tensor_async = */ NULL, // ggml_backend_vk_cpy_tensor_async,
|
||||
/* .synchronize = */ NULL, // ggml_backend_vk_synchronize,
|
||||
/* .set_tensor_async = */ ggml_backend_vk_set_tensor_async,
|
||||
/* .get_tensor_async = */ ggml_backend_vk_get_tensor_async,
|
||||
/* .cpy_tensor_async = */ ggml_backend_vk_cpy_tensor_async,
|
||||
/* .synchronize = */ ggml_backend_vk_synchronize,
|
||||
/* .graph_plan_create = */ NULL,
|
||||
/* .graph_plan_free = */ NULL,
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue