code : normalize enum names (#5697)
* code : normalize enum names ggml-ci * code : cont * code : cont
This commit is contained in:
parent
69917dfa55
commit
ab336a9d5e
20 changed files with 502 additions and 502 deletions
102
ggml-vulkan.cpp
102
ggml-vulkan.cpp
|
@ -2320,8 +2320,8 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context * su
|
|||
src1_uma = d_Qy != nullptr;
|
||||
}
|
||||
|
||||
const bool load_x = src0->backend != GGML_BACKEND_GPU && !src0_uma;
|
||||
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
||||
const bool load_x = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
|
||||
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
||||
|
||||
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
|
||||
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
|
||||
|
@ -2453,7 +2453,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context * su
|
|||
// compute
|
||||
ggml_vk_matmul(ctx, subctx, *pipeline, { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne12 * ne13 }, { ctx->prealloc_split_k, 0, d_sz * ne12 * ne13 * split_k }, ne01, ne11, ne10, ne10, ne10, ne01, split_k, ne12*ne13, ne02, ne12, r2, r3, stride_batch_x, stride_batch_y, ne20*ne21); // NOLINT
|
||||
|
||||
if (dst->backend == GGML_BACKEND_CPU) {
|
||||
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
// copy dst to host
|
||||
float * d = (float *) ((char *) dst->data);
|
||||
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, sizeof(float) * d_ne * ne12 * ne13);
|
||||
|
@ -2506,8 +2506,8 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context
|
|||
src1_uma = d_Qy != nullptr;
|
||||
}
|
||||
|
||||
const bool load_x = src0->backend != GGML_BACKEND_GPU && !src0_uma;
|
||||
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
||||
const bool load_x = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
|
||||
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
||||
|
||||
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
|
||||
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
|
||||
|
@ -2630,7 +2630,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context
|
|||
ggml_vk_sync_buffers(subctx);
|
||||
ggml_vk_dispatch_pipeline(ctx, subctx, *dmmv, { { d_X, x_offset, x_sz }, { d_Y, y_buffer_offset, y_sz + y_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 3 * sizeof(int), &pc, { (uint32_t)ne01, 1, 1});
|
||||
|
||||
if (dst->backend == GGML_BACKEND_CPU) {
|
||||
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
// copy dst to host
|
||||
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||
ggml_vk_sync_buffers(subctx);
|
||||
|
@ -2647,7 +2647,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
|
|||
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
||||
#endif
|
||||
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
|
||||
GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
|
||||
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // NOLINT
|
||||
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // NOLINT
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
|
@ -2679,7 +2679,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
|
|||
src1_uma = d_Qy != nullptr;
|
||||
}
|
||||
|
||||
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
||||
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
||||
|
||||
const uint64_t x_ne = ne00 * ne01 * ne02;
|
||||
const uint64_t y_ne = ne10 * ne11 * ne12;
|
||||
|
@ -2721,7 +2721,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
|
|||
ggml_vk_sync_buffers(subctx);
|
||||
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_p021_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
|
||||
|
||||
if (dst->backend == GGML_BACKEND_CPU) {
|
||||
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
// copy dst to host
|
||||
float * d = (float *) dst->data;
|
||||
ggml_vk_sync_buffers(subctx);
|
||||
|
@ -2738,7 +2738,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
|
|||
GGML_ASSERT(!ggml_is_transposed(src0));
|
||||
GGML_ASSERT(!ggml_is_transposed(src1));
|
||||
GGML_ASSERT(!ggml_is_permuted(src0));
|
||||
GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
|
||||
|
@ -2771,7 +2771,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
|
|||
src1_uma = d_Qy != nullptr;
|
||||
}
|
||||
|
||||
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
||||
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
||||
|
||||
const uint64_t d_ne = ne01 * ne11 * ne12;
|
||||
|
||||
|
@ -2814,7 +2814,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
|
|||
ggml_vk_sync_buffers(subctx);
|
||||
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_nc_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
|
||||
|
||||
if (dst->backend == GGML_BACKEND_CPU) {
|
||||
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
// copy dst to host
|
||||
float * d = (float *) dst->data;
|
||||
ggml_vk_sync_buffers(subctx);
|
||||
|
@ -2832,7 +2832,7 @@ static bool ggml_vk_can_mul_mat(const ggml_tensor * src0, const ggml_tensor * sr
|
|||
return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
||||
(src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16 || ggml_is_quantized(src1->type)) &&
|
||||
dst->type == GGML_TYPE_F32 &&
|
||||
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU);
|
||||
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_TYPE_GPU);
|
||||
}
|
||||
|
||||
static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context * subctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
|
||||
|
@ -2880,8 +2880,8 @@ static void ggml_vk_op_repeat(ggml_backend_vk_context * ctx, vk_context * subctx
|
|||
// TODO: support for transposed / permuted tensors
|
||||
GGML_ASSERT(nb0 == sizeof(float));
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(dst->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
|
||||
GGML_ASSERT(dst->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra;
|
||||
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
|
||||
|
@ -3110,8 +3110,8 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|||
}
|
||||
}
|
||||
|
||||
const bool transfer_src0 = src0->backend != GGML_BACKEND_GPU && !src0_uma;
|
||||
const bool transfer_src1 = use_src1 && src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
||||
const bool transfer_src0 = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
|
||||
const bool transfer_src1 = use_src1 && src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
||||
|
||||
uint64_t x_sz = ggml_vk_align_size(ggml_type_size(src0->type) * ne0, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment);
|
||||
uint64_t y_sz = use_src1 ? ggml_vk_align_size(ggml_type_size(src1->type) * ne1, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) : 0;
|
||||
|
@ -3120,7 +3120,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|||
vk_buffer d_D = extra->buffer_gpu.lock();
|
||||
|
||||
// Workaround for tiny tensor inputs on ROPE
|
||||
if (use_src1 && src1->backend == GGML_BACKEND_GPU && y_sz > d_D->size) {
|
||||
if (use_src1 && src1->backend == GGML_BACKEND_TYPE_GPU && y_sz > d_D->size) {
|
||||
y_sz = VK_WHOLE_SIZE;
|
||||
}
|
||||
|
||||
|
@ -3209,9 +3209,9 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|||
ggml_vk_sync_buffers(subctx);
|
||||
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
|
||||
}
|
||||
if (dst->backend == GGML_BACKEND_CPU && op == GGML_OP_CPY) {
|
||||
if (dst->backend == GGML_BACKEND_TYPE_CPU && op == GGML_OP_CPY) {
|
||||
ggml_vk_d2h_tensor_2d(ctx, subctx, d_D, 0, dst);
|
||||
} else if(dst->backend == GGML_BACKEND_CPU) {
|
||||
} else if(dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
// copy dst to host
|
||||
float * d = (float *) dst->data;
|
||||
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, d_sz);
|
||||
|
@ -3253,7 +3253,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|||
ggml_vk_sync_buffers(subctx);
|
||||
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements);
|
||||
}
|
||||
if (dst->backend == GGML_BACKEND_CPU) {
|
||||
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
// copy dst to host
|
||||
ggml_vk_buffer_read_async(ctx, subctx, d_D, d_buf_offset + d_offset, (char *) dst->data + i02*nb2 + i03*nb3, d_sz);
|
||||
}
|
||||
|
@ -3359,7 +3359,7 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context * subctx, con
|
|||
|
||||
static void ggml_vk_nop(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) {
|
||||
// If backend is CPU, data from src0 has to be copied off the device
|
||||
if (dst->backend == GGML_BACKEND_CPU) {
|
||||
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
|
||||
vk_buffer d_D = extra_src0->buffer_gpu.lock();
|
||||
ggml_vk_sync_buffers(subctx);
|
||||
|
@ -3994,9 +3994,9 @@ static void ggml_vk_preallocate_buffers_graph(ggml_backend_vk_context * ctx, ggm
|
|||
#ifdef GGML_VULKAN_DEBUG
|
||||
std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl;
|
||||
#endif
|
||||
const bool any_on_device = node->backend == GGML_BACKEND_GPU
|
||||
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
||||
|| (node->src[1] != nullptr && (node->src[1]->backend == GGML_BACKEND_GPU));
|
||||
const bool any_on_device = node->backend == GGML_BACKEND_TYPE_GPU
|
||||
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_TYPE_GPU || node->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
|
||||
|| (node->src[1] != nullptr && (node->src[1]->backend == GGML_BACKEND_TYPE_GPU));
|
||||
|
||||
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT)) {
|
||||
return;
|
||||
|
@ -4215,9 +4215,9 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
|||
}
|
||||
|
||||
static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, bool last_node){
|
||||
const bool any_on_device = node->backend == GGML_BACKEND_GPU
|
||||
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
||||
|| (node->src[1] != nullptr && node->src[1]->backend == GGML_BACKEND_GPU);
|
||||
const bool any_on_device = node->backend == GGML_BACKEND_TYPE_GPU
|
||||
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_TYPE_GPU || node->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
|
||||
|| (node->src[1] != nullptr && node->src[1]->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT) || (node->op == GGML_OP_MUL_MAT && !any_on_device && !ggml_vk_can_mul_mat(node->src[0], node->src[1], node))) {
|
||||
return;
|
||||
|
@ -4371,7 +4371,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
|
|||
last_node = true;
|
||||
#endif
|
||||
|
||||
if (node->backend == GGML_BACKEND_CPU || last_node) {
|
||||
if (node->backend == GGML_BACKEND_TYPE_CPU || last_node) {
|
||||
ggml_vk_ctx_end(ctx->compute_ctx);
|
||||
ctx->compute_ctx->exit_tensor = node;
|
||||
ctx->compute_ctx = nullptr;
|
||||
|
@ -4379,9 +4379,9 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
|
|||
}
|
||||
|
||||
static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor){
|
||||
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|
||||
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
||||
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
|
||||
const bool any_on_device = tensor->backend == GGML_BACKEND_TYPE_GPU
|
||||
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
|
||||
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
if (ctx->disable || (!any_on_device && tensor->op != GGML_OP_MUL_MAT)) {
|
||||
return false;
|
||||
|
@ -4442,7 +4442,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_
|
|||
if (params->ith != 0) {
|
||||
return true;
|
||||
}
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -4745,7 +4745,7 @@ GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t b
|
|||
extra->offset = (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
|
||||
}
|
||||
|
||||
tensor->backend = GGML_BACKEND_GPU;
|
||||
tensor->backend = GGML_BACKEND_TYPE_GPU;
|
||||
tensor->extra = extra;
|
||||
}
|
||||
|
||||
|
@ -4753,7 +4753,7 @@ GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t bu
|
|||
#ifdef GGML_VULKAN_DEBUG
|
||||
std::cerr << "ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
||||
#endif
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
||||
|
||||
|
@ -4768,7 +4768,7 @@ GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t bu
|
|||
#ifdef GGML_VULKAN_DEBUG
|
||||
std::cerr << "ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
||||
#endif
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
||||
|
||||
|
@ -4999,7 +4999,7 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g
|
|||
#endif
|
||||
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
|
||||
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
||||
|
||||
|
@ -5020,7 +5020,7 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c
|
|||
#endif
|
||||
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
|
||||
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
||||
|
||||
|
@ -5097,7 +5097,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml
|
|||
int last_node = cgraph->n_nodes - 1;
|
||||
|
||||
// If the last op in the cgraph isn't backend GPU, the command buffer doesn't get closed properly
|
||||
while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_GPU) {
|
||||
while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_TYPE_GPU) {
|
||||
last_node -= 1;
|
||||
}
|
||||
|
||||
|
@ -5106,7 +5106,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml
|
|||
}
|
||||
|
||||
ggml_compute_params params = {};
|
||||
params.type = GGML_TASK_COMPUTE;
|
||||
params.type = GGML_TASK_TYPE_COMPUTE;
|
||||
params.ith = 0;
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
ggml_tensor * node = cgraph->nodes[i];
|
||||
|
@ -5410,7 +5410,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, const void * d
|
|||
static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tensor * tensor, const char * name) {
|
||||
void * tensor_data = tensor->data;
|
||||
|
||||
if (tensor->backend == GGML_BACKEND_GPU) {
|
||||
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
||||
const size_t tensor_size = ggml_nbytes(tensor);
|
||||
tensor_data = malloc(tensor_size);
|
||||
|
||||
|
@ -5436,14 +5436,14 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
|
|||
std::vector<const ggml_tensor *> done;
|
||||
ggml_vk_print_graph_origin(tensor, done);
|
||||
|
||||
if (tensor->backend == GGML_BACKEND_GPU) {
|
||||
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
||||
free(tensor_data);
|
||||
}
|
||||
}
|
||||
|
||||
static void ggml_vk_check_tensor(const std::string& name, const ggml_tensor * tensor) {
|
||||
return;
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_CPU);
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_CPU);
|
||||
if (tensor->type != GGML_TYPE_F32 && tensor->type != GGML_TYPE_F16) {
|
||||
return;
|
||||
}
|
||||
|
@ -5481,7 +5481,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
|||
if (params->ith != 0) {
|
||||
return;
|
||||
}
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
||||
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -5518,10 +5518,10 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
|||
|
||||
src0_buffer = malloc(src0_size);
|
||||
src0_clone->data = src0_buffer;
|
||||
if (src0->backend == GGML_BACKEND_CPU) {
|
||||
if (src0->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
memcpy(src0_clone->data, src0->data, src0_size);
|
||||
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||
} else if (src0->backend == GGML_BACKEND_GPU) {
|
||||
} else if (src0->backend == GGML_BACKEND_TYPE_GPU) {
|
||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
|
||||
uint64_t offset = extra->offset;
|
||||
if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
|
||||
|
@ -5561,10 +5561,10 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
|||
|
||||
src1_buffer = malloc(src1_size);
|
||||
src1_clone->data = src1_buffer;
|
||||
if (src1->backend == GGML_BACKEND_CPU) {
|
||||
if (src1->backend == GGML_BACKEND_TYPE_CPU) {
|
||||
memcpy(src1_clone->data, src1->data, src1_size);
|
||||
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||
} else if (src1->backend == GGML_BACKEND_GPU) {
|
||||
} else if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
|
||||
uint64_t offset = extra->offset;
|
||||
if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
|
||||
|
@ -5723,7 +5723,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
|||
if (params->ith != 0) {
|
||||
return;
|
||||
}
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
||||
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
||||
return;
|
||||
}
|
||||
if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) {
|
||||
|
@ -5735,7 +5735,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
|||
|
||||
void * tensor_data = tensor->data;
|
||||
|
||||
if (tensor->backend == GGML_BACKEND_GPU) {
|
||||
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
||||
size_t tensor_size = ggml_nbytes(tensor);
|
||||
tensor_data = malloc(tensor_size);
|
||||
|
||||
|
@ -5868,7 +5868,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
|||
comp_result = nullptr;
|
||||
comp_size = 0;
|
||||
|
||||
if (tensor->backend == GGML_BACKEND_GPU) {
|
||||
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
||||
free(tensor_data);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue