code : cont
This commit is contained in:
parent
0aead81f4a
commit
cb246633ed
4 changed files with 33 additions and 33 deletions
|
@ -10804,7 +10804,7 @@ GGML_CALL bool ggml_cuda_compute_forward(struct ggml_compute_params * params, st
|
||||||
if (params->ith != 0) {
|
if (params->ith != 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
func(tensor->src[0], tensor->src[1], tensor);
|
func(tensor->src[0], tensor->src[1], tensor);
|
||||||
|
@ -11475,7 +11475,7 @@ GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, gg
|
||||||
ggml_cuda_set_main_device(cuda_ctx->device);
|
ggml_cuda_set_main_device(cuda_ctx->device);
|
||||||
|
|
||||||
ggml_compute_params params = {};
|
ggml_compute_params params = {};
|
||||||
params.type = GGML_TASK_COMPUTE;
|
params.type = GGML_TASK_TYPE_COMPUTE;
|
||||||
params.ith = 0;
|
params.ith = 0;
|
||||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||||
ggml_tensor * node = cgraph->nodes[i];
|
ggml_tensor * node = cgraph->nodes[i];
|
||||||
|
|
|
@ -1354,7 +1354,7 @@ static void ggml_cl_pool_free(cl_mem mem, size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_cl_free_data(const struct ggml_tensor* tensor) {
|
void ggml_cl_free_data(const struct ggml_tensor* tensor) {
|
||||||
if (tensor->backend != GGML_BACKEND_GPU) {
|
if (tensor->backend != GGML_BACKEND_TYPE_GPU) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1412,7 +1412,7 @@ static cl_int ggml_cl_h2d_tensor_2d(cl_command_queue queue, cl_mem dst, size_t o
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_cl_mul_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_cl_mul_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
|
GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
|
||||||
const int64_t ne00 = src0->ne[0];
|
const int64_t ne00 = src0->ne[0];
|
||||||
const int64_t ne01 = src0->ne[1];
|
const int64_t ne01 = src0->ne[1];
|
||||||
const int64_t ne02 = src0->ne[2];
|
const int64_t ne02 = src0->ne[2];
|
||||||
|
@ -1476,7 +1476,7 @@ void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_cl_add_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_cl_add_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
|
GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
|
||||||
const int64_t ne00 = src0->ne[0];
|
const int64_t ne00 = src0->ne[0];
|
||||||
const int64_t ne01 = src0->ne[1];
|
const int64_t ne01 = src0->ne[1];
|
||||||
const int64_t ne02 = src0->ne[2];
|
const int64_t ne02 = src0->ne[2];
|
||||||
|
@ -1566,13 +1566,13 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
size_t y_size;
|
size_t y_size;
|
||||||
size_t d_size;
|
size_t d_size;
|
||||||
cl_mem d_X;
|
cl_mem d_X;
|
||||||
if (src0->backend == GGML_BACKEND_GPU) { // NOLINT
|
if (src0->backend == GGML_BACKEND_TYPE_GPU) { // NOLINT
|
||||||
d_X = (cl_mem) src0->extra;
|
d_X = (cl_mem) src0->extra;
|
||||||
} else {
|
} else {
|
||||||
d_X = ggml_cl_pool_malloc(sizeof(float) * x_ne, &x_size);
|
d_X = ggml_cl_pool_malloc(sizeof(float) * x_ne, &x_size);
|
||||||
}
|
}
|
||||||
cl_mem d_Y = src1->backend == GGML_BACKEND_GPU ? (cl_mem) src1->extra : ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
|
cl_mem d_Y = src1->backend == GGML_BACKEND_TYPE_GPU ? (cl_mem) src1->extra : ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
|
||||||
cl_mem d_D = dst->backend == GGML_BACKEND_GPU ? (cl_mem) dst->extra : ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
|
cl_mem d_D = dst->backend == GGML_BACKEND_TYPE_GPU ? (cl_mem) dst->extra : ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
|
||||||
|
|
||||||
size_t x_offset = 0;
|
size_t x_offset = 0;
|
||||||
|
|
||||||
|
@ -1580,7 +1580,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
// TODO: copy src0 here when r3>1
|
// TODO: copy src0 here when r3>1
|
||||||
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
|
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
||||||
if (src0->backend == GGML_BACKEND_GPU) {
|
if (src0->backend == GGML_BACKEND_TYPE_GPU) {
|
||||||
x_offset = (i03 * ne02 + i02) * x_ne;
|
x_offset = (i03 * ne02 + i02) * x_ne;
|
||||||
} else {
|
} else {
|
||||||
// copy src0 to device
|
// copy src0 to device
|
||||||
|
@ -1589,7 +1589,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
|
|
||||||
for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
|
for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
|
||||||
// copy src1 to device
|
// copy src1 to device
|
||||||
if (src1->backend == GGML_BACKEND_CPU) {
|
if (src1->backend == GGML_BACKEND_TYPE_CPU) {
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, NULL));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1612,7 +1612,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy dst to host
|
// copy dst to host
|
||||||
if (dst->backend == GGML_BACKEND_CPU) {
|
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||||
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &ev_sgemm, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &ev_sgemm, NULL));
|
||||||
}
|
}
|
||||||
|
@ -1621,13 +1621,13 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (src0->backend != GGML_BACKEND_GPU) {
|
if (src0->backend != GGML_BACKEND_TYPE_GPU) {
|
||||||
ggml_cl_pool_free(d_X, x_size);
|
ggml_cl_pool_free(d_X, x_size);
|
||||||
}
|
}
|
||||||
if (src1->backend != GGML_BACKEND_GPU) {
|
if (src1->backend != GGML_BACKEND_TYPE_GPU) {
|
||||||
ggml_cl_pool_free(d_Y, y_size);
|
ggml_cl_pool_free(d_Y, y_size);
|
||||||
}
|
}
|
||||||
if (dst->backend != GGML_BACKEND_GPU) {
|
if (dst->backend != GGML_BACKEND_TYPE_GPU) {
|
||||||
ggml_cl_pool_free(d_D, d_size);
|
ggml_cl_pool_free(d_D, d_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1670,7 +1670,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
size_t y_size;
|
size_t y_size;
|
||||||
size_t d_size;
|
size_t d_size;
|
||||||
cl_mem d_X;
|
cl_mem d_X;
|
||||||
if (src0->backend == GGML_BACKEND_GPU) { // NOLINT
|
if (src0->backend == GGML_BACKEND_TYPE_GPU) { // NOLINT
|
||||||
d_X = (cl_mem) src0->extra;
|
d_X = (cl_mem) src0->extra;
|
||||||
} else {
|
} else {
|
||||||
d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size);
|
d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size);
|
||||||
|
@ -1687,7 +1687,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
// TODO: copy src0 here when r3>1
|
// TODO: copy src0 here when r3>1
|
||||||
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
|
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
||||||
if (src0->backend == GGML_BACKEND_GPU) {
|
if (src0->backend == GGML_BACKEND_TYPE_GPU) {
|
||||||
x_offset = (i03 * ne02 + i02) * x_ne;
|
x_offset = (i03 * ne02 + i02) * x_ne;
|
||||||
} else {
|
} else {
|
||||||
// copy src0 to device
|
// copy src0 to device
|
||||||
|
@ -1741,7 +1741,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy dst to host, then convert to float
|
// copy dst to host, then convert to float
|
||||||
if (dst->backend == GGML_BACKEND_CPU) {
|
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(ggml_fp16_t) * d_ne, tmp, 1, &ev_sgemm, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(ggml_fp16_t) * d_ne, tmp, 1, &ev_sgemm, NULL));
|
||||||
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||||
ggml_fp16_to_fp32_row(tmp, d, d_ne);
|
ggml_fp16_to_fp32_row(tmp, d, d_ne);
|
||||||
|
@ -1753,7 +1753,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (src0->backend != GGML_BACKEND_GPU) {
|
if (src0->backend != GGML_BACKEND_TYPE_GPU) {
|
||||||
ggml_cl_pool_free(d_X, x_size);
|
ggml_cl_pool_free(d_X, x_size);
|
||||||
}
|
}
|
||||||
ggml_cl_pool_free(d_Y, y_size);
|
ggml_cl_pool_free(d_Y, y_size);
|
||||||
|
@ -1798,7 +1798,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
||||||
cl_mem d_Y = ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
|
cl_mem d_Y = ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
|
||||||
cl_mem d_D = ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
|
cl_mem d_D = ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
|
||||||
cl_mem d_Q;
|
cl_mem d_Q;
|
||||||
if (src0->backend == GGML_BACKEND_CPU) {
|
if (src0->backend == GGML_BACKEND_TYPE_CPU) {
|
||||||
d_Q = ggml_cl_pool_malloc(q_sz, &q_size);
|
d_Q = ggml_cl_pool_malloc(q_sz, &q_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1817,10 +1817,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
||||||
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
|
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
||||||
// copy src0 to device if necessary
|
// copy src0 to device if necessary
|
||||||
if (src0->backend == GGML_BACKEND_CPU) {
|
if (src0->backend == GGML_BACKEND_TYPE_CPU) {
|
||||||
events.emplace_back();
|
events.emplace_back();
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, events.data() + ev_idx++));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, events.data() + ev_idx++));
|
||||||
} else if (src0->backend == GGML_BACKEND_GPU) {
|
} else if (src0->backend == GGML_BACKEND_TYPE_GPU) {
|
||||||
d_Q = (cl_mem) src0->extra;
|
d_Q = (cl_mem) src0->extra;
|
||||||
} else {
|
} else {
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
|
@ -1829,7 +1829,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
||||||
if (!mul_mat_vec) {
|
if (!mul_mat_vec) {
|
||||||
// convert src0 to fp32 on device
|
// convert src0 to fp32 on device
|
||||||
const size_t global = x_ne / global_denom;
|
const size_t global = x_ne / global_denom;
|
||||||
const size_t offset = src0->backend == GGML_BACKEND_GPU ? (i03 * ne02 + i02) * x_bps : 0;
|
const size_t offset = src0->backend == GGML_BACKEND_TYPE_GPU ? (i03 * ne02 + i02) * x_bps : 0;
|
||||||
CL_CHECK(clSetKernelArg(*to_fp32_cl, 0, sizeof(cl_mem), &d_Q));
|
CL_CHECK(clSetKernelArg(*to_fp32_cl, 0, sizeof(cl_mem), &d_Q));
|
||||||
CL_CHECK(clSetKernelArg(*to_fp32_cl, 1, sizeof(cl_mem), &d_X));
|
CL_CHECK(clSetKernelArg(*to_fp32_cl, 1, sizeof(cl_mem), &d_X));
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
|
CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
|
||||||
|
@ -1843,7 +1843,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
||||||
|
|
||||||
// compute
|
// compute
|
||||||
const size_t global = ne01 * local;
|
const size_t global = ne01 * local;
|
||||||
const size_t offset = src0->backend == GGML_BACKEND_GPU ? (i03 * ne02 + i02) * x_bps : 0;
|
const size_t offset = src0->backend == GGML_BACKEND_TYPE_GPU ? (i03 * ne02 + i02) * x_bps : 0;
|
||||||
const cl_int ncols = ne00;
|
const cl_int ncols = ne00;
|
||||||
events.emplace_back();
|
events.emplace_back();
|
||||||
CL_CHECK(clSetKernelArg(*dmmv, 0, sizeof(cl_mem), &d_Q));
|
CL_CHECK(clSetKernelArg(*dmmv, 0, sizeof(cl_mem), &d_Q));
|
||||||
|
@ -1895,7 +1895,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
||||||
}
|
}
|
||||||
ggml_cl_pool_free(d_Y, y_size);
|
ggml_cl_pool_free(d_Y, y_size);
|
||||||
ggml_cl_pool_free(d_D, d_size);
|
ggml_cl_pool_free(d_D, d_size);
|
||||||
if (src0->backend == GGML_BACKEND_CPU) {
|
if (src0->backend == GGML_BACKEND_TYPE_CPU) {
|
||||||
ggml_cl_pool_free(d_Q, q_size);
|
ggml_cl_pool_free(d_Q, q_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1911,7 +1911,7 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
|
||||||
if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
||||||
src1->type == GGML_TYPE_F32 &&
|
src1->type == GGML_TYPE_F32 &&
|
||||||
dst->type == GGML_TYPE_F32 &&
|
dst->type == GGML_TYPE_F32 &&
|
||||||
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)) {
|
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_TYPE_GPU)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1993,7 +1993,7 @@ void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
|
||||||
CL_CHECK(clFinish(queue));
|
CL_CHECK(clFinish(queue));
|
||||||
|
|
||||||
tensor->extra = dst;
|
tensor->extra = dst;
|
||||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml-backend
|
// ggml-backend
|
||||||
|
@ -2045,7 +2045,7 @@ static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
||||||
ctx->sub_buffers.push_back(sub_buffer);
|
ctx->sub_buffers.push_back(sub_buffer);
|
||||||
tensor->extra = sub_buffer;
|
tensor->extra = sub_buffer;
|
||||||
}
|
}
|
||||||
tensor->backend = GGML_BACKEND_GPU;
|
tensor->backend = GGML_BACKEND_TYPE_GPU;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||||
|
|
|
@ -14366,7 +14366,7 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_
|
||||||
if (params->ith != 0) {
|
if (params->ith != 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
func(tensor->src[0], tensor->src[1], tensor);
|
func(tensor->src[0], tensor->src[1], tensor);
|
||||||
|
@ -14880,7 +14880,7 @@ static bool ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph
|
||||||
ggml_sycl_set_main_device(sycl_ctx->device);
|
ggml_sycl_set_main_device(sycl_ctx->device);
|
||||||
|
|
||||||
ggml_compute_params params = {};
|
ggml_compute_params params = {};
|
||||||
params.type = GGML_TASK_COMPUTE;
|
params.type = GGML_TASK_TYPE_COMPUTE;
|
||||||
params.ith = 0;
|
params.ith = 0;
|
||||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||||
ggml_tensor * node = cgraph->nodes[i];
|
ggml_tensor * node = cgraph->nodes[i];
|
||||||
|
|
|
@ -4442,7 +4442,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
if (params->ith != 0) {
|
if (params->ith != 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5106,7 +5106,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_compute_params params = {};
|
ggml_compute_params params = {};
|
||||||
params.type = GGML_TASK_COMPUTE;
|
params.type = GGML_TASK_TYPE_COMPUTE;
|
||||||
params.ith = 0;
|
params.ith = 0;
|
||||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||||
ggml_tensor * node = cgraph->nodes[i];
|
ggml_tensor * node = cgraph->nodes[i];
|
||||||
|
@ -5481,7 +5481,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
if (params->ith != 0) {
|
if (params->ith != 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5723,7 +5723,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
if (params->ith != 0) {
|
if (params->ith != 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) {
|
if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue