Fix Vulkan check results
This commit is contained in:
parent
5e0a9a2d37
commit
c19c59812e
1 changed files with 16 additions and 12 deletions
|
@ -5344,8 +5344,9 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
|
||||||
tensor_data = malloc(tensor_size);
|
tensor_data = malloc(tensor_size);
|
||||||
|
|
||||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
||||||
|
vk_buffer buf = extra->buffer_gpu.lock();
|
||||||
|
|
||||||
ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
|
ggml_vk_buffer_read(ctx, buf, extra->offset, tensor_data, tensor_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
|
std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
|
||||||
|
@ -5452,12 +5453,13 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||||
} else if (src0->backend == GGML_BACKEND_GPU) {
|
} else if (src0->backend == GGML_BACKEND_GPU) {
|
||||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
|
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
|
||||||
|
vk_buffer buf = extra->buffer_gpu.lock();
|
||||||
uint64_t offset = extra->offset;
|
uint64_t offset = extra->offset;
|
||||||
if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
|
if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
|
||||||
for (int i3 = 0; i3 < src0->ne[3]; i3++) {
|
for (int i3 = 0; i3 < src0->ne[3]; i3++) {
|
||||||
for (int i2 = 0; i2 < src0->ne[2]; i2++) {
|
for (int i2 = 0; i2 < src0->ne[2]; i2++) {
|
||||||
const int idx = i3*src0->ne[2] + i2;
|
const int idx = i3*src0->ne[2] + i2;
|
||||||
ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
|
ggml_vk_buffer_read(ctx, buf, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5467,10 +5469,10 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
|
src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (offset + src0_size >= extra->buffer_gpu->size) {
|
if (offset + src0_size >= buf->size) {
|
||||||
src0_size = extra->buffer_gpu->size - offset;
|
src0_size = buf->size - offset;
|
||||||
}
|
}
|
||||||
ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src0_clone->data, src0_size);
|
ggml_vk_buffer_read(ctx, buf, offset, src0_clone->data, src0_size);
|
||||||
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -5495,12 +5497,13 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||||
} else if (src1->backend == GGML_BACKEND_GPU) {
|
} else if (src1->backend == GGML_BACKEND_GPU) {
|
||||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
|
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
|
||||||
|
vk_buffer buf = extra->buffer_gpu.lock();
|
||||||
uint64_t offset = extra->offset;
|
uint64_t offset = extra->offset;
|
||||||
if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
|
if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
|
||||||
for (int i3 = 0; i3 < src1->ne[3]; i3++) {
|
for (int i3 = 0; i3 < src1->ne[3]; i3++) {
|
||||||
for (int i2 = 0; i2 < src1->ne[2]; i2++) {
|
for (int i2 = 0; i2 < src1->ne[2]; i2++) {
|
||||||
const int idx = i3*src1->ne[2] + i2;
|
const int idx = i3*src1->ne[2] + i2;
|
||||||
ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
|
ggml_vk_buffer_read(ctx, buf, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5510,10 +5513,10 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
|
src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (offset + src1_size >= extra->buffer_gpu->size) {
|
if (offset + src1_size >= buf->size) {
|
||||||
src1_size = extra->buffer_gpu->size - offset;
|
src1_size = buf->size - offset;
|
||||||
}
|
}
|
||||||
ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src1_clone->data, src1_size);
|
ggml_vk_buffer_read(ctx, buf, offset, src1_clone->data, src1_size);
|
||||||
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -5669,12 +5672,13 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
||||||
tensor_data = malloc(tensor_size);
|
tensor_data = malloc(tensor_size);
|
||||||
|
|
||||||
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
||||||
|
vk_buffer buf = extra->buffer_gpu.lock();
|
||||||
|
|
||||||
if (extra->offset + tensor_size >= extra->buffer_gpu->size) {
|
if (extra->offset + tensor_size >= buf->size) {
|
||||||
tensor_size = extra->buffer_gpu->size - (extra->offset);
|
tensor_size = buf->size - (extra->offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
|
ggml_vk_buffer_read(ctx, buf, extra->offset, tensor_data, tensor_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
float first_error_result = -1.0f;
|
float first_error_result = -1.0f;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue