Fix tensor load to device
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
parent
b3ff66d87f
commit
de10afa80f
1 changed file with 10 additions and 4 deletions
|
@@ -919,13 +919,19 @@ void ggml_cl_transform_tensor(ggml_tensor * tensor) {
|
||||||
const size_t q_sz = ggml_type_size(type) * ne0 * ne1 * ne2 * ne3 / ggml_blck_size(type);
|
const size_t q_sz = ggml_type_size(type) * ne0 * ne1 * ne2 * ne3 / ggml_blck_size(type);
|
||||||
|
|
||||||
size_t q_size;
|
size_t q_size;
|
||||||
cl_mem* d_Q = (cl_mem*) malloc(sizeof(cl_mem));
|
cl_mem* dst = (cl_mem*) malloc(sizeof(cl_mem));
|
||||||
*d_Q = ggml_cl_pool_malloc(q_sz, &q_size, CL_MEM_READ_ONLY);
|
*dst = ggml_cl_pool_malloc(q_sz, &q_size, CL_MEM_READ_ONLY);
|
||||||
|
|
||||||
// copy tensor to device
|
// copy tensor to device
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, *d_Q, 0, tensor, 0, 0, NULL), "ggml_cl_h2d_tensor_2d");
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
||||||
|
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
||||||
|
int i = i3*ne2 + i2;
|
||||||
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, *dst, i*ne0*ne1, tensor, i3, i2, NULL), "ggml_cl_h2d_tensor_2d");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CL_CHECK(clFinish(queue), "clFinish");
|
CL_CHECK(clFinish(queue), "clFinish");
|
||||||
|
|
||||||
tensor->data = d_Q;
|
tensor->data = dst;
|
||||||
tensor->backend = GGML_BACKEND_CL;
|
tensor->backend = GGML_BACKEND_CL;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue