[CANN]: Fix ggml_backend_cann_buffer_get_tensor (#8871)

* cann: fix ggml_backend_cann_buffer_get_tensor

 1. fix the data pointer offset
 2. allow getting partial tensors (size no longer has to equal ggml_nbytes(tensor))

* cann: apply the same fix to ggml_backend_cann_buffer_set_tensor
Mengqing Cao, 2024-08-06 12:42:42 +08:00 (committed by GitHub)
commit c21a896405, parent d4ff847153

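The patch changes where `offset` is applied in the get/set paths: previously the offset was added to the host pointer `data` while the copy always started at the beginning of the device tensor; now the offset is applied to the device pointer `tensor->data` and the host buffer is used from its start, matching the ggml backend convention that `offset`/`size` describe a byte range inside the tensor. Below is a minimal sketch of the corrected non-transform copy paths, condensed from the hunks that follow (assuming ACL's aclrtMemcpy parameter order of destination, destination capacity, source, byte count, copy kind, as used throughout this file):

    // Sketch only, condensed from the hunks below; not a complete function.
    // get_tensor: copy `size` bytes starting at byte `offset` of the device
    // tensor into the start of the caller's host buffer `data`.
    ACL_CHECK(aclrtMemcpy(data, size, (char *)tensor->data + offset, size,
                          ACL_MEMCPY_DEVICE_TO_HOST));

    // set_tensor: copy `size` bytes from the start of the host buffer `data`
    // into the device tensor at byte `offset`.
    ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size, data, size,
                          ACL_MEMCPY_HOST_TO_DEVICE));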

@@ -898,7 +898,6 @@ GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
 GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
     ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data,
     size_t offset, size_t size) {
-    // GGML_ASSERT(size == ggml_nbytes(tensor));
     ggml_backend_cann_buffer_context *ctx =
         (ggml_backend_cann_buffer_context *)buffer->context;
@@ -908,22 +907,21 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
     // Why aclrtSynchronizeDevice?
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpy(tensor->data, size, (const char*)data + offset,
-                              size, ACL_MEMCPY_HOST_TO_DEVICE));
+        ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size, data, size,
+                              ACL_MEMCPY_HOST_TO_DEVICE));
     } else {
         void *transform_buffer = malloc(size);
-        ggml_backend_cann_transform(tensor, (const char*)data + offset,
-                                    transform_buffer);
+        ggml_backend_cann_transform(tensor, data, transform_buffer);
 #ifndef NDEBUG
         void *check_buffer = malloc(size);
         ggml_backend_cann_transform_back(tensor, transform_buffer,
                                          check_buffer);
-        GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size) ==
-                    0);
+        GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
         free(check_buffer);
 #endif
-        ACL_CHECK(aclrtMemcpy(tensor->data, size, transform_buffer, size,
-                              ACL_MEMCPY_HOST_TO_DEVICE));
+        ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
+                              transform_buffer, size,
+                              ACL_MEMCPY_HOST_TO_DEVICE));
         free(transform_buffer);
     }
@@ -945,21 +943,20 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
 GGML_CALL static void ggml_backend_cann_buffer_get_tensor(
     ggml_backend_buffer_t buffer, const ggml_tensor* tensor, void* data,
     size_t offset, size_t size) {
-    GGML_ASSERT(size == ggml_nbytes(tensor));
     ggml_backend_cann_buffer_context* ctx =
         (ggml_backend_cann_buffer_context*)buffer->context;
     ggml_cann_set_device(ctx->device);
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpy((char*)data + offset, size, tensor->data, size,
+        ACL_CHECK(aclrtMemcpy(data, size, (char*)tensor->data + offset, size,
                               ACL_MEMCPY_DEVICE_TO_HOST));
     } else {
         void* transform_buffer = malloc(size);
-        ACL_CHECK(aclrtMemcpy(transform_buffer, size, tensor->data, size,
-                              ACL_MEMCPY_DEVICE_TO_HOST));
-        ggml_backend_cann_transform_back(tensor, transform_buffer,
-                                         (char*)data + offset);
+        ACL_CHECK(aclrtMemcpy(transform_buffer, size,
+                              (char*)tensor->data + offset, size,
+                              ACL_MEMCPY_DEVICE_TO_HOST));
+        ggml_backend_cann_transform_back(tensor, transform_buffer, data);
         free(transform_buffer);
     }
 }
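Dropping GGML_ASSERT(size == ggml_nbytes(tensor)) in get_tensor means callers may now read a sub-range of a tensor that lives in a CANN buffer. A hypothetical usage fragment (the tensor `t` and the helper buffer are illustrative, not part of this patch), assuming a contiguous F32 tensor so the non-transform memcpy path is taken:

    // Hypothetical fragment: fetch only the second row of a contiguous 2-D
    // F32 tensor `t` allocated in a CANN backend buffer.
    size_t row_bytes = t->nb[1];                  // bytes per row (contiguous layout)
    char * host_buf  = (char *) malloc(row_bytes);
    // offset/size describe a sub-range; before this patch get_tensor asserted
    // size == ggml_nbytes(t) and partial reads were rejected.
    ggml_backend_tensor_get(t, host_buf, /*offset=*/row_bytes, /*size=*/row_bytes);
    free(host_buf);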
@@ -1456,24 +1453,23 @@ GGML_CALL static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
         (ggml_backend_cann_context *)backend->context;
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpyAsync(
-            tensor->data, size, (const char*)data + offset, size,
-            ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
+        ACL_CHECK(aclrtMemcpyAsync((char *)tensor->data + offset, size, data,
+                                   size, ACL_MEMCPY_HOST_TO_DEVICE,
+                                   cann_ctx->stream()));
     } else {
         void *transform_buffer = malloc(size);
-        ggml_backend_cann_transform(tensor, (const char*)data + offset,
-                                    transform_buffer);
+        ggml_backend_cann_transform(tensor, data, transform_buffer);
 #ifndef NDEBUG
         void *check_buffer = malloc(size);
         ggml_backend_cann_transform_back(tensor, transform_buffer,
                                          check_buffer);
-        GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size));
+        GGML_ASSERT(memcmp(data, check_buffer, size));
         free(check_buffer);
 #endif
-        ACL_CHECK(aclrtMemcpyAsync(tensor->data, size, transform_buffer, size,
-                                   ACL_MEMCPY_HOST_TO_DEVICE,
-                                   cann_ctx->stream()));
+        ACL_CHECK(aclrtMemcpyAsync(
+            (char *)tensor->data + offset, size, transform_buffer, size,
+            ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
         ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
         free(transform_buffer);
     }
@@ -1491,17 +1487,16 @@ GGML_CALL static void ggml_backend_cann_get_tensor_async(
                       "unsupported buffer type");
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpyAsync((char*)data + offset, size, tensor->data,
-                                   size, ACL_MEMCPY_DEVICE_TO_HOST,
-                                   cann_ctx->stream()));
+        ACL_CHECK(aclrtMemcpyAsync(data, size, (char *)tensor->data + offset,
+                                   size, ACL_MEMCPY_DEVICE_TO_HOST,
+                                   cann_ctx->stream()));
     } else {
         void *transform_buffer = malloc(size);
-        ACL_CHECK(aclrtMemcpyAsync(transform_buffer, size, tensor->data, size,
-                                   ACL_MEMCPY_DEVICE_TO_HOST,
-                                   cann_ctx->stream()));
+        ACL_CHECK(aclrtMemcpyAsync(
+            transform_buffer, size, (char *)tensor->data + offset, size,
+            ACL_MEMCPY_DEVICE_TO_HOST, cann_ctx->stream()));
         ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
-        ggml_backend_cann_transform_back(tensor, transform_buffer,
-                                         (char*)data + offset);
+        ggml_backend_cann_transform_back(tensor, transform_buffer, data);
         free(transform_buffer);
     }
 }
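In the asynchronous transform paths the host staging buffer is only valid while the enqueued copy is in flight, which is why the code above synchronizes the stream before decoding or freeing it. A reduced sketch of the device-to-host direction, assuming the usual ACL semantics that aclrtMemcpyAsync merely enqueues the copy on the stream and aclrtSynchronizeStream blocks until it completes:

    // Reduced sketch of the async get path for types that need_transform().
    void * staging = malloc(size);
    // Enqueue: device (tensor->data + offset) -> host staging buffer.
    ACL_CHECK(aclrtMemcpyAsync(staging, size, (char *)tensor->data + offset,
                               size, ACL_MEMCPY_DEVICE_TO_HOST,
                               cann_ctx->stream()));
    // Wait for the copy to finish before the host buffer is read.
    ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
    // Decode the device layout back into the caller's buffer `data`.
    ggml_backend_cann_transform_back(tensor, staging, data);
    free(staging);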