ggml-backend : add device and backend reg interfaces (#9707)
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
parent a39ab216aa
commit c83ad6d01e
28 changed files with 1809 additions and 1303 deletions
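The hunks below are the CANN backend's side of the migration: the `GGML_CALL` calling-convention macro is dropped from every backend entry point, the `event_new`/`event_free`/`event_synchronize` hooks leave the `ggml_backend_i` table, and the old ad-hoc registration path (`ggml_backend_reg_cann_init`, `ggml_backend_cann_reg_devices`, `ggml_backend_register`) is deleted in favour of the new device and backend-registry interfaces. As a rough, hedged sketch of how callers are expected to pick up backends through the new device interface, the snippet below enumerates devices and initializes one backend per device; the helper names (`ggml_backend_dev_count`, `ggml_backend_dev_get`, `ggml_backend_dev_name`, `ggml_backend_dev_description`, `ggml_backend_dev_init`) come from `ggml-backend.h` as it stands after this change and are not part of the diff shown here.

```cpp
// Hedged sketch, not part of this commit: enumerate the devices exposed by the
// new registry/device interfaces and create a backend for each of them.
#include "ggml-backend.h"

#include <cstdio>

int main() {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        printf("device %zu: %s - %s\n", i,
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));

        // params is backend-specific and may be NULL for defaults
        ggml_backend_t backend = ggml_backend_dev_init(dev, /* params = */ nullptr);
        if (backend != nullptr) {
            // ... build and compute graphs with this backend ...
            ggml_backend_free(backend);
        }
    }
    return 0;
}
```

Backends that still need direct construction (for example `ggml_backend_cann_init(device)`) keep their public entry points; the hunks near the end of the file only strip `GGML_CALL` from them.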
@@ -560,7 +560,7 @@ struct ggml_backend_cann_buffer_context {
  * @return A pointer to a C-string containing the name of the buffer.
  */
-GGML_CALL static const char* ggml_backend_cann_buffer_get_name(
+static const char* ggml_backend_cann_buffer_get_name(
     ggml_backend_buffer_t buffer) {
     return "CANN";

@@ -576,7 +576,7 @@ GGML_CALL static const char* ggml_backend_cann_buffer_get_name(
  * @param buffer The buffer to check.
  * @return true if the buffer is a CANN buffer, false otherwise.
  */
-GGML_CALL static bool ggml_backend_buffer_is_cann(
+static bool ggml_backend_buffer_is_cann(
     ggml_backend_buffer_t buffer) {
     return buffer->iface.get_name == ggml_backend_cann_buffer_get_name;
 }

@@ -589,7 +589,7 @@ GGML_CALL static bool ggml_backend_buffer_is_cann(
  *
  * @param buffer The CANN buffer to free.
  */
-GGML_CALL static void ggml_backend_cann_buffer_free_buffer(
+static void ggml_backend_cann_buffer_free_buffer(
     ggml_backend_buffer_t buffer) {
     ggml_backend_cann_buffer_context* ctx =
         (ggml_backend_cann_buffer_context*)buffer->context;

@@ -605,7 +605,7 @@ GGML_CALL static void ggml_backend_cann_buffer_free_buffer(
  * @param buffer The CANN buffer whose base pointer is to be retrieved.
  * @return A pointer to the base of the device memory allocated for the buffer.
  */
-GGML_CALL static void* ggml_backend_cann_buffer_get_base(
+static void* ggml_backend_cann_buffer_get_base(
     ggml_backend_buffer_t buffer) {
     ggml_backend_cann_buffer_context* ctx =
         (ggml_backend_cann_buffer_context*)buffer->context;

@@ -625,9 +625,9 @@ GGML_CALL static void* ggml_backend_cann_buffer_get_base(
  * @param dst Pointer to the destination buffer where transformed data will be
  * stored.
  */
-GGML_CALL static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor,
-                                                       const void* src,
-                                                       void* dst) {
+static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor,
+                                             const void* src,
+                                             void* dst) {

     int64_t n_elems = ggml_nelements(tensor);
     int64_t groups = n_elems / QK4_0;

@@ -677,7 +677,7 @@ GGML_CALL static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor,
  * @param dst Pointer to the destination buffer where the Q4.0 formatted data
  * will be stored.
  */
-GGML_CALL static void ggml_backend_cann_transform_back_q4_0(
+static void ggml_backend_cann_transform_back_q4_0(
     const ggml_tensor* tensor, void* src, void* dst) {

     int64_t n_elems = ggml_nelements(tensor);

@@ -726,9 +726,9 @@ GGML_CALL static void ggml_backend_cann_transform_back_q4_0(
  * @param dst Pointer to the destination buffer where transformed data will be
  * stored.
  */
-GGML_CALL static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor,
-                                                       const void* src,
-                                                       void* dst) {
+static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor,
+                                             const void* src,
+                                             void* dst) {
     int64_t n_elems = ggml_nelements(tensor);
     int64_t groups = n_elems / QK8_0;
     size_t quant_bytes = n_elems * sizeof(uint8_t);

@@ -760,7 +760,7 @@ GGML_CALL static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor,
  * @param dst Pointer to the destination buffer where the Q8.0 formatted data
  * will be stored.
  */
-GGML_CALL static void ggml_backend_cann_transform_back_q8_0(
+static void ggml_backend_cann_transform_back_q8_0(
     const ggml_tensor* tensor, const void* src, void* dst) {
     int64_t n_elems = ggml_nelements(tensor);
     int64_t groups = n_elems / QK8_0;

@@ -792,8 +792,8 @@ GGML_CALL static void ggml_backend_cann_transform_back_q8_0(
  * @param dst Pointer to the destination buffer where transformed data will be
  * stored.
  */
-GGML_CALL static void ggml_backend_cann_transform(ggml_tensor* tensor,
-                                                  const void* src, void* dst) {
+static void ggml_backend_cann_transform(ggml_tensor* tensor,
+                                        const void* src, void* dst) {
     switch (tensor->type) {
         case GGML_TYPE_Q4_0:
             ggml_backend_cann_transform_q4_0(tensor, src, dst);

@@ -818,7 +818,7 @@ GGML_CALL static void ggml_backend_cann_transform(ggml_tensor* tensor,
  * @param dst Pointer to the destination buffer where transformed tensor data
  * will be stored.
  */
-GGML_CALL static void ggml_backend_cann_transform_back(
+static void ggml_backend_cann_transform_back(
     const ggml_tensor* tensor, void* src, void* dst) {
     switch (tensor->type) {
         case GGML_TYPE_Q4_0:

@@ -841,7 +841,7 @@ GGML_CALL static void ggml_backend_cann_transform_back(
  * @param type The tensor type to check.
  * @return true if transformation is needed, false otherwise.
  */
-GGML_CALL static bool need_transform(ggml_type type) {
+static bool need_transform(ggml_type type) {
     switch (type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q8_0:

@@ -860,7 +860,7 @@ GGML_CALL static bool need_transform(ggml_type type) {
  * @param buffer The CANN buffer from which to initialize the tensor.
  * @param tensor Pointer to the tensor to be initialized.
  */
-GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
+static void ggml_backend_cann_buffer_init_tensor(
     ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
     if (tensor->view_src != NULL && tensor->view_offs == 0) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);

@@ -896,7 +896,7 @@ GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
  * @param offset Offset in the source data from where to start copying.
  * @param size Size of the data to be copied, in bytes.
  */
-GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
+static void ggml_backend_cann_buffer_set_tensor(
     ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data,
     size_t offset, size_t size) {
     ggml_backend_cann_buffer_context *ctx =

@@ -941,7 +941,7 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
  * @param offset Offset in the destination buffer where to start copying.
  * @param size Size of the data to be copied, in bytes.
  */
-GGML_CALL static void ggml_backend_cann_buffer_get_tensor(
+static void ggml_backend_cann_buffer_get_tensor(
     ggml_backend_buffer_t buffer, const ggml_tensor* tensor, void* data,
     size_t offset, size_t size) {
     ggml_backend_cann_buffer_context* ctx =

@@ -975,7 +975,7 @@ GGML_CALL static void ggml_backend_cann_buffer_get_tensor(
  * @param dst Pointer to the destination tensor where the data will be copied.
  * @return true if the copy operation succeeded, false otherwise.
  */
-GGML_CALL static bool ggml_backend_cann_buffer_cpy_tensor(
+static bool ggml_backend_cann_buffer_cpy_tensor(
     ggml_backend_buffer_t buffer, const ggml_tensor* src, ggml_tensor* dst) {
     if (ggml_backend_buffer_is_cann(src->buffer)) {
         ggml_backend_cann_buffer_context* src_ctx =

@@ -1017,7 +1017,7 @@ GGML_CALL static bool ggml_backend_cann_buffer_cpy_tensor(
  * @param buffer The CANN buffer to be cleared.
  * @param value The value to which each byte in the buffer will be set.
  */
-GGML_CALL static void ggml_backend_cann_buffer_clear(
+static void ggml_backend_cann_buffer_clear(
     ggml_backend_buffer_t buffer, uint8_t value) {
     ggml_backend_cann_buffer_context* ctx =
         (ggml_backend_cann_buffer_context*)buffer->context;

@@ -1065,7 +1065,7 @@ struct ggml_backend_cann_buffer_type_context {
  * @param buft Pointer to the buffer type context.
  * @return Const pointer to the C-style string containing the name.
  */
-GGML_CALL static const char* ggml_backend_cann_buffer_type_name(
+static const char* ggml_backend_cann_buffer_type_name(
     ggml_backend_buffer_type_t buft) {
     return "CANN";

@@ -1082,7 +1082,7 @@ GGML_CALL static const char* ggml_backend_cann_buffer_type_name(
  * @param size Size in bytes of the buffer to allocate.
  * @return Pointer to the allocated buffer, or nullptr if allocation fails.
  */
-GGML_CALL static ggml_backend_buffer_t
+static ggml_backend_buffer_t
 ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
                                            size_t size) {
     ggml_backend_cann_buffer_type_context* buft_ctx =

@@ -1121,7 +1121,7 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
  * @return The alignment requirement in bytes (fixed at 128 bytes for CANN
  * buffers).
  */
-GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alignment(
+static size_t ggml_backend_cann_buffer_type_get_alignment(
     ggml_backend_buffer_type_t buft) {
     return 128;

@@ -1142,7 +1142,7 @@ GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alignment(
  * @return The total allocation size in bytes required for the tensor in the
  * CANN buffer.
  */
-GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alloc_size(
+static size_t ggml_backend_cann_buffer_type_get_alloc_size(
     ggml_backend_buffer_type_t buft, const ggml_tensor* tensor) {
     size_t size = ggml_nbytes(tensor);
     int64_t ne0 = tensor->ne[0];

@@ -1193,7 +1193,7 @@ static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
  * @return A pointer to the buffer type interface for the specified device, or
  * nullptr if the device index is out of range.
  */
-GGML_CALL ggml_backend_buffer_type_t
+ggml_backend_buffer_type_t
 ggml_backend_cann_buffer_type(int32_t device) {
     static std::mutex mutex;
     std::lock_guard<std::mutex> lock(mutex);

@@ -1231,7 +1231,7 @@ ggml_backend_cann_buffer_type(int32_t device) {
  * @param buft Pointer to the host buffer type context.
  * @return Const pointer to the C-style string containing the name.
  */
-GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
+static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
     return "CANN_Host";

     GGML_UNUSED(buft);

@@ -1246,7 +1246,7 @@ GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backe
  * @param buft Pointer to the host buffer context.
  * @return Const pointer to the C-style string containing the name.
  */
-GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
+static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
     return "CANN_Host";

     GGML_UNUSED(buffer);

@@ -1260,7 +1260,7 @@ GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_bu
  *
  * @param buffer The CANN host buffer to free.
  */
-GGML_CALL static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
+static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
     ACL_CHECK(aclrtFreeHost(buffer->context));
 }

@@ -1294,7 +1294,7 @@ static void * ggml_cann_host_malloc(size_t size) {
  * @param size Size in bytes of the host buffer to allocate.
  * @return Pointer to the allocated host buffer, or CPU buffer pointer if allocation fails.
  */
-GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
     void * hostPtr = ggml_cann_host_malloc(size);

     if (hostPtr == nullptr) {

@@ -1316,7 +1316,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_
  * Provides function pointers for allocating, querying properties, and managing
  * memory for CANN buffer types in the GGML backend.
  */
-GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
+ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
     static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
         /* .iface = */ {
             /* .get_name = */ ggml_backend_cann_host_buffer_type_name,

@@ -1326,6 +1326,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
             /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
             /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
         },
+        /* .device = */ nullptr,
         /* .context = */ nullptr,
     };

@@ -1495,7 +1496,7 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
  * @param backend Pointer to the CANN backend structure.
  * @return A pointer to a constant string representing the backend name.
  */
-GGML_CALL static const char* ggml_backend_cann_name(ggml_backend_t backend) {
+static const char* ggml_backend_cann_name(ggml_backend_t backend) {
     ggml_backend_cann_context* cann_ctx =
         (ggml_backend_cann_context*)backend->context;

@@ -1510,7 +1511,7 @@ GGML_CALL static const char* ggml_backend_cann_name(ggml_backend_t backend) {
  *
  * @param backend Pointer to the CANN backend structure to be freed.
  */
-GGML_CALL static void ggml_backend_cann_free(ggml_backend_t backend) {
+static void ggml_backend_cann_free(ggml_backend_t backend) {
     ggml_backend_cann_context* cann_ctx =
         (ggml_backend_cann_context*)backend->context;
     ACL_CHECK(aclrtSynchronizeDevice());

@@ -1535,7 +1536,7 @@ GGML_CALL static void ggml_backend_cann_free(ggml_backend_t backend) {
  * @param backend Pointer to the CANN backend structure.
  * @return Pointer to the buffer type structure for the CANN backend.
  */
-GGML_CALL static ggml_backend_buffer_type_t
+static ggml_backend_buffer_type_t
 ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
     ggml_backend_cann_context* cann_ctx =
         (ggml_backend_cann_context*)backend->context;

@@ -1556,11 +1557,11 @@ ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
  * @param offset Offset in bytes within the host data.
  * @param size Size of the data to copy in bytes.
  */
-GGML_CALL static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
-                                                         ggml_tensor *tensor,
-                                                         const void *data,
-                                                         size_t offset,
-                                                         size_t size) {
+static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
+                                               ggml_tensor *tensor,
+                                               const void *data,
+                                               size_t offset,
+                                               size_t size) {
     ggml_backend_cann_context *cann_ctx =
         (ggml_backend_cann_context *)backend->context;

@@ -1587,7 +1588,7 @@ GGML_CALL static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
     }
 }

-GGML_CALL static void ggml_backend_cann_get_tensor_async(
+static void ggml_backend_cann_get_tensor_async(
     ggml_backend_t backend, const ggml_tensor *tensor, void *data,
     size_t offset, size_t size) {
     ggml_backend_cann_context *cann_ctx =

@@ -1626,7 +1627,7 @@ GGML_CALL static void ggml_backend_cann_get_tensor_async(
  * @param dst Pointer to the destination tensor to copy data to.
  * @return true if the copy operation succeeds, false otherwise.
  */
-GGML_CALL static bool ggml_backend_cann_cpy_tensor_async(
+static bool ggml_backend_cann_cpy_tensor_async(
     ggml_backend_t backend_src, ggml_backend_t backend_dst,
     const ggml_tensor* src, ggml_tensor* dst) {
     GGML_ASSERT(ggml_backend_is_cann(backend_src) ||

@@ -1694,7 +1695,7 @@ GGML_CALL static bool ggml_backend_cann_cpy_tensor_async(
  *
  * @param backend Pointer to the CANN backend structure to synchronize.
  */
-GGML_CALL static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
+static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
     ggml_backend_cann_context* cann_ctx =
         (ggml_backend_cann_context*)backend->context;

@@ -1715,7 +1716,7 @@ GGML_CALL static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
  * @return enum ggml_status Returns GGML_STATUS_SUCCESS if computation
  * completes successfully, otherwise an appropriate error status.
  */
-GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute(
+static enum ggml_status ggml_backend_cann_graph_compute(
     ggml_backend_t backend, ggml_cgraph* cgraph) {
     ggml_backend_cann_context* cann_ctx =
         (ggml_backend_cann_context*)backend->context;

@@ -1753,7 +1754,7 @@ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute(
  * @return bool Returns true if the operation is supported by the backend,
  * otherwise false.
  */
-GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
+static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
                                                     const ggml_tensor* op) {
     switch (op->op) {
         case GGML_OP_UNARY:

@@ -1875,7 +1876,7 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
  * @return bool Returns true if the CANN backend supports the buffer type,
  * otherwise false.
  */
-GGML_CALL static bool ggml_backend_cann_supports_buft(
+static bool ggml_backend_cann_supports_buft(
     ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
     if (ggml_backend_buft_is_cann(buft)) {
         ggml_backend_cann_context * cann_ctx =

@@ -1901,7 +1902,7 @@ GGML_CALL static bool ggml_backend_cann_supports_buft(
  * @return bool Returns true if the operation should be offloaded, otherwise
  * false.
  */
-GGML_CALL static bool ggml_backend_cann_offload_op(ggml_backend_t backend,
+static bool ggml_backend_cann_offload_op(ggml_backend_t backend,
                                                    const ggml_tensor* op) {
     const int min_batch_size = 32;
     GGML_UNUSED(backend);

@@ -2021,11 +2022,8 @@ static ggml_backend_i ggml_backend_cann_interface = {
     /* .supports_op = */ ggml_backend_cann_supports_op,
     /* .supports_buft = */ ggml_backend_cann_supports_buft,
     /* .offload_op = */ ggml_backend_cann_offload_op,
-    /* .event_new = */ ggml_backend_cann_event_new,
-    /* .event_free = */ ggml_backend_cann_event_free,
     /* .event_record = */ ggml_backend_cann_event_record,
     /* .event_wait = */ ggml_backend_cann_event_wait,
-    /* .event_synchronize = */ ggml_backend_cann_event_synchronize,
 };

 /**

@@ -2042,7 +2040,7 @@ static ggml_guid_t ggml_backend_cann_guid() {
     return &guid;
 }

-GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device) {
+ggml_backend_t ggml_backend_cann_init(int32_t device) {
     aclInit(nullptr);
     if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
         GGML_CANN_LOG_ERROR("%s: error: invalid device %d\n", __func__, device);

@@ -2058,75 +2056,30 @@ GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device) {
     ggml_backend_t cann_backend =
         new ggml_backend{/* .guid      = */ ggml_backend_cann_guid(),
                          /* .interface = */ ggml_backend_cann_interface,
+                         /* .device    = */ nullptr,
                          /* .context   = */ ctx};

     return cann_backend;
 }

-GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend) {
+bool ggml_backend_is_cann(ggml_backend_t backend) {
     return backend != NULL &&
            ggml_guid_matches(backend->guid, ggml_backend_cann_guid());
 }

-GGML_CALL int32_t ggml_backend_cann_get_device_count() {
+int32_t ggml_backend_cann_get_device_count() {
     return ggml_cann_info().device_count;
 }

-GGML_CALL void ggml_backend_cann_get_device_description(
+void ggml_backend_cann_get_device_description(
     int32_t device, char* description, size_t description_size) {
     ggml_cann_set_device(device);
     const char* soc_name = aclrtGetSocName();
     snprintf(description, description_size, "%s", soc_name);
 }

-GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device, size_t* free,
-                                                   size_t* total) {
+void ggml_backend_cann_get_device_memory(int32_t device, size_t* free,
+                                         size_t* total) {
     ggml_cann_set_device(device);
     ACL_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, free, total));
 }
-
-// backend registry
-/**
- * @brief Initializes a CANN backend based on the provided parameters.
- *
- * This function initializes a CANN backend using the device index and then
- * initializes the backend using `ggml_backend_cann_init`.
- *
- * @param params Parameters for initialization (unused in this implementation).
- * @param user_data User data containing the device index to initialize the
- * backend.
- * @return ggml_backend_t The initialized CANN backend.
- */
-GGML_CALL static ggml_backend_t ggml_backend_reg_cann_init(const char* params,
-                                                           void* user_data) {
-    ggml_backend_t cann_backend =
-        ggml_backend_cann_init((int)(intptr_t)user_data);
-    return cann_backend;
-
-    GGML_UNUSED(params);
-}
-
-extern "C" GGML_CALL int ggml_backend_cann_reg_devices();
-
-/**
- * @brief Registers CANN (Ascend) devices as backend options.
- *
- * This function initializes ACL, retrieves the number of available CANN
- * devices, and registers each device as a backend option using
- * `ggml_backend_register`. Each device is given a unique name based on
- * `GGML_CANN_NAME` followed by its index.
- *
- * @return int The number of CANN devices registered.
- */
-GGML_CALL int ggml_backend_cann_reg_devices() {
-    uint32_t device_count = ggml_backend_cann_get_device_count();
-    // initialization
-    for (uint32_t i = 0; i < device_count; i++) {
-        char name[128];
-        snprintf(name, sizeof(name), "CANN%d", i);
-        ggml_backend_register(name, ggml_backend_reg_cann_init,
-                              ggml_backend_cann_buffer_type(i),
-                              (void*)(intptr_t)i);
-    }
-    return device_count;
-}