llama : refactor model loader with backend registry (#10026)

This commit is contained in:
Diego Devesa 2024-10-30 02:01:23 +01:00 committed by GitHub
parent 8f275a7c45
commit c5b0f4b5d9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1903 additions and 2019 deletions

View file

@ -489,23 +489,6 @@ struct ggml_backend_cann_buffer_context {
~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); }
};
/**
* @brief Retrieve the name associated with a CANN buffer.
*
* This function returns the name of a CANN buffer, which is stored in the
* context of the buffer.
*
* @param buffer The CANN buffer whose name is to be retrieved.
* @return A pointer to a C-string containing the name of the buffer.
*/
static const char* ggml_backend_cann_buffer_get_name(
ggml_backend_buffer_t buffer) {
return "CANN";
GGML_UNUSED(buffer);
}
/**
* @brief Check if a buffer is a CANN buffer.
*
@ -515,9 +498,10 @@ static const char* ggml_backend_cann_buffer_get_name(
* @param buffer The buffer to check.
* @return true if the buffer is a CANN buffer, false otherwise.
*/
static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft);
static bool ggml_backend_buffer_is_cann(
ggml_backend_buffer_t buffer) {
return buffer->iface.get_name == ggml_backend_cann_buffer_get_name;
return ggml_backend_buft_is_cann(buffer->buft);
}
/**
@ -965,7 +949,6 @@ static void ggml_backend_cann_buffer_clear(
* on a CANN buffer within the backend.
*/
static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
/* .get_name = */ ggml_backend_cann_buffer_get_name,
/* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
/* .get_base = */ ggml_backend_cann_buffer_get_base,
/* .init_tensor = */ ggml_backend_cann_buffer_init_tensor,
@ -999,9 +982,10 @@ struct ggml_backend_cann_buffer_type_context {
*/
static const char* ggml_backend_cann_buffer_type_name(
ggml_backend_buffer_type_t buft) {
return "CANN";
ggml_backend_cann_buffer_type_context* buft_ctx =
(ggml_backend_cann_buffer_type_context*)buft->context;
GGML_UNUSED(buft);
return buft_ctx->name.c_str();
}
/**
@ -1465,24 +1449,6 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
delete backend;
}
/**
* @brief Retrieves the default buffer type associated with the CANN backend.
*
* This function returns the buffer type specific to the device associated
* with the CANN backend. It is used to allocate buffers for computations
* performed by the backend.
*
* @param backend Pointer to the CANN backend structure.
* @return Pointer to the buffer type structure for the CANN backend.
*/
static ggml_backend_buffer_type_t
ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
ggml_backend_cann_context* cann_ctx =
(ggml_backend_cann_context*)backend->context;
return ggml_backend_cann_buffer_type(cann_ctx->device);
}
/**
* @brief Sets tensor data asynchronously in the CANN backend.
*
@ -1863,7 +1829,6 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
static const ggml_backend_i ggml_backend_cann_interface = {
/* .get_name = */ ggml_backend_cann_name,
/* .free = */ ggml_backend_cann_free,
/* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
/* .set_tensor_async = */ ggml_backend_cann_set_tensor_async,
/* .get_tensor_async = */ ggml_backend_cann_get_tensor_async,
/* .cpy_tensor_async = */ ggml_backend_cann_cpy_tensor_async,
@ -1873,9 +1838,6 @@ static const ggml_backend_i ggml_backend_cann_interface = {
/* .graph_plan_update = */ NULL,
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_cann_graph_compute,
/* .supports_op = */ NULL, // moved to device
/* .supports_buft = */ NULL, // moved to device
/* .offload_op = */ NULL, // moved to device
/* .event_record = */ ggml_backend_cann_event_record,
/* .event_wait = */ ggml_backend_cann_event_wait,
};
@ -1918,7 +1880,7 @@ static void ggml_backend_cann_device_get_memory(ggml_backend_dev_t dev, size_t *
static enum ggml_backend_dev_type ggml_backend_cann_device_get_type(ggml_backend_dev_t dev) {
GGML_UNUSED(dev);
return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
return GGML_BACKEND_DEVICE_TYPE_GPU;
}
static void ggml_backend_cann_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {