feat: Add host buffer type for Ascend NPU(CANN backend)
This commit is contained in:
parent
436787f170
commit
490da45f54
3 changed files with 78 additions and 0 deletions
|
@ -80,6 +80,13 @@ ggml_backend_cann_buffer_type(int32_t device);
|
|||
*/
|
||||
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
|
||||
|
||||
/**
|
||||
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
|
||||
*
|
||||
* @return A pointer to the host buffer type interface.
|
||||
*/
|
||||
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the description of a specific CANN device.
|
||||
*
|
||||
|
|
|
@ -1220,6 +1220,73 @@ ggml_backend_cann_buffer_type(int32_t device) {
|
|||
return &ggml_backend_cann_buffer_types[device];
|
||||
}
|
||||
|
||||
// host buffer type
|
||||
|
||||
GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
||||
return "CANN_Host";
|
||||
|
||||
GGML_UNUSED(buft);
|
||||
}
|
||||
|
||||
GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
|
||||
return "CANN_Host";
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
GGML_CALL static void ggml_backend_cann_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||
ACL_CHECK(aclrtFreeHost(buffer->context));
|
||||
}
|
||||
|
||||
static void * ggml_cann_host_malloc(size_t size) {
|
||||
if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void * ptr = nullptr;
|
||||
aclError err = aclrtMallocHost((void **) &ptr, size);
|
||||
if (err != ACL_SUCCESS) {
|
||||
|
||||
GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
|
||||
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
||||
void * ptr = ggml_cann_host_malloc(size);
|
||||
|
||||
if (ptr == nullptr) {
|
||||
// fallback to cpu buffer
|
||||
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
|
||||
buffer->buft = buft;
|
||||
buffer->iface.get_name = ggml_backend_cann_host_buffer_name;
|
||||
buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free_buffer;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
|
||||
static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
},
|
||||
/* .context = */ nullptr,
|
||||
};
|
||||
|
||||
return &ggml_backend_cann_buffer_type_host;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Computes the forward operation for a given tensor using CANN
|
||||
* operations.
|
||||
|
|
|
@ -2088,6 +2088,10 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
|
|||
if (host_buffer) {
|
||||
buft = ggml_backend_sycl_host_buffer_type();
|
||||
}
|
||||
#elif defined(GGML_USE_CANN)
|
||||
if (host_buffer) {
|
||||
buft = ggml_backend_cann_host_buffer_type();
|
||||
}
|
||||
#elif defined(GGML_USE_CPU_HBM)
|
||||
buft = ggml_backend_cpu_hbm_buffer_type();
|
||||
#elif defined(GGML_USE_VULKAN)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue