feat: Add host buffer type for Ascend NPU (CANN backend)

This commit is contained in:
dou 2024-09-10 10:24:43 +08:00
parent 436787f170
commit 490da45f54
3 changed files with 78 additions and 0 deletions

View file

@ -80,6 +80,13 @@ ggml_backend_cann_buffer_type(int32_t device);
*/ */
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void); GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
/**
 * @brief Returns the pinned host buffer type, intended for use with the CPU
 *        backend for faster copies between CPU and NPU.
 *
 * Buffers allocated from this type use pinned host memory when it can be
 * obtained; otherwise allocation falls back to a regular CPU buffer.
 *
 * @return A pointer to the host buffer type interface.
 */
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
/** /**
* @brief Retrieves the description of a specific CANN device. * @brief Retrieves the description of a specific CANN device.
* *

View file

@ -1220,6 +1220,73 @@ ggml_backend_cann_buffer_type(int32_t device) {
return &ggml_backend_cann_buffer_types[device]; return &ggml_backend_cann_buffer_types[device];
} }
// host buffer type
/**
 * @brief Reports the name of the CANN host buffer type.
 *
 * @param buft Buffer type handle; not used.
 * @return The constant string "CANN_Host".
 */
GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(buft);
    return "CANN_Host";
}
/**
 * @brief Reports the name of a CANN host buffer.
 *
 * @param buffer Buffer handle; not used.
 * @return The constant string "CANN_Host".
 */
GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
    GGML_UNUSED(buffer);
    return "CANN_Host";
}
/**
 * @brief Releases the host memory owned by a CANN host buffer.
 *
 * Passes buffer->context to aclrtFreeHost and checks the result with
 * ACL_CHECK. This function is installed as the free_buffer callback by
 * ggml_backend_cann_host_buffer_type_alloc_buffer.
 *
 * NOTE(review): presumably buffer->context is the pointer obtained from
 * aclrtMallocHost (stored by ggml_backend_cpu_buffer_from_ptr) - confirm.
 *
 * @param buffer Buffer whose context pointer is freed.
 */
GGML_CALL static void ggml_backend_cann_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
ACL_CHECK(aclrtFreeHost(buffer->context));
}
/**
 * @brief Tries to allocate pinned host memory through the ACL runtime.
 *
 * Pinned allocation is skipped entirely when the GGML_CANN_NO_PINNED
 * environment variable is set (to any value).
 *
 * @param size Number of bytes requested.
 * @return Pointer to the allocation, or nullptr if pinned memory is disabled
 *         or aclrtMallocHost reported an error (a warning is logged in the
 *         latter case). Callers are expected to handle nullptr.
 */
static void * ggml_cann_host_malloc(size_t size) {
    if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
        return nullptr;
    }

    void * ptr = nullptr;
    aclError err = aclrtMallocHost(&ptr, size);
    if (err != ACL_SUCCESS) {
        // Never hand a null pointer to "%s": substitute a placeholder if the
        // runtime has no recent error message to report.
        const char * err_msg = aclGetRecentErrMsg();
        GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
                           size / 1024.0 / 1024.0, err_msg != nullptr ? err_msg : "unknown error");
        return nullptr;
    }

    return ptr;
}
/**
 * @brief Allocates a buffer of the CANN host buffer type.
 *
 * Requests memory from ggml_cann_host_malloc and, on success, wraps it in a
 * CPU buffer whose buffer type, name callback and free callback are replaced
 * with the CANN host variants. If no memory is returned, a buffer from the
 * regular CPU buffer type is returned instead.
 *
 * @param buft Buffer type recorded on the resulting host buffer.
 * @param size Number of bytes to allocate.
 * @return The CANN host buffer, or a plain CPU buffer on fallback.
 */
GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * pinned = ggml_cann_host_malloc(size);
    if (pinned != nullptr) {
        ggml_backend_buffer_t host_buffer = ggml_backend_cpu_buffer_from_ptr(pinned, size);
        host_buffer->buft              = buft;
        host_buffer->iface.get_name    = ggml_backend_cann_host_buffer_name;
        host_buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free_buffer;
        return host_buffer;
    }

    // no pinned memory available: fall back to a cpu buffer
    return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
}
/**
 * @brief Returns the CANN host buffer type descriptor.
 *
 * The descriptor is a function-local static. It uses the CANN host name and
 * allocation callbacks defined in this file and borrows the alignment,
 * allocation-size and is_host callbacks from the CPU buffer type.
 *
 * @return Pointer to the static host buffer type.
 */
GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
    static struct ggml_backend_buffer_type host_buft = {
        /* .iface   = */ {
            /* .get_name       = */ ggml_backend_cann_host_buffer_type_name,
            /* .alloc_buffer   = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
            /* .get_alignment  = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
            /* .get_max_size   = */ nullptr, // defaults to SIZE_MAX
            /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
            /* .is_host        = */ ggml_backend_cpu_buffer_type()->iface.is_host,
        },
        /* .context = */ nullptr,
    };

    return &host_buft;
}
/** /**
* @brief Computes the forward operation for a given tensor using CANN * @brief Computes the forward operation for a given tensor using CANN
* operations. * operations.

View file

@ -2088,6 +2088,10 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
if (host_buffer) { if (host_buffer) {
buft = ggml_backend_sycl_host_buffer_type(); buft = ggml_backend_sycl_host_buffer_type();
} }
#elif defined(GGML_USE_CANN)
if (host_buffer) {
buft = ggml_backend_cann_host_buffer_type();
}
#elif defined(GGML_USE_CPU_HBM) #elif defined(GGML_USE_CPU_HBM)
buft = ggml_backend_cpu_hbm_buffer_type(); buft = ggml_backend_cpu_hbm_buffer_type();
#elif defined(GGML_USE_VULKAN) #elif defined(GGML_USE_VULKAN)